Update README.md
#1
by lwilkinson - opened
README.md
CHANGED
|
@@ -19,7 +19,6 @@ It was evaluated on several tasks to assess its quality in comparison to the unq
|
|
| 19 |
VLLM_USE_V2_MODEL_RUNNER=1 \
|
| 20 |
vllm serve RedHatAI/diffusiongemma-26B-A4B-it-FP8-dynamic \
|
| 21 |
--trust-remote-code \
|
| 22 |
-
--attention-backend TRITON_ATTN \
|
| 23 |
--max-num-seqs 4 \
|
| 24 |
--hf-overrides '{"diffusion_sampler": "entropy_bound", "diffusion_entropy_bound": 0.1}' \
|
| 25 |
--default-chat-template-kwargs '{"enable_thinking": true}'
|
|
|
|
| 19 |
VLLM_USE_V2_MODEL_RUNNER=1 \
|
| 20 |
vllm serve RedHatAI/diffusiongemma-26B-A4B-it-FP8-dynamic \
|
| 21 |
--trust-remote-code \
|
|
|
|
| 22 |
--max-num-seqs 4 \
|
| 23 |
--hf-overrides '{"diffusion_sampler": "entropy_bound", "diffusion_entropy_bound": 0.1}' \
|
| 24 |
--default-chat-template-kwargs '{"enable_thinking": true}'
|