Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-10-14 15:52:49 +08:00)
[script] fix vllm version (#7193)

Former-commit-id: ababdde597b2b9bf0ab3f30f036bc8d97de07f03

Parent: 2635794727
Commit: f4ec4fa6ad
@@ -55,7 +55,7 @@ def vllm_infer(
     Performs batch generation using vLLM engine, which supports tensor parallelism.
 
     Usage: python vllm_infer.py --model_name_or_path meta-llama/Llama-2-7b-hf --template llama --dataset alpaca_en_demo
     """
-    check_version("vllm>=0.4.3,<=0.7.2")
+    check_version("vllm>=0.4.3,<=0.7.3")
     if pipeline_parallel_size > get_device_count():
         raise ValueError("Pipeline parallel size should be smaller than the number of gpus.")
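
For context, the changed line raises the upper bound of the accepted vLLM range from 0.7.2 to 0.7.3. The sketch below shows how a requirement string such as "vllm>=0.4.3,<=0.7.3" can be validated against the installed package using the packaging library; this is an illustrative assumption, not LLaMA-Factory's actual check_version implementation.

# Illustrative sketch of a version-range guard like check_version("vllm>=0.4.3,<=0.7.3").
# NOTE: this is NOT the repository's implementation; it only demonstrates how such a pin
# can be enforced with the standard packaging/importlib.metadata APIs.
from importlib.metadata import PackageNotFoundError, version

from packaging.requirements import Requirement


def check_version(requirement: str) -> None:
    """Raise if the installed package does not satisfy the requirement string."""
    req = Requirement(requirement)  # parses name and specifiers, e.g. vllm >=0.4.3,<=0.7.3
    try:
        installed = version(req.name)  # installed version from package metadata
    except PackageNotFoundError:
        raise RuntimeError(f"{req.name} is not installed, but {requirement} is required.")

    # contains() checks the installed version against every specifier in the set.
    if not req.specifier.contains(installed, prereleases=True):
        raise RuntimeError(f"{req.name}=={installed} does not satisfy {requirement}.")


check_version("vllm>=0.4.3,<=0.7.3")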