[infer] fix vllm args (#7235)

Former-commit-id: ef7af457fc
hoshi-hiyouga
2025-03-11 01:15:35 +08:00
committed by GitHub
parent 0a43bc1960
commit 317d0855d2
4 changed files with 32 additions and 26 deletions


@@ -170,7 +170,7 @@ class VllmEngine(BaseEngine):
                 or 1.0,  # repetition_penalty must be > 0
                 temperature=temperature if temperature is not None else self.generating_args["temperature"],
                 top_p=(top_p if top_p is not None else self.generating_args["top_p"]) or 1.0,  # top_p must be > 0
-                top_k=top_k if top_k is not None else self.generating_args["top_k"],
+                top_k=(top_k if top_k is not None else self.generating_args["top_k"]) or -1,  # top_k must be > 0, or -1 to disable
                 stop=stop,
                 stop_token_ids=self.template.get_stop_token_ids(self.tokenizer),
                 max_tokens=max_tokens,
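
The fix relies on Python's `or` short-circuiting over falsy values: Hugging Face generation configs use `top_k=0` to mean "disabled", but vLLM's `SamplingParams` rejects `0` and expects `-1` to disable top-k filtering, so a falsy resolved value must be coerced to `-1` before it is passed along. A minimal sketch of the coercion, using a hypothetical `resolve_top_k` helper (not part of the patched code):

```python
def resolve_top_k(top_k, default_top_k):
    """Map a Hugging Face-style top_k (0 = disabled) to vLLM's convention (-1 = disabled).

    The per-request value takes precedence over the configured default; any
    falsy result (an explicit 0, or None falling back to a 0 default) is
    coerced to -1, which vLLM accepts as "consider all tokens".
    """
    value = top_k if top_k is not None else default_top_k
    return value or -1


assert resolve_top_k(None, 0) == -1   # disabled default becomes -1
assert resolve_top_k(0, 50) == -1     # explicit 0 is coerced, not passed through
assert resolve_top_k(20, 50) == 20    # positive values pass unchanged
```

The same `or` idiom already guards `repetition_penalty` and `top_p` in the surrounding lines, where `1.0` is the neutral value vLLM accepts; the commit applies the pattern to `top_k`, whose neutral value is `-1` rather than `1.0`.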