fix #2782 #2798

2025-12-16 11:50:35 +08:00 · 2024-03-12 15:53:29 +08:00
parent c901aa63ff
commit 07f9b754a7
3 changed files with 19 additions and 2 deletions
--- a/src/llmtuner/hparams/model_args.py
+++ b/src/llmtuner/hparams/model_args.py
@@ -89,6 +89,14 @@ class ModelArguments:
        default=2048,
        metadata={"help": "Maximum input length of the vLLM engine."},
    )
+    vllm_gpu_util: float = field(  # vLLM engine option: GPU memory fraction, 0.9 unless overridden
+        default=0.9,
+        metadata={"help": "The fraction of GPU memory in (0,1) to be used for the vLLM engine."},
+    )
+    vllm_enforce_eager: bool = field(  # vLLM engine option: off by default; set to disable CUDA graph
+        default=False,
+        metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
+    )  # NOTE(review): how these two values reach the vLLM engine is not visible in this hunk
    hf_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with Hugging Face Hub."},