feat: pass the max_lora_rank parameter to vLLM backend

Former-commit-id: b20d62ba3ccc5c02529d19e22b7adcfe8b88c326
Author: juejuezi
Date: 2024-05-17 16:07:39 +08:00
Parent: 9af3dce3c8
Commit: 20326affde
2 changed files with 2 additions and 0 deletions


@@ -59,6 +59,7 @@ class VllmEngine(BaseEngine):
"disable_log_requests": True, "disable_log_requests": True,
"enforce_eager": model_args.vllm_enforce_eager, "enforce_eager": model_args.vllm_enforce_eager,
"enable_lora": model_args.adapter_name_or_path is not None, "enable_lora": model_args.adapter_name_or_path is not None,
"max_lora_rank": model_args.vllm_max_lora_rank,
} }
if model_args.visual_inputs: if model_args.visual_inputs:
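
The engine_args dict above is presumably unpacked into vLLM's AsyncEngineArgs before the async engine is built. A minimal standalone sketch of that path, with a placeholder model name in place of the project's model_args (not the project's actual code):

```python
from vllm import AsyncEngineArgs, AsyncLLMEngine

engine_args = {
    "model": "meta-llama/Llama-2-7b-hf",  # placeholder base model
    "enable_lora": True,                  # required to serve LoRA adapters
    "max_lora_rank": 32,                  # must cover the largest adapter rank that will be loaded
}
# AsyncEngineArgs accepts max_lora_rank directly, so forwarding the new
# hyperparameter is just a matter of adding it to this dict.
engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(**engine_args))
```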


@@ -117,6 +117,7 @@ class ModelArguments:
         default=False,
         metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
     )
+    vllm_max_lora_rank: int = field(default=8, metadata={"help": "The maximum supported rank of all LoRAs."})
     offload_folder: str = field(
         default="offload",
         metadata={"help": "Path to offload model weights."},