From 20326affdee37ece8cee5653fc4615653edb4ed1 Mon Sep 17 00:00:00 2001
From: juejuezi
Date: Fri, 17 May 2024 16:07:39 +0800
Subject: [PATCH] feat: pass the `max_lora_rank` parameter to vLLM backend

Former-commit-id: b20d62ba3ccc5c02529d19e22b7adcfe8b88c326
---
 src/llamafactory/chat/vllm_engine.py   | 1 +
 src/llamafactory/hparams/model_args.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py
index 8d602655..ba0cc1b3 100644
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@@ -59,6 +59,7 @@ class VllmEngine(BaseEngine):
             "disable_log_requests": True,
             "enforce_eager": model_args.vllm_enforce_eager,
             "enable_lora": model_args.adapter_name_or_path is not None,
+            "max_lora_rank": model_args.vllm_max_lora_rank,
         }

         if model_args.visual_inputs:
diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py
index ac70bb3c..20ff74aa 100644
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -117,6 +117,7 @@ class ModelArguments:
         default=False,
         metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
     )
+    vllm_max_lora_rank: int = field(default=8, metadata={"help": "The maximum supported rank of all LoRAs."})
     offload_folder: str = field(
         default="offload",
         metadata={"help": "Path to offload model weights."},
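
For context (not part of the patch): a minimal sketch of how the new `vllm_max_lora_rank` field reaches vLLM once the patched `engine_args` dict is unpacked into `AsyncEngineArgs`. The model name and rank value below are illustrative, not taken from the patch.

```python
# Minimal sketch, assuming vLLM's AsyncEngineArgs / AsyncLLMEngine API.
from vllm import AsyncEngineArgs, AsyncLLMEngine

engine_args = AsyncEngineArgs(
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # illustrative model path
    enable_lora=True,            # set when adapter_name_or_path is provided
    max_lora_rank=64,            # forwarded from vllm_max_lora_rank (patch default: 8)
    enforce_eager=False,         # mirrors vllm_enforce_eager
    disable_log_requests=True,
)

# VllmEngine builds the async engine from these arguments.
engine = AsyncLLMEngine.from_engine_args(engine_args)
```

Without this forwarding, vLLM falls back to its own default maximum LoRA rank and rejects adapters trained with a larger rank, which is what the patch addresses.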