diff --git a/examples/extras/adam_mini/qwen2_full_sft.yaml b/examples/extras/adam_mini/qwen2_full_sft.yaml index 3637a8e8..4f227d50 100644 --- a/examples/extras/adam_mini/qwen2_full_sft.yaml +++ b/examples/extras/adam_mini/qwen2_full_sft.yaml @@ -10,7 +10,7 @@ use_adam_mini: true ### dataset dataset: identity,alpaca_en_demo template: qwen -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/badam/llama3_full_sft.yaml b/examples/extras/badam/llama3_full_sft.yaml index 5b91fe7e..00b857ec 100644 --- a/examples/extras/badam/llama3_full_sft.yaml +++ b/examples/extras/badam/llama3_full_sft.yaml @@ -15,7 +15,7 @@ badam_verbose: 2 ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 6c80ef58..7c6c6cd9 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -11,7 +11,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml index 2911496d..4036fc86 100644 --- a/examples/extras/galore/llama3_full_sft.yaml +++ b/examples/extras/galore/llama3_full_sft.yaml @@ -14,7 +14,7 @@ galore_scale: 2.0 ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 07f3e1ca..5c5ca8d3 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -12,7 +12,7 @@ use_llama_pro: true ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 062a312b..23a9fcd8 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -11,7 +11,7 @@ loraplus_lr_ratio: 16.0 ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index f82bbd4c..08d65f8c 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -10,7 +10,7 @@ mixture_of_depths: convert ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/extras/pissa/llama3_lora_sft.yaml b/examples/extras/pissa/llama3_lora_sft.yaml index 6e368036..81fe45db 100644 --- a/examples/extras/pissa/llama3_lora_sft.yaml +++ b/examples/extras/pissa/llama3_lora_sft.yaml @@ -13,7 +13,7 @@ pissa_convert: true ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_full/llama3_full_predict.yaml b/examples/train_full/llama3_full_predict.yaml index 5d2b6028..dcac4925 100644 --- a/examples/train_full/llama3_full_predict.yaml +++ b/examples/train_full/llama3_full_predict.yaml @@ -9,7 +9,7 @@ finetuning_type: full ### dataset eval_dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 50 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_full/llama3_full_sft_ds3.yaml b/examples/train_full/llama3_full_sft_ds3.yaml index c64596a1..e8b151b5 100644 --- a/examples/train_full/llama3_full_sft_ds3.yaml +++ b/examples/train_full/llama3_full_sft_ds3.yaml @@ -10,7 +10,7 @@ deepspeed: examples/deepspeed/ds_z3_config.json ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_full/qwen2vl_full_sft.yaml b/examples/train_full/qwen2vl_full_sft.yaml index a6cd40fb..60057d42 100644 --- a/examples/train_full/qwen2vl_full_sft.yaml +++ b/examples/train_full/qwen2vl_full_sft.yaml @@ -10,7 +10,7 @@ deepspeed: examples/deepspeed/ds_z3_config.json ### dataset dataset: mllm_demo,identity template: qwen2_vl -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_dpo.yaml b/examples/train_lora/llama3_lora_dpo.yaml index d87c0669..db026608 100644 --- a/examples/train_lora/llama3_lora_dpo.yaml +++ b/examples/train_lora/llama3_lora_dpo.yaml @@ -12,7 +12,7 @@ pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo] ### dataset dataset: dpo_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_kto.yaml b/examples/train_lora/llama3_lora_kto.yaml index 08208c25..2c21f752 100644 --- a/examples/train_lora/llama3_lora_kto.yaml +++ b/examples/train_lora/llama3_lora_kto.yaml @@ -11,7 +11,7 @@ pref_beta: 0.1 ### dataset dataset: kto_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_ppo.yaml b/examples/train_lora/llama3_lora_ppo.yaml index 512e90ea..fc2dfb24 100644 --- a/examples/train_lora/llama3_lora_ppo.yaml +++ b/examples/train_lora/llama3_lora_ppo.yaml @@ -11,7 +11,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_predict.yaml b/examples/train_lora/llama3_lora_predict.yaml index c406251e..f7119a8a 100644 --- a/examples/train_lora/llama3_lora_predict.yaml +++ b/examples/train_lora/llama3_lora_predict.yaml @@ -10,7 +10,7 @@ finetuning_type: lora ### dataset eval_dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 50 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_pretrain.yaml b/examples/train_lora/llama3_lora_pretrain.yaml index 7e3ea06c..71f8603c 100644 --- a/examples/train_lora/llama3_lora_pretrain.yaml +++ b/examples/train_lora/llama3_lora_pretrain.yaml @@ -9,7 +9,7 @@ lora_target: all ### dataset dataset: c4_demo -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_reward.yaml b/examples/train_lora/llama3_lora_reward.yaml index 96c32238..b8450ef0 100644 --- a/examples/train_lora/llama3_lora_reward.yaml +++ b/examples/train_lora/llama3_lora_reward.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: dpo_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_sft.yaml b/examples/train_lora/llama3_lora_sft.yaml index 55a8077e..8f998a69 100644 --- a/examples/train_lora/llama3_lora_sft.yaml +++ b/examples/train_lora/llama3_lora_sft.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_sft_ds0.yaml b/examples/train_lora/llama3_lora_sft_ds0.yaml index f1442faa..d5cae8fd 100644 --- a/examples/train_lora/llama3_lora_sft_ds0.yaml +++ b/examples/train_lora/llama3_lora_sft_ds0.yaml @@ -11,7 +11,7 @@ deepspeed: examples/deepspeed/ds_z0_config.json ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_lora_sft_ds3.yaml b/examples/train_lora/llama3_lora_sft_ds3.yaml index 66e7007e..e13d1a76 100644 --- a/examples/train_lora/llama3_lora_sft_ds3.yaml +++ b/examples/train_lora/llama3_lora_sft_ds3.yaml @@ -11,7 +11,7 @@ deepspeed: examples/deepspeed/ds_z3_config.json ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llama3_preprocess.yaml b/examples/train_lora/llama3_preprocess.yaml index 34bb9efc..c7344797 100644 --- a/examples/train_lora/llama3_preprocess.yaml +++ b/examples/train_lora/llama3_preprocess.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/llava1_5_lora_sft.yaml b/examples/train_lora/llava1_5_lora_sft.yaml index 00a2ebc0..1831153b 100644 --- a/examples/train_lora/llava1_5_lora_sft.yaml +++ b/examples/train_lora/llava1_5_lora_sft.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: mllm_demo template: llava -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/qwen2vl_lora_dpo.yaml b/examples/train_lora/qwen2vl_lora_dpo.yaml index 4ff72cea..08d9d8bd 100644 --- a/examples/train_lora/qwen2vl_lora_dpo.yaml +++ b/examples/train_lora/qwen2vl_lora_dpo.yaml @@ -12,7 +12,7 @@ pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo] ### dataset dataset: rlhf_v template: qwen2_vl -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_lora/qwen2vl_lora_sft.yaml b/examples/train_lora/qwen2vl_lora_sft.yaml index c82a856a..c8a18db2 100644 --- a/examples/train_lora/qwen2vl_lora_sft.yaml +++ b/examples/train_lora/qwen2vl_lora_sft.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: mllm_demo,identity # video: mllm_video_demo template: qwen2_vl -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_qlora/llama3_lora_sft_aqlm.yaml b/examples/train_qlora/llama3_lora_sft_aqlm.yaml index 3519d46b..c426e310 100644 --- a/examples/train_qlora/llama3_lora_sft_aqlm.yaml +++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_qlora/llama3_lora_sft_awq.yaml b/examples/train_qlora/llama3_lora_sft_awq.yaml index df48669b..09ab19a4 100644 --- a/examples/train_qlora/llama3_lora_sft_awq.yaml +++ b/examples/train_qlora/llama3_lora_sft_awq.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_qlora/llama3_lora_sft_gptq.yaml b/examples/train_qlora/llama3_lora_sft_gptq.yaml index 61fa9bb4..9b282a76 100644 --- a/examples/train_qlora/llama3_lora_sft_gptq.yaml +++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml @@ -10,7 +10,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/examples/train_qlora/llama3_lora_sft_otfq.yaml b/examples/train_qlora/llama3_lora_sft_otfq.yaml index 80a05768..ed6b4748 100644 --- a/examples/train_qlora/llama3_lora_sft_otfq.yaml +++ b/examples/train_qlora/llama3_lora_sft_otfq.yaml @@ -12,7 +12,7 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo template: llama3 -cutoff_len: 1024 +cutoff_len: 2048 max_samples: 1000 overwrite_cache: true preprocessing_num_workers: 16 diff --git a/setup.py b/setup.py index 2fd7832d..74b6f73b 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ extra_require = { "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], - "vllm": ["vllm>=0.4.3,<=0.6.3"], + "vllm": ["vllm>=0.4.3,<0.6.4"], "galore": ["galore-torch"], "badam": ["badam>=1.2.1"], "adam-mini": ["adam-mini"], diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 21f09a58..f1a6e9d8 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -83,6 +83,7 @@ class VllmEngine(BaseEngine): "enable_lora": model_args.adapter_name_or_path is not None, "max_lora_rank": model_args.vllm_max_lora_rank, } + engine_args.update(model_args.vllm_config) if getattr(config, "is_yi_vl_derived_model", None): import vllm.model_executor.models.llava @@ -173,7 +174,7 @@ class VllmEngine(BaseEngine): multi_modal_data = None result_generator = self.model.generate( - inputs={"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data}, + {"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data}, sampling_params=sampling_params, request_id=request_id, lora_request=self.lora_request, diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 7c89c016..2d7e30df 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -46,7 +46,7 @@ class DataArguments: metadata={"help": "Path to the folder containing the images or videos. Defaults to `dataset_dir`."}, ) cutoff_len: int = field( - default=1024, + default=2048, metadata={"help": "The cutoff length of the tokenized inputs in the dataset."}, ) train_on_prompt: bool = field( diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 9847d707..2f980142 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -15,10 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json from dataclasses import dataclass, field, fields from typing import Any, Dict, Literal, Optional, Union import torch +from transformers.training_args import _convert_str_dict from typing_extensions import Self @@ -125,7 +127,7 @@ class VllmArguments: """ vllm_maxlen: int = field( - default=2048, + default=4096, metadata={"help": "Maximum sequence (prompt + response) length of the vLLM engine."}, ) vllm_gpu_util: float = field( @@ -140,6 +142,10 @@ class VllmArguments: default=32, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) + vllm_config: Optional[Union[dict, str]] = field( + default=None, + metadata={"help": "Config to initialize the vllm engine. Please use JSON strings."}, + ) @dataclass @@ -312,6 +318,9 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments, if self.export_quantization_bit is not None and self.export_quantization_dataset is None: raise ValueError("Quantization dataset is necessary for exporting.") + if isinstance(self.vllm_config, str) and self.vllm_config.startswith("{"): + self.vllm_config = _convert_str_dict(json.loads(self.vllm_config)) + @classmethod def copyfrom(cls, source: "Self", **kwargs) -> "Self": init_args, lazy_args = {}, {} diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 54310fbf..5bc16dac 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -122,7 +122,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.3") + require_version("vllm>=0.4.3,<0.6.4", "To fix: pip install vllm>=0.4.3,<0.6.4") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 6766cbb0..bd53d163 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -68,7 +68,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) with gr.Row(): - cutoff_len = gr.Slider(minimum=4, maximum=131072, value=1024, step=1) + cutoff_len = gr.Slider(minimum=4, maximum=131072, value=2048, step=1) batch_size = gr.Slider(minimum=1, maximum=1024, value=2, step=1) gradient_accumulation_steps = gr.Slider(minimum=1, maximum=1024, value=8, step=1) val_size = gr.Slider(minimum=0, maximum=1, value=0, step=0.001)