add vllm config

Former-commit-id: 58ab4579dc81a1dcea2bf5938ba3f3116cecfc76
Author: hiyouga
Date:   2024-11-10 21:28:18 +08:00
parent 1e00de38cb
commit 0d18cca0db
34 changed files with 44 additions and 34 deletions

@@ -10,7 +10,7 @@ use_adam_mini: true
 ### dataset
 dataset: identity,alpaca_en_demo
 template: qwen
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -15,7 +15,7 @@ badam_verbose: 2
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -11,7 +11,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -14,7 +14,7 @@ galore_scale: 2.0
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -12,7 +12,7 @@ use_llama_pro: true
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -11,7 +11,7 @@ loraplus_lr_ratio: 16.0
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ mixture_of_depths: convert
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -13,7 +13,7 @@ pissa_convert: true
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -9,7 +9,7 @@ finetuning_type: full
 ### dataset
 eval_dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ deepspeed: examples/deepspeed/ds_z3_config.json
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ deepspeed: examples/deepspeed/ds_z3_config.json
 ### dataset
 dataset: mllm_demo,identity
 template: qwen2_vl
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -12,7 +12,7 @@ pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
 ### dataset
 dataset: dpo_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -11,7 +11,7 @@ pref_beta: 0.1
 ### dataset
 dataset: kto_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -11,7 +11,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ finetuning_type: lora
 ### dataset
 eval_dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -9,7 +9,7 @@ lora_target: all
 
 ### dataset
 dataset: c4_demo
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: dpo_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -11,7 +11,7 @@ deepspeed: examples/deepspeed/ds_z0_config.json
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -11,7 +11,7 @@ deepspeed: examples/deepspeed/ds_z3_config.json
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: mllm_demo
 template: llava
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -12,7 +12,7 @@ pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
 ### dataset
 dataset: rlhf_v
 template: qwen2_vl
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: mllm_demo,identity  # video: mllm_video_demo
 template: qwen2_vl
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -10,7 +10,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -12,7 +12,7 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
 template: llama3
-cutoff_len: 1024
+cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16

@@ -54,7 +54,7 @@ extra_require = {
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.6.3"],
+    "vllm": ["vllm>=0.4.3,<0.6.4"],
     "galore": ["galore-torch"],
     "badam": ["badam>=1.2.1"],
     "adam-mini": ["adam-mini"],

@@ -83,6 +83,7 @@ class VllmEngine(BaseEngine):
             "enable_lora": model_args.adapter_name_or_path is not None,
             "max_lora_rank": model_args.vllm_max_lora_rank,
         }
+        engine_args.update(model_args.vllm_config)
 
         if getattr(config, "is_yi_vl_derived_model", None):
             import vllm.model_executor.models.llava
@@ -173,7 +174,7 @@ class VllmEngine(BaseEngine):
             multi_modal_data = None
 
         result_generator = self.model.generate(
-            inputs={"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data},
+            {"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data},
            sampling_params=sampling_params,
            request_id=request_id,
            lora_request=self.lora_request,
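
The added `engine_args.update(model_args.vllm_config)` call merges user-supplied keys into the engine arguments last, so they take precedence over the values LLaMA-Factory derives itself. A minimal sketch of an inference config that exercises this (the model path is illustrative, and `max_num_seqs` / `enforce_eager` are standard vLLM engine arguments chosen only as examples, not values from this commit):

model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct  # hypothetical model
template: llama3
infer_backend: vllm
vllm_maxlen: 4096
vllm_config: '{"max_num_seqs": 256, "enforce_eager": true}'  # JSON string, parsed as shown below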

@@ -46,7 +46,7 @@ class DataArguments:
         metadata={"help": "Path to the folder containing the images or videos. Defaults to `dataset_dir`."},
     )
     cutoff_len: int = field(
-        default=1024,
+        default=2048,
         metadata={"help": "The cutoff length of the tokenized inputs in the dataset."},
     )
     train_on_prompt: bool = field(
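
With the default raised from 1024 to 2048, a config that leaves `cutoff_len` unset now tokenizes up to 2048 tokens per example; an explicit value still wins:

cutoff_len: 1024  # hypothetical override restoring the previous default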

@@ -15,10 +15,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 from dataclasses import dataclass, field, fields
 from typing import Any, Dict, Literal, Optional, Union
 
 import torch
+from transformers.training_args import _convert_str_dict
 from typing_extensions import Self
@@ -125,7 +127,7 @@ class VllmArguments:
     """
 
     vllm_maxlen: int = field(
-        default=2048,
+        default=4096,
         metadata={"help": "Maximum sequence (prompt + response) length of the vLLM engine."},
     )
     vllm_gpu_util: float = field(
@@ -140,6 +142,10 @@ class VllmArguments:
         default=32,
         metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."},
     )
+    vllm_config: Optional[Union[dict, str]] = field(
+        default=None,
+        metadata={"help": "Config to initialize the vllm engine. Please use JSON strings."},
+    )
 
 
 @dataclass
@@ -312,6 +318,9 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
         if self.export_quantization_bit is not None and self.export_quantization_dataset is None:
             raise ValueError("Quantization dataset is necessary for exporting.")
 
+        if isinstance(self.vllm_config, str) and self.vllm_config.startswith("{"):
+            self.vllm_config = _convert_str_dict(json.loads(self.vllm_config))
+
     @classmethod
     def copyfrom(cls, source: "Self", **kwargs) -> "Self":
         init_args, lazy_args = {}, {}
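
Because command-line values always arrive as strings, `vllm_config` is declared `Optional[Union[dict, str]]` and normalized in `__post_init__`: a string starting with `{` is decoded with `json.loads` and passed through transformers' `_convert_str_dict`, which coerces string values such as "true" or "256" into their typed equivalents. A hedged sketch of the two spellings this should accept (keys are illustrative):

# as a JSON string, normalized in __post_init__:
vllm_config: '{"max_num_seqs": 256, "enforce_eager": "true"}'

# or, presumably, as a native YAML mapping:
# vllm_config:
#   max_num_seqs: 256
#   enforce_eager: true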

@@ -122,7 +122,7 @@ def _check_extra_dependencies(
         require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6")
 
     if model_args.infer_backend == "vllm":
-        require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.3")
+        require_version("vllm>=0.4.3,<0.6.4", "To fix: pip install vllm>=0.4.3,<0.6.4")
 
     if finetuning_args.use_galore:
         require_version("galore_torch", "To fix: pip install galore_torch")

@@ -68,7 +68,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             )
 
         with gr.Row():
-            cutoff_len = gr.Slider(minimum=4, maximum=131072, value=1024, step=1)
+            cutoff_len = gr.Slider(minimum=4, maximum=131072, value=2048, step=1)
             batch_size = gr.Slider(minimum=1, maximum=1024, value=2, step=1)
             gradient_accumulation_steps = gr.Slider(minimum=1, maximum=1024, value=8, step=1)
             val_size = gr.Slider(minimum=0, maximum=1, value=0, step=0.001)