diff --git a/requirements.txt b/requirements.txt
index fe60a183..67982a3a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,16 +3,14 @@ datasets>=2.16.0,<=3.5.0
 accelerate>=0.34.0,<=1.6.0
 peft>=0.14.0,<=0.15.1
 trl>=0.8.6,<=0.9.6
-tokenizers>=0.19.0,<=0.21.0
-gradio>=4.38.0,<=5.21.0
-pandas>=2.0.0
+tokenizers>=0.19.0,<=0.21.1
+gradio>=4.38.0,<=5.25.0
 scipy
 einops
 sentencepiece
 tiktoken
 protobuf
 uvicorn
-pydantic
 fastapi
 sse-starlette
 matplotlib>=3.7.0
@@ -21,6 +19,7 @@ packaging
 pyyaml
 numpy<2.0.0
 pydantic<=2.10.6
+pandas>=2.0.0
 av
 librosa
 tyro<0.9.0
diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py
index 075ef845..7e7e06cf 100644
--- a/src/llamafactory/cli.py
+++ b/src/llamafactory/cli.py
@@ -15,6 +15,7 @@
 import os
 import subprocess
 import sys
+from copy import deepcopy
 from enum import Enum, unique
 
 from . import launcher
@@ -96,6 +97,13 @@ def main():
             if int(nnodes) > 1:
                 print(f"Multi-node training enabled: num nodes: {nnodes}, node rank: {node_rank}")
 
+            env = deepcopy(os.environ)
+            if is_env_enabled("OPTIM_TORCH", "1"):
+                # optimize DDP, see https://zhuanlan.zhihu.com/p/671834539
+                env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+                env["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
+
+            # NOTE: DO NOT USE shell=True to avoid security risk
             process = subprocess.run(
                 (
                     "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} "
@@ -110,7 +118,9 @@
                     file_name=launcher.__file__,
                     args=" ".join(sys.argv[1:]),
                 )
-                .split()
+                .split(),
+                env=env,
+                check=True,
             )
             sys.exit(process.returncode)
         else:
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 9291968f..e10b6b7f 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -727,23 +727,23 @@
         },
         "GLM-4-9B-Chat-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-9B-Chat-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-Chat-0414" ,
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-9B-Chat-0414",
         },
         "GLM-4-32B-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-32B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-0414" ,
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-0414",
         },
         "GLM-4-32B-Chat-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-32B-Chat-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414" ,
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-32B-Chat-0414",
         },
         "GLM-4-Z1-9B-Chat-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-9B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414" ,
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-9B-0414",
         },
         "GLM-4-Z1-32B-Chat-0414": {
             DownloadSource.DEFAULT: "THUDM/GLM-4-Z1-32B-0414",
-            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414" ,
+            DownloadSource.MODELSCOPE: "ZhipuAI/GLM-4-Z1-32B-0414",
         },
     },
     template="glm4",
diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index 1b9d7153..5be79ed1 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -390,8 +390,10 @@ def get_train_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _
 def get_infer_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _INFER_CLS:
     model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)
 
+    # Setup logging
     _set_transformers_logging()
 
+    # Check arguments
     if model_args.infer_backend == "vllm":
         if finetuning_args.stage != "sft":
             raise ValueError("vLLM engine only supports auto-regressive models.")
@@ -408,6 +410,7 @@ def get_infer_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _
     _verify_model_args(model_args, data_args, finetuning_args)
     _check_extra_dependencies(model_args, finetuning_args)
 
+    # Post-process model arguments
     if model_args.export_dir is not None and model_args.export_device == "cpu":
         model_args.device_map = {"": torch.device("cpu")}
     if data_args.cutoff_len != DataArguments().cutoff_len:  # override cutoff_len if it is not default
@@ -421,8 +424,10 @@ def get_infer_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _
 def get_eval_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _EVAL_CLS:
     model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)
 
+    # Setup logging
     _set_transformers_logging()
 
+    # Check arguments
     if model_args.infer_backend == "vllm":
         raise ValueError("vLLM backend is only available for API, CLI and Web.")
 
diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index de0bd299..8be0e7bd 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -96,6 +96,7 @@ def patch_config(
         model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))
 
     if is_torch_npu_available():
+        # avoid JIT compile on NPU devices, see https://zhuanlan.zhihu.com/p/660875458
         torch.npu.set_compile_mode(jit_compile=is_env_enabled("JIT_COMPILE"))
 
     configure_attn_implementation(config, model_args, is_trainable)
diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py
index 8f9176d2..923a7105 100644
--- a/src/llamafactory/webui/runner.py
+++ b/src/llamafactory/webui/runner.py
@@ -368,6 +368,7 @@ class Runner:
         if args.get("deepspeed", None) is not None:
             env["FORCE_TORCHRUN"] = "1"
 
+        # NOTE: DO NOT USE shell=True to avoid security risk
         self.trainer = Popen(["llamafactory-cli", "train", save_cmd(args)], env=env)
         yield from self.monitor()
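A minimal standalone sketch of the launch pattern the cli.py hunks adopt, for context: copy the parent environment, opt into the allocator and NCCL settings when OPTIM_TORCH is enabled, then start torchrun through subprocess.run with an argument list (never shell=True) and check=True. Only the two environment variable names and the deepcopy come from the diff; the OPTIM_TORCH parsing, node/process counts, and the train_script.py entry point are illustrative assumptions.

    # sketch.py -- illustrative only, not part of the repository
    import os
    import subprocess
    import sys
    from copy import deepcopy

    env = deepcopy(os.environ)  # copy so the parent process environment is left untouched
    if os.getenv("OPTIM_TORCH", "1").lower() in ("true", "1", "y"):  # assumed truthy check
        # reduce CUDA memory fragmentation and NCCL memory overhead in DDP
        env["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
        env["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"

    command = [
        "torchrun",
        "--nnodes", os.getenv("NNODES", "1"),
        "--nproc_per_node", os.getenv("NPROC_PER_NODE", "1"),
        "train_script.py",  # placeholder entry point
        *sys.argv[1:],
    ]
    # passing a list (not a shell string) avoids shell injection; check=True raises on non-zero exit
    process = subprocess.run(command, env=env, check=True)
    sys.exit(process.returncode)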