diff --git a/.gitignore b/.gitignore
index 603806e0..f3307c4f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -176,3 +176,4 @@ output/
 wandb/
 swanlog/
 generated_predictions.jsonl
+predictions_score.json
diff --git a/README.md b/README.md
index 06e9335a..710b46cd 100644
--- a/README.md
+++ b/README.md
@@ -233,6 +233,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
+> [!NOTE]
+> If you cannot use the latest feature, please pull the latest code and install LLaMA-Factory again.
+
 ## Supported Models
 
 | Model | Model size | Template |
@@ -463,7 +466,7 @@ cd LLaMA-Factory
 pip install -e ".[torch,metrics]"
 ```
 
-Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, quality
+Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, quality
 
 > [!TIP]
 > Use `pip install --no-deps -e .` to resolve package conflicts.
diff --git a/README_zh.md b/README_zh.md
index e9faa49b..bba64696 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -236,6 +236,9 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 
+> [!NOTE]
+> 如果您无法使用最新的功能,请尝试重新拉取代码并再次安装 LLaMA-Factory。
+
 ## 模型
 
 | 模型名 | 参数量 | Template |
@@ -466,7 +469,7 @@ cd LLaMA-Factory
 pip install -e ".[torch,metrics]"
 ```
 
-可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、sglang、galore、apollo、badam、adam-mini、qwen、minicpm_v、modelscope、openmind、swanlab、quality
+可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、aqlm、vllm、sglang、galore、apollo、badam、adam-mini、qwen、minicpm_v、modelscope、openmind、swanlab、quality
 
 > [!TIP]
 > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。
diff --git a/assets/wechat.jpg b/assets/wechat.jpg
index d743ca53..b5a225e0 100644
Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ
diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg
index 91b7c3e4..45bbc99b 100644
Binary files a/assets/wechat_npu.jpg and b/assets/wechat_npu.jpg differ
diff --git a/examples/README.md b/examples/README.md
index d4bdf1e3..281e69bc 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -195,10 +195,11 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
 
 ### Inferring LoRA Fine-Tuned Models
 
-#### Batch Generation using vLLM Tensor Parallel
+#### Evaluation using vLLM's Multi-GPU Inference
 
 ```
-python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
+python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo
+python scripts/eval_bleu_rouge.py generated_predictions.jsonl
 ```
 
 #### Use CLI ChatBox
@@ -281,9 +282,3 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
 ```bash
 bash examples/extras/fsdp_qlora/train.sh
 ```
-
-#### Computing BLEU and ROUGE Scores
-
-```bash
-llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
-```
diff --git a/examples/README_zh.md b/examples/README_zh.md
index 727d6593..3035dd15 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -195,10 +195,11 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
 
 ### 推理 LoRA 模型
 
-#### 使用 vLLM+TP 批量推理
+#### 使用 vLLM 多卡推理评估
 
 ```
-python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
+python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo
+python scripts/eval_bleu_rouge.py generated_predictions.jsonl
 ```
 
 #### 使用命令行对话框
@@ -281,9 +282,3 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
 ```bash
 bash examples/extras/fsdp_qlora/train.sh
 ```
-
-#### 计算 BLEU 和 ROUGE 分数
-
-```bash
-llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
-```
diff --git a/pyproject.toml b/pyproject.toml
index 2585539b..de9617ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,6 +88,14 @@ conflicts = [
         { extra = "torch-npu" },
         { extra = "vllm" },
     ],
+    [
+        { extra = "torch-npu" },
+        { extra = "sglang" },
+    ],
+    [
+        { extra = "vllm" },
+        { extra = "sglang" },
+    ],
     [
         { extra = "sglang" },
         { extra = "minicpm_v" },
diff --git a/setup.py b/setup.py
index e00edb3d..2561d4a4 100644
--- a/setup.py
+++ b/setup.py
@@ -51,9 +51,8 @@ extra_require = {
     "hqq": ["hqq"],
     "eetq": ["eetq"],
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
-    "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.8.4"],
+    "vllm": ["vllm>=0.4.3,<=0.8.5"],
     "sglang": ["sglang[srt]>=0.4.5", "transformers==4.51.1"],
     "galore": ["galore-torch"],
     "apollo": ["apollo-torch"],
diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index cfe71498..539b9152 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -148,10 +148,10 @@ def _check_extra_dependencies(
         check_version("mixture-of-depth>=1.1.6", mandatory=True)
 
     if model_args.infer_backend == EngineName.VLLM:
-        check_version("vllm>=0.4.3,<=0.8.4")
+        check_version("vllm>=0.4.3,<=0.8.5")
         check_version("vllm", mandatory=True)
     elif model_args.infer_backend == EngineName.SGLANG:
-        check_version("sglang>=0.4.4")
+        check_version("sglang>=0.4.5")
         check_version("sglang", mandatory=True)
 
     if finetuning_args.use_galore:
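
Context for reviewers: this patch replaces the removed `examples/extras/nlg_eval` predict workflow with a two-step flow — `scripts/vllm_infer.py` writes `generated_predictions.jsonl`, and `scripts/eval_bleu_rouge.py` scores it (the new `.gitignore` entry suggests it writes `predictions_score.json`). Below is a minimal sketch of what such a scorer could look like, for orientation only: the `predict`/`label` field names and the use of `nltk` plus `rouge-score` are assumptions, not a description of the repository's actual script.

```python
# Illustrative sketch of an eval_bleu_rouge.py-style scorer.
# Assumptions: each JSONL record carries "predict" and "label" string fields,
# and the averaged metrics go to predictions_score.json (the file added to
# .gitignore above). The real script may differ in tokenization and metrics.
import json
import sys

from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from rouge_score import rouge_scorer


def main(path: str) -> None:
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    smooth = SmoothingFunction().method3
    totals: dict[str, float] = {}
    count = 0
    with open(path, encoding="utf-8") as f:
        for line in f:
            sample = json.loads(line)
            pred, label = sample["predict"], sample["label"]
            # Sentence-level BLEU-4 over whitespace tokens, smoothed for short outputs.
            bleu = sentence_bleu([label.split()], pred.split(), smoothing_function=smooth)
            rouge = scorer.score(label, pred)
            metrics = {"bleu-4": bleu * 100, **{k: v.fmeasure * 100 for k, v in rouge.items()}}
            for key, value in metrics.items():
                totals[key] = totals.get(key, 0.0) + value
            count += 1

    # Average per-sample scores; guard against an empty predictions file.
    average = {key: round(value / max(count, 1), 2) for key, value in totals.items()}
    with open("predictions_score.json", "w", encoding="utf-8") as f:
        json.dump(average, f, indent=2)
    print(average)


if __name__ == "__main__":
    main(sys.argv[1])
```

Invocation would mirror the README change: `python scripts/eval_bleu_rouge.py generated_predictions.jsonl`.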