From 95ac3f2373b82662c1bd855c284d3379e6a763d3 Mon Sep 17 00:00:00 2001 From: Yaowei Zheng Date: Wed, 31 Dec 2025 22:22:40 +0800 Subject: [PATCH] [release] Bye 2025 (#9702) --- .github/workflows/tests.yml | 2 +- README.md | 12 ++-- README_zh.md | 12 ++-- examples/README.md | 70 ++++++++----------- examples/README_zh.md | 70 ++++++++----------- examples/inference/llama3_lora_sft.yaml | 5 -- .../inference/{qwen2_5vl.yaml => qwen3.yaml} | 4 +- ...ama3_full_sft.yaml => qwen3_full_sft.yaml} | 4 +- examples/inference/qwen3_lora_sft.yaml | 5 ++ .../inference/{llama3.yaml => qwen3vl.yaml} | 4 +- .../infer_lora}/deepseek2_lora_sft_kt.yaml | 0 .../infer_lora}/deepseek3_kt.yaml | 0 .../infer_lora}/deepseek3_lora_sft_kt.yaml | 0 .../infer_lora}/qwen3moe_lora_sft_kt.yaml | 0 .../DeepSeek-V2-Chat-sft-amx.yaml | 0 .../kt_optimize_rules/DeepSeek-V2-Chat.yaml | 0 ...epSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml | 0 .../DeepSeek-V2-Lite-Chat-sft-amx.yaml | 0 .../DeepSeek-V2-Lite-Chat-sft.yaml | 0 .../DeepSeek-V2-Lite-Chat.yaml | 0 .../DeepSeek-V3-Chat-amx.yaml | 0 .../DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml | 0 .../DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml | 0 .../DeepSeek-V3-Chat-sft-amx.yaml | 0 .../kt_optimize_rules/Qwen3Moe-sft-amx.yaml | 0 .../train_lora/deepseek2_lora_sft_kt.yaml | 0 .../train_lora/deepseek3_lora_sft_kt.yaml | 0 .../train_lora/qwen3moe_lora_sft_kt.yaml | 0 ...ama3_full_sft.yaml => qwen3_full_sft.yaml} | 6 +- .../{llama3_gptq.yaml => qwen3_gptq.yaml} | 6 +- ..._5vl_lora_sft.yaml => qwen3_lora_sft.yaml} | 8 +-- ...a3_lora_sft.yaml => qwen3vl_lora_sft.yaml} | 8 +-- ...ama3_full_sft.yaml => qwen3_full_sft.yaml} | 7 +- .../train_full/qwen3_full_sft_autotp.yaml | 46 ------------ ...vl_full_sft.yaml => qwen3vl_full_sft.yaml} | 7 +- examples/train_lora/llama3_lora_eval.yaml | 19 ----- examples/train_lora/llama3_lora_ppo.yaml | 43 ------------ examples/train_lora/llama3_lora_sft.yaml | 46 ------------ examples/train_lora/llama4_lora_sft_ds3.yaml | 49 ------------- ...ama3_lora_dpo.yaml => qwen3_lora_dpo.yaml} | 7 +- ...ama3_lora_kto.yaml => qwen3_lora_kto.yaml} | 7 +- ...pretrain.yaml => qwen3_lora_pretrain.yaml} | 5 +- ...ora_reward.yaml => qwen3_lora_reward.yaml} | 7 +- .../{llama3_lora_sft.sh => qwen3_lora_sft.sh} | 7 +- ...{gpt_lora_sft.yaml => qwen3_lora_sft.yaml} | 7 +- ...a_sft_ds3.yaml => qwen3_lora_sft_ds3.yaml} | 7 +- ...a_sft_ray.yaml => qwen3_lora_sft_ray.yaml} | 7 +- ..._preprocess.yaml => qwen3_preprocess.yaml} | 11 ++- ...vl_lora_dpo.yaml => qwen3vl_lora_dpo.yaml} | 7 +- ...vl_lora_sft.yaml => qwen3vl_lora_sft.yaml} | 7 +- .../train_qlora/llama3_lora_sft_aqlm.yaml | 1 - examples/train_qlora/llama3_lora_sft_awq.yaml | 1 - .../train_qlora/llama3_lora_sft_gptq.yaml | 1 - ...b_npu.yaml => qwen3_lora_sft_bnb_npu.yaml} | 7 +- ...sft_otfq.yaml => qwen3_lora_sft_otfq.yaml} | 7 +- pyproject.toml | 3 +- scripts/vllm_infer.py | 19 +++-- src/llamafactory/extras/env.py | 2 +- tests_v1/config/test_args_parser.py | 2 +- 59 files changed, 154 insertions(+), 401 deletions(-) delete mode 100644 examples/inference/llama3_lora_sft.yaml rename examples/inference/{qwen2_5vl.yaml => qwen3.yaml} (59%) rename examples/inference/{llama3_full_sft.yaml => qwen3_full_sft.yaml} (60%) create mode 100644 examples/inference/qwen3_lora_sft.yaml rename examples/inference/{llama3.yaml => qwen3vl.yaml} (59%) rename examples/{inference => ktransformers/infer_lora}/deepseek2_lora_sft_kt.yaml (100%) rename examples/{inference => ktransformers/infer_lora}/deepseek3_kt.yaml (100%) rename examples/{inference => ktransformers/infer_lora}/deepseek3_lora_sft_kt.yaml (100%) rename examples/{inference => ktransformers/infer_lora}/qwen3moe_lora_sft_kt.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V2-Chat-sft-amx.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V2-Chat.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V2-Lite-Chat.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V3-Chat-amx.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml (100%) rename examples/{ => ktransformers}/kt_optimize_rules/Qwen3Moe-sft-amx.yaml (100%) rename examples/{ => ktransformers}/train_lora/deepseek2_lora_sft_kt.yaml (100%) rename examples/{ => ktransformers}/train_lora/deepseek3_lora_sft_kt.yaml (100%) rename examples/{ => ktransformers}/train_lora/qwen3moe_lora_sft_kt.yaml (100%) rename examples/merge_lora/{llama3_full_sft.yaml => qwen3_full_sft.yaml} (56%) rename examples/merge_lora/{llama3_gptq.yaml => qwen3_gptq.yaml} (66%) rename examples/merge_lora/{qwen2_5vl_lora_sft.yaml => qwen3_lora_sft.yaml} (58%) rename examples/merge_lora/{llama3_lora_sft.yaml => qwen3vl_lora_sft.yaml} (57%) rename examples/train_full/{llama3_full_sft.yaml => qwen3_full_sft.yaml} (87%) delete mode 100644 examples/train_full/qwen3_full_sft_autotp.yaml rename examples/train_full/{qwen2_5vl_full_sft.yaml => qwen3vl_full_sft.yaml} (87%) delete mode 100644 examples/train_lora/llama3_lora_eval.yaml delete mode 100644 examples/train_lora/llama3_lora_ppo.yaml delete mode 100644 examples/train_lora/llama3_lora_sft.yaml delete mode 100644 examples/train_lora/llama4_lora_sft_ds3.yaml rename examples/train_lora/{llama3_lora_dpo.yaml => qwen3_lora_dpo.yaml} (86%) rename examples/train_lora/{llama3_lora_kto.yaml => qwen3_lora_kto.yaml} (84%) rename examples/train_lora/{llama3_lora_pretrain.yaml => qwen3_lora_pretrain.yaml} (86%) rename examples/train_lora/{llama3_lora_reward.yaml => qwen3_lora_reward.yaml} (85%) rename examples/train_lora/{llama3_lora_sft.sh => qwen3_lora_sft.sh} (84%) rename examples/train_lora/{gpt_lora_sft.yaml => qwen3_lora_sft.yaml} (87%) rename examples/train_lora/{llama3_lora_sft_ds3.yaml => qwen3_lora_sft_ds3.yaml} (87%) rename examples/train_lora/{llama3_lora_sft_ray.yaml => qwen3_lora_sft_ray.yaml} (87%) rename examples/train_lora/{llama3_preprocess.yaml => qwen3_preprocess.yaml} (58%) rename examples/train_lora/{qwen2_5vl_lora_dpo.yaml => qwen3vl_lora_dpo.yaml} (87%) rename examples/train_lora/{qwen2_5vl_lora_sft.yaml => qwen3vl_lora_sft.yaml} (86%) rename examples/train_qlora/{llama3_lora_sft_bnb_npu.yaml => qwen3_lora_sft_bnb_npu.yaml} (86%) rename examples/train_qlora/{llama3_lora_sft_otfq.yaml => qwen3_lora_sft_otfq.yaml} (86%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a10d7d850..f4166aab1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -27,7 +27,7 @@ jobs: python: - "3.11" - "3.12" - # - "3.13" # enable after trl is upgraded + - "3.13" os: - "ubuntu-latest" - "windows-latest" diff --git a/README.md b/README.md index 84d468e19..b530e6087 100644 --- a/README.md +++ b/README.md @@ -639,7 +639,7 @@ cd transformers pip install . ``` -3. Set `double_quantization: false` in the configuration. You can refer to the [example](examples/train_qlora/llama3_lora_sft_bnb_npu.yaml). +3. Set `double_quantization: false` in the configuration. You can refer to the [example](examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml). @@ -654,12 +654,12 @@ You can also use **[Easy Dataset](https://github.com/ConardLi/easy-dataset)**, * ### Quickstart -Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. +Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Qwen3-4B-Instruct model, respectively. ```bash -llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml -llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml +llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml ``` See [examples/README.md](examples/README.md) for advanced usage (including distributed training). @@ -782,7 +782,7 @@ When building the Docker image, use `-v ./hf_cache:/root/.cache/huggingface` arg ### Deploy with OpenAI-style API and vLLM ```bash -API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true +API_PORT=8000 llamafactory-cli api examples/inference/qwen3.yaml infer_backend=vllm vllm_enforce_eager=true ``` > [!TIP] diff --git a/README_zh.md b/README_zh.md index 3af79201c..8b786aabb 100644 --- a/README_zh.md +++ b/README_zh.md @@ -641,7 +641,7 @@ cd transformers pip install . ``` -3. 在训练参数中设置 `double_quantization: false`,可参考[示例](examples/train_qlora/llama3_lora_sft_bnb_npu.yaml)。 +3. 在训练参数中设置 `double_quantization: false`,可参考[示例](examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml)。 @@ -656,12 +656,12 @@ pip install . ### 快速开始 -下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 +下面三行命令分别对 Qwen3-4B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml -llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml +llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml ``` 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。 @@ -787,7 +787,7 @@ docker exec -it llamafactory bash ### 利用 vLLM 部署 OpenAI API ```bash -API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true +API_PORT=8000 llamafactory-cli api examples/inference/qwen3.yaml infer_backend=vllm vllm_enforce_eager=true ``` > [!TIP] diff --git a/examples/README.md b/examples/README.md index 3fa7b1d1d..1d79d574c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -18,19 +18,19 @@ By default, LLaMA-Factory uses all visible computing devices. Basic usage: ```bash -llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml ``` Advanced usage: ```bash -CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml \ +CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml \ learning_rate=1e-5 \ logging_steps=1 ``` ```bash -bash examples/train_lora/llama3_lora_sft.sh +bash examples/train_lora/qwen3_lora_sft.sh ``` ## Examples @@ -40,49 +40,43 @@ bash examples/train_lora/llama3_lora_sft.sh #### (Continuous) Pre-Training ```bash -llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_pretrain.yaml ``` #### Supervised Fine-Tuning ```bash -llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml ``` #### Multimodal Supervised Fine-Tuning ```bash -llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3vl_lora_sft.yaml ``` #### DPO/ORPO/SimPO Training ```bash -llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_dpo.yaml ``` #### Multimodal DPO/ORPO/SimPO Training ```bash -llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml +llamafactory-cli train examples/train_lora/qwen3vl_lora_dpo.yaml ``` #### Reward Modeling ```bash -llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml -``` - -#### PPO Training - -```bash -llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_reward.yaml ``` #### KTO Training ```bash -llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_kto.yaml ``` #### Preprocess Dataset @@ -90,32 +84,26 @@ llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset. ```bash -llamafactory-cli train examples/train_lora/llama3_preprocess.yaml -``` - -#### Evaluating on MMLU/CMMLU/C-Eval Benchmarks - -```bash -llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml +llamafactory-cli train examples/train_lora/qwen3_preprocess.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml ``` #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ds3.yaml ``` #### Supervised Fine-Tuning with Ray on 4 GPUs ```bash -USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml +USE_RAY=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ray.yaml ``` ### QLoRA Fine-Tuning @@ -123,13 +111,13 @@ USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml #### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended) ```bash -llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml +llamafactory-cli train examples/train_qlora/qwen3_lora_sft_otfq.yaml ``` #### Supervised Fine-Tuning with 4-bit Bitsandbytes Quantization on Ascend NPU ```bash -llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml +llamafactory-cli train examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml ``` #### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization @@ -155,14 +143,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml #### Supervised Fine-Tuning on Single Node ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml ``` ### Elastic and Fault-Tolerant Supervised Fine-Tuning on Multiple Nodes @@ -170,13 +158,13 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 To launch an elastic job with `MAX_RESTARTS` failures retries, run the following on at least `MIN_NNODES` nodes and at most `MAX_NNODES` nodes. `RDZV_ID` should be set as a unique job id (shared by all nodes participating in the job). See also [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html). ```bash -FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml ``` #### Multimodal Supervised Fine-Tuning ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3vl_full_sft.yaml ``` ### Merging LoRA Adapters and Quantization @@ -186,19 +174,19 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.y Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters. ```bash -llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml ``` #### Quantizing Model using AutoGPTQ ```bash -llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/qwen3_gptq.yaml ``` ### Save Ollama modelfile ```bash -llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml +llamafactory-cli export examples/merge_lora/qwen3_full_sft.yaml ``` ### Inferring LoRA Fine-Tuned Models @@ -206,26 +194,26 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml #### Evaluation using vLLM's Multi-GPU Inference ``` -python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo +python scripts/vllm_infer.py --model_name_or_path Qwen/Qwen3-4B-Instruct-2507 --template qwen3_nothink --dataset alpaca_en_demo python scripts/eval_bleu_rouge.py generated_predictions.jsonl ``` #### Use CLI ChatBox ```bash -llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml ``` #### Use Web UI ChatBox ```bash -llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/qwen3_lora_sft.yaml ``` #### Launch OpenAI-style API ```bash -llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/qwen3_lora_sft.yaml ``` ### Extras diff --git a/examples/README_zh.md b/examples/README_zh.md index aa42e4917..95f292838 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -18,19 +18,19 @@ LLaMA-Factory 默认使用所有可见的计算设备。 基础用法: ```bash -llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml ``` 高级用法: ```bash -CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml \ +CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml \ learning_rate=1e-5 \ logging_steps=1 ``` ```bash -bash examples/train_lora/llama3_lora_sft.sh +bash examples/train_lora/qwen3_lora_sft.sh ``` ## 示例 @@ -40,49 +40,43 @@ bash examples/train_lora/llama3_lora_sft.sh #### (增量)预训练 ```bash -llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_pretrain.yaml ``` #### 指令监督微调 ```bash -llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml ``` #### 多模态指令监督微调 ```bash -llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml +llamafactory-cli train examples/train_lora/qwen3vl_lora_sft.yaml ``` #### DPO/ORPO/SimPO 训练 ```bash -llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_dpo.yaml ``` #### 多模态 DPO/ORPO/SimPO 训练 ```bash -llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml +llamafactory-cli train examples/train_lora/qwen3vl_lora_dpo.yaml ``` #### 奖励模型训练 ```bash -llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml -``` - -#### PPO 训练 - -```bash -llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_reward.yaml ``` #### KTO 训练 ```bash -llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml +llamafactory-cli train examples/train_lora/qwen3_lora_kto.yaml ``` #### 预处理数据集 @@ -90,20 +84,14 @@ llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml 对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。 ```bash -llamafactory-cli train examples/train_lora/llama3_preprocess.yaml -``` - -#### 在 MMLU/CMMLU/C-Eval 上评估 - -```bash -llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml +llamafactory-cli train examples/train_lora/qwen3_preprocess.yaml ``` #### 多机指令监督微调 ```bash -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml ``` ### 支持弹性和容错的多机指令监督微调 @@ -111,19 +99,19 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 要启动一个支持弹性节点和容错的多机指令微调,在每个节点上执行以下命令。弹性节点数量范围为 `MIN_NNODES:MAX_NNODES`,每个节点最多允许因为错误重启 `MAX_RESTARTS` 次。`RDZV_ID` 应设置为一个唯一的作业 ID(由参与该作业的所有节点共享)。更多新可以参考官方文档 [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html)。 ```bash -FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml ``` #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ds3.yaml ``` #### 使用 Ray 在 4 张 GPU 上微调 ```bash -USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml +USE_RAY=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ray.yaml ``` ### QLoRA 微调 @@ -131,13 +119,13 @@ USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml #### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐) ```bash -llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml +llamafactory-cli train examples/train_qlora/qwen3_lora_sft_otfq.yaml ``` #### 在 NPU 上基于 4 比特 Bitsandbytes 量化进行指令监督微调 ```bash -llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml +llamafactory-cli train examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml ``` #### 基于 4/8 比特 GPTQ 量化进行指令监督微调 @@ -163,20 +151,20 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml #### 在单机上进行指令监督微调 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml ``` #### 在多机上进行指令监督微调 ```bash -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml -FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml ``` #### 多模态指令监督微调 ```bash -FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3vl_full_sft.yaml ``` ### 合并 LoRA 适配器与模型量化 @@ -186,19 +174,19 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.y 注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。 ```bash -llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml ``` #### 使用 AutoGPTQ 量化模型 ```bash -llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/qwen3_gptq.yaml ``` ### 保存 Ollama 配置文件 ```bash -llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml +llamafactory-cli export examples/merge_lora/qwen3_full_sft.yaml ``` ### 推理 LoRA 模型 @@ -206,26 +194,26 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml #### 使用 vLLM 多卡推理评估 ``` -python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo +python scripts/vllm_infer.py --model_name_or_path Qwen/Qwen3-4B-Instruct-2507 --template qwen3_nothink --dataset alpaca_en_demo python scripts/eval_bleu_rouge.py generated_predictions.jsonl ``` #### 使用命令行对话框 ```bash -llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml ``` #### 使用浏览器对话框 ```bash -llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/qwen3_lora_sft.yaml ``` #### 启动 OpenAI 风格 API ```bash -llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/qwen3_lora_sft.yaml ``` ### 杂项 diff --git a/examples/inference/llama3_lora_sft.yaml b/examples/inference/llama3_lora_sft.yaml deleted file mode 100644 index e7fd04254..000000000 --- a/examples/inference/llama3_lora_sft.yaml +++ /dev/null @@ -1,5 +0,0 @@ -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -adapter_name_or_path: saves/llama3-8b/lora/sft -template: llama3 -infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers] -trust_remote_code: true diff --git a/examples/inference/qwen2_5vl.yaml b/examples/inference/qwen3.yaml similarity index 59% rename from examples/inference/qwen2_5vl.yaml rename to examples/inference/qwen3.yaml index 67b78d4fa..1c4232cd8 100644 --- a/examples/inference/qwen2_5vl.yaml +++ b/examples/inference/qwen3.yaml @@ -1,4 +1,4 @@ -model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct -template: qwen2_vl +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 +template: qwen3_nothink infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers] trust_remote_code: true diff --git a/examples/inference/llama3_full_sft.yaml b/examples/inference/qwen3_full_sft.yaml similarity index 60% rename from examples/inference/llama3_full_sft.yaml rename to examples/inference/qwen3_full_sft.yaml index 64fc24899..b11f0e803 100644 --- a/examples/inference/llama3_full_sft.yaml +++ b/examples/inference/qwen3_full_sft.yaml @@ -1,4 +1,4 @@ -model_name_or_path: saves/llama3-8b/full/sft -template: llama3 +model_name_or_path: saves/qwen3-4b/full/sft +template: qwen3_nothink infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers] trust_remote_code: true diff --git a/examples/inference/qwen3_lora_sft.yaml b/examples/inference/qwen3_lora_sft.yaml new file mode 100644 index 000000000..44d8471c5 --- /dev/null +++ b/examples/inference/qwen3_lora_sft.yaml @@ -0,0 +1,5 @@ +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 +adapter_name_or_path: saves/qwen3-4b/lora/sft +template: qwen3_nothink +infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers] +trust_remote_code: true diff --git a/examples/inference/llama3.yaml b/examples/inference/qwen3vl.yaml similarity index 59% rename from examples/inference/llama3.yaml rename to examples/inference/qwen3vl.yaml index 9315e7977..0c0b5dcbf 100644 --- a/examples/inference/llama3.yaml +++ b/examples/inference/qwen3vl.yaml @@ -1,4 +1,4 @@ -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -template: llama3 +model_name_or_path: Qwen/Qwen3-VL-4B-Instruct +template: qwen3_vl_nothink infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers] trust_remote_code: true diff --git a/examples/inference/deepseek2_lora_sft_kt.yaml b/examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml similarity index 100% rename from examples/inference/deepseek2_lora_sft_kt.yaml rename to examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml diff --git a/examples/inference/deepseek3_kt.yaml b/examples/ktransformers/infer_lora/deepseek3_kt.yaml similarity index 100% rename from examples/inference/deepseek3_kt.yaml rename to examples/ktransformers/infer_lora/deepseek3_kt.yaml diff --git a/examples/inference/deepseek3_lora_sft_kt.yaml b/examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml similarity index 100% rename from examples/inference/deepseek3_lora_sft_kt.yaml rename to examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml diff --git a/examples/inference/qwen3moe_lora_sft_kt.yaml b/examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml similarity index 100% rename from examples/inference/qwen3moe_lora_sft_kt.yaml rename to examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V2-Chat-sft-amx.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Chat-sft-amx.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V2-Chat-sft-amx.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Chat-sft-amx.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V2-Chat.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Chat.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V2-Chat.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Chat.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx-multi-gpu.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V3-Chat-amx.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-amx.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V3-Chat-amx.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-amx.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu-4.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml diff --git a/examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml b/examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml similarity index 100% rename from examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml rename to examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx.yaml diff --git a/examples/kt_optimize_rules/Qwen3Moe-sft-amx.yaml b/examples/ktransformers/kt_optimize_rules/Qwen3Moe-sft-amx.yaml similarity index 100% rename from examples/kt_optimize_rules/Qwen3Moe-sft-amx.yaml rename to examples/ktransformers/kt_optimize_rules/Qwen3Moe-sft-amx.yaml diff --git a/examples/train_lora/deepseek2_lora_sft_kt.yaml b/examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml similarity index 100% rename from examples/train_lora/deepseek2_lora_sft_kt.yaml rename to examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml diff --git a/examples/train_lora/deepseek3_lora_sft_kt.yaml b/examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml similarity index 100% rename from examples/train_lora/deepseek3_lora_sft_kt.yaml rename to examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml diff --git a/examples/train_lora/qwen3moe_lora_sft_kt.yaml b/examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml similarity index 100% rename from examples/train_lora/qwen3moe_lora_sft_kt.yaml rename to examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml diff --git a/examples/merge_lora/llama3_full_sft.yaml b/examples/merge_lora/qwen3_full_sft.yaml similarity index 56% rename from examples/merge_lora/llama3_full_sft.yaml rename to examples/merge_lora/qwen3_full_sft.yaml index dd6953720..9c6fb9255 100644 --- a/examples/merge_lora/llama3_full_sft.yaml +++ b/examples/merge_lora/qwen3_full_sft.yaml @@ -1,10 +1,10 @@ ### model -model_name_or_path: saves/llama3-8b/full/sft -template: llama3 +model_name_or_path: saves/qwen3-4b/full/sft +template: qwen3_nothink trust_remote_code: true ### export -export_dir: output/llama3_full_sft +export_dir: saves/qwen3_sft_merged export_size: 5 export_device: cpu # choices: [cpu, auto] export_legacy_format: false diff --git a/examples/merge_lora/llama3_gptq.yaml b/examples/merge_lora/qwen3_gptq.yaml similarity index 66% rename from examples/merge_lora/llama3_gptq.yaml rename to examples/merge_lora/qwen3_gptq.yaml index 2a3d2fd6d..800bc8d04 100644 --- a/examples/merge_lora/llama3_gptq.yaml +++ b/examples/merge_lora/qwen3_gptq.yaml @@ -1,10 +1,10 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -template: llama3 +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 +template: qwen3_nothink trust_remote_code: true ### export -export_dir: output/llama3_gptq +export_dir: saves/qwen3_gptq export_quantization_bit: 4 export_quantization_dataset: data/c4_demo.jsonl export_size: 5 diff --git a/examples/merge_lora/qwen2_5vl_lora_sft.yaml b/examples/merge_lora/qwen3_lora_sft.yaml similarity index 58% rename from examples/merge_lora/qwen2_5vl_lora_sft.yaml rename to examples/merge_lora/qwen3_lora_sft.yaml index 38a5c7c4f..f4b93f1ba 100644 --- a/examples/merge_lora/qwen2_5vl_lora_sft.yaml +++ b/examples/merge_lora/qwen3_lora_sft.yaml @@ -1,13 +1,13 @@ ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters ### model -model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct -adapter_name_or_path: saves/qwen2_5vl-7b/lora/sft -template: qwen2_vl +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 +adapter_name_or_path: saves/qwen3-4b/lora/sft +template: qwen3_nothink trust_remote_code: true ### export -export_dir: output/qwen2_5vl_lora_sft +export_dir: saves/qwen3_sft_merged export_size: 5 export_device: cpu # choices: [cpu, auto] export_legacy_format: false diff --git a/examples/merge_lora/llama3_lora_sft.yaml b/examples/merge_lora/qwen3vl_lora_sft.yaml similarity index 57% rename from examples/merge_lora/llama3_lora_sft.yaml rename to examples/merge_lora/qwen3vl_lora_sft.yaml index 2b011d8d9..647b0c1ea 100644 --- a/examples/merge_lora/llama3_lora_sft.yaml +++ b/examples/merge_lora/qwen3vl_lora_sft.yaml @@ -1,13 +1,13 @@ ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -adapter_name_or_path: saves/llama3-8b/lora/sft -template: llama3 +model_name_or_path: Qwen/Qwen3-VL-4B-Instruct +adapter_name_or_path: saves/qwen3-vl-4b/lora/sft +template: qwen3_vl_nothink trust_remote_code: true ### export -export_dir: output/llama3_lora_sft +export_dir: saves/qwen3_vl_sft_merged export_size: 5 export_device: cpu # choices: [cpu, auto] export_legacy_format: false diff --git a/examples/train_full/llama3_full_sft.yaml b/examples/train_full/qwen3_full_sft.yaml similarity index 87% rename from examples/train_full/llama3_full_sft.yaml rename to examples/train_full/qwen3_full_sft.yaml index fb7066a73..adb7a1dfe 100644 --- a/examples/train_full/llama3_full_sft.yaml +++ b/examples/train_full/qwen3_full_sft.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -10,15 +10,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ### dataset dataset: identity,alpaca_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/full/sft +output_dir: saves/qwen3-4b/full/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_full/qwen3_full_sft_autotp.yaml b/examples/train_full/qwen3_full_sft_autotp.yaml deleted file mode 100644 index 2726203f7..000000000 --- a/examples/train_full/qwen3_full_sft_autotp.yaml +++ /dev/null @@ -1,46 +0,0 @@ -### model -model_name_or_path: Qwen/Qwen3-32B -trust_remote_code: true -use_v1_kernels: true - -### method -stage: sft -do_train: true -finetuning_type: full -deepspeed: examples/deepspeed/ds_z2_autotp_config.json - -### dataset -dataset: identity,alpaca_en_demo -template: qwen3 -cutoff_len: 2048 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 -dataloader_num_workers: 4 - -### output -output_dir: saves/qwen3-32b/full/sft_autotp -logging_steps: 1 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true -save_only_model: false -report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow] - -### train -per_device_train_batch_size: 4 -gradient_accumulation_steps: 1 -learning_rate: 1.0e-4 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -bf16: true -ddp_timeout: 180000000 -resume_from_checkpoint: null - -### eval -# eval_dataset: alpaca_en_demo -# val_size: 0.1 -# per_device_eval_batch_size: 1 -# eval_strategy: steps -# eval_steps: 500 diff --git a/examples/train_full/qwen2_5vl_full_sft.yaml b/examples/train_full/qwen3vl_full_sft.yaml similarity index 87% rename from examples/train_full/qwen2_5vl_full_sft.yaml rename to examples/train_full/qwen3vl_full_sft.yaml index bd9ac90df..06c6d9528 100644 --- a/examples/train_full/qwen2_5vl_full_sft.yaml +++ b/examples/train_full/qwen3vl_full_sft.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct +model_name_or_path: Qwen/Qwen3-VL-4B-Instruct image_max_pixels: 262144 video_max_pixels: 16384 trust_remote_code: true @@ -15,15 +15,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json ### dataset dataset: mllm_demo,identity,alpaca_en_demo -template: qwen2_vl +template: qwen3_vl_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/qwen2_5vl-7b/full/sft +output_dir: saves/qwen3-vl-4b/full/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_eval.yaml b/examples/train_lora/llama3_lora_eval.yaml deleted file mode 100644 index 60d7c2f39..000000000 --- a/examples/train_lora/llama3_lora_eval.yaml +++ /dev/null @@ -1,19 +0,0 @@ -### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -adapter_name_or_path: saves/llama3-8b/lora/sft -trust_remote_code: true - -### method -finetuning_type: lora - -### dataset -task: mmlu_test # choices: [mmlu_test, ceval_validation, cmmlu_test] -template: fewshot -lang: en -n_shot: 5 - -### output -save_dir: saves/llama3-8b/lora/eval - -### eval -batch_size: 4 diff --git a/examples/train_lora/llama3_lora_ppo.yaml b/examples/train_lora/llama3_lora_ppo.yaml deleted file mode 100644 index 879448190..000000000 --- a/examples/train_lora/llama3_lora_ppo.yaml +++ /dev/null @@ -1,43 +0,0 @@ -### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -reward_model: saves/llama3-8b/lora/reward -trust_remote_code: true - -### method -stage: ppo -do_train: true -finetuning_type: lora -lora_rank: 8 -lora_target: all - -### dataset -dataset: identity,alpaca_en_demo -template: llama3 -cutoff_len: 2048 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 -dataloader_num_workers: 4 - -### output -output_dir: saves/llama3-8b/lora/ppo -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true -report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow] - -### train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 8 -learning_rate: 1.0e-5 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -bf16: true -ddp_timeout: 180000000 - -### generate -max_new_tokens: 512 -top_k: 0 -top_p: 0.9 diff --git a/examples/train_lora/llama3_lora_sft.yaml b/examples/train_lora/llama3_lora_sft.yaml deleted file mode 100644 index 157d66104..000000000 --- a/examples/train_lora/llama3_lora_sft.yaml +++ /dev/null @@ -1,46 +0,0 @@ -### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct -trust_remote_code: true - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_rank: 8 -lora_target: all - -### dataset -dataset: identity,alpaca_en_demo -template: llama3 -cutoff_len: 2048 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 -dataloader_num_workers: 4 - -### output -output_dir: saves/llama3-8b/lora/sft -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true -save_only_model: false -report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow] - -### train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 8 -learning_rate: 1.0e-4 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -bf16: true -ddp_timeout: 180000000 -resume_from_checkpoint: null - -### eval -# eval_dataset: alpaca_en_demo -# val_size: 0.1 -# per_device_eval_batch_size: 1 -# eval_strategy: steps -# eval_steps: 500 diff --git a/examples/train_lora/llama4_lora_sft_ds3.yaml b/examples/train_lora/llama4_lora_sft_ds3.yaml deleted file mode 100644 index 6c5bb7bb7..000000000 --- a/examples/train_lora/llama4_lora_sft_ds3.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# pip install git+https://github.com/hiyouga/transformers.git@llama4_train - -### model -model_name_or_path: meta-llama/Llama-4-Scout-17B-16E-Instruct -trust_remote_code: true - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_rank: 8 -lora_target: all -deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json] - -### dataset -dataset: mllm_demo,identity,alpaca_en_demo -template: llama4 -cutoff_len: 2048 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 -dataloader_num_workers: 4 - -### output -output_dir: saves/llama4-8b/lora/sft -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true -save_only_model: false -report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow] - -### train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 2 -learning_rate: 1.0e-4 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -bf16: true -ddp_timeout: 180000000 -resume_from_checkpoint: null - -### eval -# eval_dataset: alpaca_en_demo -# val_size: 0.1 -# per_device_eval_batch_size: 1 -# eval_strategy: steps -# eval_steps: 500 diff --git a/examples/train_lora/llama3_lora_dpo.yaml b/examples/train_lora/qwen3_lora_dpo.yaml similarity index 86% rename from examples/train_lora/llama3_lora_dpo.yaml rename to examples/train_lora/qwen3_lora_dpo.yaml index fd8c042c1..78f4d31f6 100644 --- a/examples/train_lora/llama3_lora_dpo.yaml +++ b/examples/train_lora/qwen3_lora_dpo.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -13,15 +13,14 @@ pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo] ### dataset dataset: dpo_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/dpo +output_dir: saves/qwen3-4b/lora/dpo logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_kto.yaml b/examples/train_lora/qwen3_lora_kto.yaml similarity index 84% rename from examples/train_lora/llama3_lora_kto.yaml rename to examples/train_lora/qwen3_lora_kto.yaml index 113b9129f..51e67318a 100644 --- a/examples/train_lora/llama3_lora_kto.yaml +++ b/examples/train_lora/qwen3_lora_kto.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -12,15 +12,14 @@ pref_beta: 0.1 ### dataset dataset: kto_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/kto +output_dir: saves/qwen3-4b/lora/kto logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_pretrain.yaml b/examples/train_lora/qwen3_lora_pretrain.yaml similarity index 86% rename from examples/train_lora/llama3_lora_pretrain.yaml rename to examples/train_lora/qwen3_lora_pretrain.yaml index 3c851d705..a14e9b462 100644 --- a/examples/train_lora/llama3_lora_pretrain.yaml +++ b/examples/train_lora/qwen3_lora_pretrain.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -13,12 +13,11 @@ lora_target: all dataset: c4_demo cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/pretrain +output_dir: saves/qwen3-4b/lora/pretrain logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_reward.yaml b/examples/train_lora/qwen3_lora_reward.yaml similarity index 85% rename from examples/train_lora/llama3_lora_reward.yaml rename to examples/train_lora/qwen3_lora_reward.yaml index 48230b552..17887c02d 100644 --- a/examples/train_lora/llama3_lora_reward.yaml +++ b/examples/train_lora/qwen3_lora_reward.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -11,15 +11,14 @@ lora_target: all ### dataset dataset: dpo_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/reward +output_dir: saves/qwen3-4b/lora/reward logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_sft.sh b/examples/train_lora/qwen3_lora_sft.sh similarity index 84% rename from examples/train_lora/llama3_lora_sft.sh rename to examples/train_lora/qwen3_lora_sft.sh index 59db2c58e..bc63ac2d1 100644 --- a/examples/train_lora/llama3_lora_sft.sh +++ b/examples/train_lora/qwen3_lora_sft.sh @@ -2,7 +2,7 @@ set -x -MODEL_PATH=meta-llama/Meta-Llama-3-8B-Instruct +MODEL_PATH=Qwen/Qwen3-4B-Instruct-2507 llamafactory-cli train \ --model_name_or_path ${MODEL_PATH} \ @@ -13,13 +13,12 @@ llamafactory-cli train \ --lora_rank 8 \ --lora_target all \ --dataset identity,alpaca_en_demo \ - --template llama3 \ + --template qwen3_nothink \ --cutoff_len 2048 \ --max_samples 1000 \ - --overwrite_cache \ --preprocessing_num_workers 16 \ --dataloader_num_workers 4 \ - --output_dir saves/llama3-8b/lora/sft \ + --output_dir saves/qwen3-4b/lora/sft \ --logging_steps 10 \ --save_steps 500 \ --plot_loss \ diff --git a/examples/train_lora/gpt_lora_sft.yaml b/examples/train_lora/qwen3_lora_sft.yaml similarity index 87% rename from examples/train_lora/gpt_lora_sft.yaml rename to examples/train_lora/qwen3_lora_sft.yaml index b07615b1c..ba19e261c 100644 --- a/examples/train_lora/gpt_lora_sft.yaml +++ b/examples/train_lora/qwen3_lora_sft.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: openai/gpt-oss-20b +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -11,15 +11,14 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo -template: gpt +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/gpt-20b/lora/sft +output_dir: saves/qwen3-4b/lora/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_sft_ds3.yaml b/examples/train_lora/qwen3_lora_sft_ds3.yaml similarity index 87% rename from examples/train_lora/llama3_lora_sft_ds3.yaml rename to examples/train_lora/qwen3_lora_sft_ds3.yaml index e20b35179..6fcf1c6c2 100644 --- a/examples/train_lora/llama3_lora_sft_ds3.yaml +++ b/examples/train_lora/qwen3_lora_sft_ds3.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -12,15 +12,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ### dataset dataset: identity,alpaca_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/sft +output_dir: saves/qwen3-4b/lora/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/llama3_lora_sft_ray.yaml b/examples/train_lora/qwen3_lora_sft_ray.yaml similarity index 87% rename from examples/train_lora/llama3_lora_sft_ray.yaml rename to examples/train_lora/qwen3_lora_sft_ray.yaml index 8c03bf9ee..0cbc59546 100644 --- a/examples/train_lora/llama3_lora_sft_ray.yaml +++ b/examples/train_lora/qwen3_lora_sft_ray.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct # or use local absolute path +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 # or use local absolute path trust_remote_code: true ### method @@ -12,10 +12,9 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo dataset_dir: REMOTE:llamafactory/demo_data # or use local absolute path -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 @@ -29,7 +28,7 @@ save_only_model: false report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow] ### ray -ray_run_name: llama3_8b_sft_lora +ray_run_name: qwen3_4b_sft_lora ray_storage_path: ./saves ray_num_workers: 4 # Number of GPUs to use. placement_strategy: PACK diff --git a/examples/train_lora/llama3_preprocess.yaml b/examples/train_lora/qwen3_preprocess.yaml similarity index 58% rename from examples/train_lora/llama3_preprocess.yaml rename to examples/train_lora/qwen3_preprocess.yaml index fbaf01f0f..60901654c 100644 --- a/examples/train_lora/llama3_preprocess.yaml +++ b/examples/train_lora/qwen3_preprocess.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 trust_remote_code: true ### method @@ -11,13 +11,12 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 -tokenized_path: saves/llama3-8b/dataset/sft +tokenized_path: saves/qwen3-4b/dataset/sft -### output -output_dir: saves/llama3-8b/lora/sft +### output (not used) +output_dir: saves/qwen3-4b/lora/sft overwrite_output_dir: true diff --git a/examples/train_lora/qwen2_5vl_lora_dpo.yaml b/examples/train_lora/qwen3vl_lora_dpo.yaml similarity index 87% rename from examples/train_lora/qwen2_5vl_lora_dpo.yaml rename to examples/train_lora/qwen3vl_lora_dpo.yaml index 2140c90d5..12e9a615b 100644 --- a/examples/train_lora/qwen2_5vl_lora_dpo.yaml +++ b/examples/train_lora/qwen3vl_lora_dpo.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct +model_name_or_path: Qwen/Qwen3-VL-4B-Instruct image_max_pixels: 262144 video_max_pixels: 16384 trust_remote_code: true @@ -15,15 +15,14 @@ pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo] ### dataset dataset: rlhf_v -template: qwen2_vl +template: qwen3_vl_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/qwen2_5vl-7b/lora/dpo +output_dir: saves/qwen3-vl-4b/lora/dpo logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_lora/qwen2_5vl_lora_sft.yaml b/examples/train_lora/qwen3vl_lora_sft.yaml similarity index 86% rename from examples/train_lora/qwen2_5vl_lora_sft.yaml rename to examples/train_lora/qwen3vl_lora_sft.yaml index 6177cfd5f..749bfe60c 100644 --- a/examples/train_lora/qwen2_5vl_lora_sft.yaml +++ b/examples/train_lora/qwen3vl_lora_sft.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct +model_name_or_path: Qwen/Qwen3-VL-4B-Instruct image_max_pixels: 262144 video_max_pixels: 16384 trust_remote_code: true @@ -13,15 +13,14 @@ lora_target: all ### dataset dataset: mllm_demo,identity,alpaca_en_demo # video: mllm_video_demo -template: qwen2_vl +template: qwen3_vl_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/qwen2_5vl-7b/lora/sft +output_dir: saves/qwen3-vl-4b/lora/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_qlora/llama3_lora_sft_aqlm.yaml b/examples/train_qlora/llama3_lora_sft_aqlm.yaml index a7d44c7ea..16a0a4a2c 100644 --- a/examples/train_qlora/llama3_lora_sft_aqlm.yaml +++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml @@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo template: llama3 cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 diff --git a/examples/train_qlora/llama3_lora_sft_awq.yaml b/examples/train_qlora/llama3_lora_sft_awq.yaml index 861edfde3..9c57c6a13 100644 --- a/examples/train_qlora/llama3_lora_sft_awq.yaml +++ b/examples/train_qlora/llama3_lora_sft_awq.yaml @@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo template: llama3 cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 diff --git a/examples/train_qlora/llama3_lora_sft_gptq.yaml b/examples/train_qlora/llama3_lora_sft_gptq.yaml index 729d8628b..fd23e65c1 100644 --- a/examples/train_qlora/llama3_lora_sft_gptq.yaml +++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml @@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo template: llama3 cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 diff --git a/examples/train_qlora/llama3_lora_sft_bnb_npu.yaml b/examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml similarity index 86% rename from examples/train_qlora/llama3_lora_sft_bnb_npu.yaml rename to examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml index d68ce665c..0301ee15a 100644 --- a/examples/train_qlora/llama3_lora_sft_bnb_npu.yaml +++ b/examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 quantization_bit: 4 quantization_method: bnb double_quantization: false @@ -14,15 +14,14 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/sft +output_dir: saves/qwen3-4b/lora/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/examples/train_qlora/llama3_lora_sft_otfq.yaml b/examples/train_qlora/qwen3_lora_sft_otfq.yaml similarity index 86% rename from examples/train_qlora/llama3_lora_sft_otfq.yaml rename to examples/train_qlora/qwen3_lora_sft_otfq.yaml index 1a157afec..3a0e3d457 100644 --- a/examples/train_qlora/llama3_lora_sft_otfq.yaml +++ b/examples/train_qlora/qwen3_lora_sft_otfq.yaml @@ -1,5 +1,5 @@ ### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct +model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)] quantization_method: bnb # choices: [bnb, hqq, eetq] trust_remote_code: true @@ -13,15 +13,14 @@ lora_target: all ### dataset dataset: identity,alpaca_en_demo -template: llama3 +template: qwen3_nothink cutoff_len: 2048 max_samples: 1000 -overwrite_cache: true preprocessing_num_workers: 16 dataloader_num_workers: 4 ### output -output_dir: saves/llama3-8b/lora/sft +output_dir: saves/qwen3-4b/lora/sft logging_steps: 10 save_steps: 500 plot_loss: true diff --git a/pyproject.toml b/pyproject.toml index a60f47606..ef03b82f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,8 +41,7 @@ dependencies = [ "torch>=2.4.0", "torchvision>=0.19.0", "torchaudio>=2.4.0", - "transformers>=4.51.0,<=4.56.2,!=4.52.0; python_version < '3.10'", - "transformers>=4.51.0,<=4.57.1,!=4.52.0,!=4.57.0; python_version >= '3.10'", + "transformers>=4.51.0,<=4.57.1,!=4.52.0,!=4.57.0", "datasets>=2.16.0,<=4.0.0", "accelerate>=1.3.0,<=1.11.0", "peft>=0.14.0,<=0.17.1", diff --git a/scripts/vllm_infer.py b/scripts/vllm_infer.py index 4d74004e0..c794b7c7b 100644 --- a/scripts/vllm_infer.py +++ b/scripts/vllm_infer.py @@ -18,9 +18,10 @@ import time import av import fire +from datasets import load_dataset +from eval_bleu_rouge import compute_metrics from tqdm import tqdm from transformers import Seq2SeqTrainingArguments -from datasets import load_dataset from llamafactory.data import get_dataset, get_template_and_fix_tokenizer from llamafactory.extras.constants import IGNORE_INDEX @@ -29,8 +30,6 @@ from llamafactory.extras.packages import is_vllm_available from llamafactory.hparams import get_infer_args from llamafactory.model import load_tokenizer -from eval_bleu_rouge import compute_metrics - if is_vllm_available(): from vllm import LLM, SamplingParams @@ -235,10 +234,10 @@ def vllm_infer( print(f"{len(all_prompts)} total generated results have been saved at {save_name}.") print("*" * 70) - # Write all matrix results when matrix_save_name is not None, + # Write all matrix results when matrix_save_name is not None, # The result matrix is referencing src.llamafactory.train.sft.workflow.run_sft # 127~132 # trainer.save_metrics("predict", predict_results.metrics) - # + # # { # "predict_bleu-4": 4.349975, # "predict_model_preparation_time": 0.0128, @@ -265,11 +264,11 @@ def vllm_infer( print(f"predict_{task}: {score:.4f}") average_score["predict_" + task] = score - average_score['predict_model_preparation_time'] = preparation_time - average_score['predict_runtime'] = predict_time + average_score["predict_model_preparation_time"] = preparation_time + average_score["predict_runtime"] = predict_time num_steps = len(range(0, len(train_dataset), batch_size)) - average_score['predict_samples_per_second'] = len(dataset) / predict_time if predict_time > 0 else 0.0 - average_score['predict_steps_per_second'] = num_steps / predict_time if predict_time > 0 else 0.0 + average_score["predict_samples_per_second"] = len(dataset) / predict_time if predict_time > 0 else 0.0 + average_score["predict_steps_per_second"] = num_steps / predict_time if predict_time > 0 else 0.0 with open(matrix_save_name, "w", encoding="utf-8") as f: json.dump(average_score, f, indent=4) @@ -280,4 +279,4 @@ def vllm_infer( if __name__ == "__main__": - fire.Fire(vllm_infer) \ No newline at end of file + fire.Fire(vllm_infer) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index d39b99453..d839ee924 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -19,7 +19,7 @@ from collections import OrderedDict -VERSION = "0.9.4.dev0" +VERSION = "0.9.4" def print_env() -> None: diff --git a/tests_v1/config/test_args_parser.py b/tests_v1/config/test_args_parser.py index 945e0e572..b39f4532c 100644 --- a/tests_v1/config/test_args_parser.py +++ b/tests_v1/config/test_args_parser.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import pathlib +import sys from unittest.mock import patch from llamafactory.v1.config.arg_parser import get_args