mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2026-03-03 02:05:59 +08:00
Compare commits
2 Commits
c8d7e85b3e
...
v0.9.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95ac3f2373 | ||
|
|
000526908a |
10
.github/workflows/tests.yml
vendored
10
.github/workflows/tests.yml
vendored
@@ -27,23 +27,23 @@ jobs:
|
|||||||
python:
|
python:
|
||||||
- "3.11"
|
- "3.11"
|
||||||
- "3.12"
|
- "3.12"
|
||||||
# - "3.13" # enable after trl is upgraded
|
- "3.13"
|
||||||
os:
|
os:
|
||||||
- "ubuntu-latest"
|
- "ubuntu-latest"
|
||||||
- "windows-latest"
|
- "windows-latest"
|
||||||
- "macos-latest"
|
- "macos-latest"
|
||||||
transformers:
|
transformers:
|
||||||
- null
|
- ""
|
||||||
include: # test backward compatibility
|
include: # test backward compatibility
|
||||||
- python: "3.11"
|
|
||||||
os: "ubuntu-latest"
|
|
||||||
transformers: "4.49.0"
|
|
||||||
- python: "3.11"
|
- python: "3.11"
|
||||||
os: "ubuntu-latest"
|
os: "ubuntu-latest"
|
||||||
transformers: "4.51.0"
|
transformers: "4.51.0"
|
||||||
- python: "3.11"
|
- python: "3.11"
|
||||||
os: "ubuntu-latest"
|
os: "ubuntu-latest"
|
||||||
transformers: "4.53.0"
|
transformers: "4.53.0"
|
||||||
|
- python: "3.11"
|
||||||
|
os: "ubuntu-latest"
|
||||||
|
transformers: "4.55.0"
|
||||||
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
|
|||||||
12
README.md
12
README.md
@@ -639,7 +639,7 @@ cd transformers
|
|||||||
pip install .
|
pip install .
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Set `double_quantization: false` in the configuration. You can refer to the [example](examples/train_qlora/llama3_lora_sft_bnb_npu.yaml).
|
3. Set `double_quantization: false` in the configuration. You can refer to the [example](examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml).
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
@@ -654,12 +654,12 @@ You can also use **[Easy Dataset](https://github.com/ConardLi/easy-dataset)**, *
|
|||||||
|
|
||||||
### Quickstart
|
### Quickstart
|
||||||
|
|
||||||
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
|
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Qwen3-4B-Instruct model, respectively.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
|
||||||
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
|
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
|
||||||
@@ -782,7 +782,7 @@ When building the Docker image, use `-v ./hf_cache:/root/.cache/huggingface` arg
|
|||||||
### Deploy with OpenAI-style API and vLLM
|
### Deploy with OpenAI-style API and vLLM
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true
|
API_PORT=8000 llamafactory-cli api examples/inference/qwen3.yaml infer_backend=vllm vllm_enforce_eager=true
|
||||||
```
|
```
|
||||||
|
|
||||||
> [!TIP]
|
> [!TIP]
|
||||||
|
|||||||
12
README_zh.md
12
README_zh.md
@@ -641,7 +641,7 @@ cd transformers
|
|||||||
pip install .
|
pip install .
|
||||||
```
|
```
|
||||||
|
|
||||||
3. 在训练参数中设置 `double_quantization: false`,可参考[示例](examples/train_qlora/llama3_lora_sft_bnb_npu.yaml)。
|
3. 在训练参数中设置 `double_quantization: false`,可参考[示例](examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml)。
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
@@ -656,12 +656,12 @@ pip install .
|
|||||||
|
|
||||||
### 快速开始
|
### 快速开始
|
||||||
|
|
||||||
下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
|
下面三行命令分别对 Qwen3-4B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
|
||||||
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
|
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
|
||||||
@@ -787,7 +787,7 @@ docker exec -it llamafactory bash
|
|||||||
### 利用 vLLM 部署 OpenAI API
|
### 利用 vLLM 部署 OpenAI API
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true
|
API_PORT=8000 llamafactory-cli api examples/inference/qwen3.yaml infer_backend=vllm vllm_enforce_eager=true
|
||||||
```
|
```
|
||||||
|
|
||||||
> [!TIP]
|
> [!TIP]
|
||||||
|
|||||||
@@ -18,19 +18,19 @@ By default, LLaMA-Factory uses all visible computing devices.
|
|||||||
Basic usage:
|
Basic usage:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
Advanced usage:
|
Advanced usage:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml \
|
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml \
|
||||||
learning_rate=1e-5 \
|
learning_rate=1e-5 \
|
||||||
logging_steps=1
|
logging_steps=1
|
||||||
```
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash examples/train_lora/llama3_lora_sft.sh
|
bash examples/train_lora/qwen3_lora_sft.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
## Examples
|
## Examples
|
||||||
@@ -40,49 +40,43 @@ bash examples/train_lora/llama3_lora_sft.sh
|
|||||||
#### (Continuous) Pre-Training
|
#### (Continuous) Pre-Training
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_pretrain.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning
|
#### Supervised Fine-Tuning
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Multimodal Supervised Fine-Tuning
|
#### Multimodal Supervised Fine-Tuning
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3vl_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### DPO/ORPO/SimPO Training
|
#### DPO/ORPO/SimPO Training
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_dpo.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Multimodal DPO/ORPO/SimPO Training
|
#### Multimodal DPO/ORPO/SimPO Training
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml
|
llamafactory-cli train examples/train_lora/qwen3vl_lora_dpo.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Reward Modeling
|
#### Reward Modeling
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_reward.yaml
|
||||||
```
|
|
||||||
|
|
||||||
#### PPO Training
|
|
||||||
|
|
||||||
```bash
|
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### KTO Training
|
#### KTO Training
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_kto.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Preprocess Dataset
|
#### Preprocess Dataset
|
||||||
@@ -90,32 +84,26 @@ llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml
|
|||||||
This is useful for large datasets; use `tokenized_path` in the config to load the preprocessed dataset.
|
This is useful for large datasets; use `tokenized_path` in the config to load the preprocessed dataset.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
|
llamafactory-cli train examples/train_lora/qwen3_preprocess.yaml
|
||||||
```
|
|
||||||
|
|
||||||
#### Evaluating on MMLU/CMMLU/C-Eval Benchmarks
|
|
||||||
|
|
||||||
```bash
|
|
||||||
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning on Multiple Nodes
|
#### Supervised Fine-Tuning on Multiple Nodes
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
|
#### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
|
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ds3.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning with Ray on 4 GPUs
|
#### Supervised Fine-Tuning with Ray on 4 GPUs
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml
|
USE_RAY=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ray.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### QLoRA Fine-Tuning
|
### QLoRA Fine-Tuning
|
||||||
@@ -123,13 +111,13 @@ USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml
|
|||||||
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended)
|
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
|
llamafactory-cli train examples/train_qlora/qwen3_lora_sft_otfq.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning with 4-bit Bitsandbytes Quantization on Ascend NPU
|
#### Supervised Fine-Tuning with 4-bit Bitsandbytes Quantization on Ascend NPU
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
|
llamafactory-cli train examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
|
#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization
|
||||||
@@ -155,14 +143,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
|
|||||||
#### Supervised Fine-Tuning on Single Node
|
#### Supervised Fine-Tuning on Single Node
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Supervised Fine-Tuning on Multiple Nodes
|
#### Supervised Fine-Tuning on Multiple Nodes
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Elastic and Fault-Tolerant Supervised Fine-Tuning on Multiple Nodes
|
### Elastic and Fault-Tolerant Supervised Fine-Tuning on Multiple Nodes
|
||||||
@@ -170,13 +158,13 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
|
|||||||
To launch an elastic job that retries up to `MAX_RESTARTS` times on failure, run the following on at least `MIN_NNODES` nodes and at most `MAX_NNODES` nodes. `RDZV_ID` should be set to a unique job id (shared by all nodes participating in the job). See also [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html).
|
To launch an elastic job that retries up to `MAX_RESTARTS` times on failure, run the following on at least `MIN_NNODES` nodes and at most `MAX_NNODES` nodes. `RDZV_ID` should be set to a unique job id (shared by all nodes participating in the job). See also [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Multimodal Supervised Fine-Tuning
|
#### Multimodal Supervised Fine-Tuning
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.yaml
|
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3vl_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Merging LoRA Adapters and Quantization
|
### Merging LoRA Adapters and Quantization
|
||||||
@@ -186,19 +174,19 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.y
|
|||||||
Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters.
|
Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Quantizing Model using AutoGPTQ
|
#### Quantizing Model using AutoGPTQ
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_gptq.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Save Ollama modelfile
|
### Save Ollama modelfile
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Inferring LoRA Fine-Tuned Models
|
### Inferring LoRA Fine-Tuned Models
|
||||||
@@ -206,26 +194,26 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
|
|||||||
#### Evaluation using vLLM's Multi-GPU Inference
|
#### Evaluation using vLLM's Multi-GPU Inference
|
||||||
|
|
||||||
```
|
```
|
||||||
python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo
|
python scripts/vllm_infer.py --model_name_or_path Qwen/Qwen3-4B-Instruct-2507 --template qwen3_nothink --dataset alpaca_en_demo
|
||||||
python scripts/eval_bleu_rouge.py generated_predictions.jsonl
|
python scripts/eval_bleu_rouge.py generated_predictions.jsonl
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Use CLI ChatBox
|
#### Use CLI ChatBox
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Use Web UI ChatBox
|
#### Use Web UI ChatBox
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli webchat examples/inference/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Launch OpenAI-style API
|
#### Launch OpenAI-style API
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli api examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli api examples/inference/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Extras
|
### Extras
|
||||||
|
|||||||
@@ -18,19 +18,19 @@ LLaMA-Factory 默认使用所有可见的计算设备。
|
|||||||
基础用法:
|
基础用法:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
高级用法:
|
高级用法:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml \
|
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml \
|
||||||
learning_rate=1e-5 \
|
learning_rate=1e-5 \
|
||||||
logging_steps=1
|
logging_steps=1
|
||||||
```
|
```
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash examples/train_lora/llama3_lora_sft.sh
|
bash examples/train_lora/qwen3_lora_sft.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
## 示例
|
## 示例
|
||||||
@@ -40,49 +40,43 @@ bash examples/train_lora/llama3_lora_sft.sh
|
|||||||
#### (增量)预训练
|
#### (增量)预训练
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_pretrain.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 指令监督微调
|
#### 指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 多模态指令监督微调
|
#### 多模态指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml
|
llamafactory-cli train examples/train_lora/qwen3vl_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### DPO/ORPO/SimPO 训练
|
#### DPO/ORPO/SimPO 训练
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_dpo.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 多模态 DPO/ORPO/SimPO 训练
|
#### 多模态 DPO/ORPO/SimPO 训练
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml
|
llamafactory-cli train examples/train_lora/qwen3vl_lora_dpo.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 奖励模型训练
|
#### 奖励模型训练
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_reward.yaml
|
||||||
```
|
|
||||||
|
|
||||||
#### PPO 训练
|
|
||||||
|
|
||||||
```bash
|
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### KTO 训练
|
#### KTO 训练
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml
|
llamafactory-cli train examples/train_lora/qwen3_lora_kto.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 预处理数据集
|
#### 预处理数据集
|
||||||
@@ -90,20 +84,14 @@ llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml
|
|||||||
对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。
|
对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_lora/llama3_preprocess.yaml
|
llamafactory-cli train examples/train_lora/qwen3_preprocess.yaml
|
||||||
```
|
|
||||||
|
|
||||||
#### 在 MMLU/CMMLU/C-Eval 上评估
|
|
||||||
|
|
||||||
```bash
|
|
||||||
llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml
|
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 多机指令监督微调
|
#### 多机指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### 支持弹性和容错的多机指令监督微调
|
### 支持弹性和容错的多机指令监督微调
|
||||||
@@ -111,19 +99,19 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
|
|||||||
要启动一个支持弹性节点和容错的多机指令微调,在每个节点上执行以下命令。弹性节点数量范围为 `MIN_NNODES:MAX_NNODES`,每个节点最多允许因为错误重启 `MAX_RESTARTS` 次。`RDZV_ID` 应设置为一个唯一的作业 ID(由参与该作业的所有节点共享)。更多信息可以参考官方文档 [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html)。
|
要启动一个支持弹性节点和容错的多机指令微调,在每个节点上执行以下命令。弹性节点数量范围为 `MIN_NNODES:MAX_NNODES`,每个节点最多允许因为错误重启 `MAX_RESTARTS` 次。`RDZV_ID` 应设置为一个唯一的作业 ID(由参与该作业的所有节点共享)。更多信息可以参考官方文档 [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html)。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 使用 DeepSpeed ZeRO-3 平均分配显存
|
#### 使用 DeepSpeed ZeRO-3 平均分配显存
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
|
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ds3.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 使用 Ray 在 4 张 GPU 上微调
|
#### 使用 Ray 在 4 张 GPU 上微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml
|
USE_RAY=1 llamafactory-cli train examples/train_lora/qwen3_lora_sft_ray.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### QLoRA 微调
|
### QLoRA 微调
|
||||||
@@ -131,13 +119,13 @@ USE_RAY=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ray.yaml
|
|||||||
#### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐)
|
#### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml
|
llamafactory-cli train examples/train_qlora/qwen3_lora_sft_otfq.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 在 NPU 上基于 4 比特 Bitsandbytes 量化进行指令监督微调
|
#### 在 NPU 上基于 4 比特 Bitsandbytes 量化进行指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli train examples/train_qlora/llama3_lora_sft_bnb_npu.yaml
|
llamafactory-cli train examples/train_qlora/qwen3_lora_sft_bnb_npu.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调
|
#### 基于 4/8 比特 GPTQ 量化进行指令监督微调
|
||||||
@@ -163,20 +151,20 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
|
|||||||
#### 在单机上进行指令监督微调
|
#### 在单机上进行指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 在多机上进行指令监督微调
|
#### 在多机上进行指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
|
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 多模态指令监督微调
|
#### 多模态指令监督微调
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.yaml
|
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen3vl_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### 合并 LoRA 适配器与模型量化
|
### 合并 LoRA 适配器与模型量化
|
||||||
@@ -186,19 +174,19 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.y
|
|||||||
注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。
|
注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 使用 AutoGPTQ 量化模型
|
#### 使用 AutoGPTQ 量化模型
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_gptq.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### 保存 Ollama 配置文件
|
### 保存 Ollama 配置文件
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
|
llamafactory-cli export examples/merge_lora/qwen3_full_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### 推理 LoRA 模型
|
### 推理 LoRA 模型
|
||||||
@@ -206,26 +194,26 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
|
|||||||
#### 使用 vLLM 多卡推理评估
|
#### 使用 vLLM 多卡推理评估
|
||||||
|
|
||||||
```
|
```
|
||||||
python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo
|
python scripts/vllm_infer.py --model_name_or_path Qwen/Qwen3-4B-Instruct-2507 --template qwen3_nothink --dataset alpaca_en_demo
|
||||||
python scripts/eval_bleu_rouge.py generated_predictions.jsonl
|
python scripts/eval_bleu_rouge.py generated_predictions.jsonl
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 使用命令行对话框
|
#### 使用命令行对话框
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 使用浏览器对话框
|
#### 使用浏览器对话框
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli webchat examples/inference/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 启动 OpenAI 风格 API
|
#### 启动 OpenAI 风格 API
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llamafactory-cli api examples/inference/llama3_lora_sft.yaml
|
llamafactory-cli api examples/inference/qwen3_lora_sft.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### 杂项
|
### 杂项
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
|
||||||
template: llama3
|
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
|
||||||
trust_remote_code: true
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
template: qwen2_vl
|
template: qwen3_nothink
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
model_name_or_path: saves/llama3-8b/full/sft
|
model_name_or_path: saves/qwen3-4b/full/sft
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
5
examples/inference/qwen3_lora_sft.yaml
Normal file
5
examples/inference/qwen3_lora_sft.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
|
adapter_name_or_path: saves/qwen3-4b/lora/sft
|
||||||
|
template: qwen3_nothink
|
||||||
|
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
||||||
|
trust_remote_code: true
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
|
||||||
template: llama3
|
template: qwen3_vl_nothink
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
infer_backend: huggingface # choices: [huggingface, vllm, sglang, ktransformers]
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: saves/llama3-8b/full/sft
|
model_name_or_path: saves/qwen3-4b/full/sft
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: output/llama3_full_sft
|
export_dir: saves/qwen3_sft_merged
|
||||||
export_size: 5
|
export_size: 5
|
||||||
export_device: cpu # choices: [cpu, auto]
|
export_device: cpu # choices: [cpu, auto]
|
||||||
export_legacy_format: false
|
export_legacy_format: false
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: output/llama3_gptq
|
export_dir: saves/qwen3_gptq
|
||||||
export_quantization_bit: 4
|
export_quantization_bit: 4
|
||||||
export_quantization_dataset: data/c4_demo.jsonl
|
export_quantization_dataset: data/c4_demo.jsonl
|
||||||
export_size: 5
|
export_size: 5
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
|
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
|
||||||
|
|
||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
adapter_name_or_path: saves/qwen2_5vl-7b/lora/sft
|
adapter_name_or_path: saves/qwen3-4b/lora/sft
|
||||||
template: qwen2_vl
|
template: qwen3_nothink
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: output/qwen2_5vl_lora_sft
|
export_dir: saves/qwen3_sft_merged
|
||||||
export_size: 5
|
export_size: 5
|
||||||
export_device: cpu # choices: [cpu, auto]
|
export_device: cpu # choices: [cpu, auto]
|
||||||
export_legacy_format: false
|
export_legacy_format: false
|
||||||
@@ -1,13 +1,13 @@
|
|||||||
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
|
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
|
||||||
|
|
||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
|
||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
adapter_name_or_path: saves/qwen3-vl-4b/lora/sft
|
||||||
template: llama3
|
template: qwen3_vl_nothink
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: output/llama3_lora_sft
|
export_dir: saves/qwen3_vl_sft_merged
|
||||||
export_size: 5
|
export_size: 5
|
||||||
export_device: cpu # choices: [cpu, auto]
|
export_device: cpu # choices: [cpu, auto]
|
||||||
export_legacy_format: false
|
export_legacy_format: false
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -10,15 +10,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json,
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/full/sft
|
output_dir: saves/qwen3-4b/full/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
### model
|
|
||||||
model_name_or_path: Qwen/Qwen3-32B
|
|
||||||
trust_remote_code: true
|
|
||||||
use_v1_kernels: true
|
|
||||||
|
|
||||||
### method
|
|
||||||
stage: sft
|
|
||||||
do_train: true
|
|
||||||
finetuning_type: full
|
|
||||||
deepspeed: examples/deepspeed/ds_z2_autotp_config.json
|
|
||||||
|
|
||||||
### dataset
|
|
||||||
dataset: identity,alpaca_en_demo
|
|
||||||
template: qwen3
|
|
||||||
cutoff_len: 2048
|
|
||||||
max_samples: 1000
|
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
|
||||||
dataloader_num_workers: 4
|
|
||||||
|
|
||||||
### output
|
|
||||||
output_dir: saves/qwen3-32b/full/sft_autotp
|
|
||||||
logging_steps: 1
|
|
||||||
save_steps: 500
|
|
||||||
plot_loss: true
|
|
||||||
overwrite_output_dir: true
|
|
||||||
save_only_model: false
|
|
||||||
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
|
||||||
|
|
||||||
### train
|
|
||||||
per_device_train_batch_size: 4
|
|
||||||
gradient_accumulation_steps: 1
|
|
||||||
learning_rate: 1.0e-4
|
|
||||||
num_train_epochs: 3.0
|
|
||||||
lr_scheduler_type: cosine
|
|
||||||
warmup_ratio: 0.1
|
|
||||||
bf16: true
|
|
||||||
ddp_timeout: 180000000
|
|
||||||
resume_from_checkpoint: null
|
|
||||||
|
|
||||||
### eval
|
|
||||||
# eval_dataset: alpaca_en_demo
|
|
||||||
# val_size: 0.1
|
|
||||||
# per_device_eval_batch_size: 1
|
|
||||||
# eval_strategy: steps
|
|
||||||
# eval_steps: 500
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
|
||||||
image_max_pixels: 262144
|
image_max_pixels: 262144
|
||||||
video_max_pixels: 16384
|
video_max_pixels: 16384
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
@@ -15,15 +15,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: mllm_demo,identity,alpaca_en_demo
|
dataset: mllm_demo,identity,alpaca_en_demo
|
||||||
template: qwen2_vl
|
template: qwen3_vl_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/qwen2_5vl-7b/full/sft
|
output_dir: saves/qwen3-vl-4b/full/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
### model
|
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
|
||||||
trust_remote_code: true
|
|
||||||
|
|
||||||
### method
|
|
||||||
finetuning_type: lora
|
|
||||||
|
|
||||||
### dataset
|
|
||||||
task: mmlu_test # choices: [mmlu_test, ceval_validation, cmmlu_test]
|
|
||||||
template: fewshot
|
|
||||||
lang: en
|
|
||||||
n_shot: 5
|
|
||||||
|
|
||||||
### output
|
|
||||||
save_dir: saves/llama3-8b/lora/eval
|
|
||||||
|
|
||||||
### eval
|
|
||||||
batch_size: 4
|
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
### model
|
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
||||||
reward_model: saves/llama3-8b/lora/reward
|
|
||||||
trust_remote_code: true
|
|
||||||
|
|
||||||
### method
|
|
||||||
stage: ppo
|
|
||||||
do_train: true
|
|
||||||
finetuning_type: lora
|
|
||||||
lora_rank: 8
|
|
||||||
lora_target: all
|
|
||||||
|
|
||||||
### dataset
|
|
||||||
dataset: identity,alpaca_en_demo
|
|
||||||
template: llama3
|
|
||||||
cutoff_len: 2048
|
|
||||||
max_samples: 1000
|
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
|
||||||
dataloader_num_workers: 4
|
|
||||||
|
|
||||||
### output
|
|
||||||
output_dir: saves/llama3-8b/lora/ppo
|
|
||||||
logging_steps: 10
|
|
||||||
save_steps: 500
|
|
||||||
plot_loss: true
|
|
||||||
overwrite_output_dir: true
|
|
||||||
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
|
||||||
|
|
||||||
### train
|
|
||||||
per_device_train_batch_size: 1
|
|
||||||
gradient_accumulation_steps: 8
|
|
||||||
learning_rate: 1.0e-5
|
|
||||||
num_train_epochs: 3.0
|
|
||||||
lr_scheduler_type: cosine
|
|
||||||
warmup_ratio: 0.1
|
|
||||||
bf16: true
|
|
||||||
ddp_timeout: 180000000
|
|
||||||
|
|
||||||
### generate
|
|
||||||
max_new_tokens: 512
|
|
||||||
top_k: 0
|
|
||||||
top_p: 0.9
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
### model
|
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
||||||
trust_remote_code: true
|
|
||||||
|
|
||||||
### method
|
|
||||||
stage: sft
|
|
||||||
do_train: true
|
|
||||||
finetuning_type: lora
|
|
||||||
lora_rank: 8
|
|
||||||
lora_target: all
|
|
||||||
|
|
||||||
### dataset
|
|
||||||
dataset: identity,alpaca_en_demo
|
|
||||||
template: llama3
|
|
||||||
cutoff_len: 2048
|
|
||||||
max_samples: 1000
|
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
|
||||||
dataloader_num_workers: 4
|
|
||||||
|
|
||||||
### output
|
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
|
||||||
logging_steps: 10
|
|
||||||
save_steps: 500
|
|
||||||
plot_loss: true
|
|
||||||
overwrite_output_dir: true
|
|
||||||
save_only_model: false
|
|
||||||
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
|
||||||
|
|
||||||
### train
|
|
||||||
per_device_train_batch_size: 1
|
|
||||||
gradient_accumulation_steps: 8
|
|
||||||
learning_rate: 1.0e-4
|
|
||||||
num_train_epochs: 3.0
|
|
||||||
lr_scheduler_type: cosine
|
|
||||||
warmup_ratio: 0.1
|
|
||||||
bf16: true
|
|
||||||
ddp_timeout: 180000000
|
|
||||||
resume_from_checkpoint: null
|
|
||||||
|
|
||||||
### eval
|
|
||||||
# eval_dataset: alpaca_en_demo
|
|
||||||
# val_size: 0.1
|
|
||||||
# per_device_eval_batch_size: 1
|
|
||||||
# eval_strategy: steps
|
|
||||||
# eval_steps: 500
|
|
||||||
@@ -1,49 +0,0 @@
|
|||||||
# pip install git+https://github.com/hiyouga/transformers.git@llama4_train
|
|
||||||
|
|
||||||
### model
|
|
||||||
model_name_or_path: meta-llama/Llama-4-Scout-17B-16E-Instruct
|
|
||||||
trust_remote_code: true
|
|
||||||
|
|
||||||
### method
|
|
||||||
stage: sft
|
|
||||||
do_train: true
|
|
||||||
finetuning_type: lora
|
|
||||||
lora_rank: 8
|
|
||||||
lora_target: all
|
|
||||||
deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
|
|
||||||
|
|
||||||
### dataset
|
|
||||||
dataset: mllm_demo,identity,alpaca_en_demo
|
|
||||||
template: llama4
|
|
||||||
cutoff_len: 2048
|
|
||||||
max_samples: 1000
|
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
|
||||||
dataloader_num_workers: 4
|
|
||||||
|
|
||||||
### output
|
|
||||||
output_dir: saves/llama4-8b/lora/sft
|
|
||||||
logging_steps: 10
|
|
||||||
save_steps: 500
|
|
||||||
plot_loss: true
|
|
||||||
overwrite_output_dir: true
|
|
||||||
save_only_model: false
|
|
||||||
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
|
||||||
|
|
||||||
### train
|
|
||||||
per_device_train_batch_size: 1
|
|
||||||
gradient_accumulation_steps: 2
|
|
||||||
learning_rate: 1.0e-4
|
|
||||||
num_train_epochs: 3.0
|
|
||||||
lr_scheduler_type: cosine
|
|
||||||
warmup_ratio: 0.1
|
|
||||||
bf16: true
|
|
||||||
ddp_timeout: 180000000
|
|
||||||
resume_from_checkpoint: null
|
|
||||||
|
|
||||||
### eval
|
|
||||||
# eval_dataset: alpaca_en_demo
|
|
||||||
# val_size: 0.1
|
|
||||||
# per_device_eval_batch_size: 1
|
|
||||||
# eval_strategy: steps
|
|
||||||
# eval_steps: 500
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -13,15 +13,14 @@ pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo]
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: dpo_en_demo
|
dataset: dpo_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/dpo
|
output_dir: saves/qwen3-4b/lora/dpo
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -12,15 +12,14 @@ pref_beta: 0.1
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: kto_en_demo
|
dataset: kto_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/kto
|
output_dir: saves/qwen3-4b/lora/kto
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -13,12 +13,11 @@ lora_target: all
|
|||||||
dataset: c4_demo
|
dataset: c4_demo
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/pretrain
|
output_dir: saves/qwen3-4b/lora/pretrain
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -11,15 +11,14 @@ lora_target: all
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: dpo_en_demo
|
dataset: dpo_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/reward
|
output_dir: saves/qwen3-4b/lora/reward
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
MODEL_PATH=meta-llama/Meta-Llama-3-8B-Instruct
|
MODEL_PATH=Qwen/Qwen3-4B-Instruct-2507
|
||||||
|
|
||||||
llamafactory-cli train \
|
llamafactory-cli train \
|
||||||
--model_name_or_path ${MODEL_PATH} \
|
--model_name_or_path ${MODEL_PATH} \
|
||||||
@@ -13,13 +13,12 @@ llamafactory-cli train \
|
|||||||
--lora_rank 8 \
|
--lora_rank 8 \
|
||||||
--lora_target all \
|
--lora_target all \
|
||||||
--dataset identity,alpaca_en_demo \
|
--dataset identity,alpaca_en_demo \
|
||||||
--template llama3 \
|
--template qwen3_nothink \
|
||||||
--cutoff_len 2048 \
|
--cutoff_len 2048 \
|
||||||
--max_samples 1000 \
|
--max_samples 1000 \
|
||||||
--overwrite_cache \
|
|
||||||
--preprocessing_num_workers 16 \
|
--preprocessing_num_workers 16 \
|
||||||
--dataloader_num_workers 4 \
|
--dataloader_num_workers 4 \
|
||||||
--output_dir saves/llama3-8b/lora/sft \
|
--output_dir saves/qwen3-4b/lora/sft \
|
||||||
--logging_steps 10 \
|
--logging_steps 10 \
|
||||||
--save_steps 500 \
|
--save_steps 500 \
|
||||||
--plot_loss \
|
--plot_loss \
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: openai/gpt-oss-20b
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -11,15 +11,14 @@ lora_target: all
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
template: gpt
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/gpt-20b/lora/sft
|
output_dir: saves/qwen3-4b/lora/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -12,15 +12,14 @@ deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json,
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/qwen3-4b/lora/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct # or use local absolute path
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507 # or use local absolute path
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -12,10 +12,9 @@ lora_target: all
|
|||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
dataset_dir: REMOTE:llamafactory/demo_data # or use local absolute path
|
dataset_dir: REMOTE:llamafactory/demo_data # or use local absolute path
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
@@ -29,7 +28,7 @@ save_only_model: false
|
|||||||
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
||||||
|
|
||||||
### ray
|
### ray
|
||||||
ray_run_name: llama3_8b_sft_lora
|
ray_run_name: qwen3_4b_sft_lora
|
||||||
ray_storage_path: ./saves
|
ray_storage_path: ./saves
|
||||||
ray_num_workers: 4 # Number of GPUs to use.
|
ray_num_workers: 4 # Number of GPUs to use.
|
||||||
placement_strategy: PACK
|
placement_strategy: PACK
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@@ -11,13 +11,12 @@ lora_target: all
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
tokenized_path: saves/llama3-8b/dataset/sft
|
tokenized_path: saves/qwen3-4b/dataset/sft
|
||||||
|
|
||||||
### output
|
### output (not used)
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/qwen3-4b/lora/sft
|
||||||
overwrite_output_dir: true
|
overwrite_output_dir: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
|
||||||
image_max_pixels: 262144
|
image_max_pixels: 262144
|
||||||
video_max_pixels: 16384
|
video_max_pixels: 16384
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
@@ -15,15 +15,14 @@ pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo]
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: rlhf_v
|
dataset: rlhf_v
|
||||||
template: qwen2_vl
|
template: qwen3_vl_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/qwen2_5vl-7b/lora/dpo
|
output_dir: saves/qwen3-vl-4b/lora/dpo
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
|
||||||
image_max_pixels: 262144
|
image_max_pixels: 262144
|
||||||
video_max_pixels: 16384
|
video_max_pixels: 16384
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
@@ -13,15 +13,14 @@ lora_target: all
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: mllm_demo,identity,alpaca_en_demo # video: mllm_video_demo
|
dataset: mllm_demo,identity,alpaca_en_demo # video: mllm_video_demo
|
||||||
template: qwen2_vl
|
template: qwen3_vl_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/qwen2_5vl-7b/lora/sft
|
output_dir: saves/qwen3-vl-4b/lora/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
|
|||||||
template: llama3
|
template: llama3
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
|
|||||||
template: llama3
|
template: llama3
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
|
|||||||
template: llama3
|
template: llama3
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
quantization_bit: 4
|
quantization_bit: 4
|
||||||
quantization_method: bnb
|
quantization_method: bnb
|
||||||
double_quantization: false
|
double_quantization: false
|
||||||
@@ -14,15 +14,14 @@ lora_target: all
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/qwen3-4b/lora/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
|
||||||
quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
|
quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
|
||||||
quantization_method: bnb # choices: [bnb, hqq, eetq]
|
quantization_method: bnb # choices: [bnb, hqq, eetq]
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
@@ -13,15 +13,14 @@ lora_target: all
|
|||||||
|
|
||||||
### dataset
|
### dataset
|
||||||
dataset: identity,alpaca_en_demo
|
dataset: identity,alpaca_en_demo
|
||||||
template: llama3
|
template: qwen3_nothink
|
||||||
cutoff_len: 2048
|
cutoff_len: 2048
|
||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
dataloader_num_workers: 4
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/qwen3-4b/lora/sft
|
||||||
logging_steps: 10
|
logging_steps: 10
|
||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
@@ -41,12 +41,11 @@ dependencies = [
|
|||||||
"torch>=2.4.0",
|
"torch>=2.4.0",
|
||||||
"torchvision>=0.19.0",
|
"torchvision>=0.19.0",
|
||||||
"torchaudio>=2.4.0",
|
"torchaudio>=2.4.0",
|
||||||
"transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'",
|
"transformers>=4.51.0,<=4.57.1,!=4.52.0,!=4.57.0",
|
||||||
"transformers>=4.49.0,<=4.57.1,!=4.52.0,!=4.57.0; python_version >= '3.10'",
|
|
||||||
"datasets>=2.16.0,<=4.0.0",
|
"datasets>=2.16.0,<=4.0.0",
|
||||||
"accelerate>=1.3.0,<=1.11.0",
|
"accelerate>=1.3.0,<=1.11.0",
|
||||||
"peft>=0.14.0,<=0.17.1",
|
"peft>=0.14.0,<=0.17.1",
|
||||||
"trl>=0.8.6,<=0.9.6",
|
"trl>=0.18.0,<=0.24.0",
|
||||||
"torchdata>=0.10.0,<=0.11.0",
|
"torchdata>=0.10.0,<=0.11.0",
|
||||||
# gui
|
# gui
|
||||||
"gradio>=4.38.0,<=5.50.0",
|
"gradio>=4.38.0,<=5.50.0",
|
||||||
|
|||||||
@@ -18,9 +18,10 @@ import time
|
|||||||
|
|
||||||
import av
|
import av
|
||||||
import fire
|
import fire
|
||||||
|
from datasets import load_dataset
|
||||||
|
from eval_bleu_rouge import compute_metrics
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from transformers import Seq2SeqTrainingArguments
|
from transformers import Seq2SeqTrainingArguments
|
||||||
from datasets import load_dataset
|
|
||||||
|
|
||||||
from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
|
from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
|
||||||
from llamafactory.extras.constants import IGNORE_INDEX
|
from llamafactory.extras.constants import IGNORE_INDEX
|
||||||
@@ -29,8 +30,6 @@ from llamafactory.extras.packages import is_vllm_available
|
|||||||
from llamafactory.hparams import get_infer_args
|
from llamafactory.hparams import get_infer_args
|
||||||
from llamafactory.model import load_tokenizer
|
from llamafactory.model import load_tokenizer
|
||||||
|
|
||||||
from eval_bleu_rouge import compute_metrics
|
|
||||||
|
|
||||||
|
|
||||||
if is_vllm_available():
|
if is_vllm_available():
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
@@ -235,10 +234,10 @@ def vllm_infer(
|
|||||||
print(f"{len(all_prompts)} total generated results have been saved at {save_name}.")
|
print(f"{len(all_prompts)} total generated results have been saved at {save_name}.")
|
||||||
print("*" * 70)
|
print("*" * 70)
|
||||||
|
|
||||||
# Write all matrix results when matrix_save_name is not None,
|
# Write all matrix results when matrix_save_name is not None,
|
||||||
# The result matrix is referencing src.llamafactory.train.sft.workflow.run_sft # 127~132
|
# The result matrix is referencing src.llamafactory.train.sft.workflow.run_sft # 127~132
|
||||||
# trainer.save_metrics("predict", predict_results.metrics)
|
# trainer.save_metrics("predict", predict_results.metrics)
|
||||||
#
|
#
|
||||||
# {
|
# {
|
||||||
# "predict_bleu-4": 4.349975,
|
# "predict_bleu-4": 4.349975,
|
||||||
# "predict_model_preparation_time": 0.0128,
|
# "predict_model_preparation_time": 0.0128,
|
||||||
@@ -265,11 +264,11 @@ def vllm_infer(
|
|||||||
print(f"predict_{task}: {score:.4f}")
|
print(f"predict_{task}: {score:.4f}")
|
||||||
average_score["predict_" + task] = score
|
average_score["predict_" + task] = score
|
||||||
|
|
||||||
average_score['predict_model_preparation_time'] = preparation_time
|
average_score["predict_model_preparation_time"] = preparation_time
|
||||||
average_score['predict_runtime'] = predict_time
|
average_score["predict_runtime"] = predict_time
|
||||||
num_steps = len(range(0, len(train_dataset), batch_size))
|
num_steps = len(range(0, len(train_dataset), batch_size))
|
||||||
average_score['predict_samples_per_second'] = len(dataset) / predict_time if predict_time > 0 else 0.0
|
average_score["predict_samples_per_second"] = len(dataset) / predict_time if predict_time > 0 else 0.0
|
||||||
average_score['predict_steps_per_second'] = num_steps / predict_time if predict_time > 0 else 0.0
|
average_score["predict_steps_per_second"] = num_steps / predict_time if predict_time > 0 else 0.0
|
||||||
|
|
||||||
with open(matrix_save_name, "w", encoding="utf-8") as f:
|
with open(matrix_save_name, "w", encoding="utf-8") as f:
|
||||||
json.dump(average_score, f, indent=4)
|
json.dump(average_score, f, indent=4)
|
||||||
@@ -280,4 +279,4 @@ def vllm_infer(
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
fire.Fire(vllm_infer)
|
fire.Fire(vllm_infer)
|
||||||
|
|||||||
@@ -19,7 +19,7 @@
|
|||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
VERSION = "0.9.4.dev0"
|
VERSION = "0.9.4"
|
||||||
|
|
||||||
|
|
||||||
def print_env() -> None:
|
def print_env() -> None:
|
||||||
|
|||||||
@@ -94,11 +94,11 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
|
|||||||
|
|
||||||
def check_dependencies() -> None:
|
def check_dependencies() -> None:
|
||||||
r"""Check the version of the required packages."""
|
r"""Check the version of the required packages."""
|
||||||
check_version("transformers>=4.49.0,<=4.57.1")
|
check_version("transformers>=4.51.0,<=4.57.1")
|
||||||
check_version("datasets>=2.16.0,<=4.0.0")
|
check_version("datasets>=2.16.0,<=4.0.0")
|
||||||
check_version("accelerate>=1.3.0,<=1.11.0")
|
check_version("accelerate>=1.3.0,<=1.11.0")
|
||||||
check_version("peft>=0.14.0,<=0.17.1")
|
check_version("peft>=0.14.0,<=0.17.1")
|
||||||
check_version("trl>=0.8.6,<=0.9.6")
|
check_version("trl>=0.18.0,<=0.24.0")
|
||||||
|
|
||||||
|
|
||||||
def calculate_tps(dataset: list[dict[str, Any]], metrics: dict[str, float], stage: Literal["sft", "rm"]) -> float:
|
def calculate_tps(dataset: list[dict[str, Any]], metrics: dict[str, float], stage: Literal["sft", "rm"]) -> float:
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ import torch.nn.functional as F
|
|||||||
from transformers import Trainer
|
from transformers import Trainer
|
||||||
from trl import DPOTrainer
|
from trl import DPOTrainer
|
||||||
from trl.trainer import disable_dropout_in_model
|
from trl.trainer import disable_dropout_in_model
|
||||||
|
from trl.trainer.utils import prepare_deepspeed
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
from ...extras.constants import IGNORE_INDEX
|
from ...extras.constants import IGNORE_INDEX
|
||||||
@@ -95,7 +96,7 @@ class CustomDPOTrainer(DPOTrainer):
|
|||||||
if not (
|
if not (
|
||||||
getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
|
getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
|
||||||
): # quantized models are already set on the correct device
|
): # quantized models are already set on the correct device
|
||||||
self.ref_model = self._prepare_deepspeed(self.ref_model)
|
self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator)
|
||||||
else:
|
else:
|
||||||
self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
|
self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
|
||||||
self.ref_model.eval()
|
self.ref_model.eval()
|
||||||
@@ -210,7 +211,7 @@ class CustomDPOTrainer(DPOTrainer):
|
|||||||
@override
|
@override
|
||||||
def concatenated_forward(
|
def concatenated_forward(
|
||||||
self, model: "PreTrainedModel", batch: dict[str, "torch.Tensor"], is_ref_model: bool = False
|
self, model: "PreTrainedModel", batch: dict[str, "torch.Tensor"], is_ref_model: bool = False
|
||||||
) -> tuple["torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor", "torch.Tensor"]:
|
) -> dict[str, "torch.Tensor"]:
|
||||||
r"""Compute the sum log probabilities of the labels under given logits if loss_type is not IPO, ORPO or SimPO.
|
r"""Compute the sum log probabilities of the labels under given logits if loss_type is not IPO, ORPO or SimPO.
|
||||||
|
|
||||||
Otherwise the average log probabilities.
|
Otherwise the average log probabilities.
|
||||||
@@ -230,11 +231,18 @@ class CustomDPOTrainer(DPOTrainer):
|
|||||||
chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0)
|
chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0)
|
||||||
chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0)
|
chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0)
|
||||||
chosen_length, _ = valid_length.split(batch_size, dim=0)
|
chosen_length, _ = valid_length.split(batch_size, dim=0)
|
||||||
|
|
||||||
if self.loss_type in ["ipo", "orpo", "simpo"]:
|
if self.loss_type in ["ipo", "orpo", "simpo"]:
|
||||||
return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps
|
chosen_logps_avg = chosen_logps
|
||||||
else:
|
else:
|
||||||
return chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_logps / chosen_length
|
chosen_logps_avg = chosen_logps / chosen_length
|
||||||
|
|
||||||
|
return {
|
||||||
|
"chosen_logps": chosen_logps,
|
||||||
|
"rejected_logps": rejected_logps,
|
||||||
|
"chosen_logits": chosen_logits,
|
||||||
|
"rejected_logits": rejected_logits,
|
||||||
|
"chosen_logps_avg": chosen_logps_avg,
|
||||||
|
}
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def compute_reference_log_probs(
|
def compute_reference_log_probs(
|
||||||
@@ -252,9 +260,9 @@ class CustomDPOTrainer(DPOTrainer):
|
|||||||
ref_context = nullcontext()
|
ref_context = nullcontext()
|
||||||
|
|
||||||
with torch.no_grad(), ref_context:
|
with torch.no_grad(), ref_context:
|
||||||
reference_chosen_logps, reference_rejected_logps, *_ = self.concatenated_forward(
|
ref_output = self.concatenated_forward(ref_model, batch, is_ref_model=True)
|
||||||
ref_model, batch, is_ref_model=True
|
reference_chosen_logps = ref_output["chosen_logps"]
|
||||||
)
|
reference_rejected_logps = ref_output["rejected_logps"]
|
||||||
|
|
||||||
return reference_chosen_logps, reference_rejected_logps
|
return reference_chosen_logps, reference_rejected_logps
|
||||||
|
|
||||||
@@ -267,13 +275,13 @@ class CustomDPOTrainer(DPOTrainer):
|
|||||||
) -> tuple["torch.Tensor", dict[str, "torch.Tensor"]]:
|
) -> tuple["torch.Tensor", dict[str, "torch.Tensor"]]:
|
||||||
r"""Compute the DPO loss and other metrics for the given batch of inputs for train or test."""
|
r"""Compute the DPO loss and other metrics for the given batch of inputs for train or test."""
|
||||||
metrics = {}
|
metrics = {}
|
||||||
(
|
|
||||||
policy_chosen_logps,
|
model_output = self.concatenated_forward(model, batch)
|
||||||
policy_rejected_logps,
|
policy_chosen_logps = model_output["chosen_logps"]
|
||||||
policy_chosen_logits,
|
policy_rejected_logps = model_output["rejected_logps"]
|
||||||
policy_rejected_logits,
|
policy_chosen_logits = model_output["chosen_logits"]
|
||||||
policy_chosen_logps_avg,
|
policy_rejected_logits = model_output["rejected_logits"]
|
||||||
) = self.concatenated_forward(model, batch)
|
policy_chosen_logps_avg = model_output["chosen_logps_avg"]
|
||||||
|
|
||||||
reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch)
|
reference_chosen_logps, reference_rejected_logps = self.compute_reference_log_probs(model, batch)
|
||||||
losses, chosen_rewards, rejected_rewards = self.compute_preference_loss(
|
losses, chosen_rewards, rejected_rewards = self.compute_preference_loss(
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import torch
|
|||||||
from transformers import Trainer
|
from transformers import Trainer
|
||||||
from trl import KTOTrainer
|
from trl import KTOTrainer
|
||||||
from trl.trainer import disable_dropout_in_model
|
from trl.trainer import disable_dropout_in_model
|
||||||
|
from trl.trainer.utils import prepare_deepspeed
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
from ...extras.constants import IGNORE_INDEX
|
from ...extras.constants import IGNORE_INDEX
|
||||||
@@ -77,6 +78,13 @@ class CustomKTOTrainer(KTOTrainer):
|
|||||||
self.desirable_weight = finetuning_args.kto_chosen_weight
|
self.desirable_weight = finetuning_args.kto_chosen_weight
|
||||||
self.undesirable_weight = finetuning_args.kto_rejected_weight
|
self.undesirable_weight = finetuning_args.kto_rejected_weight
|
||||||
self.ftx_gamma = finetuning_args.pref_ftx
|
self.ftx_gamma = finetuning_args.pref_ftx
|
||||||
|
# trl
|
||||||
|
# Not all losses require a KL calculation
|
||||||
|
self.calculate_KL = True
|
||||||
|
if hasattr(self, "loss_type") and self.loss_type in ["apo_zero_unpaired"]:
|
||||||
|
self.calculate_KL = False
|
||||||
|
else:
|
||||||
|
self.loss_type = "kto"
|
||||||
|
|
||||||
Trainer.__init__(self, model=model, **kwargs)
|
Trainer.__init__(self, model=model, **kwargs)
|
||||||
self.model_accepts_loss_kwargs = False # overwrite trainer's default behavior
|
self.model_accepts_loss_kwargs = False # overwrite trainer's default behavior
|
||||||
@@ -90,7 +98,7 @@ class CustomKTOTrainer(KTOTrainer):
|
|||||||
if not (
|
if not (
|
||||||
getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
|
getattr(ref_model, "is_loaded_in_8bit", False) or getattr(ref_model, "is_loaded_in_4bit", False)
|
||||||
): # quantized models are already set on the correct device
|
): # quantized models are already set on the correct device
|
||||||
self.ref_model = self._prepare_deepspeed(self.ref_model)
|
self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator)
|
||||||
else:
|
else:
|
||||||
self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
|
self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
|
||||||
self.ref_model.eval()
|
self.ref_model.eval()
|
||||||
|
|||||||
@@ -33,12 +33,12 @@ from transformers.trainer_pt_utils import remove_dummy_checkpoint
|
|||||||
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
||||||
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
|
from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
|
||||||
from trl import PPOConfig, PPOTrainer
|
from trl import PPOConfig, PPOTrainer
|
||||||
from trl.core import PPODecorators, logprobs_from_logits
|
from trl import __version__ as trl_version
|
||||||
from trl.models.utils import unwrap_model_for_generation
|
from trl.models.utils import unwrap_model_for_generation
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
|
|
||||||
from ...extras import logging
|
from ...extras import logging
|
||||||
from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor
|
from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor, torch_gc
|
||||||
from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback
|
from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback
|
||||||
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
|
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
|
||||||
from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm
|
from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm
|
||||||
@@ -83,6 +83,19 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
|
|||||||
if eval_dataset is not None:
|
if eval_dataset is not None:
|
||||||
raise NotImplementedError("PPOTrainer does not support eval dataset yet.")
|
raise NotImplementedError("PPOTrainer does not support eval dataset yet.")
|
||||||
|
|
||||||
|
# Check if TRL version is compatible (0.8.6 <= version <= 0.9.6)
|
||||||
|
try:
|
||||||
|
from transformers.utils.versions import require_version
|
||||||
|
|
||||||
|
require_version(
|
||||||
|
"trl>=0.8.6,<=0.9.6",
|
||||||
|
"Incompatible TRL version detected. LLaMA-Factory ppo requires TRL version >=0.8.6,<=0.9.6. "
|
||||||
|
f"Found version {trl_version}. Please install the correct version with: `pip install trl>=0.8.6,<=0.9.6`\n"
|
||||||
|
"To fix: run `DISABLE_VERSION_CHECK=1 llamafactory-cli train example_ppo.yaml`\n",
|
||||||
|
)
|
||||||
|
except ImportError as e:
|
||||||
|
raise e
|
||||||
|
|
||||||
backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
|
backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps
|
||||||
ppo_config = PPOConfig(
|
ppo_config = PPOConfig(
|
||||||
model_name=model_args.model_name_or_path,
|
model_name=model_args.model_name_or_path,
|
||||||
@@ -406,7 +419,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
|
|||||||
return rewards.float().detach() # use fp32 type
|
return rewards.float().detach() # use fp32 type
|
||||||
|
|
||||||
@override
|
@override
|
||||||
@PPODecorators.empty_device_cache()
|
|
||||||
def batched_forward_pass(
|
def batched_forward_pass(
|
||||||
self,
|
self,
|
||||||
model: "AutoModelForCausalLMWithValueHead",
|
model: "AutoModelForCausalLMWithValueHead",
|
||||||
@@ -420,6 +432,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
|
|||||||
|
|
||||||
Subclass and override to inject custom behavior.
|
Subclass and override to inject custom behavior.
|
||||||
"""
|
"""
|
||||||
|
from trl.core import logprobs_from_logits
|
||||||
|
|
||||||
|
torch_gc()
|
||||||
bs = len(queries)
|
bs = len(queries)
|
||||||
fbs = self.config.mini_batch_size
|
fbs = self.config.mini_batch_size
|
||||||
all_logprobs = []
|
all_logprobs = []
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ def create_modelcard_and_push(
|
|||||||
elif training_args.push_to_hub:
|
elif training_args.push_to_hub:
|
||||||
trainer.push_to_hub(**kwargs)
|
trainer.push_to_hub(**kwargs)
|
||||||
else:
|
else:
|
||||||
trainer.create_model_card(license="other", **kwargs) # prevent from connecting to hub
|
Trainer.create_model_card(trainer, license="other", **kwargs) # prevent from connecting to hub
|
||||||
|
|
||||||
|
|
||||||
def create_ref_model(
|
def create_ref_model(
|
||||||
|
|||||||
@@ -12,8 +12,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
import sys
|
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import sys
|
||||||
from unittest.mock import patch
|
from unittest.mock import patch
|
||||||
|
|
||||||
from llamafactory.v1.config.arg_parser import get_args
|
from llamafactory.v1.config.arg_parser import get_args
|
||||||
|
|||||||
Reference in New Issue
Block a user