Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-20 20:52:50 +08:00)

[misc] fix packing and eval plot (#7623)
Commit 5817cda37e (parent 7e0cdb1a76)
@@ -5,7 +5,7 @@
 [](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
 [](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
 [](https://pypi.org/project/llamafactory/)
 [](https://scholar.google.com/scholar?cites=12620864006390196564)
 [](https://github.com/hiyouga/LLaMA-Factory/pulls)

 [](https://twitter.com/llamafactory_ai)
@@ -112,7 +112,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 [25/03/15] We supported **[SGLang](https://github.com/sgl-project/sglang)** as inference backend. Try `infer_backend: sglang` to accelerate inference.

-[25/03/12] We supported fine-tuning the **[Gemma-3](https://huggingface.co/blog/gemma3)** model.
+[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.

 [25/02/24] Announcing **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient, scalable and multi-modality RL training framework for efficient GRPO training.
@@ -873,7 +873,7 @@ If you have a project that should be incorporated, please contact via email or c
 This repository is licensed under the [Apache-2.0 License](LICENSE).

-Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
+Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)

 ## Citation
@@ -5,7 +5,7 @@
 [](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
 [](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
 [](https://pypi.org/project/llamafactory/)
 [](https://scholar.google.com/scholar?cites=12620864006390196564)
 [](https://github.com/hiyouga/LLaMA-Factory/pulls)

 [](https://twitter.com/llamafactory_ai)
@@ -114,7 +114,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 [25/03/15] We supported the **[SGLang](https://github.com/sgl-project/sglang)** inference backend. Use `infer_backend: sglang` to enable it.

-[25/03/12] We supported fine-tuning the **[Gemma-3](https://huggingface.co/blog/gemma3)** model.
+[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.

 [25/02/24] We announced the open-sourcing of **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient and scalable multimodal RL framework that supports efficient GRPO training.
@@ -876,7 +876,7 @@ swanlab_run_name: test_run # optional
 The code in this repository is open-sourced under the [Apache-2.0](LICENSE) license.

-Please follow the corresponding model licenses when using the model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
+Please follow the corresponding model licenses when using the model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)

 ## Citation
@@ -15,6 +15,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/qwen2-1_5b/full/sft
@@ -22,6 +23,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
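Note: the three keys added in these example configs (`dataloader_num_workers`, `save_only_model`, `report_to`) correspond to standard Hugging Face `TrainingArguments` fields that are passed through to the trainer. A minimal sketch of the equivalent direct construction, with illustrative values (assumes `transformers` is installed):

    from transformers import TrainingArguments

    # Mirror of the keys added to the example YAML configs above.
    args = TrainingArguments(
        output_dir="saves/qwen2-1_5b/full/sft",
        dataloader_num_workers=4,   # worker processes for the PyTorch DataLoader
        save_only_model=False,      # keep optimizer/scheduler state in checkpoints
        report_to="none",           # disable wandb/tensorboard/swanlab/mlflow reporting
    )
    print(args.report_to)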
@@ -20,6 +20,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/full/sft
@@ -27,6 +28,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -20,6 +20,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/full/sft
@@ -27,6 +28,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/sft
@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
@@ -19,6 +19,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/full/sft
@@ -26,6 +27,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b-pro/freeze/sft
@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/sft
@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
@@ -15,6 +15,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b-mod/full/sft
@@ -22,6 +23,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -18,10 +18,12 @@ cutoff_len: 2048
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/predict
 overwrite_output_dir: true
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### eval
 per_device_eval_batch_size: 1
@@ -19,6 +19,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/sft
@@ -26,6 +27,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
@@ -8,10 +8,10 @@ trust_remote_code: true
 stage: sft
 do_train: true
 finetuning_type: full
-freeze_vision_tower: true # choices: [true, false]
-freeze_multi_modal_projector: true # choices: [true, false]
-freeze_language_model: false # choices: [true, false]
-deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+freeze_vision_tower: true
+freeze_multi_modal_projector: true
+freeze_language_model: false
+deepspeed: examples/deepspeed/ds_z3_config.json

 ### dataset
 dataset: mllm_demo,identity,alpaca_en_demo
@@ -29,6 +29,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
@@ -27,6 +27,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/kto
@@ -24,6 +25,7 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/ppo
@@ -24,6 +25,7 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -26,6 +26,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -26,6 +26,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### ray
 ray_run_name: llama3_8b_sft_lora
@@ -28,10 +28,11 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
-gradient_accumulation_steps: 8
+gradient_accumulation_steps: 2
 learning_rate: 1.0e-4
 num_train_epochs: 3.0
 lr_scheduler_type: cosine

@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -29,6 +29,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1

@@ -27,6 +27,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
|
|||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
overwrite_cache: true
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
@ -23,6 +24,8 @@ logging_steps: 10
|
|||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
overwrite_output_dir: true
|
overwrite_output_dir: true
|
||||||
|
save_only_model: false
|
||||||
|
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
||||||
|
|
||||||
### train
|
### train
|
||||||
per_device_train_batch_size: 1
|
per_device_train_batch_size: 1
|
||||||
|
@ -16,6 +16,7 @@ cutoff_len: 2048
|
|||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
overwrite_cache: true
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
@ -23,6 +24,8 @@ logging_steps: 10
|
|||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
overwrite_output_dir: true
|
overwrite_output_dir: true
|
||||||
|
save_only_model: false
|
||||||
|
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
||||||
|
|
||||||
### train
|
### train
|
||||||
per_device_train_batch_size: 1
|
per_device_train_batch_size: 1
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
quantization_bit: 4
|
quantization_bit: 4
|
||||||
quantization_method: bitsandbytes
|
quantization_method: bnb
|
||||||
double_quantization: false
|
double_quantization: false
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
@ -19,6 +19,7 @@ cutoff_len: 2048
|
|||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
overwrite_cache: true
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
@ -26,6 +27,8 @@ logging_steps: 10
|
|||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
overwrite_output_dir: true
|
overwrite_output_dir: true
|
||||||
|
save_only_model: false
|
||||||
|
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
||||||
|
|
||||||
### train
|
### train
|
||||||
per_device_train_batch_size: 1
|
per_device_train_batch_size: 1
|
||||||
|
@ -16,6 +16,7 @@ cutoff_len: 2048
|
|||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
overwrite_cache: true
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
@ -23,6 +24,8 @@ logging_steps: 10
|
|||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
overwrite_output_dir: true
|
overwrite_output_dir: true
|
||||||
|
save_only_model: false
|
||||||
|
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
||||||
|
|
||||||
### train
|
### train
|
||||||
per_device_train_batch_size: 1
|
per_device_train_batch_size: 1
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
quantization_bit: 4
|
quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
|
||||||
quantization_method: bitsandbytes # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
|
quantization_method: bnb # choices: [bnb, hqq, eetq]
|
||||||
trust_remote_code: true
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
@ -18,6 +18,7 @@ cutoff_len: 2048
|
|||||||
max_samples: 1000
|
max_samples: 1000
|
||||||
overwrite_cache: true
|
overwrite_cache: true
|
||||||
preprocessing_num_workers: 16
|
preprocessing_num_workers: 16
|
||||||
|
dataloader_num_workers: 4
|
||||||
|
|
||||||
### output
|
### output
|
||||||
output_dir: saves/llama3-8b/lora/sft
|
output_dir: saves/llama3-8b/lora/sft
|
||||||
@ -25,6 +26,8 @@ logging_steps: 10
|
|||||||
save_steps: 500
|
save_steps: 500
|
||||||
plot_loss: true
|
plot_loss: true
|
||||||
overwrite_output_dir: true
|
overwrite_output_dir: true
|
||||||
|
save_only_model: false
|
||||||
|
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
|
||||||
|
|
||||||
### train
|
### train
|
||||||
per_device_train_batch_size: 1
|
per_device_train_batch_size: 1
|
||||||
|
@ -21,9 +21,9 @@ from datasets import load_dataset
|
|||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import jieba
|
import jieba # type: ignore
|
||||||
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
|
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu # type: ignore
|
||||||
from rouge_chinese import Rouge
|
from rouge_chinese import Rouge # type: ignore
|
||||||
|
|
||||||
jieba.setLogLevel(logging.CRITICAL)
|
jieba.setLogLevel(logging.CRITICAL)
|
||||||
jieba.initialize()
|
jieba.initialize()
|
||||||
@ -52,6 +52,7 @@ def compute_metrics(sample):
|
|||||||
metric_result = {}
|
metric_result = {}
|
||||||
for k, v in result.items():
|
for k, v in result.items():
|
||||||
metric_result[k] = round(v["f"] * 100, 4)
|
metric_result[k] = round(v["f"] * 100, 4)
|
||||||
|
|
||||||
metric_result["bleu-4"] = round(bleu_score * 100, 4)
|
metric_result["bleu-4"] = round(bleu_score * 100, 4)
|
||||||
|
|
||||||
return metric_result
|
return metric_result
|
||||||
|
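The `# type: ignore` comments above only quiet static type checkers for these optional metric dependencies; the guarded-import pattern around them is the interesting part. A minimal, self-contained sketch of the same idea that fails only when the metrics are actually requested (the flag and helper names here are illustrative, not from the script):

    try:
        import jieba  # type: ignore
        from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu  # type: ignore
        from rouge_chinese import Rouge  # type: ignore

        METRICS_AVAILABLE = True
    except ImportError:
        METRICS_AVAILABLE = False


    def require_metrics() -> None:
        # Raise lazily so the module can still be imported without the extras installed.
        if not METRICS_AVAILABLE:
            raise ImportError("Install the metric extras first: pip install jieba nltk rouge-chinese")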
@@ -1,7 +1,4 @@
-# Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
-#
-# This code is based on the HuggingFace's PEFT library.
-# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py
+# Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,12 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import os
 import shutil

 import fire
 from peft import PeftModel
-from transformers import AutoModel, AutoProcessor, AutoTokenizer, Qwen2_5OmniThinkerForConditionalGeneration
+from transformers import AutoModel, AutoProcessor, Qwen2_5OmniThinkerForConditionalGeneration # type: ignore


 def merge_lora(
@@ -41,20 +39,14 @@ def merge_lora(
         save_path (str): Directory where the merged model and configurations will be saved.
     """
     # 1. Load the original model, tokenizer, and processor
-    model = AutoModel.from_pretrained(base_model_path)
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path)
-    try:
-        processor = AutoProcessor.from_pretrained(base_model_path)
-    except Exception:
-        print("Processor configuration not found, skipping processor load.")
-        processor = None
-
-    print("Successfully loaded the original model, tokenizer, and processor (if available).")
+    model = AutoModel.from_pretrained(base_model_path, torch_dtype="auto", device_map="cpu")
+    processor = AutoProcessor.from_pretrained(base_model_path)
+    print("Successfully loaded the original model and tokenizer.")

     # 2. Extract the submodule to be merged (e.g., model.thinker)
     if not hasattr(model, submodule_name):
         raise AttributeError(f"The model does not have a submodule named '{submodule_name}'.")

     base_submodule = getattr(model, submodule_name)
     print(f"Successfully extracted submodule: {submodule_name}.")
@@ -71,11 +63,8 @@ def merge_lora(

     # 6. Save the final merged model along with the tokenizer and processor configuration
     model.save_pretrained(save_path)
-    tokenizer.save_pretrained(save_path)
-    if processor is not None:
-        processor.save_pretrained(save_path)
-
-    print(f"Merged model and configuration saved to {save_path}.")
+    processor.save_pretrained(save_path)
+    print(f"Merged model and tokenizer saved to {save_path}.")

     source_file = os.path.join(base_model_path, extra_file)
     target_file = os.path.join(save_path, extra_file)
@@ -89,7 +78,7 @@ def merge_lora(
 def save_full_model(
     saved_thinker_path: str,
     base_model_path: str,
-    save_path: str,
+    save_path: str = "./merged_model_checkpoint",
     extra_file: str = "spk_dict.pt",
 ):
     """Load the saved thinker module and the original model, replace the thinker in the original model.
@@ -99,26 +88,23 @@ def save_full_model(
     Args:
         saved_thinker_path (str): Path to the saved thinker weights.
         base_model_path (str): Directory path of the original model.
-        save_path (str): Directory where the final complete model will be saved.
+        save_path (str): Directory where the merged model and configurations will be saved.
         extra_file (str): Name of the extra file to be copied (default: "spk_dict.pt").
     """
-    # Load the thinker module
-    thinker = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(saved_thinker_path, device_map="cpu")
-    # Load the original model
-    base_model = AutoModel.from_pretrained(base_model_path, device_map="cpu")
-    # Replace the thinker module in the original model
+    # 1. Load the saved thinker module and the original model
+    thinker = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
+        saved_thinker_path, torch_dtype="auto", device_map="cpu"
+    )
+    base_model = AutoModel.from_pretrained(base_model_path, torch_dtype="auto", device_map="cpu")
     base_model.thinker = thinker

-    # Load the processor and tokenizer
-    processor = AutoProcessor.from_pretrained(base_model_path, trust_remote_code=True)
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
-
-    # Save the complete model along with its configurations
+    # 2. Save the complete model along with its tokenizer and processor configuration
+    processor = AutoProcessor.from_pretrained(base_model_path)
     base_model.save_pretrained(save_path)
-    tokenizer.save_pretrained(save_path)
     processor.save_pretrained(save_path)
-    print(f"Complete model, tokenizer, and processor configuration have been saved to {save_path}.")
+    print(f"Merged model and tokenizer saved to {save_path}.")

+    # 3. Copy the extra file from the base model directory to the save_path
     source_file = os.path.join(base_model_path, extra_file)
     target_file = os.path.join(save_path, extra_file)
     if os.path.exists(source_file):
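The two helpers above (`merge_lora` and `save_full_model`) are driven from the command line through `fire`, but the entry point itself is not part of this hunk. The sketch below shows one plausible wiring; the file name `qwen_omni_merge.py` and the exact CLI shape are assumptions, not taken from the diff:

    import fire

    if __name__ == "__main__":
        # Expose both helpers as subcommands, e.g.:
        #   python qwen_omni_merge.py save_full_model --saved_thinker_path ... --base_model_path ... --save_path ...
        fire.Fire({"merge_lora": merge_lora, "save_full_model": save_full_model})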
@@ -20,7 +20,7 @@ from transformers import Seq2SeqTrainingArguments

 from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
 from llamafactory.extras.constants import IGNORE_INDEX
-from llamafactory.extras.misc import check_version, get_device_count
+from llamafactory.extras.misc import get_device_count
 from llamafactory.extras.packages import is_vllm_available
 from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer
@@ -56,7 +56,6 @@ def vllm_infer(

     Usage: python vllm_infer.py --model_name_or_path meta-llama/Llama-2-7b-hf --template llama --dataset alpaca_en_demo
     """
-    check_version("vllm>=0.4.3,<=0.8.2")
     if pipeline_parallel_size > get_device_count():
         raise ValueError("Pipeline parallel size should be smaller than the number of gpus.")
setup.py
@@ -45,7 +45,7 @@ extra_require = {
     "torch": ["torch>=1.13.1"],
     "torch-npu": ["torch==2.4.0", "torch-npu==2.4.0.post2", "decorator"],
     "metrics": ["nltk", "jieba", "rouge-chinese"],
-    "deepspeed": ["deepspeed>=0.10.0,<=0.16.4"],
+    "deepspeed": ["deepspeed>=0.10.0,<=0.16.5"],
     "liger-kernel": ["liger-kernel>=0.5.5"],
     "bitsandbytes": ["bitsandbytes>=0.39.0"],
     "hqq": ["hqq"],
@@ -53,7 +53,7 @@ extra_require = {
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.8.2"],
+    "vllm": ["vllm>=0.4.3,<=0.8.3"],
     "sglang": ["sglang[srt]>=0.4.4", "transformers==4.48.3"],
     "galore": ["galore-torch"],
     "apollo": ["apollo-torch"],
@@ -24,7 +24,6 @@ import torch.nn.functional as F
 from transformers import DataCollatorForSeq2Seq

 from ..extras.constants import AUDIO_PLACEHOLDER, IGNORE_INDEX, IMAGE_PLACEHOLDER
-from ..extras.misc import get_current_device
 from ..extras.packages import is_pillow_available


@@ -65,30 +64,19 @@ def prepare_4d_attention_mask(attention_mask_with_indices: "torch.Tensor", dtype
     where `o` equals to `0.0`, `x` equals to `min_dtype`.
     """
     _, seq_len = attention_mask_with_indices.size()
-
-    # Move to compute device if the source is CPU.
-    source_device = attention_mask_with_indices.device
-    compute_device = get_current_device() if source_device.type == "cpu" else source_device
-    if compute_device != source_device:
-        attention_mask_with_indices = attention_mask_with_indices.to(compute_device)
-
     min_dtype = torch.finfo(dtype).min
-    zero_tensor = torch.tensor(0, dtype=dtype, device=compute_device)
+    zero_tensor = torch.tensor(0, dtype=dtype)

     # Create a non-padding mask.
-    non_padding = (attention_mask_with_indices != 0).unsqueeze(1).unsqueeze(2)
+    non_padding_mask = (attention_mask_with_indices != 0).unsqueeze(1).unsqueeze(2)
     # Create indices for comparison.
     indices = attention_mask_with_indices.unsqueeze(1).unsqueeze(2)  # [bsz, 1, 1, seq_len]
     indices_t = attention_mask_with_indices.unsqueeze(1).unsqueeze(3)  # [bsz, 1, seq_len, 1]
     # Create a lower triangular mask.
-    tril_mask = torch.tril(torch.ones((seq_len, seq_len), dtype=torch.bool, device=compute_device))
-    attention_mask_4d = (indices == indices_t) & non_padding & tril_mask
+    tril_mask = torch.tril(torch.ones((seq_len, seq_len), dtype=torch.bool))
+    attention_mask_4d = (indices == indices_t) & non_padding_mask & tril_mask
     # Invert the attention mask.
     attention_mask_4d = torch.where(attention_mask_4d, zero_tensor, min_dtype)
-
-    # Move back to original device if needed.
-    if compute_device != source_device:
-        attention_mask_4d = attention_mask_4d.to(source_device)
     return attention_mask_4d
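For reference, a standalone sketch of what `prepare_4d_attention_mask` computes after this change: packed-sequence indices (1, 2, ... per segment, 0 for padding) become a block-diagonal causal mask, with `0.0` where attention is allowed and the dtype's minimum value elsewhere. The function and variable names below are local to the sketch:

    import torch

    def block_diag_causal_mask(attention_mask_with_indices: torch.Tensor, dtype: torch.dtype) -> torch.Tensor:
        # attention_mask_with_indices: [bsz, seq_len]; 0 = padding, k > 0 = member of the k-th packed sequence.
        _, seq_len = attention_mask_with_indices.size()
        min_dtype = torch.finfo(dtype).min
        zero_tensor = torch.tensor(0, dtype=dtype)
        non_padding_mask = (attention_mask_with_indices != 0).unsqueeze(1).unsqueeze(2)
        indices = attention_mask_with_indices.unsqueeze(1).unsqueeze(2)    # [bsz, 1, 1, seq_len]
        indices_t = attention_mask_with_indices.unsqueeze(1).unsqueeze(3)  # [bsz, 1, seq_len, 1]
        tril_mask = torch.tril(torch.ones((seq_len, seq_len), dtype=torch.bool))
        allowed = (indices == indices_t) & non_padding_mask & tril_mask    # same segment, non-pad, causal
        return torch.where(allowed, zero_tensor, min_dtype)

    # Two packed sequences of lengths 2 and 2, plus one padding token.
    mask = block_diag_causal_mask(torch.tensor([[1, 1, 2, 2, 0]]), torch.float32)
    print(mask[0, 0])  # 0.0 where attention is allowed, a large negative value elsewhere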
@@ -493,8 +493,8 @@ class Llama4Plugin(BasePlugin):
         messages = deepcopy(messages)
         for message in messages:
             content = message["content"]
-            placeholder_count = content.count(IMAGE_PLACEHOLDER)
             if self.expand_mm_tokens:
+                placeholder_count = content.count(IMAGE_PLACEHOLDER)
                 prompt_splits = content.split(IMAGE_PLACEHOLDER)
                 new_content = []
                 for local_image_index, split_part in enumerate(prompt_splits):
@@ -507,6 +507,8 @@ class Llama4Plugin(BasePlugin):
                     new_content.append(tokens_for_this_image)

                 content = "".join(new_content)
+            else:
+                content = content.replace(IMAGE_PLACEHOLDER, self.image_token)

             message["content"] = content
@@ -164,28 +164,28 @@ class PackedSupervisedDatasetProcessor(SupervisedDatasetProcessor):
         model_inputs = defaultdict(list)
         knapsacks = greedy_knapsack(lengths, self.data_args.cutoff_len)
         for knapsack in knapsacks:
-            packed_input_ids, packed_attention_masks, packed_labels = [], [], []
-            packed_images, packed_videos, packed_audios, packed_position_ids = [], [], [], []
+            packed_input_ids, packed_attention_masks, packed_position_ids, packed_labels = [], [], [], []
+            packed_images, packed_videos, packed_audios = [], [], []
             for i, length in enumerate(knapsack):
                 index = length2indexes[length].pop()
                 packed_input_ids += batch_input_ids[index]
+                packed_position_ids += list(range(len(batch_input_ids[index])))  # NOTE: pad_to_multiple_of ignore this
                 packed_labels += batch_labels[index]
                 packed_images += batch_images[index]
                 packed_videos += batch_videos[index]
                 packed_audios += batch_audios[index]
                 if self.data_args.neat_packing:
                     packed_attention_masks += [i + 1] * len(batch_input_ids[index])  # start from 1
-                    packed_position_ids += list(range(len(batch_input_ids[index])))
                 else:
                     packed_attention_masks += [1] * len(batch_input_ids[index])

             if len(packed_input_ids) < self.data_args.cutoff_len + 1:  # avoid flash_attn drops attn mask
                 pad_length = self.data_args.cutoff_len - len(packed_input_ids) + 1
                 packed_input_ids += [self.tokenizer.pad_token_id] * pad_length
+                packed_position_ids += [0] * pad_length
                 packed_labels += [IGNORE_INDEX] * pad_length
                 if self.data_args.neat_packing:
                     packed_attention_masks += [0] * pad_length
-                    packed_position_ids += [0] * pad_length
                 else:
                     packed_attention_masks += [1] * pad_length  # more efficient flash_attn
@@ -194,10 +194,10 @@ class PackedSupervisedDatasetProcessor(SupervisedDatasetProcessor):

             model_inputs["input_ids"].append(packed_input_ids)
             model_inputs["attention_mask"].append(packed_attention_masks)
+            model_inputs["position_ids"].append(packed_position_ids)
             model_inputs["labels"].append(packed_labels)
             model_inputs["images"].append(packed_images or None)
             model_inputs["videos"].append(packed_videos or None)
             model_inputs["audios"].append(packed_audios or None)
-            model_inputs["position_ids"].append(packed_position_ids or None)

         return model_inputs
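The packing fix above makes `position_ids` restart at 0 for every sequence in a pack regardless of `neat_packing`, and pads them with 0 up to the cutoff. A minimal standalone sketch of the intended numbering (lengths are illustrative):

    def packed_position_ids(seq_lens: list[int], cutoff_len: int) -> list[int]:
        # Each packed sequence counts its positions from 0; padding slots get position 0.
        position_ids: list[int] = []
        for length in seq_lens:
            position_ids += list(range(length))
        position_ids += [0] * (cutoff_len - len(position_ids))
        return position_ids

    print(packed_position_ids([3, 2], 8))  # [0, 1, 2, 0, 1, 0, 0, 0]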
@@ -1370,7 +1370,7 @@ register_template(
         slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
     ),
     format_tools=ToolFormatter(tool_format="qwen"),
-    default_system="You are a helpful assistant.",
+    default_system="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
     stop_words=["<|im_end|>"],
 )
@@ -14,7 +14,7 @@

 import os
 from collections import OrderedDict, defaultdict
-from enum import Enum
+from enum import Enum, unique
 from typing import Optional

 from peft.utils import SAFETENSORS_WEIGHTS_NAME as SAFE_ADAPTER_WEIGHTS_NAME
@@ -115,6 +115,19 @@ class DownloadSource(str, Enum):
     OPENMIND = "om"


+@unique
+class QuantizationMethod(str, Enum):
+    r"""Borrowed from `transformers.utils.quantization_config.QuantizationMethod`."""
+
+    BNB = "bnb"
+    GPTQ = "gptq"
+    AWQ = "awq"
+    AQLM = "aqlm"
+    QUANTO = "quanto"
+    EETQ = "eetq"
+    HQQ = "hqq"
+
+
 class RopeScaling(str, Enum):
     LINEAR = "linear"
     DYNAMIC = "dynamic"
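Since the new `QuantizationMethod` subclasses `str`, plain strings read from YAML (such as `bnb` in the example configs above) compare equal to the enum members. A minimal sketch of that behaviour, using a trimmed-down copy of the enum:

    from enum import Enum, unique

    @unique
    class QuantizationMethod(str, Enum):
        BNB = "bnb"
        HQQ = "hqq"
        EETQ = "eetq"

    value_from_yaml = "bnb"
    print(QuantizationMethod(value_from_yaml) is QuantizationMethod.BNB)  # True
    print(QuantizationMethod.BNB == "bnb")  # True: str-backed enums compare by value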
@@ -160,5 +160,11 @@ class DataArguments:
         if self.mask_history and self.train_on_prompt:
             raise ValueError("`mask_history` is incompatible with `train_on_prompt`.")

+        if self.neat_packing:
+            self.packing = True
+
+        if self.packing:
+            self.cutoff_len -= 1  # avoid pad_to_multiple_of, needs improve
+
     def to_dict(self) -> dict[str, Any]:
         return asdict(self)
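With this change, turning on `neat_packing` implies `packing`, and any packing run reserves one token of `cutoff_len` (per the inline note, a workaround for `pad_to_multiple_of`). A minimal sketch of the coupling on a stripped-down dataclass (field defaults are illustrative):

    from dataclasses import dataclass

    @dataclass
    class PackingArgs:
        cutoff_len: int = 2048
        packing: bool = False
        neat_packing: bool = False

        def __post_init__(self) -> None:
            if self.neat_packing:
                self.packing = True  # neat packing only makes sense with packing enabled
            if self.packing:
                self.cutoff_len -= 1  # leave headroom so padding never exceeds the cutoff

    print(PackingArgs(neat_packing=True))  # PackingArgs(cutoff_len=2047, packing=True, neat_packing=True)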
@@ -23,7 +23,7 @@ import torch
 from transformers.training_args import _convert_str_dict
 from typing_extensions import Self

-from ..extras.constants import AttentionFunction, EngineName, RopeScaling
+from ..extras.constants import AttentionFunction, EngineName, QuantizationMethod, RopeScaling


 @dataclass
@@ -184,8 +184,8 @@ class BaseModelArguments:
 class QuantizationArguments:
     r"""Arguments pertaining to the quantization method."""

-    quantization_method: Literal["bitsandbytes", "hqq", "eetq"] = field(
-        default="bitsandbytes",
+    quantization_method: QuantizationMethod = field(
+        default=QuantizationMethod.BNB,
         metadata={"help": "Quantization method to use for on-the-fly quantization."},
     )
     quantization_bit: Optional[int] = field(
@@ -135,7 +135,7 @@ def _check_extra_dependencies(
         check_version("mixture-of-depth>=1.1.6", mandatory=True)

     if model_args.infer_backend == EngineName.VLLM:
-        check_version("vllm>=0.4.3,<=0.8.2")
+        check_version("vllm>=0.4.3,<=0.8.3")
         check_version("vllm", mandatory=True)
     elif model_args.infer_backend == EngineName.SGLANG:
         check_version("sglang>=0.4.4")
@@ -285,10 +285,6 @@ def get_train_args(args: Optional[Union[dict[str, Any], list[str]]] = None) -> _
     if model_args.use_unsloth and is_deepspeed_zero3_enabled():
         raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.")

-    if data_args.neat_packing and not data_args.packing:
-        logger.warning_rank0("`neat_packing` requires `packing` is True. Change `packing` to True.")
-        data_args.packing = True
-
     _verify_model_args(model_args, data_args, finetuning_args)
     _check_extra_dependencies(model_args, finetuning_args, training_args)
@@ -97,12 +97,13 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
         processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
         patch_processor(processor, tokenizer, model_args)
     except Exception as e:
-        logger.debug(f"Processor was not found: {e}.")
+        logger.debug(f"Failed to load processor: {e}.")
         processor = None

     # Avoid load tokenizer, see:
     # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/auto/processing_auto.py#L324
     if processor is not None and "Processor" not in processor.__class__.__name__:
+        logger.debug("The loaded processor is not an instance of Processor. Dropping it.")
         processor = None

     return {"tokenizer": tokenizer, "processor": processor}
@@ -18,7 +18,6 @@

 import os
 import random
-from enum import Enum, unique
 from typing import TYPE_CHECKING, Any

 import torch

@@ -28,7 +27,7 @@ from transformers.integrations import is_deepspeed_zero3_enabled
 from transformers.modeling_utils import is_fsdp_enabled

 from ...extras import logging
-from ...extras.constants import FILEEXT2TYPE
+from ...extras.constants import FILEEXT2TYPE, QuantizationMethod
 from ...extras.misc import check_version, get_current_device


@@ -41,19 +40,6 @@ if TYPE_CHECKING:
 logger = logging.get_logger(__name__)


-@unique
-class QuantizationMethod(str, Enum):
-    r"""Borrowed from `transformers.utils.quantization_config.QuantizationMethod`."""
-
-    BITS_AND_BYTES = "bitsandbytes"
-    GPTQ = "gptq"
-    AWQ = "awq"
-    AQLM = "aqlm"
-    QUANTO = "quanto"
-    EETQ = "eetq"
-    HQQ = "hqq"
-
-
 def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> list[dict[str, Any]]:
     r"""Prepare the tokenized dataset to perform AutoGPTQ. Do not use tensor output for JSON serialization."""
     if os.path.isfile(model_args.export_quantization_dataset):

@@ -145,7 +131,7 @@ def configure_quantization(
         logger.info_rank0(f"Quantizing model to {model_args.export_quantization_bit} bit with AutoGPTQ.")

     elif model_args.quantization_bit is not None:  # on-the-fly
-        if model_args.quantization_method == QuantizationMethod.BITS_AND_BYTES.value:
+        if model_args.quantization_method == QuantizationMethod.BNB:
             if model_args.quantization_bit == 8:
                 check_version("bitsandbytes>=0.37.0", mandatory=True)
                 init_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)

@@ -173,7 +159,7 @@ def configure_quantization(
                 init_kwargs["device_map"] = {"": get_current_device()}  # change auto device map for inference

             logger.info_rank0(f"Quantizing model to {model_args.quantization_bit} bit with bitsandbytes.")
-        elif model_args.quantization_method == QuantizationMethod.HQQ.value:
+        elif model_args.quantization_method == QuantizationMethod.HQQ:
             if model_args.quantization_bit not in [8, 6, 5, 4, 3, 2, 1]:
                 raise ValueError("HQQ only accepts 1/2/3/4/5/6/8-bit quantization.")

@@ -185,7 +171,7 @@ def configure_quantization(
                 nbits=model_args.quantization_bit, quant_zero=False, quant_scale=False, axis=0
             )  # use ATEN kernel (axis=0) for performance
             logger.info_rank0(f"Quantizing model to {model_args.quantization_bit} bit with HQQ.")
-        elif model_args.quantization_method == QuantizationMethod.EETQ.value:
+        elif model_args.quantization_method == QuantizationMethod.EETQ:
            if model_args.quantization_bit != 8:
                 raise ValueError("EETQ only accepts 8-bit quantization.")

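Because `QuantizationMethod` subclasses `str`, its members compare equal to their raw string values, which is why the `.value` suffixes in the comparisons above can be dropped once `quantization_method` is typed as the enum. A standalone illustration of that behaviour (not part of the diff; the two members shown are just examples):

```python
from enum import Enum, unique


@unique
class QuantizationMethod(str, Enum):
    BNB = "bnb"
    HQQ = "hqq"


# str-backed enum members compare equal to plain strings, so a value parsed
# from a YAML/CLI config and the enum member are interchangeable.
assert QuantizationMethod.BNB == "bnb"
assert QuantizationMethod("hqq") is QuantizationMethod.HQQ
```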
@@ -91,7 +91,13 @@ def run_dpo(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "rewards/accuracies"])
+            keys = ["loss", "rewards/accuracies"]
+            if isinstance(dataset_module["eval_dataset"], dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:

@@ -82,7 +82,13 @@ def run_kto(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "rewards/chosen"])
+            keys = ["loss", "rewards/chosen"]
+            if isinstance(dataset_module["eval_dataset"], dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:

@@ -66,7 +66,13 @@ def run_pt(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])
+            keys = ["loss"]
+            if isinstance(dataset_module["eval_dataset"], dict):
+                keys += [f"eval_{key}_loss" for key in dataset_module["eval_dataset"].keys()]
+            else:
+                keys += ["eval_loss"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:

@@ -74,7 +74,15 @@ def run_rm(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])
+            keys = ["loss"]
+            if isinstance(dataset_module["eval_dataset"], dict):
+                keys += sum(
+                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
+                )
+            else:
+                keys += ["eval_loss", "eval_accuracy"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     # Evaluation
     if training_args.do_eval:
@@ -110,7 +110,15 @@ def run_sft(
         trainer.save_metrics("train", train_result.metrics)
         trainer.save_state()
         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
-            plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"])
+            keys = ["loss"]
+            if isinstance(dataset_module["eval_dataset"], dict):
+                keys += sum(
+                    [[f"eval_{key}_loss", f"eval_{key}_accuracy"] for key in dataset_module["eval_dataset"].keys()], []
+                )
+            else:
+                keys += ["eval_loss", "eval_accuracy"]
+
+            plot_loss(training_args.output_dir, keys=keys)

     if training_args.predict_with_generate:
         tokenizer.padding_side = "left"  # use left-padding in generation
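When several eval datasets are passed, the Trainer logs per-split metrics such as `eval_<split>_loss`, so the plotted keys have to be expanded per split instead of the fixed `eval_loss` / `eval_accuracy` pair. A small sketch of the key expansion used above, with hypothetical split names (`alpaca`, `sharegpt`):

```python
# Hypothetical eval splits; in the trainer workflows the names come from the
# `eval_dataset` mapping built by the dataset loader.
eval_dataset = {"alpaca": ..., "sharegpt": ...}

keys = ["loss"]
if isinstance(eval_dataset, dict):
    # sum(list_of_lists, []) flattens the per-split [loss, accuracy] pairs.
    keys += sum([[f"eval_{name}_loss", f"eval_{name}_accuracy"] for name in eval_dataset], [])
else:
    keys += ["eval_loss", "eval_accuracy"]

print(keys)
# ['loss', 'eval_alpaca_loss', 'eval_alpaca_accuracy', 'eval_sharegpt_loss', 'eval_sharegpt_accuracy']
```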
@@ -42,7 +42,7 @@ def create_top() -> dict[str, "Component"]:

     with gr.Row():
         quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True)
-        quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes")
+        quantization_method = gr.Dropdown(choices=["bnb", "hqq", "eetq"], value="bnb")
         template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default")
         rope_scaling = gr.Dropdown(choices=["none", "linear", "dynamic", "yarn", "llama3"], value="none")
         booster = gr.Dropdown(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto")
@@ -25,10 +25,10 @@ from llamafactory.train.test_utils import load_dataset_module

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "kto",
     "do_train": True,
     "finetuning_type": "full",

@@ -45,7 +45,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_feedback_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="kto_en_demo", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -25,10 +25,10 @@ from llamafactory.train.test_utils import load_dataset_module

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "rm",
     "do_train": True,
     "finetuning_type": "full",

@@ -54,7 +54,7 @@ def _convert_sharegpt_to_openai(messages: list[dict[str, str]]) -> list[dict[str
 @pytest.mark.parametrize("num_samples", [16])
 def test_pairwise_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="dpo_en_demo", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
@@ -25,12 +25,12 @@ from llamafactory.train.test_utils import load_dataset_module

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",

@@ -45,7 +45,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_supervised_single_turn(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="ONLINE", dataset=TINY_DATA, **TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(TINY_DATA, split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -66,7 +66,7 @@ def test_supervised_multi_turn(num_samples: int):
     train_dataset = load_dataset_module(dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", **TRAIN_ARGS)[
         "train_dataset"
     ]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -79,7 +79,7 @@ def test_supervised_train_on_prompt(num_samples: int):
     train_dataset = load_dataset_module(
         dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", train_on_prompt=True, **TRAIN_ARGS
     )["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:

@@ -93,7 +93,7 @@ def test_supervised_mask_history(num_samples: int):
     train_dataset = load_dataset_module(
         dataset_dir="REMOTE:" + DEMO_DATA, dataset="system_chat", mask_history=True, **TRAIN_ARGS
     )["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
@@ -24,12 +24,12 @@ from llamafactory.train.test_utils import load_dataset_module

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "ppo",
     "do_train": True,
     "finetuning_type": "full",

@@ -48,7 +48,7 @@ TRAIN_ARGS = {
 @pytest.mark.parametrize("num_samples", [16])
 def test_unsupervised_data(num_samples: int):
     train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
     indexes = random.choices(range(len(original_data)), k=num_samples)
     for index in indexes:
@@ -24,11 +24,11 @@ from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")


 def test_base_collator():
-    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA, "template": "default"})
+    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA3, "template": "default"})
     tokenizer_module = load_tokenizer(model_args)
     template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
     data_collator = MultiModalDataCollatorForSeq2Seq(

@@ -19,12 +19,12 @@ from llamafactory.train.test_utils import load_dataset_module

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
@@ -20,7 +20,6 @@ import torch
 from PIL import Image

 from llamafactory.data.mm_plugin import get_mm_plugin
-from llamafactory.extras.packages import is_transformers_version_greater_than
 from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_tokenizer


@@ -35,7 +34,8 @@ if TYPE_CHECKING:

 HF_TOKEN = os.getenv("HF_TOKEN")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA4 = os.getenv("TINY_LLAMA4", "llamafactory/tiny-random-Llama-4")

 MM_MESSAGES = [
     {"role": "user", "content": "<image>What is in this image?"},

@@ -130,13 +130,13 @@ def _check_plugin(


 def test_base_plugin():
-    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA)
+    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA3)
     base_plugin = get_mm_plugin(name="base")
     check_inputs = {"plugin": base_plugin, **tokenizer_module}
     _check_plugin(**check_inputs)


-@pytest.mark.skipif(not HF_TOKEN or not is_transformers_version_greater_than("4.50.0"), reason="Gated model.")
+@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
 def test_gemma3_plugin():
     image_seqlen = 256
     tokenizer_module = _load_tokenizer_module(model_name_or_path="google/gemma-3-4b-it")

@@ -157,6 +157,27 @@ def test_gemma3_plugin():
     _check_plugin(**check_inputs)


+@pytest.mark.xfail(reason="Unknown error.")
+def test_llama4_plugin():
+    tokenizer_module = _load_tokenizer_module(model_name_or_path=TINY_LLAMA4)
+    processor = tokenizer_module["processor"]
+    llama4_plugin = get_mm_plugin(name="llama4", image_token="<|image|>")
+    check_inputs = {"plugin": llama4_plugin, **tokenizer_module}
+    mm_inputs = _get_mm_inputs(tokenizer_module["processor"])
+    image_height, image_width = mm_inputs["pixel_values"][0].shape[-2:]
+    num_patches_per_chunk = int(
+        (image_height // processor.patch_size) * (image_width // processor.patch_size) // processor.downsample_ratio
+    )
+    aspect_ratios = mm_inputs.pop("aspect_ratios")
+    tokens_for_this_image = processor._prompt_split_image(aspect_ratios[0], num_patches_per_chunk)
+    check_inputs["expected_mm_messages"] = [
+        {key: value.replace("<image>", tokens_for_this_image) for key, value in message.items()}
+        for message in MM_MESSAGES
+    ]
+    check_inputs["expected_mm_inputs"] = mm_inputs
+    _check_plugin(**check_inputs)
+
+
 def test_llava_plugin():
     image_seqlen = 576
     tokenizer_module = _load_tokenizer_module(model_name_or_path="llava-hf/llava-1.5-7b-hf")
@@ -29,7 +29,8 @@ if TYPE_CHECKING:

 HF_TOKEN = os.getenv("HF_TOKEN")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA4 = os.getenv("TINY_LLAMA4", "llamafactory/tiny-random-Llama-4")

 MESSAGES = [
     {"role": "user", "content": "How are you"},

@@ -75,7 +76,7 @@ def _check_template(model_id: str, template_name: str, prompt_str: str, answer_s

 @pytest.mark.parametrize("use_fast", [True, False])
 def test_encode_oneturn(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     prompt_ids, answer_ids = template.encode_oneturn(tokenizer, MESSAGES)
     prompt_str = (

@@ -90,7 +91,7 @@ def test_encode_oneturn(use_fast: bool):

 @pytest.mark.parametrize("use_fast", [True, False])
 def test_encode_multiturn(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     encoded_pairs = template.encode_multiturn(tokenizer, MESSAGES)
     prompt_str_1 = (

@@ -111,8 +112,8 @@ def test_encode_multiturn(use_fast: bool):

 @pytest.mark.parametrize("use_fast", [True, False])
 def test_jinja_template(use_fast: bool):
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
-    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
+    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, use_fast=use_fast)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     tokenizer.chat_template = template._get_jinja_template(tokenizer)  # llama3 template no replace
     assert tokenizer.chat_template != ref_tokenizer.chat_template

@@ -120,7 +121,7 @@ def test_jinja_template(use_fast: bool):


 def test_ollama_modelfile():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     assert template.get_ollama_modelfile(tokenizer) == (
         "# ollama modelfile auto-generated by llamafactory\n\n"

@@ -137,7 +138,7 @@ def test_ollama_modelfile():


 def test_get_stop_token_ids():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
     template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
     assert set(template.get_stop_token_ids(tokenizer)) == {128008, 128009}


@@ -152,7 +153,7 @@ def test_gemma_template(use_fast: bool):
         "<start_of_turn>model\n"
     )
     answer_str = "很高兴认识你!<end_of_turn>\n"
-    _check_template("google/gemma-2-9b-it", "gemma", prompt_str, answer_str, use_fast)
+    _check_template("google/gemma-3-4b-it", "gemma", prompt_str, answer_str, use_fast)


 @pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
@@ -168,7 +169,20 @@ def test_llama3_template(use_fast: bool):
     _check_template("meta-llama/Meta-Llama-3-8B-Instruct", "llama3", prompt_str, answer_str, use_fast)


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
+@pytest.mark.parametrize(
+    "use_fast", [True, pytest.param(False, marks=pytest.mark.xfail(reason="Llama 4 has no slow tokenizer."))]
+)
+def test_llama4_template(use_fast: bool):
+    prompt_str = (
+        "<|begin_of_text|><|header_start|>user<|header_end|>\n\nHow are you<|eot|>"
+        "<|header_start|>assistant<|header_end|>\n\nI am fine!<|eot|>"
+        "<|header_start|>user<|header_end|>\n\n你好<|eot|>"
+        "<|header_start|>assistant<|header_end|>\n\n"
+    )
+    answer_str = "很高兴认识你!<|eot|>"
+    _check_template(TINY_LLAMA4, "llama4", prompt_str, answer_str, use_fast)
+
+
 @pytest.mark.parametrize(
     "use_fast", [True, pytest.param(False, marks=pytest.mark.xfail(reason="Phi-4 slow tokenizer is broken."))]
 )

@@ -183,35 +197,21 @@ def test_phi4_template(use_fast: bool):
     _check_template("microsoft/phi-4", "phi4", prompt_str, answer_str, use_fast)


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")  # TODO: why it is gated?
 @pytest.mark.parametrize("use_fast", [True, False])
 def test_qwen_template(use_fast: bool):
     prompt_str = (
-        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+        "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n"
         "<|im_start|>user\nHow are you<|im_end|>\n"
         "<|im_start|>assistant\nI am fine!<|im_end|>\n"
         "<|im_start|>user\n你好<|im_end|>\n"
         "<|im_start|>assistant\n"
     )
     answer_str = "很高兴认识你!<|im_end|>\n"
-    _check_template("Qwen/Qwen2-7B-Instruct", "qwen", prompt_str, answer_str, use_fast)
+    _check_template("Qwen/Qwen2.5-7B-Instruct", "qwen", prompt_str, answer_str, use_fast)


-@pytest.mark.parametrize("use_fast", [True, False])
-@pytest.mark.xfail(reason="Yi tokenizer is broken.")
-def test_yi_template(use_fast: bool):
-    prompt_str = (
-        "<|im_start|>user\nHow are you<|im_end|>\n"
-        "<|im_start|>assistant\nI am fine!<|im_end|>\n"
-        "<|im_start|>user\n你好<|im_end|>\n"
-        "<|im_start|>assistant\n"
-    )
-    answer_str = "很高兴认识你!<|im_end|>\n"
-    _check_template("01-ai/Yi-1.5-6B-Chat", "yi", prompt_str, answer_str, use_fast)
-
-
-def test_parse_template():
-    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, token=HF_TOKEN)
+def test_parse_llama3_template():
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3, token=HF_TOKEN)
     template = parse_template(tokenizer)
     assert template.format_user.slots == [
         "<|start_header_id|>user<|end_header_id|>\n\n{{content}}<|eot_id|>"

@@ -223,12 +223,11 @@ def test_parse_template():
     assert template.default_system == ""


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
 def test_parse_qwen_template():
-    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct", token=HF_TOKEN)
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct", token=HF_TOKEN)
     template = parse_template(tokenizer)
     assert template.format_user.slots == ["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]
     assert template.format_assistant.slots == ["{{content}}<|im_end|>\n"]
     assert template.format_system.slots == ["<|im_start|>system\n{{content}}<|im_end|>\n"]
     assert template.format_prefix.slots == []
-    assert template.default_system == "You are a helpful assistant."
+    assert template.default_system == "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
@@ -17,10 +17,10 @@ import os
 from llamafactory.chat import ChatModel


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "finetuning_type": "lora",
     "template": "llama3",
     "infer_dtype": "float16",

@@ -21,12 +21,12 @@ from llamafactory.train.tuner import export_model, run_exp

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_LLAMA_ADAPTER = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "do_train": True,
     "finetuning_type": "lora",
     "dataset_dir": "REMOTE:" + DEMO_DATA,

@@ -35,10 +35,11 @@ TRAIN_ARGS = {
     "overwrite_output_dir": True,
     "per_device_train_batch_size": 1,
     "max_steps": 1,
+    "report_to": "none",
 }

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "adapter_name_or_path": TINY_LLAMA_ADAPTER,
     "finetuning_type": "lora",
     "template": "llama3",
@@ -21,10 +21,10 @@ from llamafactory.extras.packages import is_transformers_version_greater_than
 from llamafactory.train.test_utils import load_infer_model


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "template": "llama3",
 }


@@ -21,10 +21,10 @@ from llamafactory.extras.misc import get_current_device
 from llamafactory.train.test_utils import load_train_model


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",

@@ -19,12 +19,12 @@ import pytest
 from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, patch_valuehead_model


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_LLAMA_VALUEHEAD = os.getenv("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "template": "llama3",
     "infer_dtype": "float16",
 }

@@ -37,7 +37,7 @@ def fix_valuehead_cpu_loading():

 def test_base():
     model = load_infer_model(**INFER_ARGS)
-    ref_model = load_reference_model(TINY_LLAMA)
+    ref_model = load_reference_model(TINY_LLAMA3)
     compare_model(model, ref_model)


@@ -19,10 +19,10 @@ import torch
 from llamafactory.train.test_utils import load_infer_model, load_train_model


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "freeze",

@@ -36,7 +36,7 @@ TRAIN_ARGS = {
 }

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "finetuning_type": "freeze",
     "template": "llama3",
     "infer_dtype": "float16",
@@ -19,10 +19,10 @@ import torch
 from llamafactory.train.test_utils import load_infer_model, load_train_model


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",

@@ -36,7 +36,7 @@ TRAIN_ARGS = {
 }

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "finetuning_type": "full",
     "template": "llama3",
     "infer_dtype": "float16",

@@ -27,14 +27,14 @@ from llamafactory.train.test_utils import (
 )


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_LLAMA_ADAPTER = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")

 TINY_LLAMA_VALUEHEAD = os.getenv("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",

@@ -48,7 +48,7 @@ TRAIN_ARGS = {
 }

 INFER_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "adapter_name_or_path": TINY_LLAMA_ADAPTER,
     "finetuning_type": "lora",
     "template": "llama3",

@@ -81,13 +81,13 @@ def test_lora_train_extra_modules():

 def test_lora_train_old_adapters():
     model = load_train_model(adapter_name_or_path=TINY_LLAMA_ADAPTER, create_new_adapter=False, **TRAIN_ARGS)
-    ref_model = load_reference_model(TINY_LLAMA, TINY_LLAMA_ADAPTER, use_lora=True, is_trainable=True)
+    ref_model = load_reference_model(TINY_LLAMA3, TINY_LLAMA_ADAPTER, use_lora=True, is_trainable=True)
     compare_model(model, ref_model)


 def test_lora_train_new_adapters():
     model = load_train_model(adapter_name_or_path=TINY_LLAMA_ADAPTER, create_new_adapter=True, **TRAIN_ARGS)
-    ref_model = load_reference_model(TINY_LLAMA, TINY_LLAMA_ADAPTER, use_lora=True, is_trainable=True)
+    ref_model = load_reference_model(TINY_LLAMA3, TINY_LLAMA_ADAPTER, use_lora=True, is_trainable=True)
     compare_model(
         model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"]
     )

@@ -105,5 +105,5 @@ def test_lora_train_valuehead():

 def test_lora_inference():
     model = load_infer_model(**INFER_ARGS)
-    ref_model = load_reference_model(TINY_LLAMA, TINY_LLAMA_ADAPTER, use_lora=True).merge_and_unload()
+    ref_model = load_reference_model(TINY_LLAMA3, TINY_LLAMA_ADAPTER, use_lora=True).merge_and_unload()
     compare_model(model, ref_model)
@@ -19,12 +19,12 @@ import pytest
 from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, load_train_model


-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TINY_LLAMA_PISSA = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-pissa")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",

@@ -27,10 +27,10 @@ from llamafactory.train.sft.trainer import CustomSeq2SeqTrainer

 DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

-TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

 TRAIN_ARGS = {
-    "model_name_or_path": TINY_LLAMA,
+    "model_name_or_path": TINY_LLAMA3,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",

@@ -41,6 +41,7 @@ TRAIN_ARGS = {
     "overwrite_output_dir": True,
     "per_device_train_batch_size": 1,
     "max_steps": 1,
+    "report_to": "none",
 }


@@ -1,2 +1,2 @@
 # change if test fails
-0.9.3.101
+0.9.3.102