Compare commits

...

242 Commits
v0.9.2 ... main

Author SHA1 Message Date
Yaowei Zheng
4ba7de0434
[model] Qwen3-30B-A3B-Thinking-2507 (#8788) 2025-07-30 23:05:12 +08:00
Yaowei Zheng
ea8a2d60d0
[deps] upgrade vllm to 0.10.0 (#8787) 2025-07-30 22:26:38 +08:00
Yaowei Zheng
ae0ef374a3
[assets] update readme (#8784) 2025-07-30 17:57:17 +08:00
Yaowei Zheng
edd112f35c
[model] add qwen3 2507 model (#8783) 2025-07-30 17:19:19 +08:00
Kingsley
7218d4aa96
[model] support keye-vl-8b (#8776) 2025-07-29 21:24:08 +08:00
Kingsley
4380b7b35e
[model] update glm4.5 (#8770) 2025-07-29 19:57:29 +08:00
Yaowei Zheng
3307ff1d4a
[model] add qwen3 2507 models (#8750) 2025-07-25 20:21:47 +08:00
Kingsley
2aadc90c2d
[model] add glm4moe (#8689) 2025-07-25 19:53:45 +08:00
Yaowei Zheng
2353e16e20
[assets] update readme (#8739) 2025-07-24 23:13:24 +08:00
Yaowei Zheng
6812f5e1f5
[assets] update wechat (#8731) 2025-07-24 02:35:47 +08:00
Steven sun
2077875622
[model] support granite4 (#8680) 2025-07-21 14:15:36 +08:00
Yaowei Zheng
678b7d69d2
[assets] update wechat (#8685) 2025-07-18 19:13:26 +08:00
Yaowei Zheng
f00742b078
[assets] update wechat (#8637) 2025-07-14 21:24:28 +08:00
Kingsley
fdb70c04e0
[data] fix gemma3n mmplugin (#8627) 2025-07-14 13:32:57 +08:00
Kingsley
95ed6c45cd
[model] add Devstral-Small-2507 (#8614) 2025-07-11 18:59:53 +08:00
Yaowei Zheng
cf1087d409
[webui] fix elems (#8587) 2025-07-08 21:16:42 +08:00
Kingsley
766884fa5c
[data] support glm4.1v video training (#8571) 2025-07-08 16:29:44 +08:00
Yaowei Zheng
6a8d88826e
[webui] fix abort finish (#8569) 2025-07-07 23:07:46 +08:00
Yaowei Zheng
043103e1c9
[webui] support other hub (#8567) 2025-07-07 22:18:48 +08:00
Yaowei Zheng
5817583630
[deps] bump transformers to 4.49.0 (#8564) 2025-07-07 20:31:50 +08:00
Yaowei Zheng
62bd2c8047
[assets] update wechat (#8565) 2025-07-07 20:29:20 +08:00
Redwood-Digital
1b549e3199
[docs] add nvidia-container-toolkit to Linux Docker setup instructions (#8557) 2025-07-06 19:37:08 +08:00
Vivek Iyer
c6290db118
Revert "[model] add lora dropout to unsloth" - requested feature already exists (#8554)
Co-authored-by: viyer <vivek_iyer2@apple.com>
2025-07-05 11:25:31 +08:00
wjunLu
d30cbcdfa5
[ci] Add workflow for building NPU image (#8546)
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2025-07-04 20:56:59 +08:00
Vivek Iyer
62c6943699
[model] add lora dropout to unsloth (#8548)
Co-authored-by: viyer <vivek_iyer2@apple.com>
2025-07-04 14:56:36 +08:00
Ze-Yi LIN
8e7727f4ee
[tracking] fix swanlab hparams (#8532)
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2025-07-02 22:08:44 +08:00
Yaowei Zheng
e117e3c2b7
[assets] update issue template (#8530) 2025-07-02 19:29:58 +08:00
Yaowei Zheng
dcd75e7063
[assets] update readme (#8529) 2025-07-02 17:42:27 +08:00
Kingsley
4465e4347e
[assets] update readme (#8519) 2025-07-02 15:38:38 +08:00
Kingsley
c5a08291f4
[model] add gemma3n (#8509) 2025-07-01 22:37:24 +08:00
Yaowei Zheng
544b7dc2ed
[assets] update wechat (#8517) 2025-07-01 21:23:48 +08:00
Injae Ryou
ac6c93df1f
[parser] update config loading to use OmegaConf #7793 (#8505) 2025-07-01 21:05:13 +08:00
Kingsley
0b188ca00c
[model] add GLM-4.1V (#8462) 2025-06-30 01:09:41 +08:00
Liu Jiajun
0a004904bd
[data] fix gemma2 eos token (#8480)
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2025-06-27 18:19:15 +08:00
Yaowei Zheng
bb7bf51554
Merge commit from fork 2025-06-26 13:55:42 +08:00
Yaowei Zheng
7242caf0ff
[assets] update readme (#8461) 2025-06-25 22:15:03 +08:00
Yaowei Zheng
ed57b7ba2a
[webui] upgrade webui and fix api (#8460) 2025-06-25 21:59:58 +08:00
Yaowei Zheng
b10333dafb
[model] do not force load processor (#8457) 2025-06-25 19:43:00 +08:00
Yaowei Zheng
6b46c8b689
[assets] update wechat (#8458) 2025-06-25 19:42:28 +08:00
Yaowei Zheng
be27eae175
[data] fix audio reader (#8448) 2025-06-24 20:53:20 +08:00
Yaowei Zheng
31b0787e12
[misc] fix ci (#8441) 2025-06-24 16:37:32 +08:00
Kingsley
fffa43be86
[model] Add mistral-small 3.2 & kimi-dev (#8433) 2025-06-24 14:59:47 +08:00
Yaowei Zheng
8ed085e403
[model] add kimi vl 2506 (#8432) 2025-06-23 17:56:48 +08:00
Vivek Iyer
1221533542
[model] unsloth resume from checkpoint bug (#8423)
Co-authored-by: viyer <vivek_iyer2@apple.com>
2025-06-23 16:43:54 +08:00
codingma
8a3bddc7fa
[assets] update wechat 2025-06-23 14:42:52 +08:00
Yaowei Zheng
3a119ed5a2
[assets] update wechat (#8414) 2025-06-20 03:38:43 +08:00
Dhia Eddine Rhaiem
0d7d0ea972
[model] add support for Falcon H1 (#8403) 2025-06-18 16:51:23 +08:00
Yaowei Zheng
0e1fea71d2
[assets] update readme (#8396) 2025-06-17 16:15:20 +08:00
Yaowei Zheng
ec04d7b89c
[misc] set dev version (#8389) 2025-06-17 01:39:25 +08:00
Yaowei Zheng
cabc9207be
[ci] add docker version (#8390) 2025-06-17 01:37:47 +08:00
Yaowei Zheng
f3d144f001
[model] fix vlm utils (#8388) 2025-06-17 01:08:49 +08:00
Yaowei Zheng
af2f75e688
[data] fix qwen2vl pos ids (#8387) 2025-06-17 00:48:54 +08:00
Yaowei Zheng
9f2f12b0fe
[version] release v0.9.3 (#8386) 2025-06-16 19:21:32 +08:00
Yaowei Zheng
bb84c3c83e
[assets] update wechat (#8385) 2025-06-16 18:23:22 +08:00
Aman Gupta
1cfe42916d
[trainer] Add LD-DPO objective (#8362) 2025-06-12 16:10:38 +08:00
Yaowei Zheng
5ed62a29c5
[misc] tiny fixes (#8348) 2025-06-10 15:30:58 +08:00
阿丹(adan)
d39d3106cb
[model] support MiniCPM4 (#8314) 2025-06-10 14:38:39 +08:00
Kingsley
8ffe7daa8d
[model] support Mistral3.1 small 2503 (#8335) 2025-06-09 10:37:42 +08:00
Chenhao Zhang
8fa55db1ec
[assets] Add awesome works used LLaMA-Factory (#8333) 2025-06-09 10:21:17 +08:00
Yaowei Zheng
d8a5050cfb
[assets] update wechat (#8328) 2025-06-07 09:00:07 +08:00
Yaowei Zheng
7ecc2d46ca
[model] fix model generate (#8327) 2025-06-07 08:47:50 +08:00
Vivek Iyer
d325a1a7c7
[model] pushing FFT with unsloth (#8325)
Co-authored-by: viyer <vivek_iyer2@apple.com>
2025-06-07 08:20:58 +08:00
Yaowei Zheng
239ced076c
[data] fix empty template (#8312) 2025-06-06 13:50:50 +08:00
Yaowei Zheng
f5f356649b
[setup] fix uv (#8311) 2025-06-06 11:54:15 +08:00
Yaowei Zheng
13fd43617c
[assets] update readme (#8303) 2025-06-05 23:23:15 +08:00
Kingsley
fcd8662306
[tests] add visual model save test (#8248)
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
2025-06-05 20:38:01 +08:00
Yaowei Zheng
cecba57b3e
[assets] fix npu docker (#8298) 2025-06-05 19:09:20 +08:00
Butui Hu
83688b0b4d
[launcher] Add elastic and fault-tolerant training support (#8286)
Signed-off-by: Butui Hu <hot123tea123@gmail.com>
2025-06-05 16:40:03 +08:00
Kingsley
5308424705
[script] add Script description for qwen_omni_merge (#8293) 2025-06-05 13:22:01 +08:00
Yaowei Zheng
81c4d9bee6
[assets] update docker files (#8291) 2025-06-04 23:30:46 +08:00
Yaowei Zheng
ee676d29f6
[assets] update readme (#8288) 2025-06-04 17:46:12 +08:00
Yaowei Zheng
e3d5e0fa28
[assets] add icon (#8276) 2025-06-03 20:36:21 +08:00
Kingsley
3425bc6e71
[data] support nested images input for videos (#8264) 2025-06-03 20:26:29 +08:00
Ze-Yi LIN
6cc247e815
[tracking] swanlab add llamafactory tag (#8258) 2025-06-03 18:42:29 +08:00
Yaowei Zheng
16a3f8a71b
[assets] update wechat (#8270) 2025-06-03 17:33:57 +08:00
Kingsley
65aa86ed39
[model] add MIMO_VL (#8249) 2025-06-01 03:54:54 +08:00
Yaowei Zheng
fba9c9d9b9
[deps] upgrade transformers to 4.52.4 (#8245) 2025-05-31 16:51:40 +08:00
Akshat Sehgal
21829b5eaf
[model] add smollm2 support (#8220) 2025-05-31 16:29:01 +08:00
hoshi-hiyouga
762c2d776f
[assets] update readme (#8235) 2025-05-30 16:52:12 +08:00
Kingsley
e31afdfd4f
[scripts] specify model class for qwen_omni merge (#8227) 2025-05-30 14:20:12 +08:00
hoshi-hiyouga
be02003d16
[model] add deepseek 0528 models (#8215) 2025-05-29 21:37:07 +08:00
hoshi-hiyouga
a4048b7bb6
[assets] fix docker images (#8203) 2025-05-28 22:26:05 +08:00
yzoaim
73b12baaaf
[workflow] auto push docker images (#8181)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-05-28 20:21:15 +08:00
hoshi-hiyouga
55d37dc472
[assets] update Dockerfile (#8201) 2025-05-28 20:20:59 +08:00
hoshi-hiyouga
54ffd06026
[webui] fix skip args (#8195) 2025-05-28 18:11:07 +08:00
Muqi Li
00c4988f89
[assets] fix incorrect user_tag in dataset_info.json to prevent skipped entries (#8197) 2025-05-28 18:01:37 +08:00
Youngwoo Kim
57c6e23247
[data] Reading files from cloud is broken (#8182) (#8183) 2025-05-28 15:50:44 +08:00
hoshi-hiyouga
07f792148e
[assets] fix docker image (#8180) 2025-05-27 19:01:31 +08:00
hoshi-hiyouga
e542f95710
[data] fix shared file system (#8179) 2025-05-27 18:36:03 +08:00
hoshi-hiyouga
4ecf4daeb2
[webui] add extra args to export (#8178) 2025-05-27 18:25:31 +08:00
hoshi-hiyouga
519ac92803
[assets] update docker files (#8176) 2025-05-27 18:15:23 +08:00
hoshi-hiyouga
16e1a5097e
[webui] add infer extra args (#8167) 2025-05-27 12:04:00 +08:00
hoshi-hiyouga
09436c1f45
[webui] fix input args (#8162) 2025-05-27 02:05:54 +08:00
hoshi-hiyouga
f3a1dc8483
[model] add smollm2 and medgemma (#8161) 2025-05-26 23:19:58 +08:00
hoshi-hiyouga
dc8cca11b3
[deps] upgrade transformers (#8159) 2025-05-26 22:03:58 +08:00
Akshat Sehgal
e6f45d696b
feat: add smollm support (#8050) 2025-05-26 19:47:54 +08:00
wangzhan
c477ae6405
[api] support repetition_penalty and align presence_penalty with OpenAI Client (#7958) 2025-05-26 18:45:11 +08:00
hoshi-hiyouga
52dead8775
[assets] update wechat (#8156) 2025-05-26 18:28:02 +08:00
Kingsley
a9211a730e
[data] fix internvl plugin when using PIL images (#8129) 2025-05-22 01:32:59 +08:00
hoshi-hiyouga
763fbc294b
[misc] update data readme (#8128) 2025-05-21 22:41:18 +08:00
hoshi-hiyouga
b0c8ba73e0
[deps] update to transformers 4.52 (#8125) 2025-05-21 05:16:18 +08:00
hoshi-hiyouga
b3b2c9f1ee
[data] llama3 multi tool support (#8124) 2025-05-21 02:01:12 +08:00
hoshi-hiyouga
f96c085857
[assets] update readme (#8110) 2025-05-20 02:44:18 +08:00
hoshi-hiyouga
b83a38eb98
[data] qwen3 fixes (#8109) 2025-05-20 02:00:30 +08:00
hoshi-hiyouga
f3fd67a9bb
[model] switch to gptqmodel (#8108) 2025-05-19 22:25:40 +08:00
piamo
a6f3adf930
[model] update rope kwargs for yarn (#8101) 2025-05-19 20:07:54 +08:00
hoshi-hiyouga
ed2f89efaf
[doc] add no build isolation (#8103) 2025-05-19 19:25:13 +08:00
Ma, Xiaochen
16e26236eb
[trainer] fix KeyError at end of pretrain (#8099) 2025-05-19 18:01:26 +08:00
Biao Wang
89a0d10c18
[misc] fix cli (#8095)
Co-authored-by: wangbiao11 <wangbiao11@baidu.com>
2025-05-19 17:59:39 +08:00
Saiya
820ed764c4
[infer] support lora adapter for SGLang backend (#8067) 2025-05-16 23:33:47 +08:00
Kingsley
66f719dd96
[data] add forward compatibility for video_utils in Transformers 4.52.0 (#8077) 2025-05-16 17:41:04 +08:00
Eric Tang
130bfaf8e3
[data] support loading folder from remote (#8078) 2025-05-16 15:35:38 +08:00
Shawn Tao
e8a18c17e9
[infer] Modify vllm_infer.py to batch preprocess to avoid too much files opened error (#8051)
Co-authored-by: Kingsley <82590017+Kuangdd01@users.noreply.github.com>
2025-05-15 10:54:35 +08:00
hoshi-hiyouga
2b23c0a7a1
[assets] update wechat (#8057) 2025-05-14 18:01:48 +08:00
hoshi-hiyouga
ab2c05115b
[assets] update windows installation (#8042) 2025-05-13 17:01:56 +08:00
hoshi-hiyouga
8d472c20cb
[model] add seed coder and qwen3 quant models (#8039) 2025-05-13 15:59:55 +08:00
hoshi-hiyouga
845af89ea4
[data] fix kimi vl template (#8015) 2025-05-11 20:45:19 +08:00
Kingsley
cef3a0b2e2
[scripts] add video params for vllm infer (#7992) 2025-05-09 21:16:52 +08:00
yunhao-tech
865ac07491
[data] Avoid repetitive tool description warp (#8000)
Co-authored-by: chenyunhao <chenyunhao@wps.cn>
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-05-09 21:16:37 +08:00
tpoisonooo
f584db50cf
[docs] add GraphGen (#7974) 2025-05-07 12:23:11 +02:00
hoshi-hiyouga
97e0a4cb5c
[misc] update liger kernel patch (#7966) 2025-05-06 20:32:16 +02:00
hoshi-hiyouga
c6bcca4c83
[example] update examples (#7964) 2025-05-06 17:24:25 +02:00
Kingsley
5ee9eb64d8
[model] add mimo7b (#7946) 2025-05-06 17:10:30 +02:00
hoshi-hiyouga
937447bd8a
[misc] fix qwen2 omni (#7962) 2025-05-06 15:39:13 +02:00
hoshi-hiyouga
52f25651a2
[model] add qwen2 omni 3b (#7945) 2025-05-03 16:36:51 +08:00
Eric Chen
75d7c35fdf
[assets] Warp Support README Update (#7887) 2025-05-02 00:08:48 +08:00
hoshi-hiyouga
6a584b4092
[hparam] add enable think argument (#7928) 2025-04-30 17:21:30 +08:00
hoshi-hiyouga
41ec928683
[data] fix base plugin (#7924) 2025-04-30 16:28:05 +08:00
hoshi-hiyouga
d8295cd601
[data] optimize qwen3 loss computation (#7923) 2025-04-30 16:18:00 +08:00
hoshi-hiyouga
a8430f4244
[misc] fix uv (#7913) 2025-04-30 07:45:03 +08:00
hoshi-hiyouga
072bfe29d3
[data] add eval_on_each_dataset arg (#7912) 2025-04-30 06:56:43 +08:00
hoshi-hiyouga
c5b1d07e7c
[data] replace eos token for base models (#7911) 2025-04-30 06:52:28 +08:00
hoshi-hiyouga
77c569e071
[data] improve mm plugin (#7910) 2025-04-30 06:34:28 +08:00
hoshi-hiyouga
ae392e054c
[model] add qwen3 (#7885) 2025-04-29 09:34:05 +08:00
Kingsley
369474451d
[data] fix qwen2.5 omni template (#7883) 2025-04-29 00:58:23 +08:00
hoshi-hiyouga
1f338deb87
[model] fix dsv3 leaf node (#7879) 2025-04-28 18:11:09 +08:00
hoshi-hiyouga
00b5c05946
[data] fix qwen2 omni plugin (#7875) 2025-04-28 14:22:41 +08:00
zhaop-l
1bd319d16c
[trainer] make projector trainable in freeze training (#7872)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-28 13:19:37 +08:00
hoshi-hiyouga
fcca3b0b0d
[data] fix minicpmo vllm infer (#7870) 2025-04-28 01:59:53 +08:00
Kingsley
035e98035c
fix attn patch for kimivl (#7867) 2025-04-27 23:12:28 +08:00
Eric Tang
b4407e4b0b
[ray] add storage filesystem to ray config (#7854) 2025-04-27 22:12:40 +08:00
hoshi-hiyouga
036a76e9cb
[assets] update wechat (#7840) 2025-04-24 16:31:05 +08:00
hoshi-hiyouga
4fbdc65fcb
[model] fix vit gradient checkpointing (#7830) 2025-04-23 22:48:48 +08:00
hoshi-hiyouga
2989d39239
Merge commit from fork 2025-04-23 16:38:27 +08:00
hoshi-hiyouga
1344416378
[model] fix moe zero3 (#7826) 2025-04-23 15:30:49 +08:00
Kingsley
1dd67eb042
[data] fix internvl plugin (#7817) 2025-04-23 00:58:22 +08:00
hoshi-hiyouga
2b7d564e3b
[assets] update model readme (#7804) 2025-04-22 16:43:56 +08:00
Kingsley
d43013f14a
[model] add arch check for InternVL (#7803) 2025-04-22 16:38:05 +08:00
Kingsley
c91165a5a6
[misc] update internvl constants (#7801) 2025-04-22 15:53:08 +08:00
hoshi-hiyouga
7f3c31f6f4
[trainer] support early stop (#7797) 2025-04-22 01:59:33 +08:00
hoshi-hiyouga
92101f34a1
[data] improve mmplugin (#7795) 2025-04-22 01:25:33 +08:00
hoshi-hiyouga
a62cba3d05
[example] add bash usage (#7794) 2025-04-22 00:25:51 +08:00
Juanxi Tian
d128382d3c
[trainer] Add Muon Optimizer (#7749)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-21 23:38:37 +08:00
hoshi-hiyouga
278df4308d
[parser] support omegaconf (#7793) 2025-04-21 23:30:30 +08:00
Changrui Chen
81768df04c
[data] Fix wrong position ids with packed attention masks (#7754)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-21 23:19:36 +08:00
flashJd
1302ca39f6
[misc] fix new tokens adding (#7253)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-21 23:19:02 +08:00
ddddng
b8cddbc7d7
[model] fix gemma3 export (#7786)
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-21 23:07:11 +08:00
Sachin Beldona
ec7257e70f
[misc] fix bug in constant (#7765)
Co-authored-by: Sachin Beldona <sbeldona@cs.cmu.edu>
2025-04-21 23:06:31 +08:00
hoshi-hiyouga
a4455e3021
[assets] update wechat (#7792) 2025-04-21 21:29:42 +08:00
hoshi-hiyouga
610f164c69
[trainer] fix pt loss (#7748)
* fix pt loss

* robust

* fix

* test
2025-04-17 03:15:35 +08:00
hoshi-hiyouga
0a0cfeb782
[breaking] bump transformers to 4.45.0 & improve ci (#7746)
* update ci

* fix

* fix

* fix

* fix

* fix
2025-04-17 02:36:48 +08:00
hoshi-hiyouga
4831552856
[infer] set env for vllm ascend (#7745) 2025-04-17 01:08:55 +08:00
Kingsley
125513fa5c
[model] support intern-VL 2.5-3 series (#7258)
* add internvl and rebase

* fix for internvl2&3

* remove lines

* fix video_inputs & lint

* nit

* add constants

* remove lines

* fix

* fix error

* pass ci

* pass ci

* skip internvl & nit
2025-04-17 00:31:30 +08:00
ENg-122
8543400584
[misc] improve entrypoint (#7345)
* 纯粹优化下入口代码,因为看到if else太多了

* Update cli.py

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-16 21:48:23 +08:00
leo-pony
e1fdd6e2f8
[infer] support vllm-ascend (#7739) 2025-04-16 20:06:47 +08:00
codingma
d07983dceb
[assets] wechat (#7740) 2025-04-16 18:02:01 +08:00
hoshi-hiyouga
9b94211045
[api] fix chat messages (#7732) 2025-04-15 16:39:08 +08:00
hoshi-hiyouga
0fe5631f9b
[deps] upgrade vllm (#7728) 2025-04-15 14:57:40 +08:00
Joe Schoonover
b5d667cebf
[docker] patch docker-rocm (#7725)
* Update Dockerfile

* Fix typo

* Fix syntax for /bin/sh conditional

* Add build args to docker-compose

* Change shell to /bin/bash

This is required for "==" syntax in conditional string comparison
2025-04-15 13:36:39 +08:00
hoshi-hiyouga
ac8c6fdd3a
[assets] update model readme (#7724) 2025-04-15 00:41:09 +08:00
Kingsley
df8752e8ee
[model] Support Kimi_VL thinking/instruct (#7719)
* add kimi_vl

* patch config

* check version

* Update mm_plugin.py

* Update mm_plugin.py

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-15 00:21:58 +08:00
hoshi-hiyouga
3a13d2cdb1
[misc] fix env vars (#7715) 2025-04-14 16:04:04 +08:00
hoshi-hiyouga
3ef36d0057
[misc] upgrade cli (#7714) 2025-04-14 15:41:22 +08:00
hoshi-hiyouga
1fd4d14fbb
[deps] upgrade transformers (#7704) 2025-04-13 18:11:34 +08:00
Yuxuan Zhang
481ecbf9c5
[model] add GLM-4-0414 (#7695)
* Update README_zh.md

* update
2025-04-13 17:10:45 +08:00
hoshi-hiyouga
60a84f664b
[deps] fix uv conflicts (#7686)
* fix #7678

* Update setup.py

* Update tests.yml

* Update publish.yml

* Update Makefile
2025-04-11 18:02:24 +08:00
hoshi-hiyouga
11bcafd06a
[assets] update wechat (#7674) 2025-04-10 20:10:46 +08:00
Eric Tang
6c53471de2
[data] support for specifying a dataset in cloud storage (#7567)
* add support for loading datasets from s3/gcs

* add comments to readme

* run linter and address comments

* add option to pass in kwargs to ray init (i.e. runtime env)

* address comment

* revert mixed up changes
2025-04-10 11:31:35 +08:00
Eric Tang
39c1e29ed7
[ray] allow for specifying ray.init kwargs (i.e. runtime_env) (#7647)
* ray init kwargs

* Update trainer_utils.py

* fix ray args

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-10 11:31:05 +08:00
Dain Kim
ee840b4e01
[bugfix] enable_gemma_liger_kernel (#7660)
- The `enable_liger_kernel` function for the Gemma model series was not executed due to the existing `if` statement in the code.
- Changed the line to an `elif` statement so that the `apply_liger_kernel` function is executed properly.

resolved: #7628
2025-04-10 11:27:30 +08:00
jilongW
3bdc7e1e6c
[misc] fix cuda warn on intel GPU (#7655) 2025-04-09 21:37:54 +08:00
hoshi-hiyouga
34fdabe005
[data] add coig-p dataset (#7657) 2025-04-09 21:18:25 +08:00
hoshi-hiyouga
24cb890432
[assets] update readme (#7654) 2025-04-09 18:27:38 +08:00
hoshi-hiyouga
39876b85fc
[assets] update readme (#7644) 2025-04-09 01:06:06 +08:00
Kingsley
7d8bee96fc
[data] Fix bugs of use_audio_in_video in Qwen2.5 Omni (#7638)
* cache _mm_inputs

* nit

* support for use_audio_in_video

* remove cache

* fix data

* Update mllm_video_audio_demo.json
2025-04-08 18:40:10 +08:00
Shawn Tao
8f5f4cc559
[trainer] fix key error (#7635) 2025-04-08 18:39:50 +08:00
Adarsh Shirawalmath
8ee26642f3
[sglang] support transformers 4.51.0 (#7639) 2025-04-08 18:39:23 +08:00
hoshi-hiyouga
5817cda37e
[misc] fix packing and eval plot (#7623) 2025-04-07 18:20:57 +08:00
hoshi-hiyouga
7e0cdb1a76
[assets] update readme (#7612) 2025-04-06 13:58:49 +08:00
hoshi-hiyouga
6c200fd218
[model] add llama4 (#7611) 2025-04-06 13:42:31 +08:00
hoshi-hiyouga
61b24c3827
[assets] update wechat (#7594) 2025-04-03 17:45:26 +08:00
Kingsley
32cb086be1
[data] fix qwen2.5 omni plugin (#7578)
* specific entry

* Update mm_plugin.py

* fix fps cal

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-04-02 23:58:39 +08:00
Kingsley
80f8d037d0
[data] fix qwen2.5 omni plugin (#7573)
* align key with qwen2vl

* nit && change scripts
2025-04-02 21:28:52 +08:00
gechengze
11997593be
[trainer] fix batch processing in PPO trainer (#7576) 2025-04-02 21:17:48 +08:00
hoshi-hiyouga
903db09822
[infer] vllm video/audio inference (#7566) 2025-04-02 02:27:04 +08:00
hoshi-hiyouga
aaf2e6ba2a
[model] fix kv cache (#7564) 2025-04-01 23:07:46 +08:00
Yu Shi Jie
9deece1d50
[model] fix use_cache patching for gemma3 multimodal (#7500) 2025-04-01 16:06:48 +08:00
Ritesh Goru
f06a74ad4e
[data] specify position_ids in PackedSupervisedDatasetProcessor for neat_packing (#7318)
* use position_ids for neat_packing with fa2

* revert fa2 changes
2025-04-01 16:03:13 +08:00
taoharry
6faa6fb53d
[webui] fix launch with proxy (#7332) 2025-04-01 15:52:56 +08:00
Billy Cao
5d1cc863a4
[data] shard the dataset to allow multiprocessing when streaming is enabled (#7530)
* Shard the dataset when streaming to allow multiprocessing

* Allow user to not set dataset_shards to ensure backward compatibility
2025-04-01 15:36:23 +08:00
Hao
6d6e0f44fc
[trainer] new kto mismatch pair creation strategy (#7509) 2025-04-01 15:21:53 +08:00
hoshi-hiyouga
2d421c57bf
[data] fix qwen2.5 omni collator (#7553) 2025-04-01 00:15:12 +08:00
Kingsley
185c76f6ad
[model] add Qwen2.5-Omni model (#7537)
* preserve image_sizes

* preserve image_sizes

* init plugin

* support audio-text2text lora

* nit

* support image/video-text2text, audio-text2text

* remove args

* remove lines

* add docs && nit

* remove some comments

* fix && add merge part script

* add license
2025-03-31 20:39:35 +08:00
hoshi-hiyouga
468eea6f6d
[deps] pin pydantic to 2.10.6 (#7546) 2025-03-31 14:42:28 +08:00
hoshi-hiyouga
49436e93e6
[assets] update wechat (#7523) 2025-03-28 17:44:36 +08:00
Kingsley
b00cb2ed42
[data] fix pixtral plugin (#7505)
* preserve `image_sizes`

* add comments
2025-03-27 17:06:40 +08:00
Xu-pixel
f547334604
[3rdparty] support swanlab lark notification (#7481) 2025-03-27 01:52:01 +08:00
Kdump
01166841cf
[trainer] fix wsd scheduler (#7304)
* [trainer] Warmup_stable_decay supports setting the number of stable and decay steps according to the warmup_ratio ratio

* Update trainer_utils.py

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-03-26 15:25:02 +08:00
hoshi-hiyouga
59e12bffe8
[model] add qwen2vl 32b & upgrade peft (#7469)
* add qwen2vl 32b

* fix ci

* upgrade peft to 0.15

* fix ci

* fix ci
2025-03-25 12:15:58 +08:00
GuoCoder
b6d8749bf3
[model] fix lora on quant models (#7456)
Co-authored-by: root <root@ai>
2025-03-25 11:59:46 +08:00
Xiaosu Zhu
bc9ada9db7
[misc] update liger-kernel's monkey patch (#7453)
* Update liger_kernel.py

* Update setup.py
2025-03-25 11:58:52 +08:00
AbdelKarim ELJANDOUBI
b6dc7e01e2
[misc] enable liger kernel for gemma3 text and paligemma (#7466)
* add gemma3 text

* add paligemma (1,2 and 2 mix)
2025-03-25 09:27:43 +08:00
Kenny Lam
59a56f7226
[misc] enable liger kernel for gemma3 (#7462) 2025-03-24 19:09:59 +08:00
hoshi-hiyouga
9abee9cd1a
[assets] update wechat (#7455) 2025-03-24 14:53:10 +08:00
hoshi-hiyouga
833edc7c73
[assets] fix gemma3 readme (#7449) 2025-03-24 10:31:25 +08:00
hoshi-hiyouga
42e090d38b
[trainer] fix vlm loss for transformers 4.49 (#7448) 2025-03-24 10:24:05 +08:00
rumichi
747e02d60d
[docker] upgrade to torch 2.6 (#7442) 2025-03-23 21:18:08 +08:00
hoshi-hiyouga
c841e92116
[misc] fix ci (#7441)
* fix ci

* improve ci
2025-03-23 21:09:35 +08:00
hoshi-hiyouga
fbf49e2500
[misc] fix license (#7440) 2025-03-23 19:31:56 +08:00
SnowFox4004
7d4dc25c23
[scripts] support compute score on vllm's predictions (#7419)
* enable manual bleu&rouge eval by adding `scripts/eval_bleu_rouge.py`

* added libraries check

* update: 使用datasets库的多进程加速处理

* update:
- 使用 fire.Fire
- 修改代码格式

* Update eval_bleu_rouge.py: correctly uses fire

Deleted the code of using sys.argv

* Update eval_bleu_rouge.py

---------

Co-authored-by: SnowFox4004 <manba@out>
Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-03-23 19:21:01 +08:00
hoshi-hiyouga
b1b78daf06
[deps] upgrade transformers to 4.50.0 (#7437)
* upgrade transformers

* fix hf cache

* fix dpo trainer
2025-03-23 17:44:27 +08:00
hoshi-hiyouga
dfbe1391e9
[deps] upgrade vllm to 0.8 (#7436) 2025-03-23 14:32:22 +08:00
Guo, Quan
ebc989ad4a
[misc] fix sglang deps (#7432)
* feat: Add transformer version requirement for sglang

* feat: add srt to sglang which is required for running sglang

Other options are srt_hip, srt_xpu, srt_npu, srt_hpu, srt_cpu, for different computation architectures.
2025-03-23 14:07:10 +08:00
Eric Tang
d8a5571be7
[3rdparty] fix redundant process group destroy for ray (#7395)
* fix redundant process group destroy for ray

* Update tuner.py

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
2025-03-21 10:56:47 +08:00
hoshi-hiyouga
555b71a1cb
[version] fix minicpmo (#7378) 2025-03-20 16:59:31 +08:00
hoshi-hiyouga
4a5d0f0ba7
[assets] update wechat (#7361) 2025-03-18 21:31:09 +08:00
hoshi-hiyouga
c518146e62
[misc] set dev version (#7351) 2025-03-18 00:10:53 +08:00
hoshi-hiyouga
1d2131e5cb
[data] fix template (#7349) 2025-03-17 23:45:20 +08:00
hoshi-hiyouga
48a6584fb1
[assets] update videos (#7340)
* Update README.md

* Update README_zh.md
2025-03-17 15:48:02 +08:00
Hertz
a71e685021
[model] support hunyuan 7b (#7317)
* [Model]supported tencent-hunyuan model

* [Model]supported tencent-hunyuan model(fix)

* [Model]supported tencent-hunyuan model(fix)
2025-03-15 20:55:24 +08:00
Qiaolin Yu
30038d9ce7
[inference] support sglang backend (#7278)
* Mimic SGLang offline Engine

* Add more tests and args

* Pass all current tests

* Clean Code

* fix sample_params

* clean code

* Fix Stream Chat

* change sglang from engine mode to server mode

* fix

* Fix Review Issues

* Use SGLang Built-In Utilities

* Fix test SGLang

* Some Doc Issue

* fix sglang engine

* add readme

---------

Co-authored-by: Jin Pan <jpan236@wisc.edu>
Co-authored-by: hiyouga <hiyouga@buaa.edu.cn>
2025-03-15 04:37:58 +08:00
hoshi-hiyouga
ef5f1c1def
[data] gemma3 plugin pan and scan (#7294)
* gemma3 pan and scan

* add test case

* fix test
2025-03-13 23:29:23 +08:00
Victor Nogueira
3dff4ecca8
[dataset] fix ultrachat_200k dataset (#7259)
The `HuggingFaceH4/ultrachat_200k` dataset doesn't contain the default "train" split. The correct split is "train_sft".
2025-03-13 20:20:18 +08:00
hoshi-hiyouga
0dbce72fb8
[assets] update wechat (#7288) 2025-03-13 18:48:59 +08:00
hoshi-hiyouga
e9b427d535
[assets] update video (#7287) 2025-03-13 18:45:47 +08:00
Ritesh Goru
d7d79f7e06
[data] efficient 4d_attention_mask creation in neat_packing (#7272) 2025-03-13 03:31:12 +08:00
hoshi-hiyouga
9ccfb97a2c
[misc] update format (#7277) 2025-03-13 02:53:08 +08:00
hoshi-hiyouga
165d3ed084
[model] support gemma3 (#7273) 2025-03-13 01:35:23 +08:00
hoshi-hiyouga
142fd7e755
[misc] upgrade deps (#7257) 2025-03-12 00:33:47 +08:00
hoshi-hiyouga
7c1640ed5f
[misc] upgrade format to py39 (#7256) 2025-03-12 00:08:41 +08:00
hoshi-hiyouga
cdafa8a15e
[ci] update workflow (#7255) 2025-03-11 22:57:49 +08:00
hoshi-hiyouga
b256ca86f0
[core] release v0.9.2 (#7254) 2025-03-11 22:42:23 +08:00
230 changed files with 8457 additions and 5563 deletions

View File

@ -3,12 +3,12 @@
.github
.venv
cache
data
docker
saves
hf_cache
ms_cache
om_cache
shared_data
output
.dockerignore
.gitattributes

View File

@ -16,6 +16,8 @@ USE_MODELSCOPE_HUB=
USE_OPENMIND_HUB=
USE_RAY=
RECORD_VRAM=
OPTIM_TORCH=
NPU_JIT_COMPILE=
# torchrun
FORCE_TORCHRUN=
MASTER_ADDR=

View File

@ -12,7 +12,7 @@ body:
attributes:
value: |
Please do not create issues that are not related to framework bugs under this category, use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead.
请勿在此分类下创建和框架 bug 无关的 issues,请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
请勿在此分类下创建和框架 bug 无关的 issues,训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
- type: checkboxes
id: reminder
@ -47,8 +47,6 @@ body:
description: |
Please provide entry arguments, error messages and stack traces that reproduces the problem.
请提供入口参数,错误日志以及异常堆栈以便于我们复现问题。
Remember to wrap your log messages with \`\`\`.
请务必使用 Markdown 标签 \`\`\` 来包裹您的日志信息。
value: |
```text

View File

@ -1 +1,8 @@
blank_issues_enabled: false
contact_links:
- name: 📚 FAQs | 常见问题
url: https://github.com/hiyouga/LLaMA-Factory/issues/4614
about: Reading in advance is recommended | 建议提前阅读
- name: Discussions | 讨论区
url: https://github.com/hiyouga/LLaMA-Factory/discussions
about: Please ask fine-tuning questions here | 请在这里讨论训练问题

108
.github/workflows/docker.yml vendored Normal file
View File

@ -0,0 +1,108 @@
name: docker
on:
workflow_dispatch:
push:
branches:
- "main"
paths:
- "**/*.py"
- "requirements.txt"
- "docker/**"
- ".github/workflows/*.yml"
pull_request:
branches:
- "main"
paths:
- "**/*.py"
- "requirements.txt"
- "docker/**"
- ".github/workflows/*.yml"
jobs:
build:
strategy:
fail-fast: false
matrix:
device:
- "cuda"
- "npu"
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.device }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
environment:
name: docker
url: https://hub.docker.com/r/hiyouga/llamafactory
steps:
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.9"
- name: Get llamafactory version
id: version
run: |
echo "tag=$(python setup.py --version | sed 's/\.dev0//')" >> "$GITHUB_OUTPUT"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
if: ${{ github.event_name != 'pull_request' }}
uses: docker/login-action@v3
with:
username: ${{ vars.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to Quay
if: ${{ github.event_name != 'pull_request' && matrix.device == 'npu' }}
uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ vars.QUAY_ASCEND_USERNAME }}
password: ${{ secrets.QUAY_ASCEND_TOKEN }}
- name: Build and push Docker image (CUDA)
if: ${{ matrix.device == 'cuda' }}
uses: docker/build-push-action@v6
with:
context: .
file: ./docker/docker-cuda/Dockerfile
build-args: |
EXTRAS=metrics,deepspeed,liger-kernel
push: ${{ github.event_name != 'pull_request' }}
tags: |
docker.io/hiyouga/llamafactory:latest
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Build and push Docker image (NPU)
if: ${{ matrix.device == 'npu' }}
uses: docker/build-push-action@v6
with:
context: .
platforms: linux/amd64,linux/arm64
file: ./docker/docker-npu/Dockerfile
push: ${{ github.event_name != 'pull_request' }}
tags: |
docker.io/hiyouga/llamafactory:latest-npu-a2
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
quay.io/ascend/llamafactory:latest-npu-a2
quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@ -19,7 +19,7 @@ jobs:
ISSUE_TITLE: ${{ github.event.issue.title }}
run: |
LABEL=""
NPU_KEYWORDS=(npu huawei ascend 华为 昇腾)
NPU_KEYWORDS=(npu huawei ascend 华为 昇腾 910)
ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]')
for KEYWORD in ${NPU_KEYWORDS[@]}; do
if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then

View File

@ -1,6 +1,7 @@
name: publish
on:
workflow_dispatch:
release:
types:
- published
@ -27,14 +28,9 @@ jobs:
with:
python-version: "3.9"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build
- name: Build package
run: |
python -m build
make build
- name: Publish package
uses: pypa/gh-action-pypi-publish@release/v1

View File

@ -1,18 +1,19 @@
name: tests
on:
workflow_dispatch:
push:
branches:
- "main"
paths:
- "**.py"
- "**/*.py"
- "requirements.txt"
- ".github/workflows/*.yml"
pull_request:
branches:
- "main"
paths:
- "**.py"
- "**/*.py"
- "requirements.txt"
- ".github/workflows/*.yml"
@ -21,7 +22,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version:
python:
- "3.9"
- "3.10"
- "3.11"
@ -30,9 +31,22 @@ jobs:
- "ubuntu-latest"
- "windows-latest"
- "macos-13"
transformers:
- null
include: # test backward compatibility
- python: "3.9"
os: "ubuntu-latest"
transformers: "4.49.0"
- python: "3.9"
os: "ubuntu-latest"
transformers: "4.51.0"
runs-on: ${{ matrix.os }}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
OS_NAME: ${{ matrix.os }}
@ -44,19 +58,42 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
python-version: ${{ matrix.python }}
cache: "pip"
cache-dependency-path: "setup.py"
cache-dependency-path: "**/requirements*.txt"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[torch,dev]"
- name: Install transformers
if: ${{ matrix.transformers }}
run: |
python -m pip install "transformers==${{ matrix.transformers }}"
- name: Cache files
id: hf-hub-cache
uses: actions/cache@v4
with:
path: ${{ runner.temp }}/huggingface
key: huggingface-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}-${{ hashFiles('tests/version.txt') }}
- name: Check quality
run: |
make style && make quality
- name: Check license
run: |
make license
- name: Check build
run: |
make build
- name: Test with pytest
run: |
make test
env:
HF_HOME: ${{ runner.temp }}/huggingface
HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"

3
.gitignore vendored
View File

@ -166,8 +166,8 @@ cython_debug/
uv.lock
# custom .gitignore
ms_cache/
hf_cache/
ms_cache/
om_cache/
cache/
config/
@ -176,3 +176,4 @@ output/
wandb/
swanlog/
generated_predictions.jsonl
predictions_score.json

View File

@ -1,14 +1,17 @@
.PHONY: build commit quality style test
.PHONY: build commit license quality style test
check_dirs := scripts src tests setup.py
build:
pip install build && python -m build
pip3 install build && python3 -m build
commit:
pre-commit install
pre-commit run --all-files
license:
python3 tests/check_license.py $(check_dirs)
quality:
ruff check $(check_dirs)
ruff format --check $(check_dirs)

257
README.md
View File

@ -5,8 +5,8 @@
[![GitHub contributors](https://img.shields.io/github/contributors/hiyouga/LLaMA-Factory?color=orange)](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
[![GitHub workflow](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml/badge.svg)](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
[![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/)
[![Citation](https://img.shields.io/badge/citation-349-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
[![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls)
[![Citation](https://img.shields.io/badge/citation-730-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
[![Docker Pulls](https://img.shields.io/docker/pulls/hiyouga/llamafactory)](https://hub.docker.com/r/hiyouga/llamafactory/tags)
[![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
[![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK)
@ -14,34 +14,49 @@
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)
[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
[![Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
[![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
[![SageMaker](https://img.shields.io/badge/SageMaker-Open%20in%20AWS-blue)](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
[![Open in Alaya](assets/alaya_new.svg)](https://docs.alayanew.com/docs/documents/newActivities/llamafactory/?utm_source=LLaMA-Factory)
[![Open in Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
[![Open in Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
[![Open in Novita](https://img.shields.io/badge/Novita-Deploy%20Template-blue)](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47)
<h3 align="center">
Easily fine-tune 100+ large language models with zero-code <a href="#quickstart">CLI</a> and <a href="#fine-tuning-with-llama-board-gui-powered-by-gradio">Web UI</a>
</h3>
<p align="center">
<picture>
<img alt="Github trend" src="https://trendshift.io/api/badge/repositories/4535">
</picture>
</p>
### Used by [Amazon](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/), [NVIDIA](https://developer.nvidia.com/rtx/ai-toolkit), [Aliyun](https://help.aliyun.com/zh/pai/use-cases/fine-tune-a-llama-3-model-with-llama-factory), etc.
👋 Join our [WeChat](assets/wechat.jpg) or [NPU user group](assets/wechat_npu.jpg).
<div align="center" markdown="1">
### Supporters ❤️
<a href="https://warp.dev/llama-factory">
<img alt="Warp sponsorship" width="400" src="https://github.com/user-attachments/assets/ab8dd143-b0fd-4904-bdc5-dd7ecac94eae">
</a>
#### [Warp, the agentic terminal for developers](https://warp.dev/llama-factory)
[Available for MacOS, Linux, & Windows](https://warp.dev/llama-factory)
----
### Easily fine-tune 100+ large language models with zero-code [CLI](#quickstart) and [Web UI](#fine-tuning-with-llama-board-gui-powered-by-gradio)
![GitHub Trend](https://trendshift.io/api/badge/repositories/4535)
</div>
👋 Join our [WeChat group](assets/wechat.jpg), [NPU user group](assets/wechat_npu.jpg) or [Alaya NeW user group](assets/wechat_alaya.png).
\[ English | [中文](README_zh.md) \]
**Fine-tuning a large language model can be as easy as...**
https://github.com/user-attachments/assets/7c96b465-9df7-45f4-8053-bf03e58386d3
https://github.com/user-attachments/assets/3991a3a8-4276-4d30-9cab-4cb0c4b9b99e
Choose your path:
- **Documentation**: https://llamafactory.readthedocs.io/en/latest/
- **Documentation (WIP)**: https://llamafactory.readthedocs.io/en/latest/
- **Documentation (AMD GPU)**: https://rocm.docs.amd.com/projects/ai-developer-hub/en/latest/notebooks/fine_tune/llama_factory_llama3.html
- **Colab (free)**: https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing
- **Local machine**: Please refer to [usage](#getting-started)
- **PAI-DSW (free trial)**: [Llama3 Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) | [DeepSeek-R1-Distill Example](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b)
- **Amazon SageMaker**: [Blog](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
- **PAI-DSW (free trial)**: https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
- **Alaya NeW (cloud GPU deal)**: https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory
> [!NOTE]
> Except for the above links, all other websites are unauthorized third-party websites. Please use them with caution.
@ -49,7 +64,7 @@ Choose your path:
## Table of Contents
- [Features](#features)
- [Benchmark](#benchmark)
- [Blogs](#blogs)
- [Changelog](#changelog)
- [Supported Models](#supported-models)
- [Supported Training Approaches](#supported-training-approaches)
@ -76,51 +91,71 @@ Choose your path:
- **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Qwen2-VL, DeepSeek, Yi, Gemma, ChatGLM, Phi, etc.
- **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc.
- **Scalable resources**: 16-bit full-tuning, freeze-tuning, LoRA and 2/3/4/5/6/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ.
- **Advanced algorithms**: [GaLore](https://github.com/jiaweizzhao/GaLore), [BAdam](https://github.com/Ledzy/BAdam), [APOLLO](https://github.com/zhuhanqing/APOLLO), [Adam-mini](https://github.com/zyushun/Adam-mini), DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and PiSSA.
- **Advanced algorithms**: [GaLore](https://github.com/jiaweizzhao/GaLore), [BAdam](https://github.com/Ledzy/BAdam), [APOLLO](https://github.com/zhuhanqing/APOLLO), [Adam-mini](https://github.com/zyushun/Adam-mini), [Muon](https://github.com/KellerJordan/Muon), DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and PiSSA.
- **Practical tricks**: [FlashAttention-2](https://github.com/Dao-AILab/flash-attention), [Unsloth](https://github.com/unslothai/unsloth), [Liger Kernel](https://github.com/linkedin/Liger-Kernel), RoPE scaling, NEFTune and rsLoRA.
- **Wide tasks**: Multi-turn dialogue, tool using, image understanding, visual grounding, video recognition, audio understanding, etc.
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, SwanLab, etc.
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker.
- **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, [SwanLab](https://github.com/SwanHubX/SwanLab), etc.
- **Faster inference**: OpenAI-style API, Gradio UI and CLI with [vLLM worker](https://github.com/vllm-project/vllm) or [SGLang worker](https://github.com/sgl-project/sglang).
### Day-N Support for Fine-Tuning Cutting-Edge Models
| Support Date | Model Name |
| ------------ | ---------------------------------------------------------- |
| Day 0 | Qwen2.5 / Qwen2-VL / QwQ / QvQ / InternLM3 / MiniCPM-o-2.6 |
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 |
| Support Date | Model Name |
| ------------ | -------------------------------------------------------------------- |
| Day 0 | Qwen3 / Qwen2.5-VL / Gemma 3 / GLM-4.1V / InternLM 3 / MiniCPM-o-2.6 |
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 / Llama 4 |
## Benchmark
## Blogs
Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ptuning), LLaMA Factory's LoRA tuning offers up to **3.7 times faster** training speed with a better Rouge score on the advertising text generation task. By leveraging 4-bit quantization technique, LLaMA Factory's QLoRA further improves the efficiency regarding the GPU memory.
- [Fine-tune Llama3.1-70B for Medical Diagnosis using LLaMA-Factory](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/) (Chinese)
- [A One-Stop Code-Free Model Reinforcement Learning and Deployment Platform based on LLaMA-Factory and EasyR1](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/) (Chinese)
- [How Apoidea Group enhances visual information extraction from banking documents with multimodal models using LLaMA-Factory on Amazon SageMaker HyperPod](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/) (English)
- [Easy Dataset × LLaMA Factory: Enabling LLMs to Efficiently Learn Domain Knowledge](https://buaa-act.feishu.cn/wiki/GVzlwYcRFiR8OLkHbL6cQpYin7g) (English)
![benchmark](assets/benchmark.svg)
<details><summary>All Blogs</summary>
<details><summary>Definitions</summary>
- **Training Speed**: the number of training samples processed per second during the training. (bs=4, cutoff_len=1024)
- **Rouge Score**: Rouge-2 score on the development set of the [advertising text generation](https://aclanthology.org/D19-1321.pdf) task. (bs=4, cutoff_len=1024)
- **GPU Memory**: Peak GPU memory usage in 4-bit quantized training. (bs=1, cutoff_len=1024)
- We adopt `pre_seq_len=128` for ChatGLM's P-Tuning and `lora_rank=32` for LLaMA Factory's LoRA tuning.
- [Fine-tune Qwen2.5-VL for Autonomous Driving using LLaMA-Factory](https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory) (Chinese)
- [LLaMA Factory: Fine-tuning the DeepSeek-R1-Distill-Qwen-7B Model for News Classifier](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b) (Chinese)
- [A One-Stop Code-Free Model Fine-Tuning \& Deployment Platform based on SageMaker and LLaMA-Factory](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/) (Chinese)
- [LLaMA Factory Multi-Modal Fine-Tuning Practice: Fine-Tuning Qwen2-VL for Personal Tourist Guide](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) (Chinese)
- [LLaMA Factory: Fine-tuning Llama3 for Role-Playing](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) (Chinese)
</details>
## Changelog
[25/07/02] We supported fine-tuning the **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** model. Please install transformers from **main** branch to use.
[25/04/28] We supported fine-tuning the **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** model family.
[25/04/21] We supported the **[Muon](https://github.com/KellerJordan/Muon)** optimizer. See [examples](examples/README.md) for usage. Thank [@tianshijing](https://github.com/tianshijing)'s PR.
[25/04/16] We supported fine-tuning the **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** model. See [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) to get started.
[25/04/14] We supported fine-tuning the **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** and **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** models.
[25/04/06] We supported fine-tuning the **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** model. See [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) to get started.
<details><summary>Full Changelog</summary>
[25/03/31] We supported fine-tuning the **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** model. See [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) to get started.
[25/03/15] We supported **[SGLang](https://github.com/sgl-project/sglang)** as inference backend. Try `infer_backend: sglang` to accelerate inference.
[25/03/12] We supported fine-tuning the **[Gemma 3](https://huggingface.co/blog/gemma3)** model.
[25/02/24] Announcing **[EasyR1](https://github.com/hiyouga/EasyR1)**, an efficient, scalable and multi-modality RL training framework for efficient GRPO training.
[25/02/11] We supported saving the **[Ollama](https://github.com/ollama/ollama)** modelfile when exporting the model checkpoints. See [examples](examples/README.md) for usage.
[25/02/05] We supported fine-tuning the **[Qwen2-Audio](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)** and **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** on audio understanding tasks.
[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** model.
<details><summary>Full Changelog</summary>
[25/01/31] We supported fine-tuning the **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** and **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** models.
[25/01/15] We supported **[APOLLO](https://arxiv.org/abs/2412.05270)** optimizer. See [examples](examples/README.md) for usage.
[25/01/14] We supported fine-tuning the **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** and **[MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6)** models. Thank [@BUAADreamer](https://github.com/BUAADreamer)'s PR.
[25/01/14] We supported fine-tuning the **[InternLM3](https://huggingface.co/collections/internlm/)** models. Thank [@hhaAndroid](https://github.com/hhaAndroid)'s PR.
[25/01/14] We supported fine-tuning the **[InternLM 3](https://huggingface.co/collections/internlm/)** models. Thank [@hhaAndroid](https://github.com/hhaAndroid)'s PR.
[25/01/10] We supported fine-tuning the **[Phi-4](https://huggingface.co/microsoft/phi-4)** model.
@ -216,6 +251,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
</details>
> [!TIP]
> If you cannot use the latest feature, please pull the latest code and install LLaMA-Factory again.
## Supported Models
| Model | Model size | Template |
@ -226,22 +264,32 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseek3 |
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
| [GLM-4.1V](https://huggingface.co/zai-org)* | 9B | glm4v |
| [GLM-4.5](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe |
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
| [InternVL 2.5-3](https://huggingface.co/OpenGVLab) | 1B/2B/8B/14B/38B/78B | intern_vl |
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@ -253,9 +301,12 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/72B | qwen2_vl |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Seed Coder](https://huggingface.co/ByteDance-Seed) | 8B | seed_coder |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
@ -268,6 +319,10 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
> For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models.
>
> Remember to use the **SAME** template in training and inference.
>
> \*: You should install `transformers` from the main branch and use `DISABLE_VERSION_CHECK=1` to skip the version check.
>
> \*\*: You need to install a specific version of `transformers` to use the corresponding model.
Please refer to [constants.py](src/llamafactory/extras/constants.py) for a full list of models we supported.
@ -371,8 +426,10 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
- [COIG-P (zh)](https://huggingface.co/datasets/m-a-p/COIG-P)
- [RLHF-V (en)](https://huggingface.co/datasets/openbmb/RLHF-V-Dataset)
- [VLFeedback (en)](https://huggingface.co/datasets/Zhihui/VLFeedback)
- [RLAIF-V (en)](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset)
- [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
@ -393,11 +450,12 @@ huggingface-cli login
| Mandatory | Minimum | Recommend |
| ------------ | ------- | --------- |
| python | 3.9 | 3.10 |
| torch | 1.13.1 | 2.5.1 |
| transformers | 4.41.2 | 4.49.0 |
| torch | 2.0.0 | 2.6.0 |
| torchvision | 0.15.0 | 0.21.0 |
| transformers | 4.49.0 | 4.50.0 |
| datasets | 2.16.0 | 3.2.0 |
| accelerate | 0.34.0 | 1.2.1 |
| peft | 0.11.1 | 0.12.0 |
| peft | 0.14.0 | 0.15.1 |
| trl | 0.8.6 | 0.9.6 |
| Optional | Minimum | Recommend |
@ -405,8 +463,8 @@ huggingface-cli login
| CUDA | 11.6 | 12.2 |
| deepspeed | 0.10.0 | 0.16.4 |
| bitsandbytes | 0.39.0 | 0.43.1 |
| vllm | 0.4.3 | 0.7.3 |
| flash-attn | 2.3.0 | 2.7.2 |
| vllm | 0.4.3 | 0.8.2 |
| flash-attn | 2.5.6 | 2.7.2 |
### Hardware Requirement
@ -428,16 +486,27 @@ huggingface-cli login
> [!IMPORTANT]
> Installation is mandatory.
#### Install from Source
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e ".[torch,metrics]"
pip install -e ".[torch,metrics]" --no-build-isolation
```
Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, apollo, badam, adam-mini, qwen, minicpm_v, modelscope, openmind, swanlab, quality
Extra dependencies available: torch, torch-npu, metrics, deepspeed, liger-kernel, bitsandbytes, hqq, eetq, gptq, aqlm, vllm, sglang, galore, apollo, badam, adam-mini, qwen, minicpm_v, openmind, swanlab, dev
> [!TIP]
> Use `pip install --no-deps -e .` to resolve package conflicts.
#### Install from Docker Image
```bash
docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
```
This image is built on Ubuntu 22.04 (x86\_64), CUDA 12.4, Python 3.11, PyTorch 2.6.0, and Flash-attn 2.7.4.
Find the pre-built images: https://hub.docker.com/r/hiyouga/llamafactory/tags
Please refer to [build docker](#build-docker) to build the image yourself.
<details><summary>Setting up a virtual environment with <b>uv</b></summary>
@ -457,6 +526,20 @@ uv run --prerelease=allow llamafactory-cli train examples/train_lora/llama3_lora
<details><summary>For Windows users</summary>
#### Install PyTorch
You need to manually install the GPU version of PyTorch on the Windows platform. Please refer to the [official website](https://pytorch.org/get-started/locally/) and the following command to install PyTorch with CUDA support:
```bash
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
python -c "import torch; print(torch.cuda.is_available())"
```
If you see `True` then you have successfully installed PyTorch with CUDA support.
Try `dataloader_num_workers: 0` if you encounter `Can't pickle local object` error.
#### Install BitsAndBytes
If you want to enable the quantized LoRA (QLoRA) on the Windows platform, you need to install a pre-built version of `bitsandbytes` library, which supports CUDA 11.1 to 12.2, please select the appropriate [release version](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels) based on your CUDA version.
@ -495,6 +578,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
| torch | 2.1.0 | 2.4.0 |
| torch-npu | 2.1.0 | 2.4.0.post2 |
| deepspeed | 0.13.2 | 0.13.2 |
| vllm-ascend | - | 0.7.3 |
Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to specify the device to use.
@ -540,11 +624,13 @@ pip install .
### Data Preparation
Please refer to [data/README.md](data/README.md) for checking the details about the format of dataset files. You can either use datasets on HuggingFace / ModelScope / Modelers hub or load the dataset in local disk.
Please refer to [data/README.md](data/README.md) for details about the format of dataset files. You can use datasets on HuggingFace / ModelScope / Modelers hub, load a dataset from local disk, or specify a path to s3/gcs cloud storage.
> [!NOTE]
> Please update `data/dataset_info.json` to use your custom dataset.
You can also use **[Easy Dataset](https://github.com/ConardLi/easy-dataset)**, **[DataFlow](https://github.com/OpenDCAI/DataFlow)** and **[GraphGen](https://github.com/open-sciencelab/GraphGen)** to create synthetic data for fine-tuning.
### Quickstart
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
@ -600,22 +686,13 @@ For CUDA users:
```bash
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg INSTALL_FLASHATTN=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
docker run -dit --gpus=all \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./om_cache:/root/.cache/openmind \
-v ./data:/app/data \
-v ./output:/app/output \
docker run -dit --ipc=host --gpus=all \
-p 7860:7860 \
-p 8000:8000 \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
@ -625,19 +702,12 @@ docker exec -it llamafactory bash
For Ascend NPU users:
```bash
# Choose docker image upon your environment
docker build -f ./docker/docker-npu/Dockerfile \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=torch-npu,metrics \
-t llamafactory:latest .
# Change `device` upon your resources
docker run -dit \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./om_cache:/root/.cache/openmind \
-v ./data:/app/data \
-v ./output:/app/output \
docker run -dit --ipc=host \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
@ -648,7 +718,6 @@ docker run -dit \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
@ -659,25 +728,15 @@ For AMD ROCm users:
```bash
docker build -f ./docker/docker-rocm/Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg INSTALL_FLASHATTN=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
docker run -dit \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./om_cache:/root/.cache/openmind \
-v ./data:/app/data \
-v ./output:/app/output \
-v ./saves:/app/saves \
docker run -dit --ipc=host \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/kfd \
--device /dev/dri \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
@ -686,12 +745,14 @@ docker exec -it llamafactory bash
</details>
<details><summary>Details about volume</summary>
<details><summary>Use Docker volumes</summary>
- `hf_cache`: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory.
- `ms_cache`: Similar to Hugging Face cache but for ModelScope users.
- `om_cache`: Similar to Hugging Face cache but for Modelers users.
- `data`: Place datasets on this dir of the host machine so that they can be selected on LLaMA Board GUI.
You can uncomment `VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]` in the Dockerfile to use data volumes.
When starting the Docker container, use the `-v ./hf_cache:/root/.cache/huggingface` argument to mount the local directory into the container. The following data volumes are available.
- `hf_cache`: Utilize Hugging Face cache on the host machine.
- `shared_data`: The directory to store datasets on the host machine.
- `output`: Set export dir to this location so that the merged result can be accessed directly on the host machine.
</details>
@ -699,7 +760,7 @@ docker exec -it llamafactory bash
### Deploy with OpenAI-style API and vLLM
```bash
API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true
```
> [!TIP]
@ -841,6 +902,7 @@ If you have a project that should be incorporated, please contact via email or c
1. Xia et al. Using Pre-trained Language Model for Accurate ESG Prediction. FinNLP 2024. [[paper]](https://aclanthology.org/2024.finnlp-2.1/)
1. Liang et al. I-SHEEP: Self-Alignment of LLM from Scratch through an Iterative Self-Enhancement Paradigm. 2024. [[arxiv]](https://arxiv.org/abs/2408.08072)
1. Bai et al. Aligning Large Language Model with Direct Multi-Preference Optimization for Recommendation. CIKM 2024. [[paper]](https://dl.acm.org/doi/10.1145/3627673.3679611)
1. Zhang et al. CPsyCoun: A Report-based Multi-turn Dialogue Reconstruction and Evaluation Framework for Chinese Psychological Counseling. ACL 2024. [[paper]](https://aclanthology.org/2024.findings-acl.830.pdf)
1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B.
1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge.
1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B.
@ -854,14 +916,15 @@ If you have a project that should be incorporated, please contact via email or c
1. **[RAG-Retrieval](https://github.com/NLPJCL/RAG-Retrieval)**: A full pipeline for RAG retrieval model fine-tuning, inference, and distillation. [[blog]](https://zhuanlan.zhihu.com/p/987727357)
1. **[360-LLaMA-Factory](https://github.com/Qihoo360/360-LLaMA-Factory)**: A modified library that supports long sequence SFT & DPO using ring attention.
1. **[Sky-T1](https://novasky-ai.github.io/posts/sky-t1/)**: An o1-like model fine-tuned by NovaSky AI with very small cost.
1. **[WeClone](https://github.com/xming521/WeClone)**: One-stop solution for creating your digital avatar from chat logs.
1. **[EmoLLM](https://github.com/SmartFlowAI/EmoLLM)**: A project about large language models (LLMs) and mental health.
</details>
## License
This repository is licensed under the [Apache-2.0 License](LICENSE).
Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / 
[Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / 
[Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
## Citation

View File

@ -5,8 +5,8 @@
[![GitHub contributors](https://img.shields.io/github/contributors/hiyouga/LLaMA-Factory?color=orange)](https://github.com/hiyouga/LLaMA-Factory/graphs/contributors)
[![GitHub workflow](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml/badge.svg)](https://github.com/hiyouga/LLaMA-Factory/actions/workflows/tests.yml)
[![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/)
[![Citation](https://img.shields.io/badge/citation-349-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
[![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls)
[![Citation](https://img.shields.io/badge/citation-730-green)](https://scholar.google.com/scholar?cites=12620864006390196564)
[![Docker Pulls](https://img.shields.io/docker/pulls/hiyouga/llamafactory)](https://hub.docker.com/r/hiyouga/llamafactory/tags)
[![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai)
[![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK)
@ -14,36 +14,51 @@
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)
[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)
[![Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
[![Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
[![SageMaker](https://img.shields.io/badge/SageMaker-Open%20in%20AWS-blue)](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
[![Open in Alaya](assets/alaya_new.svg)](https://docs.alayanew.com/docs/documents/newActivities/llamafactory/?utm_source=LLaMA-Factory)
[![Open in Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board)
[![Open in Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board)
[![Open in Novita](https://img.shields.io/badge/Novita-Deploy%20Template-blue)](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47)
<h3 align="center">
使用零代码<a href="#快速开始">命令行</a>与 <a href="#llama-board-可视化微调由-gradio-驱动">Web UI</a> 轻松微调百余种大模型
</h3>
<p align="center">
<picture>
<img alt="Github trend" src="https://trendshift.io/api/badge/repositories/4535">
</picture>
</p>
### 获得[亚马逊](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)、[英伟达](https://developer.nvidia.cn/rtx/ai-toolkit)、[阿里云](https://help.aliyun.com/zh/pai/use-cases/fine-tune-a-llama-3-model-with-llama-factory)等的应用。
<div align="center" markdown="1">
👋 加入我们的[微信群](assets/wechat.jpg)或 [NPU 用户群](assets/wechat_npu.jpg)。
### 赞助商 ❤️
<a href="https://warp.dev/llama-factory">
<img alt="Warp sponsorship" width="400" src="https://github.com/user-attachments/assets/ab8dd143-b0fd-4904-bdc5-dd7ecac94eae">
</a>
#### [Warp,面向开发者的智能终端](https://warp.dev/llama-factory)
[适用于 MacOS、Linux 和 Windows](https://warp.dev/llama-factory)
----
### 使用零代码[命令行](#快速开始)与 [Web UI](#llama-board-可视化微调由-gradio-驱动) 轻松微调百余种大模型
![GitHub Trend](https://trendshift.io/api/badge/repositories/4535)
</div>
👋 加入我们的[微信群](assets/wechat.jpg)、[NPU 用户群](assets/wechat_npu.jpg)或 [九章智算云算力优惠群](assets/wechat_alaya.png)。
\[ [English](README.md) | 中文 \]
**微调大模型可以像这样轻松…**
https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
选择你的打开方式:
- **入门教程**:https://zhuanlan.zhihu.com/p/695287607
- **微调视频教程**:https://www.bilibili.com/video/BV1djgRzxEts/
- **框架文档**:https://llamafactory.readthedocs.io/zh-cn/latest/
- **框架文档(昇腾 NPU)**:https://ascend.github.io/docs/sources/llamafactory/
- **Colab(免费)**:https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing
- **本地机器**:请见[如何使用](#如何使用)
- **PAI-DSW(免费试用)**:[Llama3 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) | [Qwen2-VL 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl) | [DeepSeek-R1-Distill 案例](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b)
- **Amazon SageMaker**:[博客](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)
- **PAI-DSW(免费试用)**:https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory
- **九章智算云(算力优惠活动)**:https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory
> [!NOTE]
> 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。
@ -51,7 +66,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
## 目录
- [项目特色](#项目特色)
- [性能指标](#性能指标)
- [官方博客](#官方博客)
- [更新日志](#更新日志)
- [模型](#模型)
- [训练方法](#训练方法)
@ -78,36 +93,58 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
- **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Qwen2-VL、DeepSeek、Yi、Gemma、ChatGLM、Phi 等等。
- **集成方法**:增量预训练、多模态指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。
- **多种精度**:16 比特全参数微调、冻结微调、LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ 的 2/3/4/5/6/8 比特 QLoRA 微调。
- **先进算法**:[GaLore](https://github.com/jiaweizzhao/GaLore)、[BAdam](https://github.com/Ledzy/BAdam)、[APOLLO](https://github.com/zhuhanqing/APOLLO)、[Adam-mini](https://github.com/zyushun/Adam-mini)、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 PiSSA。
- **先进算法**:[GaLore](https://github.com/jiaweizzhao/GaLore)、[BAdam](https://github.com/Ledzy/BAdam)、[APOLLO](https://github.com/zhuhanqing/APOLLO)、[Adam-mini](https://github.com/zyushun/Adam-mini)、[Muon](https://github.com/KellerJordan/Muon)、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 PiSSA。
- **实用技巧**:[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)、[Unsloth](https://github.com/unslothai/unsloth)、[Liger Kernel](https://github.com/linkedin/Liger-Kernel)、RoPE scaling、NEFTune 和 rsLoRA。
- **广泛任务**:多轮对话、工具调用、图像理解、视觉定位、视频识别和语音理解等等。
- **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow、SwanLab 等等。
- **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。
- **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow、[SwanLab](https://github.com/SwanHubX/SwanLab) 等等。
- **极速推理**:基于 [vLLM](https://github.com/vllm-project/vllm) 或 [SGLang](https://github.com/sgl-project/sglang) 的 OpenAI 风格 API、浏览器界面和命令行接口。
### 最新模型的 Day-N 微调适配
| 适配时间 | 模型名称 |
| ------------ | ---------------------------------------------------------- |
| Day 0 | Qwen2.5 / Qwen2-VL / QwQ / QvQ / InternLM3 / MiniCPM-o-2.6 |
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 |
| 适配时间 | 模型名称 |
| ------------ | -------------------------------------------------------------------- |
| Day 0 | Qwen3 / Qwen2.5-VL / Gemma 3 / GLM-4.1V / InternLM 3 / MiniCPM-o-2.6 |
| Day 1 | Llama 3 / GLM-4 / Mistral Small / PaliGemma2 / Llama 4 |
## 性能指标
## 官方博客
与 ChatGLM 官方的 [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ptuning) 微调相比,LLaMA Factory 的 LoRA 微调提供了 **3.7 倍**的加速比,同时在广告文案生成任务上取得了更高的 Rouge 分数。结合 4 比特量化技术,LLaMA Factory 的 QLoRA 微调进一步降低了 GPU 显存消耗。
- [使用 LLaMA-Factory 微调 Llama3.1-70B 医学诊断模型](https://docs.alayanew.com/docs/documents/bestPractice/bigModel/llama70B/)(中文)
- [基于 LLaMA-Factory 和 EasyR1 打造一站式无代码大模型强化学习和部署平台 LLM Model Hub](https://aws.amazon.com/cn/blogs/china/building-llm-model-hub-based-on-llamafactory-and-easyr1/)(中文)
- [通过亚马逊 SageMaker HyperPod 上的 LLaMA-Factory 增强多模态模型银行文档的视觉信息提取](https://aws.amazon.com/cn/blogs/machine-learning/how-apoidea-group-enhances-visual-information-extraction-from-banking-documents-with-multimodal-models-using-llama-factory-on-amazon-sagemaker-hyperpod/)(英文)
- [Easy Dataset × LLaMA Factory: 让大模型高效学习领域知识](https://buaa-act.feishu.cn/wiki/KY9xwTGs1iqHrRkjXBwcZP9WnL9)(中文)
![benchmark](assets/benchmark.svg)
<details><summary>全部博客</summary>
<details><summary>变量定义</summary>
- **Training Speed**: 训练阶段每秒处理的样本数量。(批处理大小=4,截断长度=1024)
- **Rouge Score**: [广告文案生成](https://aclanthology.org/D19-1321.pdf)任务验证集上的 Rouge-2 分数。(批处理大小=4,截断长度=1024)
- **GPU Memory**: 4 比特量化训练的 GPU 显存峰值。(批处理大小=1,截断长度=1024)
- 我们在 ChatGLM 的 P-Tuning 中采用 `pre_seq_len=128`,在 LLaMA Factory 的 LoRA 微调中采用 `lora_rank=32`。
- [使用 LLaMA-Factory 微调 Qwen2.5-VL 实现自动驾驶场景微调](https://docs.alayanew.com/docs/documents/useGuide/LLaMAFactory/mutiple/?utm_source=LLaMA-Factory)(中文)
- [LLaMA Factory:微调 DeepSeek-R1-Distill-Qwen-7B 模型实现新闻标题分类器](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_deepseek_r1_distill_7b)(中文)
- [基于 Amazon SageMaker 和 LLaMA-Factory 打造一站式无代码模型微调部署平台 Model Hub](https://aws.amazon.com/cn/blogs/china/a-one-stop-code-free-model-fine-tuning-deployment-platform-based-on-sagemaker-and-llama-factory/)(中文)
- [LLaMA Factory 多模态微调实践:微调 Qwen2-VL 构建文旅大模型](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory_qwen2vl)(中文)
- [LLaMA Factory:微调 Llama3 模型实现角色扮演](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory)(中文)
</details>
## 更新日志
[25/07/02] 我们支持了 **[GLM-4.1V-9B-Thinking](https://github.com/THUDM/GLM-4.1V-Thinking)** 模型的微调。请安装 transformers 的 main 分支版本以使用。
[25/04/28] 我们支持了 **[Qwen3](https://qwenlm.github.io/blog/qwen3/)** 系列模型的微调。
[25/04/21] 我们支持了 **[Muon](https://github.com/KellerJordan/Muon)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。感谢 [@tianshijing](https://github.com/tianshijing) 的 PR。
[25/04/16] 我们支持了 **[InternVL3](https://huggingface.co/OpenGVLab/InternVL3-8B)** 模型的微调。查看 [PR #7258](https://github.com/hiyouga/LLaMA-Factory/pull/7258) 以使用。
[25/04/14] 我们支持了 **[GLM-Z1](https://huggingface.co/THUDM/GLM-Z1-9B-0414)** 和 **[Kimi-VL](https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct)** 模型的微调。
[25/04/06] 我们支持了 **[Llama 4](https://ai.meta.com/blog/llama-4-multimodal-intelligence/)** 模型的微调。查看 [PR #7611](https://github.com/hiyouga/LLaMA-Factory/pull/7611) 以使用。
<details><summary>展开日志</summary>
[25/03/31] 我们支持了 **[Qwen2.5 Omni](https://qwenlm.github.io/blog/qwen2.5-omni/)** 模型的微调。查看 [PR #7537](https://github.com/hiyouga/LLaMA-Factory/pull/7537) 以使用。
[25/03/15] 我们支持了 **[SGLang](https://github.com/sgl-project/sglang)** 推理后端,请使用 `infer_backend: sglang` 启用。
[25/03/12] 我们支持了 **[Gemma 3](https://huggingface.co/blog/gemma3)** 模型的微调。
[25/02/24] 我们宣布开源 **[EasyR1](https://github.com/hiyouga/EasyR1)**,一个高效可扩展的多模态强化学习框架,支持高效的 GRPO 训练。
[25/02/11] 我们支持了在导出模型时保存 **[Ollama](https://github.com/ollama/ollama)** 配置文件。详细用法请参照 [examples](examples/README_zh.md)。
@ -116,13 +153,11 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
[25/01/31] 我们支持了 **[DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)** 和 **[Qwen2.5-VL](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)** 模型的微调。
<details><summary>展开日志</summary>
[25/01/15] 我们支持了 **[APOLLO](https://arxiv.org/abs/2412.05270)** 优化器。详细用法请参照 [examples](examples/README_zh.md)。
[25/01/14] 我们支持了 **[MiniCPM-o-2.6](https://huggingface.co/openbmb/MiniCPM-o-2_6)** 和 **[MiniCPM-V-2.6](https://huggingface.co/openbmb/MiniCPM-V-2_6)** 模型的微调。感谢 [@BUAADreamer](https://github.com/BUAADreamer) 的 PR。
[25/01/14] 我们支持了 **[InternLM3](https://huggingface.co/collections/internlm/)** 模型的微调。感谢 [@hhaAndroid](https://github.com/hhaAndroid) 的 PR。
[25/01/14] 我们支持了 **[InternLM 3](https://huggingface.co/collections/internlm/)** 模型的微调。感谢 [@hhaAndroid](https://github.com/hhaAndroid) 的 PR。
[25/01/10] 我们支持了 **[Phi-4](https://huggingface.co/microsoft/phi-4)** 模型的微调。
@ -218,6 +253,9 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
</details>
> [!TIP]
> 如果您无法使用最新的功能,请尝试重新拉取代码并再次安装 LLaMA-Factory。
## 模型
| 模型名 | 参数量 | Template |
@ -228,22 +266,32 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere |
| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek |
| [DeepSeek 2.5/3](https://huggingface.co/deepseek-ai) | 236B/671B | deepseek3 |
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseek3 |
| [DeepSeek R1 (Distill)](https://huggingface.co/deepseek-ai) | 1.5B/7B/8B/14B/32B/70B/671B | deepseekr1 |
| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma |
| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 |
| [Falcon-H1](https://huggingface.co/tiiuae) | 0.5B/1.5B/3B/7B/34B | falcon_h1 |
| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma/gemma2 |
| [Gemma 3/Gemma 3n](https://huggingface.co/google) | 1B/4B/6B/8B/12B/27B | gemma3/gemma3n |
| [GLM-4/GLM-4-0414/GLM-Z1](https://huggingface.co/zai-org) | 9B/32B | glm4/glmz1 |
| [GLM-4.1V](https://huggingface.co/zai-org)* | 9B | glm4v |
| [GLM-4.5](https://huggingface.co/zai-org)* | 106B/355B | glm4_moe |
| [GPT-2](https://huggingface.co/openai-community) | 0.1B/0.4B/0.8B/1.5B | - |
| [Granite 3.0-3.1](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
| [InternVL 2.5-3](https://huggingface.co/OpenGVLab) | 1B/2B/8B/14B/38B/78B | intern_vl |
| [Kimi-VL](https://huggingface.co/moonshotai) | 16B | kimi_vl |
| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - |
| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 |
| [Llama 3-3.3](https://huggingface.co/meta-llama) | 1B/3B/8B/70B | llama3 |
| [Llama 4](https://huggingface.co/meta-llama) | 109B/402B | llama4 |
| [Llama 3.2 Vision](https://huggingface.co/meta-llama) | 11B/90B | mllama |
| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | llava |
| [LLaVA-NeXT](https://huggingface.co/llava-hf) | 7B/8B/13B/34B/72B/110B | llava_next |
| [LLaVA-NeXT-Video](https://huggingface.co/llava-hf) | 7B/34B | llava_next_video |
| [MiniCPM](https://huggingface.co/openbmb) | 1B/2B/4B | cpm/cpm3 |
| [MiMo](https://huggingface.co/XiaomiMiMo) | 7B | mimo |
| [MiniCPM](https://huggingface.co/openbmb) | 0.5B/1B/2B/4B/8B | cpm/cpm3/cpm4 |
| [MiniCPM-o-2.6/MiniCPM-V-2.6](https://huggingface.co/openbmb) | 8B | minicpm_o/minicpm_v |
| [Ministral/Mistral-Nemo](https://huggingface.co/mistralai) | 8B/12B | ministral |
| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral |
@ -255,9 +303,12 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
| [Phi-3-small](https://huggingface.co/microsoft) | 7B | phi_small |
| [Phi-4](https://huggingface.co/microsoft) | 14B | phi4 |
| [Pixtral](https://huggingface.co/mistralai) | 12B | pixtral |
| [Qwen/QwQ (1-2.5) (Code/Math/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen (1-2.5) (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen) | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen |
| [Qwen3 (MoE)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/235B | qwen3 |
| [Qwen2-Audio](https://huggingface.co/Qwen) | 7B | qwen2_audio |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/72B | qwen2_vl |
| [Qwen2.5-Omni](https://huggingface.co/Qwen) | 3B/7B | qwen2_omni |
| [Qwen2-VL/Qwen2.5-VL/QVQ](https://huggingface.co/Qwen) | 2B/3B/7B/32B/72B | qwen2_vl |
| [Seed Coder](https://huggingface.co/ByteDance-Seed) | 8B | seed_coder |
| [Skywork o1](https://huggingface.co/Skywork) | 8B | skywork_o1 |
| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - |
| [TeleChat2](https://huggingface.co/Tele-AI) | 3B/7B/35B/115B | telechat2 |
@ -270,6 +321,10 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
> 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。
>
> 请务必在训练和推理时采用**完全一致**的模板。
>
> \*:您需要从 main 分支安装 `transformers` 并使用 `DISABLE_VERSION_CHECK=1` 来跳过版本检查。
>
> \*\*:您需要安装特定版本的 `transformers` 以使用该模型。
项目所支持模型的完整列表请参阅 [constants.py](src/llamafactory/extras/constants.py)。
@ -373,8 +428,10 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
- [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k)
- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
- [COIG-P (zh)](https://huggingface.co/datasets/m-a-p/COIG-P)
- [RLHF-V (en)](https://huggingface.co/datasets/openbmb/RLHF-V-Dataset)
- [VLFeedback (en)](https://huggingface.co/datasets/Zhihui/VLFeedback)
- [RLAIF-V (en)](https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset)
- [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs)
- [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf)
- [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)
@ -392,23 +449,24 @@ huggingface-cli login
## 软硬件依赖
| 必需项 | 至少 | 推荐 |
| 必需项 | 至少 | 推荐 |
| ------------ | ------- | --------- |
| python | 3.9 | 3.10 |
| torch | 1.13.1 | 2.5.1 |
| transformers | 4.41.2 | 4.49.0 |
| torch | 2.0.0 | 2.6.0 |
| torchvision | 0.15.0 | 0.21.0 |
| transformers | 4.49.0 | 4.50.0 |
| datasets | 2.16.0 | 3.2.0 |
| accelerate | 0.34.0 | 1.2.1 |
| peft | 0.11.1 | 0.12.0 |
| peft | 0.14.0 | 0.15.1 |
| trl | 0.8.6 | 0.9.6 |
| 可选项 | 至少 | 推荐 |
| 可选项 | 至少 | 推荐 |
| ------------ | ------- | --------- |
| CUDA | 11.6 | 12.2 |
| deepspeed | 0.10.0 | 0.16.4 |
| bitsandbytes | 0.39.0 | 0.43.1 |
| vllm | 0.4.3 | 0.7.3 |
| flash-attn | 2.3.0 | 2.7.2 |
| vllm | 0.4.3 | 0.8.2 |
| flash-attn | 2.5.6 | 2.7.2 |
### 硬件依赖
@ -430,16 +488,27 @@ huggingface-cli login
> [!IMPORTANT]
> 此步骤为必需。
#### 从源码安装
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e ".[torch,metrics]"
pip install -e ".[torch,metrics]" --no-build-isolation
```
可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、apollo、badam、adam-mini、qwen、minicpm_v、modelscope、openmind、swanlab、quality
可选的额外依赖项:torch、torch-npu、metrics、deepspeed、liger-kernel、bitsandbytes、hqq、eetq、gptq、aqlm、vllm、sglang、galore、apollo、badam、adam-mini、qwen、minicpm_v、openmind、swanlab、dev
> [!TIP]
> 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。
#### 从镜像安装
```bash
docker run -it --rm --gpus=all --ipc=host hiyouga/llamafactory:latest
```
该镜像基于 Ubuntu 22.04(x86\_64)、CUDA 12.4、Python 3.11、PyTorch 2.6.0 和 Flash-attn 2.7.4 构建。
查看全部镜像:https://hub.docker.com/r/hiyouga/llamafactory/tags
请参阅[构建 Docker](#构建-docker) 来重新构建镜像。
<details><summary>使用 <b>uv</b> 构建虚拟环境</summary>
@ -457,9 +526,22 @@ uv run --prerelease=allow llamafactory-cli train examples/train_lora/llama3_lora
</details>
<details><summary>Windows 用户指南</summary>
#### 安装 PyTorch
Windows 平台需要额外手动安装 GPU 版本的 PyTorch 依赖包,您可以参考[官方网站](https://pytorch.org/get-started/locally/)和以下命令安装并测试 PyTorch 是否正确安装。
```bash
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
python -c "import torch; print(torch.cuda.is_available())"
```
如果看到 `True` 则说明安装成功。
若遇到类似 `Can't pickle local object` 的报错,请设置 `dataloader_num_workers: 0`。
#### 安装 BitsAndBytes
如果要在 Windows 平台上开启量化 LoRA(QLoRA),需要安装预编译的 `bitsandbytes` 库,支持 CUDA 11.1 到 12.2,请根据您的 CUDA 版本情况选择适合的[发布版本](https://github.com/jllllll/bitsandbytes-windows-webui/releases/tag/wheels)。
@ -498,6 +580,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh
| torch | 2.1.0 | 2.4.0 |
| torch-npu | 2.1.0 | 2.4.0.post2 |
| deepspeed | 0.13.2 | 0.13.2 |
| vllm-ascend | - | 0.7.3 |
请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。
@ -548,6 +631,8 @@ pip install .
> [!NOTE]
> 使用自定义数据集时,请更新 `data/dataset_info.json` 文件。
您也可以使用 **[Easy Dataset](https://github.com/ConardLi/easy-dataset)**、**[DataFlow](https://github.com/OpenDCAI/DataFlow)** 和 **[GraphGen](https://github.com/open-sciencelab/GraphGen)** 构建用于微调的合成数据。
### 快速开始
下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
@ -603,22 +688,13 @@ CUDA 用户:
```bash
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg INSTALL_FLASHATTN=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
docker run -dit --gpus=all \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./om_cache:/root/.cache/openmind \
-v ./data:/app/data \
-v ./output:/app/output \
docker run -dit --ipc=host --gpus=all \
-p 7860:7860 \
-p 8000:8000 \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
@ -628,19 +704,12 @@ docker exec -it llamafactory bash
昇腾 NPU 用户:
```bash
# 根据您的环境选择镜像
docker build -f ./docker/docker-npu/Dockerfile \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=torch-npu,metrics \
-t llamafactory:latest .
# 根据您的资源更改 `device`
docker run -dit \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./om_cache:/root/.cache/openmind \
-v ./data:/app/data \
-v ./output:/app/output \
docker run -dit --ipc=host \
-v /usr/local/dcmi:/usr/local/dcmi \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
@ -651,7 +720,6 @@ docker run -dit \
--device /dev/davinci_manager \
--device /dev/devmm_svm \
--device /dev/hisi_hdc \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
@ -662,25 +730,15 @@ AMD ROCm 用户:
```bash
docker build -f ./docker/docker-rocm/Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg INSTALL_FLASHATTN=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
docker run -dit \
-v ./hf_cache:/root/.cache/huggingface \
-v ./ms_cache:/root/.cache/modelscope \
-v ./om_cache:/root/.cache/openmind \
-v ./data:/app/data \
-v ./output:/app/output \
-v ./saves:/app/saves \
docker run -dit --ipc=host \
-p 7860:7860 \
-p 8000:8000 \
--device /dev/kfd \
--device /dev/dri \
--shm-size 16G \
--name llamafactory \
llamafactory:latest
@ -689,12 +747,14 @@ docker exec -it llamafactory bash
</details>
<details><summary>数据卷详情</summary>
<details><summary>使用数据卷</summary>
- `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。
- `ms_cache`:类似 Hugging Face 缓存文件夹,为 ModelScope 用户提供。
- `om_cache`:类似 Hugging Face 缓存文件夹,为 Modelers 用户提供。
- `data`:宿主机中存放数据集的文件夹路径。
您可以通过移除 Dockerfile 中 `VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]` 的注释来使用数据卷。
在构建 Docker 时使用参数 `-v ./hf_cache:/root/.cache/huggingface` 来挂载数据卷。各个数据卷的含义表示如下。
- `hf_cache`:使用宿主机的 Hugging Face 缓存文件夹。
- `shared_data`:宿主机中存放数据集的文件夹路径。
- `output`:将导出目录设置为该路径后,即可在宿主机中访问导出后的模型。
</details>
@ -702,7 +762,7 @@ docker exec -it llamafactory bash
### 利用 vLLM 部署 OpenAI API
```bash
API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml
API_PORT=8000 llamafactory-cli api examples/inference/llama3.yaml infer_backend=vllm vllm_enforce_eager=true
```
> [!TIP]
@ -857,6 +917,7 @@ swanlab_run_name: test_run # 可选
1. **[RAG-Retrieval](https://github.com/NLPJCL/RAG-Retrieval)**:一个全链路 RAG 检索模型微调、推理和蒸馏代码库。[[blog]](https://zhuanlan.zhihu.com/p/987727357)
1. **[360-LLaMA-Factory](https://github.com/Qihoo360/360-LLaMA-Factory)**:一个魔改后的代码库,通过 Ring Attention 支持长序列的 SFT 和 DPO 训练。
1. **[Sky-T1](https://novasky-ai.github.io/posts/sky-t1/)**:由 NovaSky AI 微调的低成本类 o1 长推理模型。
1. **[WeClone](https://github.com/xming521/WeClone)**:从聊天记录创造数字分身的一站式解决方案。
</details>
@ -864,7 +925,7 @@ swanlab_run_name: test_run # 可选
本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。
使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 
2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [GPT-2](https://github.com/openai/gpt-2/blob/master/LICENSE) / [Granite](LICENSE) / [Index](https://huggingface.co/IndexTeam/Index-1.9B/blob/main/LICENSE) / [InternLM](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Llama 4](https://github.com/meta-llama/llama-models/blob/main/models/llama4/LICENSE) / [MiniCPM](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%20Model%20License.md) / [Mistral/Mixtral/Pixtral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3/Phi-4](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [Skywork](https://huggingface.co/Skywork/Skywork-13B-base/blob/main/Skywork%20Community%20License.pdf) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [TeleChat2](https://huggingface.co/Tele-AI/telechat-7B/blob/main/TeleChat%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / 
[Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan)
## 引用

38
assets/alaya_new.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 47 KiB

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 164 KiB

After

Width:  |  Height:  |  Size: 166 KiB

BIN
assets/wechat_alaya.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 167 KiB

After

Width:  |  Height:  |  Size: 171 KiB

View File

@ -1,12 +1,15 @@
The [dataset_info.json](dataset_info.json) contains all available datasets. If you are using a custom dataset, please **make sure** to add a *dataset description* in `dataset_info.json` and specify `dataset: dataset_name` before training to use it.
Currently we support datasets in **alpaca** and **sharegpt** format.
The `dataset_info.json` file should be put in the `dataset_dir` directory. You can change `dataset_dir` to use another directory. The default value is `./data`.
Currently we support datasets in **alpaca** and **sharegpt** format. Allowed file types include json, jsonl, csv, parquet, arrow.
```json
"dataset_name": {
"hf_hub_url": "the name of the dataset repository on the Hugging Face hub. (if specified, ignore script_url and file_name)",
"ms_hub_url": "the name of the dataset repository on the Model Scope hub. (if specified, ignore script_url and file_name)",
"script_url": "the name of the directory containing a dataset loading script. (if specified, ignore file_name)",
"hf_hub_url": "the name of the dataset repository on the Hugging Face hub. (if specified, ignore script_url, file_name and cloud_file_name)",
"ms_hub_url": "the name of the dataset repository on the Model Scope hub. (if specified, ignore script_url, file_name and cloud_file_name)",
"script_url": "the name of the directory containing a dataset loading script. (if specified, ignore file_name and cloud_file_name)",
"cloud_file_name": "the name of the dataset file in s3/gcs cloud storage. (if specified, ignore file_name)",
"file_name": "the name of the dataset folder or dataset file in this directory. (required if above are not specified)",
"formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
"ranking": "whether the dataset is a preference dataset or not. (default: False)",
@ -47,7 +50,9 @@ Currently we support datasets in **alpaca** and **sharegpt** format.
* [Example dataset](alpaca_en_demo.json)
In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the human prompt, then the human prompt would be `instruction\ninput`. The `output` column represents the model response.
In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the user prompt, then the user prompt would be `instruction\ninput`. The `output` column represents the model response.
For reasoning models, if the dataset contains chain-of-thought (CoT), the CoT needs to be placed in the model responses, such as `<think>cot</think>output`.
The `system` column will be used as the system prompt if specified.
@ -56,13 +61,13 @@ The `history` column is a list consisting of string tuples representing prompt-r
```json
[
{
"instruction": "human instruction (required)",
"input": "human input (optional)",
"instruction": "user instruction (required)",
"input": "user input (optional)",
"output": "model response (required)",
"system": "system prompt (optional)",
"history": [
["human instruction in the first round (optional)", "model response in the first round (optional)"],
["human instruction in the second round (optional)", "model response in the second round (optional)"]
["user instruction in the first round (optional)", "model response in the first round (optional)"],
["user instruction in the second round (optional)", "model response in the second round (optional)"]
]
}
]
@ -83,9 +88,14 @@ Regarding the above dataset, the *dataset description* in `dataset_info.json` sh
}
```
> [!TIP]
> If the model has reasoning capabilities (e.g. Qwen3) but the dataset does not contain chain-of-thought (CoT), LLaMA-Factory will automatically add an empty CoT to the data. When `enable_thinking` is `True` (slow thinking, the default), the empty CoT is added to the model responses and included in the loss computation; otherwise (fast thinking), it is added to the user prompts and excluded from the loss computation. Please keep the `enable_thinking` parameter consistent between training and inference.
>
> If you want to train on data containing CoT with slow thinking and on data without CoT with fast thinking, you can set `enable_thinking` to `None`. However, this feature is relatively complicated and should be used with caution.
### Pre-training Dataset
- [Example dataset](c4_demo.json)
- [Example dataset](c4_demo.jsonl)
In pre-training, only the `text` column will be used for model learning.
@ -116,8 +126,8 @@ It requires a better response in `chosen` column and a worse response in `reject
```json
[
{
"instruction": "human instruction (required)",
"input": "human input (optional)",
"instruction": "user instruction (required)",
"input": "user input (optional)",
"chosen": "chosen answer (required)",
"rejected": "rejected answer (required)"
}
@ -163,7 +173,7 @@ An additional column `audios` is required. Please refer to the [sharegpt](#share
Compared to the alpaca format, the sharegpt format allows datasets to have **more roles**, such as human, gpt, observation and function. They are presented in a list of objects in the `conversations` column.
Note that the human and observation should appear in odd positions, while gpt and function should appear in even positions.
Note that the human and observation should appear in odd positions, while gpt and function should appear in even positions. The gpt and function will be learned by the model.
```json
[
@ -171,7 +181,7 @@ Note that the human and observation should appear in odd positions, while gpt an
"conversations": [
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
},
{
"from": "function_call",
@ -222,7 +232,7 @@ Preference datasets in sharegpt format also require a better message in `chosen`
"conversations": [
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
},
{
"from": "gpt",
@ -230,7 +240,7 @@ Preference datasets in sharegpt format also require a better message in `chosen`
},
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
}
],
"chosen": {
@ -272,7 +282,7 @@ KTO datasets require a extra `kto_tag` column containing the boolean human feedb
"conversations": [
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
},
{
"from": "gpt",
@ -311,7 +321,7 @@ The number of images should be identical to the `<image>` tokens in the conversa
"conversations": [
{
"from": "human",
"value": "<image>human instruction"
"value": "<image>user instruction"
},
{
"from": "gpt",
@ -352,7 +362,7 @@ The number of videos should be identical to the `<video>` tokens in the conversa
"conversations": [
{
"from": "human",
"value": "<video>human instruction"
"value": "<video>user instruction"
},
{
"from": "gpt",
@ -393,7 +403,7 @@ The number of audios should be identical to the `<audio>` tokens in the conversa
"conversations": [
{
"from": "human",
"value": "<audio>human instruction"
"value": "<audio>user instruction"
},
{
"from": "gpt",
@ -434,7 +444,7 @@ The openai format is simply a special case of the sharegpt format, where the fir
},
{
"role": "user",
"content": "human instruction"
"content": "user instruction"
},
{
"role": "assistant",

View File

@ -1,6 +1,8 @@
[dataset_info.json](dataset_info.json) 包含了所有可用的数据集。如果您希望使用自定义数据集,请**务必**在 `dataset_info.json` 文件中添加*数据集描述*,并通过修改 `dataset: 数据集名称` 配置来使用数据集。
目前我们支持 **alpaca** 格式和 **sharegpt** 格式的数据集。
其中 `dataset_info.json` 文件应放置在 `dataset_dir` 目录下。您可以通过修改 `dataset_dir` 参数来使用其他目录。默认值为 `./data`。
目前我们支持 **alpaca** 格式和 **sharegpt** 格式的数据集。允许的文件类型包括 json、jsonl、csv、parquet 和 arrow。
```json
"数据集名称": {
@ -47,7 +49,9 @@
- [样例数据集](alpaca_zh_demo.json)
在指令监督微调时,`instruction` 列对应的内容会与 `input` 列对应的内容拼接后作为人类指令,即人类指令为 `instruction\ninput`。而 `output` 列对应的内容为模型回答。
在指令监督微调时,`instruction` 列对应的内容会与 `input` 列对应的内容拼接后作为提示词,即提示词为 `instruction\ninput`。而 `output` 列对应的内容为模型回答。
对于推理类模型的微调,如果数据集包含思维链,则需要把思维链放在模型回答中,例如 `<think>cot</think>output`。
如果指定,`system` 列对应的内容将被作为系统提示词。
@ -56,8 +60,8 @@
```json
[
{
"instruction": "人类指令(必填)",
"input": "人类输入(选填)",
"instruction": "用户指令(必填)",
"input": "用户输入(选填)",
"output": "模型回答(必填)",
"system": "系统提示词(选填)",
"history": [
@ -83,9 +87,14 @@
}
```
> [!TIP]
> 如果模型本身具备推理能力(如 Qwen3)而数据集不包含思维链,LLaMA-Factory 会自动为数据添加空思维链。当 `enable_thinking` 为 `True` 时(慢思考,默认),空思维链会添加到模型回答中并且计算损失,否则会添加到用户指令中并且不计算损失(快思考)。请在训练和推理时保持 `enable_thinking` 参数一致。
>
> 如果您希望训练包含思维链的数据时使用慢思考,训练不包含思维链的数据时使用快思考,可以设置 `enable_thinking` 为 `None`。但该功能较为复杂,请谨慎使用。
### 预训练数据集
- [样例数据集](c4_demo.json)
- [样例数据集](c4_demo.jsonl)
在预训练时,只有 `text` 列中的内容会用于模型学习。
@ -116,8 +125,8 @@
```json
[
{
"instruction": "人类指令(必填)",
"input": "人类输入(选填)",
"instruction": "用户指令(必填)",
"input": "用户输入(选填)",
"chosen": "优质回答(必填)",
"rejected": "劣质回答(必填)"
}
@ -163,7 +172,7 @@ KTO 数据集需要提供额外的 `kto_tag` 列。详情请参阅 [sharegpt](#s
相比 alpaca 格式的数据集,sharegpt 格式支持**更多的角色种类**,例如 human、gpt、observation、function 等等。它们构成一个对象列表呈现在 `conversations` 列中。
注意其中 human 和 observation 必须出现在奇数位置,gpt 和 function 必须出现在偶数位置。
注意其中 human 和 observation 必须出现在奇数位置,gpt 和 function 必须出现在偶数位置。默认所有的 gpt 和 function 会被用于学习。
```json
[
@ -171,7 +180,7 @@ KTO 数据集需要提供额外的 `kto_tag` 列。详情请参阅 [sharegpt](#s
"conversations": [
{
"from": "human",
"value": "人类指令"
"value": "用户指令"
},
{
"from": "function_call",
@ -222,7 +231,7 @@ Sharegpt 格式的偏好数据集同样需要在 `chosen` 列中提供更优的
"conversations": [
{
"from": "human",
"value": "人类指令"
"value": "用户指令"
},
{
"from": "gpt",
@ -230,7 +239,7 @@ Sharegpt 格式的偏好数据集同样需要在 `chosen` 列中提供更优的
},
{
"from": "human",
"value": "人类指令"
"value": "用户指令"
}
],
"chosen": {
@ -272,7 +281,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
"conversations": [
{
"from": "human",
"value": "人类指令"
"value": "用户指令"
},
{
"from": "gpt",
@ -311,7 +320,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
"conversations": [
{
"from": "human",
"value": "<image>人类指令"
"value": "<image><image>用户指令"
},
{
"from": "gpt",
@ -319,6 +328,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
}
],
"images": [
"图像路径(必填)",
"图像路径(必填)"
]
}
@ -352,7 +362,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
"conversations": [
{
"from": "human",
"value": "<video>人类指令"
"value": "<video><video>用户指令"
},
{
"from": "gpt",
@ -360,6 +370,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
}
],
"videos": [
"视频路径(必填)",
"视频路径(必填)"
]
}
@ -393,7 +404,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
"conversations": [
{
"from": "human",
"value": "<audio>人类指令"
"value": "<audio><audio>用户指令"
},
{
"from": "gpt",
@ -401,6 +412,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
}
],
"audios": [
"音频路径(必填)",
"音频路径(必填)"
]
}
@ -435,7 +447,7 @@ OpenAI 格式仅仅是 sharegpt 格式的一种特殊情况,其中第一条消
},
{
"role": "user",
"content": "人类指令"
"content": "用户指令"
},
{
"role": "assistant",

View File

@ -4154,11 +4154,6 @@
"input": "The beauty of friendship",
"output": "In life we all embark on journeys grand,\nAnd wander through the trials of our days;\nAt times we'll come across a barren land,\nWhere hardships test and challenge in new ways.\n\nBut through the toil, we're never all alone,\nFor friendship shines as brilliant as the sun;\nWith bonds that strengthen, deepen and have grown,\nA beauty that cannot be rivaled, none.\n\nA friend who's there with hands to lift us high,\nThrough laughter and the joy, through tears and pain,\nA beacon when the road ahead's unclear,\nA treasure more than riches one could gain.\n\nOh friendship, pure and ever shining bright,\nA beauty that brings everlasting light."
},
{
"instruction": "The news and other sources have been claiming that Democracy is losing ground around the world. Give an example of a recent event to back this claim up.",
"input": "",
"output": "One recent example that supports this claim is the events in Hong Kong. In 2019 and 2020, the Chinese government imposed a national security law to crack down on pro-democracy protests in Hong Kong, effectively curtailing the freedoms and autonomy that the region once enjoyed. This move by Beijing to assert greater control over Hong Kong has led to concerns by human rights advocates and international leaders that democracy is being undermined in the territory. There is fear that Hong Kongs outspoken media, independent judiciary, and civil liberties look increasingly vulnerable as Beijing tightens its grip on the city. Additionally, there have been arrests of multiple leading pro-Democracy figures such as Joshua Wong, Agnes Chow and Jimmy Lai in addition to many others who have become political prisoners."
},
{
"instruction": "Redact the following sentence to remove all personal information: \"John Smith from Birmingham works in a factory.\"",
"input": "",

View File

@ -1,3 +1,18 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
@ -10,7 +25,7 @@ _DESCRIPTION = "BELLE multiturn chat dataset."
_CITATION = """\
@article{belle2023exploring,
title={Exploring the Impact of Instruction Data Scaling on Large Language Models: An Empirical Study on Real-World Use Cases},
title={Exploring the Impact of Instruction Data Scaling on Large Language Models},
author={Yunjie Ji, Yong Deng, Yan Gong, Yiping Peng, Qiang Niu, Lei Zhang, Baochang Ma, Xiangang Li},
journal={arXiv preprint arXiv:2303.14742},
year={2023}

File diff suppressed because one or more lines are too long

300
data/c4_demo.jsonl Normal file

File diff suppressed because one or more lines are too long

View File

@ -66,6 +66,21 @@
"assistant_tag": "assistant"
}
},
"mllm_video_audio_demo": {
"file_name": "mllm_video_audio_demo.json",
"formatting": "sharegpt",
"columns": {
"messages": "messages",
"videos": "videos",
"audios": "audios"
},
"tags": {
"role_tag": "role",
"content_tag": "content",
"user_tag": "user",
"assistant_tag": "assistant"
}
},
"alpaca_en": {
"hf_hub_url": "llamafactory/alpaca_en",
"ms_hub_url": "llamafactory/alpaca_en",
@ -232,6 +247,7 @@
"ultrachat_200k": {
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
"ms_hub_url": "AI-ModelScope/ultrachat_200k",
"split": "train_sft",
"formatting": "sharegpt",
"columns": {
"messages": "messages"
@ -258,7 +274,7 @@
"tags": {
"role_tag": "role",
"content_tag": "content",
"user_tag": "human",
"user_tag": "user",
"assistant_tag": "assistant"
}
},
@ -511,6 +527,16 @@
"rejected": "rejected"
}
},
"coig_p": {
"hf_hub_url": "m-a-p/COIG-P",
"ranking": true,
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"chosen": "chosen",
"rejected": "rejected"
}
},
"rlhf_v": {
"hf_hub_url": "llamafactory/RLHF-V",
"ranking": true,
@ -533,6 +559,16 @@
"images": "images"
}
},
"rlaif_v": {
"hf_hub_url": "openbmb/RLAIF-V-Dataset",
"ranking": true,
"columns": {
"prompt": "question",
"chosen": "chosen",
"rejected": "rejected",
"images": "image"
}
},
"orca_pairs": {
"hf_hub_url": "Intel/orca_dpo_pairs",
"ranking": true,
@ -606,7 +642,7 @@
}
},
"c4_demo": {
"file_name": "c4_demo.json",
"file_name": "c4_demo.jsonl",
"columns": {
"prompt": "text"
}

View File

@ -1,6 +1,20 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from typing import List
import datasets
@ -50,7 +64,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepaths": file_path["test"]}),
]
def _generate_examples(self, filepaths: List[str]):
def _generate_examples(self, filepaths: list[str]):
key = 0
for filepath in filepaths:
with open(filepath, encoding="utf-8") as f:

BIN
data/mllm_demo_data/4.mp3 Normal file

Binary file not shown.

BIN
data/mllm_demo_data/4.mp4 Normal file

Binary file not shown.

View File

@ -0,0 +1,56 @@
[
{
"messages": [
{
"content": "<video><audio>What is the video describing?",
"role": "user"
},
{
"content": "A girl who is drawing a picture of a guitar and feel nervous.",
"role": "assistant"
}
],
"videos": [
"mllm_demo_data/4.mp4"
],
"audios": [
"mllm_demo_data/4.mp3"
]
},
{
"messages": [
{
"content": "<video><audio>What does this girl say?",
"role": "user"
},
{
"content": "She says: 'Hello! Take a look at what am I drawing!'",
"role": "assistant"
}
],
"videos": [
"mllm_demo_data/4.mp4"
],
"audios": [
"mllm_demo_data/4.mp3"
]
},
{
"messages": [
{
"content": "<video><audio>What is this girl drawing with?",
"role": "user"
},
{
"content": "She is drawing with an iPad.",
"role": "assistant"
}
],
"videos": [
"mllm_demo_data/4.mp4"
],
"audios": [
"mllm_demo_data/4.mp3"
]
}
]

View File

@ -1,6 +1,20 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from typing import List
import datasets
@ -11,7 +25,7 @@ _DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dia
_CITATION = """\
@misc{UltraChat,
author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and Qin, Yujia and Liu, Zhiyuan and Sun, Maosong and Zhou, Bowen},
author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and others},
title = {UltraChat: A Large-scale Auto-generated Multi-round Dialogue Data},
year = {2023},
publisher = {GitHub},
@ -40,7 +54,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(10)] # multiple shards
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_paths})]
def _generate_examples(self, filepaths: List[str]):
def _generate_examples(self, filepaths: list[str]):
for filepath in filepaths:
with open(filepath, encoding="utf-8") as f:
for row in f:
@ -49,7 +63,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
except Exception:
continue
key: int = data["id"]
content: List[str] = data["data"]
content: list[str] = data["data"]
if len(content) % 2 == 1:
content.pop(-1)
if len(content) < 2:

View File

@ -1,101 +1,66 @@
# Default use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
# https://hub.docker.com/r/hiyouga/pytorch/tags
ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0-devel
FROM ${BASE_IMAGE}
# Installation arguments
ARG PIP_INDEX=https://pypi.org/simple
ARG EXTRAS=metrics
ARG INSTALL_FLASHATTN=false
ARG HTTP_PROXY=""
# Define environments
ENV MAX_JOBS=4
ENV MAX_JOBS=16
ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV PIP_ROOT_USER_ACTION=ignore
ENV http_proxy="${HTTP_PROXY}"
ENV https_proxy="${HTTP_PROXY}"
# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG INSTALL_FLASHATTN=false
ARG INSTALL_LIGER_KERNEL=false
ARG INSTALL_HQQ=false
ARG INSTALL_EETQ=false
ARG PIP_INDEX=https://pypi.org/simple
ARG HTTP_PROXY=
# Use Bash instead of default /bin/sh
SHELL ["/bin/bash", "-c"]
# Set the working directory
WORKDIR /app
# Set http proxy
RUN if [ -n "$HTTP_PROXY" ]; then \
echo "Configuring proxy..."; \
export http_proxy=$HTTP_PROXY; \
export https_proxy=$HTTP_PROXY; \
fi
# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
pip install --no-cache-dir --upgrade pip packaging wheel setuptools
# Install the requirements
COPY requirements.txt /app
RUN pip config set global.index-url "$PIP_INDEX" && \
pip config set global.extra-index-url "$PIP_INDEX" && \
python -m pip install --upgrade pip && \
if [ -n "$HTTP_PROXY" ]; then \
python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
else \
python -m pip install -r requirements.txt; \
fi
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application into the image
COPY . /app
# Install the LLaMA Factory
RUN EXTRA_PACKAGES="metrics"; \
if [ "$INSTALL_BNB" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
fi; \
if [ "$INSTALL_VLLM" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
fi; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
fi; \
if [ "$INSTALL_HQQ" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
fi; \
if [ "$INSTALL_EETQ" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
fi; \
if [ -n "$HTTP_PROXY" ]; then \
pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
else \
pip install -e ".[$EXTRA_PACKAGES]"; \
fi
# Install LLaMA Factory
RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
# Rebuild flash attention
RUN pip uninstall -y transformer-engine flash-attn && \
if [ "$INSTALL_FLASHATTN" == "true" ]; then \
RUN if [ "${INSTALL_FLASHATTN}" == "true" ]; then \
pip uninstall -y ninja && \
if [ -n "$HTTP_PROXY" ]; then \
pip install --proxy=$HTTP_PROXY ninja && \
pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
else \
pip install ninja && \
pip install --no-cache-dir flash-attn --no-build-isolation; \
fi; \
fi
# Unset http proxy
RUN if [ -n "$HTTP_PROXY" ]; then \
unset http_proxy; \
unset https_proxy; \
pip install --no-cache-dir ninja && \
pip install --no-cache-dir flash-attn --no-build-isolation; \
fi
# Set up volumes
VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
# VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
ENV GRADIO_SERVER_PORT 7860
# Expose port 7860 for LLaMA Board
ENV GRADIO_SERVER_PORT=7860
EXPOSE 7860
# Expose port 8000 for the API service
ENV API_PORT 8000
# Expose port 8000 for API service
ENV API_PORT=8000
EXPOSE 8000
# unset proxy
ENV http_proxy=
ENV https_proxy=
# Reset pip config
RUN pip config unset global.index-url && \
pip config unset global.extra-index-url

View File

@ -0,0 +1,55 @@
# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)
# https://hub.docker.com/r/pytorch/pytorch/tags
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel
# Define environments
ENV MAX_JOBS=16
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV PIP_ROOT_USER_ACTION=ignore
# Define installation arguments
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# Set apt source
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
{ \
echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
} > /etc/apt/sources.list
# Install systemctl and wget
RUN apt-get update && \
apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \
apt-get clean
# Install git and vim
RUN apt-get update && \
apt-get install -y git vim && \
apt-get clean
# Install gcc and g++
RUN apt-get update && \
apt-get install -y gcc g++ && \
apt-get clean
# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
pip install --no-cache-dir --upgrade pip packaging wheel setuptools
# Install flash-attn-2.7.4.post1 (cxx11abi=False)
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
# Reset pip config
RUN pip config unset global.index-url && \
pip config unset global.extra-index-url

View File

@ -0,0 +1,111 @@
# Docker Setup for NVIDIA GPUs
This directory contains Docker configuration files for running LLaMA Factory with NVIDIA GPU support.
## Prerequisites
### Linux-specific Requirements
Before running the Docker container with GPU support, you need to install the following packages:
1. **Docker**: The container runtime
```bash
# Ubuntu/Debian
sudo apt-get update
sudo apt-get install docker.io
# Or install Docker Engine from the official repository:
# https://docs.docker.com/engine/install/
```
2. **Docker Compose** (if using the docker-compose method):
```bash
# Ubuntu/Debian
sudo apt-get install docker-compose
# Or install the latest version:
# https://docs.docker.com/compose/install/
```
3. **NVIDIA Container Toolkit** (required for GPU support):
```bash
# Add the NVIDIA GPG key and repository
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
# Install nvidia-container-toolkit
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# Restart Docker to apply changes
sudo systemctl restart docker
```
**Note**: Without `nvidia-container-toolkit`, the Docker container will not be able to access your NVIDIA GPU.
### Verify GPU Access
After installation, verify that Docker can access your GPU:
```bash
sudo docker run --rm --gpus all nvidia/cuda:12.4.0-base-ubuntu22.04 nvidia-smi
```
If successful, you should see your GPU information displayed.
## Usage
### Using Docker Compose (Recommended)
```bash
cd docker/docker-cuda/
docker compose up -d
docker compose exec llamafactory bash
```
### Using Docker Run
```bash
# Build the image
docker build -f ./docker/docker-cuda/Dockerfile \
--build-arg PIP_INDEX=https://pypi.org/simple \
--build-arg EXTRAS=metrics \
-t llamafactory:latest .
# Run the container
docker run -dit --ipc=host --gpus=all \
-p 7860:7860 \
-p 8000:8000 \
--name llamafactory \
llamafactory:latest
# Enter the container
docker exec -it llamafactory bash
```
## Troubleshooting
### GPU Not Detected
If your GPU is not detected inside the container:
1. Ensure `nvidia-container-toolkit` is installed
2. Check that the Docker daemon has been restarted after installation
3. Verify your NVIDIA drivers are properly installed: `nvidia-smi`
4. Check Docker GPU support: `docker run --rm --gpus all ubuntu nvidia-smi`
### Permission Denied
If you get permission errors, ensure your user is in the docker group:
```bash
sudo usermod -aG docker $USER
# Log out and back in for changes to take effect
```
## Additional Notes
- The default image is built on Ubuntu 22.04 (x86_64), CUDA 12.4, Python 3.11, PyTorch 2.6.0, and Flash-attn 2.7.4
- For different CUDA versions, you may need to adjust the base image in the Dockerfile
- Make sure your NVIDIA driver version is compatible with the CUDA version used in the Docker image

View File

@ -4,27 +4,15 @@ services:
dockerfile: ./docker/docker-cuda/Dockerfile
context: ../..
args:
INSTALL_BNB: "false"
INSTALL_VLLM: "false"
INSTALL_DEEPSPEED: "false"
INSTALL_FLASHATTN: "false"
INSTALL_LIGER_KERNEL: "false"
INSTALL_HQQ: "false"
INSTALL_EETQ: "false"
PIP_INDEX: https://pypi.org/simple
EXTRAS: metrics
container_name: llamafactory
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
ports:
- "7860:7860"
- "8000:8000"
ipc: host
tty: true
shm_size: "16gb"
# shm_size: "16gb" # ipc: host is set
stdin_open: true
command: bash
deploy:
@ -33,5 +21,5 @@ services:
devices:
- driver: nvidia
count: "all"
capabilities: [gpu]
capabilities: [ gpu ]
restart: unless-stopped

View File

@ -1,67 +1,63 @@
# Use the Ubuntu 22.04 image with CANN 8.0.rc1
# More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
# FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
FROM ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
# FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
# FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
# https://hub.docker.com/r/ascendai/cann/tags
ARG BASE_IMAGE=ascendai/cann:8.1.rc1-910b-ubuntu22.04-py3.11
FROM ${BASE_IMAGE}
# Installation arguments
ARG PIP_INDEX=https://pypi.org/simple
ARG EXTRAS=torch-npu,metrics
ARG HTTP_PROXY=""
ARG PYTORCH_INDEX=https://download.pytorch.org/whl/cpu
# Define environments
ENV MAX_JOBS=16
ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV PIP_ROOT_USER_ACTION=ignore
ENV http_proxy="${HTTP_PROXY}"
ENV https_proxy="${HTTP_PROXY}"
# Define installation arguments
ARG INSTALL_DEEPSPEED=false
ARG PIP_INDEX=https://pypi.org/simple
ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
ARG HTTP_PROXY=
# Use Bash instead of default /bin/sh
SHELL ["/bin/bash", "-c"]
# Set the working directory
WORKDIR /app
# Set http proxy
RUN if [ -n "$HTTP_PROXY" ]; then \
echo "Configuring proxy..."; \
export http_proxy=$HTTP_PROXY; \
export https_proxy=$HTTP_PROXY; \
fi
# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
pip install --no-cache-dir --upgrade pip packaging wheel setuptools
# Install torch-npu
RUN pip uninstall -y torch torchvision torchaudio && \
pip install --no-cache-dir "torch-npu==2.5.1" "torchvision==0.20.1" --index-url "${PYTORCH_INDEX}"
# Install the requirements
COPY requirements.txt /app
RUN pip config set global.index-url "$PIP_INDEX" && \
pip config set global.extra-index-url "$TORCH_INDEX" && \
python -m pip install --upgrade pip && \
if [ -n "$HTTP_PROXY" ]; then \
python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
else \
python -m pip install -r requirements.txt; \
fi
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application into the image
COPY . /app
# Install the LLaMA Factory
RUN EXTRA_PACKAGES="torch-npu,metrics"; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
if [ -n "$HTTP_PROXY" ]; then \
pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
else \
pip install -e ".[$EXTRA_PACKAGES]"; \
fi
# Unset http proxy
RUN if [ -n "$HTTP_PROXY" ]; then \
unset http_proxy; \
unset https_proxy; \
fi
# Install LLaMA Factory
RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
# Set up volumes
VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
# VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
ENV GRADIO_SERVER_PORT 7860
# Expose port 7860 for LLaMA Board
ENV GRADIO_SERVER_PORT=7860
EXPOSE 7860
# Expose port 8000 for the API service
ENV API_PORT 8000
# Expose port 8000 for API service
ENV API_PORT=8000
EXPOSE 8000
# unset proxy
ENV http_proxy=
ENV https_proxy=
# Reset pip config
RUN pip config unset global.index-url && \
pip config unset global.extra-index-url

View File

@ -4,15 +4,10 @@ services:
dockerfile: ./docker/docker-npu/Dockerfile
context: ../..
args:
INSTALL_DEEPSPEED: "false"
PIP_INDEX: https://pypi.org/simple
EXTRAS: torch-npu,metrics
container_name: llamafactory
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
- /usr/local/dcmi:/usr/local/dcmi
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
- /usr/local/Ascend/driver:/usr/local/Ascend/driver
@ -22,7 +17,7 @@ services:
- "8000:8000"
ipc: host
tty: true
shm_size: "16gb"
# shm_size: "16gb" # ipc: host is set
stdin_open: true
command: bash
devices:

View File

@ -1,93 +1,71 @@
FROM hardandheavy/transformers-rocm:2.2.0
# https://hub.docker.com/r/rocm/pytorch/tags
ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
FROM ${BASE_IMAGE}
# Installation arguments
ARG PIP_INDEX=https://pypi.org/simple
ARG EXTRAS=metrics
ARG INSTALL_FLASHATTN=false
ARG HTTP_PROXY=""
ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
# Define environments
ENV MAX_JOBS=4
ENV MAX_JOBS=16
ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV DEBIAN_FRONTEND=noninteractive
ENV NODE_OPTIONS=""
ENV PIP_ROOT_USER_ACTION=ignore
ENV http_proxy="${HTTP_PROXY}"
ENV https_proxy="${HTTP_PROXY}"
# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG INSTALL_FLASHATTN=false
ARG INSTALL_LIGER_KERNEL=false
ARG INSTALL_HQQ=false
ARG PIP_INDEX=https://pypi.org/simple
ARG HTTP_PROXY=
# Use Bash instead of default /bin/sh
SHELL ["/bin/bash", "-c"]
# Set the working directory
WORKDIR /app
# Set http proxy
RUN if [ -n "$HTTP_PROXY" ]; then \
echo "Configuring proxy..."; \
export http_proxy=$HTTP_PROXY; \
export https_proxy=$HTTP_PROXY; \
fi
# Change pip source
RUN pip config set global.index-url "${PIP_INDEX}" && \
pip config set global.extra-index-url "${PIP_INDEX}" && \
pip install --no-cache-dir --upgrade pip packaging wheel setuptools
# Reinstall pytorch rocm
RUN pip uninstall -y torch torchvision torchaudio && \
pip install --no-cache-dir --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
# Install the requirements
COPY requirements.txt /app
RUN pip config set global.index-url "$PIP_INDEX" && \
pip config set global.extra-index-url "$PIP_INDEX" && \
python -m pip install --upgrade pip && \
if [ -n "$HTTP_PROXY" ]; then \
python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
else \
python -m pip install -r requirements.txt; \
fi
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application into the image
COPY . /app
# Install the LLaMA Factory
RUN EXTRA_PACKAGES="metrics"; \
if [ "$INSTALL_BNB" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
fi; \
if [ "$INSTALL_VLLM" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
fi; \
if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
fi; \
if [ "$INSTALL_HQQ" == "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
fi; \
if [ -n "$HTTP_PROXY" ]; then \
pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
else \
pip install -e ".[$EXTRA_PACKAGES]"; \
fi
# Install LLaMA Factory
RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
# Rebuild flash attention
RUN pip uninstall -y transformer-engine flash-attn && \
if [ "$INSTALL_FLASHATTN" == "true" ]; then \
RUN if [ "${INSTALL_FLASHATTN}" == "true" ]; then \
pip uninstall -y ninja && \
if [ -n "$HTTP_PROXY" ]; then \
pip install --proxy=$HTTP_PROXY ninja && \
pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
else \
pip install ninja && \
pip install --no-cache-dir flash-attn --no-build-isolation; \
fi; \
fi
# Unset http proxy
RUN if [ -n "$HTTP_PROXY" ]; then \
unset http_proxy; \
unset https_proxy; \
pip install --no-cache-dir ninja && \
pip install --no-cache-dir flash-attn --no-build-isolation; \
fi
# Set up volumes
VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
# VOLUME [ "/root/.cache/huggingface", "/app/shared_data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
ENV GRADIO_SERVER_PORT 7860
# Expose port 7860 for LLaMA Board
ENV GRADIO_SERVER_PORT=7860
EXPOSE 7860
# Expose port 8000 for the API service
ENV API_PORT 8000
# Expose port 8000 for API service
ENV API_PORT=8000
EXPOSE 8000
# unset proxy
ENV http_proxy=
ENV https_proxy=
# Reset pip config
RUN pip config unset global.index-url && \
pip config unset global.extra-index-url

View File

@ -4,27 +4,15 @@ services:
dockerfile: ./docker/docker-rocm/Dockerfile
context: ../..
args:
INSTALL_BNB: "false"
INSTALL_VLLM: "false"
INSTALL_DEEPSPEED: "false"
INSTALL_FLASHATTN: "false"
INSTALL_LIGER_KERNEL: "false"
INSTALL_HQQ: "false"
PIP_INDEX: https://pypi.org/simple
EXTRAS: metrics
container_name: llamafactory
volumes:
- ../../hf_cache:/root/.cache/huggingface
- ../../ms_cache:/root/.cache/modelscope
- ../../om_cache:/root/.cache/openmind
- ../../data:/app/data
- ../../output:/app/output
- ../../saves:/app/saves
ports:
- "7860:7860"
- "8000:8000"
ipc: host
tty: true
shm_size: "16gb"
# shm_size: "16gb" # ipc: host is set
stdin_open: true
command: bash
devices:

View File

@ -1,3 +1,4 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,14 +22,15 @@ import pandas as pd
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and others},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
C-Eval is a comprehensive Chinese evaluation suite for foundation models.
It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""
_HOMEPAGE = "https://cevalbenchmark.com"

View File

@ -1,3 +1,4 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,14 +22,15 @@ import pandas as pd
_CITATION = """\
@article{li2023cmmlu,
title={CMMLU: Measuring massive multitask language understanding in Chinese},
author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},
author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and others},
journal={arXiv preprint arXiv:2306.09212},
year={2023}
}
"""
_DESCRIPTION = """\
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge
and reasoning abilities of LLMs within the Chinese language and cultural context.
"""
_HOMEPAGE = "https://github.com/haonan-li/CMMLU"

View File

@ -1,3 +1,4 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@ -21,14 +22,15 @@ import pandas as pd
_CITATION = """\
@article{hendryckstest2021,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
author={Dan Hendrycks and Collin Burns and others},
journal={Proceedings of the International Conference on Learning Representations (ICLR)},
year={2021}
}
"""
_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart,
Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""
_HOMEPAGE = "https://github.com/hendrycks/test"

View File

@ -15,6 +15,24 @@ Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose
By default, LLaMA-Factory uses all visible computing devices.
Basic usage:
```bash
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
Advanced usage:
```bash
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml \
learning_rate=1e-5 \
logging_steps=1
```
```bash
bash examples/train_lora/llama3_lora_sft.sh
```
## Examples
### LoRA Fine-Tuning
@ -34,8 +52,7 @@ llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
#### Multimodal Supervised Fine-Tuning
```bash
llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml
llamafactory-cli train examples/train_lora/qwen2vl_lora_sft.yaml
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml
```
#### DPO/ORPO/SimPO Training
@ -47,7 +64,7 @@ llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml
#### Multimodal DPO/ORPO/SimPO Training
```bash
llamafactory-cli train examples/train_lora/qwen2vl_lora_dpo.yaml
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml
```
#### Reward Modeling
@ -148,10 +165,18 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
### Elastic and Fault-Tolerant Supervised Fine-Tuning on Multiple Nodes
To launch an elastic job that retries up to `MAX_RESTARTS` times on failure, run the following on at least `MIN_NNODES` nodes and at most `MAX_NNODES` nodes. `RDZV_ID` should be set to a unique job ID (shared by all nodes participating in the job). See also [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html).
```bash
FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
#### Multimodal Supervised Fine-Tuning
```bash
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.yaml
```
### Merging LoRA Adapters and Quantization
@ -178,10 +203,11 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
### Inferring LoRA Fine-Tuned Models
#### Batch Generation using vLLM Tensor Parallel
#### Evaluation using vLLM's Multi-GPU Inference
```
python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo
python scripts/eval_bleu_rouge.py generated_predictions.jsonl
```
#### Use CLI ChatBox
@ -228,6 +254,12 @@ llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```
#### Full-Parameter Fine-Tuning using Muon
```bash
llamafactory-cli train examples/extras/muon/qwen2_full_sft.yaml
```
#### LoRA+ Fine-Tuning
```bash
@ -258,9 +290,3 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```bash
bash examples/extras/fsdp_qlora/train.sh
```
#### Computing BLEU and ROUGE Scores
```bash
llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
```

View File

@ -15,6 +15,24 @@
LLaMA-Factory 默认使用所有可见的计算设备。
基础用法:
```bash
llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
高级用法:
```bash
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml \
learning_rate=1e-5 \
logging_steps=1
```
```bash
bash examples/train_lora/llama3_lora_sft.sh
```
## 示例
### LoRA 微调
@ -34,8 +52,7 @@ llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
#### 多模态指令监督微调
```bash
llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml
llamafactory-cli train examples/train_lora/qwen2vl_lora_sft.yaml
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_sft.yaml
```
#### DPO/ORPO/SimPO 训练
@ -47,7 +64,7 @@ llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml
#### 多模态 DPO/ORPO/SimPO 训练
```bash
llamafactory-cli train examples/train_lora/qwen2vl_lora_dpo.yaml
llamafactory-cli train examples/train_lora/qwen2_5vl_lora_dpo.yaml
```
#### 奖励模型训练
@ -89,6 +106,14 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
```
### 支持弹性和容错的多机指令监督微调
要启动一个支持弹性节点和容错的多机指令微调,在每个节点上执行以下命令。弹性节点数量范围为 `MIN_NNODES:MAX_NNODES`,每个节点最多允许因为错误重启 `MAX_RESTARTS` 次。`RDZV_ID` 应设置为一个唯一的作业 ID(由参与该作业的所有节点共享)。更多信息可以参考官方文档 [torchrun](https://docs.pytorch.org/docs/stable/elastic/run.html)。
```bash
FORCE_TORCHRUN=1 MIN_NNODES=1 MAX_NNODES=3 MAX_RESTARTS=3 RDZV_ID=llamafactory MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
```
#### 使用 DeepSpeed ZeRO-3 平均分配显存
```bash
@ -151,7 +176,7 @@ FORCE_TORCHRUN=1 NNODES=2 NODE_RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500
#### 多模态指令监督微调
```bash
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2vl_full_sft.yaml
FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/qwen2_5vl_full_sft.yaml
```
### 合并 LoRA 适配器与模型量化
@ -178,10 +203,11 @@ llamafactory-cli export examples/merge_lora/llama3_full_sft.yaml
### 推理 LoRA 模型
#### 使用 vLLM+TP 批量推理
#### 使用 vLLM 多卡推理评估
```
python scripts/vllm_infer.py --model_name_or_path path_to_merged_model --dataset alpaca_en_demo
python scripts/vllm_infer.py --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct --template llama3 --dataset alpaca_en_demo
python scripts/eval_bleu_rouge.py generated_predictions.jsonl
```
#### 使用命令行对话框
@ -228,6 +254,12 @@ llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml
llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml
```
#### 使用 Muon 进行全参数训练
```bash
llamafactory-cli train examples/extras/muon/qwen2_full_sft.yaml
```
#### LoRA+ 微调
```bash
@ -258,9 +290,3 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml
```bash
bash examples/extras/fsdp_qlora/train.sh
```
#### 计算 BLEU 和 ROUGE 分数
```bash
llamafactory-cli train examples/extras/nlg_eval/llama3_lora_predict.yaml
```

View File

@ -7,16 +7,16 @@ fsdp_config:
fsdp_backward_prefetch: BACKWARD_PRE
fsdp_forward_prefetch: false
fsdp_cpu_ram_efficient_loading: true
fsdp_offload_params: true # offload may affect training speed
fsdp_offload_params: false
fsdp_sharding_strategy: FULL_SHARD
fsdp_state_dict_type: FULL_STATE_DICT
fsdp_sync_module_states: true
fsdp_use_orig_params: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16 # or fp16
num_machines: 1 # the number of nodes
num_processes: 2 # the number of GPUs in all nodes
mixed_precision: bf16 # or fp16
num_machines: 1 # the number of nodes
num_processes: 2 # the number of GPUs in all nodes
rdzv_backend: static
same_network: true
tpu_env: []

View File

@ -0,0 +1,25 @@
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
downcast_bf16: 'no'
fsdp_config:
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
fsdp_backward_prefetch: BACKWARD_PRE
fsdp_forward_prefetch: false
fsdp_cpu_ram_efficient_loading: true
fsdp_offload_params: true # offload may affect training speed
fsdp_sharding_strategy: FULL_SHARD
fsdp_state_dict_type: FULL_STATE_DICT
fsdp_sync_module_states: true
fsdp_use_orig_params: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16 # or fp16
num_machines: 1 # the number of nodes
num_processes: 2 # the number of GPUs in all nodes
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

View File

@ -15,6 +15,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/qwen2-1_5b/full/sft
@ -22,6 +23,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -20,6 +20,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/full/sft
@ -27,6 +28,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -20,6 +20,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/full/sft
@ -27,6 +28,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -17,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -24,6 +25,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -19,6 +19,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/full/sft
@ -26,6 +27,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -17,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b-pro/freeze/sft
@ -24,6 +25,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -17,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -24,6 +25,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -15,6 +15,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b-mod/full/sft
@ -22,6 +23,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -1,17 +1,16 @@
### model
model_name_or_path: llava-hf/llava-1.5-7b-hf
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
trust_remote_code: true
### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 8
lora_target: all
finetuning_type: full
use_muon: true
### dataset
dataset: mllm_demo
template: llava
dataset: identity,alpaca_en_demo
template: qwen
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
@ -19,23 +18,23 @@ preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llava1_5-7b/lora/sft
output_dir: saves/qwen2-1_5b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
resume_from_checkpoint: null
### eval
# val_size: 0.1

View File

@ -18,10 +18,12 @@ cutoff_len: 2048
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/predict
overwrite_output_dir: true
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### eval
per_device_eval_batch_size: 1

View File

@ -19,6 +19,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -26,6 +27,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -1,4 +1,4 @@
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
infer_backend: huggingface # choices: [huggingface, vllm, sglang]
trust_remote_code: true

View File

@ -1,4 +1,4 @@
model_name_or_path: saves/llama3-8b/full/sft
template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
infer_backend: huggingface # choices: [huggingface, vllm, sglang]
trust_remote_code: true

View File

@ -1,5 +1,5 @@
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
template: llama3
infer_backend: huggingface # choices: [huggingface, vllm]
infer_backend: huggingface # choices: [huggingface, vllm, sglang]
trust_remote_code: true

View File

@ -1,5 +0,0 @@
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3
infer_backend: vllm
vllm_enforce_eager: true
trust_remote_code: true

View File

@ -1,4 +0,0 @@
model_name_or_path: llava-hf/llava-1.5-7b-hf
template: llava
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true

View File

@ -0,0 +1,4 @@
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
template: qwen2_vl
infer_backend: huggingface # choices: [huggingface, vllm, sglang]
trust_remote_code: true

View File

@ -1,4 +0,0 @@
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
template: qwen2_vl
infer_backend: huggingface # choices: [huggingface, vllm]
trust_remote_code: true

View File

@ -6,5 +6,5 @@ trust_remote_code: true
### export
export_dir: output/llama3_full_sft
export_size: 5
export_device: cpu
export_device: cpu # choices: [cpu, auto]
export_legacy_format: false

View File

@ -6,7 +6,7 @@ trust_remote_code: true
### export
export_dir: output/llama3_gptq
export_quantization_bit: 4
export_quantization_dataset: data/c4_demo.json
export_quantization_dataset: data/c4_demo.jsonl
export_size: 5
export_device: cpu
export_device: cpu # choices: [cpu, auto]
export_legacy_format: false

View File

@ -9,5 +9,5 @@ trust_remote_code: true
### export
export_dir: output/llama3_lora_sft
export_size: 5
export_device: cpu
export_device: cpu # choices: [cpu, auto]
export_legacy_format: false

View File

@ -1,13 +1,13 @@
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
adapter_name_or_path: saves/qwen2_5vl-7b/lora/sft
template: qwen2_vl
trust_remote_code: true
### export
export_dir: output/qwen2_vl_lora_sft
export_dir: output/qwen2_5vl_lora_sft
export_size: 5
export_device: cpu
export_device: cpu # choices: [cpu, auto]
export_legacy_format: false

View File

@ -24,6 +24,7 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -0,0 +1,49 @@
### model
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
image_max_pixels: 262144
video_max_pixels: 16384
trust_remote_code: true
### method
stage: sft
do_train: true
finetuning_type: full
freeze_vision_tower: true
freeze_multi_modal_projector: true
freeze_language_model: false
deepspeed: examples/deepspeed/ds_z3_config.json
### dataset
dataset: mllm_demo,identity,alpaca_en_demo
template: qwen2_vl
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/qwen2_5vl-7b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
resume_from_checkpoint: null
### eval
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500

View File

@ -27,6 +27,7 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -17,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/kto
@ -24,6 +25,7 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -17,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/ppo
@ -24,6 +25,7 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -24,6 +24,7 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -25,6 +25,7 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -0,0 +1,36 @@
#!/bin/bash
# Launch a LoRA supervised fine-tuning (SFT) run with `llamafactory-cli train`,
# passing every option on the command line.
# Print each command before it is executed.
set -x
# Model identifier forwarded to --model_name_or_path below.
MODEL_PATH=meta-llama/Meta-Llama-3-8B-Instruct
# The whole invocation is one backslash-continued command, so no comments can be placed between the flags.
llamafactory-cli train \
--model_name_or_path ${MODEL_PATH} \
--trust_remote_code \
--stage sft \
--do_train \
--finetuning_type lora \
--lora_rank 8 \
--lora_target all \
--dataset identity,alpaca_en_demo \
--template llama3 \
--cutoff_len 2048 \
--max_samples 1000 \
--overwrite_cache \
--preprocessing_num_workers 16 \
--dataloader_num_workers 4 \
--output_dir saves/llama3-8b/lora/sft \
--logging_steps 10 \
--save_steps 500 \
--plot_loss \
--overwrite_output_dir \
--save_only_model false \
--report_to none \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 8 \
--learning_rate 1e-4 \
--num_train_epochs 3.0 \
--lr_scheduler_type cosine \
--warmup_ratio 0.1 \
--bf16 \
--ddp_timeout 180000000

View File

@ -25,6 +25,7 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -26,6 +26,7 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -26,14 +26,21 @@ save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### ray
ray_run_name: llama3_8b_sft_lora
ray_storage_path: ./saves
ray_num_workers: 4 # number of GPUs to use
ray_num_workers: 4 # Number of GPUs to use.
placement_strategy: PACK
resources_per_worker:
GPU: 1
placement_strategy: PACK
# ray_init_kwargs:
# runtime_env:
# env_vars:
# <YOUR-ENV-VAR-HERE>: "<YOUR-ENV-VAR-HERE>"
# pip:
# - emoji
### train
per_device_train_batch_size: 1

View File

@ -1,21 +1,20 @@
# pip install git+https://github.com/hiyouga/transformers.git@llama4_train
### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
image_max_pixels: 262144
video_max_pixels: 16384
model_name_or_path: meta-llama/Llama-4-Scout-17B-16E-Instruct
trust_remote_code: true
### method
stage: sft
do_train: true
finetuning_type: full
freeze_vision_tower: true # choices: [true, false]
freeze_multi_modal_projector: true # choices: [true, false]
freeze_language_model: false # choices: [true, false]
finetuning_type: lora
lora_rank: 8
lora_target: all
deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
### dataset
dataset: mllm_demo,identity,alpaca_en_demo
template: qwen2_vl
template: llama4
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
@ -23,17 +22,18 @@ preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/qwen2_vl-7b/full/sft
output_dir: saves/llama4-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-5
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
@ -42,6 +42,7 @@ ddp_timeout: 180000000
resume_from_checkpoint: null
### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps

View File

@ -1,5 +1,5 @@
### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
image_max_pixels: 262144
video_max_pixels: 16384
trust_remote_code: true
@ -23,12 +23,13 @@ preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/qwen2_vl-7b/lora/dpo
output_dir: saves/qwen2_5vl-7b/lora/dpo
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -1,5 +1,5 @@
### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
image_max_pixels: 262144
video_max_pixels: 16384
trust_remote_code: true
@ -21,12 +21,13 @@ preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/qwen2_vl-7b/lora/sft
output_dir: saves/qwen2_5vl-7b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -16,6 +16,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -23,6 +24,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -16,6 +16,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -23,6 +24,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -1,7 +1,7 @@
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
quantization_method: bitsandbytes
quantization_method: bnb
double_quantization: false
trust_remote_code: true
@ -19,6 +19,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -26,6 +27,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -16,6 +16,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -23,6 +24,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -1,7 +1,7 @@
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
quantization_method: bitsandbytes # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
quantization_method: bnb # choices: [bnb, hqq, eetq]
trust_remote_code: true
### method
@ -18,6 +18,7 @@ cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
### output
output_dir: saves/llama3-8b/lora/sft
@ -25,6 +26,8 @@ logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size: 1

View File

@ -19,13 +19,36 @@ dynamic = [
]
[tool.ruff]
target-version = "py38"
target-version = "py39"
line-length = 119
indent-width = 4
[tool.ruff.lint]
ignore = ["C408", "C901", "E501", "E731", "E741", "W605"]
select = ["C", "E", "F", "I", "W"]
ignore = [
"C408", # collection
"C901", # complex
"E501", # line too long
"E731", # lambda function
"E741", # ambiguous var name
"D100", # no doc public module
"D101", # no doc public class
"D102", # no doc public method
"D103", # no doc public function
"D104", # no doc public package
"D105", # no doc magic method
"D107", # no doc __init__
]
extend-select = [
"C", # complexity
"E", # error
"F", # pyflakes
"I", # isort
"W", # warning
"UP", # pyupgrade
"D", # pydocstyle
"PT009", # pytest assert
"RUF022", # sort __all__
]
[tool.ruff.lint.isort]
lines-after-imports = 2
@ -38,9 +61,12 @@ known-third-party = [
"peft",
"torch",
"transformers",
"trl"
"trl",
]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
@ -56,10 +82,14 @@ conflicts = [
],
[
{ extra = "torch-npu" },
{ extra = "liger-kernel" },
{ extra = "vllm" },
],
[
{ extra = "torch-npu" },
{ extra = "sglang" },
],
[
{ extra = "vllm" },
]
{ extra = "sglang" },
],
]

View File

@ -1,26 +1,36 @@
transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*;python_version<'3.10'
transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
datasets>=2.16.0,<=3.2.0
accelerate>=0.34.0,<=1.2.1
peft>=0.11.1,<=0.12.0
# core deps
transformers>=4.49.0,<=4.52.4,!=4.52.0; sys_platform != 'darwin'
transformers>=4.49.0,<=4.51.3,!=4.52.0; sys_platform == 'darwin'
datasets>=2.16.0,<=3.6.0
accelerate>=1.3.0,<=1.7.0
peft>=0.14.0,<=0.15.2
trl>=0.8.6,<=0.9.6
tokenizers>=0.19.0,<=0.21.0
gradio>=4.38.0,<=5.21.0
tokenizers>=0.19.0,<=0.21.1
# gui
gradio>=4.38.0,<=5.31.0
matplotlib>=3.7.0
tyro<0.9.0
# ops
einops
numpy<2.0.0
pandas>=2.0.0
scipy
einops
# model and tokenizer
sentencepiece
tiktoken
modelscope>=1.14.0
hf-transfer
# python
fire
omegaconf
packaging
protobuf
pyyaml
pydantic<=2.10.6
# api
uvicorn
pydantic
fastapi
sse-starlette
matplotlib>=3.7.0
fire
packaging
pyyaml
numpy<2.0.0
# media
av
librosa
tyro<0.9.0

View File

@ -23,8 +23,8 @@ require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
def main():
client = OpenAI(
api_key="{}".format(os.environ.get("API_KEY", "0")),
base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
api_key="{}".format(os.getenv("API_KEY", "0")),
base_url="http://localhost:{}/v1".format(os.getenv("API_PORT", 8000)),
)
messages = []
messages.append(

View File

@ -14,7 +14,6 @@
import json
import os
from typing import Sequence
from openai import OpenAI
from transformers.utils.versions import require_version
@ -23,7 +22,7 @@ from transformers.utils.versions import require_version
require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
def calculate_gpa(grades: list[str], hours: list[int]) -> float:
grade_to_score = {"A": 4, "B": 3, "C": 2}
total_score, total_hour = 0, 0
for grade, hour in zip(grades, hours):
@ -34,8 +33,8 @@ def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
def main():
client = OpenAI(
api_key="{}".format(os.environ.get("API_KEY", "0")),
base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
api_key="{}".format(os.getenv("API_KEY", "0")),
base_url="http://localhost:{}/v1".format(os.getenv("API_PORT", 8000)),
)
tools = [
{

View File

@ -15,7 +15,7 @@
import json
import os
from collections import OrderedDict
from typing import Any, Dict
from typing import Any
import fire
import torch
@ -29,13 +29,13 @@ CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
baichuan2_state_dict: dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu", weights_only=True)
baichuan2_state_dict.update(shard_weight)
llama_state_dict: Dict[str, torch.Tensor] = OrderedDict()
llama_state_dict: dict[str, torch.Tensor] = OrderedDict()
for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
if "W_pack" in key:
proj_size = value.size(0) // 3
@ -75,7 +75,7 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso
def save_config(input_dir: str, output_dir: str):
with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
llama2_config_dict: Dict[str, Any] = json.load(f)
llama2_config_dict: dict[str, Any] = json.load(f)
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict.pop("auto_map", None)
@ -94,8 +94,8 @@ def llamafy_baichuan2(
shard_size: str = "2GB",
save_safetensors: bool = True,
):
r"""
Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
r"""Convert the Baichuan2-7B model in the same format as LLaMA2-7B.
Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
"""

View File

@ -15,7 +15,7 @@
import json
import os
from collections import OrderedDict
from typing import Any, Dict
from typing import Any
import fire
import torch
@ -37,14 +37,14 @@ CONFIG_NAME = "config.json"
def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
qwen_state_dict: dict[str, torch.Tensor] = OrderedDict()
for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
for key in f.keys():
qwen_state_dict[key] = f.get_tensor(key)
llama_state_dict: Dict[str, torch.Tensor] = OrderedDict()
llama_state_dict: dict[str, torch.Tensor] = OrderedDict()
torch_dtype = None
for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
if torch_dtype is None:
@ -112,9 +112,9 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso
def save_config(input_dir: str, output_dir: str, torch_dtype: str):
with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
qwen_config_dict: Dict[str, Any] = json.load(f)
qwen_config_dict: dict[str, Any] = json.load(f)
llama2_config_dict: Dict[str, Any] = OrderedDict()
llama2_config_dict: dict[str, Any] = OrderedDict()
llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
llama2_config_dict["hidden_act"] = "silu"
llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
@ -147,8 +147,8 @@ def llamafy_qwen(
shard_size: str = "2GB",
save_safetensors: bool = False,
):
r"""
Converts the Qwen models in the same format as LLaMA2.
r"""Convert the Qwen models in the same format as LLaMA2.
Usage: python llamafy_qwen.py --input_dir input --output_dir output
Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
"""

View File

@ -0,0 +1,39 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from transformers import Llama4Config, Llama4ForConditionalGeneration, Llama4TextConfig, Llama4VisionConfig
if __name__ == "__main__":
    # Text-tower dimensions; the per-head size is derived from them.
    text_hidden_size = 512
    text_num_heads = 8

    # Build the composite config from small vision and text sub-configs.
    tiny_config = Llama4Config(
        vision_config=Llama4VisionConfig(
            hidden_size=1408,
            image_size=336,
            intermediate_size=5632,
            num_attention_heads=16,
            num_hidden_layers=4,
            vision_output_dim=4096,
        ),
        text_config=Llama4TextConfig(
            hidden_size=text_hidden_size,
            intermediate_size=1024,
            intermediate_size_mlp=1024,
            num_hidden_layers=4,
            num_attention_heads=text_num_heads,
            num_key_value_heads=2,
            head_dim=text_hidden_size // text_num_heads,
            num_local_experts=2,
        ),
    )

    # Instantiate the model from the config and write it to ./tiny-llama4.
    tiny_model = Llama4ForConditionalGeneration._from_config(tiny_config)
    tiny_model.save_pretrained("tiny-llama4")

View File

@ -0,0 +1,79 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import time
import fire
from datasets import load_dataset
try:
import jieba # type: ignore
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu # type: ignore
from rouge_chinese import Rouge # type: ignore
jieba.setLogLevel(logging.CRITICAL)
jieba.initialize()
except ImportError:
print("Please install llamafactory with `pip install -e .[metrics]`.")
raise
def compute_metrics(sample):
    """Score one prediction against its label.

    ROUGE is computed on the jieba-segmented, space-joined texts; the BLEU score is
    computed on the raw character lists of the two strings.

    Args:
        sample: Mapping with a ``"predict"`` and a ``"label"`` string.

    Returns:
        A dict with the ROUGE F-scores (``rouge-1``, ``rouge-2``, ``rouge-l``) and the
        BLEU score (``bleu-4``), each multiplied by 100 and rounded to 4 decimals.
    """
    predicted_text = " ".join(jieba.cut(sample["predict"]))
    label_text = " ".join(jieba.cut(sample["label"]))

    bleu_score = sentence_bleu(
        [list(sample["label"])],
        list(sample["predict"]),
        smoothing_function=SmoothingFunction().method3,
    )

    if predicted_text.split() and label_text.split():
        rouge_result = Rouge().get_scores(predicted_text, label_text)[0]
    else:
        # One side has no tokens after segmentation, so report zeros for every ROUGE variant.
        rouge_result = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}}

    metric_result = {name: round(value["f"] * 100, 4) for name, value in rouge_result.items()}
    metric_result["bleu-4"] = round(bleu_score * 100, 4)
    return metric_result
def main(filename: str):
    """Compute per-sample metrics for a JSON predictions file and save their averages.

    Args:
        filename: Path of the JSON data file; every record is passed to ``compute_metrics``.

    The average of each metric is printed and the averages are written to
    ``predictions_score.json`` in the current working directory.
    """
    began_at = time.time()
    records = load_dataset("json", data_files=filename, split="train")
    scored = records.map(compute_metrics, num_proc=8, remove_columns=records.column_names)

    average_score = {}
    for task, scores in sorted(scored.to_dict().items(), key=lambda item: item[0]):
        mean_score = sum(scores) / len(scores)
        print(f"{task}: {mean_score:.4f}")
        average_score[task] = mean_score

    with open("predictions_score.json", "w", encoding="utf-8") as f:
        json.dump(average_score, f, indent=4)

    print(f"\nDone in {time.time() - began_at:.3f}s.\nScore file saved to predictions_score.json")
# Expose `main` as a command-line entry point through python-fire.
if __name__ == "__main__":
    fire.Fire(main)

View File

@ -18,7 +18,7 @@
import json
import os
from collections import OrderedDict
from typing import TYPE_CHECKING, Dict
from typing import TYPE_CHECKING
import fire
import torch
@ -44,11 +44,11 @@ def block_expansion(
shard_size: str = "5GB",
save_safetensors: bool = True,
):
r"""
Performs block expansion for LLaMA, Mistral, Qwen2 or Yi models.
r"""Perform block expansion for LLaMA, Mistral, Qwen2 or Yi models.
Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
"""
config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
config: PretrainedConfig = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
num_layers = getattr(config, "num_hidden_layers")
if num_layers % num_expand != 0:
raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.")
@ -70,7 +70,7 @@ def block_expansion(
split = num_layers // num_expand
layer_cnt = 0
state_dict = model.state_dict()
output_state_dict: Dict[str, "torch.Tensor"] = OrderedDict()
output_state_dict: dict[str, torch.Tensor] = OrderedDict()
for i in range(num_layers):
for key, value in state_dict.items():
if f".{i:d}." in key:

View File

@ -38,8 +38,8 @@ def quantize_loftq(
lora_target: tuple = ("q_proj", "v_proj"),
save_safetensors: bool = True,
):
r"""
Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
r"""Initialize LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ).
Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
"""
if isinstance(lora_target, str):
@ -72,7 +72,7 @@ def quantize_loftq(
print(f"Adapter weights saved in {loftq_dir}")
# Save base model
base_model: "PreTrainedModel" = peft_model.unload()
base_model: PreTrainedModel = peft_model.unload()
base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
tokenizer.save_pretrained(output_dir)
print(f"Model weights saved in {output_dir}")

View File

@ -37,8 +37,8 @@ def quantize_pissa(
lora_target: tuple = ("q_proj", "v_proj"),
save_safetensors: bool = True,
):
r"""
Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA)
r"""Initialize LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA).
Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir
"""
if isinstance(lora_target, str):
@ -67,7 +67,7 @@ def quantize_pissa(
print(f"Adapter weights saved in {pissa_dir}")
# Save base model
base_model: "PreTrainedModel" = peft_model.unload()
base_model: PreTrainedModel = peft_model.unload()
base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
tokenizer.save_pretrained(output_dir)
print(f"Model weights saved in {output_dir}")

136
scripts/qwen_omni_merge.py Normal file
View File

@ -0,0 +1,136 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Why we need this script for qwen_omni?
Because the qwen_omni model is composed of two parts:
1. [Thinker]: [audio_encoder, vision_encoder, LLM backbone], which our repository supports for post-training.
2. [Talker]: [audio_decoder, wave_model], which cannot be post-trained without a specific tokenizer.
When we post-train the model, we train only the [Thinker] part, and the [Talker] part is dropped.
So, to get the complete model, we need to merge the [Talker] part back with the [Thinker] part.
LoRA mode: [Thinker + LoRA weights] + [Original Talker] -> [Omni model]
Full mode: [Thinker] + [Original Talker] -> [Omni model]
For the processor, we save the one from the trained model instead of the one from the original model.
"""
import os
import shutil
import fire
from peft import PeftModel
from transformers import (
AutoProcessor,
Qwen2_5OmniForConditionalGeneration, # type: ignore
Qwen2_5OmniThinkerForConditionalGeneration,
)
def merge_lora(
    base_model_path: str,
    lora_checkpoint_path: str,
    extra_file: str = "spk_dict.pt",
    submodule_name: str = "thinker",
    save_path: str = "./merged_model_checkpoint",
):
    """Merge LoRA weights into one submodule of the original model and save the result.

    The original model is loaded on CPU, the LoRA adapter is applied to the named
    submodule and merged into it, and the full model is saved together with the
    processor loaded from the LoRA checkpoint directory. The extra file is copied
    from the base model directory when it exists there.

    Args:
        base_model_path (str): Path to the original model directory.
        lora_checkpoint_path (str): Path to the directory containing LoRA weights.
        extra_file (str): Name of the extra file to be copied (default: "spk_dict.pt").
        submodule_name (str): Name of the submodule to merge (default: "thinker").
        save_path (str): Directory where the merged model and configurations will be saved.

    Raises:
        AttributeError: If the loaded model has no attribute named ``submodule_name``.
    """
    # Load the complete original model.
    omni_model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
        base_model_path, torch_dtype="auto", device_map="cpu"
    )
    print("Successfully loaded the original model.")

    # Pick out the submodule that receives the LoRA weights (e.g. model.thinker).
    if not hasattr(omni_model, submodule_name):
        raise AttributeError(f"The model does not have a submodule named '{submodule_name}'.")

    target_module = getattr(omni_model, submodule_name)
    print(f"Successfully extracted submodule: {submodule_name}.")

    # Attach the adapter to the submodule; the processor comes from the same checkpoint directory.
    adapted_module = PeftModel.from_pretrained(target_module, lora_checkpoint_path)
    processor = AutoProcessor.from_pretrained(lora_checkpoint_path)
    print("LoRA weights and processor loaded successfully.")

    # Fold the adapter into the submodule weights and drop the LoRA wrappers.
    merged_module = adapted_module.merge_and_unload()
    print("LoRA weights merged successfully.")

    # Put the merged submodule back into the full model.
    setattr(omni_model, submodule_name, merged_module)

    # Persist the merged model and the processor side by side.
    omni_model.save_pretrained(save_path)
    processor.save_pretrained(save_path)
    print(f"Merged model and tokenizer saved to {save_path}.")

    # Carry the extra file over from the base model directory, if it exists there.
    extra_source = os.path.join(base_model_path, extra_file)
    extra_target = os.path.join(save_path, extra_file)
    if not os.path.exists(extra_source):
        print(f"File '{extra_file}' not found in {base_model_path}, skipping copy.")
        return

    shutil.copy(extra_source, extra_target)
    print(f"File '{extra_file}' copied from {base_model_path} to {save_path}.")
def save_full_model(
    saved_thinker_path: str,
    base_model_path: str,
    save_path: str = "./merged_model_checkpoint",
    extra_file: str = "spk_dict.pt",
):
    """Replace the thinker of the original model with a saved thinker and save the whole model.

    Both models are loaded on CPU. The processor is loaded from the saved thinker
    directory and written next to the merged model. The extra file is copied from
    the base model directory when it exists there.

    Args:
        saved_thinker_path (str): Path to the saved thinker weights.
        base_model_path (str): Directory path of the original model.
        save_path (str): Directory where the merged model and configurations will be saved.
        extra_file (str): Name of the extra file to be copied (default: "spk_dict.pt").
    """
    # Load the saved thinker first, then the original model, and swap the thinker in.
    trained_thinker = Qwen2_5OmniThinkerForConditionalGeneration.from_pretrained(
        saved_thinker_path, torch_dtype="auto", device_map="cpu"
    )
    omni_model = Qwen2_5OmniForConditionalGeneration.from_pretrained(
        base_model_path, torch_dtype="auto", device_map="cpu"
    )
    omni_model.thinker = trained_thinker

    # Persist the complete model together with the processor of the saved thinker.
    processor = AutoProcessor.from_pretrained(saved_thinker_path)
    omni_model.save_pretrained(save_path)
    processor.save_pretrained(save_path)
    print(f"Merged model and processor saved to {save_path}.")

    # Carry the extra file over from the base model directory, if it exists there.
    extra_source = os.path.join(base_model_path, extra_file)
    extra_target = os.path.join(save_path, extra_file)
    if not os.path.exists(extra_source):
        print(f"File '{extra_file}' not found in {base_model_path}, skipping copy.")
        return

    shutil.copy(extra_source, extra_target)
    print(f"File '{extra_file}' copied from {base_model_path} to {save_path}.")
# Expose both entry points as python-fire subcommands: `save_full` and `merge_lora`.
if __name__ == "__main__":
    fire.Fire({"save_full": save_full_model, "merge_lora": merge_lora})

View File

@ -29,8 +29,8 @@ def calculate_flops(
seq_length: int = 512,
flash_attn: str = "auto",
):
r"""
Calculates the flops of pre-trained models.
r"""Calculate the flops of pre-trained models.
Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
"""
with get_accelerator().device(0):

View File

@ -45,8 +45,8 @@ def calculate_lr(
is_mistral_or_gemma: bool = False, # mistral and gemma models opt for a smaller learning rate,
packing: bool = False,
):
r"""
Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
r"""Calculate the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
Usage:
python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en_demo --cutoff_len 1024 --batch_size 16
"""
@ -89,9 +89,8 @@ def calculate_lr(
lr = BASE_LR * math.sqrt(token_batch_size / BASE_BS) # lr ~ sqrt(batch_size)
lr = lr / 6.0 if is_mistral_or_gemma else lr
print(
"Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective token batch size {:.2f}".format(
lr, valid_ratio * 100, token_batch_size
)
f"Optimal learning rate is {lr:.2e} for valid ratio% {valid_ratio * 100:.2f} "
f"and effective token batch size {token_batch_size:.2f}"
)

View File

@ -34,9 +34,7 @@ def compute_model_flops(
include_recompute: bool = False,
include_flashattn: bool = False,
) -> int:
r"""
Calculates the FLOPs of model per forward/backward pass.
"""
r"""Calculate the FLOPs of model per forward/backward pass."""
config = AutoConfig.from_pretrained(model_name_or_path)
hidden_size = getattr(config, "hidden_size", None)
vocab_size = getattr(config, "vocab_size", None)
@ -86,9 +84,7 @@ def compute_model_flops(
def compute_device_flops(world_size: int) -> float:
r"""
Calculates the FLOPs of the device capability per second.
"""
r"""Calculate the FLOPs of the device capability per second."""
device_name = torch.cuda.get_device_name()
if "H100" in device_name or "H800" in device_name:
return 989 * 1e12 * world_size
@ -114,8 +110,8 @@ def calculate_mfu(
liger_kernel: bool = False,
unsloth_gc: bool = False,
) -> float:
r"""
Calculates MFU for given model and hyper-params.
r"""Calculate MFU for given model and hyper-params.
Usage: python cal_mfu.py --model_name_or_path path_to_model --batch_size 1 --seq_length 1024
"""
args = {

Some files were not shown because too many files have changed in this diff Show More