mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-22 22:02:51 +08:00
update dataset
Former-commit-id: 182b42504399d2755897b9737db1d36655a0fa50
This commit is contained in:
parent
d370fd724c
commit
d46c136c0e
@ -63,6 +63,10 @@
|
||||
|
||||
- For pre-training:
|
||||
- [Wiki Demo (en)](data/wiki_demo.txt)
|
||||
- [RefinedWeb (en)](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
|
||||
- [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata)
|
||||
- [Wikipedia (en)](https://huggingface.co/datasets/olm/olm-wikipedia-20221220)
|
||||
- [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)
|
||||
- For supervised fine-tuning:
|
||||
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
||||
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
||||
|
@ -63,6 +63,10 @@
|
||||
|
||||
- 用于二次预训练:
|
||||
- [Wiki Demo (en)](data/wiki_demo.txt)
|
||||
- [RefinedWeb (en)](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)
|
||||
- [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata)
|
||||
- [Wikipedia (en)](https://huggingface.co/datasets/olm/olm-wikipedia-20221220)
|
||||
- [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)
|
||||
- 用于指令监督微调:
|
||||
- [Stanford Alpaca (en)](https://github.com/tatsu-lab/stanford_alpaca)
|
||||
- [Stanford Alpaca (zh)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
||||
|
@ -21,7 +21,7 @@
|
||||
},
|
||||
"oaast_sft": {
|
||||
"file_name": "oaast_sft.json",
|
||||
"file_sha1": "08912e34fb165db137d3436db4c35321e33b28d1",
|
||||
"file_sha1": "7baf5d43e67a91f9bbdf4e400dbe033b87e9757e",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "input",
|
||||
@ -31,7 +31,7 @@
|
||||
},
|
||||
"oaast_sft_zh": {
|
||||
"file_name": "oaast_sft_zh.json",
|
||||
"file_sha1": "e0a2e7e8eff355434ada6c9b7f70bb915f941dd4",
|
||||
"file_sha1": "a6a91f18f80f37b10ded9cf633fb50c033bf7b9f",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "input",
|
||||
@ -187,5 +187,41 @@
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"refinedweb": {
|
||||
"hf_hub_url": "tiiuae/falcon-refinedweb",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"starcoder": {
|
||||
"hf_hub_url": "bigcode/starcoderdata",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"wikipedia_en": {
|
||||
"hf_hub_url": "olm/olm-wikipedia-20221220",
|
||||
"columns": {
|
||||
"prompt": "text",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"wikipedia_zh": {
|
||||
"hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
|
||||
"columns": {
|
||||
"prompt": "completion",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1 +1 @@
|
||||
0a57fbc1d8cb08a8cd71c5eb8425cf59206ffed6
|
||||
57fd080be5bffe4153fe3ee26a175e3d56da30f3
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user