mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 10:56:56 +08:00
@@ -6,7 +6,9 @@ If you are using a custom dataset, please provide your dataset definition in the
|
||||
"script_url": "the name of the directory containing a dataset loading script. (if specified, ignore below 2 arguments)",
|
||||
"file_name": "the name of the dataset file in the this directory. (required if above are not specified)",
|
||||
"file_sha1": "the SHA-1 hash value of the dataset file. (optional)",
|
||||
"subset": "",
|
||||
"ranking": "whether the examples contains ranked responses or not. (default: false)",
|
||||
"formatting": "",
|
||||
"columns": {
|
||||
"prompt": "the name of the column in the datasets containing the prompts. (default: instruction)",
|
||||
"query": "the name of the column in the datasets containing the queries. (default: input)",
|
||||
|
||||
@@ -71,14 +71,14 @@
|
||||
"guanaco": {
|
||||
"hf_hub_url": "JosephusCheung/GuanacoDataset"
|
||||
},
|
||||
"belle_0.5m": {
|
||||
"hf_hub_url": "BelleGroup/train_0.5M_CN"
|
||||
"belle_2m": {
|
||||
"hf_hub_url": "BelleGroup/train_2M_CN"
|
||||
},
|
||||
"belle_1m": {
|
||||
"hf_hub_url": "BelleGroup/train_1M_CN"
|
||||
},
|
||||
"belle_2m": {
|
||||
"hf_hub_url": "BelleGroup/train_2M_CN"
|
||||
"belle_0.5m": {
|
||||
"hf_hub_url": "BelleGroup/train_0.5M_CN"
|
||||
},
|
||||
"belle_dialog": {
|
||||
"hf_hub_url": "BelleGroup/generated_chat_0.4M"
|
||||
@@ -90,80 +90,116 @@
|
||||
"script_url": "belle_multiturn",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"ultra_chat": {
|
||||
"script_url": "ultra_chat",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"open_platypus": {
|
||||
"hf_hub_url": "garage-bAInd/Open-Platypus"
|
||||
},
|
||||
"codealpaca": {
|
||||
"hf_hub_url": "sahil2801/CodeAlpaca-20k"
|
||||
},
|
||||
"alpaca_cot": {
|
||||
"hf_hub_url": "QingyiSi/Alpaca-CoT"
|
||||
},
|
||||
"firefly": {
|
||||
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"query": "",
|
||||
"response": "target",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"mathinstruct": {
|
||||
"hf_hub_url": "TIGER-Lab/MathInstruct",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": ""
|
||||
"response": "output"
|
||||
}
|
||||
},
|
||||
"firefly": {
|
||||
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"response": "target"
|
||||
}
|
||||
},
|
||||
"webqa": {
|
||||
"hf_hub_url": "suolyer/webqa",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": ""
|
||||
"response": "output"
|
||||
}
|
||||
},
|
||||
"ultra_chat": {
|
||||
"script_url": "ultra_chat",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"novel_tokens512_50k": {
|
||||
"webnovel": {
|
||||
"hf_hub_url": "zxbsmk/webnovel_cn"
|
||||
},
|
||||
"adgen": {
|
||||
"hf_hub_url": "HasturOfficial/adgen",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "summary",
|
||||
"history": ""
|
||||
"response": "summary"
|
||||
}
|
||||
},
|
||||
"comparison_gpt4_en": {
|
||||
"file_name": "comparison_gpt4_data_en.json",
|
||||
"file_sha1": "96fa18313544e22444fe20eead7754b17da452ae",
|
||||
"ranking": true
|
||||
"sharegpt_hyper": {
|
||||
"hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
|
||||
"columns": {
|
||||
"prompt": "conversations",
|
||||
"query": "from",
|
||||
"response": "value"
|
||||
},
|
||||
"formatting": "sharegpt"
|
||||
},
|
||||
"comparison_gpt4_zh": {
|
||||
"file_name": "comparison_gpt4_data_zh.json",
|
||||
"file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd",
|
||||
"ranking": true
|
||||
"sharegpt4": {
|
||||
"hf_hub_url": "shibing624/sharegpt_gpt4",
|
||||
"columns": {
|
||||
"prompt": "conversations",
|
||||
"query": "from",
|
||||
"response": "value"
|
||||
},
|
||||
"formatting": "sharegpt"
|
||||
},
|
||||
"ultrachat_200k": {
|
||||
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
|
||||
"columns": {
|
||||
"prompt": "messages",
|
||||
"query": "role",
|
||||
"response": "content"
|
||||
},
|
||||
"formatting": "sharegpt"
|
||||
},
|
||||
"agent_instruct": {
|
||||
"hf_hub_url": "THUDM/AgentInstruct",
|
||||
"columns": {
|
||||
"prompt": "conversations",
|
||||
"query": "from",
|
||||
"response": "value"
|
||||
},
|
||||
"formatting": "sharegpt"
|
||||
},
|
||||
"lmsys_chat": {
|
||||
"hf_hub_url": "lmsys/lmsys-chat-1m",
|
||||
"columns": {
|
||||
"prompt": "conversation",
|
||||
"query": "role",
|
||||
"response": "content"
|
||||
},
|
||||
"formatting": "sharegpt"
|
||||
},
|
||||
"evol_instruct": {
|
||||
"hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
|
||||
"columns": {
|
||||
"prompt": "conversations",
|
||||
"query": "from",
|
||||
"response": "value"
|
||||
},
|
||||
"formatting": "sharegpt"
|
||||
},
|
||||
"hh_rlhf_en": {
|
||||
"script_url": "hh_rlhf_en",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
},
|
||||
@@ -191,59 +227,71 @@
|
||||
},
|
||||
"ranking": true
|
||||
},
|
||||
"comparison_gpt4_en": {
|
||||
"file_name": "comparison_gpt4_data_en.json",
|
||||
"file_sha1": "96fa18313544e22444fe20eead7754b17da452ae",
|
||||
"ranking": true
|
||||
},
|
||||
"comparison_gpt4_zh": {
|
||||
"file_name": "comparison_gpt4_data_zh.json",
|
||||
"file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd",
|
||||
"ranking": true
|
||||
},
|
||||
"wiki_demo": {
|
||||
"file_name": "wiki_demo.txt",
|
||||
"file_sha1": "b2288edb05b233e5b35250fd4b308a5fa21fa66d",
|
||||
"columns": {
|
||||
"prompt": "text",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
"prompt": "text"
|
||||
}
|
||||
},
|
||||
"refinedweb": {
|
||||
"hf_hub_url": "tiiuae/falcon-refinedweb",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
"prompt": "content"
|
||||
}
|
||||
},
|
||||
"redpajama_v2": {
|
||||
"hf_hub_url": "togethercomputer/RedPajama-Data-V2",
|
||||
"columns": {
|
||||
"prompt": "raw_content"
|
||||
},
|
||||
"subset": "default"
|
||||
},
|
||||
"wikipedia_en": {
|
||||
"hf_hub_url": "olm/olm-wikipedia-20221220",
|
||||
"columns": {
|
||||
"prompt": "text",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
"prompt": "text"
|
||||
}
|
||||
},
|
||||
"wikipedia_zh": {
|
||||
"hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
|
||||
"columns": {
|
||||
"prompt": "completion",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
"prompt": "completion"
|
||||
}
|
||||
},
|
||||
"pile": {
|
||||
"hf_hub_url": "EleutherAI/pile",
|
||||
"columns": {
|
||||
"prompt": "text"
|
||||
},
|
||||
"subset": "all"
|
||||
},
|
||||
"skypile": {
|
||||
"hf_hub_url": "Skywork/SkyPile-150B",
|
||||
"columns": {
|
||||
"prompt": "text"
|
||||
}
|
||||
},
|
||||
"the_stack": {
|
||||
"hf_hub_url": "bigcode/the-stack",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
"prompt": "content"
|
||||
}
|
||||
},
|
||||
"starcoder": {
|
||||
"hf_hub_url": "bigcode/starcoderdata",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
"prompt": "content"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user