@@ -11,7 +11,7 @@ If you are using a custom dataset, please provide your dataset definition in the
   "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
   "ranking": "whether the dataset is a preference dataset or not. (default: false)",
   "formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
-  "columns": {
+  "columns (optional)": {
     "prompt": "the column name in the dataset containing the prompts. (default: instruction)",
     "query": "the column name in the dataset containing the queries. (default: input)",
     "response": "the column name in the dataset containing the responses. (default: output)",
@@ -20,14 +20,14 @@ If you are using a custom dataset, please provide your dataset definition in the
   "system": "the column name in the dataset containing the system prompts. (default: None)",
   "tools": "the column name in the dataset containing the tool description. (default: None)"
   },
-  "tags": {
+  "tags (optional, used for the sharegpt format)": {
     "role_tag": "the key in the message represents the identity. (default: from)",
     "content_tag": "the key in the message represents the content. (default: value)",
     "user_tag": "the value of the role_tag represents the user. (default: human)",
     "assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)",
     "observation_tag": "the value of the role_tag represents the tool results. (default: observation)",
     "function_tag": "the value of the role_tag represents the function call. (default: function_call)",
-    "system_tag": "the value of the role_tag represents the system prompt. (default: None) incompatible with system column"
+    "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)"
   }
 }
 ```
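For reference, a complete entry assembled from the fields documented above might look like the sketch below. The dataset key `my_dataset` and the file `my_dataset.json` are hypothetical (the `file_name` field is the local-file variant used in `dataset_info.json`), and the column mappings simply restate the defaults, so the `columns` block could be omitted; it is written out here only to show where each field goes.

```json
{
  "my_dataset": {
    "file_name": "my_dataset.json",
    "formatting": "alpaca",
    "ranking": false,
    "columns": {
      "prompt": "instruction",
      "query": "input",
      "response": "output"
    }
  }
}
```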
@@ -11,7 +11,7 @@
   "folder": "the folder name inside the Hugging Face repository (optional, default: None)",
   "ranking": "whether the dataset is a preference dataset (optional, default: False)",
   "formatting": "the dataset format (optional, default: alpaca, can be alpaca or sharegpt)",
-  "columns": {
+  "columns (optional)": {
     "prompt": "the column name for the prompts in the dataset (default: instruction)",
     "query": "the column name for the queries in the dataset (default: input)",
     "response": "the column name for the responses in the dataset (default: output)",
@@ -20,13 +20,14 @@
   "system": "the column name for the system prompts in the dataset (default: None)",
   "tools": "the column name for the tool descriptions in the dataset (default: None)"
   },
-  "tags": {
+  "tags (optional, used for the sharegpt format)": {
     "role_tag": "the key in the message that marks the sender's role (default: from)",
     "content_tag": "the key in the message that holds the text content (default: value)",
     "user_tag": "the role_tag value that represents the user (default: human)",
     "assistant_tag": "the role_tag value that represents the assistant (default: gpt)",
     "observation_tag": "the role_tag value that represents tool results (default: observation)",
-    "function_tag": "the role_tag value that represents a tool call (default: function_call)"
+    "function_tag": "the role_tag value that represents a tool call (default: function_call)",
+    "system_tag": "the role_tag value that represents the system prompt (default: system, overrides the system column)"
   }
 }
 ```
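To make the tag fields concrete, here is a hedged sketch of an entry for a hypothetical sharegpt-format file, with the `tags` block written out explicitly. The key `my_sharegpt_dataset` and the file name are illustrative; the `messages` mapping follows the pattern used by the sharegpt entries in `dataset_info.json` below, and since the tag values shown are exactly the documented defaults, the same entry would also work with the `tags` block left out.

```json
{
  "my_sharegpt_dataset": {
    "file_name": "my_sharegpt_dataset.json",
    "formatting": "sharegpt",
    "columns": {
      "messages": "conversations"
    },
    "tags": {
      "role_tag": "from",
      "content_tag": "value",
      "user_tag": "human",
      "assistant_tag": "gpt",
      "system_tag": "system"
    }
  }
}
```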
@@ -15,9 +15,6 @@
     "file_name": "alpaca_gpt4_data_zh.json",
     "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
   },
-  "alpaca-gpt4_de": {
-    "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
-  },
   "self_cognition": {
     "file_name": "self_cognition.json",
     "file_sha1": "6287a730ada924fc5d9eadc6d8f865e01b7a6f67"
@@ -42,9 +39,6 @@
       "history": "history"
     }
   },
-  "oasst_de": {
-    "hf_hub_url": "mayflowergmbh/oasst_de"
-  },
   "lima": {
     "file_name": "lima.json",
     "file_sha1": "9db59f6b7007dc4b17529fc63379b9cd61640f37",
@@ -126,44 +120,8 @@
       "system": "system_prompt"
     }
   },
-  "slimorca": {
-    "hf_hub_url": "Open-Orca/SlimOrca",
-    "formatting": "sharegpt",
-    "columns": {
-      "messages": "conversations"
-    },
-    "tags": {
-      "role_tag": "from",
-      "content_tag": "value",
-      "user_tag": "human",
-      "assistant_tag": "gpt",
-      "system_tag": "system"
-    }
-  },
-  "intel_orca_dpo_pairs_de" : {
-    "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
-    "ranking": true
-  },
-  "airoboros-3.0_de": {
-    "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
-  },
-  "booksum_de": {
-    "hf_hub_url": "mayflowergmbh/booksum_de"
-  },
-  "dolphin_de": {
-    "hf_hub_url": "mayflowergmbh/dolphin_de"
-  },
-  "wiki_qa_de": {
-    "hf_hub_url": "mayflowergmbh/wiki_qa_de"
-  },
-  "evol-instruct_de": {
-    "hf_hub_url": "mayflowergmbh/evol-instruct_de"
-  },
-  "openschnabeltier_de": {
-    "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
-  },
-  "dolly-15k_de": {
-    "hf_hub_url": "mayflowergmbh/dolly-15k_de"
-  },
+  "slimorca": {
+    "hf_hub_url": "Open-Orca/SlimOrca"
+  },
   "mathinstruct": {
     "hf_hub_url": "TIGER-Lab/MathInstruct",
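For context on the `slimorca` change above: as the entry being replaced here declares, SlimOrca stores each example as a list of messages in a `conversations` column whose keys match the default sharegpt tags from the README. A record of that shape looks roughly like the sketch below; the text itself is invented for illustration, not taken from the dataset.

```json
{
  "conversations": [
    { "from": "system", "value": "You are a helpful assistant." },
    { "from": "human", "value": "Summarize the passage in one sentence." },
    { "from": "gpt", "value": "The passage argues that concise summaries aid recall." }
  ]
}
```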
@@ -180,6 +138,13 @@
       "response": "target"
     }
   },
+  "wikiqa": {
+    "hf_hub_url": "wiki_qa",
+    "columns": {
+      "prompt": "question",
+      "response": "answer"
+    }
+  },
   "webqa": {
     "hf_hub_url": "suolyer/webqa",
     "ms_hub_url": "AI-ModelScope/webqa",
@@ -193,7 +158,8 @@
     "ms_hub_url": "AI-ModelScope/webnovel_cn"
   },
   "nectar_sft": {
-    "hf_hub_url": "mlinmg/SFT-Nectar"
+    "hf_hub_url": "mlinmg/SFT-Nectar",
+    "ms_hub_url": "AI-ModelScope/SFT-Nectar"
   },
   "deepctrl": {
     "ms_hub_url": "deepctrl/deepctrl-sft-data"
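A recurring pattern in this file's changes is adding an `ms_hub_url` next to an existing `hf_hub_url`, as in the `nectar_sft` hunk above, so a single entry can point at the same dataset on both the Hugging Face hub and ModelScope; presumably the ModelScope path is used when downloads go through ModelScope rather than the Hugging Face hub. A minimal sketch of such an entry, with a hypothetical key and repository paths:

```json
{
  "my_mirrored_dataset": {
    "hf_hub_url": "some-org/some-dataset",
    "ms_hub_url": "SomeOrg/some-dataset"
  }
}
```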
@@ -229,9 +195,6 @@
     },
     "formatting": "sharegpt"
   },
-  "ultrachat_chat_de": {
-    "hf_hub_url": "mayflowergmbh/ultra-chat_de"
-  },
   "agent_instruct": {
     "hf_hub_url": "THUDM/AgentInstruct",
     "ms_hub_url": "ZhipuAI/AgentInstruct",
@@ -253,8 +216,36 @@
   },
   "evol_instruct": {
     "hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
+    "ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
     "formatting": "sharegpt"
   },
+  "oasst_de": {
+    "hf_hub_url": "mayflowergmbh/oasst_de"
+  },
+  "dolly_15k_de": {
+    "hf_hub_url": "mayflowergmbh/dolly-15k_de"
+  },
+  "alpaca-gpt4_de": {
+    "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
+  },
+  "openschnabeltier_de": {
+    "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
+  },
+  "evol_instruct_de": {
+    "hf_hub_url": "mayflowergmbh/evol-instruct_de"
+  },
+  "dolphin_de": {
+    "hf_hub_url": "mayflowergmbh/dolphin_de"
+  },
+  "booksum_de": {
+    "hf_hub_url": "mayflowergmbh/booksum_de"
+  },
+  "airoboros_de": {
+    "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
+  },
+  "ultrachat_de": {
+    "hf_hub_url": "mayflowergmbh/ultra-chat_de"
+  },
   "hh_rlhf_en": {
     "script_url": "hh_rlhf_en",
     "columns": {
@@ -298,6 +289,11 @@
   },
   "nectar_rm": {
     "hf_hub_url": "mlinmg/RLAIF-Nectar",
+    "ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
     "ranking": true
   },
+  "orca_dpo_de" : {
+    "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
+    "ranking": true
+  },
   "wiki_demo": {
@@ -329,6 +325,7 @@
   },
   "wikipedia_en": {
     "hf_hub_url": "olm/olm-wikipedia-20221220",
+    "ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
     "columns": {
       "prompt": "text"
     }
@@ -342,6 +339,7 @@
   },
   "pile": {
     "hf_hub_url": "EleutherAI/pile",
+    "ms_hub_url": "AI-ModelScope/pile",
     "columns": {
       "prompt": "text"
     },
@@ -349,6 +347,7 @@
   },
   "skypile": {
     "hf_hub_url": "Skywork/SkyPile-150B",
+    "ms_hub_url": "AI-ModelScope/SkyPile-150B",
     "columns": {
       "prompt": "text"
     }