Slim Orca data parsing

Former-commit-id: 1d3598afa1
This commit is contained in:
Mark Mueller
2024-02-08 19:32:20 +01:00
parent 8e36c5144b
commit 4bd7b8375e
4 changed files with 22 additions and 3 deletions

View File

@@ -26,7 +26,8 @@ If you are using a custom dataset, please provide your dataset definition in the
"user_tag": "the value of the role_tag represents the user. (default: human)",
"assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)",
"observation_tag": "the value of the role_tag represents the tool results. (default: observation)",
"function_tag": "the value of the role_tag represents the function call. (default: function_call)"
"function_tag": "the value of the role_tag represents the function call. (default: function_call)",
"system_tag": "the value of the role_tag represents the system prompt. (default: None) incompatible with system column"
}
}
```

View File

@@ -126,6 +126,20 @@
"system": "system_prompt"
}
},
"slimorca": {
"hf_hub_url": "Open-Orca/SlimOrca",
"formatting": "sharegpt",
"columns": {
"messages": "conversations"
},
"tags": {
"role_tag": "from",
"content_tag": "value",
"user_tag": "human",
"assistant_tag": "gpt",
"system_tag": "system"
}
},
"intel_orca_dpo_pairs_de" : {
"hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
"ranking": true