[dataset] fix ultrachat_200k dataset (#7259)

The `HuggingFaceH4/ultrachat_200k` dataset doesn't contain the default "train" split. The correct split is "train_sft".
This commit is contained in:
Victor Nogueira 2025-03-13 13:20:18 +01:00 committed by GitHub
parent 3c974c466e
commit 0ecad4b178

View File

@@ -232,6 +232,7 @@
"ultrachat_200k": { "ultrachat_200k": {
"hf_hub_url": "HuggingFaceH4/ultrachat_200k", "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
"ms_hub_url": "AI-ModelScope/ultrachat_200k", "ms_hub_url": "AI-ModelScope/ultrachat_200k",
"split": "train_sft",
"formatting": "sharegpt", "formatting": "sharegpt",
"columns": { "columns": {
"messages": "messages" "messages": "messages"