[dataset] fix ultrachat_200k dataset (#7259)

The `HuggingFaceH4/ultrachat_200k` dataset doesn't contain the default "train" split. The correct split is "train_sft".
This commit is contained in:
Victor Nogueira 2025-03-13 13:20:18 +01:00 committed by GitHub
parent 0dbce72fb8
commit 3dff4ecca8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -232,6 +232,7 @@
"ultrachat_200k": {
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
"ms_hub_url": "AI-ModelScope/ultrachat_200k",
"split": "train_sft",
"formatting": "sharegpt",
"columns": {
"messages": "messages"