From 0ecad4b178f12edd0c174136ab2f81715e0c154a Mon Sep 17 00:00:00 2001
From: Victor Nogueira
Date: Thu, 13 Mar 2025 13:20:18 +0100
Subject: [PATCH] [dataset] fix ultrachat_200k dataset (#7259)

The `HuggingFaceH4/ultrachat_200k` dataset doesn't contain the default "train" split. The correct split is "train_sft".
---
 data/dataset_info.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/data/dataset_info.json b/data/dataset_info.json
index 7aaee14f..aba2c6c5 100644
--- a/data/dataset_info.json
+++ b/data/dataset_info.json
@@ -232,6 +232,7 @@
   "ultrachat_200k": {
     "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
     "ms_hub_url": "AI-ModelScope/ultrachat_200k",
+    "split": "train_sft",
     "formatting": "sharegpt",
     "columns": {
       "messages": "messages"