diff --git a/README.md b/README.md index a3c2d81d..c9b51665 100644 --- a/README.md +++ b/README.md @@ -90,10 +90,11 @@ - [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M) - [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M) - [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M) - - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [LIMA (en)](https://huggingface.co/datasets/GAIR/lima) - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) + - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) + - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa) - [UltraChat (en)](https://github.com/thunlp/UltraChat) - [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn) diff --git a/README_zh.md b/README_zh.md index f603780f..de9667e6 100644 --- a/README_zh.md +++ b/README_zh.md @@ -90,10 +90,11 @@ - [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M) - [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M) - [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M) - - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [LIMA (en)](https://huggingface.co/datasets/GAIR/lima) - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) + - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) + - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa) - [UltraChat (en)](https://github.com/thunlp/UltraChat) - [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn) diff --git a/data/dataset_info.json b/data/dataset_info.json index a2a0899e..f30c422a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -95,6 +95,12 @@ "history": "history" } }, + "codealpaca": { + "hf_hub_url": "sahil2801/CodeAlpaca-20k" + }, + "alpaca_cot": { + "hf_hub_url": "QingyiSi/Alpaca-CoT" + }, "firefly": { "hf_hub_url": "YeungNLP/firefly-train-1.1M", "columns": { @@ -104,11 +110,14 @@ "history": "" } }, - "codealpaca": { - "hf_hub_url": "sahil2801/CodeAlpaca-20k" - }, - "alpaca_cot": { - "hf_hub_url": "QingyiSi/Alpaca-CoT" + "mathinstruct": { + "hf_hub_url": "TIGER-Lab/MathInstruct", + "columns": { + "prompt": "instruction", + "query": "", + "response": "output", + "history": "" + } }, "webqa": { "hf_hub_url": "suolyer/webqa",