From 1cd0ea1f13d853d7ee6fa7ab60bc9bdc38f47144 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Wed, 13 Sep 2023 22:30:14 +0800 Subject: [PATCH] add MathInstruct dataset Former-commit-id: 026af87e7fce091a0cda1afd6df3d6ab6189de9a --- README.md | 3 ++- README_zh.md | 3 ++- data/dataset_info.json | 19 ++++++++++++++----- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index a3c2d81d..c9b51665 100644 --- a/README.md +++ b/README.md @@ -90,10 +90,11 @@ - [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M) - [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M) - [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M) - - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [LIMA (en)](https://huggingface.co/datasets/GAIR/lima) - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) + - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) + - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa) - [UltraChat (en)](https://github.com/thunlp/UltraChat) - [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn) diff --git a/README_zh.md b/README_zh.md index f603780f..de9667e6 100644 --- a/README_zh.md +++ b/README_zh.md @@ -90,10 +90,11 @@ - [BELLE Dialogue 0.4M (zh)](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M) - [BELLE School Math 0.25M (zh)](https://huggingface.co/datasets/BelleGroup/school_math_0.25M) - [BELLE Multiturn Chat 0.8M (zh)](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M) - - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [LIMA (en)](https://huggingface.co/datasets/GAIR/lima) - [CodeAlpaca 20k (en)](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) - [Alpaca CoT (multilingual)](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) + - [MathInstruct (en)](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) + - [Firefly 1.1M (zh)](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) - [Web QA (zh)](https://huggingface.co/datasets/suolyer/webqa) - [UltraChat (en)](https://github.com/thunlp/UltraChat) - [WebNovel (zh)](https://huggingface.co/datasets/zxbsmk/webnovel_cn) diff --git a/data/dataset_info.json b/data/dataset_info.json index a2a0899e..f30c422a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -95,6 +95,12 @@ "history": "history" } }, + "codealpaca": { + "hf_hub_url": "sahil2801/CodeAlpaca-20k" + }, + "alpaca_cot": { + "hf_hub_url": "QingyiSi/Alpaca-CoT" + }, "firefly": { "hf_hub_url": "YeungNLP/firefly-train-1.1M", "columns": { @@ -104,11 +110,14 @@ "history": "" } }, - "codealpaca": { - "hf_hub_url": "sahil2801/CodeAlpaca-20k" - }, - "alpaca_cot": { - "hf_hub_url": "QingyiSi/Alpaca-CoT" + "mathinstruct": { + "hf_hub_url": "TIGER-Lab/MathInstruct", + "columns": { + "prompt": "instruction", + "query": "", + "response": "output", + "history": "" + } }, "webqa": { "hf_hub_url": "suolyer/webqa",