diff --git a/README.md b/README.md index fc69f055..8a5a1598 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,12 @@ [![Docker Pulls](https://img.shields.io/docker/pulls/hiyouga/llamafactory)](https://hub.docker.com/r/hiyouga/llamafactory/tags) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) -[![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) +[![Discord](assets/thirdparty/discord.svg)](https://discord.gg/rKfvV9r9FK) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing) -[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) -[![Open in Lab4ai](assets/lab4ai.svg)](https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory) -[![Open in Online](assets/online.svg)](https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory) +[![Open in Colab](assets/thirdparty/colab.svg)](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing) +[![Open in DSW](assets/thirdparty/dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) +[![Open in Lab4ai](assets/thirdparty/lab4ai.svg)](https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory) +[![Open in Online](assets/thirdparty/online.svg)](https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory) [![Open in Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board) [![Open in Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board) [![Open in 
Novita](https://img.shields.io/badge/Novita-Deploy%20Template-blue)](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47) @@ -25,7 +25,7 @@ ### Supporters ❤️ -|
Warp sponsorship
Warp, the agentic terminal for developers
Available for MacOS, Linux, & Windows | SerpAPI sponsorship | +|
Warp sponsorship
Warp, the agentic terminal for developers
Available for MacOS, Linux, & Windows | SerpAPI sponsorship | | ---- | ---- | ---- @@ -36,7 +36,7 @@
-👋 Join our [WeChat](assets/wechat.jpg), [NPU](assets/wechat_npu.jpg), [Lab4AI](assets/wechat_lab4ai.jpg), [LLaMA Factory Online](assets/wechat_online.jpg) user group. +👋 Join our [WeChat](assets/wechat/main.jpg), [NPU](assets/wechat/npu.jpg), [Lab4AI](assets/wechat/lab4ai.jpg), [LLaMA Factory Online](assets/wechat/online.jpg) user group. \[ English | [中文](README_zh.md) \] @@ -365,6 +365,11 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) - [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) - [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) +- [CCI3-HQ (zh)](https://huggingface.co/datasets/BAAI/CCI3-HQ) +- [CCI3-Data (zh)](https://huggingface.co/datasets/BAAI/CCI3-Data) +- [CCI4.0-M2-Base-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Base-v1) +- [CCI4.0-M2-CoT-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-CoT-v1) +- [CCI4.0-M2-Extra-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Extra-v1) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -402,6 +407,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k) - [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4) - [UltraChat 200k (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) +- [Infinity Instruct (zh)](https://huggingface.co/datasets/BAAI/Infinity-Instruct) - [AgentInstruct (en)](https://huggingface.co/datasets/THUDM/AgentInstruct) - [LMSYS Chat 1M (en)](https://huggingface.co/datasets/lmsys/lmsys-chat-1m) - [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k) diff --git a/README_zh.md b/README_zh.md index 
c8c1fba2..6fb5b3bc 100644 --- a/README_zh.md +++ b/README_zh.md @@ -9,12 +9,12 @@ [![Docker Pulls](https://img.shields.io/docker/pulls/hiyouga/llamafactory)](https://hub.docker.com/r/hiyouga/llamafactory/tags) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) -[![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) +[![Discord](assets/thirdparty/discord.svg)](https://discord.gg/rKfvV9r9FK) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing) -[![Open in DSW](https://gallery.pai-ml.com/assets/open-in-dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) -[![Open in Lab4ai](assets/lab4ai.svg)](https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory) -[![Open in Online](assets/online.svg)](https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory) +[![Open in Colab](assets/thirdparty/colab.svg)](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing) +[![Open in DSW](assets/thirdparty/dsw.svg)](https://gallery.pai-ml.com/#/preview/deepLearning/nlp/llama_factory) +[![Open in Lab4ai](assets/thirdparty/lab4ai.svg)](https://www.lab4ai.cn/course/detail?id=7c13e60f6137474eb40f6fd3983c0f46&utm_source=LLaMA-Factory) +[![Open in Online](assets/thirdparty/online.svg)](https://www.llamafactory.com.cn/?utm_source=LLaMA-Factory) [![Open in Spaces](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue)](https://huggingface.co/spaces/hiyouga/LLaMA-Board) [![Open in Studios](https://img.shields.io/badge/ModelScope-Open%20in%20Studios-blue)](https://modelscope.cn/studios/hiyouga/LLaMA-Board) [![Open in Novita](https://img.shields.io/badge/Novita-Deploy%20Template-blue)](https://novita.ai/templates-library/105981?sharer=88115474-394e-4bda-968e-b88e123d0c47) @@ 
-25,7 +25,7 @@ ### 赞助商 ❤️ -|
Warp sponsorship
Warp,面向开发者的智能终端
适用于 MacOS、Linux 和 Windows | SerpAPI sponsorship | +|
Warp sponsorship
Warp,面向开发者的智能终端
适用于 MacOS、Linux 和 Windows | SerpAPI sponsorship | | ---- | ---- | ---- @@ -36,7 +36,7 @@
-👋 加入我们的[微信群](assets/wechat.jpg)、[NPU 用户群](assets/wechat_npu.jpg)、[大模型实验室群](assets/wechat_lab4ai.jpg) 或 [LLaMA Factory Online 用户群](assets/wechat_online.png)。 +👋 加入我们的[微信群](assets/wechat/main.jpg)、[NPU 用户群](assets/wechat/npu.jpg)、[大模型实验室群](assets/wechat/lab4ai.jpg) 或 [LLaMA Factory Online 用户群](assets/wechat/online.jpg)。 \[ [English](README.md) | 中文 \] @@ -367,6 +367,11 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) - [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) - [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) +- [CCI3-HQ (zh)](https://huggingface.co/datasets/BAAI/CCI3-HQ) +- [CCI3-Data (zh)](https://huggingface.co/datasets/BAAI/CCI3-Data) +- [CCI4.0-M2-Base-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Base-v1) +- [CCI4.0-M2-CoT-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-CoT-v1) +- [CCI4.0-M2-Extra-v1 (en&zh)](https://huggingface.co/datasets/BAAI/CCI4.0-M2-Extra-v1) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -404,6 +409,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc - [ShareGPT Hyperfiltered (en)](https://huggingface.co/datasets/totally-not-an-llm/sharegpt-hyperfiltered-3k) - [ShareGPT4 (en&zh)](https://huggingface.co/datasets/shibing624/sharegpt_gpt4) - [UltraChat 200k (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) +- [Infinity Instruct (zh)](https://huggingface.co/datasets/BAAI/Infinity-Instruct) - [AgentInstruct (en)](https://huggingface.co/datasets/THUDM/AgentInstruct) - [LMSYS Chat 1M (en)](https://huggingface.co/datasets/lmsys/lmsys-chat-1m) - [Evol Instruct V2 (en)](https://huggingface.co/datasets/WizardLM/WizardLM_evol_instruct_V2_196k) diff --git a/assets/serpapi.svg b/assets/sponsors/serpapi.svg similarity 
index 100% rename from assets/serpapi.svg rename to assets/sponsors/serpapi.svg diff --git a/assets/warp.jpg b/assets/sponsors/warp.jpg similarity index 100% rename from assets/warp.jpg rename to assets/sponsors/warp.jpg diff --git a/assets/thirdparty/colab.svg b/assets/thirdparty/colab.svg new file mode 100644 index 00000000..e5830d53 --- /dev/null +++ b/assets/thirdparty/colab.svg @@ -0,0 +1 @@ + Open in ColabOpen in Colab diff --git a/assets/thirdparty/discord.svg b/assets/thirdparty/discord.svg new file mode 100644 index 00000000..b94f16cc --- /dev/null +++ b/assets/thirdparty/discord.svg @@ -0,0 +1 @@ +LLaMA FactoryLLaMA Factory diff --git a/assets/thirdparty/dsw.svg b/assets/thirdparty/dsw.svg new file mode 100644 index 00000000..a0df870c --- /dev/null +++ b/assets/thirdparty/dsw.svg @@ -0,0 +1,92 @@ + + + 最终方案备份 6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/lab4ai.svg b/assets/thirdparty/lab4ai.svg similarity index 100% rename from assets/lab4ai.svg rename to assets/thirdparty/lab4ai.svg diff --git a/assets/online.svg b/assets/thirdparty/online.svg similarity index 100% rename from assets/online.svg rename to assets/thirdparty/online.svg diff --git a/assets/wechat.jpg b/assets/wechat.jpg deleted file mode 100644 index c4b7a7a8..00000000 Binary files a/assets/wechat.jpg and /dev/null differ diff --git a/assets/wechat_lab4ai.jpg b/assets/wechat/lab4ai.jpg similarity index 100% rename from assets/wechat_lab4ai.jpg rename to assets/wechat/lab4ai.jpg diff --git a/assets/wechat/main.jpg b/assets/wechat/main.jpg new file mode 100644 index 00000000..c0b36e52 Binary files /dev/null and b/assets/wechat/main.jpg differ diff --git a/assets/wechat/npu.jpg b/assets/wechat/npu.jpg new file mode 100644 index 00000000..36e8622c Binary files /dev/null and b/assets/wechat/npu.jpg differ diff --git a/assets/wechat_online.jpg 
b/assets/wechat/online.jpg similarity index 100% rename from assets/wechat_online.jpg rename to assets/wechat/online.jpg diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg deleted file mode 100644 index aba34ada..00000000 Binary files a/assets/wechat_npu.jpg and /dev/null differ diff --git a/data/dataset_info.json b/data/dataset_info.json index 1ce46396..855c35d9 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -259,6 +259,10 @@ "assistant_tag": "assistant" } }, + "infinity_instruct": { + "hf_hub_url": "BAAI/Infinity-Instruct", + "formatting": "sharegpt" + }, "agent_instruct": { "hf_hub_url": "THUDM/AgentInstruct", "ms_hub_url": "ZhipuAI/AgentInstruct", @@ -700,6 +704,36 @@ "prompt": "text" } }, + "cci3_hq": { + "hf_hub_url": "BAAI/CCI3-HQ", + "columns": { + "prompt": "text" + } + }, + "cci3_data": { + "hf_hub_url": "BAAI/CCI3-Data", + "columns": { + "prompt": "text" + } + }, + "cci4_base": { + "hf_hub_url": "BAAI/CCI4.0-M2-Base-v1", + "columns": { + "prompt": "text" + } + }, + "cci4_cot": { + "hf_hub_url": "BAAI/CCI4.0-M2-CoT-v1", + "columns": { + "prompt": "text" + } + }, + "cci4_extra": { + "hf_hub_url": "BAAI/CCI4.0-M2-Extra-v1", + "columns": { + "prompt": "text" + } + }, "the_stack": { "hf_hub_url": "bigcode/the-stack", "ms_hub_url": "AI-ModelScope/the-stack",