diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 818d58fc..32edf6a8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ on: - ".github/workflows/*.yml" jobs: - check_code_quality: + tests: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -34,28 +34,6 @@ jobs: - name: Check quality run: | make style && make quality - - pytest: - needs: check_code_quality - strategy: - matrix: - python-version: - - "3.8" - os: - - "ubuntu-latest" - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: "pip" - cache-dependency-path: "setup.py" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install .[torch,dev] - name: Test with pytest run: | make test diff --git a/README.md b/README.md index 44897420..fb6c5782 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
Preference datasets - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar) diff --git a/README_zh.md b/README_zh.md index 8321d202..142254df 100644 --- a/README_zh.md +++ b/README_zh.md @@ -214,6 +214,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Wikipedia (zh)](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered) - [Pile (en)](https://huggingface.co/datasets/EleutherAI/pile) - [SkyPile (zh)](https://huggingface.co/datasets/Skywork/SkyPile-150B) +- [FineWeb (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb) +- [FineWeb-Edu (en)](https://huggingface.co/datasets/HuggingFaceFW/fineweb-edu) - [The Stack (en)](https://huggingface.co/datasets/bigcode/the-stack) - [StarCoder (en)](https://huggingface.co/datasets/bigcode/starcoderdata) @@ -273,6 +275,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd
偏好数据集 - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) +- [UltraFeedback (en)](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) - [Orca DPO Pairs (en)](https://huggingface.co/datasets/Intel/orca_dpo_pairs) - [HH-RLHF (en)](https://huggingface.co/datasets/Anthropic/hh-rlhf) - [Nectar (en)](https://huggingface.co/datasets/berkeley-nest/Nectar)