mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-11-05 10:22:15 +08:00
Compare commits
3 Commits
2e2f92701f
...
3fe6f0febd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3fe6f0febd | ||
|
|
40d3691e9e | ||
|
|
af8437095a |
16
.github/workflows/docker.yml
vendored
16
.github/workflows/docker.yml
vendored
@ -18,6 +18,9 @@ on:
|
|||||||
- "requirements.txt"
|
- "requirements.txt"
|
||||||
- "docker/**"
|
- "docker/**"
|
||||||
- ".github/workflows/*.yml"
|
- ".github/workflows/*.yml"
|
||||||
|
release:
|
||||||
|
types:
|
||||||
|
- published
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
@ -40,7 +43,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Free up disk space
|
- name: Free up disk space
|
||||||
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
|
uses: jlumbroso/free-disk-space@v1.3.1
|
||||||
with:
|
with:
|
||||||
tool-cache: true
|
tool-cache: true
|
||||||
docker-images: false
|
docker-images: false
|
||||||
@ -51,12 +54,16 @@ jobs:
|
|||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.9"
|
python-version: "3.10"
|
||||||
|
|
||||||
- name: Get llamafactory version
|
- name: Get llamafactory version
|
||||||
id: version
|
id: version
|
||||||
run: |
|
run: |
|
||||||
echo "tag=$(python setup.py --version | sed 's/\.dev0//')" >> "$GITHUB_OUTPUT"
|
if [ "${{ github.event_name }}" = "release" ]; then
|
||||||
|
echo "tag=$(python setup.py --version)" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "tag=latest" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
@ -86,7 +93,6 @@ jobs:
|
|||||||
EXTRAS=metrics,deepspeed,liger-kernel
|
EXTRAS=metrics,deepspeed,liger-kernel
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
docker.io/hiyouga/llamafactory:latest
|
|
||||||
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
|
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
@ -100,9 +106,7 @@ jobs:
|
|||||||
file: ./docker/docker-npu/Dockerfile
|
file: ./docker/docker-npu/Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
docker.io/hiyouga/llamafactory:latest-npu-a2
|
|
||||||
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
|
docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
|
||||||
quay.io/ascend/llamafactory:latest-npu-a2
|
|
||||||
quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
|
quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|||||||
9
.github/workflows/tests.yml
vendored
9
.github/workflows/tests.yml
vendored
@ -30,7 +30,7 @@ jobs:
|
|||||||
os:
|
os:
|
||||||
- "ubuntu-latest"
|
- "ubuntu-latest"
|
||||||
- "windows-latest"
|
- "windows-latest"
|
||||||
- "macos-13"
|
- "macos-latest"
|
||||||
transformers:
|
transformers:
|
||||||
- null
|
- null
|
||||||
include: # test backward compatibility
|
include: # test backward compatibility
|
||||||
@ -75,10 +75,11 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
python -m pip install "transformers==${{ matrix.transformers }}"
|
python -m pip install "transformers==${{ matrix.transformers }}"
|
||||||
|
|
||||||
- name: Install transformers to avoid mac os ci errors
|
- name: Update accelerate to avoid mac os ci errors (before accelerate 1.11.0)
|
||||||
if: ${{ matrix.os == 'macos-13' }}
|
if: ${{ matrix.os == 'macos-latest' }}
|
||||||
run: |
|
run: |
|
||||||
python -m pip install "transformers<=4.51.3"
|
python -m pip uninstall -y accelerate
|
||||||
|
python -m pip install "git+https://github.com/huggingface/accelerate.git"
|
||||||
|
|
||||||
- name: Cache files
|
- name: Cache files
|
||||||
id: hf-hub-cache
|
id: hf-hub-cache
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
# core deps
|
# core deps
|
||||||
transformers>=4.49.0,<=4.56.2,!=4.52.0
|
transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
|
||||||
|
transformers>=4.49.0,<=4.57.0,!=4.52.0; python_version >= '3.10'
|
||||||
datasets>=2.16.0,<=4.0.0
|
datasets>=2.16.0,<=4.0.0
|
||||||
accelerate>=1.3.0,<=1.10.1
|
accelerate>=1.3.0,<=1.11.0
|
||||||
peft>=0.14.0,<=0.17.1
|
peft>=0.14.0,<=0.17.1
|
||||||
trl>=0.8.6,<=0.9.6
|
trl>=0.8.6,<=0.9.6
|
||||||
# gui
|
# gui
|
||||||
|
|||||||
@ -94,9 +94,9 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
|
|||||||
|
|
||||||
def check_dependencies() -> None:
|
def check_dependencies() -> None:
|
||||||
r"""Check the version of the required packages."""
|
r"""Check the version of the required packages."""
|
||||||
check_version("transformers>=4.49.0,<=4.56.2")
|
check_version("transformers>=4.49.0,<=4.57.0")
|
||||||
check_version("datasets>=2.16.0,<=4.0.0")
|
check_version("datasets>=2.16.0,<=4.0.0")
|
||||||
check_version("accelerate>=1.3.0,<=1.10.1")
|
check_version("accelerate>=1.3.0,<=1.11.0")
|
||||||
check_version("peft>=0.14.0,<=0.17.1")
|
check_version("peft>=0.14.0,<=0.17.1")
|
||||||
check_version("trl>=0.8.6,<=0.9.6")
|
check_version("trl>=0.8.6,<=0.9.6")
|
||||||
|
|
||||||
|
|||||||
@ -40,7 +40,7 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
|
|||||||
|
|
||||||
model_type = getattr(model.config, "model_type", None)
|
model_type = getattr(model.config, "model_type", None)
|
||||||
text_config = getattr(model.config, "text_config", None)
|
text_config = getattr(model.config, "text_config", None)
|
||||||
text_architectures = getattr(text_config, "architectures", None)
|
text_model_type = getattr(text_config, "model_type", None)
|
||||||
|
|
||||||
if model_type == "dbrx":
|
if model_type == "dbrx":
|
||||||
from transformers.models.dbrx.modeling_dbrx import DbrxFFN
|
from transformers.models.dbrx.modeling_dbrx import DbrxFFN
|
||||||
@ -105,11 +105,21 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
|
|||||||
|
|
||||||
_set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock])
|
_set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock])
|
||||||
|
|
||||||
if model_type == "qwen3_moe" or text_architectures == "Qwen3MoeForCausalLM":
|
if model_type == "qwen3_moe" or text_model_type == "qwen3_moe": # internvl 3.5
|
||||||
from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock
|
from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock
|
||||||
|
|
||||||
_set_z3_leaf_modules(model, [Qwen3MoeSparseMoeBlock])
|
_set_z3_leaf_modules(model, [Qwen3MoeSparseMoeBlock])
|
||||||
|
|
||||||
|
if model_type == "qwen3_vl_moe":
|
||||||
|
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeTextSparseMoeBlock
|
||||||
|
|
||||||
|
_set_z3_leaf_modules(model, [Qwen3VLMoeTextSparseMoeBlock])
|
||||||
|
|
||||||
|
if model_type == "qwen3_omni_moe":
|
||||||
|
from transformers.models.qwen3_omni_moe.modeling_qwen3_omni_moe import Qwen3OmniMoeThinkerTextSparseMoeBlock
|
||||||
|
|
||||||
|
_set_z3_leaf_modules(model, [Qwen3OmniMoeThinkerTextSparseMoeBlock])
|
||||||
|
|
||||||
|
|
||||||
def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
|
def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
|
||||||
if not is_trainable or not model_args.moe_aux_loss_coef:
|
if not is_trainable or not model_args.moe_aux_loss_coef:
|
||||||
|
|||||||
@ -376,7 +376,7 @@ def test_qwen2_vl_plugin():
|
|||||||
@pytest.mark.skipif(not is_transformers_version_greater_than("4.57.0"), reason="Requires transformers>=4.57.0")
|
@pytest.mark.skipif(not is_transformers_version_greater_than("4.57.0"), reason="Requires transformers>=4.57.0")
|
||||||
def test_qwen3_vl_plugin():
|
def test_qwen3_vl_plugin():
|
||||||
frame_seqlen = 1
|
frame_seqlen = 1
|
||||||
tokenizer_module = _load_tokenizer_module(model_name_or_path="Qwen/Qwen3-VL-235B-A22B-Instruct")
|
tokenizer_module = _load_tokenizer_module(model_name_or_path="Qwen/Qwen3-VL-30B-A3B-Instruct")
|
||||||
qwen3_vl_plugin = get_mm_plugin(name="qwen3_vl", video_token="<|video_pad|>")
|
qwen3_vl_plugin = get_mm_plugin(name="qwen3_vl", video_token="<|video_pad|>")
|
||||||
check_inputs = {"plugin": qwen3_vl_plugin, **tokenizer_module}
|
check_inputs = {"plugin": qwen3_vl_plugin, **tokenizer_module}
|
||||||
check_inputs["expected_mm_messages"] = [
|
check_inputs["expected_mm_messages"] = [
|
||||||
|
|||||||
@ -1,2 +1,2 @@
|
|||||||
# change if test fails or cache is outdated
|
# change if test fails or cache is outdated
|
||||||
0.9.4.102
|
0.9.4.103
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user