mirror of
				https://github.com/hiyouga/LLaMA-Factory.git
				synced 2025-11-04 18:02:19 +08:00 
			
		
		
		
	Compare commits
	
		
			3 Commits
		
	
	
		
			2e2f92701f
			...
			3fe6f0febd
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					3fe6f0febd | ||
| 
						 | 
					40d3691e9e | ||
| 
						 | 
					af8437095a | 
							
								
								
									
										16
									
								
								.github/workflows/docker.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/workflows/docker.yml
									
									
									
									
										vendored
									
									
								
							@ -18,6 +18,9 @@ on:
 | 
			
		||||
      - "requirements.txt"
 | 
			
		||||
      - "docker/**"
 | 
			
		||||
      - ".github/workflows/*.yml"
 | 
			
		||||
  release:
 | 
			
		||||
    types:
 | 
			
		||||
      - published
 | 
			
		||||
 | 
			
		||||
jobs:
 | 
			
		||||
  build:
 | 
			
		||||
@ -40,7 +43,7 @@ jobs:
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
      - name: Free up disk space
 | 
			
		||||
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
 | 
			
		||||
        uses: jlumbroso/free-disk-space@v1.3.1
 | 
			
		||||
        with:
 | 
			
		||||
          tool-cache: true
 | 
			
		||||
          docker-images: false
 | 
			
		||||
@ -51,12 +54,16 @@ jobs:
 | 
			
		||||
      - name: Set up Python
 | 
			
		||||
        uses: actions/setup-python@v5
 | 
			
		||||
        with:
 | 
			
		||||
          python-version: "3.9"
 | 
			
		||||
          python-version: "3.10"
 | 
			
		||||
 | 
			
		||||
      - name: Get llamafactory version
 | 
			
		||||
        id: version
 | 
			
		||||
        run: |
 | 
			
		||||
          echo "tag=$(python setup.py --version | sed 's/\.dev0//')" >> "$GITHUB_OUTPUT"
 | 
			
		||||
          if [ "${{ github.event_name }}" = "release" ]; then
 | 
			
		||||
            echo "tag=$(python setup.py --version)" >> "$GITHUB_OUTPUT"
 | 
			
		||||
          else
 | 
			
		||||
            echo "tag=latest" >> "$GITHUB_OUTPUT"
 | 
			
		||||
          fi
 | 
			
		||||
 | 
			
		||||
      - name: Set up Docker Buildx
 | 
			
		||||
        uses: docker/setup-buildx-action@v3
 | 
			
		||||
@ -86,7 +93,6 @@ jobs:
 | 
			
		||||
            EXTRAS=metrics,deepspeed,liger-kernel
 | 
			
		||||
          push: ${{ github.event_name != 'pull_request' }}
 | 
			
		||||
          tags: |
 | 
			
		||||
            docker.io/hiyouga/llamafactory:latest
 | 
			
		||||
            docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
 | 
			
		||||
          cache-from: type=gha
 | 
			
		||||
          cache-to: type=gha,mode=max
 | 
			
		||||
@ -100,9 +106,7 @@ jobs:
 | 
			
		||||
          file: ./docker/docker-npu/Dockerfile
 | 
			
		||||
          push: ${{ github.event_name != 'pull_request' }}
 | 
			
		||||
          tags: |
 | 
			
		||||
            docker.io/hiyouga/llamafactory:latest-npu-a2
 | 
			
		||||
            docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
 | 
			
		||||
            quay.io/ascend/llamafactory:latest-npu-a2
 | 
			
		||||
            quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
 | 
			
		||||
          cache-from: type=gha
 | 
			
		||||
          cache-to: type=gha,mode=max
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										9
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							@ -30,7 +30,7 @@ jobs:
 | 
			
		||||
        os:
 | 
			
		||||
          - "ubuntu-latest"
 | 
			
		||||
          - "windows-latest"
 | 
			
		||||
          - "macos-13"
 | 
			
		||||
          - "macos-latest"
 | 
			
		||||
        transformers:
 | 
			
		||||
          - null
 | 
			
		||||
        include:  # test backward compatibility
 | 
			
		||||
@ -75,10 +75,11 @@ jobs:
 | 
			
		||||
        run: |
 | 
			
		||||
          python -m pip install "transformers==${{ matrix.transformers }}"
 | 
			
		||||
 | 
			
		||||
      - name: Install transformers to avoid mac os ci errors
 | 
			
		||||
        if: ${{ matrix.os == 'macos-13' }}
 | 
			
		||||
      - name: Update accelerate to avoid mac os ci errors (before accelerate 1.11.0)
 | 
			
		||||
        if: ${{ matrix.os == 'macos-latest' }}
 | 
			
		||||
        run: |
 | 
			
		||||
          python -m pip install "transformers<=4.51.3"
 | 
			
		||||
          python -m pip uninstall -y accelerate
 | 
			
		||||
          python -m pip install "git+https://github.com/huggingface/accelerate.git"
 | 
			
		||||
 | 
			
		||||
      - name: Cache files
 | 
			
		||||
        id: hf-hub-cache
 | 
			
		||||
 | 
			
		||||
@ -1,7 +1,8 @@
 | 
			
		||||
# core deps
 | 
			
		||||
transformers>=4.49.0,<=4.56.2,!=4.52.0
 | 
			
		||||
transformers>=4.49.0,<=4.56.2,!=4.52.0; python_version < '3.10'
 | 
			
		||||
transformers>=4.49.0,<=4.57.0,!=4.52.0; python_version >= '3.10'
 | 
			
		||||
datasets>=2.16.0,<=4.0.0
 | 
			
		||||
accelerate>=1.3.0,<=1.10.1
 | 
			
		||||
accelerate>=1.3.0,<=1.11.0
 | 
			
		||||
peft>=0.14.0,<=0.17.1
 | 
			
		||||
trl>=0.8.6,<=0.9.6
 | 
			
		||||
# gui
 | 
			
		||||
 | 
			
		||||
@ -94,9 +94,9 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 | 
			
		||||
 | 
			
		||||
def check_dependencies() -> None:
 | 
			
		||||
    r"""Check the version of the required packages."""
 | 
			
		||||
    check_version("transformers>=4.49.0,<=4.56.2")
 | 
			
		||||
    check_version("transformers>=4.49.0,<=4.57.0")
 | 
			
		||||
    check_version("datasets>=2.16.0,<=4.0.0")
 | 
			
		||||
    check_version("accelerate>=1.3.0,<=1.10.1")
 | 
			
		||||
    check_version("accelerate>=1.3.0,<=1.11.0")
 | 
			
		||||
    check_version("peft>=0.14.0,<=0.17.1")
 | 
			
		||||
    check_version("trl>=0.8.6,<=0.9.6")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -40,7 +40,7 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
 | 
			
		||||
 | 
			
		||||
    model_type = getattr(model.config, "model_type", None)
 | 
			
		||||
    text_config = getattr(model.config, "text_config", None)
 | 
			
		||||
    text_architectures = getattr(text_config, "architectures", None)
 | 
			
		||||
    text_model_type = getattr(text_config, "model_type", None)
 | 
			
		||||
 | 
			
		||||
    if model_type == "dbrx":
 | 
			
		||||
        from transformers.models.dbrx.modeling_dbrx import DbrxFFN
 | 
			
		||||
@ -105,11 +105,21 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
 | 
			
		||||
 | 
			
		||||
        _set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock])
 | 
			
		||||
 | 
			
		||||
    if model_type == "qwen3_moe" or text_architectures == "Qwen3MoeForCausalLM":
 | 
			
		||||
    if model_type == "qwen3_moe" or text_model_type == "qwen3_moe":  # internvl 3.5
 | 
			
		||||
        from transformers.models.qwen3_moe.modeling_qwen3_moe import Qwen3MoeSparseMoeBlock
 | 
			
		||||
 | 
			
		||||
        _set_z3_leaf_modules(model, [Qwen3MoeSparseMoeBlock])
 | 
			
		||||
 | 
			
		||||
    if model_type == "qwen3_vl_moe":
 | 
			
		||||
        from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeTextSparseMoeBlock
 | 
			
		||||
 | 
			
		||||
        _set_z3_leaf_modules(model, [Qwen3VLMoeTextSparseMoeBlock])
 | 
			
		||||
 | 
			
		||||
    if model_type == "qwen3_omni_moe":
 | 
			
		||||
        from transformers.models.qwen3_omni_moe.modeling_qwen3_omni_moe import Qwen3OmniMoeThinkerTextSparseMoeBlock
 | 
			
		||||
 | 
			
		||||
        _set_z3_leaf_modules(model, [Qwen3OmniMoeThinkerTextSparseMoeBlock])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
 | 
			
		||||
    if not is_trainable or not model_args.moe_aux_loss_coef:
 | 
			
		||||
 | 
			
		||||
@ -376,7 +376,7 @@ def test_qwen2_vl_plugin():
 | 
			
		||||
@pytest.mark.skipif(not is_transformers_version_greater_than("4.57.0"), reason="Requires transformers>=4.57.0")
 | 
			
		||||
def test_qwen3_vl_plugin():
 | 
			
		||||
    frame_seqlen = 1
 | 
			
		||||
    tokenizer_module = _load_tokenizer_module(model_name_or_path="Qwen/Qwen3-VL-235B-A22B-Instruct")
 | 
			
		||||
    tokenizer_module = _load_tokenizer_module(model_name_or_path="Qwen/Qwen3-VL-30B-A3B-Instruct")
 | 
			
		||||
    qwen3_vl_plugin = get_mm_plugin(name="qwen3_vl", video_token="<|video_pad|>")
 | 
			
		||||
    check_inputs = {"plugin": qwen3_vl_plugin, **tokenizer_module}
 | 
			
		||||
    check_inputs["expected_mm_messages"] = [
 | 
			
		||||
 | 
			
		||||
@ -1,2 +1,2 @@
 | 
			
		||||
# change if test fails or cache is outdated
 | 
			
		||||
0.9.4.102
 | 
			
		||||
0.9.4.103
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user