diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 90e9ef87..f5e099fa 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -21,10 +21,17 @@ on:
 
 jobs:
   build:
+    strategy:
+      fail-fast: false
+      matrix:
+        device:
+          - "cuda"
+          - "npu"
+
     runs-on: ubuntu-latest
 
     concurrency:
-      group: ${{ github.workflow }}-${{ github.ref }}
+      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.device }}
       cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
     environment:
@@ -33,12 +40,10 @@ jobs:
 
     steps:
       - name: Free up disk space
-        run: |
-          df -h
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache
-          df -h
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+        with:
+          tool-cache: true
+          docker-images: false
 
       - name: Checkout
         uses: actions/checkout@v4
@@ -57,13 +62,22 @@ jobs:
         uses: docker/setup-buildx-action@v3
 
       - name: Login to Docker Hub
-        if: github.event_name != 'pull_request'
+        if: ${{ github.event_name != 'pull_request' }}
         uses: docker/login-action@v3
         with:
           username: ${{ vars.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
-      - name: Build and push Docker image
+      - name: Login to Quay
+        if: ${{ github.event_name != 'pull_request' && matrix.device == 'npu' }}
+        uses: docker/login-action@v3
+        with:
+          registry: quay.io
+          username: ${{ vars.QUAY_ASCEND_USERNAME }}
+          password: ${{ secrets.QUAY_ASCEND_TOKEN }}
+
+      - name: Build and push Docker image (CUDA)
+        if: ${{ matrix.device == 'cuda' }}
         uses: docker/build-push-action@v6
         with:
           context: .
@@ -76,3 +90,21 @@ jobs:
             docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
+
+      - name: Build and push Docker image (NPU)
+        if: ${{ matrix.device == 'npu' }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          file: ./docker/docker-npu/Dockerfile
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: |
+            docker.io/hiyouga/llamafactory:latest-npu-a2
+            docker.io/hiyouga/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
+            quay.io/ascend/llamafactory:latest-npu-a2
+            quay.io/ascend/llamafactory:${{ steps.version.outputs.tag }}-npu-a2
+          # Separate cache scope: type=gha defaults to one shared scope, so the
+          # cuda and npu matrix legs would otherwise overwrite each other's cache.
+          cache-from: type=gha,scope=npu
+          cache-to: type=gha,mode=max,scope=npu
diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile
index 40880dbc..7cd5e8f4 100644
--- a/docker/docker-npu/Dockerfile
+++ b/docker/docker-npu/Dockerfile
@@ -1,11 +1,12 @@
 # https://hub.docker.com/r/ascendai/cann/tags
-ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
+ARG BASE_IMAGE=ascendai/cann:8.1.rc1-910b-ubuntu22.04-py3.11
 FROM ${BASE_IMAGE}
 
 # Installation arguments
 ARG PIP_INDEX=https://pypi.org/simple
 ARG EXTRAS=torch-npu,metrics
 ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/cpu
 
 # Define environments
 ENV MAX_JOBS=16
@@ -28,6 +29,10 @@ RUN pip config set global.index-url "${PIP_INDEX}" && \
     pip config set global.extra-index-url "${PIP_INDEX}" && \
     pip install --no-cache-dir --upgrade pip packaging wheel setuptools
 
+# Install torch-npu
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --no-cache-dir "torch-npu==2.5.1" "torchvision==0.20.1" --index-url "${PYTORCH_INDEX}"
+
 # Install the requirements
 COPY requirements.txt /app
 RUN pip install --no-cache-dir -r requirements.txt
diff --git a/setup.py b/setup.py
index 3c5b445b..73f9424e 100644
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,7 @@ def get_console_scripts() -> list[str]:
 
 extra_require = {
     "torch": ["torch>=2.0.0", "torchvision>=0.15.0"],
-    "torch-npu": ["torch==2.5.1", "torchvision==0.20.1", "torch-npu==2.5.1", "decorator"],
+    "torch-npu": ["torch-npu==2.5.1", "torchvision==0.20.1", "decorator"],
     "metrics": ["nltk", "jieba", "rouge-chinese"],
     "deepspeed": ["deepspeed>=0.10.0,<=0.16.9"],
     "liger-kernel": ["liger-kernel>=0.5.5"],