From 519ac9280388a0bde12df5b04399b544ccffb1c5 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 27 May 2025 18:15:23 +0800 Subject: [PATCH] [assets] update docker files (#8176) --- .gitignore | 2 +- README.md | 24 ++---- README_zh.md | 24 ++---- docker/docker-cuda/Dockerfile | 110 ++++++++---------------- docker/docker-cuda/Dockerfile.base | 55 ++++++++++++ docker/docker-cuda/docker-compose.yml | 12 +-- docker/docker-npu/Dockerfile | 79 ++++++++---------- docker/docker-npu/docker-compose.yml | 4 +- docker/docker-rocm/Dockerfile | 116 +++++++++----------------- docker/docker-rocm/docker-compose.yml | 12 +-- setup.py | 2 +- 11 files changed, 187 insertions(+), 253 deletions(-) create mode 100644 docker/docker-cuda/Dockerfile.base diff --git a/.gitignore b/.gitignore index f3307c4f..0a3a47bd 100644 --- a/.gitignore +++ b/.gitignore @@ -166,8 +166,8 @@ cython_debug/ uv.lock # custom .gitignore -ms_cache/ hf_cache/ +ms_cache/ om_cache/ cache/ config/ diff --git a/README.md b/README.md index 2d6eeef7..b68ea1cb 100644 --- a/README.md +++ b/README.md @@ -663,14 +663,11 @@ For CUDA users: ```bash docker build -f ./docker/docker-cuda/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -docker run -dit --gpus=all \ +docker run -dit --ipc=host --gpus=all \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -678,7 +675,6 @@ docker run -dit --gpus=all \ -v ./output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -688,14 +684,12 @@ docker exec -it llamafactory bash For Ascend NPU users: ```bash -# Choose docker image upon your environment docker build -f ./docker/docker-npu/Dockerfile \ - --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -# Change `device` upon your resources -docker run -dit \ +docker run -dit --ipc=host \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -711,7 +705,6 @@ docker run -dit \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ --device /dev/hisi_hdc \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -722,25 +715,20 @@ For AMD ROCm users: ```bash docker build -f ./docker/docker-rocm/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . 
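# Note: the INSTALL_* build args are gone; EXTRAS maps directly to the optional
# dependency groups in setup.py. An illustrative (non-default) value such as
# `--build-arg EXTRAS=metrics,deepspeed` would bake DeepSpeed into the image.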
-docker run -dit \ +docker run -dit --ipc=host \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ - -v ./saves:/app/saves \ -p 7860:7860 \ -p 8000:8000 \ --device /dev/kfd \ --device /dev/dri \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest diff --git a/README_zh.md b/README_zh.md index 31c77b75..e1a37d45 100644 --- a/README_zh.md +++ b/README_zh.md @@ -665,14 +665,11 @@ CUDA 用户: ```bash docker build -f ./docker/docker-cuda/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -docker run -dit --gpus=all \ +docker run -dit --ipc=host --gpus=all \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -680,7 +677,6 @@ docker run -dit --gpus=all \ -v ./output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -690,14 +686,12 @@ docker exec -it llamafactory bash 昇腾 NPU 用户: ```bash -# 根据您的环境选择镜像 docker build -f ./docker/docker-npu/Dockerfile \ - --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -# 根据您的资源更改 `device` -docker run -dit \ +docker run -dit --ipc=host \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -713,7 +707,6 @@ docker run -dit \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ --device /dev/hisi_hdc \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -724,25 +717,20 @@ AMD ROCm 用户: ```bash docker build -f ./docker/docker-rocm/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . 
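# 提示:INSTALL_* 构建参数已移除,EXTRAS 直接对应 setup.py 中的可选依赖组;
# 例如(仅作示例)`--build-arg EXTRAS=metrics,deepspeed` 可将 DeepSpeed 一并装入镜像。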
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
-    -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest

diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile
index aac9635e..f0e2c5f0 100644
--- a/docker/docker-cuda/Dockerfile
+++ b/docker/docker-cuda/Dockerfile
@@ -1,101 +1,63 @@
-# Default use the NVIDIA official image with PyTorch 2.6.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3
-FROM ${BASE_IMAGE}
+# https://hub.docker.com/r/hiyouga/pytorch/tags
+ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0
+FROM "${BASE_IMAGE}"
+
+# Installation arguments (declared after FROM so they remain in scope for the build stage)
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_EETQ=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . 
/app -# Install the LLaMA Factory -RUN EXTRA_PACKAGES="metrics"; \ - if [ "$INSTALL_BNB" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ - fi; \ - if [ "$INSTALL_VLLM" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ - fi; \ - if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ - fi; \ - if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \ - fi; \ - if [ "$INSTALL_HQQ" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \ - fi; \ - if [ "$INSTALL_EETQ" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \ - fi; \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \ - else \ - pip install -e ".[$EXTRA_PACKAGES]"; \ - fi +# Install LLaMA Factory +RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation # Rebuild flash attention -RUN pip uninstall -y transformer-engine flash-attn && \ - if [ "$INSTALL_FLASHATTN" == "true" ]; then \ +RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \ pip uninstall -y ninja && \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY ninja && \ - pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \ - else \ - pip install ninja && \ - pip install --no-cache-dir flash-attn --no-build-isolation; \ - fi; \ - fi - - -# Unset http proxy -RUN if [ -n "$HTTP_PROXY" ]; then \ - unset http_proxy; \ - unset https_proxy; \ + pip install --no-cache-dir ninja && \ + pip install --no-cache-dir flash-attn --no-build-isolation; \ fi # Set up volumes -VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ] -# Expose port 7860 for the LLaMA Board +# Expose port 7860 for LLaMA Board ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 -# Expose port 8000 for the API service +# Expose port 8000 for API service ENV API_PORT 8000 EXPOSE 8000 + +# unset proxy +ENV http_proxy= +ENV https_proxy= + +# Reset pip config +RUN pip config unset global.index-url && \ + pip config unset global.extra-index-url diff --git a/docker/docker-cuda/Dockerfile.base b/docker/docker-cuda/Dockerfile.base new file mode 100644 index 00000000..f8c09827 --- /dev/null +++ b/docker/docker-cuda/Dockerfile.base @@ -0,0 +1,55 @@ +# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11) +# https://hub.docker.com/r/pytorch/pytorch/tags +FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime + +# Define environments +ENV MAX_JOBS=16 +ENV VLLM_WORKER_MULTIPROC_METHOD=spawn +ENV DEBIAN_FRONTEND=noninteractive +ENV NODE_OPTIONS="" +ENV PIP_ROOT_USER_ACTION=ignore + +# Define installation arguments +ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ +ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + +# Set apt source +RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \ + { \ + echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \ + echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \ + echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \ + echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \ + } > /etc/apt/sources.list + +# Install systemctl and wget +RUN apt-get update && \ + apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \ + apt-get clean + +# Install git and 
vim
+RUN apt-get update && \
+    apt-get install -y git vim && \
+    apt-get clean
+
+# Install gcc and g++
+RUN apt-get update && \
+    apt-get install -y gcc g++ && \
+    apt-get clean
+
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Install flash-attn-2.7.4.post1 (cxx11abi=False)
+RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
+    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
+
+# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
+RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
+    pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml
index fa386cc8..d115f3b2 100644
--- a/docker/docker-cuda/docker-compose.yml
+++ b/docker/docker-cuda/docker-compose.yml
@@ -4,14 +4,8 @@ services:
       dockerfile: ./docker/docker-cuda/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_HQQ: "false"
-        INSTALL_EETQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
       - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
     ports:
       - "7860:7860"
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
               count: "all"
-              capabilities: [gpu]
+              capabilities: [ gpu ]
     restart: unless-stopped
diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile
index 3431176b..9b507737 100644
--- a/docker/docker-npu/Dockerfile
+++ b/docker/docker-npu/Dockerfile
@@ -1,67 +1,56 @@
-# Use the Ubuntu 22.04 image with CANN 8.0.rc1
-# More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
-# FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
-FROM ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
-# FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
-# FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
+# https://hub.docker.com/r/ascendai/cann/tags
+ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
+FROM "${BASE_IMAGE}"
+
+# Installation arguments (declared after FROM so they remain in scope for the build stage)
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
 
 # Define environments
+ENV MAX_JOBS=16
+ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 ENV DEBIAN_FRONTEND=noninteractive
-
-# Define installation arguments
-ARG INSTALL_DEEPSPEED=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
-ARG HTTP_PROXY=
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url 
"${PIP_INDEX}" && \ + pip config set global.extra-index-url "${PIP_INDEX}" && \ + python -m pip install --upgrade pip # Install the requirements COPY requirements.txt /app -RUN pip config set global.index-url "$PIP_INDEX" && \ - pip config set global.extra-index-url "$TORCH_INDEX" && \ - python -m pip install --upgrade pip && \ - if [ -n "$HTTP_PROXY" ]; then \ - python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \ - else \ - python -m pip install -r requirements.txt; \ - fi +RUN pip install --no-cache-dir -r requirements.txt # Copy the rest of the application into the image COPY . /app -# Install the LLaMA Factory -RUN EXTRA_PACKAGES="torch-npu,metrics"; \ - if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ - fi; \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \ - else \ - pip install -e ".[$EXTRA_PACKAGES]"; \ - fi - -# Unset http proxy -RUN if [ -n "$HTTP_PROXY" ]; then \ - unset http_proxy; \ - unset https_proxy; \ - fi +# Install LLaMA Factory +RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation # Set up volumes -VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ] -# Expose port 7860 for the LLaMA Board +# Expose port 7860 for LLaMA Board ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 -# Expose port 8000 for the API service +# Expose port 8000 for API service ENV API_PORT 8000 EXPOSE 8000 + +# unset proxy +ENV http_proxy= +ENV https_proxy= + +# Reset pip config +RUN pip config unset global.index-url && \ + pip config unset global.extra-index-url diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml index dd9a8e17..50664e2e 100644 --- a/docker/docker-npu/docker-compose.yml +++ b/docker/docker-npu/docker-compose.yml @@ -4,8 +4,8 @@ services: dockerfile: ./docker/docker-npu/Dockerfile context: ../.. 
       args:
-        INSTALL_DEEPSPEED: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -22,7 +22,7 @@
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
diff --git a/docker/docker-rocm/Dockerfile b/docker/docker-rocm/Dockerfile
index 9595bafa..a552abfb 100644
--- a/docker/docker-rocm/Dockerfile
+++ b/docker/docker-rocm/Dockerfile
@@ -1,21 +1,23 @@
-FROM hardandheavy/transformers-rocm:2.2.0
+# https://hub.docker.com/r/rocm/pytorch/tags
+ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
+FROM "${BASE_IMAGE}"
+
+# Installation arguments (declared after FROM so they remain in scope for the build stage)
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_PYTORCH=true
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
-ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Use Bash instead of default /bin/sh
 SHELL ["/bin/bash", "-c"]
@@ -23,83 +25,47 @@ SHELL ["/bin/bash", "-c"]
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Reinstall pytorch rocm
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . 
/app -# Install the LLaMA Factory -RUN EXTRA_PACKAGES="metrics"; \ - if [ "$INSTALL_BNB" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ - fi; \ - if [ "$INSTALL_VLLM" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ - fi; \ - if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ - fi; \ - if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \ - fi; \ - if [ "$INSTALL_HQQ" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \ - fi; \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \ - else \ - pip install -e ".[$EXTRA_PACKAGES]"; \ - fi - -# Reinstall pytorch -# This is necessary to ensure that the correct version of PyTorch is installed -RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \ - pip uninstall -y torch torchvision torchaudio && \ - pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \ - fi +# Install LLaMA Factory +RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation # Rebuild flash attention -RUN pip uninstall -y transformer-engine flash-attn && \ - if [ "$INSTALL_FLASHATTN" == "true" ]; then \ +RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \ pip uninstall -y ninja && \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY ninja && \ - pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \ - else \ - pip install ninja && \ - pip install --no-cache-dir flash-attn --no-build-isolation; \ - fi; \ - fi - -# Unset http proxy -RUN if [ -n "$HTTP_PROXY" ]; then \ - unset http_proxy; \ - unset https_proxy; \ + pip install --no-cache-dir ninja && \ + pip install --no-cache-dir flash-attn --no-build-isolation; \ fi # Set up volumes -VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ] -# Expose port 7860 for the LLaMA Board +# Expose port 7860 for LLaMA Board ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 -# Expose port 8000 for the API service +# Expose port 8000 for API service ENV API_PORT 8000 EXPOSE 8000 + +# unset proxy +ENV http_proxy= +ENV https_proxy= + +# Reset pip config +RUN pip config unset global.index-url && \ + pip config unset global.extra-index-url diff --git a/docker/docker-rocm/docker-compose.yml b/docker/docker-rocm/docker-compose.yml index caaf4e16..17786e7c 100644 --- a/docker/docker-rocm/docker-compose.yml +++ b/docker/docker-rocm/docker-compose.yml @@ -4,15 +4,8 @@ services: dockerfile: ./docker/docker-rocm/Dockerfile context: ../.. 
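      # PYTORCH_INDEX no longer needs to be passed here; the Dockerfile above now
      # defaults it to the stable rocm6.3 wheel index instead of the nightly one.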
args: - INSTALL_BNB: "false" - INSTALL_VLLM: "false" - INSTALL_DEEPSPEED: "false" - INSTALL_FLASHATTN: "false" - INSTALL_LIGER_KERNEL: "false" - INSTALL_PYTORCH: "true" - INSTALL_HQQ: "false" PIP_INDEX: https://pypi.org/simple - PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3 + EXTRAS: metrics container_name: llamafactory volumes: - ../../hf_cache:/root/.cache/huggingface @@ -20,13 +13,12 @@ services: - ../../om_cache:/root/.cache/openmind - ../../data:/app/data - ../../output:/app/output - - ../../saves:/app/saves ports: - "7860:7860" - "8000:8000" ipc: host tty: true - shm_size: "16gb" + # shm_size: "16gb" # ipc: host is set stdin_open: true command: bash devices: diff --git a/setup.py b/setup.py index 6066a957..a7fc467c 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ extra_require = { "eetq": ["eetq"], "gptq": ["optimum>=1.24.0", "gptqmodel>=2.0.0"], "aqlm": ["aqlm[gpu]>=1.1.0"], - "vllm": ["vllm>=0.4.3,<=0.8.5"], + "vllm": ["vllm>=0.4.3,<=0.8.6"], "sglang": ["sglang[srt]>=0.4.5", "transformers==4.51.1"], "galore": ["galore-torch"], "apollo": ["apollo-torch"],
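As a quick sanity check of the loosened vLLM pin (a sketch, not part of the patch: it assumes an image built with the vllm extra, e.g. `--build-arg EXTRAS=metrics,vllm`, and a running container named `llamafactory` as in the README examples above):

```bash
# The reported version should fall within the updated constraint from setup.py: >=0.4.3,<=0.8.6
docker exec -it llamafactory pip show vllm
```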