Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-07-31 10:42:50 +08:00)
[assets] update docker files (#8176)

parent 16e1a5097e
commit 519ac92803
.gitignore (vendored) | 2

@@ -166,8 +166,8 @@ cython_debug/
 uv.lock
 
 # custom .gitignore
-ms_cache/
 hf_cache/
+ms_cache/
 om_cache/
 cache/
 config/
README.md | 24

@@ -663,14 +663,11 @@ For CUDA users:
 
 ```bash
 docker build -f ./docker/docker-cuda/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit --gpus=all \
+docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -678,7 +675,6 @@ docker run -dit --gpus=all \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -688,14 +684,12 @@ docker exec -it llamafactory bash
 For Ascend NPU users:
 
 ```bash
-# Choose docker image upon your environment
 docker build -f ./docker/docker-npu/Dockerfile \
-    --build-arg INSTALL_DEEPSPEED=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-# Change `device` upon your resources
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -711,7 +705,6 @@ docker run -dit \
     --device /dev/davinci_manager \
     --device /dev/devmm_svm \
     --device /dev/hisi_hdc \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -722,25 +715,20 @@ For AMD ROCm users:
 
 ```bash
 docker build -f ./docker/docker-rocm/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
-    -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
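The four INSTALL_* build arguments are collapsed into a single EXTRAS argument that is forwarded to `pip install -e ".[EXTRAS]"`, and the fixed `--shm-size 16G` is replaced by `--ipc=host`, which shares the host IPC namespace (and its /dev/shm) with the container. Below is a sketch of requesting several optional extras at build time; the extras names come from setup.py, and combining `metrics,deepspeed` here is an illustrative assumption, not something shown in the commit:

```bash
# Hypothetical build that asks for more than the default "metrics" extra;
# EXTRAS is passed through to `pip install -e ".[${EXTRAS}]"` inside the image.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    --build-arg EXTRAS=metrics,deepspeed \
    -t llamafactory:latest .
```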
README_zh.md | 24

@@ -665,14 +665,11 @@ CUDA 用户:
 
 ```bash
 docker build -f ./docker/docker-cuda/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit --gpus=all \
+docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -680,7 +677,6 @@ docker run -dit --gpus=all \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -690,14 +686,12 @@ docker exec -it llamafactory bash
 昇腾 NPU 用户:
 
 ```bash
-# 根据您的环境选择镜像
 docker build -f ./docker/docker-npu/Dockerfile \
-    --build-arg INSTALL_DEEPSPEED=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-# 根据您的资源更改 `device`
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -713,7 +707,6 @@ docker run -dit \
     --device /dev/davinci_manager \
     --device /dev/devmm_svm \
     --device /dev/hisi_hdc \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -724,25 +717,20 @@ AMD ROCm 用户:
 
 ```bash
 docker build -f ./docker/docker-rocm/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
-    -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
docker/docker-cuda/Dockerfile

@@ -1,101 +1,63 @@
-# Default use the NVIDIA official image with PyTorch 2.6.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3
-FROM ${BASE_IMAGE}
+# Installation arguments
+ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+
+# https://hub.docker.com/r/hiyouga/pytorch/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_EETQ=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ "$INSTALL_EETQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
         pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
     fi
 
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    unset http_proxy; \
-    unset https_proxy; \
-    fi
-
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
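The rewritten CUDA Dockerfile takes the base image, pip index, extras and proxy entirely through build arguments, wires HTTP_PROXY into `ENV http_proxy`/`https_proxy` so every layer sees it, and blanks those variables again near the end. Environment-specific tweaks therefore move to the `docker build` command line; a sketch with placeholder values (the proxy address and the extras selection are assumptions, not values from the commit):

```bash
# Hypothetical invocation overriding the new build arguments; the proxy URL
# and EXTRAS value are illustrative only.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0 \
    --build-arg EXTRAS=metrics,vllm \
    --build-arg HTTP_PROXY=http://proxy.example.com:7890 \
    -t llamafactory:latest .
```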
docker/docker-cuda/Dockerfile.base | 55 (new file)

@@ -0,0 +1,55 @@
+# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)
+# https://hub.docker.com/r/pytorch/pytorch/tags
+FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime
+
+# Define environments
+ENV MAX_JOBS=16
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+
+# Define installation arguments
+ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
+ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+# Set apt source
+RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+    { \
+    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
+    } > /etc/apt/sources.list
+
+# Install systemctl and wget
+RUN apt-get update && \
+    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \
+    apt-get clean
+
+# Install git and vim
+RUN apt-get update && \
+    apt-get install -y git vim && \
+    apt-get clean
+
+# Install gcc and g++
+RUN apt-get update && \
+    apt-get install -y gcc g++ && \
+    apt-get clean
+
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Install flash-attn-2.7.4.post1 (cxx11abi=False)
+RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
+    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
+
+# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
+RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
+    pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
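The new Dockerfile.base appears to be the recipe for the prebuilt hiyouga/pytorch base image that the main CUDA Dockerfile consumes via BASE_IMAGE: it starts from the official pytorch/pytorch 2.6.0 runtime and pre-installs matching flash-attn and flashinfer wheels. A sketch of building it locally and pointing the main image at the result; the local tag is an arbitrary placeholder, not a name used by the commit:

```bash
# Build the base image first (flash-attn 2.7.4.post1 and flashinfer are baked in),
# then reuse it as BASE_IMAGE for the main CUDA Dockerfile.
docker build -f ./docker/docker-cuda/Dockerfile.base -t llamafactory-base:local .
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg BASE_IMAGE=llamafactory-base:local \
    -t llamafactory:latest .
```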
docker/docker-cuda/docker-compose.yml

@@ -4,14 +4,8 @@ services:
       dockerfile: ./docker/docker-cuda/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_HQQ: "false"
-        INSTALL_EETQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -24,7 +18,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     deploy:
@@ -33,5 +27,5 @@ services:
         devices:
           - driver: nvidia
             count: "all"
-            capabilities: [gpu]
+            capabilities: [ gpu ]
     restart: unless-stopped
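With `ipc: host` already present, the compose files comment out `shm_size` rather than keeping both settings. The workflow itself is unchanged; a sketch assuming the repository layout implied by the build context:

```bash
# Build and start the CUDA container defined by this compose file, then open a shell.
cd docker/docker-cuda
docker compose up -d
docker exec -it llamafactory bash
```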
docker/docker-npu/Dockerfile

@@ -1,67 +1,56 @@
-# Use the Ubuntu 22.04 image with CANN 8.0.rc1
-# More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
-# FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
-FROM ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
-# FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
-# FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
+# Installation arguments
+ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+
+# https://hub.docker.com/r/ascendai/cann/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
+ENV MAX_JOBS=16
+ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 ENV DEBIAN_FRONTEND=noninteractive
-
-# Define installation arguments
-ARG INSTALL_DEEPSPEED=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
-ARG HTTP_PROXY=
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$TORCH_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="torch-npu,metrics"; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    unset http_proxy; \
-    unset https_proxy; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
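One behavioral consequence for NPU users: the old Dockerfile always installed the torch-npu and metrics extras, while the new one installs only whatever EXTRAS is set to (default metrics). If torch-npu should still be baked into the image, it presumably has to be requested explicitly, along these lines (an assumption based on the removed EXTRA_PACKAGES default, not stated in the commit):

```bash
# Assumed equivalent of the old default EXTRA_PACKAGES="torch-npu,metrics".
docker build -f ./docker/docker-npu/Dockerfile \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    --build-arg EXTRAS=torch-npu,metrics \
    -t llamafactory:latest .
```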
docker/docker-npu/docker-compose.yml

@@ -4,8 +4,8 @@ services:
       dockerfile: ./docker/docker-npu/Dockerfile
       context: ../..
       args:
-        INSTALL_DEEPSPEED: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -22,7 +22,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
docker/docker-rocm/Dockerfile

@@ -1,21 +1,23 @@
-FROM hardandheavy/transformers-rocm:2.2.0
+# Installation arguments
+ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
+
+# https://hub.docker.com/r/rocm/pytorch/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_PYTORCH=true
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
-ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Use Bash instead of default /bin/sh
 SHELL ["/bin/bash", "-c"]
@@ -23,83 +25,47 @@ SHELL ["/bin/bash", "-c"]
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Reinstall pytorch rocm
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Reinstall pytorch
-# This is necessary to ensure that the correct version of PyTorch is installed
-RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \
-    pip uninstall -y torch torchvision torchaudio && \
-    pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
         pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
     fi
 
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    unset http_proxy; \
-    unset https_proxy; \
-    fi
-
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
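The ROCm image now reinstalls PyTorch from PYTORCH_INDEX unconditionally, right after the pip source is configured, instead of gating it behind INSTALL_PYTORCH, and the default index moves from the nightly to the stable rocm6.3 wheels. A different wheel index could presumably be selected per build; the rocm6.2 URL below is illustrative, not something taken from the commit:

```bash
# Hypothetical override of the PyTorch wheel index used by the ROCm Dockerfile.
docker build -f ./docker/docker-rocm/Dockerfile \
    --build-arg PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.2 \
    --build-arg EXTRAS=metrics \
    -t llamafactory:latest .
```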
docker/docker-rocm/docker-compose.yml

@@ -4,15 +4,8 @@ services:
       dockerfile: ./docker/docker-rocm/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_PYTORCH: "true"
-        INSTALL_HQQ: "false"
         PIP_INDEX: https://pypi.org/simple
-        PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -20,13 +13,12 @@ services:
       - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
-      - ../../saves:/app/saves
     ports:
       - "7860:7860"
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
setup.py | 2

@@ -52,7 +52,7 @@ extra_require = {
     "eetq": ["eetq"],
     "gptq": ["optimum>=1.24.0", "gptqmodel>=2.0.0"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.8.5"],
+    "vllm": ["vllm>=0.4.3,<=0.8.6"],
     "sglang": ["sglang[srt]>=0.4.5", "transformers==4.51.1"],
     "galore": ["galore-torch"],
     "apollo": ["apollo-torch"],
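The setup.py change raises the vLLM ceiling from 0.8.5 to 0.8.6. For a source install, the extra is pulled in as usual:

```bash
# Installing the vllm extra from a source checkout; any vllm version in
# >=0.4.3,<=0.8.6 now satisfies the constraint.
pip install -e ".[vllm]"
```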