mirror of https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-11-04 18:02:19 +08:00

[assets] update docker files (#8176)

This commit is contained in:
parent 46ccf84aaa
commit 00974a3169
.gitignore (2 changes, vendored)
@@ -166,8 +166,8 @@ cython_debug/
 uv.lock
 
 # custom .gitignore
-ms_cache/
 hf_cache/
+ms_cache/
 om_cache/
 cache/
 config/
README.md (24 changes)
@@ -663,14 +663,11 @@ For CUDA users:
 
 ```bash
 docker build -f ./docker/docker-cuda/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit --gpus=all \
+docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -678,7 +675,6 @@ docker run -dit --gpus=all \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -688,14 +684,12 @@ docker exec -it llamafactory bash
 For Ascend NPU users:
 
 ```bash
-# Choose docker image upon your environment
 docker build -f ./docker/docker-npu/Dockerfile \
-    --build-arg INSTALL_DEEPSPEED=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-# Change `device` upon your resources
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -711,7 +705,6 @@ docker run -dit \
     --device /dev/davinci_manager \
     --device /dev/devmm_svm \
     --device /dev/hisi_hdc \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -722,25 +715,20 @@ For AMD ROCm users:
 
 ```bash
 docker build -f ./docker/docker-rocm/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
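In short, the README quickstarts no longer toggle individual INSTALL_* build args; optional dependencies are selected through the single EXTRAS build arg (extras defined in setup.py), and --ipc=host replaces the explicit --shm-size setting. A minimal sketch of the resulting CUDA workflow, assembled from the new lines above (the deepspeed value is only an illustrative extra):

```bash
# Build the CUDA image; optional features are chosen via EXTRAS.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    --build-arg EXTRAS=metrics,deepspeed \
    -t llamafactory:latest .

# Run it; --ipc=host makes a separate --shm-size flag unnecessary.
docker run -dit --ipc=host --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface \
    -p 7860:7860 -p 8000:8000 \
    --name llamafactory \
    llamafactory:latest
docker exec -it llamafactory bash
```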
README_zh.md (24 changes)
@@ -665,14 +665,11 @@ CUDA 用户:
 
 ```bash
 docker build -f ./docker/docker-cuda/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit --gpus=all \
+docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -680,7 +677,6 @@ docker run -dit --gpus=all \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -690,14 +686,12 @@ docker exec -it llamafactory bash
 昇腾 NPU 用户:
 
 ```bash
-# 根据您的环境选择镜像
 docker build -f ./docker/docker-npu/Dockerfile \
-    --build-arg INSTALL_DEEPSPEED=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-# 根据您的资源更改 `device`
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -713,7 +707,6 @@ docker run -dit \
     --device /dev/davinci_manager \
     --device /dev/devmm_svm \
     --device /dev/hisi_hdc \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -724,25 +717,20 @@ AMD ROCm 用户:
 
 ```bash
 docker build -f ./docker/docker-rocm/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
docker/docker-cuda/Dockerfile

@@ -1,101 +1,63 @@
-# Default use the NVIDIA official image with PyTorch 2.6.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3
-FROM ${BASE_IMAGE}
+# Installation arguments
+ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+
+# https://hub.docker.com/r/hiyouga/pytorch/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_EETQ=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ "$INSTALL_EETQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
         pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
     fi
 
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
-    fi
-
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
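The per-command `--proxy` handling is gone: the new Dockerfile simply exports `http_proxy`/`https_proxy` from the `HTTP_PROXY` build arg for the whole build and clears them at the end. A sketch of passing a proxy at build time (the proxy URL is a placeholder, not anything the project defines):

```bash
# Hypothetical proxy endpoint; HTTP_PROXY and EXTRAS are the build args defined above.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg HTTP_PROXY=http://proxy.example.com:8080 \
    --build-arg EXTRAS=metrics \
    -t llamafactory:latest .
```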
docker/docker-cuda/Dockerfile.base (55 changes, new file)
@@ -0,0 +1,55 @@
+# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)
+# https://hub.docker.com/r/pytorch/pytorch/tags
+FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime
+
+# Define environments
+ENV MAX_JOBS=16
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+
+# Define installation arguments
+ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
+ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+# Set apt source
+RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+    { \
+    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
+    } > /etc/apt/sources.list
+
+# Install systemctl and wget
+RUN apt-get update && \
+    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \
+    apt-get clean
+
+# Install git and vim
+RUN apt-get update && \
+    apt-get install -y git vim && \
+    apt-get clean
+
+# Install gcc and g++
+RUN apt-get update && \
+    apt-get install -y gcc g++ && \
+    apt-get clean
+
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Install flash-attn-2.7.4.post1 (cxx11abi=False)
+RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
+    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
+
+# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
+RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
+    pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
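Presumably this base file exists so the heavyweight pieces (the prebuilt flash-attn and flashinfer wheels) are baked into a reusable image that the main CUDA Dockerfile can consume through its BASE_IMAGE build arg; the commit itself does not spell out that pairing, and the tag below is illustrative rather than one published by the project:

```bash
# Build the base image once (tag name is an assumption for illustration).
docker build -f ./docker/docker-cuda/Dockerfile.base -t local/pytorch-base:th2.6.0-cu124 .

# Point the main CUDA Dockerfile at it via the BASE_IMAGE build arg.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg BASE_IMAGE=local/pytorch-base:th2.6.0-cu124 \
    -t llamafactory:latest .
```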
docker/docker-cuda/docker-compose.yml

@@ -4,14 +4,8 @@ services:
       dockerfile: ./docker/docker-cuda/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_HQQ: "false"
-        INSTALL_EETQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -24,7 +18,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     deploy:
@@ -33,5 +27,5 @@ services:
           devices:
           - driver: nvidia
             count: "all"
-            capabilities: [gpu]
+            capabilities: [ gpu ]
     restart: unless-stopped
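With the build args trimmed to PIP_INDEX and EXTRAS, the compose route stays a two-command affair. A sketch, assuming it is run from the directory holding this compose file and that the service carries the same name as its container (llamafactory):

```bash
cd docker/docker-cuda
docker compose up -d                      # build args come from the compose file
docker compose exec llamafactory bash     # open a shell in the running container
```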
docker/docker-npu/Dockerfile

@@ -1,67 +1,56 @@
-# Use the Ubuntu 22.04 image with CANN 8.0.rc1
-# More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
-# FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
-FROM ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
-# FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
-# FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
+# Installation arguments
+ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+
+# https://hub.docker.com/r/ascendai/cann/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
+ENV MAX_JOBS=16
+ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 ENV DEBIAN_FRONTEND=noninteractive
-
-# Define installation arguments
-ARG INSTALL_DEEPSPEED=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
-ARG HTTP_PROXY=
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$TORCH_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="torch-npu,metrics"; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
docker/docker-npu/docker-compose.yml

@@ -4,8 +4,8 @@ services:
       dockerfile: ./docker/docker-npu/Dockerfile
       context: ../..
       args:
-        INSTALL_DEEPSPEED: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -22,7 +22,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
docker/docker-rocm/Dockerfile

@@ -1,21 +1,23 @@
-FROM hardandheavy/transformers-rocm:2.2.0
+# Installation arguments
+ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
+
+# https://hub.docker.com/r/rocm/pytorch/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_PYTORCH=true
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
-ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Use Bash instead of default /bin/sh
 SHELL ["/bin/bash", "-c"]
@@ -23,83 +25,47 @@ SHELL ["/bin/bash", "-c"]
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Reinstall pytorch rocm
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Reinstall pytorch
-# This is necessary to ensure that the correct version of PyTorch is installed
-RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \
-        pip uninstall -y torch torchvision torchaudio && \
-        pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
         pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
     fi
 
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
-    fi
-
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
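One ROCm-specific detail: the PyTorch reinstall is no longer gated behind an INSTALL_PYTORCH arg; it always pulls wheels from PYTORCH_INDEX, whose default moved from the nightly rocm6.3 index to the stable one. A sketch of restoring the old nightly behaviour by overriding the arg at build time (both URLs appear in the diff above):

```bash
docker build -f ./docker/docker-rocm/Dockerfile \
    --build-arg PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3 \
    -t llamafactory:latest .
```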
docker/docker-rocm/docker-compose.yml

@@ -4,15 +4,8 @@ services:
       dockerfile: ./docker/docker-rocm/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_PYTORCH: "true"
-        INSTALL_HQQ: "false"
         PIP_INDEX: https://pypi.org/simple
-        PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -20,13 +13,12 @@ services:
       - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
-      - ../../saves:/app/saves
     ports:
       - "7860:7860"
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
setup.py (2 changes)
@@ -52,7 +52,7 @@ extra_require = {
     "eetq": ["eetq"],
     "gptq": ["optimum>=1.24.0", "gptqmodel>=2.0.0"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.8.5"],
+    "vllm": ["vllm>=0.4.3,<=0.8.6"],
     "sglang": ["sglang[srt]>=0.4.5", "transformers==4.51.1"],
     "galore": ["galore-torch"],
     "apollo": ["apollo-torch"],