From 519ac9280388a0bde12df5b04399b544ccffb1c5 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 27 May 2025 18:15:23 +0800 Subject: [PATCH] [assets] update docker files (#8176) --- .gitignore | 2 +- README.md | 24 ++---- README_zh.md | 24 ++---- docker/docker-cuda/Dockerfile | 110 ++++++++---------------- docker/docker-cuda/Dockerfile.base | 55 ++++++++++++ docker/docker-cuda/docker-compose.yml | 12 +-- docker/docker-npu/Dockerfile | 79 ++++++++---------- docker/docker-npu/docker-compose.yml | 4 +- docker/docker-rocm/Dockerfile | 116 +++++++++----------------- docker/docker-rocm/docker-compose.yml | 12 +-- setup.py | 2 +- 11 files changed, 187 insertions(+), 253 deletions(-) create mode 100644 docker/docker-cuda/Dockerfile.base diff --git a/.gitignore b/.gitignore index f3307c4f..0a3a47bd 100644 --- a/.gitignore +++ b/.gitignore @@ -166,8 +166,8 @@ cython_debug/ uv.lock # custom .gitignore -ms_cache/ hf_cache/ +ms_cache/ om_cache/ cache/ config/ diff --git a/README.md b/README.md index 2d6eeef7..b68ea1cb 100644 --- a/README.md +++ b/README.md @@ -663,14 +663,11 @@ For CUDA users: ```bash docker build -f ./docker/docker-cuda/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -docker run -dit --gpus=all \ +docker run -dit --ipc=host --gpus=all \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -678,7 +675,6 @@ docker run -dit --gpus=all \ -v ./output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -688,14 +684,12 @@ docker exec -it llamafactory bash For Ascend NPU users: ```bash -# Choose docker image upon your environment docker build -f ./docker/docker-npu/Dockerfile \ - --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -# Change `device` upon your resources -docker run -dit \ +docker run -dit --ipc=host \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -711,7 +705,6 @@ docker run -dit \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ --device /dev/hisi_hdc \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -722,25 +715,20 @@ For AMD ROCm users: ```bash docker build -f ./docker/docker-rocm/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . 
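# Note: the INSTALL_* build args are gone; EXTRAS maps directly to the optional
# dependency groups in setup.py. An illustrative (non-default) value such as
# `--build-arg EXTRAS=metrics,deepspeed` would bake DeepSpeed into the image.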
-docker run -dit \ +docker run -dit --ipc=host \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ -v ./data:/app/data \ -v ./output:/app/output \ - -v ./saves:/app/saves \ -p 7860:7860 \ -p 8000:8000 \ --device /dev/kfd \ --device /dev/dri \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest diff --git a/README_zh.md b/README_zh.md index 31c77b75..e1a37d45 100644 --- a/README_zh.md +++ b/README_zh.md @@ -665,14 +665,11 @@ CUDA 用户: ```bash docker build -f ./docker/docker-cuda/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -docker run -dit --gpus=all \ +docker run -dit --ipc=host --gpus=all \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -680,7 +677,6 @@ docker run -dit --gpus=all \ -v ./output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -690,14 +686,12 @@ docker exec -it llamafactory bash 昇腾 NPU 用户: ```bash -# 根据您的环境选择镜像 docker build -f ./docker/docker-npu/Dockerfile \ - --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . -# 根据您的资源更改 `device` -docker run -dit \ +docker run -dit --ipc=host \ -v ./hf_cache:/root/.cache/huggingface \ -v ./ms_cache:/root/.cache/modelscope \ -v ./om_cache:/root/.cache/openmind \ @@ -713,7 +707,6 @@ docker run -dit \ --device /dev/davinci_manager \ --device /dev/devmm_svm \ --device /dev/hisi_hdc \ - --shm-size 16G \ --name llamafactory \ llamafactory:latest @@ -724,25 +717,20 @@ AMD ROCm 用户: ```bash docker build -f ./docker/docker-rocm/Dockerfile \ - --build-arg INSTALL_BNB=false \ - --build-arg INSTALL_VLLM=false \ - --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ + --build-arg EXTRAS=metrics \ -t llamafactory:latest . 
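# 提示:INSTALL_* 构建参数已移除,EXTRAS 直接对应 setup.py 中的可选依赖组;
# 例如(仅作示例)`--build-arg EXTRAS=metrics,deepspeed` 可将 DeepSpeed 一并装入镜像。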
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
-    -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest

diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile
index aac9635e..f0e2c5f0 100644
--- a/docker/docker-cuda/Dockerfile
+++ b/docker/docker-cuda/Dockerfile
@@ -1,101 +1,63 @@
-# Default use the NVIDIA official image with PyTorch 2.6.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3
-FROM ${BASE_IMAGE}
+# https://hub.docker.com/r/hiyouga/pytorch/tags
+ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0
+FROM "${BASE_IMAGE}"
+
+# Installation arguments (declared after FROM so they remain in scope for the build stage)
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_EETQ=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . 
/app -# Install the LLaMA Factory -RUN EXTRA_PACKAGES="metrics"; \ - if [ "$INSTALL_BNB" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ - fi; \ - if [ "$INSTALL_VLLM" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ - fi; \ - if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ - fi; \ - if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \ - fi; \ - if [ "$INSTALL_HQQ" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \ - fi; \ - if [ "$INSTALL_EETQ" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \ - fi; \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \ - else \ - pip install -e ".[$EXTRA_PACKAGES]"; \ - fi +# Install LLaMA Factory +RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation # Rebuild flash attention -RUN pip uninstall -y transformer-engine flash-attn && \ - if [ "$INSTALL_FLASHATTN" == "true" ]; then \ +RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \ pip uninstall -y ninja && \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY ninja && \ - pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \ - else \ - pip install ninja && \ - pip install --no-cache-dir flash-attn --no-build-isolation; \ - fi; \ - fi - - -# Unset http proxy -RUN if [ -n "$HTTP_PROXY" ]; then \ - unset http_proxy; \ - unset https_proxy; \ + pip install --no-cache-dir ninja && \ + pip install --no-cache-dir flash-attn --no-build-isolation; \ fi # Set up volumes -VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ] -# Expose port 7860 for the LLaMA Board +# Expose port 7860 for LLaMA Board ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 -# Expose port 8000 for the API service +# Expose port 8000 for API service ENV API_PORT 8000 EXPOSE 8000 + +# unset proxy +ENV http_proxy= +ENV https_proxy= + +# Reset pip config +RUN pip config unset global.index-url && \ + pip config unset global.extra-index-url diff --git a/docker/docker-cuda/Dockerfile.base b/docker/docker-cuda/Dockerfile.base new file mode 100644 index 00000000..f8c09827 --- /dev/null +++ b/docker/docker-cuda/Dockerfile.base @@ -0,0 +1,55 @@ +# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11) +# https://hub.docker.com/r/pytorch/pytorch/tags +FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime + +# Define environments +ENV MAX_JOBS=16 +ENV VLLM_WORKER_MULTIPROC_METHOD=spawn +ENV DEBIAN_FRONTEND=noninteractive +ENV NODE_OPTIONS="" +ENV PIP_ROOT_USER_ACTION=ignore + +# Define installation arguments +ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ +ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + +# Set apt source +RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \ + { \ + echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \ + echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \ + echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \ + echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \ + } > /etc/apt/sources.list + +# Install systemctl and wget +RUN apt-get update && \ + apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \ + apt-get clean + +# Install git and 
vim
+RUN apt-get update && \
+    apt-get install -y git vim && \
+    apt-get clean
+
+# Install gcc and g++
+RUN apt-get update && \
+    apt-get install -y gcc g++ && \
+    apt-get clean
+
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Install flash-attn-2.7.4.post1 (cxx11abi=False)
+RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
+    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
+
+# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
+RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
+    pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml
index fa386cc8..d115f3b2 100644
--- a/docker/docker-cuda/docker-compose.yml
+++ b/docker/docker-cuda/docker-compose.yml
@@ -4,14 +4,8 @@ services:
       dockerfile: ./docker/docker-cuda/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_HQQ: "false"
-        INSTALL_EETQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
       - ../../ms_cache:/root/.cache/modelscope
       - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
     ports:
       - "7860:7860"
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     deploy:
       resources:
         reservations:
           devices:
             - driver: nvidia
               count: "all"
-              capabilities: [gpu]
+              capabilities: [ gpu ]
     restart: unless-stopped
diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile
index 3431176b..9b507737 100644
--- a/docker/docker-npu/Dockerfile
+++ b/docker/docker-npu/Dockerfile
@@ -1,67 +1,56 @@
-# Use the Ubuntu 22.04 image with CANN 8.0.rc1
-# More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
-# FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
-FROM ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
-# FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
-# FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
+# https://hub.docker.com/r/ascendai/cann/tags
+ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
+FROM "${BASE_IMAGE}"
+
+# Installation arguments (declared after FROM so they remain in scope for the build stage)
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
 
 # Define environments
+ENV MAX_JOBS=16
+ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 ENV DEBIAN_FRONTEND=noninteractive
-
-# Define installation arguments
-ARG INSTALL_DEEPSPEED=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
-ARG HTTP_PROXY=
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url 
"${PIP_INDEX}" && \ + pip config set global.extra-index-url "${PIP_INDEX}" && \ + python -m pip install --upgrade pip # Install the requirements COPY requirements.txt /app -RUN pip config set global.index-url "$PIP_INDEX" && \ - pip config set global.extra-index-url "$TORCH_INDEX" && \ - python -m pip install --upgrade pip && \ - if [ -n "$HTTP_PROXY" ]; then \ - python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \ - else \ - python -m pip install -r requirements.txt; \ - fi +RUN pip install --no-cache-dir -r requirements.txt # Copy the rest of the application into the image COPY . /app -# Install the LLaMA Factory -RUN EXTRA_PACKAGES="torch-npu,metrics"; \ - if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ - fi; \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \ - else \ - pip install -e ".[$EXTRA_PACKAGES]"; \ - fi - -# Unset http proxy -RUN if [ -n "$HTTP_PROXY" ]; then \ - unset http_proxy; \ - unset https_proxy; \ - fi +# Install LLaMA Factory +RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation # Set up volumes -VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ] -# Expose port 7860 for the LLaMA Board +# Expose port 7860 for LLaMA Board ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 -# Expose port 8000 for the API service +# Expose port 8000 for API service ENV API_PORT 8000 EXPOSE 8000 + +# unset proxy +ENV http_proxy= +ENV https_proxy= + +# Reset pip config +RUN pip config unset global.index-url && \ + pip config unset global.extra-index-url diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml index dd9a8e17..50664e2e 100644 --- a/docker/docker-npu/docker-compose.yml +++ b/docker/docker-npu/docker-compose.yml @@ -4,8 +4,8 @@ services: dockerfile: ./docker/docker-npu/Dockerfile context: ../.. 
       args:
-        INSTALL_DEEPSPEED: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -22,7 +22,7 @@
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
diff --git a/docker/docker-rocm/Dockerfile b/docker/docker-rocm/Dockerfile
index 9595bafa..a552abfb 100644
--- a/docker/docker-rocm/Dockerfile
+++ b/docker/docker-rocm/Dockerfile
@@ -1,21 +1,23 @@
-FROM hardandheavy/transformers-rocm:2.2.0
+# https://hub.docker.com/r/rocm/pytorch/tags
+ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
+FROM "${BASE_IMAGE}"
+
+# Installation arguments (declared after FROM so they remain in scope for the build stage)
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_PYTORCH=true
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
-ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Use Bash instead of default /bin/sh
 SHELL ["/bin/bash", "-c"]
@@ -23,83 +25,47 @@ SHELL ["/bin/bash", "-c"]
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-    echo "Configuring proxy..."; \
-    export http_proxy=$HTTP_PROXY; \
-    export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Reinstall pytorch rocm
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . 
/app -# Install the LLaMA Factory -RUN EXTRA_PACKAGES="metrics"; \ - if [ "$INSTALL_BNB" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ - fi; \ - if [ "$INSTALL_VLLM" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ - fi; \ - if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ - fi; \ - if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \ - fi; \ - if [ "$INSTALL_HQQ" == "true" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \ - fi; \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \ - else \ - pip install -e ".[$EXTRA_PACKAGES]"; \ - fi - -# Reinstall pytorch -# This is necessary to ensure that the correct version of PyTorch is installed -RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \ - pip uninstall -y torch torchvision torchaudio && \ - pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \ - fi +# Install LLaMA Factory +RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation # Rebuild flash attention -RUN pip uninstall -y transformer-engine flash-attn && \ - if [ "$INSTALL_FLASHATTN" == "true" ]; then \ +RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \ pip uninstall -y ninja && \ - if [ -n "$HTTP_PROXY" ]; then \ - pip install --proxy=$HTTP_PROXY ninja && \ - pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \ - else \ - pip install ninja && \ - pip install --no-cache-dir flash-attn --no-build-isolation; \ - fi; \ - fi - -# Unset http proxy -RUN if [ -n "$HTTP_PROXY" ]; then \ - unset http_proxy; \ - unset https_proxy; \ + pip install --no-cache-dir ninja && \ + pip install --no-cache-dir flash-attn --no-build-isolation; \ fi # Set up volumes -VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ] -# Expose port 7860 for the LLaMA Board +# Expose port 7860 for LLaMA Board ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 -# Expose port 8000 for the API service +# Expose port 8000 for API service ENV API_PORT 8000 EXPOSE 8000 + +# unset proxy +ENV http_proxy= +ENV https_proxy= + +# Reset pip config +RUN pip config unset global.index-url && \ + pip config unset global.extra-index-url diff --git a/docker/docker-rocm/docker-compose.yml b/docker/docker-rocm/docker-compose.yml index caaf4e16..17786e7c 100644 --- a/docker/docker-rocm/docker-compose.yml +++ b/docker/docker-rocm/docker-compose.yml @@ -4,15 +4,8 @@ services: dockerfile: ./docker/docker-rocm/Dockerfile context: ../.. 
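      # PYTORCH_INDEX no longer needs to be passed here; the Dockerfile above now
      # defaults it to the stable rocm6.3 wheel index instead of the nightly one.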
args: - INSTALL_BNB: "false" - INSTALL_VLLM: "false" - INSTALL_DEEPSPEED: "false" - INSTALL_FLASHATTN: "false" - INSTALL_LIGER_KERNEL: "false" - INSTALL_PYTORCH: "true" - INSTALL_HQQ: "false" PIP_INDEX: https://pypi.org/simple - PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3 + EXTRAS: metrics container_name: llamafactory volumes: - ../../hf_cache:/root/.cache/huggingface @@ -20,13 +13,12 @@ services: - ../../om_cache:/root/.cache/openmind - ../../data:/app/data - ../../output:/app/output - - ../../saves:/app/saves ports: - "7860:7860" - "8000:8000" ipc: host tty: true - shm_size: "16gb" + # shm_size: "16gb" # ipc: host is set stdin_open: true command: bash devices: diff --git a/setup.py b/setup.py index 6066a957..a7fc467c 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ extra_require = { "eetq": ["eetq"], "gptq": ["optimum>=1.24.0", "gptqmodel>=2.0.0"], "aqlm": ["aqlm[gpu]>=1.1.0"], - "vllm": ["vllm>=0.4.3,<=0.8.5"], + "vllm": ["vllm>=0.4.3,<=0.8.6"], "sglang": ["sglang[srt]>=0.4.5", "transformers==4.51.1"], "galore": ["galore-torch"], "apollo": ["apollo-torch"],
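As a quick sanity check of the loosened vLLM pin (a sketch, not part of the patch: it assumes an image built with the vllm extra, e.g. `--build-arg EXTRAS=metrics,vllm`, and a running container named `llamafactory` as in the README examples above):

```bash
# The reported version should fall within the updated constraint from setup.py: >=0.4.3,<=0.8.6
docker exec -it llamafactory pip show vllm
```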