mirror of https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-11-04 18:02:19 +08:00

[assets] update docker files (#8176)

This commit is contained in:
parent 46ccf84aaa
commit 00974a3169
.gitignore (2 changes, vendored)
@@ -166,8 +166,8 @@ cython_debug/
 uv.lock
 
 # custom .gitignore
-ms_cache/
 hf_cache/
+ms_cache/
 om_cache/
 cache/
 config/
README.md (24 changes)
@@ -663,14 +663,11 @@ For CUDA users:
 
 ```bash
 docker build -f ./docker/docker-cuda/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit --gpus=all \
+docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -678,7 +675,6 @@ docker run -dit --gpus=all \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -688,14 +684,12 @@ docker exec -it llamafactory bash
 For Ascend NPU users:
 
 ```bash
-# Choose docker image upon your environment
 docker build -f ./docker/docker-npu/Dockerfile \
-    --build-arg INSTALL_DEEPSPEED=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-# Change `device` upon your resources
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -711,7 +705,6 @@ docker run -dit \
     --device /dev/davinci_manager \
     --device /dev/devmm_svm \
     --device /dev/hisi_hdc \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -722,25 +715,20 @@ For AMD ROCm users:
 
 ```bash
 docker build -f ./docker/docker-rocm/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
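In short, the README quickstarts no longer toggle individual INSTALL_* build args; optional dependencies are selected through the single EXTRAS build arg (extras defined in setup.py), and --ipc=host replaces the explicit --shm-size setting. A minimal sketch of the resulting CUDA workflow, assembled from the new lines above (the deepspeed value is only an illustrative extra):

```bash
# Build the CUDA image; optional features are chosen via EXTRAS.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    --build-arg EXTRAS=metrics,deepspeed \
    -t llamafactory:latest .

# Run it; --ipc=host makes a separate --shm-size flag unnecessary.
docker run -dit --ipc=host --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface \
    -p 7860:7860 -p 8000:8000 \
    --name llamafactory \
    llamafactory:latest
docker exec -it llamafactory bash
```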
README_zh.md (24 changes)
@@ -665,14 +665,11 @@ CUDA 用户:
 
 ```bash
 docker build -f ./docker/docker-cuda/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit --gpus=all \
+docker run -dit --ipc=host --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -680,7 +677,6 @@ docker run -dit --gpus=all \
     -v ./output:/app/output \
     -p 7860:7860 \
     -p 8000:8000 \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -690,14 +686,12 @@ docker exec -it llamafactory bash
 昇腾 NPU 用户:
 
 ```bash
-# 根据您的环境选择镜像
 docker build -f ./docker/docker-npu/Dockerfile \
-    --build-arg INSTALL_DEEPSPEED=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-# 根据您的资源更改 `device`
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
@@ -713,7 +707,6 @@ docker run -dit \
     --device /dev/davinci_manager \
     --device /dev/devmm_svm \
     --device /dev/hisi_hdc \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
 
@@ -724,25 +717,20 @@ AMD ROCm 用户:
 
 ```bash
 docker build -f ./docker/docker-rocm/Dockerfile \
-    --build-arg INSTALL_BNB=false \
-    --build-arg INSTALL_VLLM=false \
-    --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
+    --build-arg EXTRAS=metrics \
     -t llamafactory:latest .
 
-docker run -dit \
+docker run -dit --ipc=host \
     -v ./hf_cache:/root/.cache/huggingface \
     -v ./ms_cache:/root/.cache/modelscope \
     -v ./om_cache:/root/.cache/openmind \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -v ./saves:/app/saves \
     -p 7860:7860 \
     -p 8000:8000 \
     --device /dev/kfd \
     --device /dev/dri \
-    --shm-size 16G \
     --name llamafactory \
     llamafactory:latest
docker/docker-cuda/Dockerfile

@@ -1,101 +1,63 @@
-# Default use the NVIDIA official image with PyTorch 2.6.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
-ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.12-py3
-FROM ${BASE_IMAGE}
+# Installation arguments
+ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+
+# https://hub.docker.com/r/hiyouga/pytorch/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_EETQ=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ "$INSTALL_EETQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
         pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
     fi
 
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
-    fi
-
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
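The per-command `--proxy` handling is gone: the new Dockerfile simply exports `http_proxy`/`https_proxy` from the `HTTP_PROXY` build arg for the whole build and clears them at the end. A sketch of passing a proxy at build time (the proxy URL is a placeholder, not anything the project defines):

```bash
# Hypothetical proxy endpoint; HTTP_PROXY and EXTRAS are the build args defined above.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg HTTP_PROXY=http://proxy.example.com:8080 \
    --build-arg EXTRAS=metrics \
    -t llamafactory:latest .
```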
docker/docker-cuda/Dockerfile.base (55 changes, new file)
@@ -0,0 +1,55 @@
+# Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)
+# https://hub.docker.com/r/pytorch/pytorch/tags
+FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime
+
+# Define environments
+ENV MAX_JOBS=16
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+
+# Define installation arguments
+ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
+ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+# Set apt source
+RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
+    { \
+    echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
+    echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
+    } > /etc/apt/sources.list
+
+# Install systemctl and wget
+RUN apt-get update && \
+    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \
+    apt-get clean
+
+# Install git and vim
+RUN apt-get update && \
+    apt-get install -y git vim && \
+    apt-get clean
+
+# Install gcc and g++
+RUN apt-get update && \
+    apt-get install -y gcc g++ && \
+    apt-get clean
+
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Install flash-attn-2.7.4.post1 (cxx11abi=False)
+RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
+    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
+
+# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
+RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
+    pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
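Presumably this base file exists so the heavyweight pieces (the prebuilt flash-attn and flashinfer wheels) are baked into a reusable image that the main CUDA Dockerfile can consume through its BASE_IMAGE build arg; the commit itself does not spell out that pairing, and the tag below is illustrative rather than one published by the project:

```bash
# Build the base image once (tag name is an assumption for illustration).
docker build -f ./docker/docker-cuda/Dockerfile.base -t local/pytorch-base:th2.6.0-cu124 .

# Point the main CUDA Dockerfile at it via the BASE_IMAGE build arg.
docker build -f ./docker/docker-cuda/Dockerfile \
    --build-arg BASE_IMAGE=local/pytorch-base:th2.6.0-cu124 \
    -t llamafactory:latest .
```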
docker/docker-cuda/docker-compose.yml

@@ -4,14 +4,8 @@ services:
       dockerfile: ./docker/docker-cuda/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_HQQ: "false"
-        INSTALL_EETQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -24,7 +18,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     deploy:
@@ -33,5 +27,5 @@ services:
           devices:
           - driver: nvidia
             count: "all"
-            capabilities: [gpu]
+            capabilities: [ gpu ]
     restart: unless-stopped
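With the build args trimmed to PIP_INDEX and EXTRAS, the compose route stays a two-command affair. A sketch, assuming it is run from the directory holding this compose file and that the service carries the same name as its container (llamafactory):

```bash
cd docker/docker-cuda
docker compose up -d                      # build args come from the compose file
docker compose exec llamafactory bash     # open a shell in the running container
```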
docker/docker-npu/Dockerfile

@@ -1,67 +1,56 @@
-# Use the Ubuntu 22.04 image with CANN 8.0.rc1
-# More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
-# FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
-FROM ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
-# FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
-# FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
+# Installation arguments
+ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+
+# https://hub.docker.com/r/ascendai/cann/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
+ENV MAX_JOBS=16
+ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 ENV DEBIAN_FRONTEND=noninteractive
-
-# Define installation arguments
-ARG INSTALL_DEEPSPEED=false
-ARG PIP_INDEX=https://pypi.org/simple
-ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
-ARG HTTP_PROXY=
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$TORCH_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="torch-npu,metrics"; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
docker/docker-npu/docker-compose.yml

@@ -4,8 +4,8 @@ services:
       dockerfile: ./docker/docker-npu/Dockerfile
       context: ../..
       args:
-        INSTALL_DEEPSPEED: "false"
         PIP_INDEX: https://pypi.org/simple
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -22,7 +22,7 @@ services:
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
docker/docker-rocm/Dockerfile

@@ -1,21 +1,23 @@
-FROM hardandheavy/transformers-rocm:2.2.0
+# Installation arguments
+ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
+
+# https://hub.docker.com/r/rocm/pytorch/tags
+FROM "${BASE_IMAGE}"
 
 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_PYTORCH=true
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
-ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"
 
 # Use Bash instead of default /bin/sh
 SHELL ["/bin/bash", "-c"]
@@ -23,83 +25,47 @@ SHELL ["/bin/bash", "-c"]
 # Set the working directory
 WORKDIR /app
 
-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Reinstall pytorch rocm
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the rest of the application into the image
 COPY . /app
 
-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Reinstall pytorch
-# This is necessary to ensure that the correct version of PyTorch is installed
-RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \
-        pip uninstall -y torch torchvision torchaudio && \
-        pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
 
 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
         pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
     fi
 
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
-    fi
-
 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]
 
-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860
 
-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# unset proxy
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
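One ROCm-specific detail: the PyTorch reinstall is no longer gated behind an INSTALL_PYTORCH arg; it always pulls wheels from PYTORCH_INDEX, whose default moved from the nightly rocm6.3 index to the stable one. A sketch of restoring the old nightly behaviour by overriding the arg at build time (both URLs appear in the diff above):

```bash
docker build -f ./docker/docker-rocm/Dockerfile \
    --build-arg PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3 \
    -t llamafactory:latest .
```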
docker/docker-rocm/docker-compose.yml

@@ -4,15 +4,8 @@ services:
       dockerfile: ./docker/docker-rocm/Dockerfile
       context: ../..
       args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_PYTORCH: "true"
-        INSTALL_HQQ: "false"
         PIP_INDEX: https://pypi.org/simple
-        PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3
+        EXTRAS: metrics
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
@@ -20,13 +13,12 @@ services:
       - ../../om_cache:/root/.cache/openmind
       - ../../data:/app/data
       - ../../output:/app/output
-      - ../../saves:/app/saves
     ports:
       - "7860:7860"
       - "8000:8000"
     ipc: host
     tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
     stdin_open: true
     command: bash
     devices:
setup.py (2 changes)
@@ -52,7 +52,7 @@ extra_require = {
     "eetq": ["eetq"],
     "gptq": ["optimum>=1.24.0", "gptqmodel>=2.0.0"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.8.5"],
+    "vllm": ["vllm>=0.4.3,<=0.8.6"],
     "sglang": ["sglang[srt]>=0.4.5", "transformers==4.51.1"],
     "galore": ["galore-torch"],
     "apollo": ["apollo-torch"],