Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-11-04 18:02:19 +08:00

tiny fix

Former-commit-id: c1a78a3a9f8ab9d57577cee37f9c457d60863ba2
Parent: 9caf9b6f91
Commit: bf99223a80
README.md
@@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASH_ATTN=false \
+    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .
README_zh.md
@@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \
     --build-arg INSTALL_BNB=false \
     --build-arg INSTALL_VLLM=false \
     --build-arg INSTALL_DEEPSPEED=false \
-    --build-arg INSTALL_FLASH_ATTN=false \
+    --build-arg INSTALL_FLASHATTN=false \
     --build-arg PIP_INDEX=https://pypi.org/simple \
     -t llamafactory:latest .
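Note: the README change in both files is purely a rename of the FlashAttention build argument (INSTALL_FLASH_ATTN to INSTALL_FLASHATTN), matching the Dockerfile ARG below. A full build command after this commit would look like the following sketch, reusing the flag values shown above with FlashAttention switched on for illustration:

    # all values except INSTALL_FLASHATTN=true are the documented defaults
    docker build -f ./docker/docker-cuda/Dockerfile \
        --build-arg INSTALL_BNB=false \
        --build-arg INSTALL_VLLM=false \
        --build-arg INSTALL_DEEPSPEED=false \
        --build-arg INSTALL_FLASHATTN=true \
        --build-arg PIP_INDEX=https://pypi.org/simple \
        -t llamafactory:latest .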
docker/docker-cuda/Dockerfile
@@ -2,11 +2,14 @@
 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
 FROM nvcr.io/nvidia/pytorch:24.02-py3
 
+# Define environments
+ENV MAX_JOBS=4
+
 # Define installation arguments
 ARG INSTALL_BNB=false
 ARG INSTALL_VLLM=false
 ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASH_ATTN=false
+ARG INSTALL_FLASHATTN=false
 ARG PIP_INDEX=https://pypi.org/simple
 
 # Set the working directory
@@ -14,34 +17,33 @@ WORKDIR /app
 
 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url $PIP_INDEX
-RUN pip config set global.extra-index-url $PIP_INDEX
-RUN python -m pip install --upgrade pip
-RUN python -m pip install -r requirements.txt
+RUN pip config set global.index-url "$PIP_INDEX" && \
+    pip config set global.extra-index-url "$PIP_INDEX" && \
+    python -m pip install --upgrade pip && \
+    python -m pip install -r requirements.txt
 
+# Rebuild flash attention
+RUN pip uninstall -y transformer-engine flash-attn && \
+    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+        pip uninstall -y ninja && pip install ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
+    fi;
+
 # Copy the rest of the application into the image
 COPY . /app
 
 # Install the LLaMA Factory
 RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" = "true" ]; then \
+    if [ "$INSTALL_BNB" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
     fi; \
-    if [ "$INSTALL_VLLM" = "true" ]; then \
+    if [ "$INSTALL_VLLM" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
     fi; \
-    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
+    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
-    pip install -e .[$EXTRA_PACKAGES] && \
-    pip uninstall -y transformer-engine flash-attn
-
-# Rebuild flash-attn
-RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \
-        ninja --version || \
-        (pip uninstall -y ninja && pip install ninja) && \
-        MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation; \
-    fi;
+    pip install -e ".[$EXTRA_PACKAGES]"
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
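Note: the flash-attn rebuild now runs in its own cached layer between the requirements install and the COPY of the source tree, with MAX_JOBS set once as an ENV instead of inline, and the trailing pip uninstall hack after the editable install is gone. A quick smoke test for an image built with INSTALL_FLASHATTN=true (a hypothetical check, reusing the tag from the README example):

    # fails with ImportError if the flash-attn wheel was skipped or broken
    docker run --rm llamafactory:latest python -c "import flash_attn; print(flash_attn.__version__)"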
docker/docker-cuda/docker-compose.yml
@@ -7,7 +7,7 @@ services:
         INSTALL_BNB: false
         INSTALL_VLLM: false
         INSTALL_DEEPSPEED: false
-        INSTALL_FLASH_ATTN: false
+        INSTALL_FLASHATTN: false
         PIP_INDEX: https://pypi.org/simple
     container_name: llamafactory
     volumes:
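Note: the compose build args mirror the Dockerfile ARGs, so the rename has to land in both files for overrides to keep working. The usual workflow is unchanged (a sketch, assuming the compose file sits next to the CUDA Dockerfile under docker/docker-cuda/):

    cd docker/docker-cuda
    docker compose up -d
    # container_name is set to "llamafactory" in the hunk above
    docker exec -it llamafactory bash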
docker/docker-npu/Dockerfile
@@ -2,6 +2,7 @@
 # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags
 FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04
 
+# Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 
 # Define installation arguments
@@ -27,8 +28,7 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \
     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
     fi; \
-    pip install -e ".[$EXTRA_PACKAGES]" && \
-    pip uninstall -y transformer-engine flash-attn
+    pip install -e ".[$EXTRA_PACKAGES]"
 
 # Set up volumes
 VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
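Note: the NPU image gets the same cleanup, keeping torch-npu in EXTRA_PACKAGES and dropping the trailing uninstall. A build sketch, assuming the file lives at ./docker/docker-npu/Dockerfile alongside the CUDA one, that it also exposes a PIP_INDEX argument, and that the image tag is illustrative:

    docker build -f ./docker/docker-npu/Dockerfile \
        --build-arg INSTALL_DEEPSPEED=false \
        --build-arg PIP_INDEX=https://pypi.org/simple \
        -t llamafactory:npu .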
src/llamafactory/cli.py
@@ -91,7 +91,7 @@ def main():
             master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
             master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999)))
             logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))
-            subproc = subprocess.run(
+            process = subprocess.run(
                 (
                     "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} "
                     "--master_addr {master_addr} --master_port {master_port} {file_name} {args}"
@@ -106,7 +106,7 @@ def main():
                 ),
                 shell=True,
             )
-            sys.exit(subproc.returncode)
+            sys.exit(process.returncode)
         else:
             run_exp()
     elif command == Command.WEBDEMO:
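Note: renaming subproc to process is cosmetic; the torchrun template itself is untouched, and the CLI still exits with the launcher's return code. With the defaults shown above (MASTER_ADDR and MASTER_PORT unset), the format string expands to something like the following sketch, where the node counts come from environment variables and the script name and arguments are placeholders:

    # illustrative expansion of the format string; not a literal command from the repo
    torchrun --nnodes 1 --node_rank 0 --nproc_per_node 8 \
        --master_addr 127.0.0.1 --master_port 25000 \
        train.py --your-training-args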
src/llamafactory/hparams/parser.py
@@ -199,8 +199,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
         if not is_torch_bf16_gpu_available():
             raise ValueError("This device does not support `pure_bf16`.")
 
-        if training_args.deepspeed:
-            raise ValueError("`pure_bf16` is incompatible with DeepSpeed.")
+        if is_deepspeed_zero3_enabled():
+            raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.")
 
         if training_args.fp16 or training_args.bf16:
             raise ValueError("Turn off mixed precision training when using `pure_bf16`.")