From dcbc8168a84b21bfe4c2eb28cc635c57e3f450d6 Mon Sep 17 00:00:00 2001 From: S3Studio Date: Tue, 12 Mar 2024 14:05:10 +0800 Subject: [PATCH 1/2] improve Docker build and runtime parameters Modify installation method of extra python library. Utilize shared memory of the host machine to increase training performance. Former-commit-id: 6a5693d11d065f6e75c8cdd8b5ed962eb520953c --- Dockerfile | 5 ++--- docker-compose.yml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7f930148..155b86d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,11 +3,10 @@ FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04 WORKDIR /app COPY requirements.txt /app/ -RUN pip install -r requirements.txt && \ - pip install tiktoken && \ - pip install transformers_stream_generator +RUN pip install -r requirements.txt COPY . /app/ +RUN pip install -e .[deepspeed,metrics,bitsandbytes,qwen] VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] EXPOSE 7860 diff --git a/docker-compose.yml b/docker-compose.yml index 267ea694..9602a3e3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: - ./output:/app/output ports: - "7860:7860" - shm_size: 16G + ipc: host deploy: resources: reservations: From 46ef7416e664a47c7458a1c51aaba65d23e9c6b5 Mon Sep 17 00:00:00 2001 From: S3Studio Date: Thu, 14 Mar 2024 18:03:33 +0800 Subject: [PATCH 2/2] Use official Nvidia base image Note that the flash-attn library is installed in this image and the qwen model will use it automatically. However, if the host machine's GPU is not compatible with the library, an exception will be raised during the training process as follows: FlashAttention only supports Ampere GPUs or newer. So if the --flash_attn flag is not set, an additional patch for the qwen model's config is necessary to set the default value of use_flash_attn from "auto" to False. 
Former-commit-id: e75407febdec086f2bdca723a7f69a92b3b1d63f --- Dockerfile | 2 +- src/llmtuner/model/patcher.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 155b86d4..c3d231b5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04 +FROM nvcr.io/nvidia/pytorch:24.01-py3 WORKDIR /app diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py index bd484052..210044f2 100644 --- a/src/llmtuner/model/patcher.py +++ b/src/llmtuner/model/patcher.py @@ -283,6 +283,9 @@ def patch_config( setattr(config, dtype_name, model_args.compute_dtype == dtype) _configure_attn_implementation(model_args, init_kwargs) + if getattr(config, "model_type", None) == "qwen" and init_kwargs["attn_implementation"] != "flash_attention_2": + config.use_flash_attn = False + _configure_rope(config, model_args, is_trainable) _configure_longlora(config, model_args, is_trainable) _configure_quantization(config, tokenizer, model_args, init_kwargs)