Merge pull request #2849 from S3Studio/DockerizeSupport

Improve Dockerize support

Switch the Docker base image from cnstark/pytorch (PyTorch 2.0.1, CUDA 11.8) to NVIDIA's NGC PyTorch 24.01 container, install the project with its optional extras instead of ad-hoc pip packages, replace the fixed 16G shm_size in docker-compose with ipc: host, and explicitly disable Qwen's built-in flash attention when FlashAttention-2 is not the selected implementation.

Former-commit-id: 113cc047198325b51dac50d8a7ea70396c51e0d9
hoshi-hiyouga, 2024-03-15 19:16:02 +08:00 (committed by GitHub)
commit 30765baa91
3 changed files with 7 additions and 5 deletions

Dockerfile

@@ -1,13 +1,12 @@
-FROM cnstark/pytorch:2.0.1-py3.9.17-cuda11.8.0-ubuntu20.04
+FROM nvcr.io/nvidia/pytorch:24.01-py3

 WORKDIR /app

 COPY requirements.txt /app/
-RUN pip install -r requirements.txt && \
-    pip install tiktoken && \
-    pip install transformers_stream_generator
+RUN pip install -r requirements.txt

 COPY . /app/
+RUN pip install -e .[deepspeed,metrics,bitsandbytes,qwen]

 VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
 EXPOSE 7860
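The NGC 24.01 container ships a current PyTorch/CUDA stack, so the old pinned cnstark image is no longer needed. The ad-hoc pip install tiktoken and transformers_stream_generator lines also become redundant, since the new editable install pulls dependencies in through the project's extras. A minimal sketch of how such extras are typically wired in setup.py; the exact package lists here are assumptions, not copied from the repository:

# Hypothetical sketch of the extras wiring that lets
# `pip install -e .[deepspeed,metrics,bitsandbytes,qwen]`
# replace the hand-installed packages from the old Dockerfile.
from setuptools import setup

extra_require = {
    "deepspeed": ["deepspeed"],
    "metrics": ["nltk", "jieba", "rouge-chinese"],
    "bitsandbytes": ["bitsandbytes>=0.39.0"],
    # assumed: the "qwen" extra covers tiktoken and transformers_stream_generator
    "qwen": ["tiktoken", "transformers_stream_generator"],
}

setup(name="llmtuner", extras_require=extra_require)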

docker-compose.yml

@@ -12,7 +12,7 @@ services:
       - ./output:/app/output
     ports:
       - "7860:7860"
-    shm_size: 16G
+    ipc: host
     deploy:
       resources:
         reservations:
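ipc: host shares the host's IPC namespace with the container, so PyTorch's multi-worker DataLoader can use the host's full /dev/shm instead of the fixed 16G reservation, avoiding shared-memory ("bus error") crashes when workers exchange large batches. A minimal illustration of why this matters; an assumed example, not code from this repository:

# Worker processes hand tensors back through shared memory (/dev/shm);
# inside a container that memory is capped by shm_size unless ipc: host is set.
import torch
from torch.utils.data import DataLoader, TensorDataset

if __name__ == "__main__":
    dataset = TensorDataset(torch.randn(1024, 128))
    loader = DataLoader(dataset, batch_size=32, num_workers=4)

    for (batch,) in loader:  # each batch crosses the process boundary via /dev/shm
        pass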

src/llmtuner/model/patcher.py

@@ -283,6 +283,9 @@ def patch_config(
         setattr(config, dtype_name, model_args.compute_dtype == dtype)

     _configure_attn_implementation(model_args, init_kwargs)
+    if getattr(config, "model_type", None) == "qwen" and init_kwargs["attn_implementation"] != "flash_attention_2":
+        config.use_flash_attn = False
+
     _configure_rope(config, model_args, is_trainable)
     _configure_longlora(config, model_args, is_trainable)
     _configure_quantization(config, tokenizer, model_args, init_kwargs)
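Qwen (v1) checkpoints load custom modeling code via trust_remote_code, and that code reads its own use_flash_attn config field rather than Transformers' attn_implementation argument; without this guard, a Qwen model could enable flash attention on its own even when the user selected another implementation. A self-contained sketch of the guard's effect, using a stand-in config object with hypothetical field values instead of a real checkpoint:

from types import SimpleNamespace

# Stand-in for a Qwen config whose remote code would otherwise
# auto-detect and enable flash attention (assumed default "auto").
config = SimpleNamespace(model_type="qwen", use_flash_attn="auto")
init_kwargs = {"attn_implementation": "eager"}  # FlashAttention-2 not requested

if getattr(config, "model_type", None) == "qwen" and init_kwargs["attn_implementation"] != "flash_attention_2":
    config.use_flash_attn = False  # keep the remote code from enabling it on its own

assert config.use_flash_attn is False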