Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-11-04 18:02:19 +08:00
Note that the flash-attn library is installed in this image and the Qwen model will use it automatically. However, if the host machine's GPU is not compatible with the library, an exception is raised during training: "FlashAttention only supports Ampere GPUs or newer." So when the --flash_attn flag is not set, an additional patch to the Qwen model's config is needed to change the default value of use_flash_attn from "auto" to False.

Former-commit-id: cd2f5717d676e1a5afd2f4e7a38402d2e55e7479
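A minimal sketch of such a patch, assuming Qwen's remote config exposes a use_flash_attn field that defaults to "auto"; the function name and call pattern are illustrative, not the repo's actual code:

from transformers import AutoConfig

def patch_qwen_config(model_name_or_path: str, flash_attn_enabled: bool):
    # Qwen ships a custom config class, so trust_remote_code is required.
    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
    # Assumption: the remote config defines use_flash_attn with a default of "auto".
    # When --flash_attn is not set, force it to False so pre-Ampere GPUs do not raise
    # "FlashAttention only supports Ampere GPUs or newer" during training.
    if not flash_attn_enabled and getattr(config, "use_flash_attn", None) == "auto":
        config.use_flash_attn = False
    return config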
Dockerfile (15 lines, 307 B):
FROM nvcr.io/nvidia/pytorch:24.01-py3

WORKDIR /app

COPY requirements.txt /app/
RUN pip install -r requirements.txt

COPY . /app/
RUN pip install -e .[deepspeed,metrics,bitsandbytes,qwen]

VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
EXPOSE 7860

CMD [ "python", "src/train_web.py" ]
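A typical way to build and run this image; the image tag and host-side mount paths are illustrative, not mandated by the repo:

docker build -t llama-factory .
docker run --gpus all -p 7860:7860 \
    -v "$HOME/.cache/huggingface:/root/.cache/huggingface" \
    -v "$PWD/data:/app/data" \
    -v "$PWD/output:/app/output" \
    llama-factory

The mounts line up with the VOLUME declarations above, and port 7860 exposes the web UI started by src/train_web.py.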