mirror of
				https://github.com/hiyouga/LLaMA-Factory.git
				synced 2025-11-04 18:02:19 +08:00 
			
		
		
		
	Add missing key to init_kwargs
Former-commit-id: 03fc4621dad132164596a58d3e8693787b7e1aca
This commit is contained in:
		
							parent
							
								
									8881237475
								
							
						
					
					
						commit
						6f1c8dacea
					
				@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,7 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 | 
			
		||||
quantization_bit: 4
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: models/llama3-8b-pro
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
### model
 | 
			
		||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
### method
 | 
			
		||||
stage: sft
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,4 @@
 | 
			
		||||
model_name_or_path: saves/llama3-8b/full/sft
 | 
			
		||||
template: llama3
 | 
			
		||||
infer_backend: huggingface  # choices: [huggingface, vllm]
 | 
			
		||||
trust_remote_code: true
 | 
			
		||||
 | 
			
		||||
@ -285,6 +285,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
 | 
			
		||||
        default=False,
 | 
			
		||||
        metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
 | 
			
		||||
    )
 | 
			
		||||
    trust_remote_code: bool = field(
 | 
			
		||||
        default=False,
 | 
			
		||||
        metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
 | 
			
		||||
    )
 | 
			
		||||
    compute_dtype: Optional[torch.dtype] = field(
 | 
			
		||||
        default=None,
 | 
			
		||||
        init=False,
 | 
			
		||||
@ -305,16 +309,6 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
 | 
			
		||||
        init=False,
 | 
			
		||||
        metadata={"help": "Whether use block diag attention or not, derived from `neat_packing`. Do not specify it."},
 | 
			
		||||
    )
 | 
			
		||||
    trust_remote_code: bool = field(
 | 
			
		||||
        default=False,
 | 
			
		||||
        metadata={
 | 
			
		||||
            "help": (
 | 
			
		||||
                "Whether to trust the execution of code from datasets/models defined on the Hub. "
 | 
			
		||||
                "This option should only be set to `True` for repositories you trust and in which "
 | 
			
		||||
                "you have read the code, as it will execute code present on the Hub on your local machine."
 | 
			
		||||
            )
 | 
			
		||||
        },
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    def __post_init__(self):
 | 
			
		||||
        if self.model_name_or_path is None:
 | 
			
		||||
 | 
			
		||||
@ -152,6 +152,7 @@ class Runner:
 | 
			
		||||
            bf16=(get("train.compute_type") == "bf16"),
 | 
			
		||||
            pure_bf16=(get("train.compute_type") == "pure_bf16"),
 | 
			
		||||
            plot_loss=True,
 | 
			
		||||
            trust_remote_code=True,
 | 
			
		||||
            ddp_timeout=180000000,
 | 
			
		||||
            include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True,  # FIXME
 | 
			
		||||
        )
 | 
			
		||||
@ -268,6 +269,7 @@ class Runner:
 | 
			
		||||
            top_p=get("eval.top_p"),
 | 
			
		||||
            temperature=get("eval.temperature"),
 | 
			
		||||
            output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
 | 
			
		||||
            trust_remote_code=True,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        if get("eval.predict"):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user