mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-01 03:02:51 +08:00
Add missing key to init_kwargs
Former-commit-id: 1c8ad22a5f167bf4e1c845e273583e5cb3a0214e
This commit is contained in:
parent
fe4546a7bb
commit
76ebd62ac1
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,6 +1,7 @@
|
||||
### model
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
quantization_bit: 4
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: models/llama3-8b-pro
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,5 +1,6 @@
|
||||
### model
|
||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||
trust_remote_code: true
|
||||
|
||||
### method
|
||||
stage: sft
|
||||
|
@@ -1,3 +1,4 @@
|
||||
model_name_or_path: saves/llama3-8b/full/sft
|
||||
template: llama3
|
||||
infer_backend: huggingface # choices: [huggingface, vllm]
|
||||
trust_remote_code: true
|
||||
|
@@ -285,6 +285,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
|
||||
default=False,
|
||||
metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
|
||||
)
|
||||
trust_remote_code: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
|
||||
)
|
||||
compute_dtype: Optional[torch.dtype] = field(
|
||||
default=None,
|
||||
init=False,
|
||||
@@ -305,16 +309,6 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
|
||||
init=False,
|
||||
metadata={"help": "Whether use block diag attention or not, derived from `neat_packing`. Do not specify it."},
|
||||
)
|
||||
trust_remote_code: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": (
|
||||
"Whether to trust the execution of code from datasets/models defined on the Hub. "
|
||||
"This option should only be set to `True` for repositories you trust and in which "
|
||||
"you have read the code, as it will execute code present on the Hub on your local machine."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
if self.model_name_or_path is None:
|
||||
|
@@ -152,6 +152,7 @@ class Runner:
|
||||
bf16=(get("train.compute_type") == "bf16"),
|
||||
pure_bf16=(get("train.compute_type") == "pure_bf16"),
|
||||
plot_loss=True,
|
||||
trust_remote_code=True,
|
||||
ddp_timeout=180000000,
|
||||
include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True, # FIXME
|
||||
)
|
||||
@@ -268,6 +269,7 @@ class Runner:
|
||||
top_p=get("eval.top_p"),
|
||||
temperature=get("eval.temperature"),
|
||||
output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
|
||||
trust_remote_code=True,
|
||||
)
|
||||
|
||||
if get("eval.predict"):
|
||||
|
Loading…
x
Reference in New Issue
Block a user