Add missing key to init_kwargs

commit 76ebd62ac1 (parent fe4546a7bb)
Former-commit-id: 1c8ad22a5f167bf4e1c845e273583e5cb3a0214e
Author: Yaser Afshar, 2024-10-25 10:15:42 -07:00 (committed by hiyouga)
11 changed files with 15 additions and 10 deletions
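In short: `transformers` only executes custom modeling/tokenizer code shipped with a Hub repository when `trust_remote_code=True` is passed to its loaders. This change exposes the flag as a `ModelArguments` field (defaulting to `False`), enables it in the example YAML configs, and, per the commit title, forwards it through the `init_kwargs` dictionary handed to the `from_pretrained()` calls. The loader-side change is not part of this excerpt; the following is only a minimal sketch of that plumbing, with helper names (`_get_init_kwargs`, `load_model_and_tokenizer`) chosen for illustration rather than taken from the repository:

```python
from typing import Any, Dict

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer


def _get_init_kwargs(model_args) -> Dict[str, Any]:
    # Kwargs shared by every from_pretrained() call; the new field is the
    # "missing key" forwarded here, so custom Hub code only runs when the
    # user has opted in (the field defaults to False).
    return {
        "trust_remote_code": model_args.trust_remote_code,
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
    }


def load_model_and_tokenizer(model_args):
    init_kwargs = _get_init_kwargs(model_args)
    config = AutoConfig.from_pretrained(model_args.model_name_or_path, **init_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **init_kwargs)
    model = AutoModelForCausalLM.from_pretrained(
        model_args.model_name_or_path, config=config, **init_kwargs
    )
    return model, tokenizer
```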

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: Qwen/Qwen2-1.5B-Instruct
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,6 +1,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 quantization_bit: 4
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: models/llama3-8b-pro
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true

 ### method
 stage: sft

View File

@@ -1,3 +1,4 @@
 model_name_or_path: saves/llama3-8b/full/sft
 template: llama3
 infer_backend: huggingface # choices: [huggingface, vllm]
+trust_remote_code: true
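For context on what the new key toggles at load time: with `trust_remote_code=False` (the new default in `ModelArguments`), `transformers` refuses to execute modeling or tokenizer code bundled with a checkpoint, which is why the example configs set the key explicitly. A minimal sketch of the underlying `transformers` behavior, reusing the path from the config above for illustration:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "saves/llama3-8b/full/sft"  # model_name_or_path from the config above

# Passing trust_remote_code=True opts in to running any custom code that the
# checkpoint (or the Hub repo it derives from) ships alongside its weights.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
```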

View File

@@ -285,6 +285,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
         default=False,
         metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
     )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
+    )
     compute_dtype: Optional[torch.dtype] = field(
         default=None,
         init=False,
@@ -305,16 +309,6 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
         init=False,
         metadata={"help": "Whether use block diag attention or not, derived from `neat_packing`. Do not specify it."},
     )
-    trust_remote_code: bool = field(
-        default=False,
-        metadata={
-            "help": (
-                "Whether to trust the execution of code from datasets/models defined on the Hub. "
-                "This option should only be set to `True` for repositories you trust and in which "
-                "you have read the code, as it will execute code present on the Hub on your local machine."
-            )
-        },
-    )

     def __post_init__(self):
         if self.model_name_or_path is None:
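Because the field is a plain dataclass member, the boolean in the YAML examples above maps straight onto it when the arguments are parsed. A minimal sketch, assuming the project's usual `HfArgumentParser`-based flow (the import path is assumed for illustration):

```python
from transformers import HfArgumentParser

from llamafactory.hparams import ModelArguments  # import path assumed for illustration

parser = HfArgumentParser(ModelArguments)

# Equivalent to reading one of the example YAML files above.
(model_args,) = parser.parse_dict(
    {"model_name_or_path": "Qwen/Qwen2-1.5B-Instruct", "trust_remote_code": True},
    allow_extra_keys=True,
)
assert model_args.trust_remote_code is True
```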

View File

@@ -152,6 +152,7 @@ class Runner:
             bf16=(get("train.compute_type") == "bf16"),
             pure_bf16=(get("train.compute_type") == "pure_bf16"),
             plot_loss=True,
+            trust_remote_code=True,
             ddp_timeout=180000000,
             include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True, # FIXME
         )
@@ -268,6 +269,7 @@ class Runner:
             top_p=get("eval.top_p"),
             temperature=get("eval.temperature"),
             output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
+            trust_remote_code=True,
         )

         if get("eval.predict"):