Add missing key to init_kwargs

Former-commit-id: 1c8ad22a5f167bf4e1c845e273583e5cb3a0214e
Yaser Afshar 2024-10-25 10:15:42 -07:00 committed by hiyouga
parent fe4546a7bb
commit 76ebd62ac1
11 changed files with 15 additions and 10 deletions
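Context (not part of the diff): the commit threads a new trust_remote_code option through the example YAML configs, the ModelArguments dataclass, and the web UI Runner. The snippet below is only an assumed illustration of where such a key usually ends up; the hypothetical load_tokenizer_and_model helper forwards it in the init kwargs passed to the Hugging Face from_pretrained calls, while the project's actual loader code is not part of this commit.

# Illustrative sketch only: load_tokenizer_and_model is hypothetical, not the project's loader.
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

def load_tokenizer_and_model(model_name_or_path: str, trust_remote_code: bool = False):
    # Collect the keyword arguments shared by the from_pretrained calls;
    # trust_remote_code is the key this commit exposes to users.
    init_kwargs = {"trust_remote_code": trust_remote_code}
    config = AutoConfig.from_pretrained(model_name_or_path, **init_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, **init_kwargs)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, config=config, **init_kwargs)
    return tokenizer, model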

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: Qwen/Qwen2-1.5B-Instruct
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,6 +1,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 quantization_bit: 4
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: models/llama3-8b-pro
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 
 ### method
 stage: sft

View File

@@ -1,3 +1,4 @@
 model_name_or_path: saves/llama3-8b/full/sft
 template: llama3
 infer_backend: huggingface # choices: [huggingface, vllm]
+trust_remote_code: true

View File

@@ -285,6 +285,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
         default=False,
         metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
     )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
+    )
     compute_dtype: Optional[torch.dtype] = field(
         default=None,
         init=False,
@@ -305,16 +309,6 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
         init=False,
         metadata={"help": "Whether use block diag attention or not, derived from `neat_packing`. Do not specify it."},
     )
-    trust_remote_code: bool = field(
-        default=False,
-        metadata={
-            "help": (
-                "Whether to trust the execution of code from datasets/models defined on the Hub. "
-                "This option should only be set to `True` for repositories you trust and in which "
-                "you have read the code, as it will execute code present on the Hub on your local machine."
-            )
-        },
-    )
 
     def __post_init__(self):
         if self.model_name_or_path is None:
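The relocated field now sits with the other user-settable options, ahead of the derived fields that are declared with init=False. As a minimal sketch of that distinction (Demo is a hypothetical stand-in, not the project's ModelArguments), only regular init fields such as trust_remote_code can be passed to the constructor, while init=False fields like compute_dtype are filled in later:

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Demo:  # hypothetical stand-in for ModelArguments
    trust_remote_code: bool = field(default=False)                  # user-settable init field
    compute_dtype: Optional[str] = field(default=None, init=False)  # derived; excluded from __init__

demo = Demo(trust_remote_code=True)  # accepted as a keyword argument
# Demo(compute_dtype="bf16")         # would raise TypeError: unexpected keyword argument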

View File

@@ -152,6 +152,7 @@ class Runner:
             bf16=(get("train.compute_type") == "bf16"),
             pure_bf16=(get("train.compute_type") == "pure_bf16"),
             plot_loss=True,
+            trust_remote_code=True,
             ddp_timeout=180000000,
             include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True, # FIXME
         )
@@ -268,6 +269,7 @@
             top_p=get("eval.top_p"),
             temperature=get("eval.temperature"),
             output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
+            trust_remote_code=True,
         )
 
         if get("eval.predict"):
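In both Runner code paths the new key is added to a plain dict of arguments. As a small sketch, assuming the dict is later parsed with transformers' HfArgumentParser (MiniModelArguments is a hypothetical stand-in for the project's ModelArguments), the flag ends up as a dataclass attribute:

from dataclasses import dataclass, field
from transformers import HfArgumentParser

@dataclass
class MiniModelArguments:  # hypothetical stand-in
    model_name_or_path: str = field(default="meta-llama/Meta-Llama-3-8B-Instruct")
    trust_remote_code: bool = field(default=False)

args = {
    "model_name_or_path": "Qwen/Qwen2-1.5B-Instruct",
    "trust_remote_code": True,  # the key the Runner now sets explicitly
}
(model_args,) = HfArgumentParser(MiniModelArguments).parse_dict(args)
assert model_args.trust_remote_code is True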