mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-09-13 00:22:48 +08:00
Merge pull request #5819 from yafshar/remote_code
Add trust_remote_code Parameter and Set Default to False

Former-commit-id: 2a832e489b7d88dba3cc8b07ca582cb22ec9a9f1
This commit is contained in:
commit
09419dfbab
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
quantization_bit: 4
|
quantization_bit: 4
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: models/llama3-8b-pro
|
model_name_or_path: models/llama3-8b-pro
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
template: llama3
|
template: llama3
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm]
|
infer_backend: huggingface # choices: [huggingface, vllm]
|
||||||
|
trust_remote_code: true
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
model_name_or_path: saves/llama3-8b/full/sft
|
model_name_or_path: saves/llama3-8b/full/sft
|
||||||
template: llama3
|
template: llama3
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm]
|
infer_backend: huggingface # choices: [huggingface, vllm]
|
||||||
|
trust_remote_code: true
|
||||||
|
@ -2,3 +2,4 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
template: llama3
|
template: llama3
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm]
|
infer_backend: huggingface # choices: [huggingface, vllm]
|
||||||
|
trust_remote_code: true
|
||||||
|
@ -2,3 +2,4 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|||||||
template: llama3
|
template: llama3
|
||||||
infer_backend: vllm
|
infer_backend: vllm
|
||||||
vllm_enforce_eager: true
|
vllm_enforce_eager: true
|
||||||
|
trust_remote_code: true
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
model_name_or_path: llava-hf/llava-1.5-7b-hf
|
model_name_or_path: llava-hf/llava-1.5-7b-hf
|
||||||
template: llava
|
template: llava
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm]
|
infer_backend: huggingface # choices: [huggingface, vllm]
|
||||||
|
trust_remote_code: true
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
||||||
template: qwen2_vl
|
template: qwen2_vl
|
||||||
infer_backend: huggingface # choices: [huggingface, vllm]
|
infer_backend: huggingface # choices: [huggingface, vllm]
|
||||||
|
trust_remote_code: true
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
template: llama3
|
template: llama3
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: models/llama3_gptq
|
export_dir: models/llama3_gptq
|
||||||
|
@ -5,6 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
template: llama3
|
template: llama3
|
||||||
finetuning_type: lora
|
finetuning_type: lora
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: models/llama3_lora_sft
|
export_dir: models/llama3_lora_sft
|
||||||
|
@ -5,6 +5,7 @@ model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
|||||||
adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
|
adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
|
||||||
template: qwen2_vl
|
template: qwen2_vl
|
||||||
finetuning_type: lora
|
finetuning_type: lora
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### export
|
### export
|
||||||
export_dir: models/qwen2_vl_lora_sft
|
export_dir: models/qwen2_vl_lora_sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: dpo
|
stage: dpo
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
adapter_name_or_path: saves/llama3-8b/lora/sft
|
adapter_name_or_path: saves/llama3-8b/lora/sft
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
finetuning_type: lora
|
finetuning_type: lora
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: kto
|
stage: kto
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
reward_model: saves/llama3-8b/lora/reward
|
reward_model: saves/llama3-8b/lora/reward
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: ppo
|
stage: ppo
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: pt
|
stage: pt
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: rm
|
stage: rm
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: llava-hf/llava-1.5-7b-hf
|
model_name_or_path: llava-hf/llava-1.5-7b-hf
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: dpo
|
stage: dpo
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
|
model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
|
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
### model
|
### model
|
||||||
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
|
model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
||||||
quantization_bit: 4
|
quantization_bit: 4
|
||||||
quantization_method: bitsandbytes # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
|
quantization_method: bitsandbytes # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
|
||||||
|
trust_remote_code: true
|
||||||
|
|
||||||
### method
|
### method
|
||||||
stage: sft
|
stage: sft
|
||||||
|
@ -72,7 +72,7 @@ class VllmEngine(BaseEngine):
|
|||||||
|
|
||||||
engine_args = {
|
engine_args = {
|
||||||
"model": model_args.model_name_or_path,
|
"model": model_args.model_name_or_path,
|
||||||
"trust_remote_code": True,
|
"trust_remote_code": model_args.trust_remote_code,
|
||||||
"download_dir": model_args.cache_dir,
|
"download_dir": model_args.cache_dir,
|
||||||
"dtype": model_args.infer_dtype,
|
"dtype": model_args.infer_dtype,
|
||||||
"max_model_len": model_args.vllm_maxlen,
|
"max_model_len": model_args.vllm_maxlen,
|
||||||
|
@ -129,7 +129,7 @@ def _load_single_dataset(
|
|||||||
token=model_args.hf_hub_token,
|
token=model_args.hf_hub_token,
|
||||||
streaming=data_args.streaming,
|
streaming=data_args.streaming,
|
||||||
num_proc=data_args.preprocessing_num_workers,
|
num_proc=data_args.preprocessing_num_workers,
|
||||||
trust_remote_code=True,
|
trust_remote_code=model_args.trust_remote_code,
|
||||||
)
|
)
|
||||||
|
|
||||||
if dataset_attr.num_samples is not None and not data_args.streaming:
|
if dataset_attr.num_samples is not None and not data_args.streaming:
|
||||||
|
@ -100,7 +100,7 @@ class Evaluator:
|
|||||||
cache_dir=self.model_args.cache_dir,
|
cache_dir=self.model_args.cache_dir,
|
||||||
download_mode=self.eval_args.download_mode,
|
download_mode=self.eval_args.download_mode,
|
||||||
token=self.model_args.hf_hub_token,
|
token=self.model_args.hf_hub_token,
|
||||||
trust_remote_code=True,
|
trust_remote_code=self.model_args.trust_remote_code,
|
||||||
)
|
)
|
||||||
pbar.set_postfix_str(categorys[subject]["name"])
|
pbar.set_postfix_str(categorys[subject]["name"])
|
||||||
inputs, outputs, labels = [], [], []
|
inputs, outputs, labels = [], [], []
|
||||||
|
@ -285,6 +285,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
|
|||||||
default=False,
|
default=False,
|
||||||
metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
|
metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
|
||||||
)
|
)
|
||||||
|
trust_remote_code: bool = field(
|
||||||
|
default=False,
|
||||||
|
metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
|
||||||
|
)
|
||||||
compute_dtype: Optional[torch.dtype] = field(
|
compute_dtype: Optional[torch.dtype] = field(
|
||||||
default=None,
|
default=None,
|
||||||
init=False,
|
init=False,
|
||||||
|
@ -52,7 +52,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]:
|
|||||||
skip_check_imports()
|
skip_check_imports()
|
||||||
model_args.model_name_or_path = try_download_model_from_other_hub(model_args)
|
model_args.model_name_or_path = try_download_model_from_other_hub(model_args)
|
||||||
return {
|
return {
|
||||||
"trust_remote_code": True,
|
"trust_remote_code": model_args.trust_remote_code,
|
||||||
"cache_dir": model_args.cache_dir,
|
"cache_dir": model_args.cache_dir,
|
||||||
"revision": model_args.model_revision,
|
"revision": model_args.model_revision,
|
||||||
"token": model_args.hf_hub_token,
|
"token": model_args.hf_hub_token,
|
||||||
@ -155,7 +155,7 @@ def load_model(
|
|||||||
load_class = AutoModelForCausalLM
|
load_class = AutoModelForCausalLM
|
||||||
|
|
||||||
if model_args.train_from_scratch:
|
if model_args.train_from_scratch:
|
||||||
model = load_class.from_config(config, trust_remote_code=True)
|
model = load_class.from_config(config, trust_remote_code=model_args.trust_remote_code)
|
||||||
else:
|
else:
|
||||||
model = load_class.from_pretrained(**init_kwargs)
|
model = load_class.from_pretrained(**init_kwargs)
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ def _get_unsloth_kwargs(
|
|||||||
"device_map": {"": get_current_device()},
|
"device_map": {"": get_current_device()},
|
||||||
"rope_scaling": getattr(config, "rope_scaling", None),
|
"rope_scaling": getattr(config, "rope_scaling", None),
|
||||||
"fix_tokenizer": False,
|
"fix_tokenizer": False,
|
||||||
"trust_remote_code": True,
|
"trust_remote_code": model_args.trust_remote_code,
|
||||||
"use_gradient_checkpointing": "unsloth",
|
"use_gradient_checkpointing": "unsloth",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,6 +152,7 @@ class Runner:
|
|||||||
bf16=(get("train.compute_type") == "bf16"),
|
bf16=(get("train.compute_type") == "bf16"),
|
||||||
pure_bf16=(get("train.compute_type") == "pure_bf16"),
|
pure_bf16=(get("train.compute_type") == "pure_bf16"),
|
||||||
plot_loss=True,
|
plot_loss=True,
|
||||||
|
trust_remote_code=True,
|
||||||
ddp_timeout=180000000,
|
ddp_timeout=180000000,
|
||||||
include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True, # FIXME
|
include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True, # FIXME
|
||||||
)
|
)
|
||||||
@ -268,6 +269,7 @@ class Runner:
|
|||||||
top_p=get("eval.top_p"),
|
top_p=get("eval.top_p"),
|
||||||
temperature=get("eval.temperature"),
|
temperature=get("eval.temperature"),
|
||||||
output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
|
output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
|
||||||
|
trust_remote_code=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
if get("eval.predict"):
|
if get("eval.predict"):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user