Merge pull request #5819 from yafshar/remote_code

Add trust_remote_code Parameter and Set Default to False

Former-commit-id: 2a832e489b
2026-03-12 15:06:00 +08:00 · 2024-12-17 21:10:24 +08:00
parent fc18db6290 76ebd62ac1
commit 09419dfbab
43 changed files with 48 additions and 6 deletions
--- a/examples/extras/adam_mini/qwen2_full_sft.yaml
+++ b/examples/extras/adam_mini/qwen2_full_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: Qwen/Qwen2-1.5B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/badam/llama3_full_sft.yaml
+++ b/examples/extras/badam/llama3_full_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
+++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
@@ -1,6 +1,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 quantization_bit: 4
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/galore/llama3_full_sft.yaml
+++ b/examples/extras/galore/llama3_full_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/llama_pro/llama3_freeze_sft.yaml
+++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: models/llama3-8b-pro
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/loraplus/llama3_lora_sft.yaml
+++ b/examples/extras/loraplus/llama3_lora_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/mod/llama3_full_sft.yaml
+++ b/examples/extras/mod/llama3_full_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/nlg_eval/llama3_lora_predict.yaml
+++ b/examples/extras/nlg_eval/llama3_lora_predict.yaml
@@ -4,6 +4,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 adapter_name_or_path: saves/llama3-8b/lora/sft
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/extras/pissa/llama3_lora_sft.yaml
+++ b/examples/extras/pissa/llama3_lora_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/inference/llama3.yaml
+++ b/examples/inference/llama3.yaml
@@ -1,3 +1,4 @@
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 template: llama3
 infer_backend: huggingface  # choices: [huggingface, vllm]
+trust_remote_code: true
--- a/examples/inference/llama3_full_sft.yaml
+++ b/examples/inference/llama3_full_sft.yaml
@@ -1,3 +1,4 @@
 model_name_or_path: saves/llama3-8b/full/sft
 template: llama3
 infer_backend: huggingface  # choices: [huggingface, vllm]
+trust_remote_code: true
--- a/examples/inference/llama3_lora_sft.yaml
+++ b/examples/inference/llama3_lora_sft.yaml
@@ -2,3 +2,4 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 adapter_name_or_path: saves/llama3-8b/lora/sft
 template: llama3
 infer_backend: huggingface  # choices: [huggingface, vllm]
+trust_remote_code: true
--- a/examples/inference/llama3_vllm.yaml
+++ b/examples/inference/llama3_vllm.yaml
@@ -2,3 +2,4 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 template: llama3
 infer_backend: vllm
 vllm_enforce_eager: true
+trust_remote_code: true
--- a/examples/inference/llava1_5.yaml
+++ b/examples/inference/llava1_5.yaml
@@ -1,3 +1,4 @@
 model_name_or_path: llava-hf/llava-1.5-7b-hf
 template: llava
 infer_backend: huggingface  # choices: [huggingface, vllm]
+trust_remote_code: true
--- a/examples/inference/qwen2_vl.yaml
+++ b/examples/inference/qwen2_vl.yaml
@@ -1,3 +1,4 @@
 model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 template: qwen2_vl
 infer_backend: huggingface  # choices: [huggingface, vllm]
+trust_remote_code: true
--- a/examples/merge_lora/llama3_gptq.yaml
+++ b/examples/merge_lora/llama3_gptq.yaml
@@ -1,6 +1,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 template: llama3
+trust_remote_code: true
 ### export
 export_dir: models/llama3_gptq
--- a/examples/merge_lora/llama3_lora_sft.yaml
+++ b/examples/merge_lora/llama3_lora_sft.yaml
@@ -5,6 +5,7 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 adapter_name_or_path: saves/llama3-8b/lora/sft
 template: llama3
 finetuning_type: lora
+trust_remote_code: true
 ### export
 export_dir: models/llama3_lora_sft
--- a/examples/merge_lora/qwen2vl_lora_sft.yaml
+++ b/examples/merge_lora/qwen2vl_lora_sft.yaml
@@ -5,6 +5,7 @@ model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
 template: qwen2_vl
 finetuning_type: lora
+trust_remote_code: true
 ### export
 export_dir: models/qwen2_vl_lora_sft
--- a/examples/train_full/llama3_full_sft.yaml
+++ b/examples/train_full/llama3_full_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_full/qwen2vl_full_sft.yaml
+++ b/examples/train_full/qwen2vl_full_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_lora/llama3_lora_dpo.yaml
+++ b/examples/train_lora/llama3_lora_dpo.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: dpo
--- a/examples/train_lora/llama3_lora_eval.yaml
+++ b/examples/train_lora/llama3_lora_eval.yaml
@@ -1,6 +1,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 adapter_name_or_path: saves/llama3-8b/lora/sft
+trust_remote_code: true
 ### method
 finetuning_type: lora
--- a/examples/train_lora/llama3_lora_kto.yaml
+++ b/examples/train_lora/llama3_lora_kto.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: kto
--- a/examples/train_lora/llama3_lora_ppo.yaml
+++ b/examples/train_lora/llama3_lora_ppo.yaml
@@ -1,6 +1,7 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 reward_model: saves/llama3-8b/lora/reward
+trust_remote_code: true
 ### method
 stage: ppo
--- a/examples/train_lora/llama3_lora_pretrain.yaml
+++ b/examples/train_lora/llama3_lora_pretrain.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: pt
--- a/examples/train_lora/llama3_lora_reward.yaml
+++ b/examples/train_lora/llama3_lora_reward.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: rm
--- a/examples/train_lora/llama3_lora_sft.yaml
+++ b/examples/train_lora/llama3_lora_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_lora/llama3_lora_sft_ds3.yaml
+++ b/examples/train_lora/llama3_lora_sft_ds3.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_lora/llama3_preprocess.yaml
+++ b/examples/train_lora/llama3_preprocess.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_lora/llava1_5_lora_sft.yaml
+++ b/examples/train_lora/llava1_5_lora_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: llava-hf/llava-1.5-7b-hf
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_lora/qwen2vl_lora_dpo.yaml
+++ b/examples/train_lora/qwen2vl_lora_dpo.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
+trust_remote_code: true
 ### method
 stage: dpo
--- a/examples/train_lora/qwen2vl_lora_sft.yaml
+++ b/examples/train_lora/qwen2vl_lora_sft.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_qlora/llama3_lora_sft_aqlm.yaml
+++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_qlora/llama3_lora_sft_awq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_awq.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_qlora/llama3_lora_sft_gptq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml
@@ -1,5 +1,6 @@
 ### model
 model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
+trust_remote_code: true
 ### method
 stage: sft
--- a/examples/train_qlora/llama3_lora_sft_otfq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_otfq.yaml
@@ -2,6 +2,7 @@
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 quantization_bit: 4
 quantization_method: bitsandbytes  # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
+trust_remote_code: true
 ### method
 stage: sft
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@@ -72,7 +72,7 @@ class VllmEngine(BaseEngine):
        engine_args = {
            "model": model_args.model_name_or_path,
-            "trust_remote_code": True,
+            "trust_remote_code": model_args.trust_remote_code,
            "download_dir": model_args.cache_dir,
            "dtype": model_args.infer_dtype,
            "max_model_len": model_args.vllm_maxlen,
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -129,7 +129,7 @@ def _load_single_dataset(
            token=model_args.hf_hub_token,
            streaming=data_args.streaming,
            num_proc=data_args.preprocessing_num_workers,
-            trust_remote_code=True,
+            trust_remote_code=model_args.trust_remote_code,
        )
    if dataset_attr.num_samples is not None and not data_args.streaming:
--- a/src/llamafactory/eval/evaluator.py
+++ b/src/llamafactory/eval/evaluator.py
@@ -100,7 +100,7 @@ class Evaluator:
                cache_dir=self.model_args.cache_dir,
                download_mode=self.eval_args.download_mode,
                token=self.model_args.hf_hub_token,
-                trust_remote_code=True,
+                trust_remote_code=self.model_args.trust_remote_code,
            )
            pbar.set_postfix_str(categorys[subject]["name"])
            inputs, outputs, labels = [], [], []
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -285,6 +285,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
        default=False,
        metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
    )
+    trust_remote_code: bool = field(
+        default=False,
+        metadata={"help": "Whether to trust the execution of code from datasets/models defined on the Hub or not."},
+    )
    compute_dtype: Optional[torch.dtype] = field(
        default=None,
        init=False,
--- a/src/llamafactory/model/loader.py
+++ b/src/llamafactory/model/loader.py
@@ -52,7 +52,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]:
    skip_check_imports()
    model_args.model_name_or_path = try_download_model_from_other_hub(model_args)
    return {
-        "trust_remote_code": True,
+        "trust_remote_code": model_args.trust_remote_code,
        "cache_dir": model_args.cache_dir,
        "revision": model_args.model_revision,
        "token": model_args.hf_hub_token,
@@ -155,7 +155,7 @@ def load_model(
                load_class = AutoModelForCausalLM
            if model_args.train_from_scratch:
-                model = load_class.from_config(config, trust_remote_code=True)
+                model = load_class.from_config(config, trust_remote_code=model_args.trust_remote_code)
            else:
                model = load_class.from_pretrained(**init_kwargs)
--- a/src/llamafactory/model/model_utils/unsloth.py
+++ b/src/llamafactory/model/model_utils/unsloth.py
@@ -39,7 +39,7 @@ def _get_unsloth_kwargs(
        "device_map": {"": get_current_device()},
        "rope_scaling": getattr(config, "rope_scaling", None),
        "fix_tokenizer": False,
-        "trust_remote_code": True,
+        "trust_remote_code": model_args.trust_remote_code,
        "use_gradient_checkpointing": "unsloth",
    }
--- a/src/llamafactory/webui/runner.py
+++ b/src/llamafactory/webui/runner.py
@@ -152,6 +152,7 @@ class Runner:
            bf16=(get("train.compute_type") == "bf16"),
            pure_bf16=(get("train.compute_type") == "pure_bf16"),
            plot_loss=True,
+            trust_remote_code=True,
            ddp_timeout=180000000,
            include_num_input_tokens_seen=False if is_transformers_version_equal_to_4_46() else True,  # FIXME
        )
@@ -268,6 +269,7 @@ class Runner:
            top_p=get("eval.top_p"),
            temperature=get("eval.temperature"),
            output_dir=get_save_dir(model_name, finetuning_type, get("eval.output_dir")),
+            trust_remote_code=True,
        )
        if get("eval.predict"):