mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-29 18:20:35 +08:00)
[model] Update ernie_vl to adapt new version (#9665)
@@ -205,10 +205,6 @@ def load_model(
|
||||
|
||||
if not is_trainable:
|
||||
model.requires_grad_(False)
|
||||
for param in model.parameters():
|
||||
if param.data.dtype == torch.float32 and model_args.compute_dtype != torch.float32:
|
||||
param.data = param.data.to(model_args.compute_dtype)
|
||||
|
||||
model.eval()
|
||||
else:
|
||||
model.train()
|
||||
|
||||
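For context: the four removed lines downcast any leftover float32 parameters to compute_dtype after loading. With the dtype now resolved at load time (see the patch_config hunk below), the surviving inference path reduces to freezing the weights and switching to eval mode. A minimal sketch, assuming a hypothetical checkpoint id:

    from transformers import AutoModelForCausalLM

    # Placeholder model id, illustrative only; not a value from this commit.
    model = AutoModelForCausalLM.from_pretrained("some-org/some-model", torch_dtype="auto")

    # Inference-only setup, mirroring the surviving branch of load_model:
    model.requires_grad_(False)  # freeze every parameter
    model.eval()                 # switch off dropout and other train-time behavior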
@@ -158,7 +158,7 @@ def patch_config(
 
     # do not cast the data type of the model for deepspeed zero3 without qlora
     if not (is_deepspeed_zero3_enabled() and model_args.quantization_bit is None):
-        init_kwargs["torch_dtype"] = model_args.compute_dtype
+        init_kwargs["torch_dtype"] = "auto"
 
     if init_kwargs["low_cpu_mem_usage"] and not is_fsdp_enabled():  # fsdp does not need device map
         if "device_map" not in init_kwargs and model_args.device_map:
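For readers unfamiliar with the two settings: torch_dtype=model_args.compute_dtype casts every weight to the user-selected dtype at load time, whereas torch_dtype="auto" lets transformers read the dtype recorded in the checkpoint's config. A minimal sketch of the difference, assuming a hypothetical model id:

    import torch
    from transformers import AutoModelForCausalLM

    name = "some-org/some-model"  # hypothetical placeholder, not from this commit

    # Old behavior: force every weight to the user-selected compute dtype.
    forced = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.bfloat16)

    # New behavior: "auto" reads torch_dtype from the checkpoint's config.json,
    # so weights keep the precision they were saved in.
    auto = AutoModelForCausalLM.from_pretrained(name, torch_dtype="auto")

    print(next(forced.parameters()).dtype)  # torch.bfloat16, regardless of checkpoint
    print(next(auto.parameters()).dtype)    # whatever the checkpoint stores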