mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-29 18:20:35 +08:00)
[model] Update ernie_vl to adapt new version (#9665)
@@ -205,10 +205,6 @@ def load_model(
|
||||
|
||||
if not is_trainable:
|
||||
model.requires_grad_(False)
|
||||
for param in model.parameters():
|
||||
if param.data.dtype == torch.float32 and model_args.compute_dtype != torch.float32:
|
||||
param.data = param.data.to(model_args.compute_dtype)
|
||||
|
||||
model.eval()
|
||||
else:
|
||||
model.train()
|
||||
|
||||
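For context: the four removed lines downcast any leftover float32 parameters to compute_dtype after loading. With the dtype now resolved at load time (see the patch_config hunk below), the surviving inference path reduces to freezing the weights and switching to eval mode. A minimal sketch, assuming a hypothetical checkpoint id:

    from transformers import AutoModelForCausalLM

    # Placeholder model id, illustrative only; not a value from this commit.
    model = AutoModelForCausalLM.from_pretrained("some-org/some-model", torch_dtype="auto")

    # Inference-only setup, mirroring the surviving branch of load_model:
    model.requires_grad_(False)  # freeze every parameter
    model.eval()                 # switch off dropout and other train-time behavior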
@@ -158,7 +158,7 @@ def patch_config(
 
     # do not cast the data type of the model for deepspeed zero3 without qlora
     if not (is_deepspeed_zero3_enabled() and model_args.quantization_bit is None):
-        init_kwargs["torch_dtype"] = model_args.compute_dtype
+        init_kwargs["torch_dtype"] = "auto"
 
     if init_kwargs["low_cpu_mem_usage"] and not is_fsdp_enabled():  # fsdp does not need device map
         if "device_map" not in init_kwargs and model_args.device_map:
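For readers unfamiliar with the two settings: torch_dtype=model_args.compute_dtype casts every weight to the user-selected dtype at load time, whereas torch_dtype="auto" lets transformers read the dtype recorded in the checkpoint's config. A minimal sketch of the difference, assuming a hypothetical model id:

    import torch
    from transformers import AutoModelForCausalLM

    name = "some-org/some-model"  # hypothetical placeholder, not from this commit

    # Old behavior: force every weight to the user-selected compute dtype.
    forced = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.bfloat16)

    # New behavior: "auto" reads torch_dtype from the checkpoint's config.json,
    # so weights keep the precision they were saved in.
    auto = AutoModelForCausalLM.from_pretrained(name, torch_dtype="auto")

    print(next(forced.parameters()).dtype)  # torch.bfloat16, regardless of checkpoint
    print(next(auto.parameters()).dtype)    # whatever the checkpoint stores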