Mirror of https://github.com/hiyouga/LLaMA-Factory.git
Synced 2025-08-06 21:52:50 +08:00
Commit c9d3cc181a (parent 1ae6f0a5f3)
@@ -277,6 +277,10 @@ def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
     _verify_model_args(model_args, finetuning_args)
 
-    model_args.device_map = "auto"
+    if model_args.export_dir is not None:
+        model_args.device_map = {"": "cpu"}
+        model_args.compute_dtype = torch.float32
+    else:
+        model_args.device_map = "auto"
 
     return model_args, data_args, finetuning_args, generating_args
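This hunk makes export runs load the model on CPU in full precision, while ordinary inference keeps `device_map="auto"`. A minimal sketch of the resulting branch, assuming a simplified `ModelArguments` stand-in rather than the actual dataclass in `hparams`:

```python
import torch
from dataclasses import dataclass
from typing import Dict, Optional, Union

@dataclass
class ModelArguments:
    # simplified stand-in for the real ModelArguments dataclass (assumption)
    export_dir: Optional[str] = None
    device_map: Optional[Union[str, Dict[str, str]]] = None
    compute_dtype: Optional[torch.dtype] = None

def resolve_infer_device(model_args: ModelArguments) -> ModelArguments:
    """Illustrative mirror of the branch added in get_infer_args."""
    if model_args.export_dir is not None:
        # exporting: keep the whole model on CPU in float32 so the later
        # dtype cast and save_pretrained() do not depend on GPU memory
        model_args.device_map = {"": "cpu"}
        model_args.compute_dtype = torch.float32
    else:
        # normal inference: let accelerate place the model automatically
        model_args.device_map = "auto"
    return model_args

print(resolve_infer_device(ModelArguments(export_dir="output/export")).device_map)  # {'': 'cpu'}
print(resolve_infer_device(ModelArguments()).device_map)                            # 'auto'
```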
@@ -65,8 +65,7 @@ def export_model(args: Optional[Dict[str, Any]] = None):
     if getattr(model, "quantization_method", None) is None:  # cannot convert dtype of a quantized model
         output_dtype = getattr(model.config, "torch_dtype", torch.float16)
         setattr(model.config, "torch_dtype", output_dtype)
-        for param in model.parameters():
-            param.data = param.data.to(output_dtype)
+        model = model.to(output_dtype)
 
     model.save_pretrained(
         save_directory=model_args.export_dir,
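The replacement swaps the parameter-by-parameter cast for a single `model.to(output_dtype)` call. Besides being shorter, `nn.Module.to()` also converts floating-point buffers (e.g. normalization statistics), which a loop over `model.parameters()` leaves untouched. A minimal sketch of the difference, using a toy `nn.Sequential` stand-in rather than the exported model:

```python
import torch
import torch.nn as nn

# Tiny stand-in model: BatchNorm carries floating-point buffers
# (running_mean / running_var) in addition to its parameters.
model = nn.Sequential(nn.Linear(4, 4), nn.BatchNorm1d(4))

# Parameter-only cast (the removed approach): buffers keep their old dtype.
for param in model.parameters():
    param.data = param.data.to(torch.float16)
print(model[1].running_mean.dtype)  # torch.float32 -- buffers were not converted

# Module-level cast (the new approach): parameters *and* buffers are converted.
model = model.to(torch.float16)
print(model[1].running_mean.dtype)  # torch.float16
```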