diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py
index d4ec22d5..ee7c0da1 100644
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -758,6 +758,26 @@ register_template(
 )
 
 
+register_template(
+    name="yuan",
+    prefix=[
+        "{{system}}"
+    ],
+    prompt=[
+        "{{query}}",
+        {"token": "<eod>"}
+    ],
+    system="",
+    sep=[
+        "\n"
+    ],
+    stop_words=[
+        "<eod>"
+    ],
+    replace_eos=True
+)
+
+
 register_template(
     name="zephyr",
     prefix=[
diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py
index f4e55d43..3f415bee 100644
--- a/src/llmtuner/hparams/model_args.py
+++ b/src/llmtuner/hparams/model_args.py
@@ -102,6 +102,10 @@ class ModelArguments:
         default=1024,
         metadata={"help": "The maximum length of the model inputs used for quantization."}
     )
+    export_legacy_format: Optional[bool] = field(
+        default=False,
+        metadata={"help": "Whether or not to save the `.bin` files instead of `.safetensors`."}
+    )
 
     def __post_init__(self):
         self.compute_dtype = None
diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py
index 0af124a3..033c20f5 100644
--- a/src/llmtuner/train/tuner.py
+++ b/src/llmtuner/train/tuner.py
@@ -47,7 +47,11 @@ def export_model(args: Optional[Dict[str, Any]] = None):
     model.config.use_cache = True
     model = model.to("cpu")
 
-    model.save_pretrained(model_args.export_dir, max_shard_size="{}GB".format(model_args.export_size))
+    model.save_pretrained(
+        save_directory=model_args.export_dir,
+        max_shard_size="{}GB".format(model_args.export_size),
+        safe_serialization=(not model_args.export_legacy_format)
+    )
 
     try:
         tokenizer.padding_side = "left"  # restore padding side
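
As a usage sketch (not part of the diff): the new `export_legacy_format` flag would be passed through the argument dict consumed by `export_model`. Only `export_dir`, `export_size`, `export_legacy_format`, and the `yuan` template are introduced or referenced above; the keys `model_name_or_path` and `template`, as well as the model path, are assumptions about the surrounding argument dataclasses.

```python
# Hypothetical invocation of export_model exercising the new flag; keys other
# than export_dir, export_size and export_legacy_format are assumptions.
from llmtuner.train.tuner import export_model

export_model({
    "model_name_or_path": "path/to/yuan-model",  # assumed field name, placeholder path
    "template": "yuan",                          # template registered in this diff
    "export_dir": "exported_yuan",
    "export_size": 2,                            # shard size, formatted as "2GB"
    "export_legacy_format": True,                # write .bin shards instead of .safetensors
})
```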