support GPTQ tuning #729 #1481 #1545, fix ChatGLM template #1453 #1480 #1569

Former-commit-id: 9ea9380145
hiyouga
2023-11-20 22:52:11 +08:00
parent f06c4c8f7a
commit 4966bd7911
5 changed files with 43 additions and 4 deletions
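Context for the GPTQ part of this commit: "GPTQ tuning" here means training a LoRA adapter on top of an already GPTQ-quantized base model. A minimal sketch of that setup with transformers and peft (the checkpoint name and LoRA hyperparameters below are placeholders, not values from this repo):

# Minimal sketch: LoRA fine-tuning on a GPTQ-quantized base via transformers + peft.
# Requires the optimum and auto-gptq packages; the model name is a placeholder.
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-GPTQ",  # hypothetical pre-quantized GPTQ checkpoint
    device_map="auto"
)
# transformers tags GPTQ-loaded models; this is what the checks in this commit rely on
print(getattr(model, "quantization_method", None) == "gptq")  # expected: True

peft_config = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05,  # placeholder hyperparameters
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)  # quantized weights stay frozen; only the LoRA layers train
model.print_trainable_parameters()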


@@ -354,6 +354,35 @@ register_template(
"\n",
"{{system}}"
],
prompt=[
{"token": "<|user|>"},
"\n",
"{{query}}",
{"token": "<|assistant|>"},
"\n" # add an extra newline to avoid error in ChatGLM's process_response method
],
system=(
"You are ChatGLM3, a large language model trained by Zhipu.AI. "
"Follow the user's instructions carefully. Respond using markdown."
),
sep=[],
stop_words=[
"<|user|>",
"<|observation|>"
],
efficient_eos=True
)
register_template(
name="chatglm3_raw", # the raw template for tool tuning
prefix=[
{"token": "[gMASK]"},
{"token": "sop"},
{"token": "<|system|>"},
"\n",
"{{system}}"
],
prompt=[
{"token": "<|user|>"},
"\n",

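The template entries in the hunk above interleave special-token pieces ({"token": ...}) with plain text containing {{system}} and {{query}} placeholders. As an illustration only (the project's real template code maps token pieces to token ids via the tokenizer rather than concatenating strings), the rendering amounts to:

# Illustrative stand-alone renderer for the piece lists above; not the project's implementation.
def render_pieces(pieces, system="", query=""):
    parts = []
    for piece in pieces:
        if isinstance(piece, dict):  # special token such as [gMASK] or <|user|>
            parts.append(piece["token"])
        else:  # plain text, possibly containing placeholders
            parts.append(piece.replace("{{system}}", system).replace("{{query}}", query))
    return "".join(parts)

prefix = [{"token": "[gMASK]"}, {"token": "sop"}, {"token": "<|system|>"}, "\n", "{{system}}"]
prompt = [{"token": "<|user|>"}, "\n", "{{query}}", {"token": "<|assistant|>"}, "\n"]
print(render_pieces(prefix, system="You are ChatGLM3 ...") + render_pieces(prompt, query="Hello"))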

@@ -65,7 +65,12 @@ def init_adapter(
    checkpoint_to_resume = None

    if model_args.checkpoint_dir is not None:
        if is_trainable and finetuning_args.resume_lora_training:
        is_mergeable = True
        if getattr(model, "quantization_method", None) == "gptq":
            assert len(model_args.checkpoint_dir) == 1, "GPTQ quantized model only accepts a single checkpoint."
            is_mergeable = False

        if (is_trainable and finetuning_args.resume_lora_training) or (not is_mergeable):
            checkpoints_to_merge, checkpoint_to_resume = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1]
        else:
            checkpoints_to_merge = model_args.checkpoint_dir

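The is_mergeable flag exists because LoRA weights cannot be merged back into GPTQ-quantized layers, so intermediate adapters cannot be folded into the base and only a single checkpoint can be resumed. A hedged sketch of what the two resulting lists typically feed into via peft (not code from this repo; the adapter path and the pre-loaded `model` are assumptions):

# Hedged sketch of how checkpoints_to_merge / checkpoint_to_resume are typically consumed.
# `model` is assumed to be an already-loaded base model; the adapter directory is a placeholder.
from peft import PeftModel

checkpoints_to_merge = []                            # stays empty when the base is GPTQ-quantized
checkpoint_to_resume = "saves/lora/checkpoint-2000"  # hypothetical adapter directory

for ckpt in checkpoints_to_merge:
    model = PeftModel.from_pretrained(model, ckpt)
    model = model.merge_and_unload()                 # folding LoRA into GPTQ layers is unsupported

if checkpoint_to_resume is not None:
    # attach the last adapter without merging so training can resume from it
    model = PeftModel.from_pretrained(model, checkpoint_to_resume, is_trainable=True)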

@@ -37,8 +37,13 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra
def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"):
    model_args, _, finetuning_args, _ = get_infer_args(args)
    model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)

    if getattr(model, "quantization_method", None) == "gptq":
        raise ValueError("Cannot export a GPTQ quantized model.")

    model.config.use_cache = True
    model.save_pretrained(finetuning_args.export_dir, max_shard_size=max_shard_size)

    try:
        tokenizer.padding_side = "left"  # restore padding side
        tokenizer.init_kwargs["padding_side"] = "left"
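For reference, a hedged usage sketch of the export path above; the import location and every path or argument not visible in the hunk are assumptions:

# Hedged usage sketch; the `llmtuner` import path and all paths below are placeholders/assumptions.
from llmtuner import export_model

export_model(dict(
    model_name_or_path="path/to/base-model",   # placeholder
    checkpoint_dir="path/to/lora-checkpoint",  # placeholder; a GPTQ base now raises above
    finetuning_type="lora",
    export_dir="exported-model"
))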