Mirror of https://github.com/hiyouga/LLaMA-Factory.git
parent 33dc25e24f
commit ae1048db6d
@@ -217,7 +217,11 @@ register_model_group
         "Qwen-7B": "Qwen/Qwen-7B",
         "Qwen-14B": "Qwen/Qwen-14B",
         "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
-        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat"
+        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+        "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
+        "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
+        "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
+        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4"
     },
     module="c_attn",
     template="qwen"
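
Each entry added above maps a display name (as shown in the LLaMA-Factory CLI/web UI) to a Hugging Face Hub repository. A minimal, illustrative sketch of how the new Int8/Int4 aliases resolve to Hub paths, using plain transformers rather than the project's own loader:

# Illustrative only: the aliases registered above point at these Hub repos.
QWEN_QUANTIZED_CHAT = {
    "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
    "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
    "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
    "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4",
}

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = QWEN_QUANTIZED_CHAT["Qwen-14B-int4-Chat"]
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)  # Qwen ships custom modeling code
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True, device_map="auto")  # Int4 repos need a GPTQ-capable environment
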
@@ -266,8 +270,7 @@ register_model_group
         "Yi-6B": "01-ai/Yi-6B",
         "Yi-34B": "01-ai/Yi-34B",
         "Yi-34B-Chat": "01-ai/Yi-34B-Chat",
-        "Yi-34B-int8-Chat": "01-ai/Yi-34B-Chat-8bits",
-        "Yi-34B-int4-Chat": "01-ai/Yi-34B-Chat-4bits"
+        "Yi-34B-int8-Chat": "01-ai/Yi-34B-Chat-8bits"
     },
     template="yi"
 )
@@ -179,7 +179,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments):
             raise ValueError("Reward model is necessary for PPO training.")
 
         if self.stage == "ppo" and self.reward_model_type == "lora" and self.finetuning_type != "lora":
-            raise ValueError("Lora reward model only supports lora training.")
+            raise ValueError("Freeze/Full PPO training needs `reward_model_type=full`.")
 
     def save_to_json(self, json_path: str):
         r"""Saves the content of this instance in JSON format inside `json_path`."""
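
The new error message states the actual constraint more precisely: a LoRA reward model can only be used together with LoRA fine-tuning, so freeze/full PPO runs have to set reward_model_type=full. A standalone sketch of the same check, with hypothetical values standing in for the dataclass fields:

# Hypothetical values; the real check lives in FinetuningArguments (see diff above).
stage = "ppo"
finetuning_type = "freeze"       # freeze-tuning the policy model
reward_model_type = "lora"       # invalid combination after this change

if stage == "ppo" and reward_model_type == "lora" and finetuning_type != "lora":
    raise ValueError("Freeze/Full PPO training needs `reward_model_type=full`.")
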
@@ -146,6 +146,9 @@ def load_model_and_tokenizer(
 
     # Quantization configurations (using bitsandbytes library)
     if model_args.quantization_bit is not None:
+        if getattr(config, "quantization_config", None):
+            raise ValueError("Remove `quantization_bit` if you are using a quantized model.")
+
         if is_deepspeed_zero3_enabled():
             raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")
 
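
The added guard prevents stacking bitsandbytes quantization on top of a checkpoint that is already quantized, which transformers signals via a quantization_config entry in the model config. A small reproduction of the rule outside the project (the model name and flag value are only examples):

from transformers import AutoConfig

# Example: GPTQ checkpoints carry a `quantization_config` in their config.json.
config = AutoConfig.from_pretrained("Qwen/Qwen-14B-Chat-Int4", trust_remote_code=True)
quantization_bit = 4  # what passing `--quantization_bit 4` would set

if quantization_bit is not None and getattr(config, "quantization_config", None):
    raise ValueError("Remove `quantization_bit` if you are using a quantized model.")
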
@@ -38,8 +38,8 @@ def export_model(args: Optional[Dict[str, Any]] = None):
     model_args, _, finetuning_args, _ = get_infer_args(args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
 
-    if getattr(model, "quantization_method", None) == "gptq":
-        raise ValueError("Cannot export a GPTQ quantized model.")
+    if getattr(model, "quantization_method", None) in ["gptq", "awq"]:
+        raise ValueError("Cannot export a GPTQ or AWQ quantized model.")
 
     model.config.use_cache = True
     model.save_pretrained(finetuning_args.export_dir, max_shard_size="{}GB".format(finetuning_args.export_size))
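
With this change, export_model rejects both GPTQ- and AWQ-quantized models, since their packed weights cannot simply be re-saved as standard sharded checkpoints. A hedged usage sketch of the exporter as it appears in this diff; the import path and the option names besides export_dir/export_size are assumptions based on the project layout at this commit:

from llmtuner import export_model   # assumed entry point at this commit

# Merge a LoRA fine-tune into a full-precision model and save it in 2 GB shards.
export_model({
    "model_name_or_path": "Qwen/Qwen-7B-Chat",
    "template": "qwen",
    "finetuning_type": "lora",
    "checkpoint_dir": "saves/qwen-7b-chat/lora/sft",  # hypothetical adapter path
    "export_dir": "exported/qwen-7b-chat-sft",
    "export_size": 2,                                 # becomes max_shard_size="2GB"
})
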