From 9f77e8b02501919f4c87ba269efb344b28b6a596 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Sat, 16 Dec 2023 16:31:30 +0800
Subject: [PATCH] support autogptq in llama board #246

Former-commit-id: fea01226703d1534b5cf511bcb6a49e73bc86ce1
---
 README.md                               |  2 +-
 README_zh.md                            |  2 +-
 src/llmtuner/extras/constants.py        |  2 +
 src/llmtuner/extras/misc.py             | 22 +--------
 src/llmtuner/model/parser.py            | 60 +++++++++++++++++--------
 src/llmtuner/model/patcher.py           |  5 +++
 src/llmtuner/webui/common.py            |  8 +++-
 src/llmtuner/webui/components/data.py   |  7 ++-
 src/llmtuner/webui/components/export.py | 32 ++++++++++---
 src/llmtuner/webui/components/top.py    |  2 +-
 src/llmtuner/webui/components/train.py  |  2 +-
 src/llmtuner/webui/locales.py           | 44 +++++++++++++-----
 12 files changed, 123 insertions(+), 65 deletions(-)

diff --git a/README.md b/README.md
index e8933e7a..15734e6e 100644
--- a/README.md
+++ b/README.md
@@ -482,7 +482,7 @@ python src/export_model.py \
 > Merging LoRA weights into a quantized model is not supported.
 
 > [!TIP]
-> Use `--export_quantization_bit 4` and `--export_quantization_dataset data/wiki_demo.txt` to quantize the model.
+> Use `--export_quantization_bit 4` and `--export_quantization_dataset data/c4_demo.json` to quantize the model.
 
 ### API Demo
 
diff --git a/README_zh.md b/README_zh.md
index 5f593c8e..a2e7b744 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -482,7 +482,7 @@ python src/export_model.py \
 > 尚不支持量化模型的 LoRA 权重合并及导出。
 
 > [!TIP]
-> 使用 `--export_quantization_bit 4` 和 `--export_quantization_dataset data/wiki_demo.txt` 量化导出模型。
+> 使用 `--export_quantization_bit 4` 和 `--export_quantization_dataset data/c4_demo.json` 量化导出模型。
 
 ### API 服务
 
diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
index 26084aa8..6eca14bf 100644
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -26,6 +26,8 @@ LOG_FILE_NAME = "trainer_log.jsonl"
 
 METHODS = ["full", "freeze", "lora"]
 
+PEFT_METHODS = ["lora"]
+
 SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"]
 
 SUPPORTED_MODELS = OrderedDict()
diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py
index 9a50e369..968918e5 100644
--- a/src/llmtuner/extras/misc.py
+++ b/src/llmtuner/extras/misc.py
@@ -1,13 +1,9 @@
 import gc
 import os
-import sys
 import torch
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
+from typing import TYPE_CHECKING, Tuple
 from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList
-from llmtuner.extras.logging import get_logger
-
-logger = get_logger(__name__)
 
 try:
     from transformers.utils import (
         is_torch_bf16_cpu_available,
@@ -106,22 +102,6 @@ def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype:
         return torch.float32
 
 
-def parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]:
-    if args is not None:
-        return parser.parse_dict(args)
-    elif len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
-        return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))
-    elif len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
-        return parser.parse_json_file(os.path.abspath(sys.argv[1]))
-    else:
-        (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(return_remaining_strings=True)
-        if unknown_args:
-            logger.warning(parser.format_help())
-            logger.error(f'\nGot unknown args, potentially deprecated arguments: {unknown_args}\n')
-            raise ValueError(f"Some specified arguments are not used by the HfArgumentParser: {unknown_args}")
-        return (*parsed_args,)
-
-
 def torch_gc() -> None:
     r"""
     Collects GPU memory.
diff --git a/src/llmtuner/model/parser.py b/src/llmtuner/model/parser.py
index 0873a44c..760ce88e 100644
--- a/src/llmtuner/model/parser.py
+++ b/src/llmtuner/model/parser.py
@@ -1,5 +1,7 @@
 import os
+import sys
 import torch
+import logging
 import datasets
 import transformers
 from typing import Any, Dict, Optional, Tuple
@@ -7,7 +9,6 @@ from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint
 
 from llmtuner.extras.logging import get_logger
-from llmtuner.extras.misc import parse_args
 from llmtuner.hparams import (
     ModelArguments,
     DataArguments,
@@ -40,6 +41,33 @@ _EVAL_CLS = Tuple[
 ]
 
 
+def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = None) -> Tuple[Any]:
+    if args is not None:
+        return parser.parse_dict(args)
+
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
+        return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))
+
+    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
+        return parser.parse_json_file(os.path.abspath(sys.argv[1]))
+
+    (*parsed_args, unknown_args) = parser.parse_args_into_dataclasses(return_remaining_strings=True)
+
+    if unknown_args:
+        print(parser.format_help())
+        print("Got unknown args, potentially deprecated arguments: {}".format(unknown_args))
+        raise ValueError("Some specified arguments are not used by the HfArgumentParser: {}".format(unknown_args))
+
+    return (*parsed_args,)
+
+
+def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:
+    datasets.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.set_verbosity(log_level)
+    transformers.utils.logging.enable_default_handler()
+    transformers.utils.logging.enable_explicit_format()
+
+
 def _verify_model_args(model_args: "ModelArguments", finetuning_args: "FinetuningArguments") -> None:
     if model_args.quantization_bit is not None:
         if finetuning_args.finetuning_type != "lora":
@@ -56,34 +84,28 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin
         raise ValueError("Quantized model only accepts a single adapter. Merge them first.")
 
 
-def parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
+def _parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     parser = HfArgumentParser(_TRAIN_ARGS)
-    return parse_args(parser, args)
+    return _parse_args(parser, args)
 
 
-def parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
+def _parse_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
     parser = HfArgumentParser(_INFER_ARGS)
-    return parse_args(parser, args)
+    return _parse_args(parser, args)
 
 
-def parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
+def _parse_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
     parser = HfArgumentParser(_EVAL_ARGS)
-    return parse_args(parser, args)
+    return _parse_args(parser, args)
 
 
 def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
-    model_args, data_args, training_args, finetuning_args, generating_args = parse_train_args(args)
+    model_args, data_args, training_args, finetuning_args, generating_args = _parse_train_args(args)
 
     # Setup logging
     if training_args.should_log:
-        # The default of training_args.log_level is passive, so we set log level at info here to have that default.
-        transformers.utils.logging.set_verbosity_info()
-
-    log_level = training_args.get_process_log_level()
-    datasets.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.enable_default_handler()
-    transformers.utils.logging.enable_explicit_format()
+        log_level = training_args.get_process_log_level()
+        _set_transformers_logging(log_level)
 
     # Check arguments
     data_args.init_for_training(training_args.seed)
@@ -193,7 +215,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
 
 
 def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
-    model_args, data_args, finetuning_args, generating_args = parse_infer_args(args)
+    model_args, data_args, finetuning_args, generating_args = _parse_infer_args(args)
+    _set_transformers_logging()
 
     if data_args.template is None:
         raise ValueError("Please specify which `template` to use.")
@@ -204,7 +227,8 @@ def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS:
 
 
 def get_eval_args(args: Optional[Dict[str, Any]] = None) -> _EVAL_CLS:
-    model_args, data_args, eval_args, finetuning_args = parse_eval_args(args)
+    model_args, data_args, eval_args, finetuning_args = _parse_eval_args(args)
+    _set_transformers_logging()
 
     if data_args.template is None:
         raise ValueError("Please specify which `template` to use.")
diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index c7b916cb..b9c69b73 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -76,8 +76,13 @@ def configure_quantization(
     if finetuning_args.export_quantization_bit is not None:  # gptq
         require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0")
         require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0")
+
+        if getattr(config, "model_type", None) == "chatglm":
+            raise ValueError("ChatGLM model is not supported.")
+
         config_kwargs["quantization_config"] = GPTQConfig(
             bits=finetuning_args.export_quantization_bit,
+            tokenizer=tokenizer,
             dataset=get_quantization_dataset(tokenizer, model_args, finetuning_args)
         )
         config_kwargs["device_map"] = "auto"
diff --git a/src/llmtuner/webui/common.py b/src/llmtuner/webui/common.py
index 7edb391b..750cae73 100644
--- a/src/llmtuner/webui/common.py
+++ b/src/llmtuner/webui/common.py
@@ -7,6 +7,7 @@ from peft.utils import WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME
 from llmtuner.extras.constants import (
     DEFAULT_MODULE,
     DEFAULT_TEMPLATE,
+    PEFT_METHODS,
     SUPPORTED_MODELS,
     TRAINING_STAGES,
     DownloadSource
@@ -77,8 +78,11 @@ def get_template(model_name: str) -> str:
 
 
 def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]:
+    if finetuning_type not in PEFT_METHODS:
+        return gr.update(value=[], choices=[], interactive=False)
+
     adapters = []
-    if model_name and finetuning_type == "lora":  # full and freeze have no adapter
+    if model_name and finetuning_type == "lora":
         save_dir = get_save_dir(model_name, finetuning_type)
         if save_dir and os.path.isdir(save_dir):
             for adapter in os.listdir(save_dir):
@@ -87,7 +91,7 @@ def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]:
                     and any([os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES])
                 ):
                     adapters.append(adapter)
-    return gr.update(value=[], choices=adapters)
+    return gr.update(value=[], choices=adapters, interactive=True)
 
 
 def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
diff --git a/src/llmtuner/webui/components/data.py b/src/llmtuner/webui/components/data.py
index effa39da..a74bd029 100644
--- a/src/llmtuner/webui/components/data.py
+++ b/src/llmtuner/webui/components/data.py
@@ -21,8 +21,11 @@ def next_page(page_index: int, total_num: int) -> int:
 
 
 def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]:
-    with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
-        dataset_info = json.load(f)
+    try:
+        with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
+            dataset_info = json.load(f)
+    except Exception:
+        return gr.update(interactive=False)
 
     if (
         len(dataset) > 0
diff --git a/src/llmtuner/webui/components/export.py b/src/llmtuner/webui/components/export.py
index 59292456..a4b65591 100644
--- a/src/llmtuner/webui/components/export.py
+++ b/src/llmtuner/webui/components/export.py
@@ -10,6 +10,9 @@ if TYPE_CHECKING:
     from llmtuner.webui.engine import Engine
 
 
+GPTQ_BITS = ["8", "4", "3", "2"]
+
+
 def save_model(
     lang: str,
     model_name: str,
@@ -18,6 +21,8 @@ def save_model(
     finetuning_type: str,
     template: str,
     max_shard_size: int,
+    export_quantization_bit: str,
+    export_quantization_dataset: str,
     export_dir: str
 ) -> Generator[str, None, None]:
     error = ""
@@ -25,23 +30,32 @@ def save_model(
         error = ALERTS["err_no_model"][lang]
     elif not model_path:
         error = ALERTS["err_no_path"][lang]
-    elif not adapter_path:
-        error = ALERTS["err_no_adapter"][lang]
     elif not export_dir:
         error = ALERTS["err_no_export_dir"][lang]
+    elif export_quantization_bit in GPTQ_BITS and not export_quantization_dataset:
+        error = ALERTS["err_no_dataset"][lang]
+    elif export_quantization_bit not in GPTQ_BITS and not adapter_path:
+        error = ALERTS["err_no_adapter"][lang]
 
     if error:
         gr.Warning(error)
        yield error
         return
 
+    if adapter_path:
+        adapter_name_or_path = ",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path])
+    else:
+        adapter_name_or_path = None
+
     args = dict(
         model_name_or_path=model_path,
-        adapter_name_or_path=",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path]),
+        adapter_name_or_path=adapter_name_or_path,
         finetuning_type=finetuning_type,
         template=template,
         export_dir=export_dir,
-        export_size=max_shard_size
+        export_size=max_shard_size,
+        export_quantization_bit=int(export_quantization_bit) if export_quantization_bit in GPTQ_BITS else None,
+        export_quantization_dataset=export_quantization_dataset
     )
 
     yield ALERTS["info_exporting"][lang]
@@ -51,9 +65,11 @@ def save_model(
 
 
 def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Row():
-        export_dir = gr.Textbox()
         max_shard_size = gr.Slider(value=1, minimum=1, maximum=100)
+        export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none")
+        export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
 
+    export_dir = gr.Textbox()
     export_btn = gr.Button()
     info_box = gr.Textbox(show_label=False, interactive=False)
 
@@ -67,14 +83,18 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
             engine.manager.get_elem_by_name("top.finetuning_type"),
             engine.manager.get_elem_by_name("top.template"),
             max_shard_size,
+            export_quantization_bit,
+            export_quantization_dataset,
             export_dir
         ],
         [info_box]
     )
 
     return dict(
-        export_dir=export_dir,
         max_shard_size=max_shard_size,
+        export_quantization_bit=export_quantization_bit,
+        export_quantization_dataset=export_quantization_dataset,
+        export_dir=export_dir,
         export_btn=export_btn,
         info_box=info_box
     )
diff --git a/src/llmtuner/webui/components/top.py b/src/llmtuner/webui/components/top.py
index 4c6e0e57..df78f2f5 100644
--- a/src/llmtuner/webui/components/top.py
+++ b/src/llmtuner/webui/components/top.py
@@ -20,7 +20,7 @@ def create_top() -> Dict[str, "Component"]:
 
     with gr.Row():
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
-        adapter_path = gr.Dropdown(multiselect=True, scale=5)
+        adapter_path = gr.Dropdown(multiselect=True, scale=5, allow_custom_value=True)
         refresh_btn = gr.Button(scale=1)
 
     with gr.Accordion(label="Advanced config", open=False) as advanced_tab:
diff --git a/src/llmtuner/webui/components/train.py b/src/llmtuner/webui/components/train.py
index a6c94c16..45ebffd2 100644
--- a/src/llmtuner/webui/components/train.py
+++ b/src/llmtuner/webui/components/train.py
@@ -94,7 +94,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Accordion(label="RLHF config", open=False) as rlhf_tab:
         with gr.Row():
             dpo_beta = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1)
-            reward_model = gr.Dropdown(scale=3)
+            reward_model = gr.Dropdown(scale=3, allow_custom_value=True)
             refresh_btn = gr.Button(scale=1)
 
     refresh_btn.click(
diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py
index a0e5324b..888ed4a9 100644
--- a/src/llmtuner/webui/locales.py
+++ b/src/llmtuner/webui/locales.py
@@ -432,11 +432,11 @@ LOCALES = {
     "reward_model": {
         "en": {
             "label": "Reward model",
-            "info": "Checkpoint of the reward model for PPO training. (Needs to refresh checkpoints)"
+            "info": "Adapter of the reward model for PPO training. (Requires refreshing adapters)"
         },
         "zh": {
             "label": "奖励模型",
-            "info": "PPO 训练中奖励模型的断点路径。(需要刷新断点)"
+            "info": "PPO 训练中奖励模型的适配器路径。(需要刷新适配器)"
         }
     },
     "cmd_preview_btn": {
@@ -585,6 +585,36 @@ LOCALES = {
             "label": "温度系数"
         }
     },
+    "max_shard_size": {
+        "en": {
+            "label": "Max shard size (GB)",
+            "info": "The maximum size for a model file."
+        },
+        "zh": {
+            "label": "最大分块大小(GB)",
+            "info": "单个模型文件的最大大小。"
+        }
+    },
+    "export_quantization_bit": {
+        "en": {
+            "label": "Export quantization bit",
+            "info": "The number of bits to quantize the exported model."
+        },
+        "zh": {
+            "label": "导出量化等级",
+            "info": "量化导出模型。"
+        }
+    },
+    "export_quantization_dataset": {
+        "en": {
+            "label": "Export quantization dataset",
+            "info": "The calibration dataset used for quantization."
+        },
+        "zh": {
+            "label": "导出量化数据集",
+            "info": "量化过程中使用的校准数据集。"
+        }
+    },
     "export_dir": {
         "en": {
             "label": "Export dir",
@@ -595,16 +625,6 @@ LOCALES = {
             "info": "保存导出模型的文件夹路径。"
         }
     },
-    "max_shard_size": {
-        "en": {
-            "label": "Max shard size (GB)",
-            "info": "The maximum size for a model file."
-        },
-        "zh": {
-            "label": "最大分块大小(GB)",
-            "info": "模型文件的最大大小。"
-        }
-    },
     "export_btn": {
         "en": {
             "value": "Export"
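
Notes on the new code paths (illustrative sketches, not part of the patch):

The Web UI's `save_model` above only assembles an args dict and hands it to the exporter, so the same GPTQ export can be driven programmatically. A minimal sketch, assuming `export_model` is importable from the top-level `llmtuner` package (as `src/export_model.py` does); the paths are placeholders:

```python
# Sketch: drive the new GPTQ export path with the same args dict that
# save_model() builds in src/llmtuner/webui/components/export.py.
from llmtuner import export_model  # assumed top-level entry point

export_model(dict(
    model_name_or_path="path_to_model",               # placeholder
    adapter_name_or_path=None,                        # GPTQ export runs without adapters
    finetuning_type="lora",
    template="default",
    export_dir="path_to_export",                      # placeholder
    export_size=1,                                    # max shard size in GB
    export_quantization_bit=4,                        # save_model() casts the dropdown value to int
    export_quantization_dataset="data/c4_demo.json",  # calibration samples
))
```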
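
Inside `configure_quantization`, the patch builds a `GPTQConfig` and sets `device_map="auto"`, so quantization happens while the model loads; the one-line `tokenizer=tokenizer` fix lets auto-gptq tokenize the calibration dataset itself. A minimal standalone sketch of the same mechanism using plain transformers; the model id and output directory are placeholders:

```python
# Sketch: post-training GPTQ quantization on load, the mechanism the
# patched configure_quantization() wires up through config_kwargs.
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig

model_id = "meta-llama/Llama-2-7b-hf"  # placeholder model
tokenizer = AutoTokenizer.from_pretrained(model_id)

gptq_config = GPTQConfig(
    bits=4,               # matches export_quantization_bit
    dataset="c4",         # or a list of calibration strings
    tokenizer=tokenizer,  # the fix added in patcher.py
)

# Requires optimum>=1.16.0 and auto_gptq>=0.5.0, as the patch enforces.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=gptq_config,
    device_map="auto",    # weights must be dispatched for quantization
)
model.save_pretrained("llama2-7b-gptq-4bit")  # placeholder output dir
```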
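
`_parse_args` keeps the old three-way dispatch from `misc.parse_args` but fails loudly on leftover CLI flags. A sketch of the three call styles from the outside, against `get_train_args`; the field values are placeholders:

```python
# Sketch: the three input styles _parse_args accepts.
from llmtuner.model.parser import get_train_args

# 1) A dict (the LLaMA Board path) goes through parser.parse_dict().
model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(dict(
    stage="sft",
    model_name_or_path="path_to_model",  # placeholder
    dataset="alpaca_gpt4_en",
    template="default",
    finetuning_type="lora",
    output_dir="path_to_output",         # placeholder
))

# 2) A single .yaml or .json path in sys.argv goes through
#    parse_yaml_file() / parse_json_file():
#      python src/train_bash.py train_config.yaml
# 3) Anything else is parsed as ordinary CLI flags; unknown flags
#    print the help text and raise a ValueError.
```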