From 05ef89cfccb85e56382c616dbbde1345c4c0ac2a Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Wed, 8 May 2024 10:36:36 +0800 Subject: [PATCH 01/12] modify export model Former-commit-id: c7051edae4ce23f85daf204a2aaac134b1f29c3d --- src/llmtuner/train/tuner.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index e1a997c1..6973a4e5 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -15,11 +15,9 @@ from .pt import run_pt from .rm import run_rm from .sft import run_sft - if TYPE_CHECKING: from transformers import TrainerCallback - logger = get_logger(__name__) @@ -52,7 +50,9 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None: raise ValueError("Please merge adapters before quantizing the model.") - tokenizer = load_tokenizer(model_args)["tokenizer"] + tokenizer_module = load_tokenizer(model_args)["tokenizer"] + tokenizer = tokenizer_module['tokenizer'] + processor = tokenizer_module['processor'] get_template_and_fix_tokenizer(tokenizer, data_args.template) model = load_model(tokenizer, model_args, finetuning_args) # must after fixing tokenizer to resize vocab @@ -88,3 +88,6 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) except Exception: logger.warning("Cannot save tokenizer, please copy the files manually.") + + if model_args.visual_inputs: + processor.image_processor.save_pretrained(model_args.export_dir) From 172600d4326a444b854059500515958d78d09f95 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Wed, 8 May 2024 22:50:42 +0800 Subject: [PATCH 02/12] add mllm export Former-commit-id: ce4770d33f6761d3b1d60661efcb0be34a036154 --- src/llmtuner/train/tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 6973a4e5..00349e09 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -50,7 +50,7 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None: raise ValueError("Please merge adapters before quantizing the model.") - tokenizer_module = load_tokenizer(model_args)["tokenizer"] + tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module['tokenizer'] processor = tokenizer_module['processor'] get_template_and_fix_tokenizer(tokenizer, data_args.template) From e508519e0a7a1abad11857f7bfa2c3b5c33c5d08 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Thu, 9 May 2024 13:53:39 +0800 Subject: [PATCH 03/12] add mllm processor save and Chinese-LLaVA-Med show Former-commit-id: 110c49fbf79fe0625f091e63746bfabde00add99 --- README.md | 1 + README_zh.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 798b7bd4..e71ee552 100644 --- a/README.md +++ b/README.md @@ -467,6 +467,7 @@ If you have a project that should be incorporated, please contact via email or c 1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: A series of large language models for Chinese medical domain, based on LLaMA2-7B and Baichuan-13B. 1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods. 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generate metadata for stable diffusion. [[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) +1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B. diff --git a/README_zh.md b/README_zh.md index 2c5b1aa1..7c0497c2 100644 --- a/README_zh.md +++ b/README_zh.md @@ -467,6 +467,8 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。 1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) +1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: 中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得 + From 827a929f1d1d4cb037f756ff9cfe60e3353c5edd Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Thu, 9 May 2024 14:05:19 +0800 Subject: [PATCH 04/12] add push processor to hub Former-commit-id: 7a05a965311edfdfafa57af8342875860d341f27 --- src/llmtuner/train/tuner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 00349e09..11509c20 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -91,3 +91,5 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: if model_args.visual_inputs: processor.image_processor.save_pretrained(model_args.export_dir) + if model_args.export_hub_model_id is not None: + processor.image_processor.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) \ No newline at end of file From 1a78b675beb4a77643631f9e8e6874368da013bc Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Sat, 11 May 2024 13:11:00 +0800 Subject: [PATCH 05/12] add full parameter finetuning of mllm Former-commit-id: f90c1da5636ac3cb8112c5081a3b56b09a17fcf8 --- src/llmtuner/hparams/model_args.py | 4 ++++ src/llmtuner/model/loader.py | 3 ++- src/llmtuner/model/patcher.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index ac70bb3c..996eabae 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -85,6 +85,10 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) + autocast_projector: bool = field( + default=True, + metadata={"help": "Whethor or not to autocast projector."}, + ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index ead6178f..1dca84a1 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -155,7 +155,8 @@ def load_model( model.eval() else: model.train() - + if model_args.visual_inputs: + model.vision_tower.requires_grad_(False) trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py index 31cba492..6ca6f2e5 100644 --- a/src/llmtuner/model/patcher.py +++ b/src/llmtuner/model/patcher.py @@ -101,7 +101,7 @@ def patch_model( if model_args.resize_vocab: resize_embedding_layer(model, tokenizer) - if model_args.visual_inputs: + if model_args.visual_inputs and model_args.autocast_projector: autocast_projector_dtype(model, model_args) if is_trainable: From b530a798c14bec4911c2ecf84be5163eeefc0426 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 22:43:04 +0800 Subject: [PATCH 06/12] Update README.md Former-commit-id: d24c83bb30e2829ba78db90c4c4975788f2eed25 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5880dbea..90c66caf 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llmtuner)](https://pypi.org/project/llmtuner/) [![Downloads](https://static.pepy.tech/badge/llmtuner)](https://pypi.org/project/llmtuner/) -[![Citation](https://img.shields.io/badge/citation-43-green)](#projects-using-llama-factory) +[![Citation](https://img.shields.io/badge/citation-44-green)](#projects-using-llama-factory) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) From e2cfcb0a5f71ee86a640a6cdd026307dda4415b5 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 22:44:51 +0800 Subject: [PATCH 07/12] Update README_zh.md Former-commit-id: 1a205478403b5852fac0aa8418cdb8995fbe40e3 --- README_zh.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README_zh.md b/README_zh.md index 330a012e..0aba9043 100644 --- a/README_zh.md +++ b/README_zh.md @@ -5,7 +5,7 @@ [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llmtuner)](https://pypi.org/project/llmtuner/) [![Downloads](https://static.pepy.tech/badge/llmtuner)](https://pypi.org/project/llmtuner/) -[![Citation](https://img.shields.io/badge/citation-43-green)](#使用了-llama-factory-的项目) +[![Citation](https://img.shields.io/badge/citation-44-green)](#使用了-llama-factory-的项目) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) @@ -473,7 +473,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。 1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) -1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: 中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得 +1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。 From 0ccc76392e22fbe8608182ec6752070043a7298e Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:54:53 +0800 Subject: [PATCH 08/12] Update tuner.py Former-commit-id: 22afcbdb25160583e5ece28fad0585c7bc70f41a --- src/llmtuner/train/tuner.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 11509c20..cf44aa8c 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -15,9 +15,11 @@ from .pt import run_pt from .rm import run_rm from .sft import run_sft + if TYPE_CHECKING: from transformers import TrainerCallback + logger = get_logger(__name__) @@ -51,8 +53,8 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: raise ValueError("Please merge adapters before quantizing the model.") tokenizer_module = load_tokenizer(model_args) - tokenizer = tokenizer_module['tokenizer'] - processor = tokenizer_module['processor'] + tokenizer = tokenizer_module["tokenizer"] + processor = tokenizer_module["processor"] get_template_and_fix_tokenizer(tokenizer, data_args.template) model = load_model(tokenizer, model_args, finetuning_args) # must after fixing tokenizer to resize vocab @@ -63,7 +65,7 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: raise ValueError("The model is not a `PreTrainedModel`, export aborted.") if getattr(model, "quantization_method", None) is None: # cannot convert dtype of a quantized model - output_dtype = getattr(model.config, "torch_dtype", torch.float16) + output_dtype = torch.float16 setattr(model.config, "torch_dtype", output_dtype) model = model.to(output_dtype) @@ -86,10 +88,12 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: tokenizer.save_pretrained(model_args.export_dir) if model_args.export_hub_model_id is not None: tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) + + if model_args.visual_inputs and processor is not None: + getattr(processor, "image_processor").save_pretrained(model_args.export_dir) + if model_args.export_hub_model_id is not None: + getattr(processor, "image_processor").push_to_hub( + model_args.export_hub_model_id, token=model_args.hf_hub_token + ) except Exception: logger.warning("Cannot save tokenizer, please copy the files manually.") - - if model_args.visual_inputs: - processor.image_processor.save_pretrained(model_args.export_dir) - if model_args.export_hub_model_id is not None: - processor.image_processor.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) \ No newline at end of file From 382f0964756f5b8f6adc65641e9e0d303facf6fa Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:55:59 +0800 Subject: [PATCH 09/12] Update tuner.py Former-commit-id: ccd1eb2c0992f75440c0e1c5cd3f02d03aacb085 --- src/llmtuner/train/tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index cf44aa8c..ffdc3e60 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -65,7 +65,7 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: raise ValueError("The model is not a `PreTrainedModel`, export aborted.") if getattr(model, "quantization_method", None) is None: # cannot convert dtype of a quantized model - output_dtype = torch.float16 + output_dtype = getattr(model.config, "torch_dtype", torch.float16) setattr(model.config, "torch_dtype", output_dtype) model = model.to(output_dtype) From d54313fcf932eeaffaa151ca08fc9729be614740 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:56:40 +0800 Subject: [PATCH 10/12] Update patcher.py Former-commit-id: 2c88d394d29c6e98ac3a6860848855722614ca52 --- src/llmtuner/model/patcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py index 6ca6f2e5..31cba492 100644 --- a/src/llmtuner/model/patcher.py +++ b/src/llmtuner/model/patcher.py @@ -101,7 +101,7 @@ def patch_model( if model_args.resize_vocab: resize_embedding_layer(model, tokenizer) - if model_args.visual_inputs and model_args.autocast_projector: + if model_args.visual_inputs: autocast_projector_dtype(model, model_args) if is_trainable: From 4efe56fd68852681c6f18d5b99c08da1b5bec435 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:57:05 +0800 Subject: [PATCH 11/12] Update model_args.py Former-commit-id: c4114add4c42c1d7723f7270451a6c9fc656ecd1 --- src/llmtuner/hparams/model_args.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index 996eabae..ac70bb3c 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -85,10 +85,6 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) - autocast_projector: bool = field( - default=True, - metadata={"help": "Whethor or not to autocast projector."}, - ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, From db47c53486ea3f16da44838ebc790169fe2c90ba Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:58:47 +0800 Subject: [PATCH 12/12] Update loader.py Former-commit-id: 2fc12790414677bb82736208fb9547640780af2e --- src/llmtuner/model/loader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index 1dca84a1..ead6178f 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -155,8 +155,7 @@ def load_model( model.eval() else: model.train() - if model_args.visual_inputs: - model.vision_tower.requires_grad_(False) + trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format(