From 3534a75bcc3d04ad5343ac18d33a2ce43bcbf266 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Wed, 8 May 2024 10:36:36 +0800 Subject: [PATCH 01/12] modify export model Former-commit-id: 0ca1d1967d8aaf32e20bed06b2e8ae7fc5ab411e --- src/llmtuner/train/tuner.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index e1a997c1..6973a4e5 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -15,11 +15,9 @@ from .pt import run_pt from .rm import run_rm from .sft import run_sft - if TYPE_CHECKING: from transformers import TrainerCallback - logger = get_logger(__name__) @@ -52,7 +50,9 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None: raise ValueError("Please merge adapters before quantizing the model.") - tokenizer = load_tokenizer(model_args)["tokenizer"] + tokenizer_module = load_tokenizer(model_args)["tokenizer"] + tokenizer = tokenizer_module['tokenizer'] + processor = tokenizer_module['processor'] get_template_and_fix_tokenizer(tokenizer, data_args.template) model = load_model(tokenizer, model_args, finetuning_args) # must after fixing tokenizer to resize vocab @@ -88,3 +88,6 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) except Exception: logger.warning("Cannot save tokenizer, please copy the files manually.") + + if model_args.visual_inputs: + processor.image_processor.save_pretrained(model_args.export_dir) From f40b602c41435b686391624736c84049e9d9887e Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Wed, 8 May 2024 22:50:42 +0800 Subject: [PATCH 02/12] add mllm export Former-commit-id: ef33856380c8fb137b8eefdcd6f7ec5b12c73b69 --- src/llmtuner/train/tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 6973a4e5..00349e09 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -50,7 +50,7 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None: raise ValueError("Please merge adapters before quantizing the model.") - tokenizer_module = load_tokenizer(model_args)["tokenizer"] + tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module['tokenizer'] processor = tokenizer_module['processor'] get_template_and_fix_tokenizer(tokenizer, data_args.template) From 08de91434cf511d202deada515b18fbed78d4ce3 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Thu, 9 May 2024 13:53:39 +0800 Subject: [PATCH 03/12] add mllm processor save and Chinese-LLaVA-Med show Former-commit-id: fdb3955448cd4c8d5a7a2cbceac1e9e6fd1e34cc --- README.md | 1 + README_zh.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 798b7bd4..e71ee552 100644 --- a/README.md +++ b/README.md @@ -467,6 +467,7 @@ If you have a project that should be incorporated, please contact via email or c 1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: A series of large language models for Chinese medical domain, based on LLaMA2-7B and Baichuan-13B. 1. 
**[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods. 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generate metadata for stable diffusion. [[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) +1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B. diff --git a/README_zh.md b/README_zh.md index 2c5b1aa1..7c0497c2 100644 --- a/README_zh.md +++ b/README_zh.md @@ -467,6 +467,8 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。 1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) +1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: 中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得 + From a185cf7e189b983c3243212566ca102d60668835 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Thu, 9 May 2024 14:05:19 +0800 Subject: [PATCH 04/12] add push processor to hub Former-commit-id: 8b997e32fb19d60ab81c2c3437a3b40aa5bdeee3 --- src/llmtuner/train/tuner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 00349e09..11509c20 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -91,3 +91,5 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: if model_args.visual_inputs: processor.image_processor.save_pretrained(model_args.export_dir) + if model_args.export_hub_model_id is not None: + processor.image_processor.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) \ No newline at end of file From 743d0f22b74c3e069465f41fb319eeb556a5dc86 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Sat, 11 May 2024 13:11:00 +0800 Subject: [PATCH 05/12] add full parameter finetuning of mllm Former-commit-id: 7be7972f28f3e2a0f4138d10d1eb6f06da9d97fb --- src/llmtuner/hparams/model_args.py | 4 ++++ src/llmtuner/model/loader.py | 3 ++- src/llmtuner/model/patcher.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index ac70bb3c..996eabae 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -85,6 +85,10 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) + autocast_projector: bool = field( + default=True, + metadata={"help": "Whethor or not to autocast projector."}, + ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index ead6178f..1dca84a1 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -155,7 +155,8 @@ def load_model( model.eval() else: model.train() - + if model_args.visual_inputs: + 
model.vision_tower.requires_grad_(False) trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py index 31cba492..6ca6f2e5 100644 --- a/src/llmtuner/model/patcher.py +++ b/src/llmtuner/model/patcher.py @@ -101,7 +101,7 @@ def patch_model( if model_args.resize_vocab: resize_embedding_layer(model, tokenizer) - if model_args.visual_inputs: + if model_args.visual_inputs and model_args.autocast_projector: autocast_projector_dtype(model, model_args) if is_trainable: From fde0304e9fdb0b007c931f0a3233fda46253dea9 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 22:43:04 +0800 Subject: [PATCH 06/12] Update README.md Former-commit-id: b8d5d9c8ef2b76713caba96c91642f4f80fffd3d --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5880dbea..90c66caf 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llmtuner)](https://pypi.org/project/llmtuner/) [![Downloads](https://static.pepy.tech/badge/llmtuner)](https://pypi.org/project/llmtuner/) -[![Citation](https://img.shields.io/badge/citation-43-green)](#projects-using-llama-factory) +[![Citation](https://img.shields.io/badge/citation-44-green)](#projects-using-llama-factory) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) From 1dd674697c64af4e56687e01bbd59b99f07c8c77 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 22:44:51 +0800 Subject: [PATCH 07/12] Update README_zh.md Former-commit-id: 1049b29253f332f3f02dff912b8b1b420767de4e --- README_zh.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README_zh.md b/README_zh.md index 330a012e..0aba9043 100644 --- a/README_zh.md +++ b/README_zh.md @@ -5,7 +5,7 @@ [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llmtuner)](https://pypi.org/project/llmtuner/) [![Downloads](https://static.pepy.tech/badge/llmtuner)](https://pypi.org/project/llmtuner/) -[![Citation](https://img.shields.io/badge/citation-43-green)](#使用了-llama-factory-的项目) +[![Citation](https://img.shields.io/badge/citation-44-green)](#使用了-llama-factory-的项目) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) @@ -473,7 +473,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 1. **[CareGPT](https://github.com/WangRongsheng/CareGPT)**: 医疗大模型项目 CareGPT,基于 LLaMA2-7B 和 Baichuan-13B 在中文医疗数据上微调而得。 1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。 1. 
**[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) -1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: 中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得 +1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。 From 17975fefd7158caf2ae0a1584f9f19c8c572b16d Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:54:53 +0800 Subject: [PATCH 08/12] Update tuner.py Former-commit-id: 13851fb04524e3a599b6c07d749f7463b8f75319 --- src/llmtuner/train/tuner.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index 11509c20..cf44aa8c 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -15,9 +15,11 @@ from .pt import run_pt from .rm import run_rm from .sft import run_sft + if TYPE_CHECKING: from transformers import TrainerCallback + logger = get_logger(__name__) @@ -51,8 +53,8 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: raise ValueError("Please merge adapters before quantizing the model.") tokenizer_module = load_tokenizer(model_args) - tokenizer = tokenizer_module['tokenizer'] - processor = tokenizer_module['processor'] + tokenizer = tokenizer_module["tokenizer"] + processor = tokenizer_module["processor"] get_template_and_fix_tokenizer(tokenizer, data_args.template) model = load_model(tokenizer, model_args, finetuning_args) # must after fixing tokenizer to resize vocab @@ -63,7 +65,7 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: raise ValueError("The model is not a `PreTrainedModel`, export aborted.") if getattr(model, "quantization_method", None) is None: # cannot convert dtype of a quantized model - output_dtype = getattr(model.config, "torch_dtype", torch.float16) + output_dtype = torch.float16 setattr(model.config, "torch_dtype", output_dtype) model = model.to(output_dtype) @@ -86,10 +88,12 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: tokenizer.save_pretrained(model_args.export_dir) if model_args.export_hub_model_id is not None: tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) + + if model_args.visual_inputs and processor is not None: + getattr(processor, "image_processor").save_pretrained(model_args.export_dir) + if model_args.export_hub_model_id is not None: + getattr(processor, "image_processor").push_to_hub( + model_args.export_hub_model_id, token=model_args.hf_hub_token + ) except Exception: logger.warning("Cannot save tokenizer, please copy the files manually.") - - if model_args.visual_inputs: - processor.image_processor.save_pretrained(model_args.export_dir) - if model_args.export_hub_model_id is not None: - processor.image_processor.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token) \ No newline at end of file From 4781b63edb354f36ccfca5aadf0517f091b13394 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:55:59 +0800 Subject: [PATCH 09/12] Update tuner.py Former-commit-id: 5f72439a1d1f5cd9ade8bd30056c0dfb6059c1bd --- src/llmtuner/train/tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index cf44aa8c..ffdc3e60 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -65,7 +65,7 @@ def 
export_model(args: Optional[Dict[str, Any]] = None) -> None: raise ValueError("The model is not a `PreTrainedModel`, export aborted.") if getattr(model, "quantization_method", None) is None: # cannot convert dtype of a quantized model - output_dtype = torch.float16 + output_dtype = getattr(model.config, "torch_dtype", torch.float16) setattr(model.config, "torch_dtype", output_dtype) model = model.to(output_dtype) From fb14d57e971dba95f6e4781ff50a3814e76328bf Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:56:40 +0800 Subject: [PATCH 10/12] Update patcher.py Former-commit-id: 708aa5e098b0be90c9dfe6a30b9912883e3bcc04 --- src/llmtuner/model/patcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py index 6ca6f2e5..31cba492 100644 --- a/src/llmtuner/model/patcher.py +++ b/src/llmtuner/model/patcher.py @@ -101,7 +101,7 @@ def patch_model( if model_args.resize_vocab: resize_embedding_layer(model, tokenizer) - if model_args.visual_inputs and model_args.autocast_projector: + if model_args.visual_inputs: autocast_projector_dtype(model, model_args) if is_trainable: From 28d9d7274d9fa1c46674acca6ec377fd30aff8ef Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:57:05 +0800 Subject: [PATCH 11/12] Update model_args.py Former-commit-id: bb2e6b0ea3d5ac62401f075a05feccca3afbc4b4 --- src/llmtuner/hparams/model_args.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py index 996eabae..ac70bb3c 100644 --- a/src/llmtuner/hparams/model_args.py +++ b/src/llmtuner/hparams/model_args.py @@ -85,10 +85,6 @@ class ModelArguments: default=False, metadata={"help": "Whethor or not to use multimodal LLM that accepts visual inputs."}, ) - autocast_projector: bool = field( - default=True, - metadata={"help": "Whethor or not to autocast projector."}, - ) moe_aux_loss_coef: Optional[float] = field( default=None, metadata={"help": "Coefficient of the auxiliary router loss in mixture-of-experts model."}, From 7e0ffc0a644d9c503c1c3023df3b4d29b521e2aa Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sat, 11 May 2024 23:58:47 +0800 Subject: [PATCH 12/12] Update loader.py Former-commit-id: 5bfa8e4667534ca7737c5991f59c24ff0d6dc02f --- src/llmtuner/model/loader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py index 1dca84a1..ead6178f 100644 --- a/src/llmtuner/model/loader.py +++ b/src/llmtuner/model/loader.py @@ -155,8 +155,7 @@ def load_model( model.eval() else: model.train() - if model_args.visual_inputs: - model.vision_tower.requires_grad_(False) + trainable_params, all_param = count_parameters(model) if is_trainable: param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format(
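Taken together, the series is a net change to the multimodal export path only: the autocast_projector option and the vision-tower freeze in loader.py (patch 05) and the forced float16 export dtype (patch 08) are all reverted by patches 09-12, while the processor handling in src/llmtuner/train/tuner.py survives. Below is a minimal sketch of that final export path, reconstructed from the hunks above rather than quoted from the full file: load_tokenizer, logger, and the model_args fields (visual_inputs, export_dir, export_hub_model_id, hf_hub_token) are the names used in the diffs, and elided code is marked with ellipses.

    def export_model(args: Optional[Dict[str, Any]] = None) -> None:
        ...
        # Patch 02: load_tokenizer() returns a dict, so unpack both entries;
        # "processor" is None for text-only models.
        tokenizer_module = load_tokenizer(model_args)
        tokenizer = tokenizer_module["tokenizer"]
        processor = tokenizer_module["processor"]
        ...
        try:
            tokenizer.save_pretrained(model_args.export_dir)
            if model_args.export_hub_model_id is not None:
                tokenizer.push_to_hub(model_args.export_hub_model_id, token=model_args.hf_hub_token)

            # Patches 01/04/08: persist the image processor next to the
            # tokenizer so the export directory loads as a complete
            # multimodal checkpoint, and mirror it to the Hub if requested.
            if model_args.visual_inputs and processor is not None:
                getattr(processor, "image_processor").save_pretrained(model_args.export_dir)
                if model_args.export_hub_model_id is not None:
                    getattr(processor, "image_processor").push_to_hub(
                        model_args.export_hub_model_id, token=model_args.hf_hub_token
                    )
        except Exception:
            logger.warning("Cannot save tokenizer, please copy the files manually.")

Note the design choice in patch 08: the processor save moved inside the existing try/except alongside the tokenizer save, so a failure in either falls through to the same manual-copy warning instead of aborting the export.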