From 0a633f80985b0c492967fb3e24fb25865fea9e9f Mon Sep 17 00:00:00 2001 From: shing100 Date: Mon, 30 Sep 2024 09:17:41 +0900 Subject: [PATCH 1/9] add Exaone3.0 template Former-commit-id: 3a9569647f5dbb1dcd9ef6e5cfc39ec8f9b41e89 --- src/llamafactory/data/template.py | 12 ++++++++++++ src/llamafactory/extras/constants.py | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 7a10a0e3..08988c1c 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -644,6 +644,18 @@ _register_template( ) +_register_template( + name="exaone", + format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]), + format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]), + format_assistant=StringFormatter(slots=["{{content}}[|endofturn|]\n"]), + format_separator=EmptyFormatter(slots=["\n"]), + format_prefix=EmptyFormatter(slots=[""]), + stop_words=["[|endofturn|]"], + efficient_eos=True, +) + + _register_template( name="falcon", format_user=StringFormatter(slots=["User: {{content}}\nFalcon:"]), diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 87436bc7..1bbb4458 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -471,6 +471,15 @@ register_model_group( template="deepseekcoder", ) +regiter_model_group( + models={ + "EXAONE-3.0-7.8B-Instruct": { + DownloadSource.DEFAULT: "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", + }, + }, + template="exaone", +) + register_model_group( models={ From b5eb939ce363651249467d29c450fbe8b9ce0a25 Mon Sep 17 00:00:00 2001 From: shing100 Date: Mon, 30 Sep 2024 09:19:27 +0900 Subject: [PATCH 2/9] update docs Support model Exaone3.0 Former-commit-id: 826675ff14ddfd291e93c91373df9da43d1388a6 --- README.md | 1 + README_zh.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index e62a9d47..38f252a8 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | diff --git a/README_zh.md b/README_zh.md index b5da9785..f4190c5a 100644 --- a/README_zh.md +++ b/README_zh.md @@ -170,6 +170,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | From 73b684c7b0f01186f55fd04dc6d9ed96dd62c165 Mon Sep 17 00:00:00 2001 From: "Geun, Lim" Date: Mon, 30 Sep 2024 09:24:44 +0900 Subject: [PATCH 3/9] Update README.md Former-commit-id: d06440e0e71f02f852a7eac5ea6961c8da2a80f4 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 38f252a8..083b0bb4 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | +| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | From 5f1209bee68c6fbeedd21e7f7760e0737462fcdc Mon Sep 17 00:00:00 2001 From: "Geun, Lim" Date: Mon, 30 Sep 2024 09:25:02 +0900 Subject: [PATCH 4/9] Update README_zh.md Former-commit-id: 0dfe9f763b09e00457700111e5d7d0d8b9a81e32 --- README_zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh.md b/README_zh.md index f4190c5a..c3d1e6f8 100644 --- a/README_zh.md +++ b/README_zh.md @@ -170,7 +170,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | +| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | From b596102bd56c03b821d038852fa53acfbfa72bad Mon Sep 17 00:00:00 2001 From: shing100 Date: Mon, 30 Sep 2024 09:44:21 +0900 Subject: [PATCH 5/9] fix chat template Exaone3.0 Former-commit-id: 2964b20eb59687e24a901926c0da1547d7c046ca --- README.md | 1 - README_zh.md | 1 - src/llamafactory/data/template.py | 2 -- 3 files changed, 4 deletions(-) diff --git a/README.md b/README.md index 083b0bb4..e62a9d47 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,6 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | diff --git a/README_zh.md b/README_zh.md index c3d1e6f8..b5da9785 100644 --- a/README_zh.md +++ b/README_zh.md @@ -170,7 +170,6 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272 | [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | | [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Exaone3.0](https://huggingface.co/LGAI-EXAONE) | 7.8B | exaone | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 08988c1c..e7539019 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -648,10 +648,8 @@ _register_template( name="exaone", format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]), format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]), - format_assistant=StringFormatter(slots=["{{content}}[|endofturn|]\n"]), format_separator=EmptyFormatter(slots=["\n"]), format_prefix=EmptyFormatter(slots=[""]), - stop_words=["[|endofturn|]"], efficient_eos=True, ) From 54e749d1cfa299f339fece4e4da7e436b3de1b0d Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 30 Sep 2024 16:39:48 +0800 Subject: [PATCH 6/9] Update template.py Former-commit-id: 02957fc9c3cd04ef5140f093642b94e74ce86b61 --- src/llamafactory/data/template.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index e7539019..e832e3ad 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -649,8 +649,6 @@ _register_template( format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]), format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]), format_separator=EmptyFormatter(slots=["\n"]), - format_prefix=EmptyFormatter(slots=[""]), - efficient_eos=True, ) From 85ed108fa63c20cf41903e9ca6ebf75648578634 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 30 Sep 2024 16:47:52 +0800 Subject: [PATCH 7/9] Update constants.py Former-commit-id: 4d7bb692348717d89bd5c682890e98bce5118743 --- src/llamafactory/extras/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 1bbb4458..7ed20c2b 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -471,7 +471,8 @@ register_model_group( template="deepseekcoder", ) -regiter_model_group( + +register_model_group( models={ "EXAONE-3.0-7.8B-Instruct": { DownloadSource.DEFAULT: "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", From b664bcf307753ce7e4432fe87ada0c563d2f4108 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 30 Sep 2024 17:00:33 +0800 Subject: [PATCH 8/9] lint Former-commit-id: cee3dc484be4bdf31ffe1d8b9c60604b84ed6a00 --- tests/data/test_mm_plugin.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py index bd2eb45a..75541000 100644 --- a/tests/data/test_mm_plugin.py +++ b/tests/data/test_mm_plugin.py @@ -142,10 +142,7 @@ def test_llava_next_plugin(): check_inputs = {"plugin": llava_next_plugin, "tokenizer": tokenizer, "processor": processor} image_seqlen = 1176 check_inputs["expected_mm_messages"] = [ - { - key: value.replace("", "" * image_seqlen) - for key, value in message.items() - } + {key: value.replace("", "" * image_seqlen) for key, value in message.items()} for message in MM_MESSAGES ] check_inputs["expected_mm_inputs"] = _get_mm_inputs(processor) @@ -158,10 +155,7 @@ def test_llava_next_video_plugin(): check_inputs = {"plugin": llava_next_video_plugin, "tokenizer": tokenizer, "processor": processor} image_seqlen = 1176 check_inputs["expected_mm_messages"] = [ - { - key: value.replace("", "" * image_seqlen) - for key, value in message.items() - } + {key: value.replace("", "" * image_seqlen) for key, value in message.items()} for message in MM_MESSAGES ] check_inputs["expected_mm_inputs"] = _get_mm_inputs(processor) @@ -207,10 +201,7 @@ def test_video_llava_plugin(): check_inputs = {"plugin": video_llava_plugin, "tokenizer": tokenizer, "processor": processor} image_seqlen = 256 check_inputs["expected_mm_messages"] = [ - { - key: value.replace("", "" * image_seqlen) - for key, value in message.items() - } + {key: value.replace("", "" * image_seqlen) for key, value in message.items()} for message in MM_MESSAGES ] check_inputs["expected_mm_inputs"] = _get_mm_inputs(processor) From 3720618c63e82961fa15be4e3364fcd0897167ec Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 30 Sep 2024 17:07:43 +0800 Subject: [PATCH 9/9] add patch processor func Former-commit-id: 45841bb646afa9d0bc2ea4b6b7b107daa67d90f0 --- src/llamafactory/model/loader.py | 15 +++------------ src/llamafactory/model/patcher.py | 26 ++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index c90913ae..9e47fb72 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -25,8 +25,7 @@ from .model_utils.misc import register_autoclass from .model_utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model from .model_utils.unsloth import load_unsloth_pretrained_model from .model_utils.valuehead import load_valuehead_params -from .model_utils.visual import get_image_seqlen, get_patch_size, get_vision_feature_select_strategy -from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model +from .patcher import patch_config, patch_model, patch_processor, patch_tokenizer, patch_valuehead_model if TYPE_CHECKING: @@ -61,7 +60,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]: def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule": r""" - Loads pretrained tokenizer. + Loads pretrained tokenizer and optionally loads processor. Note: including inplace operation of model_args. """ @@ -94,17 +93,9 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule": logger.warning("New tokens have been added, changed `resize_vocab` to True.") patch_tokenizer(tokenizer) - try: processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs) - setattr(processor, "tokenizer", tokenizer) - setattr(processor, "image_seqlen", get_image_seqlen(config)) - setattr(processor, "image_resolution", model_args.image_resolution) - setattr(processor, "patch_size", get_patch_size(config)) - setattr(processor, "video_resolution", model_args.video_resolution) - setattr(processor, "video_fps", model_args.video_fps) - setattr(processor, "video_maxlen", model_args.video_maxlen) - setattr(processor, "vision_feature_select_strategy", get_vision_feature_select_strategy(config)) + patch_processor(processor, config, tokenizer, model_args) except Exception: processor = None diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 3de82703..e4bb7ac1 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -34,11 +34,17 @@ from .model_utils.packing import configure_packing from .model_utils.quantization import configure_quantization from .model_utils.rope import configure_rope from .model_utils.valuehead import prepare_valuehead_model -from .model_utils.visual import autocast_projector_dtype, configure_visual_model +from .model_utils.visual import ( + autocast_projector_dtype, + configure_visual_model, + get_image_seqlen, + get_patch_size, + get_vision_feature_select_strategy, +) if TYPE_CHECKING: - from transformers import PretrainedConfig, PreTrainedTokenizer + from transformers import PretrainedConfig, PreTrainedTokenizer, ProcessorMixin from trl import AutoModelForCausalLMWithValueHead from ..hparams import ModelArguments @@ -52,6 +58,22 @@ def patch_tokenizer(tokenizer: "PreTrainedTokenizer") -> None: tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer) +def patch_processor( + processor: "ProcessorMixin", + config: "PretrainedConfig", + tokenizer: "PreTrainedTokenizer", + model_args: "ModelArguments", +) -> None: + setattr(processor, "tokenizer", tokenizer) + setattr(processor, "image_seqlen", get_image_seqlen(config)) + setattr(processor, "image_resolution", model_args.image_resolution) + setattr(processor, "patch_size", get_patch_size(config)) + setattr(processor, "video_resolution", model_args.video_resolution) + setattr(processor, "video_fps", model_args.video_fps) + setattr(processor, "video_maxlen", model_args.video_maxlen) + setattr(processor, "vision_feature_select_strategy", get_vision_feature_select_strategy(config)) + + def patch_config( config: "PretrainedConfig", tokenizer: "PreTrainedTokenizer",