From 0a633f80985b0c492967fb3e24fb25865fea9e9f Mon Sep 17 00:00:00 2001
From: shing100 <shing100@naver.com>
Date: Mon, 30 Sep 2024 09:17:41 +0900
Subject: [PATCH 1/9] add Exaone3.0 template

Former-commit-id: 3a9569647f5dbb1dcd9ef6e5cfc39ec8f9b41e89
---
 src/llamafactory/data/template.py    | 12 ++++++++++++
 src/llamafactory/extras/constants.py |  9 +++++++++
 2 files changed, 21 insertions(+)

diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 7a10a0e3..08988c1c 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -644,6 +644,18 @@ _register_template(
 )
 
 
+_register_template(
+    name="exaone",
+    format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
+    format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]),
+    format_assistant=StringFormatter(slots=["{{content}}[|endofturn|]\n"]),
+    format_separator=EmptyFormatter(slots=["\n"]),
+    format_prefix=EmptyFormatter(slots=[""]),
+    stop_words=["[|endofturn|]"],
+    efficient_eos=True,
+)
+
+
 _register_template(
     name="falcon",
     format_user=StringFormatter(slots=["User: {{content}}\nFalcon:"]),
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 87436bc7..1bbb4458 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -471,6 +471,15 @@ register_model_group(
     template="deepseekcoder",
 )
 
+regiter_model_group(
+    models={
+        "EXAONE-3.0-7.8B-Instruct": {
+            DownloadSource.DEFAULT: "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+        },
+    },
+    template="exaone",
+)
+
 
 register_model_group(
     models={

From b5eb939ce363651249467d29c450fbe8b9ce0a25 Mon Sep 17 00:00:00 2001
From: shing100 <shing100@naver.com>
Date: Mon, 30 Sep 2024 09:19:27 +0900
Subject: [PATCH 2/9] update docs: add Exaone3.0 to supported models

Former-commit-id: 826675ff14ddfd291e93c91373df9da43d1388a6
---
 README.md    | 1 +
 README_zh.md | 1 +
 2 files changed, 2 insertions(+)

diff --git a/README.md b/README.md
index e62a9d47..38f252a8 100644
--- a/README.md
+++ b/README.md
@@ -169,6 +169,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [ChatGLM3](https://huggingface.co/THUDM)                          | 6B                               | chatglm3         |
 | [Command R](https://huggingface.co/CohereForAI)                   | 35B/104B                         | cohere           |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai)         | 7B/16B/67B/236B                  | deepseek         |
+| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                       | 7.8B                             | exaone    |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma            |
 | [GLM-4](https://huggingface.co/THUDM)                             | 9B                               | glm4             |
diff --git a/README_zh.md b/README_zh.md
index b5da9785..f4190c5a 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -170,6 +170,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [ChatGLM3](https://huggingface.co/THUDM)                          | 6B                               | chatglm3         |
 | [Command R](https://huggingface.co/CohereForAI)                   | 35B/104B                         | cohere           |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai)         | 7B/16B/67B/236B                  | deepseek         |
+| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                       | 7.8B                             | exaone    |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma            |
 | [GLM-4](https://huggingface.co/THUDM)                             | 9B                               | glm4             |

From 73b684c7b0f01186f55fd04dc6d9ed96dd62c165 Mon Sep 17 00:00:00 2001
From: "Geun, Lim" <shing100@Naver.com>
Date: Mon, 30 Sep 2024 09:24:44 +0900
Subject: [PATCH 3/9] Update README.md

Former-commit-id: d06440e0e71f02f852a7eac5ea6961c8da2a80f4
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 38f252a8..083b0bb4 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [ChatGLM3](https://huggingface.co/THUDM)                          | 6B                               | chatglm3         |
 | [Command R](https://huggingface.co/CohereForAI)                   | 35B/104B                         | cohere           |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai)         | 7B/16B/67B/236B                  | deepseek         |
-| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                       | 7.8B                             | exaone    |
+| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                   | 7.8B                             | exaone           |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma            |
 | [GLM-4](https://huggingface.co/THUDM)                             | 9B                               | glm4             |

From 5f1209bee68c6fbeedd21e7f7760e0737462fcdc Mon Sep 17 00:00:00 2001
From: "Geun, Lim" <shing100@Naver.com>
Date: Mon, 30 Sep 2024 09:25:02 +0900
Subject: [PATCH 4/9] Update README_zh.md

Former-commit-id: 0dfe9f763b09e00457700111e5d7d0d8b9a81e32
---
 README_zh.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README_zh.md b/README_zh.md
index f4190c5a..c3d1e6f8 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -170,7 +170,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [ChatGLM3](https://huggingface.co/THUDM)                          | 6B                               | chatglm3         |
 | [Command R](https://huggingface.co/CohereForAI)                   | 35B/104B                         | cohere           |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai)         | 7B/16B/67B/236B                  | deepseek         |
-| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                       | 7.8B                             | exaone    |
+| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                   | 7.8B                             | exaone           |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma            |
 | [GLM-4](https://huggingface.co/THUDM)                             | 9B                               | glm4             |

From b596102bd56c03b821d038852fa53acfbfa72bad Mon Sep 17 00:00:00 2001
From: shing100 <shing100@naver.com>
Date: Mon, 30 Sep 2024 09:44:21 +0900
Subject: [PATCH 5/9] fix chat template Exaone3.0

Former-commit-id: 2964b20eb59687e24a901926c0da1547d7c046ca
---
 README.md                         | 1 -
 README_zh.md                      | 1 -
 src/llamafactory/data/template.py | 2 --
 3 files changed, 4 deletions(-)

diff --git a/README.md b/README.md
index 083b0bb4..e62a9d47 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,6 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 | [ChatGLM3](https://huggingface.co/THUDM)                          | 6B                               | chatglm3         |
 | [Command R](https://huggingface.co/CohereForAI)                   | 35B/104B                         | cohere           |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai)         | 7B/16B/67B/236B                  | deepseek         |
-| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                   | 7.8B                             | exaone           |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma            |
 | [GLM-4](https://huggingface.co/THUDM)                             | 9B                               | glm4             |
diff --git a/README_zh.md b/README_zh.md
index c3d1e6f8..b5da9785 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -170,7 +170,6 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 | [ChatGLM3](https://huggingface.co/THUDM)                          | 6B                               | chatglm3         |
 | [Command R](https://huggingface.co/CohereForAI)                   | 35B/104B                         | cohere           |
 | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai)         | 7B/16B/67B/236B                  | deepseek         |
-| [Exaone3.0](https://huggingface.co/LGAI-EXAONE)                   | 7.8B                             | exaone           |
 | [Falcon](https://huggingface.co/tiiuae)                           | 7B/11B/40B/180B                  | falcon           |
 | [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google)          | 2B/7B/9B/27B                     | gemma            |
 | [GLM-4](https://huggingface.co/THUDM)                             | 9B                               | glm4             |
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 08988c1c..e7539019 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -648,10 +648,8 @@ _register_template(
     name="exaone",
     format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
     format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]),
-    format_assistant=StringFormatter(slots=["{{content}}[|endofturn|]\n"]),
     format_separator=EmptyFormatter(slots=["\n"]),
     format_prefix=EmptyFormatter(slots=[""]),
-    stop_words=["[|endofturn|]"],
     efficient_eos=True,
 )
 

From 54e749d1cfa299f339fece4e4da7e436b3de1b0d Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Date: Mon, 30 Sep 2024 16:39:48 +0800
Subject: [PATCH 6/9] Update template.py

Former-commit-id: 02957fc9c3cd04ef5140f093642b94e74ce86b61
---
 src/llamafactory/data/template.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index e7539019..e832e3ad 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -649,8 +649,6 @@ _register_template(
     format_user=StringFormatter(slots=["[|user|]{{content}}\n[|assistant|]"]),
     format_system=StringFormatter(slots=["[|system|]{{content}}[|endofturn|]\n"]),
     format_separator=EmptyFormatter(slots=["\n"]),
-    format_prefix=EmptyFormatter(slots=[""]),
-    efficient_eos=True,
 )
 
 

From 85ed108fa63c20cf41903e9ca6ebf75648578634 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Date: Mon, 30 Sep 2024 16:47:52 +0800
Subject: [PATCH 7/9] Update constants.py

Former-commit-id: 4d7bb692348717d89bd5c682890e98bce5118743
---
 src/llamafactory/extras/constants.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index 1bbb4458..7ed20c2b 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -471,7 +471,8 @@ register_model_group(
     template="deepseekcoder",
 )
 
-regiter_model_group(
+
+register_model_group(
     models={
         "EXAONE-3.0-7.8B-Instruct": {
             DownloadSource.DEFAULT: "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",

From b664bcf307753ce7e4432fe87ada0c563d2f4108 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Mon, 30 Sep 2024 17:00:33 +0800
Subject: [PATCH 8/9] lint

Former-commit-id: cee3dc484be4bdf31ffe1d8b9c60604b84ed6a00
---
 tests/data/test_mm_plugin.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py
index bd2eb45a..75541000 100644
--- a/tests/data/test_mm_plugin.py
+++ b/tests/data/test_mm_plugin.py
@@ -142,10 +142,7 @@ def test_llava_next_plugin():
     check_inputs = {"plugin": llava_next_plugin, "tokenizer": tokenizer, "processor": processor}
     image_seqlen = 1176
     check_inputs["expected_mm_messages"] = [
-        {
-            key: value.replace("<image>", "<image>" * image_seqlen)
-            for key, value in message.items()
-        }
+        {key: value.replace("<image>", "<image>" * image_seqlen) for key, value in message.items()}
         for message in MM_MESSAGES
     ]
     check_inputs["expected_mm_inputs"] = _get_mm_inputs(processor)
@@ -158,10 +155,7 @@ def test_llava_next_video_plugin():
     check_inputs = {"plugin": llava_next_video_plugin, "tokenizer": tokenizer, "processor": processor}
     image_seqlen = 1176
     check_inputs["expected_mm_messages"] = [
-        {
-            key: value.replace("<image>", "<image>" * image_seqlen)
-            for key, value in message.items()
-        }
+        {key: value.replace("<image>", "<image>" * image_seqlen) for key, value in message.items()}
         for message in MM_MESSAGES
     ]
     check_inputs["expected_mm_inputs"] = _get_mm_inputs(processor)
@@ -207,10 +201,7 @@ def test_video_llava_plugin():
     check_inputs = {"plugin": video_llava_plugin, "tokenizer": tokenizer, "processor": processor}
     image_seqlen = 256
     check_inputs["expected_mm_messages"] = [
-        {
-            key: value.replace("<image>", "<image>" * image_seqlen)
-            for key, value in message.items()
-        }
+        {key: value.replace("<image>", "<image>" * image_seqlen) for key, value in message.items()}
         for message in MM_MESSAGES
     ]
     check_inputs["expected_mm_inputs"] = _get_mm_inputs(processor)

From 3720618c63e82961fa15be4e3364fcd0897167ec Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Mon, 30 Sep 2024 17:07:43 +0800
Subject: [PATCH 9/9] add patch processor func

Former-commit-id: 45841bb646afa9d0bc2ea4b6b7b107daa67d90f0
---
 src/llamafactory/model/loader.py  | 15 +++------------
 src/llamafactory/model/patcher.py | 26 ++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py
index c90913ae..9e47fb72 100644
--- a/src/llamafactory/model/loader.py
+++ b/src/llamafactory/model/loader.py
@@ -25,8 +25,7 @@ from .model_utils.misc import register_autoclass
 from .model_utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model
 from .model_utils.unsloth import load_unsloth_pretrained_model
 from .model_utils.valuehead import load_valuehead_params
-from .model_utils.visual import get_image_seqlen, get_patch_size, get_vision_feature_select_strategy
-from .patcher import patch_config, patch_model, patch_tokenizer, patch_valuehead_model
+from .patcher import patch_config, patch_model, patch_processor, patch_tokenizer, patch_valuehead_model
 
 
 if TYPE_CHECKING:
@@ -61,7 +60,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]:
 
 def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
     r"""
-    Loads pretrained tokenizer.
+    Loads pretrained tokenizer and optionally loads processor.
 
     Note: including inplace operation of model_args.
     """
@@ -94,17 +93,9 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
             logger.warning("New tokens have been added, changed `resize_vocab` to True.")
 
     patch_tokenizer(tokenizer)
-
     try:
         processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
-        setattr(processor, "tokenizer", tokenizer)
-        setattr(processor, "image_seqlen", get_image_seqlen(config))
-        setattr(processor, "image_resolution", model_args.image_resolution)
-        setattr(processor, "patch_size", get_patch_size(config))
-        setattr(processor, "video_resolution", model_args.video_resolution)
-        setattr(processor, "video_fps", model_args.video_fps)
-        setattr(processor, "video_maxlen", model_args.video_maxlen)
-        setattr(processor, "vision_feature_select_strategy", get_vision_feature_select_strategy(config))
+        patch_processor(processor, config, tokenizer, model_args)
     except Exception:
         processor = None
 
diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index 3de82703..e4bb7ac1 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -34,11 +34,17 @@ from .model_utils.packing import configure_packing
 from .model_utils.quantization import configure_quantization
 from .model_utils.rope import configure_rope
 from .model_utils.valuehead import prepare_valuehead_model
-from .model_utils.visual import autocast_projector_dtype, configure_visual_model
+from .model_utils.visual import (
+    autocast_projector_dtype,
+    configure_visual_model,
+    get_image_seqlen,
+    get_patch_size,
+    get_vision_feature_select_strategy,
+)
 
 
 if TYPE_CHECKING:
-    from transformers import PretrainedConfig, PreTrainedTokenizer
+    from transformers import PretrainedConfig, PreTrainedTokenizer, ProcessorMixin
     from trl import AutoModelForCausalLMWithValueHead
 
     from ..hparams import ModelArguments
@@ -52,6 +58,22 @@ def patch_tokenizer(tokenizer: "PreTrainedTokenizer") -> None:
         tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)
 
 
+def patch_processor(  # Sets the attributes below on `processor` in place; returns None.
+    processor: "ProcessorMixin",  # object that receives the attributes
+    config: "PretrainedConfig",  # passed to the get_* helpers from model_utils.visual
+    tokenizer: "PreTrainedTokenizer",  # stored as `processor.tokenizer`
+    model_args: "ModelArguments",  # source of the image/video settings
+) -> None:
+    setattr(processor, "tokenizer", tokenizer)  # overwrites any existing `tokenizer` attribute
+    setattr(processor, "image_seqlen", get_image_seqlen(config))  # computed from config
+    setattr(processor, "image_resolution", model_args.image_resolution)  # copied from model_args
+    setattr(processor, "patch_size", get_patch_size(config))  # computed from config
+    setattr(processor, "video_resolution", model_args.video_resolution)  # copied from model_args
+    setattr(processor, "video_fps", model_args.video_fps)  # copied from model_args
+    setattr(processor, "video_maxlen", model_args.video_maxlen)  # copied from model_args
+    setattr(processor, "vision_feature_select_strategy", get_vision_feature_select_strategy(config))
+
+
 def patch_config(
     config: "PretrainedConfig",
     tokenizer: "PreTrainedTokenizer",