mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-16 20:00:36 +08:00
[model] support ERNIE-4.5-VL Models (#9521)
This commit is contained in:
@@ -465,6 +465,38 @@ class BasePlugin(MMPluginMixin):
|
||||
self._validate_input(processor, images, videos, audios)
|
||||
return self._get_mm_inputs(images, videos, audios, processor)
|
||||
|
||||
@dataclass
class ErnieVLPlugin(BasePlugin):
    r"""Plugin that rewrites image/video placeholders into numbered ERNIE-VL style markers."""

    @override
    def process_messages(
        self,
        messages: list[dict[str, str]],
        images: list["ImageInput"],
        videos: list["VideoInput"],
        audios: list["AudioInput"],
        processor: Optional["MMProcessor"],
    ) -> list[dict[str, str]]:
        r"""Return a copy of ``messages`` with media placeholders expanded.

        Every ``IMAGE_PLACEHOLDER`` becomes
        ``Picture {n}:<|IMAGE_START|>{image_token}<|IMAGE_END|>`` and every
        ``VIDEO_PLACEHOLDER`` becomes
        ``Video {n}:<|VIDEO_START|>{video_token}<|VIDEO_END|>``. The counters
        ``n`` start at 1 and keep increasing across all messages, so numbering
        is global to the conversation rather than per message.

        Args:
            messages: Conversation turns; each item's ``"content"`` is rewritten.
            images: Image inputs, forwarded to the validation helpers.
            videos: Video inputs, forwarded to the validation helpers.
            audios: Audio inputs, forwarded to the validation helpers.
            processor: Multimodal processor, forwarded to ``_validate_input``.

        Returns:
            A deep copy of ``messages`` with rewritten contents; the caller's
            list and dicts are left unmodified.
        """
        self._validate_input(processor, images, videos, audios)
        self._validate_messages(messages, images, videos, audios)
        messages = deepcopy(messages)

        # The fallbacks do not depend on the message, so resolve them once.
        image_token = self.image_token or "<|image@placeholder|>"
        video_token = self.video_token or "<|video@placeholder|>"

        image_idx, video_idx = 0, 0
        for message in messages:
            content = message["content"]

            # Expand placeholders in one left-to-right pass over the split
            # pieces. A "replace until none is left" loop would never finish
            # if the configured token itself contained the placeholder, since
            # every replacement would re-insert it.
            head, *tail = content.split(IMAGE_PLACEHOLDER)
            chunks = [head]
            for segment in tail:
                image_idx += 1
                chunks.append(f"Picture {image_idx}:<|IMAGE_START|>{image_token}<|IMAGE_END|>")
                chunks.append(segment)

            content = "".join(chunks)

            # Videos are handled after images, on the already-expanded text,
            # which keeps the original image-then-video ordering.
            head, *tail = content.split(VIDEO_PLACEHOLDER)
            chunks = [head]
            for segment in tail:
                video_idx += 1
                chunks.append(f"Video {video_idx}:<|VIDEO_START|>{video_token}<|VIDEO_END|>")
                chunks.append(segment)

            message["content"] = "".join(chunks)

        return messages
|
||||
|
||||
|
||||
@dataclass
|
||||
class Gemma3Plugin(BasePlugin):
|
||||
@@ -2039,6 +2071,7 @@ class VideoLlavaPlugin(BasePlugin):
|
||||
|
||||
PLUGINS = {
|
||||
"base": BasePlugin,
|
||||
"ernie_vl": ErnieVLPlugin,
|
||||
"gemma3": Gemma3Plugin,
|
||||
"glm4v": GLM4VPlugin,
|
||||
"gemma3n": Gemma3nPlugin,
|
||||
|
||||
Reference in New Issue
Block a user