[model] support ERNIE-4.5-VL Models (#9521)

This commit is contained in:
Hertz
2025-11-24 16:48:06 +08:00
committed by GitHub
parent 3140c242f0
commit 591fc9ed02
3 changed files with 66 additions and 0 deletions

View File

@@ -465,6 +465,38 @@ class BasePlugin(MMPluginMixin):
self._validate_input(processor, images, videos, audios)
return self._get_mm_inputs(images, videos, audios, processor)
@dataclass
class ErnieVLPlugin(BasePlugin):
    """Multimodal plugin registered under the ``ernie_vl`` key (ERNIE-4.5-VL support).

    Rewrites the generic image/video placeholders found in chat messages into
    numbered, tag-delimited segments.
    """

    @override
    def process_messages(
        self,
        messages: list[dict[str, str]],
        images: list["ImageInput"],
        videos: list["VideoInput"],
        audios: list["AudioInput"],
        processor: Optional["MMProcessor"],
    ) -> list[dict[str, str]]:
        """Expand multimodal placeholders in ``messages``.

        Each ``IMAGE_PLACEHOLDER`` becomes
        ``Picture <n>:<|IMAGE_START|><image token><|IMAGE_END|>`` and each
        ``VIDEO_PLACEHOLDER`` becomes
        ``Video <n>:<|VIDEO_START|><video token><|VIDEO_END|>``. The counters
        start at 1 and keep running across all messages, so numbering is
        global to the conversation rather than per message.

        Args:
            messages: Chat messages; only the ``"content"`` entry is rewritten.
            images: Image inputs, forwarded to the validation helpers.
            videos: Video inputs, forwarded to the validation helpers.
            audios: Audio inputs, forwarded to the validation helpers.
            processor: Multimodal processor, forwarded to ``_validate_input``.

        Returns:
            A deep copy of ``messages`` with the placeholders expanded; the
            list passed in by the caller is left untouched.
        """
        self._validate_input(processor, images, videos, audios)
        self._validate_messages(messages, images, videos, audios)

        # Work on a copy so the caller's message dicts are never mutated.
        expanded = deepcopy(messages)

        # Fall back to the built-in placeholder tokens when none is configured.
        image_token = self.image_token or "<|image@placeholder|>"
        video_token = self.video_token or "<|video@placeholder|>"

        # Running counters shared by every message in the conversation.
        image_count = 0
        video_count = 0

        for entry in expanded:
            text = entry["content"]

            # Replace one occurrence per pass so each one gets its own index.
            while IMAGE_PLACEHOLDER in text:
                image_count += 1
                image_segment = f"Picture {image_count}:<|IMAGE_START|>{image_token}<|IMAGE_END|>"
                text = text.replace(IMAGE_PLACEHOLDER, image_segment, 1)

            while VIDEO_PLACEHOLDER in text:
                video_count += 1
                video_segment = f"Video {video_count}:<|VIDEO_START|>{video_token}<|VIDEO_END|>"
                text = text.replace(VIDEO_PLACEHOLDER, video_segment, 1)

            entry["content"] = text

        return expanded
@dataclass
class Gemma3Plugin(BasePlugin):
@@ -2039,6 +2071,7 @@ class VideoLlavaPlugin(BasePlugin):
PLUGINS = {
"base": BasePlugin,
"ernie_vl": ErnieVLPlugin,
"gemma3": Gemma3Plugin,
"glm4v": GLM4VPlugin,
"gemma3n": Gemma3nPlugin,