From 87d6d7dc61f40ce56be1bbfb3169fd70214a6a8c Mon Sep 17 00:00:00 2001 From: hiyouga Date: Mon, 4 Nov 2024 08:18:12 +0000 Subject: [PATCH 1/2] fix chat engines Former-commit-id: 3a220b7992d265c77d9a1a406ef86eefbc699cfe --- src/llamafactory/chat/hf_engine.py | 4 ++-- src/llamafactory/chat/vllm_engine.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 3ac04982..eeed9a29 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -86,12 +86,12 @@ class HuggingfaceEngine(BaseEngine): mm_input_dict = {"images": [], "videos": [], "imglens": [0], "vidlens": [0]} if images is not None: mm_input_dict.update({"images": images, "imglens": [len(images)]}) - if not any(IMAGE_PLACEHOLDER not in message["content"] for message in messages): + if not any(IMAGE_PLACEHOLDER in message["content"] for message in messages): messages[0]["content"] = IMAGE_PLACEHOLDER * len(images) + messages[0]["content"] if videos is not None: mm_input_dict.update({"videos": videos, "vidlens": [len(videos)]}) - if not any(VIDEO_PLACEHOLDER not in message["content"] for message in messages): + if not any(VIDEO_PLACEHOLDER in message["content"] for message in messages): messages[0]["content"] = VIDEO_PLACEHOLDER * len(videos) + messages[0]["content"] messages = template.mm_plugin.process_messages( diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 37feccc2..5f6612be 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -107,7 +107,7 @@ class VllmEngine(BaseEngine): ) -> AsyncIterator["RequestOutput"]: request_id = f"chatcmpl-{uuid.uuid4().hex}" if images is not None: - if not any(IMAGE_PLACEHOLDER not in message["content"] for message in messages): + if not any(IMAGE_PLACEHOLDER in message["content"] for message in messages): messages[0]["content"] = IMAGE_PLACEHOLDER * len(images) + messages[0]["content"] paired_messages = messages + [{"role": "assistant", "content": ""}] From e2fa9613020bcb1b43f82f827d0e9fd60d8008fa Mon Sep 17 00:00:00 2001 From: hiyouga Date: Mon, 4 Nov 2024 08:27:20 +0000 Subject: [PATCH 2/2] add image input type Former-commit-id: 6fe260e35ff12662b72f26ec9df44e87b9693551 --- src/llamafactory/data/mm_plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py index f6748883..6a174838 100644 --- a/src/llamafactory/data/mm_plugin.py +++ b/src/llamafactory/data/mm_plugin.py @@ -30,7 +30,7 @@ if TYPE_CHECKING: path: Optional[str] bytes: Optional[bytes] - ImageInput = Union[str, EncodedImage, ImageObject] + ImageInput = Union[str, bytes, EncodedImage, ImageObject] VideoInput = str @@ -104,6 +104,8 @@ class BasePlugin: for image in images: if isinstance(image, str): image = Image.open(image) + elif isinstance(image, bytes): + image = Image.open(BytesIO(image)) elif isinstance(image, dict): if image["bytes"] is not None: image = Image.open(BytesIO(image["bytes"]))