diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py
index 99542d23..cf567d5f 100644
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@@ -113,9 +113,9 @@ class VllmEngine(BaseEngine):
                 messages[0]["content"] = IMAGE_PLACEHOLDER * len(images) + messages[0]["content"]
 
             if self.template.mm_plugin.__class__.__name__ == "Qwen2vlPlugin":  # temporary solution
-                image_str = "<|vision_start|>" + self.template.mm_plugin.image_token + "<|vision_end|>"
+                image_str = f"<|vision_start|>{self.template.mm_plugin.image_token}<|vision_end|>"
             else:
-                image_str = self.template.mm_plugin.image_token
+                image_str = self.template.mm_plugin.image_token or ""
 
             paired_messages = [
                 {"role": message["role"], "content": message["content"].replace(IMAGE_PLACEHOLDER, image_str)}
diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py
index b04c2673..248cbd38 100644
--- a/src/llamafactory/data/mm_plugin.py
+++ b/src/llamafactory/data/mm_plugin.py
@@ -82,7 +82,7 @@ class BasePlugin:
         Pre-processes a single image.
         """
         image_resolution: int = kwargs.get("image_resolution")
-        if image.width * image.height > image_resolution:
+        if (image.width * image.height) > image_resolution:
             resize_factor = math.sqrt(image_resolution / (image.width * image.height))
             width, height = int(image.width * resize_factor), int(image.height * resize_factor)
             image = image.resize((width, height), resample=Image.NEAREST)
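
For context, here is a minimal standalone sketch of the area-capping resize that the `mm_plugin.py` hunk touches; the function name `cap_image_area` and the `512 * 512` default are illustrative, not part of the project's API.

```python
# A sketch of the area-capping resize used in BasePlugin's image preprocessing.
# Assumes Pillow is installed; cap_image_area and its default cap are hypothetical names.
import math

from PIL import Image


def cap_image_area(image: Image.Image, image_resolution: int = 512 * 512) -> Image.Image:
    """Downscale the image so that width * height stays at or below image_resolution."""
    if (image.width * image.height) > image_resolution:
        # sqrt of the area ratio preserves the aspect ratio while bringing
        # the total pixel count down to (at most) the cap
        resize_factor = math.sqrt(image_resolution / (image.width * image.height))
        width = int(image.width * resize_factor)
        height = int(image.height * resize_factor)
        image = image.resize((width, height), resample=Image.NEAREST)

    return image


if __name__ == "__main__":
    img = Image.new("RGB", (2048, 1024))                    # 2,097,152 pixels
    img = cap_image_area(img, image_resolution=512 * 512)   # cap at 262,144 pixels
    print(img.size)                                         # (724, 362) -> 262,088 pixels
```

The other hunk's `or ""` fallback guards against `image_token` being `None`, so the subsequent `message["content"].replace(IMAGE_PLACEHOLDER, image_str)` call never receives a non-string replacement.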