diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py
index acd1981e..909ce7c0 100644
--- a/src/llamafactory/data/mm_plugin.py
+++ b/src/llamafactory/data/mm_plugin.py
@@ -383,7 +383,6 @@ class CpmOPlugin(BasePlugin):
         self._validate_input(images, videos)
         image_bounds_list = []
         valid_image_nums_ls = []
-        flag = False

         for input_ids in batch_ids:
             input_ids_ = torch.tensor(input_ids)
@@ -395,8 +394,6 @@ class CpmOPlugin(BasePlugin):
             image_start_tokens += 1
             image_end_tokens = torch.where(end_cond)[0]
             valid_image_nums = max(len(image_start_tokens), len(image_end_tokens))
-            if valid_image_nums > 0:
-                flag = True
             valid_image_nums_ls.append(valid_image_nums)
             image_bounds = torch.hstack(
                 [
@@ -406,10 +403,6 @@
             )
             image_bounds_list.append(image_bounds)

-        if not flag and len(images) > 0:
-            valid_image_nums_ls = [1 for _ in range(len(batch_ids))]
-            image_bounds_list = [torch.arange(64) for _ in range(len(batch_ids))]
-
         mm_inputs = self._get_mm_inputs(images, videos, processor, valid_image_nums_ls=valid_image_nums_ls)
         mm_inputs.update({"image_bound": image_bounds_list})
         return mm_inputs
diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py
index f84749ec..c9084af0 100644
--- a/tests/data/test_mm_plugin.py
+++ b/tests/data/test_mm_plugin.py
@@ -79,17 +79,6 @@ def _is_close(batch_a: Dict[str, Any], batch_b: Dict[str, Any]) -> None:
             assert len(batch_a[key]) == len(batch_b[key])
             for tensor_a, tensor_b in zip(batch_a[key], batch_b[key]):
                 assert torch.allclose(tensor_a, tensor_b, rtol=1e-4, atol=1e-5)
-        elif (
-            isinstance(batch_a[key], list)
-            and all(isinstance(item, list) for item in batch_a[key])
-            and len(batch_a[key]) > 0
-            and len(batch_a[key][0]) > 0
-            and isinstance(batch_a[key][0][0], torch.Tensor)
-        ):
-            for item_a, item_b in zip(batch_a[key], batch_b[key]):
-                assert len(item_a) == len(item_a)
-                for tensor_a, tensor_b in zip(item_a, item_b):
-                    assert torch.allclose(tensor_a, tensor_b, rtol=1e-4, atol=1e-5)
         else:
             assert batch_a[key] == batch_b[key]

@@ -138,25 +127,6 @@ def test_base_plugin():
     _check_plugin(**check_inputs)


-@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
-def test_cpm_o_plugin():
-    tokenizer_module = _load_tokenizer_module(model_name_or_path="/data/fengzc/LLM/checkpoints/MiniCPM-V-2_6")
-    cpm_o_plugin = get_mm_plugin(name="cpm_o", image_token="<image>")
-    check_inputs = {"plugin": cpm_o_plugin, **tokenizer_module}
-    image_seqlen = 64
-    check_inputs["expected_mm_messages"] = [
-        {
-            key: value.replace("<image>", f"<image_id>0</image_id><image>{'<unk>' * image_seqlen}</image>")
-            for key, value in message.items()
-        }
-        for message in MM_MESSAGES
-    ]
-    check_inputs["expected_mm_inputs"] = _get_mm_inputs(tokenizer_module["processor"])
-    check_inputs["expected_mm_inputs"]["image_bound"] = [torch.arange(64)]
-    check_inputs["expected_no_mm_inputs"] = {"image_bound": [torch.tensor([], dtype=torch.int64).reshape(0, 2)]}
-    _check_plugin(**check_inputs)
-
-
 def test_llava_plugin():
     image_seqlen = 576
     tokenizer_module = _load_tokenizer_module(model_name_or_path="llava-hf/llava-1.5-7b-hf")
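
Note on the removed fallback: when a batch contained images but no image start/end markers survived in batch_ids (for example after truncation), the old code fabricated one valid image per sample and a dummy torch.arange(64) bound; with this patch the plugin simply reports whatever bounds it actually finds, and the hard-coded test that depended on the fabricated bounds is dropped. Below is a minimal, standalone sketch of the bound computation the patch keeps. IM_START_ID and IM_END_ID are made-up ids standing in for the tokenizer's real image start/end token ids; they are assumptions for illustration, not part of this diff.

# Sketch of CpmOPlugin's image-bound computation, under assumed token ids.
from typing import List

import torch

IM_START_ID = 101  # hypothetical id of the image start marker
IM_END_ID = 102    # hypothetical id of the image end marker


def compute_image_bounds(batch_ids: List[List[int]]) -> List[torch.Tensor]:
    image_bounds_list = []
    for input_ids in batch_ids:
        input_ids_ = torch.tensor(input_ids)
        # positions right after each start marker, and of each end marker
        image_start_tokens = torch.where(input_ids_ == IM_START_ID)[0] + 1
        image_end_tokens = torch.where(input_ids_ == IM_END_ID)[0]
        # pair them into an (n_images, 2) tensor of [start, end) indices
        image_bounds = torch.hstack(
            [image_start_tokens.unsqueeze(-1), image_end_tokens.unsqueeze(-1)]
        )
        image_bounds_list.append(image_bounds)
    return image_bounds_list


# A sequence with one image span covering ids 7, 8, 9 yields [[2, 5]];
# a text-only sequence now yields an empty (0, 2) tensor rather than a
# fabricated torch.arange(64) bound.
print(compute_image_bounds([[1, IM_START_ID, 7, 8, 9, IM_END_ID, 3]]))  # [tensor([[2, 5]])]
print(compute_image_bounds([[1, 2, 3]]))  # [tensor([], size=(0, 2), dtype=torch.int64)]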