fix style

Former-commit-id: 0cc7260a93bf7c65451e376245aa143f9237d7d8
2025-12-12 18:08:19 +08:00 · 2025-01-13 14:19:38 +08:00 · 2025-01-13 14:19:38 +08:00 · 4741eec2d1
commit 4741eec2d1
parent d2afe0c63c
3 changed files with 23 additions and 17 deletions
--- a/src/llamafactory/data/collator.py
+++ b/src/llamafactory/data/collator.py
@@ -157,7 +157,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
            features["position_ids"] = [torch.arange(input_ids.size(0)).long() for input_ids in features["input_ids"]]
            features["position_ids"] = pad_sequence(features["position_ids"], batch_first=True, padding_value=0)
            new_features = {"data": features}
-            new_features.update({"labels": features['labels']})
+            new_features.update({"labels": features["labels"]})
            features = new_features

        return features
--- a/src/llamafactory/data/mm_plugin.py
+++ b/src/llamafactory/data/mm_plugin.py
@@ -384,7 +384,7 @@ class CpmOPlugin(BasePlugin):
        image_bounds_list = []
        valid_image_nums_ls = []
        flag = False
-        
+
        for input_ids in batch_ids:
            input_ids_ = torch.tensor(input_ids)
            start_cond = (input_ids_ == processor.tokenizer.im_start_id) | (
@@ -405,8 +405,8 @@ class CpmOPlugin(BasePlugin):
                ]
            )
            image_bounds_list.append(image_bounds)
-        
-        if not flag and len(images)>0:
+
+        if not flag and len(images) > 0:
            valid_image_nums_ls = [1 for _ in range(len(batch_ids))]
            image_bounds_list = [torch.arange(64) for _ in range(len(batch_ids))]

--- a/tests/data/test_mm_plugin.py
+++ b/tests/data/test_mm_plugin.py
@@ -76,11 +76,16 @@ def _is_close(batch_a: Dict[str, Any], batch_b: Dict[str, Any]) -> None:
        if isinstance(batch_a[key], torch.Tensor):
            assert torch.allclose(batch_a[key], batch_b[key], rtol=1e-4, atol=1e-5)
        elif isinstance(batch_a[key], list) and all(isinstance(item, torch.Tensor) for item in batch_a[key]):
-                assert len(batch_a[key]) == len(batch_b[key])
-                for tensor_a, tensor_b in zip(batch_a[key], batch_b[key]):
-                    assert torch.allclose(tensor_a, tensor_b, rtol=1e-4, atol=1e-5)
-        elif isinstance(batch_a[key], list) and all(isinstance(item, list) for item in batch_a[key]) \
-            and len(batch_a[key])>0 and len(batch_a[key][0])>0 and isinstance(batch_a[key][0][0], torch.Tensor):
+            assert len(batch_a[key]) == len(batch_b[key])
+            for tensor_a, tensor_b in zip(batch_a[key], batch_b[key]):
+                assert torch.allclose(tensor_a, tensor_b, rtol=1e-4, atol=1e-5)
+        elif (
+            isinstance(batch_a[key], list)
+            and all(isinstance(item, list) for item in batch_a[key])
+            and len(batch_a[key]) > 0
+            and len(batch_a[key][0]) > 0
+            and isinstance(batch_a[key][0][0], torch.Tensor)
+        ):
            for item_a, item_b in zip(batch_a[key], batch_b[key]):
                assert len(item_a) == len(item_a)
                for tensor_a, tensor_b in zip(item_a, item_b):
@@ -140,18 +145,19 @@ def test_cpm_o_plugin():
    check_inputs = {"plugin": cpm_o_plugin, **tokenizer_module}
    image_seqlen = 64
    check_inputs["expected_mm_messages"] = [
-        {key: value.replace("<image>", f"<image_id>0</image_id><image>{'<unk>' * image_seqlen}</image>") for key, value in message.items()}
+        {
+            key: value.replace("<image>", f"<image_id>0</image_id><image>{'<unk>' * image_seqlen}</image>")
+            for key, value in message.items()
+        }
        for message in MM_MESSAGES
    ]
    check_inputs["expected_mm_inputs"] = {
-        "pixel_values": [[]], 
-        "image_sizes": [[]], 
-        "tgt_sizes": [[]], 
-        "image_bound": [torch.tensor([], dtype=torch.int64).reshape(0,2)]
-    }
-    check_inputs["expected_no_mm_inputs"] = {
-        "image_bound": [torch.tensor([], dtype=torch.int64).reshape(0,2)]
+        "pixel_values": [[]],
+        "image_sizes": [[]],
+        "tgt_sizes": [[]],
+        "image_bound": [torch.tensor([], dtype=torch.int64).reshape(0, 2)],
    }
+    check_inputs["expected_no_mm_inputs"] = {"image_bound": [torch.tensor([], dtype=torch.int64).reshape(0, 2)]}
    _check_plugin(**check_inputs)