fix style

Former-commit-id: 0cc7260a93bf7c65451e376245aa143f9237d7d8
This commit is contained in:
fzc8578 2025-01-13 14:19:38 +08:00
parent d2afe0c63c
commit 4741eec2d1
3 changed files with 23 additions and 17 deletions

View File

@@ -157,7 +157,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
features["position_ids"] = [torch.arange(input_ids.size(0)).long() for input_ids in features["input_ids"]]
features["position_ids"] = pad_sequence(features["position_ids"], batch_first=True, padding_value=0)
new_features = {"data": features}
new_features.update({"labels": features['labels']})
new_features.update({"labels": features["labels"]})
features = new_features
return features

View File

@@ -384,7 +384,7 @@ class CpmOPlugin(BasePlugin):
image_bounds_list = []
valid_image_nums_ls = []
flag = False
for input_ids in batch_ids:
input_ids_ = torch.tensor(input_ids)
start_cond = (input_ids_ == processor.tokenizer.im_start_id) | (
@@ -405,8 +405,8 @@ class CpmOPlugin(BasePlugin):
]
)
image_bounds_list.append(image_bounds)
if not flag and len(images)>0:
if not flag and len(images) > 0:
valid_image_nums_ls = [1 for _ in range(len(batch_ids))]
image_bounds_list = [torch.arange(64) for _ in range(len(batch_ids))]

View File

@@ -76,11 +76,16 @@ def _is_close(batch_a: Dict[str, Any], batch_b: Dict[str, Any]) -> None:
if isinstance(batch_a[key], torch.Tensor):
assert torch.allclose(batch_a[key], batch_b[key], rtol=1e-4, atol=1e-5)
elif isinstance(batch_a[key], list) and all(isinstance(item, torch.Tensor) for item in batch_a[key]):
assert len(batch_a[key]) == len(batch_b[key])
for tensor_a, tensor_b in zip(batch_a[key], batch_b[key]):
assert torch.allclose(tensor_a, tensor_b, rtol=1e-4, atol=1e-5)
elif isinstance(batch_a[key], list) and all(isinstance(item, list) for item in batch_a[key]) \
and len(batch_a[key])>0 and len(batch_a[key][0])>0 and isinstance(batch_a[key][0][0], torch.Tensor):
assert len(batch_a[key]) == len(batch_b[key])
for tensor_a, tensor_b in zip(batch_a[key], batch_b[key]):
assert torch.allclose(tensor_a, tensor_b, rtol=1e-4, atol=1e-5)
elif (
isinstance(batch_a[key], list)
and all(isinstance(item, list) for item in batch_a[key])
and len(batch_a[key]) > 0
and len(batch_a[key][0]) > 0
and isinstance(batch_a[key][0][0], torch.Tensor)
):
for item_a, item_b in zip(batch_a[key], batch_b[key]):
# zip() truncates silently, so check the two nested lists against each other (not item_a against itself).
assert len(item_a) == len(item_b)
for tensor_a, tensor_b in zip(item_a, item_b):
@@ -140,18 +145,19 @@ def test_cpm_o_plugin():
check_inputs = {"plugin": cpm_o_plugin, **tokenizer_module}
image_seqlen = 64
check_inputs["expected_mm_messages"] = [
{key: value.replace("<image>", f"<image_id>0</image_id><image>{'<unk>' * image_seqlen}</image>") for key, value in message.items()}
{
key: value.replace("<image>", f"<image_id>0</image_id><image>{'<unk>' * image_seqlen}</image>")
for key, value in message.items()
}
for message in MM_MESSAGES
]
check_inputs["expected_mm_inputs"] = {
"pixel_values": [[]],
"image_sizes": [[]],
"tgt_sizes": [[]],
"image_bound": [torch.tensor([], dtype=torch.int64).reshape(0,2)]
}
check_inputs["expected_no_mm_inputs"] = {
"image_bound": [torch.tensor([], dtype=torch.int64).reshape(0,2)]
"pixel_values": [[]],
"image_sizes": [[]],
"tgt_sizes": [[]],
"image_bound": [torch.tensor([], dtype=torch.int64).reshape(0, 2)],
}
check_inputs["expected_no_mm_inputs"] = {"image_bound": [torch.tensor([], dtype=torch.int64).reshape(0, 2)]}
_check_plugin(**check_inputs)