[data] Fix minicpmv/o dpo training (#6657)

* fix template name

* tiny fix

* support minicpm-o-2.6

* support inference of minicpmv

* update readme

* support dpo of minicpmv

Former-commit-id: 027942789bf3a28b2506a5730c05c8392ef5c885
This commit is contained in:
Zhangchi Feng 2025-01-15 17:30:37 +08:00 committed by GitHub
parent 8895cf1152
commit 555f17c1ee

View File

@@ -155,7 +155,7 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
         if "image_bound" in features:  # for minicpmv inputs
             bsz, seq_length = features["input_ids"].shape
             features["position_ids"] = torch.arange(seq_length).long().repeat(bsz, 1)
-            return {"data": features, "labels": features["labels"]}
+            return {"data": features, "input_ids": features["input_ids"], "labels": features["labels"]}
         return features