From 692b132dbf0124885f8f499c702b6a81de494ca2 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 8 Sep 2023 20:45:07 +0800
Subject: [PATCH] fix bug in DPO data collator

Former-commit-id: 4fc262cdf1347691e253bdfbd96568db5a49c086
---
 src/llmtuner/tuner/dpo/collator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llmtuner/tuner/dpo/collator.py b/src/llmtuner/tuner/dpo/collator.py
index 2f0f4bdc..5c862b4f 100644
--- a/src/llmtuner/tuner/dpo/collator.py
+++ b/src/llmtuner/tuner/dpo/collator.py
@@ -16,7 +16,7 @@ class DPODataCollatorWithPadding(DataCollatorForSeq2Seq):
             if self.tokenizer.padding_side == "left":
                 start, end = feature.size(0) - answer_len, feature.size(0)
             else:
-                start, end = prompt_len, answer_len
+                start, end = prompt_len, prompt_len + answer_len
             padded_tensor = self.label_pad_token_id * torch.ones_like(feature)
             padded_tensor[start:end] = feature[start:end]
             padded_labels.append(padded_tensor)