From 6d6e0f44fccb16cfa0ceccdba42d01f96a1a2e8d Mon Sep 17 00:00:00 2001 From: Hao <88525050+himalalps@users.noreply.github.com> Date: Tue, 1 Apr 2025 15:21:53 +0800 Subject: [PATCH] [trainer] new kto mismatch pair creation strategy (#7509) --- src/llamafactory/data/processor/feedback.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/processor/feedback.py b/src/llamafactory/data/processor/feedback.py index ed9359ae..871615b9 100644 --- a/src/llamafactory/data/processor/feedback.py +++ b/src/llamafactory/data/processor/feedback.py @@ -83,8 +83,8 @@ class FeedbackDatasetProcessor(DatasetProcessor): return input_ids, labels, kl_input_ids, kl_labels, kto_tag def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[str, list[Any]]: - # create unrelated input-output pairs for estimating the KL term by flipping the matched pairs - kl_response = examples["_response"][::-1] + # Creates mismatched pairs of prompts and completions for the KL dataset by adding a +1 offset to the order of completions. + kl_response = [examples["_response"][-1]] + examples["_response"][:-1] model_inputs = defaultdict(list) for i in range(len(examples["_prompt"])): if len(examples["_prompt"][i]) % 2 != 1 or len(examples["_response"][i]) < 2: