fix #896

Former-commit-id: 4b70d623d817460de4732749110622e4a1b51958
2026-02-07 22:42:16 +08:00 · 2023-09-14 18:37:34 +08:00
parent 0ca36a0f8d
commit 3d9e2de573
1 changed file with 3 additions and 3 deletions
--- a/src/llmtuner/dsets/preprocess.py
+++ b/src/llmtuner/dsets/preprocess.py
@@ -140,9 +140,9 @@ def preprocess_dataset(
        print("input_ids:\n{}".format(example["input_ids"]))
        print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))
        print("label_ids:\n{}".format(example["labels"]))
-        print("labels:\n{}".format(tokenizer.decode([
+        print("labels:\n{}".format(
-            token_id if token_id != IGNORE_INDEX else tokenizer.pad_token_id for token_id in example["labels"]
+            tokenizer.decode(list(filter(lambda x: x != IGNORE_INDEX, example["labels"])), skip_special_tokens=False)
-        ], skip_special_tokens=False)))
+        ))
    def print_pairwise_dataset_example(example):
        print("prompt_ids:\n{}".format(example["prompt_ids"]))