From 4d473894fddf4778a5cbf8449de1b617c5f44983 Mon Sep 17 00:00:00 2001 From: Mark Mueller Date: Thu, 8 Feb 2024 17:56:18 +0100 Subject: [PATCH] Slim Orca data parsing Former-commit-id: ca57d27c39d4e7bc3dd7c3207a23d23d2cbd446b --- src/llmtuner/data/aligner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llmtuner/data/aligner.py b/src/llmtuner/data/aligner.py index d4f281bc..cd3a7ea4 100644 --- a/src/llmtuner/data/aligner.py +++ b/src/llmtuner/data/aligner.py @@ -60,6 +60,7 @@ def convert_sharegpt(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr" if dataset_attr.system_tag and message[dataset_attr.role_tag] == dataset_attr.system_tag: outputs["system"].append(message[dataset_attr.content_tag]) n_sys = 1 + continue if (turn_idx - n_sys) % 2 == 0: accept_tags = [dataset_attr.user_tag, dataset_attr.observation_tag]