Former-commit-id: 091010492b2afc8ad25359323aba760523a4c5af
This commit is contained in:
hiyouga 2024-07-24 17:00:29 +08:00
parent 019c6dad84
commit d63beb7a24

View File

@@ -120,15 +120,15 @@ def convert_sharegpt(
even_tags = (dataset_attr.assistant_tag, dataset_attr.function_tag)
accept_tags = (odd_tags, even_tags)
for i, messages in enumerate(examples[dataset_attr.messages]):
if len(messages) == 0:
continue
if dataset_attr.system_tag and messages[0][dataset_attr.role_tag] == dataset_attr.system_tag:
system = messages[0][dataset_attr.content_tag]
messages = messages[1:]
else:
system = examples[dataset_attr.system][i] if dataset_attr.system else ""
if len(messages) == 0:
continue
aligned_messages = []
broken_data = False
for turn_idx, message in enumerate(messages):