enable cutoff len

This commit is contained in:
hiyouga
2024-01-18 12:25:42 +08:00
parent 83dbfce8c3
commit f1067d2b58
8 changed files with 297254 additions and 85 deletions

View File

@@ -29,7 +29,7 @@ class DatasetAttr:
history: Optional[str] = None
messages: Optional[str] = "conversations"
- tool: Optional[str] = None
+ tools: Optional[str] = None
role_tag: Optional[str] = "from"
content_tag: Optional[str] = "value"
@@ -86,7 +86,7 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
if dataset_attr.formatting == "alpaca":
column_names = ["prompt", "query", "response", "history"]
else:
- column_names = ["messages", "tool"]
+ column_names = ["messages", "tools"]
column_names += ["system"]
for column_name in column_names: