support sharegpt format, add datasets

Former-commit-id: 202daf8987ccb7523be03ca535b572b5c9e65994
This commit is contained in:
hiyouga
2023-11-02 23:10:04 +08:00
parent 7d13501b94
commit b77c745b1a
6 changed files with 192 additions and 97 deletions

View File

@@ -6,7 +6,9 @@ If you are using a custom dataset, please provide your dataset definition in the
"script_url": "the name of the directory containing a dataset loading script. (if specified, ignore below 2 arguments)",
"file_name": "the name of the dataset file in the this directory. (required if above are not specified)",
"file_sha1": "the SHA-1 hash value of the dataset file. (optional)",
"subset": "",
"ranking": "whether the examples contains ranked responses or not. (default: false)",
"formatting": "",
"columns": {
"prompt": "the name of the column in the datasets containing the prompts. (default: instruction)",
"query": "the name of the column in the datasets containing the queries. (default: input)",