From 890926e60c8585f60b782f77305331f0afe4f54a Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 30 May 2024 00:04:26 +0800 Subject: [PATCH] Update README.md Former-commit-id: 65fb69e388c0a04c15ecd11441e567966f51fae5 --- data/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/README.md b/data/README.md index dd7ca201..5ceae666 100644 --- a/data/README.md +++ b/data/README.md @@ -12,6 +12,7 @@ Currently we support datasets in **alpaca** and **sharegpt** format. "ranking": "whether the dataset is a preference dataset or not. (default: False)", "subset": "the name of the subset. (optional, default: None)", "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)", + "num_samples": "the number of samples in the dataset used for training. (optional, default: None)", "columns (optional)": { "prompt": "the column name in the dataset containing the prompts. (default: instruction)", "query": "the column name in the dataset containing the queries. (default: input)", @@ -32,9 +33,8 @@ Currently we support datasets in **alpaca** and **sharegpt** format. "assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)", "observation_tag": "the value of the role_tag represents the tool results. (default: observation)", "function_tag": "the value of the role_tag represents the function call. (default: function_call)", - "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)", - }, - "sample_num": "the number of samples from this dataset can be greater than the total amount of the dataset. (default: None)" + "system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)" + } } ```