mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 10:56:56 +08:00
Merge pull request #3829 from seanzhang-zhichen/add_dataset_sample_num
Add dataset sample num
Former-commit-id: 483eb47e5d
This commit is contained in:
@@ -12,6 +12,7 @@ Currently we support datasets in **alpaca** and **sharegpt** format.
|
||||
"ranking": "whether the dataset is a preference dataset or not. (default: False)",
|
||||
"subset": "the name of the subset. (optional, default: None)",
|
||||
"folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
|
||||
"num_samples": "the number of samples in the dataset used for training. (optional, default: None)",
|
||||
"columns (optional)": {
|
||||
"prompt": "the column name in the dataset containing the prompts. (default: instruction)",
|
||||
"query": "the column name in the dataset containing the queries. (default: input)",
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
"ranking": "是否为偏好数据集(可选,默认:False)",
|
||||
"subset": "数据集子集的名称(可选,默认:None)",
|
||||
"folder": "Hugging Face 仓库的文件夹名称(可选,默认:None)",
|
||||
"num_samples": "该数据集中用于训练的样本数量。(可选,默认:None)",
|
||||
"columns(可选)": {
|
||||
"prompt": "数据集代表提示词的表头名称(默认:instruction)",
|
||||
"query": "数据集代表请求的表头名称(默认:input)",
|
||||
|
||||
Reference in New Issue
Block a user