mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-14 15:52:49 +08:00
add readme for dataset
Former-commit-id: bdcb0ea40e726e4c5752f938b379ed9a18e7e1d0
This commit is contained in:
parent
cbc7db3478
commit
a6662b73f5
@ -11,7 +11,8 @@ If you are using a custom dataset, please provide your dataset definition in the
|
||||
"query": "the name of the column in the datasets containing the queries. (default: input)",
|
||||
"response": "the name of the column in the datasets containing the responses. (default: output)",
|
||||
"history": "the name of the column in the datasets containing the history of chat. (default: None)"
|
||||
}
|
||||
},
|
||||
"stage": "The stage at which the data is being used: pt, sft, and rm, which correspond to pre-training, supervised fine-tuning(PPO), and reward model (DPO) training, respectively.(default: None)"
|
||||
}
|
||||
```
|
||||
|
||||
@ -26,6 +27,7 @@ For datasets used in reward modeling or DPO training, the `response` column shou
|
||||
"output": [
|
||||
"Chosen answer",
|
||||
"Rejected answer"
|
||||
]
|
||||
],
|
||||
"stage": "rm"
|
||||
}
|
||||
```
|
||||
|
@ -11,7 +11,8 @@
|
||||
"query": "数据集代表请求的表头名称(默认:input)",
|
||||
"response": "数据集代表回答的表头名称(默认:output)",
|
||||
"history": "数据集代表历史对话的表头名称(默认:None)"
|
||||
}
|
||||
},
|
||||
"stage": "数据所应用的训练阶段,可选值有 pt, sft, rm 三个,对应预训练,指令监督微调(PPO),奖励模型(DPO)训练, 默认为None,表示不限制"
|
||||
}
|
||||
```
|
||||
|
||||
@ -26,6 +27,7 @@
|
||||
"output": [
|
||||
"Chosen answer",
|
||||
"Rejected answer"
|
||||
]
|
||||
],
|
||||
"stage": "rm"
|
||||
}
|
||||
```
|
||||
|
Loading…
x
Reference in New Issue
Block a user