diff --git a/data/README.md b/data/README.md
index dc1c8bce..a7375b5d 100644
--- a/data/README.md
+++ b/data/README.md
@@ -11,7 +11,8 @@ If you are using a custom dataset, please provide your dataset definition in the
     "query": "the name of the column in the datasets containing the queries. (default: input)",
     "response": "the name of the column in the datasets containing the responses. (default: output)",
     "history": "the name of the column in the datasets containing the history of chat. (default: None)"
-  }
+  },
+  "stage": "the stage at which the data is used: pt, sft, or rm, corresponding to pre-training, supervised fine-tuning (PPO), and reward model (DPO) training, respectively. (default: None)"
 }
 ```
@@ -26,6 +27,7 @@ For datasets used in reward modeling or DPO training, the `response` column shou
   "output": [
     "Chosen answer",
     "Rejected answer"
-  ]
+  ],
+  "stage": "rm"
 }
 ```
diff --git a/data/README_zh.md b/data/README_zh.md
index 054ee8ea..e23a3e70 100644
--- a/data/README_zh.md
+++ b/data/README_zh.md
@@ -11,7 +11,8 @@
     "query": "数据集代表请求的表头名称(默认:input)",
     "response": "数据集代表回答的表头名称(默认:output)",
     "history": "数据集代表历史对话的表头名称(默认:None)"
-  }
+  },
+  "stage": "数据所应用的训练阶段,可选值有 pt、sft、rm 三个,分别对应预训练、指令监督微调(PPO)、奖励模型(DPO)训练,默认为 None,表示不限制"
 }
 ```
@@ -26,6 +27,7 @@
   "output": [
     "Chosen answer",
     "Rejected answer"
-  ]
+  ],
+  "stage": "rm"
 }
 ```
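As an illustration of the new field, a custom dataset definition for a preference dataset could set `stage` next to its column mapping. This is a minimal sketch, not taken from the patch itself: the dataset name and `file_name` value are placeholders, and the assumption that the column mapping sits under a `columns` key follows the surrounding README format rather than anything this diff adds.

```json
"example_comparison_dataset": {
  "file_name": "example_comparison_data.json",
  "columns": {
    "query": "input",
    "response": "output"
  },
  "stage": "rm"
}
```

Setting `stage` to `rm` marks the dataset for reward model (DPO) training; per the descriptions above, leaving it unset (the default `None`) places no restriction on the stage at which the dataset may be used.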