From e93e9641f52bdc88e2a7f81202ae6edd9ccc1795 Mon Sep 17 00:00:00 2001
From: hiyouga <hiyouga@buaa.edu.cn>
Date: Fri, 18 Aug 2023 01:51:55 +0800
Subject: [PATCH] update readme

Former-commit-id: e4eec9ddfd3a9688733e018a96274dff0d5d9962
---
 README.md    | 77 +++++++++++++++++++++++++++-------------------------
 README_zh.md | 74 ++++++++++++++++++++++++++------------------------
 2 files changed, 78 insertions(+), 73 deletions(-)
diff --git a/README.md b/README.md
index a93a82f0..ce188b20 100644
--- a/README.md
+++ b/README.md
@@ -164,7 +164,9 @@ We strongly recommend using the all-in-one Web UI for newcomers since it can als
 
 Currently the web UI only supports training on **a single GPU**.
 
-### Pre-Training
+### Train on a single GPU
+
+#### Pre-Training
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -187,7 +189,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --fp16
 ```
 
-### Supervised Fine-Tuning
+#### Supervised Fine-Tuning
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -210,7 +212,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --fp16
 ```
 
-### Reward Modeling
+#### Reward Modeling
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -234,7 +236,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --fp16
 ```
 
-### PPO Training
+#### PPO Training
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -255,10 +257,11 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --save_steps 1000 \
     --learning_rate 1e-5 \
     --num_train_epochs 1.0 \
-    --plot_loss
+    --plot_loss \
+    --fp16
 ```
 
-### DPO Training
+#### DPO Training
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -357,40 +360,15 @@ deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
 
 </details>
 
-### Evaluation (BLEU and ROUGE_CHINESE)
+### Export model
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
-    --stage sft \
+python src/export_model.py \
     --model_name_or_path path_to_your_model \
-    --do_eval \
-    --dataset alpaca_gpt4_en \
     --template default \
     --finetuning_type lora \
     --checkpoint_dir path_to_checkpoint \
-    --output_dir path_to_eval_result \
-    --per_device_eval_batch_size 8 \
-    --max_samples 100 \
-    --predict_with_generate
-```
-
-We recommend using `--per_device_eval_batch_size=1` and `--max_target_length 128` at 4/8-bit evaluation.
-
-### Predict
-
-```bash
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
-    --stage sft \
-    --model_name_or_path path_to_your_model \
-    --do_predict \
-    --dataset alpaca_gpt4_en \
-    --template default \
-    --finetuning_type lora \
-    --checkpoint_dir path_to_checkpoint \
-    --output_dir path_to_predict_result \
-    --per_device_eval_batch_size 8 \
-    --max_samples 100 \
-    --predict_with_generate
+    --output_dir path_to_export
 ```
 
 ### API Demo
@@ -425,15 +403,40 @@ python src/web_demo.py \
     --checkpoint_dir path_to_checkpoint
 ```
 
-### Export model
+### Evaluation (BLEU and ROUGE_CHINESE)
 
 ```bash
-python src/export_model.py \
+CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+    --stage sft \
     --model_name_or_path path_to_your_model \
+    --do_eval \
+    --dataset alpaca_gpt4_en \
     --template default \
     --finetuning_type lora \
     --checkpoint_dir path_to_checkpoint \
-    --output_dir path_to_export
+    --output_dir path_to_eval_result \
+    --per_device_eval_batch_size 8 \
+    --max_samples 100 \
+    --predict_with_generate
+```
+
+We recommend using `--per_device_eval_batch_size 1` and `--max_target_length 128` for 4/8-bit evaluation.
+
+### Predict
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+    --stage sft \
+    --model_name_or_path path_to_your_model \
+    --do_predict \
+    --dataset alpaca_gpt4_en \
+    --template default \
+    --finetuning_type lora \
+    --checkpoint_dir path_to_checkpoint \
+    --output_dir path_to_predict_result \
+    --per_device_eval_batch_size 8 \
+    --max_samples 100 \
+    --predict_with_generate
 ```
 
 ## TODO
diff --git a/README_zh.md b/README_zh.md
index fe96c55a..08cd4504 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -164,7 +164,9 @@ CUDA_VISIBLE_DEVICES=0 python src/train_web.py
 
 目前网页 UI 仅支持**单卡训练**。
 
-### 预训练
+### 单 GPU 训练
+
+#### 预训练
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -187,7 +189,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --fp16
 ```
 
-### 指令监督微调
+#### 指令监督微调
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -210,7 +212,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --fp16
 ```
 
-### 奖励模型训练
+#### 奖励模型训练
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -234,7 +236,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --fp16
 ```
 
-### PPO 训练
+#### PPO 训练
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -258,7 +260,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --plot_loss
 ```
 
-### DPO 训练
+#### DPO 训练
 
 ```bash
 CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
@@ -357,40 +359,15 @@ deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
 
 </details>
 
-### 指标评估（BLEU 分数和汉语 ROUGE 分数）
+### 导出微调后的模型
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
-    --stage sft \
+python src/export_model.py \
     --model_name_or_path path_to_your_model \
-    --do_eval \
-    --dataset alpaca_gpt4_zh \
     --template default \
     --finetuning_type lora \
     --checkpoint_dir path_to_checkpoint \
-    --output_dir path_to_eval_result \
-    --per_device_eval_batch_size 8 \
-    --max_samples 100 \
-    --predict_with_generate
-```
-
-我们建议在量化模型的评估中使用 `--per_device_eval_batch_size=1` 和 `--max_target_length 128`。
-
-### 模型预测
-
-```bash
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
-    --stage sft \
-    --model_name_or_path path_to_your_model \
-    --do_predict \
-    --dataset alpaca_gpt4_zh \
-    --template default \
-    --finetuning_type lora \
-    --checkpoint_dir path_to_checkpoint \
-    --output_dir path_to_predict_result \
-    --per_device_eval_batch_size 8 \
-    --max_samples 100 \
-    --predict_with_generate
+    --output_dir path_to_export
 ```
 
 ### API 服务
@@ -425,15 +402,40 @@ python src/web_demo.py \
     --checkpoint_dir path_to_checkpoint
 ```
 
-### 导出微调模型
+### 指标评估（BLEU 分数和汉语 ROUGE 分数）
 
 ```bash
-python src/export_model.py \
+CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+    --stage sft \
     --model_name_or_path path_to_your_model \
+    --do_eval \
+    --dataset alpaca_gpt4_zh \
     --template default \
     --finetuning_type lora \
     --checkpoint_dir path_to_checkpoint \
-    --output_dir path_to_export
+    --output_dir path_to_eval_result \
+    --per_device_eval_batch_size 8 \
+    --max_samples 100 \
+    --predict_with_generate
+```
+
+我们建议在量化模型的评估中使用 `--per_device_eval_batch_size 1` 和 `--max_target_length 128`。
+
+### 模型预测
+
+```bash
+CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+    --stage sft \
+    --model_name_or_path path_to_your_model \
+    --do_predict \
+    --dataset alpaca_gpt4_zh \
+    --template default \
+    --finetuning_type lora \
+    --checkpoint_dir path_to_checkpoint \
+    --output_dir path_to_predict_result \
+    --per_device_eval_batch_size 8 \
+    --max_samples 100 \
+    --predict_with_generate
 ```
 
 ## TODO