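Update README: add --resume_lora_training and --checkpoint_dir to the reward-model example; move --resume_lora_training up in the PPO example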

Former-commit-id: 5ee87138e4
2026-03-13 15:36:00 +08:00 · 2023-07-28 17:36:00 +08:00
parent 3e3652f4e0
commit f65f0745cc
2 changed files with 6 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -198,6 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset comparison_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
+    --checkpoint_dir path_to_sft_checkpoint \
    --output_dir path_to_rm_checkpoint \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
@@ -219,6 +221,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset alpaca_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
    --checkpoint_dir path_to_sft_checkpoint \
    --reward_model path_to_rm_checkpoint \
    --output_dir path_to_ppo_checkpoint \
@@ -229,7 +232,6 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --save_steps 1000 \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
-    --resume_lora_training False \
    --plot_loss
 ```

--- a/README_zh.md
+++ b/README_zh.md
@@ -198,6 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset comparison_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
+    --checkpoint_dir path_to_sft_checkpoint \
    --output_dir path_to_rm_checkpoint \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
@@ -219,6 +221,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset alpaca_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
    --checkpoint_dir path_to_sft_checkpoint \
    --reward_model path_to_rm_checkpoint \
    --output_dir path_to_ppo_checkpoint \
@@ -229,7 +232,6 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --save_steps 1000 \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
-    --resume_lora_training False \
    --plot_loss
 ```