update readme

Former-commit-id: 14d20cd1fdcfd1f2842362f70472b666e5d48c7d
2026-02-06 22:12:19 +08:00 · 2023-07-28 17:36:00 +08:00
parent fafec8b7a5
commit e8748cc6f3
2 changed files with 6 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -198,6 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset comparison_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
+    --checkpoint_dir path_to_sft_checkpoint \
    --output_dir path_to_rm_checkpoint \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
@@ -219,6 +221,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset alpaca_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
    --checkpoint_dir path_to_sft_checkpoint \
    --reward_model path_to_rm_checkpoint \
    --output_dir path_to_ppo_checkpoint \
@@ -229,7 +232,6 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --save_steps 1000 \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
-    --resume_lora_training False \
    --plot_loss
 ```
--- a/README_zh.md
+++ b/README_zh.md
@@ -198,6 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset comparison_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
+    --checkpoint_dir path_to_sft_checkpoint \
    --output_dir path_to_rm_checkpoint \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4 \
@@ -219,6 +221,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --do_train \
    --dataset alpaca_gpt4_en \
    --finetuning_type lora \
+    --resume_lora_training False \
    --checkpoint_dir path_to_sft_checkpoint \
    --reward_model path_to_rm_checkpoint \
    --output_dir path_to_ppo_checkpoint \
@@ -229,7 +232,6 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --save_steps 1000 \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
-    --resume_lora_training False \
    --plot_loss
 ```