diff --git a/README.md b/README.md index ec62a5a6..94458c80 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --do_train \ --dataset comparison_gpt4_en \ --finetuning_type lora \ + --resume_lora_training False \ + --checkpoint_dir path_to_sft_checkpoint \ --output_dir path_to_rm_checkpoint \ --per_device_train_batch_size 4 \ --gradient_accumulation_steps 4 \ @@ -219,6 +221,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --do_train \ --dataset alpaca_gpt4_en \ --finetuning_type lora \ + --resume_lora_training False \ --checkpoint_dir path_to_sft_checkpoint \ --reward_model path_to_rm_checkpoint \ --output_dir path_to_ppo_checkpoint \ @@ -229,7 +232,6 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --save_steps 1000 \ --learning_rate 1e-5 \ --num_train_epochs 1.0 \ - --resume_lora_training False \ --plot_loss ``` diff --git a/README_zh.md b/README_zh.md index abf674bc..4d882b62 100644 --- a/README_zh.md +++ b/README_zh.md @@ -198,6 +198,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --do_train \ --dataset comparison_gpt4_en \ --finetuning_type lora \ + --resume_lora_training False \ + --checkpoint_dir path_to_sft_checkpoint \ --output_dir path_to_rm_checkpoint \ --per_device_train_batch_size 4 \ --gradient_accumulation_steps 4 \ @@ -219,6 +221,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --do_train \ --dataset alpaca_gpt4_en \ --finetuning_type lora \ + --resume_lora_training False \ --checkpoint_dir path_to_sft_checkpoint \ --reward_model path_to_rm_checkpoint \ --output_dir path_to_ppo_checkpoint \ @@ -229,7 +232,6 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --save_steps 1000 \ --learning_rate 1e-5 \ --num_train_epochs 1.0 \ - --resume_lora_training False \ --plot_loss ```