From bcd31cf2456a0584765158106292e9b777e84570 Mon Sep 17 00:00:00 2001 From: Yuchen Han <42163912+hannlp@users.noreply.github.com> Date: Fri, 17 Nov 2023 00:15:51 -0800 Subject: [PATCH 1/4] Update finetuning_args.py Former-commit-id: 30e3430553f1f7e09cd57ef2c9843b549746c618 --- src/llmtuner/hparams/finetuning_args.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py index cfdc8b24..d39812c7 100644 --- a/src/llmtuner/hparams/finetuning_args.py +++ b/src/llmtuner/hparams/finetuning_args.py @@ -74,6 +74,10 @@ class RLHFArguments: default=None, metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."} ) + ppo_epochs: Optional[int] = field( + default=4, + metadata={"help": "Number of optimisation epochs per batch of samples"}, + ) ppo_score_norm: Optional[bool] = field( default=False, metadata={"help": "Use score normalization in PPO training."} From 6af7107938cce678ae04b534e9d1132d322fd10a Mon Sep 17 00:00:00 2001 From: Yuchen Han <42163912+hannlp@users.noreply.github.com> Date: Fri, 17 Nov 2023 00:16:27 -0800 Subject: [PATCH 2/4] Update workflow.py Former-commit-id: f70b7ffe6442217a222e0ef797c407f259a13886 --- src/llmtuner/train/ppo/workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llmtuner/train/ppo/workflow.py b/src/llmtuner/train/ppo/workflow.py index 41a99e2c..4fb9d593 100644 --- a/src/llmtuner/train/ppo/workflow.py +++ b/src/llmtuner/train/ppo/workflow.py @@ -45,7 +45,7 @@ def run_ppo( mini_batch_size=training_args.per_device_train_batch_size, batch_size=training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps, gradient_accumulation_steps=training_args.gradient_accumulation_steps, - ppo_epochs=1, + ppo_epochs=finetuning_args.ppo_epochs, max_grad_norm=training_args.max_grad_norm, seed=training_args.seed, optimize_device_cache=True, From cab80a3c568e136757677bcc3ca5df8e32d8c4b5 Mon Sep 17 00:00:00 2001 From: Yuchen Han <42163912+hannlp@users.noreply.github.com> Date: Fri, 17 Nov 2023 00:17:36 -0800 Subject: [PATCH 3/4] Update README.md Former-commit-id: c1532dc6fe5d5b427011bd5509a2bc44ee16d951 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 7ba2b748..88a820da 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --per_device_train_batch_size 2 \ --gradient_accumulation_steps 4 \ --lr_scheduler_type cosine \ + --top_k 0 \ + --top_p 0.9 \ --logging_steps 10 \ --save_steps 1000 \ --learning_rate 1e-5 \ From 992be39f904ddbd29acc9f217de934fbedb7cbe4 Mon Sep 17 00:00:00 2001 From: Yuchen Han <42163912+hannlp@users.noreply.github.com> Date: Fri, 17 Nov 2023 00:18:07 -0800 Subject: [PATCH 4/4] Update README_zh.md Former-commit-id: 3e8a17c92d700bcafbe6559ea689dc4c0ad0481a --- README_zh.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README_zh.md b/README_zh.md index dbcee80a..f03f27f0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -288,6 +288,8 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ --per_device_train_batch_size 2 \ --gradient_accumulation_steps 4 \ --lr_scheduler_type cosine \ + --top_k 0 \ + --top_p 0.9 \ --logging_steps 10 \ --save_steps 1000 \ --learning_rate 1e-5 \