Support distributed BAdam.

This commit is contained in:
Jonery
2024-06-18 12:27:47 +08:00
parent ea1f3ba5e0
commit 0f72aac8c9
7 changed files with 46 additions and 30 deletions

View File

@@ -76,6 +76,12 @@ class PairwiseTrainer(Trainer):
self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator)
if (self.args.deepspeed_plugin is not None
and self.args.deepspeed_plugin.zero_stage == 3
):
from badam.utils import BAdamZeRO3Callback
self.callback_handler.add_callback(BAdamZeRO3Callback)
def create_optimizer(self) -> "torch.optim.Optimizer":
if self.optimizer is None:
self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)