[v1] add cuda fused moe kernel, implementing with triton (#10481)

This commit is contained in:
浮梦
2026-05-20 20:49:42 +08:00
committed by GitHub
parent 368c48968f
commit 2322bf1cc2
7 changed files with 856 additions and 10 deletions

View File

@@ -120,10 +120,10 @@ class CustomKTOTrainer(KTOTrainer):
self.add_callback(BAdamCallback)
@override
-def create_optimizer(self) -> "torch.optim.Optimizer":
+def create_optimizer(self, *args, **kwargs) -> "torch.optim.Optimizer":
if self.optimizer is None:
self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
-return super().create_optimizer()
+return super().create_optimizer(*args, **kwargs)
@override
def create_scheduler(