From 323ec3f89f3616fce48dc62404e6377b4254f09f Mon Sep 17 00:00:00 2001
From: ldwang
Date: Wed, 24 Jan 2024 15:25:31 +0800
Subject: [PATCH] Add patch_mixtral_replace_moe_impl for full training of
 Mixtral using DeepSpeed Zero3.

Signed-off-by: ldwang

Former-commit-id: c284665425e8eefcea2d0dd1c835883e7ce18c97
---
 src/llmtuner/model/patcher.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llmtuner/model/patcher.py b/src/llmtuner/model/patcher.py
index 2c7c14b3..0c5d4470 100644
--- a/src/llmtuner/model/patcher.py
+++ b/src/llmtuner/model/patcher.py
@@ -269,6 +269,7 @@ def patch_config(
 
 
 def patch_mixtral_replace_moe_impl() -> None:
+    import torch.nn.functional as F
     def mlp_forward(self, hidden_states):
         current_hidden_states = self.act_fn(self.w1(hidden_states)) * self.w3(hidden_states)
         current_hidden_states = self.w2(current_hidden_states)
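
Context for the change above: DeepSpeed ZeRO-3 partitions every parameter
across ranks and gathers them via forward hooks, so Mixtral's stock sparse
MoE dispatch, in which an expert that receives no tokens is never invoked,
leaves those experts' parameters ungathered and breaks full training. The
hunk itself only shows the added function-scope import; below is a minimal
sketch of the dense-routing replacement a function like this installs.
Everything beyond the mlp_forward lines visible in the hunk (moe_forward,
the transformers class names, the top_k/num_experts attributes) is an
assumption based on transformers 4.36-era Mixtral internals, not text taken
from this patch.

import torch


def patch_mixtral_replace_moe_impl() -> None:
    import torch.nn.functional as F

    def mlp_forward(self, hidden_states):
        # Per-expert SwiGLU MLP, same math as the stock expert forward.
        current_hidden_states = self.act_fn(self.w1(hidden_states)) * self.w3(hidden_states)
        current_hidden_states = self.w2(current_hidden_states)
        return current_hidden_states

    def moe_forward(self, hidden_states):
        # Assumed replacement for MixtralSparseMoeBlock.forward: route
        # densely so that *every* expert module is called on every step,
        # letting ZeRO-3's hooks gather and release each expert's
        # partitioned weights on every forward/backward pass.
        batch_size, sequence_length, hidden_dim = hidden_states.shape
        hidden_states = hidden_states.view(-1, hidden_dim)
        router_logits = self.gate(hidden_states)  # (tokens, n_experts)
        routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float)
        topk_weight, topk_idx = torch.topk(routing_weights, self.top_k, dim=-1, sorted=False)
        topk_weight = topk_weight / topk_weight.sum(dim=-1, keepdim=True)
        topk_weight = topk_weight.to(hidden_states.dtype)

        # Replicate each token top_k times, then run every expert over its
        # assigned slice; an expert with an empty slice still executes, so
        # its parameters are still touched by the ZeRO-3 hooks.
        hidden_states = hidden_states.repeat_interleave(self.top_k, dim=0)
        y = torch.empty_like(hidden_states)
        flat_topk_idx = topk_idx.view(-1)
        for i in range(self.num_experts):
            y[flat_topk_idx == i] = self.experts[i](hidden_states[flat_topk_idx == i])
        y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1)
        return y.reshape(batch_size, sequence_length, hidden_dim), router_logits

    # Monkey-patch the transformers classes in place. The expert class is
    # spelled MixtralBLockSparseTop2MLP (sic) in transformers 4.36.x; later
    # releases rename it, so this identifier is version-dependent.
    from transformers.models.mixtral import modeling_mixtral

    modeling_mixtral.MixtralBLockSparseTop2MLP.forward = mlp_forward
    modeling_mixtral.MixtralSparseMoeBlock.forward = moe_forward

The trade-off is deliberate: dense routing replicates each token for all
top_k experts and calls every expert on every step, costing extra compute
and memory, in exchange for an execution graph that is identical across
ranks and iterations, which is what ZeRO-3's gather/partition hooks need.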