rename files

Former-commit-id: 74f96efef9
2025-12-16 11:50:35 +08:00 · 2024-06-07 00:09:06 +08:00
parent fd2c64315b
commit 8da149ba40
43 changed files with 53 additions and 53 deletions
--- a/src/llamafactory/model/model_utils/embedding.py
+++ b/src/llamafactory/model/model_utils/embedding.py
@@ -0,0 +1,58 @@
+import math
+from contextlib import nullcontext
+from typing import TYPE_CHECKING
+
+import torch
+from transformers.integrations import is_deepspeed_zero3_enabled
+
+from ...extras.logging import get_logger
+
+
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel, PreTrainedTokenizer
+
+
+logger = get_logger(__name__)
+
+
+def _noisy_mean_initialization(embed_weight: "torch.Tensor", num_new_tokens: int) -> None:
+    embedding_dim = embed_weight.size(1)
+    avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True)
+    noise_weight = torch.empty_like(embed_weight[-num_new_tokens:])
+    noise_weight.normal_(mean=0, std=(1.0 / math.sqrt(embedding_dim)))
+    embed_weight[-num_new_tokens:] = avg_weight + noise_weight
+
+
+def resize_embedding_layer(model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer") -> None:
+    r"""
+    Resize token embeddings.
+    """
+    if is_deepspeed_zero3_enabled():
+        import deepspeed  # type: ignore
+
+        params = [model.get_input_embeddings().weight]
+        if model.get_output_embeddings() is not None and not model.config.tie_word_embeddings:
+            params.append(model.get_output_embeddings().weight)
+
+        context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0)
+    else:
+        context_maybe_zero3 = nullcontext()
+
+    with context_maybe_zero3:
+        current_embedding_size = model.get_input_embeddings().weight.size(0)
+
+    if len(tokenizer) > current_embedding_size:
+        if getattr(model, "quantization_method", None):
+            raise ValueError("Cannot resize embedding layers of a quantized model.")
+
+        if not isinstance(model.get_output_embeddings(), torch.nn.Linear):
+            raise ValueError("Current model does not support resizing embedding layers.")
+
+        model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)
+        with context_maybe_zero3:
+            new_embedding_size = model.get_input_embeddings().weight.size(0)
+            num_new_tokens = new_embedding_size - current_embedding_size
+            _noisy_mean_initialization(model.get_input_embeddings().weight.data, num_new_tokens)
+            _noisy_mean_initialization(model.get_output_embeddings().weight.data, num_new_tokens)
+
+        logger.info("Resized token embeddings from {} to {}.".format(current_embedding_size, new_embedding_size))