[v1] add accelerator (#9607)

2025-12-18 04:40:35 +08:00 · 2025-12-12 19:22:06 +08:00
parent 4fd94141a4
commit 203069e11c
36 changed files with 941 additions and 443 deletions
--- a/src/llamafactory/v1/utils/__init__.py
+++ b/src/llamafactory/v1/utils/__init__.py
--- a/src/llamafactory/v1/utils/constants.py
+++ b/src/llamafactory/v1/utils/constants.py
@@ -0,0 +1,13 @@
+# Copyright 2025 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/src/llamafactory/v1/utils/logging.py
+++ b/src/llamafactory/v1/utils/logging.py
@@ -0,0 +1,123 @@
+# Copyright 2025 Optuna, HuggingFace Inc. and the LlamaFactory team.
+#
+# This code is inspired by the HuggingFace's transformers library.
+# https://github.com/huggingface/transformers/blob/v5.0.0rc0/src/transformers/utils/logging.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import sys
+import threading
+from functools import lru_cache
+from typing import Optional
+
+
+# Re-entrant lock held by `_configure_library_root_logger` while it installs the handler.
+_thread_lock = threading.RLock()
+# Handler attached to the library root logger; stays `None` until the first configuration.
+_default_handler: Optional["logging.Handler"] = None
+# Level returned by `_get_default_logging_level` when `LLAMAFACTORY_VERBOSITY` is unset or empty.
+_default_log_level: "logging._Level" = logging.INFO
+
+
+class _Logger(logging.Logger):
+    r"""A logger that supports rank0 logging.
+
+    The methods defined on this class forward unconditionally to ``info`` /
+    ``warning``. The rank-aware implementations are the module-level functions
+    of the same names, which this module assigns onto ``logging.Logger``.
+
+    NOTE(review): within this module the class is only used as the annotated
+    return type of the logger getters -- confirm before instantiating it
+    directly, since these overrides perform no rank check.
+    """
+
+    def info_rank0(self, *args, **kwargs) -> None:
+        # Plain pass-through to `info`; no rank filtering happens here.
+        self.info(*args, **kwargs)
+
+    def warning_rank0(self, *args, **kwargs) -> None:
+        # Plain pass-through to `warning`; no rank filtering happens here.
+        self.warning(*args, **kwargs)
+
+    def warning_rank0_once(self, *args, **kwargs) -> None:
+        # Plain pass-through to `warning`; no rank filtering or de-duplication here.
+        self.warning(*args, **kwargs)
+
+
+def _get_default_logging_level() -> "logging._Level":
+    r"""Resolve the level the library root logger starts with.
+
+    Reads the ``LLAMAFACTORY_VERBOSITY`` environment variable as a
+    case-insensitive level name. When it is unset or empty, the module default
+    level is returned.
+
+    Raises:
+        ValueError: If the variable holds a name that ``logging`` does not know.
+    """
+    requested = os.getenv("LLAMAFACTORY_VERBOSITY", None)
+    if not requested:
+        return _default_log_level
+
+    level_key = requested.upper()
+    if level_key not in logging._nameToLevel:
+        raise ValueError(f"Unknown logging level: {requested}.")
+
+    return logging._nameToLevel[level_key]
+
+
+def _get_library_name() -> str:
+    r"""Return the first dotted component of this module's ``__name__``."""
+    top_level, _, _ = __name__.partition(".")
+    return top_level
+
+
+def _get_library_root_logger() -> "_Logger":
+    r"""Return the logger registered under the library's top-level name."""
+    root_name = _get_library_name()
+    return logging.getLogger(root_name)
+
+
+def _configure_library_root_logger() -> None:
+    r"""Install the default stdout handler on the library root logger, once.
+
+    The work is done under ``_thread_lock``. Calls made after the handler has
+    been created do nothing. Besides attaching the handler, the root logger's
+    level is set from ``_get_default_logging_level`` and propagation to
+    ancestor loggers is turned off.
+    """
+    global _default_handler
+
+    with _thread_lock:
+        if not _default_handler:  # first call: nothing configured yet
+            stream_handler = logging.StreamHandler(sys.stdout)
+            stream_handler.setFormatter(
+                logging.Formatter(
+                    fmt="[%(levelname)s|%(asctime)s] %(name)s:%(lineno)s >> %(message)s",
+                    datefmt="%Y-%m-%d %H:%M:%S",
+                )
+            )
+            _default_handler = stream_handler
+
+            root_logger = _get_library_root_logger()
+            root_logger.addHandler(stream_handler)
+            root_logger.setLevel(_get_default_logging_level())
+            root_logger.propagate = False
+
+
+def get_logger(name: Optional[str] = None) -> "_Logger":
+    r"""Return the logger called ``name``. It is not supposed to be accessed externally.
+
+    The library root logger is configured first if that has not happened yet.
+
+    Args:
+        name: Logger name; ``None`` selects the library's top-level logger.
+    """
+    _configure_library_root_logger()
+    target = _get_library_name() if name is None else name
+    return logging.getLogger(target)
+
+
+def add_handler(handler: "logging.Handler") -> None:
+    r"""Attach ``handler`` to the library root logger, configuring that logger first if needed."""
+    _configure_library_root_logger()
+    root_logger = _get_library_root_logger()
+    root_logger.addHandler(handler)
+
+
+def remove_handler(handler: logging.Handler) -> None:
+    r"""Detach ``handler`` from the library root logger, configuring that logger first if needed."""
+    _configure_library_root_logger()
+    root_logger = _get_library_root_logger()
+    root_logger.removeHandler(handler)
+
+
+def info_rank0(self: "logging.Logger", *args, **kwargs) -> None:
+    r"""Forward to ``self.info`` only when ``LOCAL_RANK`` is 0 (or unset)."""
+    local_rank = int(os.getenv("LOCAL_RANK", "0"))
+    if local_rank != 0:
+        return
+
+    self.info(*args, **kwargs)
+
+
+def warning_rank0(self: "logging.Logger", *args, **kwargs) -> None:
+    r"""Forward to ``self.warning`` only when ``LOCAL_RANK`` is 0 (or unset)."""
+    local_rank = int(os.getenv("LOCAL_RANK", "0"))
+    if local_rank != 0:
+        return
+
+    self.warning(*args, **kwargs)
+
+
+# `lru_cache(None)` is an unbounded cache keyed on the logger and every argument:
+# a repeated call with the same logger and arguments returns the cached `None`
+# without running the body again, which is what makes the warning fire "once".
+# All arguments therefore have to be hashable. Calls on ranks other than 0 are
+# cached as well, even though they emit nothing.
+@lru_cache(None)
+def warning_rank0_once(self: "logging.Logger", *args, **kwargs) -> None:
+    r"""Forward to ``self.warning`` when ``LOCAL_RANK`` is 0 (or unset), at most once per distinct call."""
+    if int(os.getenv("LOCAL_RANK", "0")) == 0:
+        self.warning(*args, **kwargs)
+
+
+# Attach the rank-aware helpers to the standard `logging.Logger` class itself, so
+# loggers returned by `logging.getLogger` expose them as methods.
+logging.Logger.info_rank0 = info_rank0
+logging.Logger.warning_rank0 = warning_rank0
+logging.Logger.warning_rank0_once = warning_rank0_once
--- a/src/llamafactory/v1/utils/packages.py
+++ b/src/llamafactory/v1/utils/packages.py
@@ -0,0 +1,43 @@
+# Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
+#
+# This code is inspired by the HuggingFace's transformers library.
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib.metadata
+import importlib.util
+from functools import lru_cache
+from typing import TYPE_CHECKING
+
+from packaging import version
+
+
+if TYPE_CHECKING:
+    from packaging.version import Version
+
+
+def _is_package_available(name: str) -> bool:
+    r"""Report whether ``name`` can be located by the import system.
+
+    Args:
+        name: Module or package name, optionally dotted (``"pkg.sub"``).
+
+    Returns:
+        ``True`` when ``importlib.util.find_spec`` finds a spec, ``False`` when
+        it finds none or when the parent package of a dotted name is missing.
+    """
+    try:
+        return importlib.util.find_spec(name) is not None
+    except ModuleNotFoundError:
+        # For a dotted name `find_spec` imports the parent package first; a
+        # missing parent surfaces as this exception instead of a `None` result.
+        return False
+
+
+def _get_package_version(name: str) -> "Version":
+    r"""Return the installed version of distribution ``name``.
+
+    Any failure while looking up or parsing the version yields ``0.0.0``
+    instead of an exception.
+    """
+    try:
+        raw_version = importlib.metadata.version(name)
+        parsed = version.parse(raw_version)
+    except Exception:
+        parsed = version.parse("0.0.0")
+
+    return parsed
+
+
+@lru_cache
+def is_transformers_version_greater_than(content: str):
+    r"""Check whether the installed ``transformers`` is at least version ``content``.
+
+    Despite the function name, the comparison is inclusive (``>=``). Results
+    are memoized per ``content`` string.
+    """
+    installed = _get_package_version("transformers")
+    required = version.parse(content)
+    return installed >= required
--- a/src/llamafactory/v1/utils/plugin.py
+++ b/src/llamafactory/v1/utils/plugin.py
@@ -0,0 +1,86 @@
+# Copyright 2025 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from typing import Any, Callable, Optional
+
+from . import logging
+
+
+# Module-level logger, created through the package's own `logging` helper module.
+logger = logging.get_logger(__name__)
+
+
+class BasePlugin:
+    """Base class for plugins.
+
+    A plugin is a callable object that can be registered and called by name.
+
+    The registry is a single class-level dict defined on this class and
+    registration mutates it in place, so subclasses that do not define their
+    own ``_registry`` share one namespace of plugin names.
+    """
+
+    _registry: dict[str, Callable] = {}
+
+    def __init__(self, name: Optional[str] = None):
+        """Initialize the plugin with a name.
+
+        Args:
+            name (Optional[str]): The name of the plugin. It may be omitted,
+                but ``register`` raises while the name is ``None``.
+        """
+        self.name = name
+
+    @property
+    def register(self) -> Callable:
+        """Decorator to register a function as a plugin.
+
+        This is a property, so it is used without call parentheses. Accessing
+        it logs a warning if the name is already registered. Applying the
+        returned decorator stores the function under ``self.name``, replacing
+        any previous entry, and returns the function unchanged.
+
+        Example usage:
+        ```python
+        @PrintPlugin("hello").register
+        def print_hello():
+            print("Hello world!")
+        ```
+
+        Raises:
+            ValueError: If the plugin has no name.
+        """
+        if self.name is None:
+            raise ValueError("Plugin name is not specified.")
+
+        if self.name in self._registry:
+            logger.warning_rank0_once(f"Plugin {self.name} is already registered.")
+
+        def decorator(func: Callable) -> Callable:
+            self._registry[self.name] = func
+            return func
+
+        return decorator
+
+    def __call__(self, *args, **kwargs) -> Any:
+        """Call the registered function with the given arguments.
+
+        Example usage:
+        ```python
+        PrintPlugin("hello")()
+        ```
+
+        Returns:
+            Whatever the registered function returns.
+
+        Raises:
+            ValueError: If no function is registered under ``self.name``.
+        """
+        if self.name not in self._registry:
+            raise ValueError(f"Plugin {self.name} is not registered.")
+
+        return self._registry[self.name](*args, **kwargs)
+
+
+# Minimal demo of the registry, executed only when this file is run as a script.
+if __name__ == "__main__":
+
+    class PrintPlugin(BasePlugin):
+        pass
+
+    # `register` is a property, hence no call parentheses on the decorator.
+    @PrintPlugin("hello").register
+    def print_hello():
+        print("Hello world!")
+
+    # A new instance with the same name finds the function through the class-level registry.
+    PrintPlugin("hello")()
--- a/src/llamafactory/v1/utils/types.py
+++ b/src/llamafactory/v1/utils/types.py
@@ -0,0 +1,96 @@
+# Copyright 2025 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING, Literal, TypedDict, Union
+
+from typing_extensions import NotRequired
+
+
+# The third-party packages below are imported for static type checkers only. At
+# runtime the `else` branch binds every alias to `None`, so importing this module
+# does not import datasets, numpy, torch or transformers.
+if TYPE_CHECKING:
+    import datasets
+    import numpy as np
+    import torch
+    import torch.utils.data
+    import transformers
+    from torch.distributed.fsdp import FullyShardedDataParallel
+
+    Tensor = torch.Tensor
+    TensorLike = Union[int, float, list[int], list[float], np.ndarray, Tensor]
+    TorchDataset = Union[torch.utils.data.Dataset, torch.utils.data.IterableDataset]
+    HFDataset = Union[datasets.Dataset, datasets.IterableDataset]
+    DataCollator = transformers.DataCollator
+    DataLoader = torch.utils.data.DataLoader
+    HFConfig = transformers.PretrainedConfig
+    HFModel = transformers.PreTrainedModel
+    DistModel = Union[torch.nn.parallel.DistributedDataParallel, FullyShardedDataParallel]
+    Processor = Union[transformers.PreTrainedTokenizer, transformers.ProcessorMixin]
+    Optimizer = torch.optim.Optimizer
+    Scheduler = torch.optim.lr_scheduler.LRScheduler
+else:
+    # Runtime placeholders: the names exist so they can be imported, but they are
+    # all `None` and carry no type information outside type checking.
+    Tensor = None
+    TensorLike = None
+    TorchDataset = None
+    HFDataset = None
+    DataCollator = None
+    DataLoader = None
+    HFConfig = None
+    HFModel = None
+    DistModel = None
+    Processor = None
+    Optimizer = None
+    Scheduler = None
+
+
+# NOTE(review): `total=False` already makes every key optional for type checkers,
+# including `path`, so the `NotRequired[...]` markers below add nothing. If `path`
+# is meant to be mandatory, dropping `total=False` would express that -- confirm
+# against the callers before changing it.
+class DatasetInfo(TypedDict, total=False):
+    path: str
+    """Local file path."""
+    source: NotRequired[Literal["hf_hub", "ms_hub", "local"]]
+    """Dataset source, default to "hf_hub"."""
+    split: NotRequired[str]
+    """Dataset split, default to "train"."""
+    converter: NotRequired[str]
+    """Dataset converter, default to None."""
+    size: NotRequired[int]
+    """Number of samples, default to all samples."""
+    weight: NotRequired[float]
+    """Dataset weight, default to 1.0."""
+    streaming: NotRequired[bool]
+    """Is streaming dataset, default to False."""
+
+
+class Content(TypedDict):
+    """One content item: a ``type`` tag from a fixed set of names plus a string ``value``."""
+
+    type: Literal["text", "reasoning", "tools", "tool_calls", "image_url"]
+    value: str
+
+
+class Message(TypedDict):
+    """One message: a ``role``, a list of ``Content`` items and a float ``loss_weight``."""
+
+    role: Literal["system", "user", "assistant", "tool"]
+    content: list[Content]
+    loss_weight: float
+
+
+class SFTSample(TypedDict):
+    """Sample holding a single list of messages; ``extra_info`` and ``_dataset_name`` are optional keys."""
+
+    messages: list[Message]
+    extra_info: NotRequired[str]
+    _dataset_name: NotRequired[str]
+
+
+class DPOSample(TypedDict):
+    """Sample holding a chosen and a rejected list of messages; ``extra_info`` and ``_dataset_name`` are optional keys."""
+
+    chosen_messages: list[Message]
+    rejected_messages: list[Message]
+    extra_info: NotRequired[str]
+    _dataset_name: NotRequired[str]
+
+
+# Either of the two sample layouts defined in this module.
+Sample = Union[SFTSample, DPOSample]