support rank0 logger

Former-commit-id: c38aa29336
This commit is contained in:
hiyouga
2024-11-02 18:31:04 +08:00
parent 4b2c47fcae
commit e83cb17f97
42 changed files with 316 additions and 252 deletions

View File

@@ -20,6 +20,7 @@ import os
import sys
import threading
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
from typing import Optional
from .constants import RUNNING_LOG
@@ -37,12 +38,11 @@ class LoggerHandler(logging.Handler):
def __init__(self, output_dir: str) -> None:
super().__init__()
formatter = logging.Formatter(
fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S"
self._formatter = logging.Formatter(
fmt="[%(levelname)s|%(asctime)s] %(filename)s:%(lineno)s >> %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
self.setLevel(logging.INFO)
self.setFormatter(formatter)
os.makedirs(output_dir, exist_ok=True)
self.running_log = os.path.join(output_dir, RUNNING_LOG)
if os.path.exists(self.running_log):
@@ -58,7 +58,7 @@ class LoggerHandler(logging.Handler):
if record.name == "httpx":
return
log_entry = self.format(record)
log_entry = self._formatter.format(record)
self.thread_pool.submit(self._write_log, log_entry)
def close(self) -> None:
@@ -66,6 +66,21 @@ class LoggerHandler(logging.Handler):
return super().close()
class _Logger(logging.Logger):
r"""
A logger that supports info_rank0 and warning_once.
"""
def info_rank0(self, *args, **kwargs) -> None:
self.info(*args, **kwargs)
def warning_rank0(self, *args, **kwargs) -> None:
self.warning(*args, **kwargs)
def warning_once(self, *args, **kwargs) -> None:
self.warning(*args, **kwargs)
def _get_default_logging_level() -> "logging._Level":
r"""
Returns the default logging level.
@@ -84,7 +99,7 @@ def _get_library_name() -> str:
return __name__.split(".")[0]
def _get_library_root_logger() -> "logging.Logger":
def _get_library_root_logger() -> "_Logger":
return logging.getLogger(_get_library_name())
@@ -95,12 +110,12 @@ def _configure_library_root_logger() -> None:
global _default_handler
with _thread_lock:
if _default_handler:
if _default_handler: # already configured
return
formatter = logging.Formatter(
fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
fmt="[%(levelname)s|%(asctime)s] %(name)s:%(lineno)s >> %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
_default_handler = logging.StreamHandler(sys.stdout)
_default_handler.setFormatter(formatter)
@@ -110,7 +125,7 @@ def _configure_library_root_logger() -> None:
library_root_logger.propagate = False
def get_logger(name: Optional[str] = None) -> "logging.Logger":
def get_logger(name: Optional[str] = None) -> "_Logger":
r"""
Returns a logger with the specified name. It it not supposed to be accessed externally.
"""
@@ -119,3 +134,40 @@ def get_logger(name: Optional[str] = None) -> "logging.Logger":
_configure_library_root_logger()
return logging.getLogger(name)
def add_handler(handler: "logging.Handler") -> None:
r"""
Adds a handler to the root logger.
"""
_configure_library_root_logger()
_get_library_root_logger().addHandler(handler)
def remove_handler(handler: logging.Handler) -> None:
r"""
Removes a handler to the root logger.
"""
_configure_library_root_logger()
_get_library_root_logger().removeHandler(handler)
def info_rank0(self: "logging.Logger", *args, **kwargs) -> None:
if int(os.getenv("LOCAL_RANK", "0")) == 0:
self.info(*args, **kwargs)
def warning_rank0(self: "logging.Logger", *args, **kwargs) -> None:
if int(os.getenv("LOCAL_RANK", "0")) == 0:
self.warning(*args, **kwargs)
@lru_cache(None)
def warning_once(self: "logging.Logger", *args, **kwargs) -> None:
if int(os.getenv("LOCAL_RANK", "0")) == 0:
self.warning(*args, **kwargs)
logging.Logger.info_rank0 = info_rank0
logging.Logger.warning_rank0 = warning_rank0
logging.Logger.warning_once = warning_once

View File

@@ -32,7 +32,7 @@ from transformers.utils import (
)
from transformers.utils.versions import require_version
from .logging import get_logger
from . import logging
_is_fp16_available = is_torch_npu_available() or is_torch_cuda_available()
@@ -48,7 +48,7 @@ if TYPE_CHECKING:
from ..hparams import ModelArguments
logger = get_logger(__name__)
logger = logging.get_logger(__name__)
class AverageMeter:
@@ -76,8 +76,8 @@ def check_dependencies() -> None:
r"""
Checks the version of the required packages.
"""
if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]:
logger.warning("Version checking has been disabled, may lead to unexpected behaviors.")
if os.getenv("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]:
logger.warning_once("Version checking has been disabled, may lead to unexpected behaviors.")
else:
require_version("transformers>=4.41.2,<=4.46.1", "To fix: pip install transformers>=4.41.2,<=4.46.1")
require_version("datasets>=2.16.0,<=3.0.2", "To fix: pip install datasets>=2.16.0,<=3.0.2")

View File

@@ -19,7 +19,7 @@ from typing import Any, Dict, List
from transformers.trainer import TRAINER_STATE_NAME
from .logging import get_logger
from . import logging
from .packages import is_matplotlib_available
@@ -28,7 +28,7 @@ if is_matplotlib_available():
import matplotlib.pyplot as plt
logger = get_logger(__name__)
logger = logging.get_logger(__name__)
def smooth(scalars: List[float]) -> List[float]:
@@ -86,7 +86,7 @@ def plot_loss(save_dictionary: str, keys: List[str] = ["loss"]) -> None:
metrics.append(data["log_history"][i][key])
if len(metrics) == 0:
logger.warning(f"No metric {key} to plot.")
logger.warning_rank0(f"No metric {key} to plot.")
continue
plt.figure()