Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2026-02-09 07:22:19 +08:00)
[deps] goodbye python 3.9 (#9677)
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: hiyouga <16256802+hiyouga@users.noreply.github.com>
Co-authored-by: hiyouga <hiyouga@buaa.edu.cn>
This commit is contained in:
@@ -15,7 +15,7 @@ import json
|
||||
import os
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Union
|
||||
|
||||
from ..extras import logging
|
||||
from .data_utils import Role
|
||||
@@ -40,7 +40,7 @@ class DatasetConverter:
|
||||
dataset_attr: "DatasetAttr"
|
||||
data_args: "DataArguments"
|
||||
|
||||
def _find_medias(self, medias: Union["MediaType", list["MediaType"], None]) -> Optional[list["MediaType"]]:
|
||||
def _find_medias(self, medias: Union["MediaType", list["MediaType"], None]) -> list["MediaType"] | None:
|
||||
r"""Optionally concatenate media path to media dir when loading from local disk."""
|
||||
if medias is None:
|
||||
return None
|
||||
|
||||
@@ -16,7 +16,6 @@ import json
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, Union
|
||||
|
||||
from typing_extensions import override
|
||||
|
||||
@@ -27,14 +26,14 @@ from .tool_utils import FunctionCall, get_tool_utils
|
||||
@dataclass
|
||||
class Formatter(ABC):
|
||||
slots: SLOTS = field(default_factory=list)
|
||||
tool_format: Optional[str] = None
|
||||
tool_format: str | None = None
|
||||
|
||||
@abstractmethod
|
||||
def apply(self, **kwargs) -> SLOTS:
|
||||
r"""Forms a list of slots according to the inputs to encode."""
|
||||
...
|
||||
|
||||
def extract(self, content: str) -> Union[str, list["FunctionCall"]]:
|
||||
def extract(self, content: str) -> str | list["FunctionCall"]:
|
||||
r"""Extract a list of tuples from the response message if using tools.
|
||||
|
||||
Each tuple consists of function name and function arguments.
|
||||
@@ -156,5 +155,5 @@ class ToolFormatter(Formatter):
|
||||
raise RuntimeError(f"Invalid JSON format in tool description: {str([content])}.") # flat string
|
||||
|
||||
@override
|
||||
def extract(self, content: str) -> Union[str, list["FunctionCall"]]:
|
||||
def extract(self, content: str) -> str | list["FunctionCall"]:
|
||||
return self.tool_utils.tool_extractor(content)
|
||||
|
||||
@@ -162,13 +162,13 @@ def _load_single_dataset(
|
||||
|
||||
|
||||
def _get_merged_dataset(
|
||||
dataset_names: Optional[list[str]],
|
||||
dataset_names: list[str] | None,
|
||||
model_args: "ModelArguments",
|
||||
data_args: "DataArguments",
|
||||
training_args: "Seq2SeqTrainingArguments",
|
||||
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
|
||||
return_dict: bool = False,
|
||||
) -> Optional[Union["Dataset", "IterableDataset", dict[str, "Dataset"]]]:
|
||||
) -> Union["Dataset", "IterableDataset", dict[str, "Dataset"]] | None:
|
||||
r"""Return the merged datasets in the standard format."""
|
||||
if dataset_names is None:
|
||||
return None
|
||||
@@ -227,7 +227,7 @@ def _get_dataset_processor(
|
||||
|
||||
|
||||
def _get_preprocessed_dataset(
|
||||
dataset: Optional[Union["Dataset", "IterableDataset"]],
|
||||
dataset: Union["Dataset", "IterableDataset"] | None,
|
||||
data_args: "DataArguments",
|
||||
training_args: "Seq2SeqTrainingArguments",
|
||||
stage: Literal["pt", "sft", "rm", "ppo", "kto"],
|
||||
@@ -235,7 +235,7 @@ def _get_preprocessed_dataset(
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["ProcessorMixin"] = None,
|
||||
is_eval: bool = False,
|
||||
) -> Optional[Union["Dataset", "IterableDataset"]]:
|
||||
) -> Union["Dataset", "IterableDataset"] | None:
|
||||
r"""Preprocesses the dataset, including format checking and tokenization."""
|
||||
if dataset is None:
|
||||
return None
|
||||
|
||||
@@ -22,7 +22,7 @@ import re
|
||||
from copy import deepcopy
|
||||
from dataclasses import dataclass
|
||||
from io import BytesIO
|
||||
from typing import TYPE_CHECKING, BinaryIO, Literal, Optional, TypedDict, Union
|
||||
from typing import TYPE_CHECKING, BinaryIO, Literal, NotRequired, Optional, TypedDict, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
@@ -32,7 +32,7 @@ from transformers.models.mllama.processing_mllama import (
|
||||
convert_sparse_cross_attention_mask_to_dense,
|
||||
get_cross_attention_token_mask,
|
||||
)
|
||||
from typing_extensions import NotRequired, override
|
||||
from typing_extensions import override
|
||||
|
||||
from ..extras.constants import AUDIO_PLACEHOLDER, IGNORE_INDEX, IMAGE_PLACEHOLDER, VIDEO_PLACEHOLDER
|
||||
from ..extras.packages import is_pillow_available, is_pyav_available, is_transformers_version_greater_than
|
||||
@@ -63,8 +63,8 @@ if TYPE_CHECKING:
|
||||
from transformers.video_processing_utils import BaseVideoProcessor
|
||||
|
||||
class EncodedImage(TypedDict):
|
||||
path: Optional[str]
|
||||
bytes: Optional[bytes]
|
||||
path: str | None
|
||||
bytes: bytes | None
|
||||
|
||||
ImageInput = Union[str, bytes, EncodedImage, BinaryIO, ImageObject]
|
||||
VideoInput = Union[str, BinaryIO, list[list[ImageInput]]]
|
||||
@@ -144,9 +144,9 @@ def _check_video_is_nested_images(video: "VideoInput") -> bool:
|
||||
|
||||
@dataclass
|
||||
class MMPluginMixin:
|
||||
image_token: Optional[str]
|
||||
video_token: Optional[str]
|
||||
audio_token: Optional[str]
|
||||
image_token: str | None
|
||||
video_token: str | None
|
||||
audio_token: str | None
|
||||
expand_mm_tokens: bool = True
|
||||
|
||||
def _validate_input(
|
||||
@@ -328,7 +328,7 @@ class MMPluginMixin:
|
||||
videos: list["VideoInput"],
|
||||
audios: list["AudioInput"],
|
||||
processor: "MMProcessor",
|
||||
imglens: Optional[list[int]] = None,
|
||||
imglens: list[int] | None = None,
|
||||
) -> dict[str, "torch.Tensor"]:
|
||||
r"""Process visual inputs.
|
||||
|
||||
@@ -426,13 +426,13 @@ class BasePlugin(MMPluginMixin):
|
||||
def process_token_ids(
|
||||
self,
|
||||
input_ids: list[int],
|
||||
labels: Optional[list[int]],
|
||||
labels: list[int] | None,
|
||||
images: list["ImageInput"],
|
||||
videos: list["VideoInput"],
|
||||
audios: list["AudioInput"],
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["MMProcessor"],
|
||||
) -> tuple[list[int], Optional[list[int]]]:
|
||||
) -> tuple[list[int], list[int] | None]:
|
||||
r"""Pre-process token ids after tokenization for VLMs."""
|
||||
self._validate_input(processor, images, videos, audios)
|
||||
return input_ids, labels
|
||||
@@ -1305,13 +1305,13 @@ class PaliGemmaPlugin(BasePlugin):
|
||||
def process_token_ids(
|
||||
self,
|
||||
input_ids: list[int],
|
||||
labels: Optional[list[int]],
|
||||
labels: list[int] | None,
|
||||
images: list["ImageInput"],
|
||||
videos: list["VideoInput"],
|
||||
audios: list["AudioInput"],
|
||||
tokenizer: "PreTrainedTokenizer",
|
||||
processor: Optional["MMProcessor"],
|
||||
) -> tuple[list[int], Optional[list[int]]]:
|
||||
) -> tuple[list[int], list[int] | None]:
|
||||
self._validate_input(processor, images, videos, audios)
|
||||
num_images = len(images)
|
||||
image_seqlen = processor.image_seq_length if self.expand_mm_tokens else 0 # skip mm token
|
||||
@@ -2126,9 +2126,9 @@ def register_mm_plugin(name: str, plugin_class: type["BasePlugin"]) -> None:
|
||||
|
||||
def get_mm_plugin(
|
||||
name: str,
|
||||
image_token: Optional[str] = None,
|
||||
video_token: Optional[str] = None,
|
||||
audio_token: Optional[str] = None,
|
||||
image_token: str | None = None,
|
||||
video_token: str | None = None,
|
||||
audio_token: str | None = None,
|
||||
**kwargs,
|
||||
) -> "BasePlugin":
|
||||
r"""Get plugin for multimodal inputs."""
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Literal, Optional, Union
|
||||
from typing import Any, Literal
|
||||
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
@@ -33,40 +33,40 @@ class DatasetAttr:
|
||||
formatting: Literal["alpaca", "sharegpt", "openai"] = "alpaca"
|
||||
ranking: bool = False
|
||||
# extra configs
|
||||
subset: Optional[str] = None
|
||||
subset: str | None = None
|
||||
split: str = "train"
|
||||
folder: Optional[str] = None
|
||||
num_samples: Optional[int] = None
|
||||
folder: str | None = None
|
||||
num_samples: int | None = None
|
||||
# common columns
|
||||
system: Optional[str] = None
|
||||
tools: Optional[str] = None
|
||||
images: Optional[str] = None
|
||||
videos: Optional[str] = None
|
||||
audios: Optional[str] = None
|
||||
system: str | None = None
|
||||
tools: str | None = None
|
||||
images: str | None = None
|
||||
videos: str | None = None
|
||||
audios: str | None = None
|
||||
# dpo columns
|
||||
chosen: Optional[str] = None
|
||||
rejected: Optional[str] = None
|
||||
kto_tag: Optional[str] = None
|
||||
chosen: str | None = None
|
||||
rejected: str | None = None
|
||||
kto_tag: str | None = None
|
||||
# alpaca columns
|
||||
prompt: Optional[str] = "instruction"
|
||||
query: Optional[str] = "input"
|
||||
response: Optional[str] = "output"
|
||||
history: Optional[str] = None
|
||||
prompt: str | None = "instruction"
|
||||
query: str | None = "input"
|
||||
response: str | None = "output"
|
||||
history: str | None = None
|
||||
# sharegpt columns
|
||||
messages: Optional[str] = "conversations"
|
||||
messages: str | None = "conversations"
|
||||
# sharegpt tags
|
||||
role_tag: Optional[str] = "from"
|
||||
content_tag: Optional[str] = "value"
|
||||
user_tag: Optional[str] = "human"
|
||||
assistant_tag: Optional[str] = "gpt"
|
||||
observation_tag: Optional[str] = "observation"
|
||||
function_tag: Optional[str] = "function_call"
|
||||
system_tag: Optional[str] = "system"
|
||||
role_tag: str | None = "from"
|
||||
content_tag: str | None = "value"
|
||||
user_tag: str | None = "human"
|
||||
assistant_tag: str | None = "gpt"
|
||||
observation_tag: str | None = "observation"
|
||||
function_tag: str | None = "function_call"
|
||||
system_tag: str | None = "system"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.dataset_name
|
||||
|
||||
def set_attr(self, key: str, obj: dict[str, Any], default: Optional[Any] = None) -> None:
|
||||
def set_attr(self, key: str, obj: dict[str, Any], default: Any | None = None) -> None:
|
||||
setattr(self, key, obj.get(key, default))
|
||||
|
||||
def join(self, attr: dict[str, Any]) -> None:
|
||||
@@ -90,7 +90,7 @@ class DatasetAttr:
|
||||
self.set_attr(tag, attr["tags"])
|
||||
|
||||
|
||||
def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: Union[str, dict]) -> list["DatasetAttr"]:
|
||||
def get_dataset_list(dataset_names: list[str] | None, dataset_dir: str | dict) -> list["DatasetAttr"]:
|
||||
r"""Get the attributes of the datasets."""
|
||||
if dataset_names is None:
|
||||
dataset_names = []
|
||||
|
||||
Reference in New Issue
Block a user