[deps] update to transformers 4.52 (#8125)

This commit is contained in:
hoshi-hiyouga
2025-05-21 05:16:18 +08:00
committed by GitHub
parent 56926d76f9
commit 9ae17cd173
28 changed files with 365 additions and 109 deletions

View File

@@ -205,6 +205,14 @@ def load_eval_results(path: os.PathLike) -> str:
return f"```json\n{result}\n```\n"
def calculate_pixels(pixels: str) -> int:
    r"""Calculate the number of pixels from the expression.

    The expression is either a single integer literal (e.g. ``"589824"``) or
    integer factors joined by ``*`` (e.g. ``"768*768"``). Whitespace around
    each factor is accepted because ``int`` ignores it.

    Args:
        pixels: The textual pixel expression.

    Returns:
        The product of all integer factors in the expression.

    Raises:
        ValueError: If any factor is not a valid integer literal, which
            includes an empty expression or a dangling ``*``.
    """
    # Split once and multiply every factor: a plain number yields a single
    # factor, and expressions with more than two factors are no longer
    # silently truncated to the first two.
    total = 1
    for factor in pixels.split("*"):
        total *= int(factor)

    return total
def create_ds_config() -> None:
r"""Create deepspeed config in the current directory."""
os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True)

View File

@@ -106,11 +106,11 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
use_llama_pro = gr.Checkbox()
with gr.Column():
enable_thinking = gr.Checkbox(value=True)
report_to = gr.Dropdown(
choices=["none", "all", "wandb", "mlflow", "neptune", "tensorboard"],
value=["none"],
choices=["none", "wandb", "mlflow", "neptune", "tensorboard", "all"],
value="none",
allow_custom_value=True,
multiselect=True,
)
input_elems.update(
@@ -126,6 +126,7 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
mask_history,
resize_vocab,
use_llama_pro,
enable_thinking,
report_to,
}
)
@@ -143,6 +144,7 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
mask_history=mask_history,
resize_vocab=resize_vocab,
use_llama_pro=use_llama_pro,
enable_thinking=enable_thinking,
report_to=report_to,
)
)
@@ -231,6 +233,42 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
)
)
with gr.Accordion(open=False) as mm_tab:
with gr.Row():
freeze_vision_tower = gr.Checkbox(value=True)
freeze_multi_modal_projector = gr.Checkbox(value=True)
freeze_language_model = gr.Checkbox(value=False)
with gr.Row():
image_max_pixels = gr.Textbox(value="768*768")
image_min_pixels = gr.Textbox(value="32*32")
video_max_pixels = gr.Textbox(value="256*256")
video_min_pixels = gr.Textbox(value="16*16")
input_elems.update(
{
freeze_vision_tower,
freeze_multi_modal_projector,
freeze_language_model,
image_max_pixels,
image_min_pixels,
video_max_pixels,
video_min_pixels,
}
)
elem_dict.update(
dict(
mm_tab=mm_tab,
freeze_vision_tower=freeze_vision_tower,
freeze_multi_modal_projector=freeze_multi_modal_projector,
freeze_language_model=freeze_language_model,
image_max_pixels=image_max_pixels,
image_min_pixels=image_min_pixels,
video_max_pixels=video_max_pixels,
video_min_pixels=video_min_pixels,
)
)
with gr.Accordion(open=False) as galore_tab:
with gr.Row():
use_galore = gr.Checkbox()

View File

@@ -871,6 +871,28 @@ LOCALES = {
"info": "拡張ブロックのパラメータのみをトレーニングします。",
},
},
"enable_thinking": {
"en": {
"label": "Enable thinking",
"info": "Whether or not to enable thinking mode for reasoning models.",
},
"ru": {
"label": "Включить мысли",
"info": "Включить режим мысли для моделей решающего характера.",
},
"zh": {
"label": "启用思考模式",
"info": "是否启用推理模型的思考模式。",
},
"ko": {
"label": "생각 모드 활성화",
"info": "추론 모델의 생각 모드를 활성화할지 여부.",
},
"ja": {
"label": "思考モードを有効化",
"info": "推論モデルの思考モードを有効にするかどうか。",
},
},
"report_to": {
"en": {
"label": "Enable external logger",
@@ -1374,6 +1396,177 @@ LOCALES = {
"info": "PPO トレーニングにおいて報酬スコアをホワイトニング処理します。",
},
},
"mm_tab": {
"en": {
"label": "Multimodal configurations",
},
"ru": {
"label": "Конфигурации мультимедиа",
},
"zh": {
"label": "多模态参数设置",
},
"ko": {
"label": "멀티모달 구성",
},
"ja": {
"label": "多モーダル設定",
},
},
"freeze_vision_tower": {
"en": {
"label": "Freeze vision tower",
"info": "Freeze the vision tower in the model.",
},
"ru": {
"label": "Заморозить башню визиона",
"info": "Заморозить башню визиона в модели.",
},
"zh": {
"label": "冻结视觉编码器",
"info": "冻结模型中的视觉编码器。",
},
"ko": {
"label": "비전 타워 고정",
"info": "모델의 비전 타워를 고정합니다.",
},
"ja": {
"label": "ビジョンタワーの固定",
"info": "モデルのビジョンタワーを固定します。",
},
},
"freeze_multi_modal_projector": {
"en": {
"label": "Freeze multi-modal projector",
"info": "Freeze the multi-modal projector in the model.",
},
"ru": {
"label": "Заморозить мультимодальный проектор",
"info": "Заморозить мультимодальный проектор в модели.",
},
"zh": {
"label": "冻结多模态投影器",
"info": "冻结模型中的多模态投影器。",
},
"ko": {
"label": "멀티모달 프로젝터 고정",
"info": "모델의 멀티모달 프로젝터를 고정합니다.",
},
"ja": {
"label": "多モーダルプロジェクターの固定",
"info": "モデルの多モーダルプロジェクターを固定します。",
},
},
"freeze_language_model": {
"en": {
"label": "Freeze language model",
"info": "Freeze the language model in the model.",
},
"ru": {
"label": "Заморозить язык модели",
"info": "Заморозить язык модели в модели.",
},
"zh": {
"label": "冻结语言模型",
"info": "冻结模型中的语言模型。",
},
"ko": {
"label": "언어 모델 고정",
"info": "모델의 언어 모델을 고정합니다.",
},
"ja": {
"label": "言語モデルの固定",
"info": "モデルの言語モデルを固定します。",
},
},
"image_max_pixels": {
"en": {
"label": "Image max pixels",
"info": "The maximum number of pixels of image inputs.",
},
"ru": {
"label": "Максимальное количество пикселей изображения",
"info": "Максимальное количество пикселей изображения.",
},
"zh": {
"label": "图像最大像素",
"info": "输入图像的最大像素数。",
},
"ko": {
"label": "이미지 최대 픽셀",
"info": "이미지 입력의 최대 픽셀 수입니다.",
},
"ja": {
"label": "画像最大ピクセル",
"info": "画像入力の最大ピクセル数です。",
},
},
"image_min_pixels": {
"en": {
"label": "Image min pixels",
"info": "The minimum number of pixels of image inputs.",
},
"ru": {
"label": "Минимальное количество пикселей изображения",
"info": "Минимальное количество пикселей изображения.",
},
"zh": {
"label": "图像最小像素",
"info": "输入图像的最小像素数。",
},
"ko": {
"label": "이미지 최소 픽셀",
"info": "이미지 입력의 최소 픽셀 수입니다.",
},
"ja": {
"label": "画像最小ピクセル",
"info": "画像入力の最小ピクセル数です。",
},
},
"video_max_pixels": {
"en": {
"label": "Video max pixels",
"info": "The maximum number of pixels of video inputs.",
},
"ru": {
"label": "Максимальное количество пикселей видео",
"info": "Максимальное количество пикселей видео.",
},
"zh": {
"label": "视频最大像素",
"info": "输入视频的最大像素数。",
},
"ko": {
"label": "비디오 최대 픽셀",
"info": "비디오 입력의 최대 픽셀 수입니다.",
},
"ja": {
"label": "ビデオ最大ピクセル",
"info": "ビデオ入力の最大ピクセル数です。",
},
},
"video_min_pixels": {
"en": {
"label": "Video min pixels",
"info": "The minimum number of pixels of video inputs.",
},
"ru": {
"label": "Минимальное количество пикселей видео",
"info": "Минимальное количество пикселей видео.",
},
"zh": {
"label": "视频最小像素",
"info": "输入视频的最小像素数。",
},
"ko": {
"label": "비디오 최소 픽셀",
"info": "비디오 입력의 최소 픽셀 수입니다.",
},
"ja": {
"label": "ビデオ最小ピクセル",
"info": "ビデオ入力の最小ピクセル数です。",
},
},
"galore_tab": {
"en": {
"label": "GaLore configurations",
@@ -2468,23 +2661,6 @@ LOCALES = {
"label": "HTML タグをエスケープ",
},
},
"enable_thinking": {
"en": {
"label": "Enable thinking",
},
"ru": {
"label": "Включить мышление",
},
"zh": {
"label": "启用思考",
},
"ko": {
"label": "사고를 활성화하다",
},
"ja": {
"label": "思考を可能にする",
},
},
"clear_btn": {
"en": {
"value": "Clear history",

View File

@@ -29,6 +29,7 @@ from .common import (
DEFAULT_CACHE_DIR,
DEFAULT_CONFIG_DIR,
abort_process,
calculate_pixels,
gen_cmd,
get_save_dir,
load_args,
@@ -162,7 +163,15 @@ class Runner:
mask_history=get("train.mask_history"),
resize_vocab=get("train.resize_vocab"),
use_llama_pro=get("train.use_llama_pro"),
enable_thinking=get("train.enable_thinking"),
report_to=get("train.report_to"),
freeze_vision_tower=get("train.freeze_vision_tower"),
freeze_multi_modal_projector=get("train.freeze_multi_modal_projector"),
freeze_language_model=get("train.freeze_language_model"),
image_max_pixels=calculate_pixels(get("train.image_max_pixels")),
image_min_pixels=calculate_pixels(get("train.image_min_pixels")),
video_max_pixels=calculate_pixels(get("train.video_max_pixels")),
video_min_pixels=calculate_pixels(get("train.video_min_pixels")),
use_galore=get("train.use_galore"),
use_apollo=get("train.use_apollo"),
use_badam=get("train.use_badam"),
@@ -256,12 +265,6 @@ class Runner:
args["badam_switch_interval"] = get("train.badam_switch_interval")
args["badam_update_ratio"] = get("train.badam_update_ratio")
# report_to
if "none" in args["report_to"]:
args["report_to"] = "none"
elif "all" in args["report_to"]:
args["report_to"] = "all"
# swanlab config
if get("train.use_swanlab"):
args["swanlab_project"] = get("train.swanlab_project")