From ac76a9e1408aebe105fd2ed826e5fa763061a896 Mon Sep 17 00:00:00 2001 From: codingma Date: Sun, 28 Apr 2024 11:31:34 +0800 Subject: [PATCH 1/2] support BAdam in WebUI Former-commit-id: 26f71703935407b94ed0787d91e156296bef9993 --- src/llmtuner/webui/components/train.py | 26 ++++++ src/llmtuner/webui/locales.py | 109 +++++++++++++++++++++++++ src/llmtuner/webui/runner.py | 9 ++ 3 files changed, 144 insertions(+) diff --git a/src/llmtuner/webui/components/train.py b/src/llmtuner/webui/components/train.py index 7dc324af..9d93a9b6 100644 --- a/src/llmtuner/webui/components/train.py +++ b/src/llmtuner/webui/components/train.py @@ -210,6 +210,32 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: ) ) + with gr.Accordion(open=False) as badam_tab: + with gr.Row(): + use_badam = gr.Checkbox() + badam_mode = gr.Dropdown(choices=["layer", "ratio"], value="layer") + badam_mask_mode = gr.Dropdown(choices=["adjacent", "scatter"], value="adjacent") + badam_switch_mode = gr.Dropdown(choices=["ascending", "descending", "random", "fixed"], value="ascending") + badam_update_ratio = gr.Slider(value=0, minimum=0, maximum=1, step=0.01) + badam_switch_block_every = gr.Slider(value=50, minimum=-1, maximum=200, step=1) + + badam_verbose = gr.Dropdown(choices=[0, 1, 2], value=0) + + input_elems.update({use_badam, badam_mode, badam_switch_block_every, badam_switch_mode, badam_update_ratio, + badam_mask_mode, badam_verbose}) + elem_dict.update( + dict( + badam_tab=badam_tab, + use_badam=use_badam, + badam_mode=badam_mode, + badam_switch_block_every=badam_switch_block_every, + badam_switch_mode=badam_switch_mode, + badam_update_ratio=badam_update_ratio, + badam_mask_mode=badam_mask_mode, + badam_verbose=badam_verbose, + ) + ) + with gr.Row(): cmd_preview_btn = gr.Button() arg_save_btn = gr.Button() diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py index d341c7b6..d3dd4dc2 100644 --- a/src/llmtuner/webui/locales.py +++ b/src/llmtuner/webui/locales.py @@ 
-891,6 +891,115 @@ LOCALES = { "info": "应用 GaLore 的模块名称。使用英文逗号分隔多个名称。", }, }, + "badam_tab": { + "en": { + "label": "BAdam configurations", + }, + "ru": { + "label": "Конфигурации BAdam", + }, + "zh": { + "label": "BAdam 参数设置", + }, + }, + "use_badam": { + "en": { + "label": "Use BAdam", + "info": "Enable the block coordinate optimization with Adam.", + }, + "ru": { + "label": "Использовать BAdam", + "info": "Включите блочную оптимизацию координат с Adam.", + }, + "zh": { + "label": "使用 BAdam", + "info": "使用多Block协同的Adam优化器。", + }, + }, + "badam_mode": { + "en": { + "label": "BAdam mode", + "info": "Whether to use layer-wise or ratio-wise BAdam optimizer.", + }, + "ru": { + "label": "Режим BAdam", + "info": "Использовать оптимизатор BAdam с обработкой слоев или с обработкой коэффициентов.", + }, + "zh": { + "label": "BAdam 模式", + "info": "使用layer或者ratio比例模式。", + }, + }, + "badam_switch_block_every": { + "en": { + "label": "Switch block frequency", + "info": "How often to switch model's block update. Set to -1 to disable the block update.", + }, + "ru": { + "label": "Частота переключения", + "info": "Как часто переключать обновление блока модели. 
Установите -1, чтобы отключить обновление блока.", + }, + "zh": { + "label": "切换block的频率", + "info": "控制切换block切换的频率,如果是-1,则不切换。", + }, + }, + "badam_switch_mode": { + "en": { + "label": "Switch mode", + "info": "The strategy of picking block to update for layer-wise BAdam.", + }, + "ru": { + "label": "Переключить режим", + "info": "Стратегия выбора блока для обновления в методе BAdam по слоям.", + }, + "zh": { + "label": "Block切换策略", + "info": "如果是layer类型的训练模式,如何切换block。", + }, + }, + "badam_update_ratio": { + "en": { + "label": "Update ratio", + "info": "The ratio of the update for ratio-wise BAdam.", + }, + "ru": { + "label": "Коэффициент обновления", + "info": "Коэффициент обновления для метода BAdam, основанного на коэффициентах.", + }, + "zh": { + "label": "Block更新比例", + "info": "如果是比例类型的训练模式,block每次更新的范围比例。", + }, + }, + "badam_mask_mode": { + "en": { + "label": "Mask mode", + "info": "The mode of the mask for BAdam optimizer.", + }, + "ru": { + "label": "Режим маски", + "info": "Режим маски для оптимизатора BAdam.", + }, + "zh": { + "label": "Mask模式", + "info": "BAdam优化器内训练参数的mask关系。", + }, + }, + "badam_verbose": { + "en": { + "label": "Verbosity level", + "info": "0 for no print, 1 for print the block prefix, 2 for print trainable parameters.", + }, + "ru": { + "label": "Уровень многословности", + "info": "0 для отсутствия печати, 1 для печати префикса блока, 2 для печати обучаемых параметров.", + }, + "zh": { + "label": "输出日志级别", + "info": "0:不输出,1:输出block前缀, 2:输出可训练的参数。", + }, + }, "cmd_preview_btn": { "en": { "value": "Preview command", diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py index 8054484f..52584f31 100644 --- a/src/llmtuner/webui/runner.py +++ b/src/llmtuner/webui/runner.py @@ -151,6 +151,7 @@ class Runner: fp16=(get("train.compute_type") == "fp16"), bf16=(get("train.compute_type") == "bf16"), pure_bf16=(get("train.compute_type") == "pure_bf16"), + use_badam=get("train.use_badam"), ) args["disable_tqdm"] = True @@ 
-198,6 +199,14 @@ class Runner: args["galore_scale"] = get("train.galore_scale") args["galore_target"] = get("train.galore_target") + if args["use_badam"]: + args["badam_mode"] = get("train.badam_mode") + args["badam_switch_block_every"] = get("train.badam_switch_block_every") + args["badam_switch_mode"] = get("train.badam_switch_mode") + args["badam_update_ratio"] = get("train.badam_update_ratio") + args["badam_mask_mode"] = get("train.badam_mask_mode") + args["badam_verbose"] = get("train.badam_verbose") + return args def _parse_eval_args(self, data: Dict["Component", Any]) -> Dict[str, Any]: From 1d00dede8eb8c8d0dc4d0d6d8b487fd5208f7328 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 2 May 2024 02:21:27 +0800 Subject: [PATCH 2/2] Update train.py Former-commit-id: dcd53cb89ae92f92ad1242e8988a18cac5292459 --- src/llmtuner/webui/components/train.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/llmtuner/webui/components/train.py b/src/llmtuner/webui/components/train.py index 9d93a9b6..be070869 100644 --- a/src/llmtuner/webui/components/train.py +++ b/src/llmtuner/webui/components/train.py @@ -214,25 +214,19 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Row(): use_badam = gr.Checkbox() badam_mode = gr.Dropdown(choices=["layer", "ratio"], value="layer") - badam_mask_mode = gr.Dropdown(choices=["adjacent", "scatter"], value="adjacent") badam_switch_mode = gr.Dropdown(choices=["ascending", "descending", "random", "fixed"], value="ascending") - badam_update_ratio = gr.Slider(value=0, minimum=0, maximum=1, step=0.01) badam_switch_block_every = gr.Slider(value=50, minimum=-1, maximum=200, step=1) + badam_update_ratio = gr.Slider(value=0, minimum=0, maximum=1, step=0.01) - badam_verbose = gr.Dropdown(choices=[0, 1, 2], value=0) - - input_elems.update({use_badam, badam_mode, badam_switch_block_every, badam_switch_mode, badam_update_ratio, - badam_mask_mode, badam_verbose}) + 
input_elems.update({use_badam, badam_mode, badam_switch_mode, badam_switch_block_every, badam_update_ratio}) elem_dict.update( dict( badam_tab=badam_tab, use_badam=use_badam, badam_mode=badam_mode, - badam_switch_block_every=badam_switch_block_every, badam_switch_mode=badam_switch_mode, + badam_switch_block_every=badam_switch_block_every, badam_update_ratio=badam_update_ratio, - badam_mask_mode=badam_mask_mode, - badam_verbose=badam_verbose, ) )