Merge branch 'main' into feat/support_ms

2026-03-10 05:35:59 +08:00 · 2023-12-01 20:23:46 +08:00
parent 5a2392f105 a5a248d569
commit 00f5c9ee16
16 changed files with 121 additions and 62 deletions
--- a/src/llmtuner/webui/common.py
+++ b/src/llmtuner/webui/common.py
@@ -11,14 +11,22 @@ from transformers.utils import (
    ADAPTER_SAFE_WEIGHTS_NAME
 )

-from llmtuner.extras.constants import DEFAULT_MODULE, DEFAULT_TEMPLATE, SUPPORTED_MODELS, ALL_OFFICIAL_MODELS, TRAINING_STAGES
+
+from llmtuner.extras.constants import (
+    DEFAULT_MODULE,
+    DEFAULT_TEMPLATE,
+    SUPPORTED_MODELS,
+    ALL_OFFICIAL_MODELS,
+    TRAINING_STAGES
+)
+from llmtuner.hparams.data_args import DATA_CONFIG
+


 DEFAULT_CACHE_DIR = "cache"
 DEFAULT_DATA_DIR = "data"
 DEFAULT_SAVE_DIR = "saves"
 USER_CONFIG = "user.config"
-DATA_CONFIG = "dataset_info.json"
 CKPT_NAMES = [
    WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
@@ -92,12 +100,12 @@ def list_checkpoint(model_name: str, finetuning_type: str) -> Dict[str, Any]:
    return gr.update(value=[], choices=checkpoints)


-def load_dataset_info(dataset_dir: str) -> Dict[str, Any]:
+def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
    try:
        with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f:
            return json.load(f)
-    except:
-        print("Cannot find {} in {}.".format(DATA_CONFIG, dataset_dir))
+    except Exception as err:
+        print("Cannot open {} due to {}.".format(os.path.join(dataset_dir, DATA_CONFIG), str(err)))
        return {}


--- a/src/llmtuner/webui/components/eval.py
+++ b/src/llmtuner/webui/components/eval.py
@@ -38,10 +38,11 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]:
        max_new_tokens = gr.Slider(10, 2048, value=128, step=1)
        top_p = gr.Slider(0.01, 1, value=0.7, step=0.01)
        temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01)
+        output_dir = gr.Textbox()

-    input_elems.update({max_new_tokens, top_p, temperature})
+    input_elems.update({max_new_tokens, top_p, temperature, output_dir})
    elem_dict.update(dict(
-        max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature
+        max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature, output_dir=output_dir
    ))

    with gr.Row():
--- a/src/llmtuner/webui/engine.py
+++ b/src/llmtuner/webui/engine.py
@@ -49,7 +49,10 @@ class Engine:
                else:
                    yield self._form_dict({"eval.resume_btn": {"value": True}})
            else:
-                yield self._form_dict({"train.output_dir": {"value": get_time()}})
+                yield self._form_dict({
+                    "train.output_dir": {"value": "train_" + get_time()},
+                    "eval.output_dir": {"value": "eval_" + get_time()},
+                })

    def change_lang(self, lang: str) -> Dict[Component, Dict[str, Any]]:
        return {
--- a/src/llmtuner/webui/locales.py
+++ b/src/llmtuner/webui/locales.py
@@ -132,7 +132,7 @@ LOCALES = {
    "dataset_dir": {
        "en": {
            "label": "Data dir",
-            "info": "Path of the data directory."
+            "info": "Path to the data directory."
        },
        "zh": {
            "label": "数据路径",
@@ -475,12 +475,12 @@ LOCALES = {
    },
    "output_dir": {
        "en": {
-            "label": "Checkpoint name",
-            "info": "Directory to save checkpoint."
+            "label": "Output dir",
+            "info": "Directory for saving results."
        },
        "zh": {
-            "label": "断点名称",
-            "info": "保存模型断点的文件夹名称。"
+            "label": "输出目录",
+            "info": "保存结果的路径。"
        }
    },
    "output_box": {
--- a/src/llmtuner/webui/runner.py
+++ b/src/llmtuner/webui/runner.py
@@ -87,9 +87,9 @@ class Runner:
        user_config = load_config()

        if get("top.checkpoints"):
-            checkpoint_dir = ",".join([get_save_dir(
-                get("top.model_name"), get("top.finetuning_type"), ckpt
-            ) for ckpt in get("top.checkpoints")])
+            checkpoint_dir = ",".join([
+                get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints")
+            ])
        else:
            checkpoint_dir = None

@@ -160,15 +160,11 @@ class Runner:
        user_config = load_config()

        if get("top.checkpoints"):
-            checkpoint_dir = ",".join([get_save_dir(
-                get("top.model_name"), get("top.finetuning_type"), ckpt
-            ) for ckpt in get("top.checkpoints")])
-            output_dir = get_save_dir(
-                get("top.model_name"), get("top.finetuning_type"), "eval_" + "_".join(get("top.checkpoints"))
-            )
+            checkpoint_dir = ",".join([
+                get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints")
+            ])
        else:
            checkpoint_dir = None
-            output_dir = get_save_dir(get("top.model_name"), get("top.finetuning_type"), "eval_base")

        args = dict(
            stage="sft",
@@ -192,7 +188,7 @@ class Runner:
            max_new_tokens=get("eval.max_new_tokens"),
            top_p=get("eval.top_p"),
            temperature=get("eval.temperature"),
-            output_dir=output_dir
+            output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("eval.output_dir"))
        )

        if get("eval.predict"):
@@ -242,6 +238,7 @@ class Runner:
        output_dir = get_save_dir(get("top.model_name"), get("top.finetuning_type"), get(
            "{}.output_dir".format("train" if self.do_train else "eval")
        ))
+
        while self.thread.is_alive():
            time.sleep(2)
            if self.aborted:
--- a/src/llmtuner/webui/utils.py
+++ b/src/llmtuner/webui/utils.py
@@ -44,7 +44,8 @@ def can_quantize(finetuning_type: str) -> Dict[str, Any]:
 def gen_cmd(args: Dict[str, Any]) -> str:
    args.pop("disable_tqdm", None)
    args["plot_loss"] = args.get("do_train", None)
-    cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python src/train_bash.py "]
+    current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
+    cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)]
    for k, v in args.items():
        if v is not None and v != "":
            cmd_lines.append("    --{} {} ".format(k, str(v)))