From b83a38eb98965fa698936a911b73aac017e73e88 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Tue, 20 May 2025 02:00:30 +0800
Subject: [PATCH] [data] qwen3 fixes (#8109)

---
 README.md                                   |   2 +-
 README_zh.md                                |   2 +-
 data/README.md                              |  35 +++---
 data/README_zh.md                           |  34 +++---
 scripts/vllm_infer.py                       |  12 +-
 src/llamafactory/chat/hf_engine.py          |   5 +-
 src/llamafactory/chat/sglang_engine.py      |   5 +-
 src/llamafactory/chat/vllm_engine.py        |   5 +-
 src/llamafactory/data/template.py           |  51 +++++----
 src/llamafactory/hparams/data_args.py       |   8 ++
 src/llamafactory/hparams/generating_args.py |  10 +-
 src/llamafactory/webui/chatter.py           |  69 +++++++-----
 tests/data/test_template.py                 | 119 ++++++++++++--------
 13 files changed, 197 insertions(+), 160 deletions(-)

diff --git a/README.md b/README.md
index 9e76b58e..2289c535 100644
--- a/README.md
+++ b/README.md
@@ -250,7 +250,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/

-> [!NOTE]
+> [!TIP]
 > If you cannot use the latest feature, please pull the latest code and install LLaMA-Factory again.

 ## Supported Models
diff --git a/README_zh.md b/README_zh.md
index 90ad5a0d..b4219d9c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -237,7 +237,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc

-> [!NOTE]
+> [!TIP]
 > 如果您无法使用最新的功能，请尝试重新拉取代码并再次安装 LLaMA-Factory。

 ## 模型
diff --git a/data/README.md b/data/README.md
index 5c2e969a..90503351 100644
--- a/data/README.md
+++ b/data/README.md
@@ -50,7 +50,9 @@ Currently we support datasets in **alpaca** and **sharegpt** format.

 * [Example dataset](alpaca_en_demo.json)

-In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the human prompt, then the human prompt would be `instruction\ninput`. The `output` column represents the model response.
+In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the user prompt; the user prompt will then be `instruction\ninput`. The `output` column represents the model response.
+
+For reasoning models, if the dataset contains chain-of-thought (CoT), the CoT needs to be placed in the model responses, such as `<think>cot</think>output`.

 The `system` column will be used as the system prompt if specified.

@@ -59,13 +61,13 @@ The `history` column is a list consisting of string tuples representing prompt-r

 ```json
 [
   {
-    "instruction": "human instruction (required)",
-    "input": "human input (optional)",
+    "instruction": "user instruction (required)",
+    "input": "user input (optional)",
     "output": "model response (required)",
     "system": "system prompt (optional)",
     "history": [
-      ["human instruction in the first round (optional)", "model response in the first round (optional)"],
-      ["human instruction in the second round (optional)", "model response in the second round (optional)"]
+      ["user instruction in the first round (optional)", "model response in the first round (optional)"],
+      ["user instruction in the second round (optional)", "model response in the second round (optional)"]
     ]
   }
 ]
 ```

@@ -86,6 +88,9 @@ Regarding the above dataset, the *dataset description* in `dataset_info.json` sh
 }
 ```

+> [!TIP]
+> If the model has reasoning capabilities but the dataset does not contain chain-of-thought (CoT), LLaMA-Factory will automatically add an empty CoT to the data. When `enable_thinking` is `True`, the empty CoT is added to the model responses and included in the loss computation; otherwise, it is added to the user prompts and excluded from the loss computation. Please keep the `enable_thinking` parameter consistent during training and inference.
+
 ### Pre-training Dataset

 - [Example dataset](c4_demo.jsonl)
@@ -119,8 +124,8 @@ It requires a better response in `chosen` column and a worse response in `reject

 ```json
 [
   {
-    "instruction": "human instruction (required)",
-    "input": "human input (optional)",
+    "instruction": "user instruction (required)",
+    "input": "user input (optional)",
     "chosen": "chosen answer (required)",
     "rejected": "rejected answer (required)"
   }
 ]
@@ -174,7 +179,7 @@ Note that the human and observation should appear in odd positions, while gpt an
     "conversations": [
       {
         "from": "human",
-        "value": "human instruction"
+        "value": "user instruction"
       },
       {
         "from": "function_call",
@@ -225,7 +230,7 @@ Preference datasets in sharegpt format also require a better message in `chosen`
     "conversations": [
      {
        "from": "human",
-        "value": "human instruction"
+        "value": "user instruction"
      },
      {
        "from": "gpt",
        "value": "model response"
      },
      {
        "from": "human",
-        "value": "human instruction"
+        "value": "user instruction"
      }
    ],
    "chosen": {
@@ -275,7 +280,7 @@ KTO datasets require a extra `kto_tag` column containing the boolean human feedb
     "conversations": [
       {
         "from": "human",
-        "value": "human instruction"
+        "value": "user instruction"
       },
       {
         "from": "gpt",
@@ -314,7 +319,7 @@ The number of images should be identical to the `<image>` tokens in the conversa
     "conversations": [
       {
         "from": "human",
-        "value": "human instruction"
+        "value": "user instruction"
       },
       {
         "from": "gpt",
@@ -355,7 +360,7 @@ The number of videos should be identical to the `<video>`
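
To make the patched data/README.md guidance concrete, here is a minimal alpaca-format record for a reasoning model, following the `<think>cot</think>output` convention the patch introduces; the instruction and output values below are illustrative placeholders, not text from the patch:

```json
[
  {
    "instruction": "Compute 12 * 13.",
    "input": "",
    "output": "<think>\n12 * 13 = 12 * 10 + 12 * 3 = 120 + 36 = 156\n</think>\n\nThe answer is 156."
  }
]
```

Because this record already contains a CoT, the automatic empty-CoT insertion described in the TIP above does not apply; `enable_thinking` only governs how an empty CoT is injected when the dataset lacks one.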