[data] qwen3 fixes (#8109)

Author: hoshi-hiyouga
Date: 2025-05-20 02:00:30 +08:00
Committed by: GitHub
Parent: 45030ff803
Commit: 9b5baa97f0
13 changed files with 197 additions and 160 deletions


@@ -50,7 +50,9 @@ Currently we support datasets in **alpaca** and **sharegpt** format.
* [Example dataset](alpaca_en_demo.json)
- In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the human prompt, then the human prompt would be `instruction\ninput`. The `output` column represents the model response.
+ In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the user prompt, i.e. the user prompt will be `instruction\ninput`. The `output` column represents the model response.
+ For reasoning models, if the dataset contains chain-of-thought (CoT), the CoT needs to be placed in the model responses, such as `<think>cot</think>output`.
The `system` column will be used as the system prompt if specified.
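
For concreteness, a minimal alpaca-format record for a reasoning model, with the CoT placed in the model response as the new line describes (all values are illustrative placeholders):

```json
{
  "instruction": "Compute 12 * 13.",
  "input": "",
  "output": "<think>12 * 13 = 12 * 10 + 12 * 3 = 120 + 36 = 156</think>The answer is 156."
}
```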
@@ -59,13 +61,13 @@ The `history` column is a list consisting of string tuples representing prompt-response pairs.
```json
[
{
"instruction": "human instruction (required)",
"input": "human input (optional)",
"instruction": "user instruction (required)",
"input": "user input (optional)",
"output": "model response (required)",
"system": "system prompt (optional)",
"history": [
["human instruction in the first round (optional)", "model response in the first round (optional)"],
["human instruction in the second round (optional)", "model response in the second round (optional)"]
["user instruction in the first round (optional)", "model response in the first round (optional)"],
["user instruction in the second round (optional)", "model response in the second round (optional)"]
]
}
]
@@ -86,6 +88,9 @@ Regarding the above dataset, the *dataset description* in `dataset_info.json` sh
}
```
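
For reference, the *dataset description* body for the alpaca format above typically maps the columns like this; a sketch following LLaMA-Factory's documented conventions, to be checked against the full file:

```json
"dataset_name": {
  "file_name": "data.json",
  "columns": {
    "prompt": "instruction",
    "query": "input",
    "response": "output",
    "system": "system",
    "history": "history"
  }
}
```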
+ > [!TIP]
+ > If the model has reasoning capabilities but the dataset does not contain chain-of-thought (CoT), LLaMA-Factory will automatically add an empty CoT to the data. When `enable_thinking` is `True`, the empty CoT is added to the model response and included in the loss computation; otherwise, it is added to the user prompt and excluded from the loss computation. Keep the `enable_thinking` parameter consistent between training and inference.
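
To make the tip concrete: with the `<think>` markers used above, the automatically added empty CoT is conceptually equivalent to storing the response as follows (placeholder values; the exact marker text depends on the model's template):

```json
{
  "instruction": "user instruction",
  "output": "<think></think>model response"
}
```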
### Pre-training Dataset
- [Example dataset](c4_demo.jsonl)
@@ -119,8 +124,8 @@ It requires a better response in `chosen` column and a worse response in `rejected` column.
```json
[
{
"instruction": "human instruction (required)",
"input": "human input (optional)",
"instruction": "user instruction (required)",
"input": "user input (optional)",
"chosen": "chosen answer (required)",
"rejected": "rejected answer (required)"
}
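
For reference, the *dataset description* in `dataset_info.json` for a preference dataset like this one typically enables the `ranking` flag; a sketch following the column-mapping conventions used elsewhere in this README, to be checked against the full file:

```json
"dataset_name": {
  "file_name": "data.json",
  "ranking": true,
  "columns": {
    "prompt": "instruction",
    "query": "input",
    "chosen": "chosen",
    "rejected": "rejected"
  }
}
```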
@@ -174,7 +179,7 @@ Note that the human and observation should appear in odd positions, while gpt and function should appear in even positions.
"conversations": [
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
},
{
"from": "function_call",
@@ -225,7 +230,7 @@ Preference datasets in sharegpt format also require a better message in `chosen` column and a worse message in `rejected` column.
"conversations": [
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
},
{
"from": "gpt",
@@ -233,7 +238,7 @@ Preference datasets in sharegpt format also require a better message in `chosen` column and a worse message in `rejected` column.
},
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
}
],
"chosen": {
@@ -275,7 +280,7 @@ KTO datasets require an extra `kto_tag` column containing the boolean human feedback.
"conversations": [
{
"from": "human",
"value": "human instruction"
"value": "user instruction"
},
{
"from": "gpt",
@@ -314,7 +319,7 @@ The number of images should be identical to the `<image>` tokens in the conversations.
"conversations": [
{
"from": "human",
"value": "<image>human instruction"
"value": "<image>user instruction"
},
{
"from": "gpt",
@@ -355,7 +360,7 @@ The number of videos should be identical to the `<video>` tokens in the conversations.
"conversations": [
{
"from": "human",
"value": "<video>human instruction"
"value": "<video>user instruction"
},
{
"from": "gpt",
@@ -396,7 +401,7 @@ The number of audios should be identical to the `<audio>` tokens in the conversations.
"conversations": [
{
"from": "human",
"value": "<audio>human instruction"
"value": "<audio>user instruction"
},
{
"from": "gpt",
@@ -437,7 +442,7 @@ The openai format is simply a special case of the sharegpt format, where the first message may be a system prompt.
},
{
"role": "user",
"content": "human instruction"
"content": "user instruction"
},
{
"role": "assistant",