From 82533114a74e528bbab7fc50d6f98e79504bd5ff Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:11:32 +0800 Subject: [PATCH 001/160] update git workflows Former-commit-id: 5a3f26bc53433caa98b2a66294becaf156280a4c --- .github/workflows/label_issue.yml | 17 +++++++++++++++++ .github/workflows/tests.yml | 17 ++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/label_issue.yml diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml new file mode 100644 index 00000000..b9a5543c --- /dev/null +++ b/.github/workflows/label_issue.yml @@ -0,0 +1,17 @@ +name: label_issue + +on: + issues: + types: + - opened + +jobs: + label_issue: + runs-on: ubuntu-latest + + steps: + - env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE_URL: ${{ github.event.issue.html_url }} + run: | + gh issue edit $ISSUE_URL --add-label "pending" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 32edf6a8..6ddcbc05 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,14 +3,7 @@ name: tests on: push: branches: - - main - paths: - - "**.py" - - "requirements.txt" - - ".github/workflows/*.yml" - pull_request: - branches: - - main + - $default-branch paths: - "**.py" - "requirements.txt" @@ -19,21 +12,27 @@ on: jobs: tests: runs-on: ubuntu-latest + steps: - - uses: actions/checkout@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.8" cache: "pip" cache-dependency-path: "setup.py" + - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install .[torch,dev] + - name: Check quality run: | make style && make quality + - name: Test with pytest run: | make test From 88528f1a87ff42901a1ff8bc3c5e05b4afa190a0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:15:36 +0800 Subject: [PATCH 002/160] Update tests.yml Former-commit-id: e90f0cc30d6bb819246ccc08935c39e714c179a1 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ddcbc05..f3ac96db 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,7 +3,7 @@ name: tests on: push: branches: - - $default-branch + - main paths: - "**.py" - "requirements.txt" From d812249db7ed95543ba9319ed09d582685884481 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 8 Jun 2024 21:25:35 +0800 Subject: [PATCH 003/160] add pr ci Former-commit-id: 9b05bb8540b946d0c74bf804bcafc4a785d22c47 --- .github/workflows/tests.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f3ac96db..96092662 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,6 +8,15 @@ on: - "**.py" - "requirements.txt" - ".github/workflows/*.yml" + pull_request: + types: + - review_requested + branches: + - main + paths: + - "**.py" + - "requirements.txt" + - ".github/workflows/*.yml" jobs: tests: From 041ecd0de159bdf10a2b9f1e99c495592c4329c5 Mon Sep 17 00:00:00 2001 From: "-.-" Date: Sat, 8 Jun 2024 23:51:56 +0800 Subject: [PATCH 004/160] fix README Former-commit-id: fa30028c0b83c38610b596209493a748b8ca0928 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fb6c5782..4dea65b9 100644 --- a/README.md +++ b/README.md @@ 
-335,7 +335,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e '.[torch,metrics]' +pip install -e ".[torch,metrics]" ``` Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality diff --git a/README_zh.md b/README_zh.md index 142254df..ab0e8cb7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -335,7 +335,7 @@ huggingface-cli login ```bash git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git cd LLaMA-Factory -pip install -e '.[torch,metrics]' +pip install -e ".[torch,metrics]" ``` 可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality From 8db8ed5a413e65751ba5a791cba1de766ffd09d3 Mon Sep 17 00:00:00 2001 From: mMrBun <2015711377@qq.com> Date: Sun, 9 Jun 2024 18:16:15 +0800 Subject: [PATCH 005/160] Implemented the tool_formatter and tool_extractor for glm4 tool_format Former-commit-id: db7fa4490ea7f6966418d2879c895cbc1763b16d --- src/llamafactory/data/formatter.py | 42 +++++++++++++++++++++++++++++- src/llamafactory/data/template.py | 3 ++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 0cd3d6c1..344e01db 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -23,6 +23,17 @@ TOOL_SYSTEM_PROMPT = ( ) +GLM4_TOOL_SUFFIX_PROMPT = ( + "在调用上述函数时,请使用 Json 格式表示调用的参数。" +) + +GLM4_TOOL_PROMPT = ( + "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持," + "{tool_text}" + +) + + def default_tool_formatter(tools: List[Dict[str, Any]]) -> str: tool_text = "" tool_names = [] @@ -53,6 +64,14 @@ def default_tool_formatter(tools: List[Dict[str, Any]]) -> str: ) +def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str: + tool_text = "" + for tool in tools: + tool_name = tool["name"] + tool_text += f"\n\n## {tool_name}\n\n{json.dumps(tool, ensure_ascii=False, indent=4)}\n{GLM4_TOOL_SUFFIX_PROMPT}" + return GLM4_TOOL_PROMPT.format(tool_text=tool_text) + + def default_tool_extractor(content: str) -> Union[str, Tuple[str, str]]: regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+).*?Action Input:\s*(.*)", re.DOTALL) action_match = re.search(regex, content) @@ -69,10 +88,24 @@ def default_tool_extractor(content: str) -> Union[str, Tuple[str, str]]: return tool_name, json.dumps(arguments, ensure_ascii=False) +def glm4_tool_extractor(content: str) -> Union[str, Tuple[str, str]]: + lines = content.strip().split("\n") + if len(lines) != 2: + return content + tool_name = lines[0].strip() + tool_input = lines[1].strip() + try: + arguments = json.loads(tool_input) + except json.JSONDecodeError: + return content + return tool_name, json.dumps(arguments, ensure_ascii=False) + + + @dataclass class Formatter(ABC): slots: SLOTS = field(default_factory=list) - tool_format: Optional[Literal["default"]] = None + tool_format: Optional[Literal["default", "glm4"]] = None @abstractmethod def apply(self, **kwargs) -> SLOTS: ... 
@@ -175,6 +208,11 @@ class ToolFormatter(Formatter): if self.tool_format == "default": return [default_tool_formatter(tools)] + elif self.tool_format == "glm4": + """ + '[gMASK]<|system|>\n你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n## get_current_weather\n\n{\n "name": "get_current_weather",\n "description": "Get the current weather",\n "parameters": {\n "type": "object",\n "properties": {\n "location": {\n "type": "string",\n "description": "The city and state, e.g. San Francisco, CA"\n },\n "format": {\n "type": "string",\n "enum": [\n "celsius",\n "fahrenheit"\n ],\n "description": "The temperature unit to use. Infer this from the users location."\n }\n },\n "required": [\n "location",\n "format"\n ]\n }\n}\n在调用上述函数时,请使用 Json 格式表示调用的参数。<|user|>\nWhat\'s the weather like in San Francisco, Tokyo, and Paris? use celsius<|assistant|>' + """ + return [glm4_tool_formatter(tools)] else: raise NotImplementedError except Exception: @@ -183,5 +221,7 @@ class ToolFormatter(Formatter): def extract(self, content: str) -> Union[str, Tuple[str, str]]: if self.tool_format == "default": return default_tool_extractor(content) + elif self.tool_format == "glm4": + return glm4_tool_extractor(content) else: raise NotImplementedError diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3dce5ec6..b2aea217 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -662,9 +662,10 @@ _register_template( name="glm4", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]), format_assistant=StringFormatter(slots=["\n{{content}}"]), - format_system=StringFormatter(slots=["[gMASK]{{content}}"]), + format_system=StringFormatter(slots=["[gMASK]<|system|>\n{{content}}"]), format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]), + format_tools=ToolFormatter(tool_format="glm4"), stop_words=["<|user|>", "<|observation|>"], efficient_eos=True, force_system=True, From 44829df762c7973f29a1486c8bccec57665ad391 Mon Sep 17 00:00:00 2001 From: mMrBun <2015711377@qq.com> Date: Sun, 9 Jun 2024 18:25:22 +0800 Subject: [PATCH 006/160] Removed unnecessary comments. Former-commit-id: 2b81252aa693871098931cd7873ef83ef4922ba5 --- src/llamafactory/data/formatter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 344e01db..9f58915b 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -209,9 +209,6 @@ class ToolFormatter(Formatter): if self.tool_format == "default": return [default_tool_formatter(tools)] elif self.tool_format == "glm4": - """ - '[gMASK]<|system|>\n你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n## get_current_weather\n\n{\n "name": "get_current_weather",\n "description": "Get the current weather",\n "parameters": {\n "type": "object",\n "properties": {\n "location": {\n "type": "string",\n "description": "The city and state, e.g. San Francisco, CA"\n },\n "format": {\n "type": "string",\n "enum": [\n "celsius",\n "fahrenheit"\n ],\n "description": "The temperature unit to use. Infer this from the users location."\n }\n },\n "required": [\n "location",\n "format"\n ]\n }\n}\n在调用上述函数时,请使用 Json 格式表示调用的参数。<|user|>\nWhat\'s the weather like in San Francisco, Tokyo, and Paris? 
use celsius<|assistant|>' - """ return [glm4_tool_formatter(tools)] else: raise NotImplementedError From bc04ca464ac64562871060aa4cd3c2c17dade905 Mon Sep 17 00:00:00 2001 From: mMrBun <2015711377@qq.com> Date: Mon, 10 Jun 2024 02:00:14 +0800 Subject: [PATCH 007/160] Optimize the handling of QWEN2 in scenarios involving multiple tool calls. Former-commit-id: 48f870edc96ada40360f7e6e67cbf58805295b33 --- src/llamafactory/api/chat.py | 13 +++++++----- src/llamafactory/data/formatter.py | 34 ++++++++++++++++++------------ 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 98957bc1..d4db1eea 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -150,11 +150,14 @@ async def create_chat_completion_response( else: result = response.response_text - if isinstance(result, tuple): - name, arguments = result - function = Function(name=name, arguments=arguments) - tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function) - response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[tool_call]) + if isinstance(result, list): + tool_calls = [] + for tool in result: + name, arguments = tool + function = Function(name=name, arguments=arguments) + tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function) + tool_calls.append(tool_call) + response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=tool_calls) finish_reason = Finish.TOOL else: response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result) diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 9f58915b..1d917887 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -72,23 +72,29 @@ def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str: return GLM4_TOOL_PROMPT.format(tool_text=tool_text) -def default_tool_extractor(content: str) -> Union[str, Tuple[str, str]]: - regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+).*?Action Input:\s*(.*)", re.DOTALL) - action_match = re.search(regex, content) +def default_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: + regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*({.*?})(?=\nAction:|\Z)", re.DOTALL) + action_match = re.findall(regex, content) if not action_match: return content - tool_name = action_match.group(1).strip() - tool_input = action_match.group(2).strip().strip('"').strip("```") - try: - arguments = json.loads(tool_input) - except json.JSONDecodeError: - return content + results = [] + + for match in action_match: + tool_name, tool_input = match + tool_name = tool_name.strip() + tool_input = tool_input.strip().strip('"').strip("```") - return tool_name, json.dumps(arguments, ensure_ascii=False) + try: + arguments = json.loads(tool_input) + results.append((tool_name, json.dumps(arguments, ensure_ascii=False))) + except json.JSONDecodeError: + return content + + return results -def glm4_tool_extractor(content: str) -> Union[str, Tuple[str, str]]: +def glm4_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: lines = content.strip().split("\n") if len(lines) != 2: return content @@ -98,7 +104,7 @@ def glm4_tool_extractor(content: str) -> Union[str, Tuple[str, str]]: arguments = json.loads(tool_input) except json.JSONDecodeError: return content - return tool_name, json.dumps(arguments, ensure_ascii=False) + return [(tool_name, json.dumps(arguments, ensure_ascii=False))] @@ -110,7 +116,7 @@ 
class Formatter(ABC): @abstractmethod def apply(self, **kwargs) -> SLOTS: ... - def extract(self, content: str) -> Union[str, Tuple[str, str]]: + def extract(self, content: str) -> Union[str, List[Tuple[str, str]]]: raise NotImplementedError @@ -215,7 +221,7 @@ class ToolFormatter(Formatter): except Exception: return [""] - def extract(self, content: str) -> Union[str, Tuple[str, str]]: + def extract(self, content: str) -> Union[str, List[Tuple[str, str]]]: if self.tool_format == "default": return default_tool_extractor(content) elif self.tool_format == "glm4": From 784088db3fb6c599368d69ba97a232ba00d07c1f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 10 Jun 2024 21:24:15 +0800 Subject: [PATCH 008/160] fix #2666 Former-commit-id: f121d5c4f94af9f165132c4309cb9bdc8217d985 --- src/llamafactory/model/adapter.py | 2 +- tests/data/test_supervised.py | 32 ++++++++++++++--------- tests/model/model_utils/test_attention.py | 15 +++++------ tests/model/test_freeze.py | 19 ++++---------- tests/model/test_full.py | 8 +++--- tests/model/test_lora.py | 19 ++++---------- 6 files changed, 41 insertions(+), 54 deletions(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index f4e501a7..34b9eda6 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -209,6 +209,7 @@ def _setup_lora_tuning( "lora_alpha": finetuning_args.lora_alpha, "lora_dropout": finetuning_args.lora_dropout, "use_rslora": finetuning_args.use_rslora, + "use_dora": finetuning_args.use_dora, "modules_to_save": finetuning_args.additional_target, } @@ -218,7 +219,6 @@ def _setup_lora_tuning( lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, inference_mode=False, - use_dora=finetuning_args.use_dora, **peft_kwargs, ) model = get_peft_model(model, lora_config) diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index bb7f71df..63a3453f 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -1,4 +1,5 @@ import os +import random import pytest from datasets import load_dataset @@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "full", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", - "cutoff_len": 1024, + "cutoff_len": 8192, "overwrite_cache": True, "output_dir": "dummy_dir", "overwrite_output_dir": True, @@ -26,19 +27,24 @@ TRAINING_ARGS = { } -@pytest.mark.parametrize("test_num", [5]) -def test_supervised(test_num: int): - model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS) +@pytest.mark.parametrize("num_samples", [10]) +def test_supervised(num_samples: int): + model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) tokenizer = tokenizer_module["tokenizer"] tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module) - original_data = load_dataset(TRAINING_ARGS["dataset"], split="train") - for test_idx in range(test_num): - decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx]) + original_data = 
load_dataset(TRAIN_ARGS["dataset"], split="train") + indexes = random.choices(range(len(original_data)), k=num_samples) + for index in indexes: + decoded_result = tokenizer.decode(tokenized_data["input_ids"][index]) + prompt = original_data[index]["instruction"] + if original_data[index]["input"]: + prompt += "\n" + original_data[index]["input"] + messages = [ - {"role": "user", "content": original_data[test_idx]["instruction"]}, - {"role": "assistant", "content": original_data[test_idx]["output"]}, + {"role": "user", "content": prompt}, + {"role": "assistant", "content": original_data[index]["output"]}, ] templated_result = tokenizer.apply_chat_template(messages, tokenize=False) - assert decode_result == templated_result + assert decoded_result == templated_result diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 4d414289..751adda4 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "template": "llama3", +} def test_attention(): @@ -23,13 +28,7 @@ def test_attention(): "fa2": "LlamaFlashAttention2", } for requested_attention in attention_available: - model_args, _, finetuning_args, _ = get_infer_args( - { - "model_name_or_path": TINY_LLAMA, - "template": "llama2", - "flash_attn": requested_attention, - } - ) + model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args) for module in model.modules(): diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index c6cdec78..97800696 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "freeze", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,12 +25,7 @@ TRAINING_ARGS = { def test_freeze_all_modules(): - model_args, _, _, finetuning_args, _ = get_train_args( - { - "freeze_trainable_layers": 1, - **TRAINING_ARGS, - } - ) + model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for name, param in model.named_parameters(): @@ -44,11 +39,7 @@ def test_freeze_all_modules(): def test_freeze_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( - { - "freeze_trainable_layers": 1, - "freeze_extra_modules": "embed_tokens,lm_head", - **TRAINING_ARGS, - } + {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS} ) tokenizer_module = 
load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) diff --git a/tests/model/test_full.py b/tests/model/test_full.py index ef57a980..6cb78f37 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "full", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,7 +25,7 @@ TRAINING_ARGS = { def test_full(): - model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS) + model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for param in model.parameters(): diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 1f2c02ae..2e2b89d9 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args from llamafactory.model import load_model, load_tokenizer -TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM") +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") -TRAINING_ARGS = { +TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", "do_train": True, "finetuning_type": "lora", - "dataset": "llamafactory/tiny_dataset", + "dataset": "llamafactory/tiny-supervised-dataset", "dataset_dir": "ONLINE", "template": "llama3", "cutoff_len": 1024, @@ -25,12 +25,7 @@ TRAINING_ARGS = { def test_lora_all_modules(): - model_args, _, _, finetuning_args, _ = get_train_args( - { - "lora_target": "all", - **TRAINING_ARGS, - } - ) + model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) linear_modules = set() @@ -48,11 +43,7 @@ def test_lora_all_modules(): def test_lora_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( - { - "lora_target": "all", - "additional_target": "embed_tokens,lm_head", - **TRAINING_ARGS, - } + {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS} ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) From 4b2b92fd9aecc6e6f40c44d212f5889d9f692446 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 10 Jun 2024 23:56:00 +0800 Subject: [PATCH 009/160] update evaluator Former-commit-id: bb8661e62481ff7027b8969f3d8a6a17290c9da3 --- src/llamafactory/eval/evaluator.py | 4 +- src/llamafactory/eval/template.py | 9 ++-- tests/eval/test_eval_template.py | 77 ++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 9 deletions(-) create mode 100644 tests/eval/test_eval_template.py diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 192f4815..5c6fb104 100644 --- a/src/llamafactory/eval/evaluator.py +++ 
b/src/llamafactory/eval/evaluator.py @@ -26,9 +26,7 @@ class Evaluator: self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template) self.model = load_model(self.tokenizer, self.model_args, finetuning_args) self.eval_template = get_eval_template(self.eval_args.lang) - self.choice_inputs = [ - self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES - ] + self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES] @torch.inference_mode() def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]: diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index a4a6ef0e..2cbb5aaf 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -10,7 +10,6 @@ class EvalTemplate: system: str choice: str answer: str - prefix: str def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]: r""" @@ -42,8 +41,8 @@ class EvalTemplate: eval_templates: Dict[str, "EvalTemplate"] = {} -def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None: - eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix) +def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None: + eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer) def get_eval_template(name: str) -> "EvalTemplate": @@ -56,8 +55,7 @@ _register_eval_template( name="en", system="The following are multiple choice questions (with answers) about {subject}.\n\n", choice="\n{choice}. {content}", - answer="\nAnswer: ", - prefix=" ", + answer="\nAnswer:", ) @@ -66,5 +64,4 @@ _register_eval_template( system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n", choice="\n{choice}. {content}", answer="\n答案:", - prefix=" ", ) diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py new file mode 100644 index 00000000..f6a91a67 --- /dev/null +++ b/tests/eval/test_eval_template.py @@ -0,0 +1,77 @@ +from llamafactory.eval.template import get_eval_template + + +def test_eval_template_en(): + support_set = [ + { + "question": "Fewshot question", + "A": "Fewshot1", + "B": "Fewshot2", + "C": "Fewshot3", + "D": "Fewshot4", + "answer": "B", + } + ] + example = { + "question": "Target question", + "A": "Target1", + "B": "Target2", + "C": "Target3", + "D": "Target4", + "answer": "C", + } + template = get_eval_template(name="en") + messages = template.format_example(example, support_set=support_set, subject_name="SubName") + assert messages == [ + { + "role": "user", + "content": ( + "The following are multiple choice questions (with answers) about SubName.\n\n" + "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:" + ), + }, + {"role": "assistant", "content": "B"}, + { + "role": "user", + "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. 
Target4\nAnswer:", + }, + {"role": "assistant", "content": "C"}, + ] + + +def test_eval_template_zh(): + support_set = [ + { + "question": "示例问题", + "A": "示例答案1", + "B": "示例答案2", + "C": "示例答案3", + "D": "示例答案4", + "answer": "B", + } + ] + example = { + "question": "目标问题", + "A": "目标答案1", + "B": "目标答案2", + "C": "目标答案3", + "D": "目标答案4", + "answer": "C", + } + template = get_eval_template(name="zh") + messages = template.format_example(example, support_set=support_set, subject_name="主题") + assert messages == [ + { + "role": "user", + "content": ( + "以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n" + "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:" + ), + }, + {"role": "assistant", "content": "B"}, + { + "role": "user", + "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:", + }, + {"role": "assistant", "content": "C"}, + ] From ea2ca2777fd81dfb29e37f51adfd16cc484636dd Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:19:17 +0800 Subject: [PATCH 010/160] fix #4145 Fix the docker image Former-commit-id: a9838281156fe870bfcde5d1f7afc15264fd4aad --- Dockerfile | 38 ++++++++++++++++++++++++++++++++++---- README.md | 36 ++++++++++++++++++------------------ README_zh.md | 34 ++++++++++++++++++---------------- docker-compose.yml | 10 ++++++++-- 4 files changed, 78 insertions(+), 40 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0a35e355..45849601 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,44 @@ -FROM nvcr.io/nvidia/pytorch:24.01-py3 +# Use the NVIDIA official image with PyTorch 2.3.0 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html +FROM nvcr.io/nvidia/pytorch:24.02-py3 +# Define installation arguments +ARG INSTALL_BNB=false +ARG INSTALL_VLLM=false +ARG INSTALL_DEEPSPEED=false +ARG PIP_INDEX=https://pypi.org/simple + +# Set the working directory WORKDIR /app +# Install the requirements COPY requirements.txt /app/ -RUN pip install -r requirements.txt +RUN pip config set global.index-url $PIP_INDEX +RUN python -m pip install --upgrade pip +RUN python -m pip install -r requirements.txt +# Copy the rest of the application into the image COPY . /app/ -RUN pip install -e .[metrics,bitsandbytes,qwen] +# Install the LLaMA Factory +RUN EXTRA_PACKAGES="metrics"; \ + if [ "$INSTALL_BNB" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ + fi; \ + if [ "$INSTALL_VLLM" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ + fi; \ + if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ + fi; \ + pip install -e .[$EXTRA_PACKAGES] && \ + pip uninstall -y transformer-engine + +# Set up volumes VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] + +# Expose port 7860 for the LLaMA Board EXPOSE 7860 -CMD [ "llamafactory-cli", "webui" ] +# Expose port 8000 for the API service +EXPOSE 8000 diff --git a/README.md b/README.md index 4dea65b9..35dacd2e 100644 --- a/README.md +++ b/README.md @@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. 
```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` See [examples/README.md](examples/README.md) for advanced usage (including distributed training). @@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio)) -#### Use local environment - ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +llamafactory-cli webui ``` - - -#### Use Docker +### Build Docker ```bash -docker build -f ./Dockerfile -t llama-factory:latest . -docker run --gpus=all \ +docker build -f ./Dockerfile \ + --build-arg INSTALL_BNB=false \ + --build-arg INSTALL_VLLM=false \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +docker run -it --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ + -p 8000:8000 \ --shm-size 16G \ - --name llama_factory \ - -d llama-factory:latest + --name llamafactory \ + llamafactory:latest ``` -#### Use Docker Compose - -```bash -docker compose -f ./docker-compose.yml up -d -``` +> [!TIP] +> Use Docker Compose to build image via `docker compose up -d`.
Details about volume diff --git a/README_zh.md b/README_zh.md index ab0e8cb7..0ddb8b19 100644 --- a/README_zh.md +++ b/README_zh.md @@ -405,9 +405,9 @@ Docker 镜像: 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。 @@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动) -#### 使用本地环境 - ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +llamafactory-cli webui ``` -#### 使用 Docker +### 构建 Docker ```bash -docker build -f ./Dockerfile -t llama-factory:latest . -docker run --gpus=all \ +docker build -f ./Dockerfile \ + --build-arg INSTALL_BNB=false \ + --build-arg INSTALL_VLLM=false \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +docker run -it --gpus=all \ -v ./hf_cache:/root/.cache/huggingface/ \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ + -p 8000:8000 \ --shm-size 16G \ - --name llama_factory \ - -d llama-factory:latest + --name llamafactory \ + llamafactory:latest ``` -#### 使用 Docker Compose - -```bash -docker compose -f ./docker-compose.yml up -d -``` +> [!TIP] +> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
数据卷详情 diff --git a/docker-compose.yml b/docker-compose.yml index 9602a3e3..b3e4a34d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,17 +1,23 @@ version: '3.8' services: - llama-factory: + llamafactory: build: dockerfile: Dockerfile context: . - container_name: llama_factory + args: + INSTALL_BNB: false + INSTALL_VLLM: false + INSTALL_DEEPSPEED: false + PIP_INDEX: https://pypi.org/simple + container_name: llamafactory volumes: - ./hf_cache:/root/.cache/huggingface/ - ./data:/app/data - ./output:/app/output ports: - "7860:7860" + - "8000:8000" ipc: host deploy: resources: From 4d7dd0330db1e4e1ac962d5cf973e7d995b35fcb Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:37:17 +0800 Subject: [PATCH 011/160] fix #4160 The split heads should be concatenated in dim=2 Former-commit-id: 4b3f247f270d44df9fe226cfe0dabfb7fcd2deda --- src/llamafactory/model/model_utils/longlora.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index c8dc52f5..cd468979 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -96,7 +96,8 @@ def llama_attention_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) @@ -194,7 +195,8 @@ def llama_flash_attention_2_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous() @@ -293,7 +295,8 @@ def llama_sdpa_attention_forward( ( attn_output[:, :, : self.num_heads // 2], attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1), - ) + ), + dim=2, ) attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) @@ -303,7 +306,7 @@ def llama_sdpa_attention_forward( def _apply_llama_patch() -> None: - require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2") + require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2") LlamaAttention.forward = llama_attention_forward LlamaFlashAttention2.forward = llama_flash_attention_2_forward LlamaSdpaAttention.forward = llama_sdpa_attention_forward From 0f1e59232616c5273e08a2150f5348dc21512c8e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:44:26 +0800 Subject: [PATCH 012/160] release v0.8.1 Former-commit-id: 875a34f492701d1c644facbe9ede411af2931513 --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 1d4e43f1..8c34fd96 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -9,7 +9,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.1.dev0" +VERSION = "0.8.1" def print_env() -> None: From f330b736825cec3d2e570c23305ba190ec303dc9 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 00:50:53 +0800 Subject: [PATCH 013/160] set dev version Former-commit-id: 16c47cc15226119e33e46ba0f2f6ccb37072257f --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 8c34fd96..a8cb799d 
100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -9,7 +9,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.1" +VERSION = "0.8.2.dev0" def print_env() -> None: From 2723438531fd993aafb8c83bba784425e1f574a0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 01:04:16 +0800 Subject: [PATCH 014/160] tiny fix Former-commit-id: b5e9711ef375cc323fc083e742cccfc974550416 --- src/llamafactory/model/model_utils/longlora.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index cd468979..4a8c562a 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -182,11 +182,9 @@ def llama_flash_attention_2_forward( query_states, key_states, value_states = shift(query_states), shift(key_states), shift(value_states) if attention_mask is not None: attention_mask = attention_mask[:, :groupsz].repeat(num_groups, 1) - else: - groupsz = q_len attn_output: torch.Tensor = self._flash_attention_forward( - query_states, key_states, value_states, attention_mask, groupsz, dropout=dropout_rate + query_states, key_states, value_states, attention_mask, query_states.size(1), dropout=dropout_rate ) if getattr(self.config, "group_size_ratio", None) and self.training: # shift back From fce11bb38606cb760c297956c5f9b61c8686b7ff Mon Sep 17 00:00:00 2001 From: Alfredo Luque Date: Tue, 11 Jun 2024 00:07:06 +0000 Subject: [PATCH 015/160] add manifest so requirements.txt in sdist Former-commit-id: b501a3c56c51786c3006a2aca15a145641a4556c --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..82c51f63 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include LICENSE requirements.txt From 27aece94cf24d07af0112833dbabeb33eb73a367 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 12:48:53 +0800 Subject: [PATCH 016/160] tiny fix Former-commit-id: c4b2e263d9cefbad0fbc5de72422e4ef8edbcb54 --- src/llamafactory/hparams/parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index ff1fbf5d..ec5dd62c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -171,9 +171,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if training_args.do_train and model_args.quantization_device_map == "auto": raise ValueError("Cannot use device map for quantized models in training.") - if finetuning_args.use_dora and model_args.use_unsloth: - raise ValueError("Unsloth does not support DoRA.") - if finetuning_args.pure_bf16: if not is_torch_bf16_gpu_available(): raise ValueError("This device does not support `pure_bf16`.") From 820b6e7b327d035724ed9b422477731d0b75c967 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 15:38:38 +0800 Subject: [PATCH 017/160] fix #4198 Former-commit-id: 945d2c6cc73542adf9272ebd9aa332ea2c1c7361 --- src/llamafactory/hparams/model_args.py | 12 ++++++++++ src/llamafactory/model/patcher.py | 2 +- src/llamafactory/train/trainer_utils.py | 32 +++++++++++-------------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 6352a420..71467770 100644 --- 
a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,6 +1,8 @@ from dataclasses import asdict, dataclass, field from typing import Any, Dict, Literal, Optional +from typing_extensions import Self + @dataclass class ModelArguments: @@ -216,3 +218,13 @@ class ModelArguments: def to_dict(self) -> Dict[str, Any]: return asdict(self) + + @classmethod + def copyfrom(cls, old_arg: Self, **kwargs) -> Self: + arg_dict = old_arg.to_dict() + arg_dict.update(**kwargs) + new_arg = cls(**arg_dict) + new_arg.compute_dtype = old_arg.compute_dtype + new_arg.device_map = old_arg.device_map + new_arg.model_max_length = old_arg.model_max_length + return new_arg diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 87c92315..18221a10 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -79,7 +79,7 @@ def patch_config( if "device_map" not in init_kwargs and model_args.device_map: init_kwargs["device_map"] = model_args.device_map - if init_kwargs["device_map"] == "auto": + if init_kwargs.get("device_map", None) == "auto": init_kwargs["offload_folder"] = model_args.offload_folder diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 0ddcdb11..7e9cc881 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -83,15 +83,12 @@ def create_ref_model( The valuehead parameter is randomly initialized since it is useless for PPO training. """ if finetuning_args.ref_model is not None: - ref_model_args_dict = model_args.to_dict() - ref_model_args_dict.update( - dict( - model_name_or_path=finetuning_args.ref_model, - adapter_name_or_path=finetuning_args.ref_model_adapters, - quantization_bit=finetuning_args.ref_model_quantization_bit, - ) + ref_model_args = ModelArguments.copyfrom( + model_args, + model_name_or_path=finetuning_args.ref_model, + adapter_name_or_path=finetuning_args.ref_model_adapters, + quantization_bit=finetuning_args.ref_model_quantization_bit, ) - ref_model_args = ModelArguments(**ref_model_args_dict) ref_finetuning_args = FinetuningArguments() tokenizer = load_tokenizer(ref_model_args)["tokenizer"] ref_model = load_model( @@ -102,9 +99,11 @@ def create_ref_model( if finetuning_args.finetuning_type == "lora": ref_model = None else: - tokenizer = load_tokenizer(model_args)["tokenizer"] + ref_model_args = ModelArguments.copyfrom(model_args) + ref_finetuning_args = FinetuningArguments() + tokenizer = load_tokenizer(ref_model_args)["tokenizer"] ref_model = load_model( - tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead + tokenizer, ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead ) logger.info("Created reference model from the model itself.") @@ -139,15 +138,12 @@ def create_reward_model( logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model)) return None else: - reward_model_args_dict = model_args.to_dict() - reward_model_args_dict.update( - dict( - model_name_or_path=finetuning_args.reward_model, - adapter_name_or_path=finetuning_args.reward_model_adapters, - quantization_bit=finetuning_args.reward_model_quantization_bit, - ) + reward_model_args = ModelArguments.copyfrom( + model_args, + model_name_or_path=finetuning_args.reward_model, + adapter_name_or_path=finetuning_args.reward_model_adapters, + quantization_bit=finetuning_args.reward_model_quantization_bit, ) - reward_model_args = 
ModelArguments(**reward_model_args_dict) reward_finetuning_args = FinetuningArguments() tokenizer = load_tokenizer(reward_model_args)["tokenizer"] reward_model = load_model( From f14f67f8036fe52e3f5ff4c08f085b7eb3a20f2a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 15:40:21 +0800 Subject: [PATCH 018/160] Update bug-report.yml Former-commit-id: bb022cd867ebf2593e40fc6ba43b768603b129a3 --- .github/ISSUE_TEMPLATE/bug-report.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 1d962200..768adea6 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -38,7 +38,9 @@ body: 请合理使用 Markdown 标签来格式化您的文本。 placeholder: | + ```bash llamafactory-cli train ... + ``` - type: textarea id: expected-behavior From 77bf3d66c7537a4c423f63b24c67b3e8b653ea60 Mon Sep 17 00:00:00 2001 From: d <913015993@qq.com> Date: Tue, 11 Jun 2024 16:21:48 +0800 Subject: After extensive continued pre-training and comparison experiments, found this bug: during pre-training Llama3 uses '<|end_of_text|>' as tokenizer.eos_token, so each sample must also be terminated with it here rather than '<|eot_id|>', otherwise it easily causes a severe performance drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Former-commit-id: ef470561f742b16eaa0f99c4cadecd7c84ce6bd2 --- src/llamafactory/data/processors/pretrain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 87727b55..4050f74c 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -12,7 +12,8 @@ def preprocess_pretrain_dataset( examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` if packing is enabled - text_examples = [messages[0]["content"] + tokenizer.eos_token for messages in examples["prompt"]] + eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token + text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]] if not data_args.packing: if data_args.template == "gemma": From 08f2f99f4bc527223bf09840267bf4fdecf78cd1 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 11 Jun 2024 16:52:36 +0800 Subject: [PATCH 020/160] fix deepspeed version Former-commit-id: 938a69bb07d4de7d82928ff01c582032162c1480 --- src/llamafactory/model/model_utils/moe.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index e554e45a..8a73c844 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,5 +1,6 @@ -from typing import 
TYPE_CHECKING +from typing import TYPE_CHECKING, Sequence +import torch from transformers.integrations import is_deepspeed_zero3_enabled from transformers.utils.versions import require_version @@ -10,6 +11,13 @@ if TYPE_CHECKING: from ...hparams import ModelArguments +def _set_z3_leaf_modules(model: "PreTrainedModel", leaf_modules: Sequence["torch.nn.Module"]) -> None: + require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0") + from deepspeed.utils import set_z3_leaf_modules # type: ignore + + set_z3_leaf_modules(model, leaf_modules) + + def add_z3_leaf_module(model: "PreTrainedModel") -> None: r""" Sets module as a leaf module to skip partitioning in deepspeed zero3. @@ -17,33 +25,30 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None: if not is_deepspeed_zero3_enabled(): return - require_version("deepspeed>=0.13.0", "To fix: pip install deepspeed>=0.13.0") - from deepspeed.utils import set_z3_leaf_modules # type: ignore - if getattr(model.config, "model_type", None) == "dbrx": from transformers.models.dbrx.modeling_dbrx import DbrxFFN - set_z3_leaf_modules(model, [DbrxFFN]) + _set_z3_leaf_modules(model, [DbrxFFN]) if getattr(model.config, "model_type", None) == "jamba": from transformers.models.jamba.modeling_jamba import JambaSparseMoeBlock - set_z3_leaf_modules(model, [JambaSparseMoeBlock]) + _set_z3_leaf_modules(model, [JambaSparseMoeBlock]) if getattr(model.config, "model_type", None) == "jetmoe": from transformers.models.jetmoe.modeling_jetmoe import JetMoeMoA, JetMoeMoE - set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE]) + _set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE]) if getattr(model.config, "model_type", None) == "mixtral": from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock - set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) + _set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) if getattr(model.config, "model_type", None) == "qwen2moe": from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeSparseMoeBlock - set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock]) + _set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock]) def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None: From cc9717e2f2462a11cb28a7c09088ae0ae126d899 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 11 Jun 2024 17:02:14 +0800 Subject: [PATCH 021/160] Update pretrain.py Former-commit-id: e2317b2a84149e39fddfd6366be3de23dfb71f82 --- src/llamafactory/data/processors/pretrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 4050f74c..832c987e 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -12,7 +12,7 @@ def preprocess_pretrain_dataset( examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments" ) -> Dict[str, List[List[int]]]: # build grouped texts with format `X1 X2 X3 ...` if packing is enabled - eos_token = '<|end_of_text|>' if data_args.template == 'llama3' else tokenizer.eos_token + eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token text_examples = [messages[0]["content"] + eos_token for messages in examples["prompt"]] if not data_args.packing: From 16c7c923969bb2871909a1bc629b49f16af7bc68 Mon Sep 17 00:00:00 2001 From: Arthur Kim Date: Wed, 12 Jun 2024 16:49:12 +0900 Subject: [PATCH 022/160] Support vllm==0.5.0 Former-commit-id: 
e7a8ffd7af21bc3759f055033ba2209fa7a1be0e --- src/llamafactory/chat/vllm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 87ce8684..d096f6eb 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -13,7 +13,10 @@ from .base_engine import BaseEngine, Response if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - from vllm.sequence import MultiModalData + try: + from vllm.multimodal import MultiModalData # vllm==0.5.0 + except ImportError: + from vllm.sequence import MultiModalData # vllm<0.5.0 if TYPE_CHECKING: From 6392d45ea766727e295bf20b443a31161cc3dc63 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 12 Jun 2024 16:50:11 +0800 Subject: [PATCH 023/160] fix #4242 Former-commit-id: cf260e7af03f49aa5e3d6daf3b27738ff9b9bcb8 --- Dockerfile | 2 +- src/llamafactory/chat/vllm_engine.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 45849601..3932ff30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,7 @@ RUN EXTRA_PACKAGES="metrics"; \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ pip install -e .[$EXTRA_PACKAGES] && \ - pip uninstall -y transformer-engine + pip uninstall -y transformer-engine flash-attn # Set up volumes VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d096f6eb..d2850a6e 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -13,10 +13,11 @@ from .base_engine import BaseEngine, Response if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest + try: - from vllm.multimodal import MultiModalData # vllm==0.5.0 + from vllm.multimodal import MultiModalData # type: ignore (for vllm>=0.5.0) except ImportError: - from vllm.sequence import MultiModalData # vllm<0.5.0 + from vllm.sequence import MultiModalData # for vllm<0.5.0 if TYPE_CHECKING: From fe2c7eaa930c69773b34d99d289ac39ebd739343 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 12 Jun 2024 17:39:12 +0800 Subject: [PATCH 024/160] update readme Former-commit-id: a436aaa83f0cf12c8f404459e5486f9369d538ec --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 35dacd2e..a773a5f4 100644 --- a/README.md +++ b/README.md @@ -443,7 +443,7 @@ docker run -it --gpus=all \ ``` > [!TIP] -> Use Docker Compose to build image via `docker compose up -d`. +> Use Docker Compose to build image via `docker-compose up -d`.
Details about volume diff --git a/README_zh.md b/README_zh.md index 0ddb8b19..7a9cb159 100644 --- a/README_zh.md +++ b/README_zh.md @@ -443,7 +443,7 @@ docker run -it --gpus=all \ ``` > [!TIP] -> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。 +> 通过 `docker-compose up -d` 使用 Docker Compose 构建镜像。
数据卷详情 From 799873aa14cd5809a643e896908dea129aa600fd Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Wed, 12 Jun 2024 18:29:03 +0800 Subject: [PATCH 025/160] adapt vllm==0.5.0 Former-commit-id: 02afd9ff64f23e6707ac739ae1269f41bd70c340 --- src/llamafactory/chat/vllm_engine.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d2850a6e..34126adf 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,10 +1,12 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union +from packaging import version + from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger from ..extras.misc import get_device_count -from ..extras.packages import is_vllm_available +from ..extras.packages import is_vllm_available, _get_package_version from ..model import load_config, load_tokenizer from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response @@ -14,10 +16,10 @@ if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - try: - from vllm.multimodal import MultiModalData # type: ignore (for vllm>=0.5.0) - except ImportError: - from vllm.sequence import MultiModalData # for vllm<0.5.0 + if _get_package_version("vllm") >= version.parse("0.5.0"): + from vllm.multimodal.image import ImagePixelData + else: + from vllm.sequence import MultiModalData if TYPE_CHECKING: @@ -110,7 +112,10 @@ class VllmEngine(BaseEngine): if self.processor is not None and image is not None: # add image features image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor") pixel_values = image_processor(image, return_tensors="pt")["pixel_values"] - multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) + if _get_package_version("vllm") >= version.parse("0.5.0"): + multi_modal_data = ImagePixelData(pixel_values) + else: + multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) else: multi_modal_data = None From 41beb7f0a398c2e362f80c1fbd8743fede1c4173 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 00:07:48 +0800 Subject: [PATCH 026/160] fix docker compose usage Former-commit-id: 59a5bd5d5c8d2a44e2dad26b74e77a45e109c8d6 --- README.md | 10 ++++++++-- README_zh.md | 10 ++++++++-- docker-compose.yml | 5 +++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a773a5f4..65964560 100644 --- a/README.md +++ b/README.md @@ -423,6 +423,8 @@ llamafactory-cli webui ### Build Docker +#### Use Docker + ```bash docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ @@ -442,8 +444,12 @@ docker run -it --gpus=all \ llamafactory:latest ``` -> [!TIP] -> Use Docker Compose to build image via `docker-compose up -d`. +#### Use Docker Compose + +```bash +docker-compose up -d +docker-compose exec -it llamafactory bash +```
Details about volume diff --git a/README_zh.md b/README_zh.md index 7a9cb159..7962a6d1 100644 --- a/README_zh.md +++ b/README_zh.md @@ -423,6 +423,8 @@ llamafactory-cli webui ### 构建 Docker +#### 使用 Docker + ```bash docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ @@ -442,8 +444,12 @@ docker run -it --gpus=all \ llamafactory:latest ``` -> [!TIP] -> 通过 `docker-compose up -d` 使用 Docker Compose 构建镜像。 +#### 使用 Docker Compose + +```bash +docker-compose up -d +docker-compose exec -it llamafactory bash +```
数据卷详情 diff --git a/docker-compose.yml b/docker-compose.yml index b3e4a34d..c5dc34e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: llamafactory: build: @@ -19,6 +17,9 @@ services: - "7860:7860" - "8000:8000" ipc: host + tty: true + stdin_open: true + command: bash deploy: resources: reservations: From 5080f2314cd57fd0d769d18f2cdbcd4e002500e3 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 00:48:44 +0800 Subject: [PATCH 027/160] fix lint Former-commit-id: b170165679317af2b3f03633afac27661b3deb06 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/hparams/finetuning_args.py | 17 ++++++++--------- src/llamafactory/hparams/model_args.py | 12 ++++++++---- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 65964560..994a62c6 100644 --- a/README.md +++ b/README.md @@ -448,7 +448,7 @@ docker run -it --gpus=all \ ```bash docker-compose up -d -docker-compose exec -it llamafactory bash +docker-compose exec llamafactory bash ```
Details about volume diff --git a/README_zh.md b/README_zh.md index 7962a6d1..fa395c6b 100644 --- a/README_zh.md +++ b/README_zh.md @@ -448,7 +448,7 @@ docker run -it --gpus=all \ ```bash docker-compose up -d -docker-compose exec -it llamafactory bash +docker-compose exec llamafactory bash ```
数据卷详情 diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 08af31e4..facbe792 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Literal, Optional +from typing import List, Literal, Optional @dataclass @@ -319,20 +319,19 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA return [item.strip() for item in arg.split(",")] return arg - self.freeze_trainable_modules = split_arg(self.freeze_trainable_modules) - self.freeze_extra_modules = split_arg(self.freeze_extra_modules) - self.lora_alpha = self.lora_alpha or self.lora_rank * 2 - self.lora_target = split_arg(self.lora_target) - self.additional_target = split_arg(self.additional_target) - self.galore_target = split_arg(self.galore_target) + self.freeze_trainable_modules: List[str] = split_arg(self.freeze_trainable_modules) + self.freeze_extra_modules: Optional[List[str]] = split_arg(self.freeze_extra_modules) + self.lora_alpha: int = self.lora_alpha or self.lora_rank * 2 + self.lora_target: List[str] = split_arg(self.lora_target) + self.additional_target: Optional[List[str]] = split_arg(self.additional_target) + self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only + self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." assert self.reward_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 
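The lint pass above mostly adds type annotations around `split_arg` in `FinetuningArguments.__post_init__`; the helper's behaviour is unchanged. A self-contained sketch of the same comma-splitting rule, with only the logic taken from the diff and the surrounding scaffolding invented for the example:

```python
from typing import List, Optional, Union


def split_arg(arg: Optional[Union[str, List[str]]]) -> Optional[List[str]]:
    """Turn a comma-separated string into a stripped list; pass other values through."""
    if isinstance(arg, str):
        return [item.strip() for item in arg.split(",")]
    return arg


if __name__ == "__main__":
    print(split_arg("q_proj, v_proj"))          # ['q_proj', 'v_proj']
    print(split_arg(["already", "a", "list"]))  # returned unchanged
    print(split_arg(None))                      # None
```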
- self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] - if self.stage == "ppo" and self.reward_model is None: raise ValueError("`reward_model` is necessary for PPO training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 71467770..359beafd 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,9 +1,13 @@ from dataclasses import asdict, dataclass, field -from typing import Any, Dict, Literal, Optional +from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union from typing_extensions import Self +if TYPE_CHECKING: + import torch + + @dataclass class ModelArguments: r""" @@ -194,9 +198,9 @@ class ModelArguments: ) def __post_init__(self): - self.compute_dtype = None - self.device_map = None - self.model_max_length = None + self.compute_dtype: Optional["torch.dtype"] = None + self.device_map: Optional[Union[str, Dict[str, Any]]] = None + self.model_max_length: Optional[int] = None if self.split_special_tokens and self.use_fast_tokenizer: raise ValueError("`split_special_tokens` is only supported for slow tokenizers.") From 1b6786a21f5bd384f1f401598c94287c5b2826d7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:00:56 +0800 Subject: [PATCH 028/160] add neo-sft dataset Former-commit-id: 34863fa7cb641ceca92e3a2eec914126db537b62 --- README.md | 1 + README_zh.md | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 994a62c6..5bbaf2d7 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) diff --git a/README_zh.md b/README_zh.md index fa395c6b..fb616909 100644 --- a/README_zh.md +++ b/README_zh.md @@ -259,6 +259,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Cosmopedia (en)](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia) - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) +- [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) From 0a75224f62b2c961b922d0cb95514fc5cd683e57 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 01:58:16 +0800 Subject: [PATCH 029/160] clean code Former-commit-id: f54cafd5c7f0383370d1a2f357834a61a97397ce --- src/llamafactory/chat/vllm_engine.py | 12 +++++----- src/llamafactory/extras/packages.py | 22 +++++-------------- .../model/model_utils/attention.py | 7 +++--- src/llamafactory/train/sft/metric.py | 3 ++- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py 
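The same commit annotates the runtime-only attributes that `ModelArguments.__post_init__` initialises to `None`. A toy dataclass showing that pattern, declaring the types at the point of assignment rather than as CLI-visible fields; the class name is illustrative and `str` stands in for `torch.dtype` so the snippet has no torch dependency:

```python
from dataclasses import dataclass
from typing import Any, Dict, Optional, Union


@dataclass
class ToyModelArguments:
    """Minimal stand-in for an argument dataclass parsed from the CLI."""

    model_name_or_path: str = "meta-llama/Meta-Llama-3-8B-Instruct"

    def __post_init__(self):
        # Runtime-only state: not CLI arguments, so they are created (and
        # type-annotated) here instead of being declared as dataclass fields.
        self.compute_dtype: Optional[str] = None
        self.device_map: Optional[Union[str, Dict[str, Any]]] = None
        self.model_max_length: Optional[int] = None


if __name__ == "__main__":
    args = ToyModelArguments()
    print(args.model_name_or_path, args.compute_dtype, args.device_map)
```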
index 34126adf..e4c05478 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,12 +1,10 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union -from packaging import version - from ..data import get_template_and_fix_tokenizer from ..extras.logging import get_logger from ..extras.misc import get_device_count -from ..extras.packages import is_vllm_available, _get_package_version +from ..extras.packages import is_vllm_available, is_vllm_version_greater_than_0_5 from ..model import load_config, load_tokenizer from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM from .base_engine import BaseEngine, Response @@ -16,7 +14,7 @@ if is_vllm_available(): from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams from vllm.lora.request import LoRARequest - if _get_package_version("vllm") >= version.parse("0.5.0"): + if is_vllm_version_greater_than_0_5(): from vllm.multimodal.image import ImagePixelData else: from vllm.sequence import MultiModalData @@ -112,9 +110,9 @@ class VllmEngine(BaseEngine): if self.processor is not None and image is not None: # add image features image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor") pixel_values = image_processor(image, return_tensors="pt")["pixel_values"] - if _get_package_version("vllm") >= version.parse("0.5.0"): - multi_modal_data = ImagePixelData(pixel_values) - else: + if is_vllm_version_greater_than_0_5(): + multi_modal_data = ImagePixelData(image=pixel_values) + else: # TODO: remove vllm 0.4.3 support multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values) else: multi_modal_data = None diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 4c9e6492..0746bb4f 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,5 +1,6 @@ import importlib.metadata import importlib.util +from functools import lru_cache from typing import TYPE_CHECKING from packaging import version @@ -24,10 +25,6 @@ def is_fastapi_available(): return _is_package_available("fastapi") -def is_flash_attn2_available(): - return _is_package_available("flash_attn") and _get_package_version("flash_attn") > version.parse("2.0.0") - - def is_galore_available(): return _is_package_available("galore_torch") @@ -36,18 +33,10 @@ def is_gradio_available(): return _is_package_available("gradio") -def is_jieba_available(): - return _is_package_available("jieba") - - def is_matplotlib_available(): return _is_package_available("matplotlib") -def is_nltk_available(): - return _is_package_available("nltk") - - def is_pillow_available(): return _is_package_available("PIL") @@ -60,10 +49,6 @@ def is_rouge_available(): return _is_package_available("rouge_chinese") -def is_sdpa_available(): - return _get_package_version("torch") > version.parse("2.1.1") - - def is_starlette_available(): return _is_package_available("sse_starlette") @@ -74,3 +59,8 @@ def is_uvicorn_available(): def is_vllm_available(): return _is_package_available("vllm") + + +@lru_cache +def is_vllm_version_greater_than_0_5(): + return _get_package_version("vllm") >= version.parse("0.5.0") diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index b52ddc86..2bd36fdc 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,7 +1,8 @@ from 
typing import TYPE_CHECKING +from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available + from ...extras.logging import get_logger -from ...extras.packages import is_flash_attn2_available, is_sdpa_available if TYPE_CHECKING: @@ -21,13 +22,13 @@ def configure_attn_implementation(config: "PretrainedConfig", model_args: "Model requested_attn_implementation = "eager" elif model_args.flash_attn == "sdpa": - if not is_sdpa_available(): + if not is_torch_sdpa_available(): logger.warning("torch>=2.1.1 is required for SDPA attention.") return requested_attn_implementation = "sdpa" elif model_args.flash_attn == "fa2": - if not is_flash_attn2_available(): + if not is_flash_attn_2_available(): logger.warning("FlashAttention-2 is not installed.") return diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index b135fcfb..6ed356c1 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -2,9 +2,10 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union import numpy as np +from transformers.utils import is_jieba_available, is_nltk_available from ...extras.constants import IGNORE_INDEX -from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_available +from ...extras.packages import is_rouge_available if TYPE_CHECKING: From 103a507b39ef6fd2135f8cebef9d2202c1213186 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 02:25:50 +0800 Subject: [PATCH 030/160] fix #4209 DeepSpeed ZeRO3 has inflight param error when calling model.eval() Former-commit-id: 4be013f18ea6a35b5a11db98db5f0670ffb41619 --- src/llamafactory/train/dpo/trainer.py | 7 +++++-- src/llamafactory/train/kto/trainer.py | 7 +++++-- src/llamafactory/train/ppo/trainer.py | 2 ++ src/llamafactory/train/trainer_utils.py | 13 ------------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index d860b29a..5bdb9c43 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict from contextlib import nullcontext from types import MethodType @@ -10,7 +11,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -61,6 +62,8 @@ class CustomDPOTrainer(DPOTrainer): if not hasattr(self, "accelerator"): raise AttributeError("Please update `transformers`.") + warnings.simplefilter("ignore") # remove gc warnings on ref model + if ref_model is not None: if self.is_deepspeed_enabled: if not ( @@ -176,7 +179,7 @@ class CustomDPOTrainer(DPOTrainer): if self.ref_model is None: ref_model = model - ref_context = get_ref_context(self.accelerator, model) + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 22a84e4a..3b4488fc 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,3 +1,4 @@ +import warnings from collections import defaultdict from contextlib import nullcontext from types 
import MethodType @@ -9,7 +10,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps, get_ref_context +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -60,6 +61,8 @@ class CustomKTOTrainer(KTOTrainer): if not hasattr(self, "accelerator"): raise AttributeError("Please update `transformers`.") + warnings.simplefilter("ignore") # remove gc warnings on ref model + if ref_model is not None: if self.is_deepspeed_enabled: if not ( @@ -143,7 +146,7 @@ class CustomKTOTrainer(KTOTrainer): """ if self.ref_model is None: ref_model = model - ref_context = get_ref_context(self.accelerator, model) + ref_context = self.accelerator.unwrap_model(model).disable_adapter() else: ref_model = self.ref_model ref_context = nullcontext() diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 2e1288e4..737c45a3 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,6 +1,7 @@ import math import os import sys +import warnings from types import MethodType from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple @@ -136,6 +137,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): device_type = unwrapped_model.pretrained_model.device.type self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + warnings.simplefilter("ignore") # remove gc warnings on ref model if finetuning_args.reward_model_type == "full": if self.is_deepspeed_enabled: diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 7e9cc881..48944a63 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,4 +1,3 @@ -from contextlib import contextmanager from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch @@ -19,7 +18,6 @@ if is_galore_available(): if TYPE_CHECKING: - from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -154,17 +152,6 @@ def create_reward_model( return reward_model -@contextmanager -def get_ref_context(accelerator: "Accelerator", model: "PreTrainedModel"): - r""" - Gets adapter context for the reference model. - """ - with accelerator.unwrap_model(model).disable_adapter(): - model.eval() - yield - model.train() - - def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. 
(weights in non-layernorm layers) From 49b58fd6afddacc66c4f560d970b7b776419f093 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 02:48:21 +0800 Subject: [PATCH 031/160] fix #4221 Former-commit-id: 05a3be4853b941909e7d193c31e8d62c8c5f879b --- src/llamafactory/data/aligner.py | 8 ++++++-- src/llamafactory/data/loader.py | 11 ++++++----- src/llamafactory/data/preprocess.py | 3 +-- src/llamafactory/data/processors/feedback.py | 3 +-- src/llamafactory/data/processors/pairwise.py | 3 +-- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/data/processors/supervised.py | 3 +-- src/llamafactory/data/processors/unsupervised.py | 3 +-- src/llamafactory/train/sft/metric.py | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 434956af..3e9d5c46 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -10,6 +10,7 @@ from .data_utils import Role if TYPE_CHECKING: from datasets import Dataset, IterableDataset + from transformers import Seq2SeqTrainingArguments from ..hparams import DataArguments from .parser import DatasetAttr @@ -175,7 +176,10 @@ def convert_sharegpt( def align_dataset( - dataset: Union["Dataset", "IterableDataset"], dataset_attr: "DatasetAttr", data_args: "DataArguments" + dataset: Union["Dataset", "IterableDataset"], + dataset_attr: "DatasetAttr", + data_args: "DataArguments", + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: r""" Aligned dataset: @@ -208,7 +212,7 @@ def align_dataset( if not data_args.streaming: kwargs = dict( num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=(not data_args.overwrite_cache), + load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0), desc="Converting format of dataset", ) diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 2c236c76..ba426f81 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -18,8 +18,7 @@ from .template import get_template_and_fix_tokenizer if TYPE_CHECKING: from datasets import Dataset, IterableDataset - from transformers import ProcessorMixin, Seq2SeqTrainingArguments - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments from ..hparams import DataArguments, ModelArguments from .parser import DatasetAttr @@ -32,6 +31,7 @@ def load_single_dataset( dataset_attr: "DatasetAttr", model_args: "ModelArguments", data_args: "DataArguments", + training_args: "Seq2SeqTrainingArguments", ) -> Union["Dataset", "IterableDataset"]: logger.info("Loading dataset {}...".format(dataset_attr)) data_path, data_name, data_dir, data_files = None, None, None, None @@ -123,7 +123,7 @@ def load_single_dataset( max_samples = min(data_args.max_samples, len(dataset)) dataset = dataset.select(range(max_samples)) - return align_dataset(dataset, dataset_attr, data_args) + return align_dataset(dataset, dataset_attr, data_args, training_args) def get_dataset( @@ -157,7 +157,8 @@ def get_dataset( if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True): raise ValueError("The dataset is not applicable in the current training stage.") - all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args)) + all_datasets.append(load_single_dataset(dataset_attr, model_args, data_args, 
training_args)) + dataset = merge_dataset(all_datasets, data_args, training_args) with training_args.main_process_first(desc="pre-process dataset"): @@ -169,7 +170,7 @@ def get_dataset( if not data_args.streaming: kwargs = dict( num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=(not data_args.overwrite_cache), + load_from_cache_file=(not data_args.overwrite_cache) or (training_args.local_process_index != 0), desc="Running tokenizer on dataset", ) diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 97789c39..875f55d6 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -13,8 +13,7 @@ from .processors.unsupervised import preprocess_unsupervised_dataset, print_unsu if TYPE_CHECKING: - from transformers import ProcessorMixin, Seq2SeqTrainingArguments - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin, Seq2SeqTrainingArguments from ..hparams import DataArguments from .template import Template diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 98d83658..5fba452c 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index fe984efa..db52c6a7 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index 832c987e..a10ccabd 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any, Dict, List if TYPE_CHECKING: - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer from ...hparams import DataArguments diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 19d60280..f59f5371 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -7,8 +7,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, gre if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index f711eeac..38497a15 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ 
b/src/llamafactory/data/processors/unsupervised.py @@ -6,8 +6,7 @@ from .processor_utils import get_paligemma_token_type_ids, get_pixel_values if TYPE_CHECKING: - from transformers import ProcessorMixin - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer, ProcessorMixin from ...hparams import DataArguments from ..template import Template diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 6ed356c1..923238d6 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -9,7 +9,7 @@ from ...extras.packages import is_rouge_available if TYPE_CHECKING: - from transformers.tokenization_utils import PreTrainedTokenizer + from transformers import PreTrainedTokenizer if is_jieba_available(): From 46f441dd370f7d3aca7cbb6ca24c82301f95932f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 13 Jun 2024 03:15:06 +0800 Subject: [PATCH 032/160] update examples Former-commit-id: 19681f93db399d695aa8e35f8ec2a9e720875baa --- README.md | 2 +- README_zh.md | 2 +- examples/README.md | 126 ++++++++--------- examples/README_zh.md | 128 ++++++++---------- .../extras/fsdp_qlora/llama3_lora_sft.yaml | 4 +- .../extras/llama_pro/llama3_freeze_sft.yaml | 1 + examples/extras/loraplus/llama3_lora_sft.yaml | 1 + examples/extras/mod/llama3_full_sft.yaml | 1 + examples/lora_multi_gpu/llama3_lora_sft.yaml | 41 ------ .../llama3_full_predict.yaml | 0 .../llama3_full_sft_ds3.yaml} | 0 .../llama3_lora_dpo.yaml | 1 + .../llama3_lora_eval.yaml | 0 .../llama3_lora_kto.yaml | 2 + .../llama3_lora_ppo.yaml | 1 + .../llama3_lora_predict.yaml | 1 + .../llama3_lora_pretrain.yaml | 1 + .../llama3_lora_reward.yaml | 1 + .../llama3_lora_sft.yaml | 1 + .../llama3_lora_sft_ds0.yaml} | 4 +- .../llama3_lora_sft_ds3.yaml} | 4 +- .../llama3_preprocess.yaml | 0 .../llava1_5_lora_sft.yaml | 1 + .../llama3_lora_sft_aqlm.yaml | 1 + .../llama3_lora_sft_awq.yaml | 1 + .../llama3_lora_sft_bitsandbytes.yaml | 1 + .../llama3_lora_sft_gptq.yaml | 1 + 27 files changed, 128 insertions(+), 199 deletions(-) delete mode 100644 examples/lora_multi_gpu/llama3_lora_sft.yaml rename examples/{full_multi_gpu => train_full}/llama3_full_predict.yaml (100%) rename examples/{full_multi_gpu/llama3_full_sft.yaml => train_full/llama3_full_sft_ds3.yaml} (100%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_dpo.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_eval.yaml (100%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_kto.yaml (94%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_ppo.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_predict.yaml (95%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_pretrain.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_reward.yaml (96%) rename examples/{lora_single_gpu => train_lora}/llama3_lora_sft.yaml (96%) rename examples/{lora_multi_npu/llama3_lora_sft_ds.yaml => train_lora/llama3_lora_sft_ds0.yaml} (98%) rename examples/{lora_multi_gpu/llama3_lora_sft_ds.yaml => train_lora/llama3_lora_sft_ds3.yaml} (98%) rename examples/{lora_single_gpu => train_lora}/llama3_preprocess.yaml (100%) rename examples/{lora_single_gpu => train_lora}/llava1_5_lora_sft.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_aqlm.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_awq.yaml (96%) rename examples/{qlora_single_gpu => 
train_qlora}/llama3_lora_sft_bitsandbytes.yaml (96%) rename examples/{qlora_single_gpu => train_qlora}/llama3_lora_sft_gptq.yaml (96%) diff --git a/README.md b/README.md index 5bbaf2d7..5dd10d5a 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ Please refer to [data/README.md](data/README.md) for checking the details about Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively. ```bash -llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` diff --git a/README_zh.md b/README_zh.md index fb616909..76bd2d89 100644 --- a/README_zh.md +++ b/README_zh.md @@ -406,7 +406,7 @@ Docker 镜像: 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。 ```bash -llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml llamafactory-cli chat examples/inference/llama3_lora_sft.yaml llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` diff --git a/examples/README.md b/examples/README.md index f985d552..3372afb9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -4,59 +4,57 @@ Make sure to execute these commands in the `LLaMA-Factory` directory. ## Table of Contents -- [LoRA Fine-Tuning on A Single GPU](#lora-fine-tuning-on-a-single-gpu) -- [QLoRA Fine-Tuning on a Single GPU](#qlora-fine-tuning-on-a-single-gpu) -- [LoRA Fine-Tuning on Multiple GPUs](#lora-fine-tuning-on-multiple-gpus) -- [LoRA Fine-Tuning on Multiple NPUs](#lora-fine-tuning-on-multiple-npus) -- [Full-Parameter Fine-Tuning on Multiple GPUs](#full-parameter-fine-tuning-on-multiple-gpus) +- [LoRA Fine-Tuning](#lora-fine-tuning) +- [QLoRA Fine-Tuning](#qlora-fine-tuning) +- [Full-Parameter Fine-Tuning](#full-parameter-fine-tuning) - [Merging LoRA Adapters and Quantization](#merging-lora-adapters-and-quantization) - [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models) - [Extras](#extras) ## Examples -### LoRA Fine-Tuning on A Single GPU +### LoRA Fine-Tuning #### (Continuous) Pre-Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml ``` #### Supervised Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### Multimodal Supervised Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml ``` #### Reward Modeling ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml ``` #### PPO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml ``` #### DPO/ORPO/SimPO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml ``` #### KTO Training ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train 
examples/lora_single_gpu/llama3_lora_kto.yaml +llamafactory-cli train examples/train_lora/llama3_lora_kto.yaml ``` #### Preprocess Dataset @@ -64,95 +62,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset. ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml +llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ``` #### Evaluating on MMLU/CMMLU/C-Eval Benchmarks ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml +llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml -``` - -### QLoRA Fine-Tuning on a Single GPU - -#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml -``` - -#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml -``` - -#### Supervised Fine-Tuning with 4-bit AWQ Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml -``` - -#### Supervised Fine-Tuning with 2-bit AQLM Quantization - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml -``` - -### LoRA Fine-Tuning on Multiple GPUs - -#### Supervised Fine-Tuning on Single Node - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding) ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml ``` -### LoRA Fine-Tuning on Multiple NPUs +### QLoRA Fine-Tuning -#### Supervised Fine-Tuning with DeepSpeed ZeRO-0 +#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) ```bash -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` -### Full-Parameter Fine-Tuning on Multiple GPUs +#### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +``` + +#### Supervised Fine-Tuning with 4-bit AWQ Quantization + 
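Patch 030 above removes the custom `get_ref_context` helper: when no separate reference model is configured, the policy model itself is reused with its LoRA adapter temporarily disabled through PEFT's `disable_adapter()` context, and a `nullcontext()` is used otherwise. A rough sketch of that control flow with a toy model object standing in for PEFT and accelerate, so it runs without either installed:

```python
from contextlib import contextmanager, nullcontext
from typing import Optional


class ToyPeftModel:
    """Stand-in for a PEFT model exposing a disable_adapter() context manager."""

    def __init__(self) -> None:
        self.adapter_enabled = True

    @contextmanager
    def disable_adapter(self):
        self.adapter_enabled = False  # behave like the frozen base model
        try:
            yield
        finally:
            self.adapter_enabled = True


def reference_forward(model: ToyPeftModel, ref_model: Optional[ToyPeftModel] = None) -> bool:
    """Choose the reference model and the context to run it under, as in the patch."""
    if ref_model is None:
        ref_model = model
        ref_context = model.disable_adapter()  # reuse the policy model minus its adapter
    else:
        ref_context = nullcontext()  # a dedicated reference model needs no special context
    with ref_context:
        return ref_model.adapter_enabled


if __name__ == "__main__":
    policy = ToyPeftModel()
    print(reference_forward(policy))                  # False: adapter disabled for the pass
    print(reference_forward(policy, ToyPeftModel()))  # True: separate ref model untouched
```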
+```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +``` + +#### Supervised Fine-Tuning with 2-bit AQLM Quantization + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +``` + +### Full-Parameter Fine-Tuning #### Supervised Fine-Tuning on Single Node ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### Supervised Fine-Tuning on Multiple Nodes ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### Batch Predicting and Computing BLEU and ROUGE Scores ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml +llamafactory-cli train examples/train_full/llama3_full_predict.yaml ``` ### Merging LoRA Adapters and Quantization @@ -162,35 +144,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam Note: DO NOT use quantized model or `quantization_bit` when merging LoRA adapters. ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` #### Quantizing Model using AutoGPTQ ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ``` ### Inferring LoRA Fine-Tuned Models -Use `CUDA_VISIBLE_DEVICES=0,1` to infer models on multiple devices. 
- #### Use CLI ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml ``` #### Use Web UI ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml ``` #### Launch OpenAI-style API ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/llama3_lora_sft.yaml ``` ### Extras @@ -198,32 +178,32 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y #### Full-Parameter Fine-Tuning using GaLore ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml +llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml ``` #### Full-Parameter Fine-Tuning using BAdam ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml +llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml ``` #### LoRA+ Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml +llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` #### Mixture-of-Depths Fine-Tuning ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml +llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml ``` #### LLaMA-Pro Fine-Tuning ```bash bash examples/extras/llama_pro/expand.sh -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml +llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ``` #### FSDP+QLoRA Fine-Tuning diff --git a/examples/README_zh.md b/examples/README_zh.md index cf5bbf49..64c31fbd 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -4,59 +4,57 @@ ## 目录 -- [单 GPU LoRA 微调](#单-gpu-lora-微调) -- [单 GPU QLoRA 微调](#单-gpu-qlora-微调) -- [多 GPU LoRA 微调](#多-gpu-lora-微调) -- [多 NPU LoRA 微调](#多-npu-lora-微调) -- [多 GPU 全参数微调](#多-gpu-全参数微调) +- [LoRA 微调](#lora-微调) +- [QLoRA 微调](#qlora-微调) +- [全参数微调](#全参数微调) - [合并 LoRA 适配器与模型量化](#合并-lora-适配器与模型量化) - [推理 LoRA 模型](#推理-lora-模型) - [杂项](#杂项) ## 示例 -### 单 GPU LoRA 微调 +### LoRA 微调 #### (增量)预训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_pretrain.yaml +llamafactory-cli train examples/train_lora/llama3_lora_pretrain.yaml ``` #### 指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml +llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### 多模态指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +llamafactory-cli train examples/train_lora/llava1_5_lora_sft.yaml ``` #### 奖励模型训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_reward.yaml +llamafactory-cli train examples/train_lora/llama3_lora_reward.yaml ``` #### PPO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_ppo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_ppo.yaml ``` #### DPO/ORPO/SimPO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_dpo.yaml +llamafactory-cli train examples/train_lora/llama3_lora_dpo.yaml ``` #### KTO 训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_kto.yaml +llamafactory-cli 
train examples/train_lora/llama3_lora_kto.yaml ``` #### 预处理数据集 @@ -64,95 +62,79 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo 对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_preprocess.yaml +llamafactory-cli train examples/train_lora/llama3_preprocess.yaml ``` #### 在 MMLU/CMMLU/C-Eval 上评估 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli eval examples/lora_single_gpu/llama3_lora_eval.yaml +llamafactory-cli eval examples/train_lora/llama3_lora_eval.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_predict.yaml +llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml ``` -### 单 GPU QLoRA 微调 - -#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) +#### 多机指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml -``` - -#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml -``` - -#### 基于 4 比特 AWQ 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_awq.yaml -``` - -#### 基于 2 比特 AQLM 量化进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml -``` - -### 多 GPU LoRA 微调 - -#### 在单机上进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -``` - -#### 在多机上进行指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml ``` #### 使用 DeepSpeed ZeRO-3 平均分配显存 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml ``` -### 多 NPU LoRA 微调 +### QLoRA 微调 -#### 使用 DeepSpeed ZeRO-0 进行指令监督微调 +#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) ```bash -ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/lora_multi_npu/llama3_lora_sft_ds.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` -### 多 GPU 全参数微调 +#### 基于 4/8 比特 GPTQ 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +``` + +#### 基于 4 比特 AWQ 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +``` + +#### 基于 2 比特 AQLM 量化进行指令监督微调 + +```bash +llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +``` + +### 全参数微调 #### 在单机上进行指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### 在多机上进行指令监督微调 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml 
-CUDA_VISIBLE_DEVICES=0,1,2,3 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/full_multi_gpu/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml +FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml ``` #### 批量预测并计算 BLEU 和 ROUGE 分数 ```bash -CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llama3_full_predict.yaml +llamafactory-cli train examples/train_full/llama3_full_predict.yaml ``` ### 合并 LoRA 适配器与模型量化 @@ -162,35 +144,33 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 llamafactory-cli train examples/full_multi_gpu/llam 注:请勿使用量化后的模型或 `quantization_bit` 参数来合并 LoRA 适配器。 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml +llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml ``` #### 使用 AutoGPTQ 量化模型 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_gptq.yaml +llamafactory-cli export examples/merge_lora/llama3_gptq.yaml ``` ### 推理 LoRA 模型 -使用 `CUDA_VISIBLE_DEVICES=0,1` 进行多卡推理。 - #### 使用命令行接口 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml +llamafactory-cli chat examples/inference/llama3_lora_sft.yaml ``` #### 使用浏览器界面 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml +llamafactory-cli webchat examples/inference/llama3_lora_sft.yaml ``` #### 启动 OpenAI 风格 API ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.yaml +llamafactory-cli api examples/inference/llama3_lora_sft.yaml ``` ### 杂项 @@ -198,32 +178,32 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli api examples/inference/llama3_lora_sft.y #### 使用 GaLore 进行全参数训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml +llamafactory-cli train examples/extras/galore/llama3_full_sft.yaml ``` #### 使用 BAdam 进行全参数训练 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml +llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml ``` #### LoRA+ 微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml +llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` #### 深度混合微调 ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml +llamafactory-cli train examples/extras/mod/llama3_full_sft.yaml ``` #### LLaMA-Pro 微调 ```bash bash examples/extras/llama_pro/expand.sh -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml +llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ``` #### FSDP+QLoRA 微调 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 084269ef..cc773991 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -8,9 +8,6 @@ do_train: true finetuning_type: lora lora_target: all -### ddp -ddp_timeout: 180000000 - ### dataset dataset: identity,alpaca_en_demo template: llama3 @@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml 
b/examples/extras/llama_pro/llama3_freeze_sft.yaml index 444a1113..f92d6945 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -32,6 +32,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 1ba654ec..57383ae0 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index df03c1e0..085febfc 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 pure_bf16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml deleted file mode 100644 index 348e53b9..00000000 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ /dev/null @@ -1,41 +0,0 @@ -### model -model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct - -### method -stage: sft -do_train: true -finetuning_type: lora -lora_target: all - -### ddp -ddp_timeout: 180000000 - -### dataset -dataset: identity,alpaca_en_demo -template: llama3 -cutoff_len: 1024 -max_samples: 1000 -overwrite_cache: true -preprocessing_num_workers: 16 - -### output -output_dir: saves/llama3-8b/lora/sft -logging_steps: 10 -save_steps: 500 -plot_loss: true -overwrite_output_dir: true - -### train -per_device_train_batch_size: 1 -gradient_accumulation_steps: 2 -learning_rate: 1.0e-4 -num_train_epochs: 3.0 -lr_scheduler_type: cosine -warmup_ratio: 0.1 -fp16: true - -### eval -val_size: 0.1 -per_device_eval_batch_size: 1 -eval_strategy: steps -eval_steps: 500 diff --git a/examples/full_multi_gpu/llama3_full_predict.yaml b/examples/train_full/llama3_full_predict.yaml similarity index 100% rename from examples/full_multi_gpu/llama3_full_predict.yaml rename to examples/train_full/llama3_full_predict.yaml diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/train_full/llama3_full_sft_ds3.yaml similarity index 100% rename from examples/full_multi_gpu/llama3_full_sft.yaml rename to examples/train_full/llama3_full_sft_ds3.yaml diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/train_lora/llama3_lora_dpo.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_dpo.yaml rename to examples/train_lora/llama3_lora_dpo.yaml index 78344330..db25fb51 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/train_lora/llama3_lora_dpo.yaml @@ -32,6 +32,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_eval.yaml b/examples/train_lora/llama3_lora_eval.yaml similarity index 100% rename from examples/lora_single_gpu/llama3_lora_eval.yaml rename to examples/train_lora/llama3_lora_eval.yaml diff --git a/examples/lora_single_gpu/llama3_lora_kto.yaml b/examples/train_lora/llama3_lora_kto.yaml similarity index 94% rename from examples/lora_single_gpu/llama3_lora_kto.yaml rename to examples/train_lora/llama3_lora_kto.yaml index 
d5234c0a..f730c82e 100644 --- a/examples/lora_single_gpu/llama3_lora_kto.yaml +++ b/examples/train_lora/llama3_lora_kto.yaml @@ -6,6 +6,7 @@ stage: kto do_train: true finetuning_type: lora lora_target: all +pref_beta: 0.1 ### dataset dataset: kto_en_demo @@ -30,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_ppo.yaml b/examples/train_lora/llama3_lora_ppo.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_ppo.yaml rename to examples/train_lora/llama3_lora_ppo.yaml index 98c842f9..e574014e 100644 --- a/examples/lora_single_gpu/llama3_lora_ppo.yaml +++ b/examples/train_lora/llama3_lora_ppo.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### generate max_new_tokens: 512 diff --git a/examples/lora_single_gpu/llama3_lora_predict.yaml b/examples/train_lora/llama3_lora_predict.yaml similarity index 95% rename from examples/lora_single_gpu/llama3_lora_predict.yaml rename to examples/train_lora/llama3_lora_predict.yaml index a127d248..148c8635 100644 --- a/examples/lora_single_gpu/llama3_lora_predict.yaml +++ b/examples/train_lora/llama3_lora_predict.yaml @@ -22,3 +22,4 @@ overwrite_output_dir: true ### eval per_device_eval_batch_size: 1 predict_with_generate: true +ddp_timeout: 180000000 diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/train_lora/llama3_lora_pretrain.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_pretrain.yaml rename to examples/train_lora/llama3_lora_pretrain.yaml index db435ca9..839b3e51 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/train_lora/llama3_lora_pretrain.yaml @@ -29,6 +29,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/train_lora/llama3_lora_reward.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_reward.yaml rename to examples/train_lora/llama3_lora_reward.yaml index 1ce42ea4..79559d19 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/train_lora/llama3_lora_reward.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/train_lora/llama3_lora_sft.yaml similarity index 96% rename from examples/lora_single_gpu/llama3_lora_sft.yaml rename to examples/train_lora/llama3_lora_sft.yaml index 651b636f..fe30c575 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/train_lora/llama3_lora_sft.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml b/examples/train_lora/llama3_lora_sft_ds0.yaml similarity index 98% rename from examples/lora_multi_npu/llama3_lora_sft_ds.yaml rename to examples/train_lora/llama3_lora_sft_ds0.yaml index a0ec8aa1..08b638e6 100644 --- a/examples/lora_multi_npu/llama3_lora_sft_ds.yaml +++ b/examples/train_lora/llama3_lora_sft_ds0.yaml @@ -6,9 +6,6 @@ stage: sft do_train: true finetuning_type: lora lora_target: all - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z0_config.json ### dataset 
@@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/train_lora/llama3_lora_sft_ds3.yaml similarity index 98% rename from examples/lora_multi_gpu/llama3_lora_sft_ds.yaml rename to examples/train_lora/llama3_lora_sft_ds3.yaml index 1c432fa7..b7266d61 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ b/examples/train_lora/llama3_lora_sft_ds3.yaml @@ -6,9 +6,6 @@ stage: sft do_train: true finetuning_type: lora lora_target: all - -### ddp -ddp_timeout: 180000000 deepspeed: examples/deepspeed/ds_z3_config.json ### dataset @@ -34,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/train_lora/llama3_preprocess.yaml similarity index 100% rename from examples/lora_single_gpu/llama3_preprocess.yaml rename to examples/train_lora/llama3_preprocess.yaml diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/train_lora/llava1_5_lora_sft.yaml similarity index 96% rename from examples/lora_single_gpu/llava1_5_lora_sft.yaml rename to examples/train_lora/llava1_5_lora_sft.yaml index df510a93..55ac31fa 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/train_lora/llava1_5_lora_sft.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/train_qlora/llama3_lora_sft_aqlm.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml rename to examples/train_qlora/llama3_lora_sft_aqlm.yaml index d54d6af6..7b6767d5 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/train_qlora/llama3_lora_sft_awq.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_awq.yaml rename to examples/train_qlora/llama3_lora_sft_awq.yaml index 5cef178a..a2a26e4b 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/train_qlora/llama3_lora_sft_awq.yaml @@ -30,6 +30,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml rename to examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml index b308dcab..cc773991 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml @@ -31,6 +31,7 @@ num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 fp16: true +ddp_timeout: 180000000 ### eval val_size: 0.1 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/train_qlora/llama3_lora_sft_gptq.yaml similarity index 96% rename from examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml rename to examples/train_qlora/llama3_lora_sft_gptq.yaml index b950042e..ad3d854c 100644 
--- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
+++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml
@@ -30,6 +30,7 @@ num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
+ddp_timeout: 180000000
 
 ### eval
 val_size: 0.1

From dedefecd2b5df10c06f6d524ef1ff62827841148 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 13 Jun 2024 03:16:20 +0800
Subject: [PATCH 033/160] Update llama3_full_sft_ds3.yaml

Former-commit-id: e715af62d521112d9c155cfa91fbb42fa0e77710
---
 examples/train_full/llama3_full_sft_ds3.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/train_full/llama3_full_sft_ds3.yaml b/examples/train_full/llama3_full_sft_ds3.yaml
index 40b62f24..40afd2ee 100644
--- a/examples/train_full/llama3_full_sft_ds3.yaml
+++ b/examples/train_full/llama3_full_sft_ds3.yaml
@@ -5,9 +5,6 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 stage: sft
 do_train: true
 finetuning_type: full
-
-### ddp
-ddp_timeout: 180000000
 deepspeed: examples/deepspeed/ds_z3_config.json
 
 ### dataset
@@ -33,6 +30,7 @@ num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 fp16: true
+ddp_timeout: 180000000
 
 ### eval
 val_size: 0.1

From dbd1458adf3e42cc2854980255f9c5a4db705666 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 13 Jun 2024 03:19:18 +0800
Subject: [PATCH 034/160] add quant check in webui export tab

Former-commit-id: 6455ca07061ae9858cd7bc996b28be1fde697a3d
---
 src/llamafactory/webui/components/export.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py
index 7e1493c8..9d756a38 100644
--- a/src/llamafactory/webui/components/export.py
+++ b/src/llamafactory/webui/components/export.py
@@ -21,6 +21,13 @@ if TYPE_CHECKING:
 GPTQ_BITS = ["8", "4", "3", "2"]
 
 
+def can_quantize(checkpoint_path: Union[str, List[str]]) -> "gr.Dropdown":
+    if isinstance(checkpoint_path, list) and len(checkpoint_path) != 0:
+        return gr.Dropdown(value="none", interactive=False)
+    else:
+        return gr.Dropdown(interactive=True)
+
+
 def save_model(
     lang: str,
     model_name: str,
@@ -96,6 +103,9 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
         export_dir = gr.Textbox()
         export_hub_model_id = gr.Textbox()
 
+    checkpoint_path: gr.Dropdown = engine.manager.get_elem_by_id("top.checkpoint_path")
+    checkpoint_path.change(can_quantize, [checkpoint_path], [export_quantization_bit], queue=False)
+
     export_btn = gr.Button()
     info_box = gr.Textbox(show_label=False, interactive=False)
 

From 530165d9a5ca9b5182d5ff2620aeb0e959481df3 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 13 Jun 2024 03:26:10 +0800
Subject: [PATCH 035/160] update examples

Former-commit-id: d6bf6231290d79eb3a63e711f18fa711ef18a4f6
---
 examples/README.md    | 10 ++++++----
 examples/README_zh.md | 10 ++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/examples/README.md b/examples/README.md
index 3372afb9..180d5f7b 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -11,6 +11,8 @@ Make sure to execute these commands in the `LLaMA-Factory` directory.
 - [Inferring LoRA Fine-Tuned Models](#inferring-lora-fine-tuned-models)
 - [Extras](#extras)
 
+Use `CUDA_VISIBLE_DEVICES` (GPU) or `ASCEND_RT_VISIBLE_DEVICES` (NPU) to choose computing devices.
+
 ## Examples
 
 ### LoRA Fine-Tuning
@@ -87,7 +89,7 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama
 #### Supervised Fine-Tuning with DeepSpeed ZeRO-3 (Weight Sharding)
 
 ```bash
-FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml
+FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
 ```
 
 ### QLoRA Fine-Tuning
@@ -121,14 +123,14 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_s
 #### Supervised Fine-Tuning on Single Node
 
 ```bash
-FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
+FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
 ```
 
 #### Supervised Fine-Tuning on Multiple Nodes
 
 ```bash
-FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
-FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
+FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
+FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
 ```
 
 #### Batch Predicting and Computing BLEU and ROUGE Scores
diff --git a/examples/README_zh.md b/examples/README_zh.md
index 64c31fbd..b6168a95 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -11,6 +11,8 @@
 - [推理 LoRA 模型](#推理-lora-模型)
 - [杂项](#杂项)
 
+使用 `CUDA_VISIBLE_DEVICES`(GPU)或 `ASCEND_RT_VISIBLE_DEVICES`(NPU)选择计算设备。
+
 ## 示例
 
 ### LoRA 微调
@@ -87,7 +89,7 @@ FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llama
 #### 使用 DeepSpeed ZeRO-3 平均分配显存
 
 ```bash
-FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds.yaml
+FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.yaml
 ```
 
 ### QLoRA 微调
@@ -121,14 +123,14 @@ llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml
 #### 在单机上进行指令监督微调
 
 ```bash
-FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
+FORCE_TORCHRUN=1 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
 ```
 
 #### 在多机上进行指令监督微调
 
 ```bash
-FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
-FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft.yaml
+FORCE_TORCHRUN=1 NNODES=2 RANK=0 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
+FORCE_TORCHRUN=1 NNODES=2 RANK=1 MASTER_ADDR=192.168.0.1 MASTER_PORT=29500 llamafactory-cli train examples/train_full/llama3_full_sft_ds3.yaml
 ```
 
 #### 批量预测并计算 BLEU 和 ROUGE 分数

From f4f315fd116a293c555ae55f8bd0df64bd5cfa6c Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Thu, 13 Jun 2024 16:02:21 +0800
Subject: [PATCH 036/160] Update README.md

Former-commit-id: f8d701cd3ce2e56f95b4f5439b8b48d5b62e0d2b
---
 examples/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/README.md b/examples/README.md
index 180d5f7b..a6d78936 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -97,25 +97,25 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3.
#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml ``` #### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_gptq.yaml ``` #### Supervised Fine-Tuning with 4-bit AWQ Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_awq.yaml ``` #### Supervised Fine-Tuning with 2-bit AQLM Quantization ```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_aqlm.yaml ``` ### Full-Parameter Fine-Tuning From 3ff9b87012977bdc7393bb9e86a31f4bb4e5ff0d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:05:54 +0800 Subject: [PATCH 037/160] add test cases Former-commit-id: 731176ff34cdf0cbf6b41c40c69f4ceb54c2daf6 --- src/llamafactory/chat/vllm_engine.py | 2 +- src/llamafactory/hparams/model_args.py | 8 +-- src/llamafactory/model/adapter.py | 43 +++++++------ src/llamafactory/model/patcher.py | 5 +- src/llamafactory/train/ppo/trainer.py | 3 +- tests/model/test_base.py | 32 ++++++++++ tests/model/test_freeze.py | 22 ++++++- tests/model/test_full.py | 20 ++++++- tests/model/test_lora.py | 83 +++++++++++++++++++++++++- 9 files changed, 184 insertions(+), 34 deletions(-) create mode 100644 tests/model/test_base.py diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index e4c05478..f0812a99 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -52,7 +52,7 @@ class VllmEngine(BaseEngine): "model": model_args.model_name_or_path, "trust_remote_code": True, "download_dir": model_args.cache_dir, - "dtype": model_args.vllm_dtype, + "dtype": model_args.infer_dtype, "max_model_len": model_args.vllm_maxlen, "tensor_parallel_size": get_device_count() or 1, "gpu_memory_utilization": model_args.vllm_gpu_util, diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 359beafd..bbac2e4b 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -136,10 +136,6 @@ class ModelArguments: default=8, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) - vllm_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( - default="auto", - metadata={"help": "Data type for model weights and activations in the vLLM engine."}, - ) offload_folder: str = field( default="offload", metadata={"help": "Path to offload model weights."}, @@ -148,6 +144,10 @@ class ModelArguments: default=True, metadata={"help": "Whether or not to use KV cache in generation."}, ) + infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( + default="auto", + metadata={"help": "Data type for model weights and activations at inference."} + ) hf_hub_token: Optional[str] = field( default=None, metadata={"help": "Auth token to log in with Hugging Face Hub."}, diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 34b9eda6..c37f6009 100644 --- a/src/llamafactory/model/adapter.py +++ 
b/src/llamafactory/model/adapter.py @@ -25,8 +25,12 @@ def _setup_full_tuning( model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", + is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> None: + if not is_trainable: + return + logger.info("Fine-tuning method: Full") forbidden_modules = set() if model_args.visual_inputs and finetuning_args.freeze_vision_tower: @@ -47,8 +51,12 @@ def _setup_freeze_tuning( model: "PreTrainedModel", model_args: "ModelArguments", finetuning_args: "FinetuningArguments", + is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> None: + if not is_trainable: + return + logger.info("Fine-tuning method: Freeze") if model_args.visual_inputs: config = model.config.text_config @@ -132,7 +140,9 @@ def _setup_lora_tuning( is_trainable: bool, cast_trainable_params_to_fp32: bool, ) -> "PeftModel": - logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + if is_trainable: + logger.info("Fine-tuning method: {}".format("DoRA" if finetuning_args.use_dora else "LoRA")) + adapter_to_resume = None if model_args.adapter_name_or_path is not None: @@ -173,6 +183,8 @@ def _setup_lora_tuning( offload_folder=model_args.offload_folder, ) + logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) + if is_trainable and adapter_to_resume is None: # create new lora weights while training if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": target_modules = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) @@ -227,9 +239,6 @@ def _setup_lora_tuning( for param in filter(lambda p: p.requires_grad, model.parameters()): param.data = param.data.to(torch.float32) - if model_args.adapter_name_or_path is not None: - logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) - return model @@ -247,29 +256,27 @@ def init_adapter( Note that the trainable parameters must be cast to float32. 
""" - if (not is_trainable) and model_args.adapter_name_or_path is None: - logger.info("Adapter is not found at evaluation, load the base model.") - return model + if is_trainable and getattr(model, "quantization_method", None) and finetuning_args.finetuning_type != "lora": + raise ValueError("Quantized models can only be used for the LoRA tuning.") - if finetuning_args.finetuning_type != "lora" and getattr(model, "quantization_method", None): - raise ValueError("You can only use lora for quantized models.") - - if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: + if not is_trainable: + cast_trainable_params_to_fp32 = False + elif is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: logger.info("ZeRO3/FSDP/PureBF16/BAdam detected, remaining trainable params as their original precision.") cast_trainable_params_to_fp32 = False else: logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True - if is_trainable and finetuning_args.finetuning_type == "full": - _setup_full_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) - - if is_trainable and finetuning_args.finetuning_type == "freeze": - _setup_freeze_tuning(model, model_args, finetuning_args, cast_trainable_params_to_fp32) - - if finetuning_args.finetuning_type == "lora": + if finetuning_args.finetuning_type == "full": + _setup_full_tuning(model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32) + elif finetuning_args.finetuning_type == "freeze": + _setup_freeze_tuning(model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32) + elif finetuning_args.finetuning_type == "lora": model = _setup_lora_tuning( config, model, model_args, finetuning_args, is_trainable, cast_trainable_params_to_fp32 ) + else: + raise NotImplementedError("Unknown finetuning type: {}.".format(finetuning_args.finetuning_type)) return model diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 18221a10..b97ff433 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -44,7 +44,10 @@ def patch_config( is_trainable: bool, ) -> None: if model_args.compute_dtype is None: # priority: bf16 > fp16 > fp32 - model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) + if model_args.infer_dtype == "auto": + model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None)) + else: + model_args.compute_dtype = getattr(torch, model_args.infer_dtype) if is_torch_npu_available(): use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"] diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 737c45a3..45f47455 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -135,8 +135,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): unwrapped_model: "AutoModelForCausalLMWithValueHead" = self.accelerator.unwrap_model(self.model) self.is_chatglm_model = getattr(unwrapped_model.config, "model_type", None) == "chatglm" - device_type = unwrapped_model.pretrained_model.device.type - self.amp_context = torch.autocast(device_type, dtype=model_args.compute_dtype) + self.amp_context = torch.autocast(self.current_device.type, dtype=self.model_args.compute_dtype) warnings.simplefilter("ignore") # remove gc warnings on ref model if 
finetuning_args.reward_model_type == "full": diff --git a/tests/model/test_base.py b/tests/model/test_base.py new file mode 100644 index 00000000..32a3918e --- /dev/null +++ b/tests/model/test_base.py @@ -0,0 +1,32 @@ +import os + +import torch +from transformers import AutoModelForCausalLM + +from llamafactory.hparams import get_infer_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "template": "llama3", + "infer_dtype": "float16", +} + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + + +def test_base(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + compare_model(model, ref_model) diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index 97800696..a0618315 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -2,7 +2,7 @@ import os import torch -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -23,8 +23,15 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "finetuning_type": "freeze", + "template": "llama3", + "infer_dtype": "float16", +} -def test_freeze_all_modules(): + +def test_freeze_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) @@ -37,7 +44,7 @@ def test_freeze_all_modules(): assert param.dtype == torch.float16 -def test_freeze_extra_modules(): +def test_freeze_train_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS} ) @@ -50,3 +57,12 @@ def test_freeze_extra_modules(): else: assert param.requires_grad is False assert param.dtype == torch.float16 + + +def test_freeze_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 6cb78f37..802b987c 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -2,7 +2,7 @@ import os import torch -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -23,11 +23,27 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "finetuning_type": "full", + "template": "llama3", + "infer_dtype": "float16", +} -def 
test_full(): + +def test_full_train(): model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) for param in model.parameters(): assert param.requires_grad is True assert param.dtype == torch.float32 + + +def test_full_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 2e2b89d9..3e2503f1 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,13 +1,18 @@ import os +from typing import Sequence import torch +from peft import LoraModel, PeftModel +from transformers import AutoModelForCausalLM -from llamafactory.hparams import get_train_args +from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora") + TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", @@ -23,8 +28,32 @@ TRAIN_ARGS = { "fp16": True, } +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA, + "adapter_name_or_path": TINY_LLAMA_ADAPTER, + "finetuning_type": "lora", + "template": "llama3", + "infer_dtype": "float16", +} -def test_lora_all_modules(): + +def load_reference_model() -> "torch.nn.Module": + model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA) + return PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER) + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + if any(key in name for key in diff_keys): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is False + else: + assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + + +def test_lora_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) @@ -41,7 +70,7 @@ def test_lora_all_modules(): assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"} -def test_lora_extra_modules(): +def test_lora_train_extra_modules(): model_args, _, _, finetuning_args, _ = get_train_args( {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS} ) @@ -61,3 +90,51 @@ def test_lora_extra_modules(): assert param.dtype == torch.float16 assert extra_modules == {"embed_tokens", "lm_head"} + + +def test_lora_train_old_adapters(): + model_args, _, _, finetuning_args, _ = get_train_args( + {"adapter_name_or_path": TINY_LLAMA_ADAPTER, "create_new_adapter": False, **TRAIN_ARGS} + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, 
torch_dtype=model.dtype, device_map=model.device) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model(model, ref_model) + + +def test_lora_train_new_adapters(): + model_args, _, _, finetuning_args, _ = get_train_args( + {"adapter_name_or_path": TINY_LLAMA_ADAPTER, "create_new_adapter": True, **TRAIN_ARGS} + ) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model( + model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"] + ) + + +def test_lora_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + + base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) + ref_model = ref_model.merge_and_unload() + compare_model(model, ref_model) + + for name, param in model.named_parameters(): + assert param.requires_grad is False + assert param.dtype == torch.float16 + assert "lora" not in name From a30931fe0ff797428d0f03623389b88d0c8ecd28 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:34:55 +0800 Subject: [PATCH 038/160] fix #4295 Former-commit-id: 08f657868f9d605b837c5d8c2946a25cc05c8735 --- src/llamafactory/train/sft/trainer.py | 10 +++++++--- src/llamafactory/train/sft/workflow.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index c063b214..6bf5b7c0 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -13,6 +13,7 @@ from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: + from torch.utils.data import Dataset from transformers import ProcessorMixin from transformers.trainer import PredictionOutput @@ -94,7 +95,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): padded_tensor[:, -src_tensor.shape[-1] :] = src_tensor # adopt left-padding return padded_tensor.contiguous() # in contiguous memory - def save_predictions(self, predict_results: "PredictionOutput") -> None: + def save_predictions(self, dataset: "Dataset", predict_results: "PredictionOutput") -> None: r""" Saves model predictions to `output_dir`. 
@@ -120,6 +121,9 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): (preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1 ) # move pad token to last + decoded_inputs = self.tokenizer.batch_decode( + dataset["input_ids"], skip_special_tokens=True, clean_up_tokenization_spaces=False + ) decoded_labels = self.tokenizer.batch_decode( labels, skip_special_tokens=True, clean_up_tokenization_spaces=False ) @@ -127,6 +131,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): with open(output_prediction_file, "w", encoding="utf-8") as writer: res: List[str] = [] - for label, pred in zip(decoded_labels, decoded_preds): - res.append(json.dumps({"label": label, "predict": pred}, ensure_ascii=False)) + for text, label, pred in zip(decoded_inputs, decoded_labels, decoded_preds): + res.append(json.dumps({"prompt": text, "label": label, "predict": pred}, ensure_ascii=False)) writer.write("\n".join(res)) diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index f09b5173..a989b3f7 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -93,7 +93,7 @@ def run_sft( predict_results.metrics.pop("predict_loss", None) trainer.log_metrics("predict", predict_results.metrics) trainer.save_metrics("predict", predict_results.metrics) - trainer.save_predictions(predict_results) + trainer.save_predictions(dataset, predict_results) # Create model card create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) From c29fa61a9c4eeed1532188524156eacc10db2127 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:47:13 +0800 Subject: [PATCH 039/160] fix #4292 Former-commit-id: 4cd4c179d24eab0fcaec2b29b9dd71970f877fe8 --- src/llamafactory/webui/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 37b38df0..3b8d5378 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -39,7 +39,11 @@ def get_save_dir(*paths: str) -> os.PathLike: r""" Gets the path to saved model checkpoints. 
""" - paths = (path.replace(os.path.sep, "").replace(" ", "").strip() for path in paths) + if os.path.sep in paths[-1]: + logger.warning("Found complex path, some features may be not available.") + return paths[-1] + + paths = (path.replace(" ", "").strip() for path in paths) return os.path.join(DEFAULT_SAVE_DIR, *paths) From 2af932d969df3e5b23e8abd524b2f899762c938a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 04:57:19 +0800 Subject: [PATCH 040/160] disable DP Former-commit-id: c18fd609d268389f3e65274992045a6c9f8e6c1f --- src/llamafactory/hparams/parser.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index ec5dd62c..3476a726 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -8,6 +8,7 @@ import transformers from transformers import HfArgumentParser, Seq2SeqTrainingArguments from transformers.integrations import is_deepspeed_zero3_enabled from transformers.trainer_utils import get_last_checkpoint +from transformers.training_args import ParallelMode from transformers.utils import is_torch_bf16_gpu_available from transformers.utils.versions import require_version @@ -162,6 +163,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): raise ValueError("PPO only accepts wandb or tensorboard logger.") + if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED: + raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.") + if training_args.max_steps == -1 and data_args.streaming: raise ValueError("Please specify `max_steps` in streaming mode.") @@ -181,14 +185,14 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if ( finetuning_args.use_galore and finetuning_args.galore_layerwise - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): raise ValueError("Distributed training does not support layer-wise GaLore.") if ( finetuning_args.use_badam and finetuning_args.badam_mode == "layer" - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") @@ -230,7 +234,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: # Post-process training arguments if ( - training_args.parallel_mode.value == "distributed" + training_args.parallel_mode == ParallelMode.DISTRIBUTED and training_args.ddp_find_unused_parameters is None and finetuning_args.finetuning_type == "lora" ): @@ -290,7 +294,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: training_args.local_rank, training_args.device, training_args.n_gpu, - training_args.parallel_mode.value == "distributed", + training_args.parallel_mode == ParallelMode.DISTRIBUTED, str(model_args.compute_dtype), ) ) From 2abfad9c1fd906034cdda5956fe02e70a456f654 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 05:11:33 +0800 Subject: [PATCH 041/160] fix #4271 Former-commit-id: 03707e78d29bfcf5d395a64bb38632bdb3ff47ce --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 405ac46e..1756551e 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ extra_require = { "torch": ["torch>=1.13.1"], "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", 
"decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], - "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"], + "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], From d2df3f2d6e9c40cfa8143b2482b6d7d58a0fb70c Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 05:13:16 +0800 Subject: [PATCH 042/160] update readme Former-commit-id: a43d302aa79cbfb9b0606e855b4c1af6865d8e68 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5dd10d5a..cae79694 100644 --- a/README.md +++ b/README.md @@ -463,7 +463,7 @@ docker-compose exec llamafactory bash ### Deploy with OpenAI-style API and vLLM ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml +API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` > [!TIP] diff --git a/README_zh.md b/README_zh.md index 76bd2d89..af3ff8f0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -463,7 +463,7 @@ docker-compose exec llamafactory bash ### 利用 vLLM 部署 OpenAI API ```bash -CUDA_VISIBLE_DEVICES=0,1 API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml +API_PORT=8000 llamafactory-cli api examples/inference/llama3_vllm.yaml ``` > [!TIP] From bb8853616685faff541a1e6cd5799fba98b8c1d6 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 17:54:33 +0800 Subject: [PATCH 043/160] add license Former-commit-id: 69cfc98d7c81756a5ab6bf962240e393e449fef0 --- evaluation/ceval/ceval.py | 1 + evaluation/cmmlu/cmmlu.py | 1 + evaluation/mmlu/mmlu.py | 1 + scripts/cal_flops.py | 23 +++++++++-- scripts/cal_lr.py | 23 +++++++++-- scripts/cal_ppl.py | 19 ++++++++- scripts/length_cdf.py | 19 ++++++++- scripts/llama_pro.py | 23 +++++++++-- scripts/llamafy_baichuan2.py | 22 ++++++++-- scripts/llamafy_qwen.py | 21 ++++++++-- scripts/loftq_init.py | 23 +++++++++-- scripts/test_toolcall.py | 15 +++++++ setup.py | 14 +++++++ src/api.py | 14 +++++++ src/llamafactory/__init__.py | 14 +++++++ src/llamafactory/api/app.py | 14 +++++++ src/llamafactory/api/chat.py | 14 +++++++ src/llamafactory/api/common.py | 14 +++++++ src/llamafactory/api/protocol.py | 14 +++++++ src/llamafactory/chat/__init__.py | 14 +++++++ src/llamafactory/chat/base_engine.py | 14 +++++++ src/llamafactory/chat/chat_model.py | 17 ++++++++ src/llamafactory/chat/hf_engine.py | 14 +++++++ src/llamafactory/chat/vllm_engine.py | 14 +++++++ src/llamafactory/cli.py | 14 +++++++ src/llamafactory/data/__init__.py | 14 +++++++ src/llamafactory/data/aligner.py | 14 +++++++ src/llamafactory/data/collator.py | 14 +++++++ src/llamafactory/data/data_utils.py | 14 +++++++ src/llamafactory/data/formatter.py | 14 +++++++ src/llamafactory/data/loader.py | 14 +++++++ src/llamafactory/data/parser.py | 14 +++++++ src/llamafactory/data/preprocess.py | 14 +++++++ src/llamafactory/data/processors/feedback.py | 14 +++++++ src/llamafactory/data/processors/pairwise.py | 14 +++++++ src/llamafactory/data/processors/pretrain.py | 17 ++++++++ .../data/processors/processor_utils.py | 14 +++++++ .../data/processors/supervised.py | 14 +++++++ .../data/processors/unsupervised.py | 14 +++++++ src/llamafactory/data/template.py | 14 +++++++ src/llamafactory/eval/evaluator.py | 39 +++++++++++++++++- src/llamafactory/eval/template.py | 14 +++++++ src/llamafactory/extras/callbacks.py | 14 +++++++ src/llamafactory/extras/constants.py | 14 +++++++ src/llamafactory/extras/env.py | 
14 +++++++ src/llamafactory/extras/logging.py | 14 +++++++ src/llamafactory/extras/misc.py | 14 +++++++ src/llamafactory/extras/packages.py | 17 ++++++++ src/llamafactory/extras/ploting.py | 14 +++++++ src/llamafactory/hparams/__init__.py | 14 +++++++ src/llamafactory/hparams/data_args.py | 17 ++++++++ src/llamafactory/hparams/evaluation_args.py | 14 +++++++ src/llamafactory/hparams/finetuning_args.py | 14 +++++++ src/llamafactory/hparams/generating_args.py | 14 +++++++ src/llamafactory/hparams/model_args.py | 17 ++++++++ src/llamafactory/hparams/parser.py | 17 ++++++++ src/llamafactory/launcher.py | 14 +++++++ src/llamafactory/model/__init__.py | 14 +++++++ src/llamafactory/model/adapter.py | 14 +++++++ src/llamafactory/model/loader.py | 14 +++++++ .../model/model_utils/attention.py | 14 +++++++ .../model/model_utils/checkpointing.py | 19 ++++++++- .../model/model_utils/embedding.py | 14 +++++++ .../model/model_utils/longlora.py | 17 ++++++++ src/llamafactory/model/model_utils/misc.py | 14 +++++++ src/llamafactory/model/model_utils/mod.py | 14 +++++++ src/llamafactory/model/model_utils/moe.py | 14 +++++++ .../model/model_utils/quantization.py | 18 ++++++++- src/llamafactory/model/model_utils/rope.py | 18 +++++++++ src/llamafactory/model/model_utils/unsloth.py | 14 +++++++ .../model/model_utils/valuehead.py | 14 +++++++ src/llamafactory/model/model_utils/visual.py | 17 ++++++++ src/llamafactory/model/patcher.py | 14 +++++++ src/llamafactory/train/dpo/__init__.py | 14 +++++++ src/llamafactory/train/dpo/trainer.py | 17 ++++++++ src/llamafactory/train/dpo/workflow.py | 17 +++++++- src/llamafactory/train/kto/__init__.py | 14 +++++++ src/llamafactory/train/kto/trainer.py | 17 ++++++++ src/llamafactory/train/kto/workflow.py | 17 ++++++++ src/llamafactory/train/ppo/__init__.py | 14 +++++++ src/llamafactory/train/ppo/ppo_utils.py | 14 +++++++ src/llamafactory/train/ppo/trainer.py | 17 ++++++++ src/llamafactory/train/ppo/workflow.py | 17 +++++++- src/llamafactory/train/pt/__init__.py | 14 +++++++ src/llamafactory/train/pt/trainer.py | 14 +++++++ src/llamafactory/train/pt/workflow.py | 17 +++++++- src/llamafactory/train/rm/__init__.py | 14 +++++++ src/llamafactory/train/rm/metric.py | 14 +++++++ src/llamafactory/train/rm/trainer.py | 40 ++++++++++++++++++- src/llamafactory/train/rm/workflow.py | 39 +++++++++++++++++- src/llamafactory/train/sft/__init__.py | 14 +++++++ src/llamafactory/train/sft/metric.py | 18 +++++++++ src/llamafactory/train/sft/trainer.py | 17 ++++++++ src/llamafactory/train/sft/workflow.py | 17 +++++++- src/llamafactory/train/trainer_utils.py | 19 +++++++++ src/llamafactory/train/tuner.py | 14 +++++++ src/llamafactory/webui/chatter.py | 14 +++++++ src/llamafactory/webui/common.py | 14 +++++++ src/llamafactory/webui/components/__init__.py | 14 +++++++ src/llamafactory/webui/components/chatbot.py | 14 +++++++ src/llamafactory/webui/components/data.py | 14 +++++++ src/llamafactory/webui/components/eval.py | 14 +++++++ src/llamafactory/webui/components/export.py | 14 +++++++ src/llamafactory/webui/components/infer.py | 14 +++++++ src/llamafactory/webui/components/top.py | 14 +++++++ src/llamafactory/webui/components/train.py | 14 +++++++ src/llamafactory/webui/css.py | 14 +++++++ src/llamafactory/webui/engine.py | 14 +++++++ src/llamafactory/webui/interface.py | 14 +++++++ src/llamafactory/webui/locales.py | 14 +++++++ src/llamafactory/webui/manager.py | 14 +++++++ src/llamafactory/webui/runner.py | 14 +++++++ src/llamafactory/webui/utils.py | 14 +++++++ src/train.py | 14 +++++++ 
src/webui.py | 14 +++++++ tests/data/test_supervised.py | 14 +++++++ tests/eval/test_eval_template.py | 14 +++++++ tests/model/model_utils/test_attention.py | 14 +++++++ tests/model/test_base.py | 14 +++++++ tests/model/test_freeze.py | 14 +++++++ tests/model/test_full.py | 14 +++++++ tests/model/test_lora.py | 14 +++++++ 122 files changed, 1848 insertions(+), 32 deletions(-) diff --git a/evaluation/ceval/ceval.py b/evaluation/ceval/ceval.py index 4111d6b4..48442d50 100644 --- a/evaluation/ceval/ceval.py +++ b/evaluation/ceval/ceval.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/evaluation/cmmlu/cmmlu.py b/evaluation/cmmlu/cmmlu.py index 37efb328..5ff548a4 100644 --- a/evaluation/cmmlu/cmmlu.py +++ b/evaluation/cmmlu/cmmlu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/evaluation/mmlu/mmlu.py b/evaluation/mmlu/mmlu.py index a4530250..1065fb31 100644 --- a/evaluation/mmlu/mmlu.py +++ b/evaluation/mmlu/mmlu.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import datasets diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py index ac87e0ab..627b5534 100644 --- a/scripts/cal_flops.py +++ b/scripts/cal_flops.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Calculates the flops of pre-trained models. -# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 -# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/ +# Copyright 2024 Microsoft Corporation and the LlamaFactory team. +# +# This code is inspired by Microsoft's DeepSpeed library. +# https://www.deepspeed.ai/tutorials/flops-profiler/ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import fire import torch @@ -17,6 +30,10 @@ def calculate_flops( seq_length: int = 256, flash_attn: str = "auto", ): + r""" + Calculates the flops of pre-trained models. + Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 + """ with get_accelerator().device(0): chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn)) fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device) diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py index bfa32cc9..ff21d27c 100644 --- a/scripts/cal_lr.py +++ b/scripts/cal_lr.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. 
-# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 -# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py +# Copyright 2024 imoneoi and the LlamaFactory team. +# +# This code is inspired by imoneoi's OpenChat library. +# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import Literal @@ -32,6 +45,10 @@ def calculate_lr( cutoff_len: int = 1024, # i.e. maximum input length during training is_mistral: bool = False, # mistral model uses a smaller learning rate, ): + r""" + Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. + Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 + """ model_args, data_args, training_args, _, _ = get_train_args( dict( stage=stage, diff --git a/scripts/cal_ppl.py b/scripts/cal_ppl.py index 387b756c..fb503629 100644 --- a/scripts/cal_ppl.py +++ b/scripts/cal_ppl.py @@ -1,6 +1,17 @@ # coding=utf-8 -# Calculates the ppl on the dataset of the pre-trained models. -# Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json from dataclasses import dataclass @@ -56,6 +67,10 @@ def cal_ppl( max_samples: Optional[int] = None, train_on_prompt: bool = False, ): + r""" + Calculates the ppl on the dataset of the pre-trained models. + Usage: python cal_ppl.py --model_name_or_path path_to_model --save_name ppl.json + """ model_args, data_args, training_args, finetuning_args, _ = get_train_args( dict( stage=stage, diff --git a/scripts/length_cdf.py b/scripts/length_cdf.py index 7739dcf0..4cdf01e6 100644 --- a/scripts/length_cdf.py +++ b/scripts/length_cdf.py @@ -1,6 +1,17 @@ # coding=utf-8 -# Calculates the distribution of the input lengths in the dataset. -# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from collections import defaultdict @@ -19,6 +30,10 @@ def length_cdf( template: str = "default", interval: int = 1000, ): + r""" + Calculates the distribution of the input lengths in the dataset. + Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default + """ model_args, data_args, training_args, _, _ = get_train_args( dict( stage="sft", diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 727998ae..f315335a 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. -# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 -# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py +# Copyright 2024 Tencent Inc. and the LlamaFactory team. +# +# This code is inspired by Tencent's LLaMA-Pro library. +# https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -37,6 +50,10 @@ def block_expansion( shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False, ): + r""" + Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. + Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 + """ config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) num_layers = getattr(config, "num_hidden_layers") setattr(config, "num_hidden_layers", num_layers + num_expand) diff --git a/scripts/llamafy_baichuan2.py b/scripts/llamafy_baichuan2.py index 1ae58879..19284f5f 100644 --- a/scripts/llamafy_baichuan2.py +++ b/scripts/llamafy_baichuan2.py @@ -1,8 +1,17 @@ # coding=utf-8 -# Converts the Baichuan2-7B model in the same format as LLaMA2-7B. -# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output -# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py -# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -79,6 +88,11 @@ def save_config(input_dir: str, output_dir: str): def llamafy_baichuan2( input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False ): + r""" + Converts the Baichuan2-7B model in the same format as LLaMA2-7B. + Usage: python llamafy_baichuan2.py --input_dir input --output_dir output + Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied + """ try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/scripts/llamafy_qwen.py b/scripts/llamafy_qwen.py index 69cf3e8e..e5b59483 100644 --- a/scripts/llamafy_qwen.py +++ b/scripts/llamafy_qwen.py @@ -1,7 +1,17 @@ # coding=utf-8 -# Converts the Qwen models in the same format as LLaMA2. -# Usage: python llamafy_qwen.py --input_dir input --output_dir output -# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import json import os @@ -131,6 +141,11 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str): def llamafy_qwen( input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False ): + r""" + Converts the Qwen models in the same format as LLaMA2. + Usage: python llamafy_qwen.py --input_dir input --output_dir output + Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied + """ try: os.makedirs(output_dir, exist_ok=False) except Exception as e: diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 7f244316..159dea06 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -1,7 +1,20 @@ # coding=utf-8 -# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) -# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir -# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's PEFT library. +# https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os from typing import TYPE_CHECKING, Optional @@ -49,6 +62,10 @@ def quantize_loftq( lora_target: Optional[str] = "q_proj,v_proj", save_safetensors: Optional[bool] = False, ): + r""" + Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) + Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir + """ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter) diff --git a/scripts/test_toolcall.py b/scripts/test_toolcall.py index 7e460017..6f6fd06c 100644 --- a/scripts/test_toolcall.py +++ b/scripts/test_toolcall.py @@ -1,3 +1,18 @@ +# coding=utf-8 +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import Sequence diff --git a/setup.py b/setup.py index 1756551e..3d2ac921 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import re diff --git a/src/api.py b/src/api.py index 3655e393..0f925497 100644 --- a/src/api.py +++ b/src/api.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os import uvicorn diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 78230937..9d732777 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Level: api, webui > chat, eval, train > data, model > hparams > extras from .cli import VERSION diff --git a/src/llamafactory/api/app.py b/src/llamafactory/api/app.py index 21edab2f..c1264617 100644 --- a/src/llamafactory/api/app.py +++ b/src/llamafactory/api/app.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from contextlib import asynccontextmanager from typing import Optional diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 98957bc1..a2074dbb 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import base64 import io import json diff --git a/src/llamafactory/api/common.py b/src/llamafactory/api/common.py index 5ad9a071..d1ac94de 100644 --- a/src/llamafactory/api/common.py +++ b/src/llamafactory/api/common.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import json from typing import TYPE_CHECKING, Any, Dict diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index 055fa781..a69132ea 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import time from enum import Enum, unique from typing import Any, Dict, List, Optional, Union diff --git a/src/llamafactory/chat/__init__.py b/src/llamafactory/chat/__init__.py index a1a79de6..07276d48 100644 --- a/src/llamafactory/chat/__init__.py +++ b/src/llamafactory/chat/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .base_engine import BaseEngine from .chat_model import ChatModel diff --git a/src/llamafactory/chat/base_engine.py b/src/llamafactory/chat/base_engine.py index 65b6c59c..92a51ebe 100644 --- a/src/llamafactory/chat/base_engine.py +++ b/src/llamafactory/chat/base_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from abc import ABC, abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Optional, Sequence, Union diff --git a/src/llamafactory/chat/chat_model.py b/src/llamafactory/chat/chat_model.py index 281ef0c1..fb800106 100644 --- a/src/llamafactory/chat/chat_model.py +++ b/src/llamafactory/chat/chat_model.py @@ -1,3 +1,20 @@ +# Copyright 2024 THUDM and the LlamaFactory team. +# +# This code is inspired by the THUDM's ChatGLM implementation. +# https://github.com/THUDM/ChatGLM-6B/blob/main/cli_demo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio from threading import Thread from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, Generator, List, Optional, Sequence diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 28e6a409..a7ff7015 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio import concurrent.futures import os diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index f0812a99..d488a039 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import uuid from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 5042e53c..c7f136b3 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random import subprocess diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index b08691d3..307853bc 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .collator import KTODataCollatorWithPadding, PairwiseDataCollatorWithPadding from .data_utils import Role, split_dataset from .loader import get_dataset diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 3e9d5c46..299bdca3 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from functools import partial from typing import TYPE_CHECKING, Any, Dict, List, Union diff --git a/src/llamafactory/data/collator.py b/src/llamafactory/data/collator.py index 1dc8dd8d..e4859ff5 100644 --- a/src/llamafactory/data/collator.py +++ b/src/llamafactory/data/collator.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import Any, Dict, Sequence diff --git a/src/llamafactory/data/data_utils.py b/src/llamafactory/data/data_utils.py index 9b313112..cc9761b1 100644 --- a/src/llamafactory/data/data_utils.py +++ b/src/llamafactory/data/data_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from enum import Enum, unique from typing import TYPE_CHECKING, Dict, List, Tuple, Union diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 0cd3d6c1..590e682b 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import re from abc import ABC, abstractmethod diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index ba426f81..f44ef5de 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect import os import sys diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index ec97bfc1..4bebcd68 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from dataclasses import dataclass diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index 875f55d6..9a8b97f3 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from functools import partial from typing import TYPE_CHECKING, Callable, Literal, Optional, Tuple diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 5fba452c..219ab353 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index db52c6a7..b2939348 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index a10ccabd..fb4c840c 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from itertools import chain from typing import TYPE_CHECKING, Any, Dict, List diff --git a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index 9903a053..93df0cd5 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import bisect from typing import TYPE_CHECKING, List, Sequence diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index f59f5371..eb5ffb1a 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import defaultdict from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index 38497a15..75ad4d51 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.logging import get_logger diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index b600c567..786c679f 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union

diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py
index 5c6fb104..bbd7a44b 100644
--- a/src/llamafactory/eval/evaluator.py
+++ b/src/llamafactory/eval/evaluator.py
@@ -1,4 +1,41 @@
-# Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py
+# Copyright 2024 the LlamaFactory team.
+#
+# This code is inspired by Dan Hendrycks' test library.
+# https://github.com/hendrycks/test/blob/master/evaluate_flan.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# MIT License
+#
+# Copyright (c) 2020 Dan Hendrycks
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.

 import inspect
 import json
diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py
index 2cbb5aaf..7d524e7c 100644
--- a/src/llamafactory/eval/template.py
+++ b/src/llamafactory/eval/template.py
@@ -1,3 +1,17 @@
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from dataclasses import dataclass
 from typing import Dict, List, Sequence, Tuple

diff --git a/src/llamafactory/extras/callbacks.py b/src/llamafactory/extras/callbacks.py
index 441ebbfd..0dff6a69 100644
--- a/src/llamafactory/extras/callbacks.py
+++ b/src/llamafactory/extras/callbacks.py
@@ -1,3 +1,17 @@
+# Copyright 2024 the LlamaFactory team.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import logging import os diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 7d96fb5f..e31e7419 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from collections import OrderedDict, defaultdict from enum import Enum from typing import Dict, Optional diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index a8cb799d..586c24c0 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import platform import accelerate diff --git a/src/llamafactory/extras/logging.py b/src/llamafactory/extras/logging.py index 430b8a48..67622212 100644 --- a/src/llamafactory/extras/logging.py +++ b/src/llamafactory/extras/logging.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import sys diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index fc33f77e..3d969df1 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import gc import os from typing import TYPE_CHECKING, Dict, Tuple diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 0746bb4f..35f546ab 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import importlib.metadata import importlib.util from functools import lru_cache diff --git a/src/llamafactory/extras/ploting.py b/src/llamafactory/extras/ploting.py index dea23bbe..596d55e7 100644 --- a/src/llamafactory/extras/ploting.py +++ b/src/llamafactory/extras/ploting.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import math import os diff --git a/src/llamafactory/hparams/__init__.py b/src/llamafactory/hparams/__init__.py index d1ee98dd..cfe448c1 100644 --- a/src/llamafactory/hparams/__init__.py +++ b/src/llamafactory/hparams/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .data_args import DataArguments from .evaluation_args import EvaluationArguments from .finetuning_args import FinetuningArguments diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 1e0cd08c..95284766 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass, field from typing import Literal, Optional diff --git a/src/llamafactory/hparams/evaluation_args.py b/src/llamafactory/hparams/evaluation_args.py index 5a05f6f6..a7f221ca 100644 --- a/src/llamafactory/hparams/evaluation_args.py +++ b/src/llamafactory/hparams/evaluation_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from dataclasses import dataclass, field from typing import Literal, Optional diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index facbe792..52dc299e 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass, field from typing import List, Literal, Optional diff --git a/src/llamafactory/hparams/generating_args.py b/src/llamafactory/hparams/generating_args.py index 0ee17d1a..7ebb4eed 100644 --- a/src/llamafactory/hparams/generating_args.py +++ b/src/llamafactory/hparams/generating_args.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, field from typing import Any, Dict, Optional diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index bbac2e4b..0a91f0fa 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import asdict, dataclass, field from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Union diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 3476a726..1c57567c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import os import sys diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py index de154db9..65e0b68f 100644 --- a/src/llamafactory/launcher.py +++ b/src/llamafactory/launcher.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from llamafactory.train.tuner import run_exp diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 9d23d59f..4abbaa1b 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .loader import load_config, load_model, load_tokenizer from .model_utils.misc import find_all_linear_modules from .model_utils.valuehead import load_valuehead_params diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index c37f6009..dfa71525 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import re from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 697a04e7..69cccd93 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, Optional, TypedDict from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoProcessor, AutoTokenizer diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 2bd36fdc..8ff3807b 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index e0657be8..e4e84b12 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -1,3 +1,21 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's Transformers and PEFT library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py +# https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import inspect from functools import partial from types import MethodType @@ -68,7 +86,6 @@ def prepare_model_for_training( (1) cast the layernorm in fp32 (2) make output embedding layer require grads (3) add the upcasting of the lm_head in fp32 - Inspired by: https://github.com/huggingface/peft/blob/v0.7.1/src/peft/utils/other.py#L72 """ if model_args.upcast_layernorm: logger.info("Upcasting layernorm weights in float32.") diff --git a/src/llamafactory/model/model_utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py index 3d9278e3..3ff79828 100644 --- a/src/llamafactory/model/model_utils/embedding.py +++ b/src/llamafactory/model/model_utils/embedding.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from contextlib import nullcontext from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index 4a8c562a..7af43dcf 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -1,3 +1,20 @@ +# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team. +# +# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries. 
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from typing import TYPE_CHECKING, Optional, Tuple diff --git a/src/llamafactory/model/model_utils/misc.py b/src/llamafactory/model/model_utils/misc.py index 4851bd29..a2812228 100644 --- a/src/llamafactory/model/model_utils/misc.py +++ b/src/llamafactory/model/model_utils/misc.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, List from ...extras.logging import get_logger diff --git a/src/llamafactory/model/model_utils/mod.py b/src/llamafactory/model/model_utils/mod.py index 5708a1a8..ec73af00 100644 --- a/src/llamafactory/model/model_utils/mod.py +++ b/src/llamafactory/model/model_utils/mod.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING from ...extras.constants import MOD_SUPPORTED_MODELS diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index 8a73c844..5c7473aa 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Sequence import torch diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 02a54f07..9e6b9da4 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's Optimum library. +# https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random from enum import Enum, unique @@ -41,7 +58,6 @@ class QuantizationMethod(str, Enum): def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]: r""" - Inspired by: https://github.com/huggingface/optimum/blob/v1.16.0/optimum/gptq/data.py#L133 TODO: remove tokenizer.decode() https://github.com/huggingface/optimum/pull/1600 """ if os.path.isfile(model_args.export_quantization_dataset): diff --git a/src/llamafactory/model/model_utils/rope.py b/src/llamafactory/model/model_utils/rope.py index 93ab8929..88303c4d 100644 --- a/src/llamafactory/model/model_utils/rope.py +++ b/src/llamafactory/model/model_utils/rope.py @@ -1,3 +1,21 @@ +# Copyright 2024 LMSYS and the LlamaFactory team. +# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li +# +# This code is inspired by the LMSYS's FastChat library. +# https://github.com/lm-sys/FastChat/blob/v0.2.30/fastchat/train/train.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from typing import TYPE_CHECKING diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 8a16409d..9cfaec61 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ b/src/llamafactory/model/model_utils/unsloth.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
 from typing import TYPE_CHECKING, Any, Dict, Optional

 from ...extras.logging import get_logger
diff --git a/src/llamafactory/model/model_utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py
index 64333688..9ab3d45a 100644
--- a/src/llamafactory/model/model_utils/valuehead.py
+++ b/src/llamafactory/model/model_utils/valuehead.py
@@ -1,3 +1,17 @@
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import TYPE_CHECKING, Dict

 import torch
diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py
index c8260b7f..37237485 100644
--- a/src/llamafactory/model/model_utils/visual.py
+++ b/src/llamafactory/model/model_utils/visual.py
@@ -1,3 +1,20 @@
+# Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
+#
+# This code is inspired by HuggingFace's Transformers library.
+# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import TYPE_CHECKING, Tuple

 import torch
diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index b97ff433..053516e4 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -1,3 +1,17 @@
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 from types import MethodType
 from typing import TYPE_CHECKING, Any, Dict
diff --git a/src/llamafactory/train/dpo/__init__.py b/src/llamafactory/train/dpo/__init__.py
index 43fe9420..9ce0d089 100644
--- a/src/llamafactory/train/dpo/__init__.py
+++ b/src/llamafactory/train/dpo/__init__.py
@@ -1,3 +1,17 @@
+# Copyright 2024 the LlamaFactory team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_dpo diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 5bdb9c43..475d08c3 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from collections import defaultdict from contextlib import nullcontext diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 992985b0..8c3c2eb1 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/kto/__init__.py b/src/llamafactory/train/kto/__init__.py index 34c7905a..a1900368 100644 --- a/src/llamafactory/train/kto/__init__.py +++ b/src/llamafactory/train/kto/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .workflow import run_kto diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 3b4488fc..6e96fc0c 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import warnings from collections import defaultdict from contextlib import nullcontext diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index c79b160b..8a7af6d4 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, List, Optional from ...data import KTODataCollatorWithPadding, get_dataset, split_dataset diff --git a/src/llamafactory/train/ppo/__init__.py b/src/llamafactory/train/ppo/__init__.py index d17336d5..161f6f5d 100644 --- a/src/llamafactory/train/ppo/__init__.py +++ b/src/llamafactory/train/ppo/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_ppo diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index fec3fc1e..05c40946 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json from contextlib import nullcontext from typing import TYPE_CHECKING, Dict, List, Literal, Optional diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 45f47455..61420f3b 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math import os import sys diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 111704c6..891d539a 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's TRL library. +# https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/pt/__init__.py b/src/llamafactory/train/pt/__init__.py index bdf397f6..d80e6f22 100644 --- a/src/llamafactory/train/pt/__init__.py +++ b/src/llamafactory/train/pt/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .workflow import run_pt diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 1d96e82f..09729f2e 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from types import MethodType from typing import TYPE_CHECKING, Dict, Optional diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index 8a635567..f1df314e 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/rm/__init__.py b/src/llamafactory/train/rm/__init__.py index dedac35f..48278315 100644 --- a/src/llamafactory/train/rm/__init__.py +++ b/src/llamafactory/train/rm/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_rm diff --git a/src/llamafactory/train/rm/metric.py b/src/llamafactory/train/rm/metric.py index 99dc6ab8..fb880b1c 100644 --- a/src/llamafactory/train/rm/metric.py +++ b/src/llamafactory/train/rm/metric.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Dict, Sequence, Tuple, Union import numpy as np diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index bfb344dc..14695d7d 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -1,3 +1,42 @@ +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by CarperAI's trlx library. +# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2022 CarperAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + import json import os from types import MethodType @@ -79,7 +118,6 @@ class PairwiseTrainer(Trainer): chosen_scores, rejected_scores = [], [] # Compute pairwise loss. Only backprop on the different tokens before padding - # Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/reward_model.py loss = 0 for i in range(batch_size): chosen_length = (chosen_input_ids[i] != self.tokenizer.pad_token_id).nonzero()[-1] + 1 diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 2e9e194b..75c0a2bf 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -1,4 +1,41 @@ -# Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py +# Copyright 2024 the LlamaFactory team. +# +# This code is inspired by CarperAI's trlx library. 
+# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License +# +# Copyright (c) 2022 CarperAI +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/sft/__init__.py b/src/llamafactory/train/sft/__init__.py index f2f84e78..475dfe5f 100644 --- a/src/llamafactory/train/sft/__init__.py +++ b/src/llamafactory/train/sft/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .workflow import run_sft diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 923238d6..d2147c22 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -1,3 +1,21 @@ +# Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py +# https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 6bf5b7c0..6ab6914e 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -1,3 +1,20 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from types import MethodType diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index a989b3f7..dfc71cfb 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -1,4 +1,19 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/summarization/run_summarization.py +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import TYPE_CHECKING, List, Optional diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 48944a63..5621d5df 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,3 +1,22 @@ +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by the GaLore's implementation: https://github.com/jiaweizzhao/GaLore +# and the LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus +# and the BAdam's implementation: https://github.com/Ledzy/BAdam +# and the TRL's implementation: https://github.com/huggingface/trl +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index eed875e9..788b4c4f 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict, List, Optional import torch diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index c82710d3..864c41c7 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import TYPE_CHECKING, Dict, Generator, List, Optional, Sequence, Tuple diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 3b8d5378..980428a4 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from collections import defaultdict diff --git a/src/llamafactory/webui/components/__init__.py b/src/llamafactory/webui/components/__init__.py index 5c1e21b8..715fb6e4 100644 --- a/src/llamafactory/webui/components/__init__.py +++ b/src/llamafactory/webui/components/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .chatbot import create_chat_box from .eval import create_eval_tab from .export import create_export_tab diff --git a/src/llamafactory/webui/components/chatbot.py b/src/llamafactory/webui/components/chatbot.py index f83694b1..ad74114b 100644 --- a/src/llamafactory/webui/components/chatbot.py +++ b/src/llamafactory/webui/components/chatbot.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Tuple from ...data import Role diff --git a/src/llamafactory/webui/components/data.py b/src/llamafactory/webui/components/data.py index 232b973d..88e500cf 100644 --- a/src/llamafactory/webui/components/data.py +++ b/src/llamafactory/webui/components/data.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os from typing import TYPE_CHECKING, Any, Dict, List, Tuple diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 0a7a0f44..b522913e 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 9d756a38..14257949 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Generator, List, Union from ...extras.constants import PEFT_METHODS diff --git a/src/llamafactory/webui/components/infer.py b/src/llamafactory/webui/components/infer.py index 970f4629..03bccd7f 100644 --- a/src/llamafactory/webui/components/infer.py +++ b/src/llamafactory/webui/components/infer.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from ...extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index fd0ead3d..2515a83d 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from ...data import TEMPLATES diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 72dfc858..673f6bf4 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict from transformers.trainer_utils import SchedulerType diff --git a/src/llamafactory/webui/css.py b/src/llamafactory/webui/css.py index 36e3d4c2..53982119 100644 --- a/src/llamafactory/webui/css.py +++ b/src/llamafactory/webui/css.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + CSS = r""" .duplicate-button { margin: auto !important; diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index eb6142d3..04893215 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Any, Dict from .chatter import WebChatModel diff --git a/src/llamafactory/webui/interface.py b/src/llamafactory/webui/interface.py index bae3ba76..d25f4d38 100644 --- a/src/llamafactory/webui/interface.py +++ b/src/llamafactory/webui/interface.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from ..extras.packages import is_gradio_available diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index e30feab2..427f01b8 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + LOCALES = { "lang": { "en": { diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index 326fdb8d..7e9b801a 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import TYPE_CHECKING, Dict, Generator, List, Set, Tuple diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 35014628..76982934 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from copy import deepcopy from subprocess import Popen, TimeoutExpired diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index e39f2aa4..6ce2a8e7 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import os import signal diff --git a/src/train.py b/src/train.py index b20aa9d2..6703ffdb 100644 --- a/src/train.py +++ b/src/train.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llamafactory.train.tuner import run_exp diff --git a/src/webui.py b/src/webui.py index bbefb54e..99370af2 100644 --- a/src/webui.py +++ b/src/webui.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from llamafactory.webui.interface import create_ui diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index 63a3453f..a72800d2 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py index f6a91a67..f85d9d57 100644 --- a/tests/eval/test_eval_template.py +++ b/tests/eval/test_eval_template.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from llamafactory.eval.template import get_eval_template diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 751adda4..97ac9dcc 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 32a3918e..462e8cfa 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index a0618315..ac5a023c 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 802b987c..bcd6480f 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 3e2503f1..e49c026c 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,3 +1,17 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os from typing import Sequence From 308abfec6c0cc60560ab53aec8b203d1be9f5e45 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 17:58:52 +0800 Subject: [PATCH 044/160] add minicpm #4227 Former-commit-id: e1bb18ce60be9a1b203989def30f1b9194286325 --- src/llamafactory/extras/constants.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index e31e7419..73a9969d 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -682,6 +682,21 @@ register_model_group( ) +register_model_group( + models={ + "MiniCPM-2B-SFT-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-sft-bf16", + DownloadSource.MODELSCOPE: "OpenBMB/miniCPM-bf16", + }, + "MiniCPM-2B-DPO-Chat": { + DownloadSource.DEFAULT: "openbmb/MiniCPM-2B-dpo-bf16", + DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM-2B-dpo-bf16", + }, + }, + template="cpm", +) + + register_model_group( models={ "Mistral-7B-v0.1": { From 7f90b0cd209f7b943c7ca941e8238f0108f407ea Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 19:51:20 +0800 Subject: [PATCH 045/160] add tests Former-commit-id: 484634ee9c982e82e919ff67d507e0210345182d --- Makefile | 2 +- src/llamafactory/extras/misc.py | 9 ++- tests/data/test_supervised.py | 2 +- tests/model/model_utils/test_checkpointing.py | 74 +++++++++++++++++++ tests/model/test_base.py | 30 +++++++- tests/model/test_freeze.py | 3 + tests/model/test_full.py | 2 + tests/model/test_lora.py | 58 +++++++++++++-- 8 files changed, 166 insertions(+), 14 deletions(-) create mode 100644 tests/model/model_utils/test_checkpointing.py diff --git a/Makefile b/Makefile index 65be047b..3f13b215 100644 --- a/Makefile +++ b/Makefile @@ -11,4 +11,4 @@ style: ruff format $(check_dirs) test: - pytest tests/ + CUDA_VISIBLE_DEVICES= pytest tests/ diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 3d969df1..93153b3e 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -22,6 +22,7 @@ from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTr from transformers.utils import ( SAFE_WEIGHTS_NAME, WEIGHTS_NAME, + is_safetensors_available, is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_mps_available, @@ -34,6 +35,11 @@ from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from .logging import get_logger +if is_safetensors_available(): + from safetensors import safe_open + from safetensors.torch import save_file + + _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available() try: _is_bf16_available = is_torch_bf16_gpu_available() @@ -128,9 +134,6 @@ def fix_valuehead_checkpoint( return if safe_serialization: - from safetensors import safe_open - from safetensors.torch import save_file - path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} diff --git 
a/tests/data/test_supervised.py b/tests/data/test_supervised.py index a72800d2..9f7b2dbf 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -41,7 +41,7 @@ TRAIN_ARGS = { } -@pytest.mark.parametrize("num_samples", [10]) +@pytest.mark.parametrize("num_samples", [16]) def test_supervised(num_samples: int): model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py new file mode 100644 index 00000000..670e693d --- /dev/null +++ b/tests/model/model_utils/test_checkpointing.py @@ -0,0 +1,74 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch + +from llamafactory.extras.misc import get_current_device +from llamafactory.hparams import get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +TRAIN_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "lora_target": "all", + "dataset": "llamafactory/tiny-supervised-dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + + +def test_checkpointing_enable(): + model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": False, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()): + assert getattr(module, "gradient_checkpointing") is True + + +def test_checkpointing_disable(): + model_args, _, _, finetuning_args, _ = get_train_args({"disable_gradient_checkpointing": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()): + assert getattr(module, "gradient_checkpointing") is False + + +def test_upcast_layernorm(): + model_args, _, _, finetuning_args, _ = get_train_args({"upcast_layernorm": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): + if param.ndim == 1 and "norm" in name: + assert param.dtype == torch.float32 + + +def test_upcast_lmhead_output(): + model_args, _, _, finetuning_args, _ = get_train_args({"upcast_lmhead_output": True, **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + inputs = 
torch.randn((1, 16), dtype=torch.float16, device=get_current_device()) + outputs: "torch.Tensor" = model.lm_head(inputs) + assert outputs.dtype == torch.float32 diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 462e8cfa..ee0b2886 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -13,16 +13,21 @@ # limitations under the License. import os +from typing import Dict import torch from transformers import AutoModelForCausalLM +from trl import AutoModelForCausalLMWithValueHead +from llamafactory.extras.misc import get_current_device from llamafactory.hparams import get_infer_args from llamafactory.model import load_model, load_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead") + INFER_ARGS = { "model_name_or_path": TINY_LLAMA, "template": "llama3", @@ -38,9 +43,32 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + def test_base(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - ref_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + + ref_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) + compare_model(model, ref_model) + + +def test_valuehead(): + AutoModelForCausalLMWithValueHead.post_init = post_init # patch for CPU test + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model( + tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False, add_valuehead=True + ) + + ref_model = AutoModelForCausalLMWithValueHead.from_pretrained( + TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device() + ) compare_model(model, ref_model) diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index ac5a023c..5f478af6 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -49,6 +49,7 @@ def test_freeze_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): if name.startswith("model.layers.1."): assert param.requires_grad is True @@ -64,6 +65,7 @@ def test_freeze_train_extra_modules(): ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for name, param in model.named_parameters(): if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]): assert param.requires_grad is True @@ -77,6 +79,7 @@ def test_freeze_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = 
load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): assert param.requires_grad is False assert param.dtype == torch.float16 diff --git a/tests/model/test_full.py b/tests/model/test_full.py index bcd6480f..0a6e0743 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -49,6 +49,7 @@ def test_full_train(): model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + for param in model.parameters(): assert param.requires_grad is True assert param.dtype == torch.float32 @@ -58,6 +59,7 @@ def test_full_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + for param in model.parameters(): assert param.requires_grad is False assert param.dtype == torch.float16 diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index e49c026c..4923c8ad 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -18,7 +18,9 @@ from typing import Sequence import torch from peft import LoraModel, PeftModel from transformers import AutoModelForCausalLM +from trl import AutoModelForCausalLMWithValueHead +from llamafactory.extras.misc import get_current_device from llamafactory.hparams import get_infer_args, get_train_args from llamafactory.model import load_model, load_tokenizer @@ -27,6 +29,8 @@ TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora") +TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead") + TRAIN_ARGS = { "model_name_or_path": TINY_LLAMA, "stage": "sft", @@ -67,10 +71,29 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +def test_lora_train_qv_modules(): + model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS}) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + linear_modules = set() + for name, param in model.named_parameters(): + if any(module in name for module in ["lora_A", "lora_B"]): + linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1]) + assert param.requires_grad is True + assert param.dtype == torch.float32 + else: + assert param.requires_grad is False + assert param.dtype == torch.float16 + + assert linear_modules == {"q_proj", "v_proj"} + + def test_lora_train_all_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + linear_modules = set() for name, param in model.named_parameters(): if any(module in name for module in ["lora_A", "lora_B"]): @@ -90,6 +113,7 @@ def test_lora_train_extra_modules(): ) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + extra_modules = set() for name, param in model.named_parameters(): if any(module in name for 
module in ["lora_A", "lora_B"]): @@ -113,7 +137,9 @@ def test_lora_train_old_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) for param in filter(lambda p: p.requires_grad, ref_model.parameters()): param.data = param.data.to(torch.float32) @@ -128,7 +154,9 @@ def test_lora_train_new_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) for param in filter(lambda p: p.requires_grad, ref_model.parameters()): param.data = param.data.to(torch.float32) @@ -138,17 +166,31 @@ def test_lora_train_new_adapters(): ) +def test_lora_train_valuehead(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model( + tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True, add_valuehead=True + ) + + ref_model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained( + TINY_LLAMA_VALUEHEAD, torch_dtype=torch.float16, device_map=get_current_device() + ) + state_dict = model.state_dict() + ref_state_dict = ref_model.state_dict() + + assert torch.allclose(state_dict["v_head.summary.weight"], ref_state_dict["v_head.summary.weight"]) + assert torch.allclose(state_dict["v_head.summary.bias"], ref_state_dict["v_head.summary.bias"]) + + def test_lora_inference(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - base_model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA, torch_dtype=model.dtype, device_map=model.device) + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) ref_model = ref_model.merge_and_unload() compare_model(model, ref_model) - - for name, param in model.named_parameters(): - assert param.requires_grad is False - assert param.dtype == torch.float16 - assert "lora" not in name From 14f7bfc545164f57ee2644d098a4cff44867d630 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 15 Jun 2024 20:06:17 +0800 Subject: [PATCH 046/160] use fixture Former-commit-id: 10761985691b9f934f7689c1f82aa6dd68febcca --- src/llamafactory/hparams/model_args.py | 2 +- tests/model/test_base.py | 15 ++++++++++----- tests/model/test_lora.py | 14 +++++++++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 0a91f0fa..53bdbdf2 100644 --- a/src/llamafactory/hparams/model_args.py +++ 
b/src/llamafactory/hparams/model_args.py @@ -163,7 +163,7 @@ class ModelArguments: ) infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field( default="auto", - metadata={"help": "Data type for model weights and activations at inference."} + metadata={"help": "Data type for model weights and activations at inference."}, ) hf_hub_token: Optional[str] = field( default=None, diff --git a/tests/model/test_base.py b/tests/model/test_base.py index ee0b2886..2deedde2 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -15,6 +15,7 @@ import os from typing import Dict +import pytest import torch from transformers import AutoModelForCausalLM from trl import AutoModelForCausalLMWithValueHead @@ -43,10 +44,14 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True -def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): - state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} - self.v_head.load_state_dict(state_dict, strict=False) - del state_dict +@pytest.fixture +def fix_valuehead_cpu_loading(): + def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + AutoModelForCausalLMWithValueHead.post_init = post_init def test_base(): @@ -60,8 +65,8 @@ def test_base(): compare_model(model, ref_model) +@pytest.mark.usefixtures("fix_valuehead_cpu_loading") def test_valuehead(): - AutoModelForCausalLMWithValueHead.post_init = post_init # patch for CPU test model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) model = load_model( diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 4923c8ad..fe032332 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -13,8 +13,9 @@ # limitations under the License. 
import os -from typing import Sequence +from typing import Dict, Sequence +import pytest import torch from peft import LoraModel, PeftModel from transformers import AutoModelForCausalLM @@ -71,6 +72,16 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True +@pytest.fixture +def fix_valuehead_cpu_loading(): + def post_init(self: "AutoModelForCausalLMWithValueHead", state_dict: Dict[str, "torch.Tensor"]): + state_dict = {k[7:]: state_dict[k] for k in state_dict.keys() if k.startswith("v_head.")} + self.v_head.load_state_dict(state_dict, strict=False) + del state_dict + + AutoModelForCausalLMWithValueHead.post_init = post_init + + def test_lora_train_qv_modules(): model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "q_proj,v_proj", **TRAIN_ARGS}) tokenizer_module = load_tokenizer(model_args) @@ -166,6 +177,7 @@ def test_lora_train_new_adapters(): ) +@pytest.mark.usefixtures("fix_valuehead_cpu_loading") def test_lora_train_valuehead(): model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) tokenizer_module = load_tokenizer(model_args) From 05f3a3c944e161df7bbcd579c80bba14095f61a5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:06:41 +0800 Subject: [PATCH 047/160] tiny fix Former-commit-id: f7f440986b0ae3b38ea9f2da80789629d4f79ea1 --- scripts/cal_flops.py | 2 +- scripts/cal_lr.py | 2 +- scripts/llama_pro.py | 2 +- src/llamafactory/data/processors/pretrain.py | 2 +- src/llamafactory/eval/evaluator.py | 2 +- src/llamafactory/extras/packages.py | 2 +- src/llamafactory/hparams/data_args.py | 2 +- src/llamafactory/model/model_utils/checkpointing.py | 2 +- src/llamafactory/model/model_utils/longlora.py | 6 ++++-- src/llamafactory/model/model_utils/quantization.py | 2 +- src/llamafactory/model/model_utils/visual.py | 2 +- src/llamafactory/train/dpo/workflow.py | 2 +- src/llamafactory/train/kto/trainer.py | 4 ++-- src/llamafactory/train/kto/workflow.py | 2 +- src/llamafactory/train/ppo/trainer.py | 2 +- src/llamafactory/train/ppo/workflow.py | 2 +- src/llamafactory/train/pt/workflow.py | 2 +- src/llamafactory/train/rm/trainer.py | 4 ++-- src/llamafactory/train/rm/workflow.py | 2 +- src/llamafactory/train/sft/metric.py | 2 +- src/llamafactory/train/sft/workflow.py | 2 +- tests/model/test_base.py | 2 +- 22 files changed, 27 insertions(+), 25 deletions(-) diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py index 627b5534..32526d89 100644 --- a/scripts/cal_flops.py +++ b/scripts/cal_flops.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 Microsoft Corporation and the LlamaFactory team. # -# This code is inspired by Microsoft's DeepSpeed library. +# This code is inspired by the Microsoft's DeepSpeed library. # https://www.deepspeed.ai/tutorials/flops-profiler/ # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py index ff21d27c..ad6992cb 100644 --- a/scripts/cal_lr.py +++ b/scripts/cal_lr.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 imoneoi and the LlamaFactory team. # -# This code is inspired by imoneoi's OpenChat library. +# This code is inspired by the imoneoi's OpenChat library. 
# https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index f315335a..395375ef 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 Tencent Inc. and the LlamaFactory team. # -# This code is inspired by Tencent's LLaMA-Pro library. +# This code is inspired by the Tencent's LLaMA-Pro library. # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py index fb4c840c..67d6009b 100644 --- a/src/llamafactory/data/processors/pretrain.py +++ b/src/llamafactory/data/processors/pretrain.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index bbd7a44b..d3140793 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by Dan's test library. +# This code is inspired by the Dan's test library. # https://github.com/hendrycks/test/blob/master/evaluate_flan.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index 35f546ab..0a84a293 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 95284766..39290e21 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index e4e84b12..f5314125 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's Transformers and PEFT library. +# This code is inspired by the HuggingFace's Transformers and PEFT library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py # https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py # diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py index 7af43dcf..af30bd50 100644 --- a/src/llamafactory/model/model_utils/longlora.py +++ b/src/llamafactory/model/model_utils/longlora.py @@ -1,7 +1,9 @@ -# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team. +# Copyright 2024 EleutherAI, HuggingFace Inc., Yukang Chen, and the LlamaFactory team. # -# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries. +# This code is based on the EleutherAI's GPT-NeoX and the HuggingFace's Transformers libraries. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py +# This code is also inspired by the original LongLoRA implementation. +# https://github.com/dvlab-research/LongLoRA/blob/main/llama_attn_replace.py # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 9e6b9da4..0a0fca34 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's Optimum library. +# This code is inspired by the HuggingFace's Optimum library. # https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py index 37237485..700bf470 100644 --- a/src/llamafactory/model/model_utils/visual.py +++ b/src/llamafactory/model/model_utils/visual.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's Transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py index 8c3c2eb1..431b5285 100644 --- a/src/llamafactory/train/dpo/workflow.py +++ b/src/llamafactory/train/dpo/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 6e96fc0c..91d68975 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. 
# https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -114,8 +114,8 @@ class CustomKTOTrainer(KTOTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def forward( diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py index 8a7af6d4..8182a184 100644 --- a/src/llamafactory/train/kto/workflow.py +++ b/src/llamafactory/train/kto/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 61420f3b..df4a37be 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 891d539a..4f4d2820 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py index f1df314e..b84a0e7d 100644 --- a/src/llamafactory/train/pt/workflow.py +++ b/src/llamafactory/train/pt/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 14695d7d..7f91e5f5 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by CarperAI's trlx library. +# This code is inspired by the CarperAI's trlx library. 
# https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -89,8 +89,8 @@ class PairwiseTrainer(Trainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def compute_loss( diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 75c0a2bf..6f24e964 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 the LlamaFactory team. # -# This code is inspired by CarperAI's trlx library. +# This code is inspired by the CarperAI's trlx library. # https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index d2147c22..95bfcb69 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation. +# This code is inspired by the HuggingFace's transformers library and the THUDM's ChatGLM implementation. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py # https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py # diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index dfc71cfb..885bc7ac 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 2deedde2..954492ef 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + assert torch.allclose(state_dict_a[name], state_dict_b[name]) @pytest.fixture From 32f45c9e91c7ce12936ba4d8c6ea55a8cc3ba31d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:08:12 +0800 Subject: [PATCH 048/160] support pissa Former-commit-id: ef8e45f2eaf466c54e9a671512a2974575677b08 --- README.md | 6 +- README_zh.md | 6 +- examples/README.md | 6 ++ examples/README_zh.md | 6 ++ examples/extras/pissa/llama3_lora_sft.yaml | 42 ++++++++++ scripts/loftq_init.py | 72 +++++++---------- scripts/pissa_init.py | 79 ++++++++++++++++++ src/llamafactory/hparams/finetuning_args.py | 20 ++++- src/llamafactory/hparams/model_args.py | 8 +- src/llamafactory/hparams/parser.py | 5 +- src/llamafactory/model/adapter.py | 25 ++++-- src/llamafactory/train/dpo/trainer.py | 13 ++- src/llamafactory/train/pt/trainer.py | 12 ++- src/llamafactory/train/sft/trainer.py | 13 ++- src/llamafactory/train/trainer_utils.py | 54 ++++++++++++- src/llamafactory/webui/components/train.py | 9 ++- src/llamafactory/webui/locales.py | 14 ++++ src/llamafactory/webui/runner.py | 2 + tests/model/test_pissa.py | 90 +++++++++++++++++++++ 19 files changed, 406 insertions(+), 76 deletions(-) create mode 100644 examples/extras/pissa/llama3_lora_sft.yaml create mode 100644 scripts/pissa_init.py create mode 100644 tests/model/test_pissa.py diff --git a/README.md b/README.md index cae79694..cb9a7222 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Choose your path: - **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc. - **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc. - **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8. -- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ and Agent tuning. +- **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ, PiSSA and Agent tuning. - **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA. - **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc. - **Faster inference**: OpenAI-style API, Gradio UI and CLI with vLLM worker. @@ -71,9 +71,9 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/06/07] We supported fine-tuning the **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** series models. +[24/06/16] We support **[PiSSA](https://arxiv.org/abs/2404.02948)** algorithm. See [examples](examples/README.md) for usage. -[24/06/05] We supported fine-tuning the **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** models. 
+[24/06/07] We supported fine-tuning the **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** and **[GLM-4](https://github.com/THUDM/GLM-4)** models. [24/05/26] We supported **[SimPO](https://arxiv.org/abs/2405.14734)** algorithm for preference learning. See [examples](examples/README.md) for usage. diff --git a/README_zh.md b/README_zh.md index af3ff8f0..5c005f30 100644 --- a/README_zh.md +++ b/README_zh.md @@ -49,7 +49,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。 - **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。 - **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。 -- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ 和 Agent 微调。 +- **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ、PiSSA 和 Agent 微调。 - **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。 - **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。 - **极速推理**:基于 vLLM 的 OpenAI 风格 API、浏览器界面和命令行接口。 @@ -71,9 +71,9 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 -[24/06/07] 我们支持了 **[Qwen-2](https://qwenlm.github.io/blog/qwen2/)** 系列模型的微调。 +[24/06/16] 我们支持了 **[PiSSA](https://arxiv.org/abs/2404.02948)** 算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/06/05] 我们支持了 **[GLM-4-9B/GLM-4-9B-Chat](https://github.com/THUDM/GLM-4)** 模型的微调。 +[24/06/07] 我们支持了 **[Qwen2](https://qwenlm.github.io/blog/qwen2/)** 和 **[GLM-4](https://github.com/THUDM/GLM-4)** 模型的微调。 [24/05/26] 我们支持了 **[SimPO](https://arxiv.org/abs/2405.14734)** 偏好对齐算法。详细用法请参照 [examples](examples/README_zh.md)。 diff --git a/examples/README.md b/examples/README.md index a6d78936..902d26b1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -213,3 +213,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ```bash bash examples/extras/fsdp_qlora/single_node.sh ``` + +#### PiSSA Fine-Tuning + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` diff --git a/examples/README_zh.md b/examples/README_zh.md index b6168a95..586e498c 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -213,3 +213,9 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml ```bash bash examples/extras/fsdp_qlora/single_node.sh ``` + +#### PiSSA 微调 + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` diff --git a/examples/extras/pissa/llama3_lora_sft.yaml b/examples/extras/pissa/llama3_lora_sft.yaml new file mode 100644 index 00000000..fd4b9f1d --- /dev/null +++ b/examples/extras/pissa/llama3_lora_sft.yaml @@ -0,0 +1,42 @@ +### model +model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct + +### method +stage: sft +do_train: true +finetuning_type: lora +lora_target: all +pissa_init: true +pissa_iter: 4 +pissa_convert: true + +### dataset +dataset: identity,alpaca_en_demo +template: llama3 +cutoff_len: 1024 +max_samples: 1000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/llama3-8b/lora/sft +logging_steps: 10 +save_steps: 500 +plot_loss: true +overwrite_output_dir: true + +### train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 +learning_rate: 1.0e-4 +num_train_epochs: 3.0 +lr_scheduler_type: cosine +warmup_ratio: 0.1 +fp16: true +ddp_timeout: 180000000 + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +eval_strategy: steps +eval_steps: 500 diff --git 
a/scripts/loftq_init.py b/scripts/loftq_init.py index 159dea06..556f342c 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -1,7 +1,7 @@ # coding=utf-8 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's PEFT library. +# This code is based on the HuggingFace's PEFT library. # https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,11 +17,9 @@ # limitations under the License. import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING import fire -import torch -import torch.nn as nn from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model from transformers import AutoModelForCausalLM, AutoTokenizer @@ -30,41 +28,20 @@ if TYPE_CHECKING: from transformers import PreTrainedModel -class Shell(nn.Module): - def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None): - super().__init__() - self.weight = nn.Parameter(weight, requires_grad=False) - if bias is not None: - self.bias = nn.Parameter(bias, requires_grad=False) - - -def unwrap_model(model: nn.Module, pattern=".base_layer") -> None: - for name in {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}: - parent_name = ".".join(name.split(".")[:-1]) - child_name = name.split(".")[-1] - parent_module = model.get_submodule(parent_name) - child_module = getattr(parent_module, child_name) - base_layer = getattr(child_module, "base_layer") - weight = getattr(base_layer, "weight", None) - bias = getattr(base_layer, "bias", None) - setattr(parent_module, child_name, Shell(weight, bias)) - - print("Model unwrapped.") - - def quantize_loftq( model_name_or_path: str, - save_dir: str, - loftq_bits: Optional[int] = 4, - loftq_iter: Optional[int] = 1, - lora_alpha: Optional[int] = None, - lora_rank: Optional[int] = 16, - lora_target: Optional[str] = "q_proj,v_proj", - save_safetensors: Optional[bool] = False, + output_dir: str, + loftq_bits: int = 4, + loftq_iter: int = 4, + lora_alpha: int = None, + lora_rank: int = 16, + lora_dropout: float = 0, + lora_target: str = "q_proj,v_proj", + save_safetensors: bool = True, ): r""" Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) - Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir + Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir """ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") @@ -74,25 +51,34 @@ def quantize_loftq( inference_mode=True, r=lora_rank, lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, - lora_dropout=0.1, + lora_dropout=lora_dropout, target_modules=[name.strip() for name in lora_target.split(",")], init_lora_weights="loftq", loftq_config=loftq_config, ) # Init LoftQ model - lora_model = get_peft_model(model, lora_config) - base_model: "PreTrainedModel" = lora_model.get_base_model() + print("Initializing LoftQ weights, it may be take several minutes, wait patiently.") + peft_model = get_peft_model(model, lora_config) + loftq_dir = os.path.join(output_dir, "loftq_init") # Save LoftQ model - setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir) - setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True) - 
lora_model.save_pretrained(os.path.join(save_dir, "adapters"), safe_serialization=save_safetensors) + setattr(peft_model.peft_config["default"], "base_model_name_or_path", output_dir) + setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply loftq again + peft_model.save_pretrained(loftq_dir, safe_serialization=save_safetensors) + print("Adapter weights saved in {}".format(loftq_dir)) # Save base model - unwrap_model(base_model) - base_model.save_pretrained(save_dir, safe_serialization=save_safetensors) - tokenizer.save_pretrained(save_dir) + base_model: "PreTrainedModel" = peft_model.unload() + base_model.save_pretrained(output_dir, safe_serialization=save_safetensors) + tokenizer.save_pretrained(output_dir) + print("Model weights saved in {}".format(output_dir)) + + print("Fine-tune this model with:") + print("model_name_or_path: {}".format(output_dir)) + print("adapter_name_or_path: {}".format(loftq_dir)) + print("finetuning_type: lora") + print("quantization_bit: {}".format(loftq_bits)) if __name__ == "__main__": diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py new file mode 100644 index 00000000..1b673c45 --- /dev/null +++ b/scripts/pissa_init.py @@ -0,0 +1,79 @@ +# coding=utf-8 +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is based on the HuggingFace's PEFT library. +# https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import TYPE_CHECKING + +import fire +from peft import LoraConfig, TaskType, get_peft_model +from transformers import AutoModelForCausalLM, AutoTokenizer + + +if TYPE_CHECKING: + from transformers import PreTrainedModel + + +def quantize_pissa( + model_name_or_path: str, + output_dir: str, + pissa_iter: int = 4, + lora_alpha: int = None, + lora_rank: int = 16, + lora_dropout: float = 0, + lora_target: str = "q_proj,v_proj", + save_safetensors: bool = True, +): + r""" + Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA) + Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir + """ + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) + model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") + lora_config = LoraConfig( + task_type=TaskType.CAUSAL_LM, + r=lora_rank, + lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, + lora_dropout=lora_dropout, + target_modules=[name.strip() for name in lora_target.split(",")], + init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter) + ) + + # Init PiSSA model + peft_model = get_peft_model(model, lora_config) + pissa_dir = os.path.join(output_dir, "pissa_init") + + # Save PiSSA model + setattr(peft_model.peft_config["default"], "init_lora_weights", True) # don't apply pissa again + peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors) + print("Adapter weights saved in {}".format(pissa_dir)) + + # Save base model + base_model: "PreTrainedModel" = peft_model.unload() + base_model.save_pretrained(output_dir, safe_serialization=save_safetensors) + tokenizer.save_pretrained(output_dir) + print("Model weights saved in {}".format(output_dir)) + + print("Fine-tune this model with:") + print("model_name_or_path: {}".format(output_dir)) + print("adapter_name_or_path: {}".format(pissa_dir)) + print("finetuning_type: lora") + print("pissa_convert: true") + + +if __name__ == "__main__": + fire.Fire(quantize_pissa) diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 52dc299e..1ef46eca 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -108,6 +108,18 @@ class LoraArguments: default=False, metadata={"help": "Whether or not to use the weight-decomposed lora method (DoRA)."}, ) + pissa_init: bool = field( + default=False, + metadata={"help": "Whether or not to initialize a PiSSA adapter."}, + ) + pissa_iter: int = field( + default=4, + metadata={"help": "The number of iteration steps performed by FSVD in PiSSA. 
Use -1 to disable it."}, + ) + pissa_convert: bool = field( + default=False, + metadata={"help": "Whether or not to convert the PiSSA adapter to a normal LoRA adapter."}, + ) create_new_adapter: bool = field( default=False, metadata={"help": "Whether or not to create a new adapter with randomly initialized weight."}, @@ -340,7 +352,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA self.additional_target: Optional[List[str]] = split_arg(self.additional_target) self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only - self.use_ref_model = self.pref_loss not in ["orpo", "simpo"] + self.use_ref_model = (self.stage == "dpo" and self.pref_loss not in ["orpo", "simpo"]) assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." @@ -367,5 +379,11 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA if self.loraplus_lr_ratio is not None and self.finetuning_type != "lora": raise ValueError("`loraplus_lr_ratio` is only valid for LoRA training.") + if self.pissa_convert and self.finetuning_type != "lora": + raise ValueError("`pissa_convert` is only valid for LoRA training.") + + if self.pissa_convert and (self.stage in ["rm", "ppo", "kto"] or self.use_ref_model): + raise ValueError("Cannot use PiSSA for current training stage.") + if self.train_mm_proj_only and self.finetuning_type != "full": raise ValueError("`train_mm_proj_only` is only valid for full training.") diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 53bdbdf2..996e9130 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,6 +45,10 @@ class ModelArguments: ) }, ) + adapter_folder: Optional[str] = field( + default=None, + metadata={"help": "The folder containing the adapter weights to load."}, + ) cache_dir: Optional[str] = field( default=None, metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."}, @@ -150,7 +154,7 @@ class ModelArguments: metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."}, ) vllm_max_lora_rank: int = field( - default=8, + default=32, metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."}, ) offload_folder: str = field( diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 1c57567c..31a805f6 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -90,6 +90,9 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") + if finetuning_args.use_pissa: + raise ValueError("Please use scripts/pissa_init.py for quantized PiSSA.") + if model_args.resize_vocab: raise ValueError("Cannot resize embedding layers of a quantized model.") diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index dfa71525..a8f3a256 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -179,8 +179,16 @@ def _setup_lora_tuning( else: adapter_to_merge = model_args.adapter_name_or_path + init_kwargs = { + "subfolder": model_args.adapter_folder, + "offload_folder": model_args.offload_folder, + "cache_dir": model_args.cache_dir, + "revision": model_args.model_revision, + "token": model_args.hf_hub_token, + } + for adapter in adapter_to_merge: - model: "LoraModel" = PeftModel.from_pretrained(model, adapter, offload_folder=model_args.offload_folder) + model: "LoraModel" = PeftModel.from_pretrained(model, adapter, **init_kwargs) model = model.merge_and_unload() if len(adapter_to_merge) > 0: @@ -190,12 +198,7 @@ def _setup_lora_tuning( if model_args.use_unsloth: model = load_unsloth_peft_model(config, model_args, is_trainable=is_trainable) else: - model = PeftModel.from_pretrained( - model, - adapter_to_resume, - is_trainable=is_trainable, - offload_folder=model_args.offload_folder, - ) + model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable, **init_kwargs) logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) @@ -242,6 +245,14 @@ def _setup_lora_tuning( if model_args.use_unsloth: model = get_unsloth_peft_model(model, model_args, peft_kwargs) else: + if finetuning_args.pissa_init: + if finetuning_args.pissa_iter == -1: + logger.info("Using PiSSA initialization.") + peft_kwargs["init_lora_weights"] = "pissa" + else: + logger.info("Using PiSSA initialization with FSVD steps {}.".format(finetuning_args.pissa_iter)) + peft_kwargs["init_lora_weights"] = "pissa_niter_{}".format(finetuning_args.pissa_iter) + lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, inference_mode=False, diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 475d08c3..9928d0bc 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's TRL library. +# This code is inspired by the HuggingFace's TRL library. # https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/dpo_trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os import warnings from collections import defaultdict from contextlib import nullcontext @@ -28,7 +29,7 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -91,6 +92,9 @@ class CustomDPOTrainer(DPOTrainer): self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) self.ref_model.eval() + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -109,8 +113,11 @@ class CustomDPOTrainer(DPOTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor": diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 09729f2e..f9e04cb5 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -12,13 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from types import MethodType from typing import TYPE_CHECKING, Dict, Optional from transformers import Trainer from ...extras.logging import get_logger -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -42,6 +43,10 @@ class CustomTrainer(Trainer): super().__init__(**kwargs) self.finetuning_args = finetuning_args self.processor = processor + + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -60,6 +65,9 @@ class CustomTrainer(Trainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 6ab6914e..921e49ab 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -1,6 +1,6 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by HuggingFace's transformers library. +# This code is inspired by the HuggingFace's transformers library. 
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer_seq2seq.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,7 +26,7 @@ from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from ..trainer_utils import create_custom_optimzer, create_custom_scheduler +from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -51,6 +51,10 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): super().__init__(**kwargs) self.finetuning_args = finetuning_args self.processor = processor + + if finetuning_args.pissa_convert: + self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor @@ -69,8 +73,11 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: super()._save(output_dir, state_dict) + output_dir = output_dir if output_dir is not None else self.args.output_dir + if self.finetuning_args.pissa_convert: + convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) + if self.processor is not None: - output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) def prediction_step( diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 5621d5df..2d6bab24 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -1,9 +1,9 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by the GaLore's implementation: https://github.com/jiaweizzhao/GaLore -# and the LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus -# and the BAdam's implementation: https://github.com/Ledzy/BAdam -# and the TRL's implementation: https://github.com/huggingface/trl +# This code is inspired by the original GaLore's implementation: https://github.com/jiaweizzhao/GaLore +# and the original LoRA+'s implementation: https://github.com/nikhil-ghosh-berkeley/loraplus +# and the original BAdam's implementation: https://github.com/Ledzy/BAdam +# and the HuggingFace's TRL library: https://github.com/huggingface/trl # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,9 +17,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch +from peft import PeftModel from transformers import Trainer from transformers.optimization import get_scheduler from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS @@ -37,6 +39,7 @@ if is_galore_available(): if TYPE_CHECKING: + from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -171,6 +174,49 @@ def create_reward_model( return reward_model +def convert_pissa_adapter( + output_dir: str, + state_dict: Dict[str, "torch.Tensor"], + accelerator: "Accelerator", + model: "PreTrainedModel", + training_args: "Seq2SeqTrainingArguments", +) -> None: + r""" + Converts the PiSSA adapter to a LoRA adapter. 
+ """ + pissa_init_dir = os.path.join(training_args.output_dir, "pissa_init") + pissa_backup_dir = os.path.join(output_dir, "pissa_backup") + if output_dir == pissa_init_dir: + logger.info("Initial PiSSA adatper will be saved at: {}.".format(pissa_init_dir)) + unwrapped_model = accelerator.unwrap_model(model) + if isinstance(unwrapped_model, PeftModel): + init_lora_weights = getattr(unwrapped_model.peft_config["default"], "init_lora_weights") + setattr(unwrapped_model.peft_config["default"], "init_lora_weights", True) + unwrapped_model.save_pretrained( + output_dir, + state_dict=state_dict, + safe_serialization=training_args.save_safetensors, + ) + setattr(unwrapped_model.peft_config["default"], "init_lora_weights", init_lora_weights) + elif output_dir == training_args.output_dir: # at the end of training + logger.info("Converted PiSSA adapter will be saved at: {}.".format(output_dir)) + unwrapped_model = accelerator.unwrap_model(model) + if isinstance(unwrapped_model, PeftModel): # backup the pissa adapter for further use + unwrapped_model.save_pretrained( + pissa_backup_dir, + state_dict=state_dict, + safe_serialization=training_args.save_safetensors, + ) + unwrapped_model.save_pretrained( + output_dir, + state_dict=state_dict, + safe_serialization=training_args.save_safetensors, + convert_pissa_to_lora=pissa_init_dir, + ) + unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True) + unwrapped_model.set_adapter("default") + + def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. (weights in non-layernorm layers) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 673f6bf4..874f3c5e 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -163,10 +163,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: create_new_adapter = gr.Checkbox() with gr.Row(): - with gr.Column(scale=1): - use_rslora = gr.Checkbox() - use_dora = gr.Checkbox() - + use_rslora = gr.Checkbox() + use_dora = gr.Checkbox() + use_pissa = gr.Checkbox() lora_target = gr.Textbox(scale=2) additional_target = gr.Textbox(scale=2) @@ -179,6 +178,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: create_new_adapter, use_rslora, use_dora, + use_pissa, lora_target, additional_target, } @@ -193,6 +193,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: create_new_adapter=create_new_adapter, use_rslora=use_rslora, use_dora=use_dora, + use_pissa=use_pissa, lora_target=lora_target, additional_target=additional_target, ) diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index 427f01b8..8e8d6fce 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -732,6 +732,20 @@ LOCALES = { "info": "使用权重分解的 LoRA。", }, }, + "use_pissa": { + "en": { + "label": "Use PiSSA", + "info": "Use PiSSA method.", + }, + "ru": { + "label": "используйте PiSSA", + "info": "Используйте метод PiSSA.", + }, + "zh": { + "label": "使用 PiSSA", + "info": "使用 PiSSA 方法。", + }, + }, "lora_target": { "en": { "label": "LoRA modules (optional)", diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 76982934..13dbba03 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -173,6 +173,8 @@ class Runner: args["create_new_adapter"] = get("train.create_new_adapter") args["use_rslora"] = 
get("train.use_rslora") args["use_dora"] = get("train.use_dora") + args["pissa_init"] = get("train.use_pissa") + args["pissa_convert"] = get("train.use_pissa") args["lora_target"] = get("train.lora_target") or "all" args["additional_target"] = get("train.additional_target") or None diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py new file mode 100644 index 00000000..70c424fd --- /dev/null +++ b/tests/model/test_pissa.py @@ -0,0 +1,90 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import torch +from peft import LoraModel, PeftModel +from transformers import AutoModelForCausalLM + +from llamafactory.extras.misc import get_current_device +from llamafactory.hparams import get_infer_args, get_train_args +from llamafactory.model import load_model, load_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + +TINY_LLAMA_PISSA = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-pissa") + +TRAIN_ARGS = { + "model_name_or_path": TINY_LLAMA, + "stage": "sft", + "do_train": True, + "finetuning_type": "lora", + "pissa_init": True, + "pissa_iter": -1, + "dataset": "llamafactory/tiny-supervised-dataset", + "dataset_dir": "ONLINE", + "template": "llama3", + "cutoff_len": 1024, + "overwrite_cache": True, + "output_dir": "dummy_dir", + "overwrite_output_dir": True, + "fp16": True, +} + +INFER_ARGS = { + "model_name_or_path": TINY_LLAMA_PISSA, + "adapter_name_or_path": TINY_LLAMA_PISSA, + "adapter_folder": "pissa_init", + "finetuning_type": "lora", + "template": "llama3", + "infer_dtype": "float16", +} + + +def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): + state_dict_a = model_a.state_dict() + state_dict_b = model_b.state_dict() + assert set(state_dict_a.keys()) == set(state_dict_b.keys()) + for name in state_dict_a.keys(): + assert torch.allclose(state_dict_a[name], state_dict_b[name]) + + +def test_pissa_init(): + model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) + + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA_PISSA, torch_dtype=torch.float16, device_map=get_current_device() + ) + ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_PISSA, subfolder="pissa_init", is_trainable=True) + for param in filter(lambda p: p.requires_grad, ref_model.parameters()): + param.data = param.data.to(torch.float32) + + compare_model(model, ref_model) + + +def test_pissa_inference(): + model_args, _, finetuning_args, _ = get_infer_args(INFER_ARGS) + tokenizer_module = load_tokenizer(model_args) + model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) + + base_model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA_PISSA, torch_dtype=torch.float16, device_map=get_current_device() + ) + ref_model: "LoraModel" = 
PeftModel.from_pretrained(base_model, TINY_LLAMA_PISSA, subfolder="pissa_init") + ref_model = ref_model.merge_and_unload() + compare_model(model, ref_model) From 9049f72d2f517a7ed4f7fc863354eea730d5f2cd Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:21:06 +0800 Subject: [PATCH 049/160] increase tol Former-commit-id: c29071445e34aed23123fdf883a4d877744a1b0e --- tests/model/test_pissa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 70c424fd..41d02752 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -59,7 +59,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-3) def test_pissa_init(): From 8393b0866659023e1829985f24f4685fdcae040e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:22:23 +0800 Subject: [PATCH 050/160] Update tests.yml Former-commit-id: 82e83615a706293abbf266d11c57caedafdd4c5b --- .github/workflows/tests.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 96092662..98bd9455 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,8 +9,6 @@ on: - "requirements.txt" - ".github/workflows/*.yml" pull_request: - types: - - review_requested branches: - main paths: From 727943f078186df085b130d4acf2f448ac96d92b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:38:44 +0800 Subject: [PATCH 051/160] fix tol Former-commit-id: bdb54bcb477126687db789bd89f2df84e424a2a3 --- src/llamafactory/train/trainer_utils.py | 1 + tests/model/test_base.py | 2 +- tests/model/test_lora.py | 4 ++-- tests/model/test_pissa.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 2d6bab24..9052c96d 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -213,6 +213,7 @@ def convert_pissa_adapter( safe_serialization=training_args.save_safetensors, convert_pissa_to_lora=pissa_init_dir, ) + # TODO: the model is applied pissa again unexpectedly unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True) unwrapped_model.set_adapter("default") diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 954492ef..e1991b20 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) @pytest.fixture diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index fe032332..64566fe8 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -67,9 +67,9 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): if any(key in name for key in 
diff_keys): - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is False + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is False else: - assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) is True @pytest.fixture diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 41d02752..030310d0 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -59,7 +59,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"): state_dict_b = model_b.state_dict() assert set(state_dict_a.keys()) == set(state_dict_b.keys()) for name in state_dict_a.keys(): - assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-3) + assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-4, atol=1e-5) def test_pissa_init(): From 76cd879c8421388c39d83595673ee4123ea5991a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 16 Jun 2024 01:43:43 +0800 Subject: [PATCH 052/160] update pr template Former-commit-id: 0b7c29674fda10c0ac87e0a0c75990feabb5a3de --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b31e9d19..d23d6be3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,3 +5,4 @@ Fixes # (issue) ## Before submitting - [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)? +- [ ] Did you write any new necessary tests? From ca67b7a568592814ad577fba68cca666eafd94f0 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Sun, 16 Jun 2024 02:57:00 +0800 Subject: [PATCH 053/160] Update parser.py Former-commit-id: d10c97193d08bd368aca1a72f0d1d8a96c76765d --- src/llamafactory/hparams/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 31a805f6..467fc43d 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -90,8 +90,8 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") - if finetuning_args.use_pissa: - raise ValueError("Please use scripts/pissa_init.py for quantized PiSSA.") + if finetuning_args.pissa_init: + raise ValueError("Please use scripts/pissa_init.py to initialize PiSSA for a quantized model.") if model_args.resize_vocab: raise ValueError("Cannot resize embedding layers of a quantized model.") From 5e802b06452c2f38565b0e3295bc2eef1a206c35 Mon Sep 17 00:00:00 2001 From: Eli Costa <87460497+EliMCosta@users.noreply.github.com> Date: Sun, 16 Jun 2024 11:19:25 -0300 Subject: [PATCH 054/160] Update README.md Add Magpie and Webinstruct to README Former-commit-id: 2b32b9263f12605e48e11dce9b5fbb746d790745 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index cb9a7222..30c90e9b 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat 
(de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
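The tolerance bumps in the test patches above (PATCH 049 and 051) compare state dicts of models whose trainable parameters pass through float16, so bit-exact equality is not a reasonable expectation. The snippet below is a minimal, illustrative sketch of how torch.allclose applies rtol and atol; the tensor values are invented for illustration and are not taken from the test suite.

import torch

# Invented values: weights that agree to roughly four decimal places but not
# bit-exactly, the kind of drift a float16 round-trip introduces.
trained = torch.tensor([1.00008, 0.50004, -2.0001])
reference = torch.tensor([1.00000, 0.50000, -2.0000])

# PyTorch defaults (rtol=1e-5, atol=1e-8) reject the match ...
print(torch.allclose(trained, reference))                        # False
# ... while the relaxed tolerances adopted in the tests accept it,
# since |a - b| <= atol + rtol * |b| must hold element-wise.
print(torch.allclose(trained, reference, rtol=1e-4, atol=1e-5))  # True
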
From 9a0aca42a5d0252165820536148c02aca215c22f Mon Sep 17 00:00:00 2001 From: Eli Costa <87460497+EliMCosta@users.noreply.github.com> Date: Sun, 16 Jun 2024 11:22:06 -0300 Subject: [PATCH 055/160] Update README_zh.md Add Magpie and WebInstruct to README Former-commit-id: 6cf5323959fe9500ba06ab28980fcc8f62e1373f --- README_zh.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README_zh.md b/README_zh.md index 5c005f30..531d9b56 100644 --- a/README_zh.md +++ b/README_zh.md @@ -270,8 +270,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) - -
+- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
偏好数据集 From d3b0048d8c58c9f3d357f540dfc6d7e8f1197720 Mon Sep 17 00:00:00 2001 From: Eli Costa <87460497+EliMCosta@users.noreply.github.com> Date: Sun, 16 Jun 2024 11:34:31 -0300 Subject: [PATCH 056/160] Update README_zh.md Fix details tag in datasets menus Former-commit-id: d79c1bd4806e9ea13115fabebf9da2d19b0a52be --- README_zh.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README_zh.md b/README_zh.md index 531d9b56..711596f0 100644 --- a/README_zh.md +++ b/README_zh.md @@ -273,6 +273,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) - [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) +
+
偏好数据集 - [DPO mixed (en&zh)](https://huggingface.co/datasets/hiyouga/DPO-En-Zh-20k) From a720b82e63a4051ed5c933ede0570de69e07b10a Mon Sep 17 00:00:00 2001 From: Eli Costa <87460497+EliMCosta@users.noreply.github.com> Date: Sun, 16 Jun 2024 19:16:23 -0300 Subject: [PATCH 057/160] Fix Dockerfile Adds the commands to correctly execute LLama-Factory servers Former-commit-id: 22af40f0895a6f88709a495febeca8507d41d989 --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 3932ff30..9c2f645c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,3 +42,5 @@ EXPOSE 7860 # Expose port 8000 for the API service EXPOSE 8000 + +CMD [ "llamafactory-cli", "webui" ] From 63bfe9967e48c9b2bab5ab3406a795015c6718cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=83=A1=E7=BF=80?= <1020327414@qq.com> Date: Mon, 17 Jun 2024 16:45:57 +0800 Subject: [PATCH 058/160] Update requirements.txt add pandas version requirements Former-commit-id: ed1cf559aa2d02588aacf55a17b439473651f626 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 9e00555e..5c7202a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ accelerate>=0.30.1 peft>=0.11.1 trl>=0.8.6 gradio>=4.0.0 +pandas>=2.2.2 scipy einops sentencepiece From 485a80d2947acfb6cdd635ab7c079b03629a5b47 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 17:47:25 +0800 Subject: [PATCH 059/160] tiny fix Former-commit-id: 2289436567a7860d25d9da0afb39e4a3e5e83839 --- examples/README.md | 14 ++++---- examples/README_zh.md | 14 ++++---- .../fsdp_qlora/{single_node.sh => train.sh} | 0 scripts/llama_pro.py | 2 +- scripts/loftq_init.py | 2 +- scripts/pissa_init.py | 5 ++- tests/model/test_lora.py | 34 +++++++------------ 7 files changed, 32 insertions(+), 39 deletions(-) rename examples/extras/fsdp_qlora/{single_node.sh => train.sh} (100%) diff --git a/examples/README.md b/examples/README.md index 902d26b1..007a81ab 100644 --- a/examples/README.md +++ b/examples/README.md @@ -195,6 +195,12 @@ llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` +#### PiSSA Fine-Tuning + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` + #### Mixture-of-Depths Fine-Tuning ```bash @@ -211,11 +217,5 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml #### FSDP+QLoRA Fine-Tuning ```bash -bash examples/extras/fsdp_qlora/single_node.sh -``` - -#### PiSSA Fine-Tuning - -```bash -llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +bash examples/extras/fsdp_qlora/train.sh ``` diff --git a/examples/README_zh.md b/examples/README_zh.md index 586e498c..b9d90f25 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -195,6 +195,12 @@ llamafactory-cli train examples/extras/badam/llama3_full_sft.yaml llamafactory-cli train examples/extras/loraplus/llama3_lora_sft.yaml ``` +#### PiSSA 微调 + +```bash +llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +``` + #### 深度混合微调 ```bash @@ -211,11 +217,5 @@ llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml #### FSDP+QLoRA 微调 ```bash -bash examples/extras/fsdp_qlora/single_node.sh -``` - -#### PiSSA 微调 - -```bash -llamafactory-cli train examples/extras/pissa/llama3_lora_sft.yaml +bash examples/extras/fsdp_qlora/train.sh ``` diff --git a/examples/extras/fsdp_qlora/single_node.sh b/examples/extras/fsdp_qlora/train.sh similarity index 100% rename 
from examples/extras/fsdp_qlora/single_node.sh rename to examples/extras/fsdp_qlora/train.sh diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 395375ef..17bf6fc2 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -120,7 +120,7 @@ def block_expansion( json.dump(index, f, indent=2, sort_keys=True) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("finetuning_type: freeze") print("freeze_trainable_layers: {}".format(num_expand)) diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 556f342c..b9506fa3 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -74,7 +74,7 @@ def quantize_loftq( tokenizer.save_pretrained(output_dir) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("adapter_name_or_path: {}".format(loftq_dir)) print("finetuning_type: lora") diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index 1b673c45..10b81efc 100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -68,11 +68,14 @@ def quantize_pissa( tokenizer.save_pretrained(output_dir) print("Model weights saved in {}".format(output_dir)) - print("Fine-tune this model with:") + print("- Fine-tune this model with:") print("model_name_or_path: {}".format(output_dir)) print("adapter_name_or_path: {}".format(pissa_dir)) print("finetuning_type: lora") + print("pissa_init: false") print("pissa_convert: true") + print("- and optionally with:") + print("quantization_bit: 4") if __name__ == "__main__": diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index 64566fe8..630e5f75 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -56,9 +56,15 @@ INFER_ARGS = { } -def load_reference_model() -> "torch.nn.Module": - model = AutoModelForCausalLM.from_pretrained(TINY_LLAMA) - return PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER) +def load_reference_model(is_trainable: bool = False) -> "LoraModel": + model = AutoModelForCausalLM.from_pretrained( + TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() + ) + lora_model = PeftModel.from_pretrained(model, TINY_LLAMA_ADAPTER, is_trainable=is_trainable) + for param in filter(lambda p: p.requires_grad, lora_model.parameters()): + param.data = param.data.to(torch.float32) + + return lora_model def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_keys: Sequence[str] = []): @@ -148,13 +154,7 @@ def test_lora_train_old_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) - for param in filter(lambda p: p.requires_grad, ref_model.parameters()): - param.data = param.data.to(torch.float32) - + ref_model = load_reference_model(is_trainable=True) compare_model(model, ref_model) @@ -165,13 +165,7 @@ def test_lora_train_new_adapters(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, 
device_map=get_current_device() - ) - ref_model = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER, is_trainable=True) - for param in filter(lambda p: p.requires_grad, ref_model.parameters()): - param.data = param.data.to(torch.float32) - + ref_model = load_reference_model(is_trainable=True) compare_model( model, ref_model, diff_keys=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"] ) @@ -200,9 +194,5 @@ def test_lora_inference(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=False) - base_model = AutoModelForCausalLM.from_pretrained( - TINY_LLAMA, torch_dtype=torch.float16, device_map=get_current_device() - ) - ref_model: "LoraModel" = PeftModel.from_pretrained(base_model, TINY_LLAMA_ADAPTER) - ref_model = ref_model.merge_and_unload() + ref_model = load_reference_model().merge_and_unload() compare_model(model, ref_model) From 60d9896a70fd9ea5a77df8daeaa46117f16e9582 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 18:17:48 +0800 Subject: [PATCH 060/160] fix #4326 Former-commit-id: 3c2c45812a720d92f7f5b15b9f03370fe6bf069e --- src/llamafactory/model/adapter.py | 16 +++++++++++++--- .../model/model_utils/quantization.py | 18 ++++++++---------- src/llamafactory/model/patcher.py | 5 ++++- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index a8f3a256..34518878 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -281,12 +281,22 @@ def init_adapter( Note that the trainable parameters must be cast to float32. """ - if is_trainable and getattr(model, "quantization_method", None) and finetuning_args.finetuning_type != "lora": - raise ValueError("Quantized models can only be used for the LoRA tuning.") + if is_trainable and getattr(model, "quantization_method", None) is not None: + if finetuning_args.finetuning_type != "lora": + raise ValueError("Quantized models can only be used for the LoRA tuning.") + if finetuning_args.pissa_init: + raise ValueError("Cannot initialize PiSSA adapter on quantized models.") + + # cast trainable parameters to float32 if: + # 1. is_trainable and quantization_bit is not None (qlora) + # 2. is_trainable and not deepspeed zero3 and not fsdp (zero3 or fsdp already in float32) + # 3. is_trainable and not pure_bf16 and not badam if not is_trainable: cast_trainable_params_to_fp32 = False - elif is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam: + elif model_args.quantization_bit is None and ( + is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam + ): logger.info("ZeRO3/FSDP/PureBF16/BAdam detected, remaining trainable params as their original precision.") cast_trainable_params_to_fp32 = False else: diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 0a0fca34..5251f84f 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -1,6 +1,7 @@ # Copyright 2024 HuggingFace Inc. and the LlamaFactory team. # -# This code is inspired by the HuggingFace's Optimum library. +# This code is inspired by the HuggingFace's Transformers and Optimum library. 
+# https://github.com/huggingface/transformers/blob/v4.41.0/src/transformers/utils/quantization_config.py # https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -96,10 +97,7 @@ def configure_quantization( """ if getattr(config, "quantization_config", None): # ptq if is_deepspeed_zero3_enabled(): - raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantized models.") - - if model_args.quantization_device_map != "auto": - init_kwargs["device_map"] = {"": get_current_device()} + raise ValueError("DeepSpeed ZeRO-3 is incompatible with PTQ-quantized models.") quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None) quant_method = quantization_config.get("quant_method", "") @@ -152,15 +150,15 @@ def configure_quantization( bnb_4bit_quant_storage=model_args.compute_dtype, # crucial for fsdp+qlora ) + # assign device map if: + # 1. not deepspeed zero3 and not fsdp + # 2. not auto quantization device map if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or model_args.quantization_device_map == "auto": if model_args.quantization_bit != 4: - raise ValueError("Only 4-bit quantized model can use auto device map.") + raise ValueError("Only 4-bit quantized model can use fsdp+qlora or auto device map.") - require_version("transformers>=4.39.0", "To fix: pip install transformers>=4.39.0") - require_version("accelerate>=0.28.0", "To fix: pip install accelerate>=0.28.0") require_version("bitsandbytes>=0.43.0", "To fix: pip install bitsandbytes>=0.43.0") - init_kwargs["torch_dtype"] = model_args.compute_dtype # fsdp+qlora requires same dtype else: - init_kwargs["device_map"] = {"": get_current_device()} + init_kwargs["device_map"] = {"": get_current_device()} # change auto device map for inference logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit)) diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 053516e4..8fa17d08 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -89,7 +89,10 @@ def patch_config( # deepspeed zero3 is not compatible with low_cpu_mem_usage init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled()) - if not is_deepspeed_zero3_enabled() and not is_fsdp_enabled(): # cast dtype and device if not use zero3 or fsdp + # cast data type of the model if: + # 1. not deepspeed zero3 and not fsdp (keep zero3 or fsdp in float32) + # 2. 
fsdp + qlora + if model_args.quantization_bit is not None or (not is_deepspeed_zero3_enabled() and not is_fsdp_enabled()): init_kwargs["torch_dtype"] = model_args.compute_dtype if init_kwargs["low_cpu_mem_usage"]: # device map requires low_cpu_mem_usage=True From ba303fd1aa7b558e7b86871bbac567fd405ea039 Mon Sep 17 00:00:00 2001 From: Jonery Date: Mon, 17 Jun 2024 18:18:10 +0800 Subject: [PATCH 061/160] adapt for badam with ds zero3 Former-commit-id: fff2a020ec8713022bd8145f4a7168168ea07ca4 --- src/llamafactory/hparams/parser.py | 12 ++++++------ src/llamafactory/train/sft/trainer.py | 15 +++++++++++++++ src/llamafactory/train/utils.py | 7 +++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 6311297e..fe108657 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -184,12 +184,12 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): raise ValueError("Distributed training does not support layer-wise GaLore.") - if ( - finetuning_args.use_badam - and finetuning_args.badam_mode == "layer" - and training_args.parallel_mode.value == "distributed" - ): - raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") + # if ( + # finetuning_args.use_badam + # and finetuning_args.badam_mode == "layer" + # and training_args.parallel_mode.value == "distributed" + # ): + # raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") if (finetuning_args.use_galore or finetuning_args.use_badam) and training_args.deepspeed is not None: raise ValueError("GaLore and BAdam are incompatible with DeepSpeed yet.") diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 35671e1b..cd73bf5c 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -55,6 +55,21 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): output_dir = output_dir if output_dir is not None else self.args.output_dir getattr(self.processor, "image_processor").save_pretrained(output_dir) + def training_step(self, *args, **kwargs): + r""" + Update the reference to deepspeed optimizer + """ + if self.finetuning_args.use_badam and \ + self.args.deepspeed_plugin is not None and \ + self.args.deepspeed_plugin.zero_stage == 3: + + ds_optim = self.optimizer.optimizer + badam_optim = ds_optim.optimizer + badam_optim.ds_optimizer = ds_optim + + return super().training_step(*args, **kwargs) + + def prediction_step( self, model: "torch.nn.Module", diff --git a/src/llamafactory/train/utils.py b/src/llamafactory/train/utils.py index 23834f2d..b189922b 100644 --- a/src/llamafactory/train/utils.py +++ b/src/llamafactory/train/utils.py @@ -309,6 +309,12 @@ def _create_badam_optimizer( dict(params=decay_params, weight_decay=training_args.weight_decay), ] + ds_zero3_enabled = False + if hasattr(training_args, "deepspeed_plugin") and training_args.deepspeed_plugin is not None: + assert training_args.deepspeed_plugin.zero_stage == 3, f"BAdam only supports deepspeed ZeRO-3 stage, got {training_args.deepspeed_plugin.zero_stage}" + assert finetuning_args.badam_mode == "layer", "BAdam only supports layer-wise update in ZeRO-3 stage" + ds_zero3_enabled = True + if finetuning_args.badam_mode == "layer": from badam import BlockOptimizer @@ -321,6 +327,7 @@ def _create_badam_optimizer( start_block=finetuning_args.badam_start_block, 
switch_mode=finetuning_args.badam_switch_mode, verbose=finetuning_args.badam_verbose, + ds_zero3_enabled=ds_zero3_enabled ) logger.info( f"Using BAdam optimizer with layer-wise update, switch mode is {finetuning_args.badam_switch_mode}, " From 7408e778cade09a0c24a10815ae22d57b8eb74f0 Mon Sep 17 00:00:00 2001 From: Jonery Date: Mon, 17 Jun 2024 18:29:36 +0800 Subject: [PATCH 062/160] update gitigore Former-commit-id: 0068648aee07840cd2a08071e093436aee3f5cb6 --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 0355c666..2486e728 100644 --- a/.gitignore +++ b/.gitignore @@ -163,3 +163,5 @@ cython_debug/ user.config saves/ cache/ +wandb +ds_badam_exp \ No newline at end of file From 77242f41692482de2b120e143b23e158da582ce6 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 18:47:24 +0800 Subject: [PATCH 063/160] update readme Former-commit-id: 07c629f77c3978f339402e578cde1aede3f37699 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cb9a7222..60045118 100644 --- a/README.md +++ b/README.md @@ -481,7 +481,7 @@ Train the model by specifying a model ID of the ModelScope Hub as the `model_nam ### Use W&B Logger -To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments. +To use [Weights & Biases](https://wandb.ai) for logging experimental results, you need to add the following arguments to yaml files. ```yaml report_to: wandb diff --git a/README_zh.md b/README_zh.md index 5c005f30..fc824561 100644 --- a/README_zh.md +++ b/README_zh.md @@ -481,7 +481,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` ### 使用 W&B 面板 -若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请添加下面的参数。 +若要使用 [Weights & Biases](https://wandb.ai) 记录实验数据,请在 yaml 文件中添加下面的参数。 ```yaml report_to: wandb From 43fab306b6932b424c5d0b03c2f138c9aee54c2b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 19:07:17 +0800 Subject: [PATCH 064/160] update chat engine #4335 Former-commit-id: b163df7de48777e4319c9ccc736b0acdd5f473ed --- src/llamafactory/chat/chat_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/chat/chat_model.py b/src/llamafactory/chat/chat_model.py index fb800106..2a72f422 100644 --- a/src/llamafactory/chat/chat_model.py +++ b/src/llamafactory/chat/chat_model.py @@ -31,7 +31,7 @@ if TYPE_CHECKING: from .base_engine import BaseEngine, Response -def _start_background_loop(loop: asyncio.AbstractEventLoop) -> None: +def _start_background_loop(loop: "asyncio.AbstractEventLoop") -> None: asyncio.set_event_loop(loop) loop.run_forever() @@ -49,7 +49,8 @@ class ChatModel: self._loop = asyncio.new_event_loop() self._thread = Thread(target=_start_background_loop, args=(self._loop,), daemon=True) self._thread.start() - asyncio.run_coroutine_threadsafe(self.engine.start(), self._loop) + task = asyncio.run_coroutine_threadsafe(self.engine.start(), self._loop) + task.result() def chat( self, From 875270b851d6073bcbd90ab350c281bf77af1e01 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 17 Jun 2024 22:35:56 +0800 Subject: [PATCH 065/160] lint Former-commit-id: a19a7ac99af62b6715c96274f6350b124a784331 --- scripts/pissa_init.py | 2 +- src/llamafactory/hparams/finetuning_args.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index 10b81efc..50239727 
100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -50,7 +50,7 @@ def quantize_pissa( lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, lora_dropout=lora_dropout, target_modules=[name.strip() for name in lora_target.split(",")], - init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter) + init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter), ) # Init PiSSA model diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 1ef46eca..b676891e 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -352,7 +352,7 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA self.additional_target: Optional[List[str]] = split_arg(self.additional_target) self.galore_target: List[str] = split_arg(self.galore_target) self.freeze_vision_tower = self.freeze_vision_tower or self.train_mm_proj_only - self.use_ref_model = (self.stage == "dpo" and self.pref_loss not in ["orpo", "simpo"]) + self.use_ref_model = self.stage == "dpo" and self.pref_loss not in ["orpo", "simpo"] assert self.finetuning_type in ["lora", "freeze", "full"], "Invalid fine-tuning method." assert self.ref_model_quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." From 12fcfc2b724341888dcdf797daaf1e670089e01b Mon Sep 17 00:00:00 2001 From: Jonery Date: Tue, 18 Jun 2024 12:27:47 +0800 Subject: [PATCH 066/160] Support distributed BAdam. Former-commit-id: bdcb986e37975911c190a74d3e60bb77aa2033bd --- src/llamafactory/hparams/parser.py | 24 ++++++++++-------------- src/llamafactory/train/dpo/trainer.py | 6 ++++++ src/llamafactory/train/kto/trainer.py | 6 ++++++ src/llamafactory/train/ppo/trainer.py | 6 ++++++ src/llamafactory/train/pt/trainer.py | 6 ++++++ src/llamafactory/train/rm/trainer.py | 6 ++++++ src/llamafactory/train/sft/trainer.py | 22 ++++++---------------- 7 files changed, 46 insertions(+), 30 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index f922bbfd..598a34e5 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -209,24 +209,20 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): raise ValueError("Distributed training does not support layer-wise GaLore.") -<<<<<<< HEAD - # if ( - # finetuning_args.use_badam - # and finetuning_args.badam_mode == "layer" - # and training_args.parallel_mode.value == "distributed" - # ): - # raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") -======= if ( finetuning_args.use_badam - and finetuning_args.badam_mode == "layer" - and training_args.parallel_mode == ParallelMode.DISTRIBUTED + and training_args.parallel_mode.value == "distributed" ): - raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.") ->>>>>>> upstream/main + if finetuning_args.badam_mode == "ratio": + raise ValueError("Ratio-wise BAdam does not yet support distributed training, use layer-wise BAdam: --badam_mode layer") + if (finetuning_args.badam_mode == "layer" + and training_args.deepspeed_plugin is not None + and training_args.deepspeed_plugin.zero_stage < 3 + ): + raise ValueError(f"Layer-wise BAdam only supports DeepSpeed ZeRO 3 stage, got stage {self.args.deepspeed_plugin.zero_stage}") - if (finetuning_args.use_galore or finetuning_args.use_badam) and 
training_args.deepspeed is not None: - raise ValueError("GaLore and BAdam are incompatible with DeepSpeed yet.") + if (finetuning_args.use_galore) and training_args.deepspeed is not None: + raise ValueError("GaLore are incompatible with DeepSpeed yet.") if model_args.infer_backend == "vllm": raise ValueError("vLLM backend is only available for API, CLI and Web.") diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 9928d0bc..284bf41a 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -100,6 +100,12 @@ class CustomDPOTrainer(DPOTrainer): self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) + if (self.args.deepspeed_plugin is not None + and self.args.deepspeed_plugin.zero_stage == 3 + ): + from badam.utils import BAdamZeRO3Callback + self.callback_handler.add_callback(BAdamZeRO3Callback) + def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 91d68975..d8b609e0 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -95,6 +95,12 @@ class CustomKTOTrainer(KTOTrainer): self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) + if (self.args.deepspeed_plugin is not None + and self.args.deepspeed_plugin.zero_stage == 3 + ): + from badam.utils import BAdamZeRO3Callback + self.callback_handler.add_callback(BAdamZeRO3Callback) + def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index df4a37be..5a18cbaa 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -170,6 +170,12 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) + if (self.args.deepspeed_plugin is not None + and self.args.deepspeed_plugin.zero_stage == 3 + ): + from badam.utils import BAdamZeRO3Callback + self.callback_handler.add_callback(BAdamZeRO3Callback) + def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None: r""" Implements training loop for the PPO stage, like _inner_training_loop() in Huggingface's Trainer. 
diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index f9e04cb5..1e5e9f6a 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -52,6 +52,12 @@ class CustomTrainer(Trainer): self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) + if (self.args.deepspeed_plugin is not None + and self.args.deepspeed_plugin.zero_stage == 3 + ): + from badam.utils import BAdamZeRO3Callback + self.callback_handler.add_callback(BAdamZeRO3Callback) + def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args) diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 7f91e5f5..5d0e6263 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -76,6 +76,12 @@ class PairwiseTrainer(Trainer): self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) + if (self.args.deepspeed_plugin is not None + and self.args.deepspeed_plugin.zero_stage == 3 + ): + from badam.utils import BAdamZeRO3Callback + self.callback_handler.add_callback(BAdamZeRO3Callback) + def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 0628ea59..9446d245 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -57,9 +57,14 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): if finetuning_args.use_badam: from badam import clip_grad_norm_for_sparse_tensor - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) + if (self.args.deepspeed_plugin is not None + and self.args.deepspeed_plugin.zero_stage == 3 + ): + from badam.utils import BAdamZeRO3Callback + self.callback_handler.add_callback(BAdamZeRO3Callback) + def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args) @@ -80,21 +85,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): if self.processor is not None: getattr(self.processor, "image_processor").save_pretrained(output_dir) - def training_step(self, *args, **kwargs): - r""" - Update the reference to deepspeed optimizer - """ - if self.finetuning_args.use_badam and \ - self.args.deepspeed_plugin is not None and \ - self.args.deepspeed_plugin.zero_stage == 3: - - ds_optim = self.optimizer.optimizer - badam_optim = ds_optim.optimizer - badam_optim.ds_optimizer = ds_optim - - return super().training_step(*args, **kwargs) - - def prediction_step( self, model: "torch.nn.Module", From 870a54ac84b874c9498944f2be56934a74638cb8 Mon Sep 17 00:00:00 2001 From: Jonery Date: Tue, 18 Jun 2024 12:39:26 +0800 Subject: [PATCH 067/160] fix typo Former-commit-id: d4bee3716dbf8a84564d5bcc2059172604819f3e --- src/llamafactory/hparams/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 598a34e5..680559ac 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -219,7 +219,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: and training_args.deepspeed_plugin is not None and 
training_args.deepspeed_plugin.zero_stage < 3 ): - raise ValueError(f"Layer-wise BAdam only supports DeepSpeed ZeRO 3 stage, got stage {self.args.deepspeed_plugin.zero_stage}") + raise ValueError(f"Layer-wise BAdam only supports DeepSpeed ZeRO 3 stage, got stage {training_args.deepspeed_plugin.zero_stage}") if (finetuning_args.use_galore) and training_args.deepspeed is not None: raise ValueError("GaLore are incompatible with DeepSpeed yet.") From c7479751e8e355d6b76744f0623ce66221b4eb03 Mon Sep 17 00:00:00 2001 From: Jonery Date: Tue, 18 Jun 2024 13:50:26 +0800 Subject: [PATCH 068/160] add example Former-commit-id: 75603db09b085e3f703286b87abe041af020e615 --- examples/extras/badam/llama3_badam_sft.yaml | 40 +++++++++++++++++++++ examples/extras/badam/train_single_gpu.sh | 37 +++++++++++++++++++ examples/extras/badam/train_zero3.sh | 39 ++++++++++++++++++++ 3 files changed, 116 insertions(+) create mode 100644 examples/extras/badam/llama3_badam_sft.yaml create mode 100644 examples/extras/badam/train_single_gpu.sh create mode 100644 examples/extras/badam/train_zero3.sh diff --git a/examples/extras/badam/llama3_badam_sft.yaml b/examples/extras/badam/llama3_badam_sft.yaml new file mode 100644 index 00000000..f5adb220 --- /dev/null +++ b/examples/extras/badam/llama3_badam_sft.yaml @@ -0,0 +1,40 @@ +### model +model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct + +### method +stage: sft +do_train: true +finetuning_type: full +use_badam: true +badam_switch_mode: ascending +badam_switch_interval: 50 +badam_verbose: 2 + +### dataset +dataset: identity,alpaca_en_demo +template: llama3 +cutoff_len: 1024 +max_samples: 1000 +overwrite_cache: true +preprocessing_num_workers: 16 + +### output +output_dir: saves/llama3-8b/full/sft +logging_steps: 10 +save_steps: 500 +plot_loss: true +overwrite_output_dir: true + +### train +per_device_train_batch_size: 1 +gradient_accumulation_steps: 8 +learning_rate: 1.0e-6 +num_train_epochs: 3.0 +lr_scheduler_type: cosine +warmup_ratio: 0.1 + +### eval +val_size: 0.1 +per_device_eval_batch_size: 1 +eval_strategy: steps +eval_steps: 500 diff --git a/examples/extras/badam/train_single_gpu.sh b/examples/extras/badam/train_single_gpu.sh new file mode 100644 index 00000000..8af79007 --- /dev/null +++ b/examples/extras/badam/train_single_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES=0 + +cd ../../.. + +llamafactory-cli train \ + --stage sft \ + --do_train True \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ + --preprocessing_num_workers 16 \ + --finetuning_type full \ + --template default \ + --flash_attn auto \ + --dataset_dir data \ + --dataset alpaca_en_demo \ + --cutoff_len 1024 \ + --learning_rate 1e-6 \ + --num_train_epochs 3.0 \ + --max_samples 100000 \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 8 \ + --lr_scheduler_type cosine \ + --max_grad_norm 1.0 \ + --logging_steps 5 \ + --save_steps 100 \ + --warmup_steps 0 \ + --optim adamw_torch \ + --packing False \ + --report_to none \ + --use_badam True \ + --output_dir saves/LLaMA2-13B/full/BAdam \ + --plot_loss True \ + --ddp_timeout 180000000 \ + --include_num_input_tokens_seen True \ + --badam_mode layer \ + --badam_switch_mode ascending \ + --badam_switch_interval 50 \ No newline at end of file diff --git a/examples/extras/badam/train_zero3.sh b/examples/extras/badam/train_zero3.sh new file mode 100644 index 00000000..3b182134 --- /dev/null +++ b/examples/extras/badam/train_zero3.sh @@ -0,0 +1,39 @@ +#!/bin/bash +export CUDA_VISIBLE_DEVICES=0,1,2,3 + +cd ../../.. 
+ +llamafactory-cli train \ + --stage sft \ + --do_train True \ + --model_name_or_path meta-llama/Llama-2-13b-hf \ + --preprocessing_num_workers 16 \ + --finetuning_type full \ + --template default \ + --flash_attn auto \ + --dataset_dir data \ + --dataset alpaca_en_demo \ + --cutoff_len 1024 \ + --learning_rate 1e-6 \ + --num_train_epochs 3.0 \ + --max_samples 100000 \ + --per_device_train_batch_size 8 \ + --gradient_accumulation_steps 2 \ + --lr_scheduler_type cosine \ + --max_grad_norm 1.0 \ + --logging_steps 5 \ + --save_steps 100 \ + --warmup_steps 0 \ + --optim adamw_torch \ + --packing False \ + --report_to none \ + --use_badam True \ + --output_dir saves/LLaMA2-13B/full/BAdam \ + --fp16 True \ + --plot_loss True \ + --ddp_timeout 180000000 \ + --include_num_input_tokens_seen True \ + --badam_mode layer \ + --badam_switch_mode ascending \ + --badam_switch_interval 50 \ + --deepspeed cache/ds_z3_config.json \ No newline at end of file From 372da52d4ac7264454db23b09b579a4555947cda Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 18 Jun 2024 22:08:56 +0800 Subject: [PATCH 069/160] fix #4335 Former-commit-id: 2ab449adbb160f339a0586edeb846fa311ad8382 --- src/llamafactory/chat/base_engine.py | 5 ----- src/llamafactory/chat/chat_model.py | 2 -- src/llamafactory/chat/hf_engine.py | 10 ++++------ src/llamafactory/chat/vllm_engine.py | 3 --- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/llamafactory/chat/base_engine.py b/src/llamafactory/chat/base_engine.py index 92a51ebe..ccdf4c92 100644 --- a/src/llamafactory/chat/base_engine.py +++ b/src/llamafactory/chat/base_engine.py @@ -50,11 +50,6 @@ class BaseEngine(ABC): generating_args: "GeneratingArguments", ) -> None: ... - @abstractmethod - async def start( - self, - ) -> None: ... 
- @abstractmethod async def chat( self, diff --git a/src/llamafactory/chat/chat_model.py b/src/llamafactory/chat/chat_model.py index 2a72f422..5c83fa67 100644 --- a/src/llamafactory/chat/chat_model.py +++ b/src/llamafactory/chat/chat_model.py @@ -49,8 +49,6 @@ class ChatModel: self._loop = asyncio.new_event_loop() self._thread = Thread(target=_start_background_loop, args=(self._loop,), daemon=True) self._thread.start() - task = asyncio.run_coroutine_threadsafe(self.engine.start(), self._loop) - task.result() def chat( self, diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index a7ff7015..30200456 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -59,6 +59,7 @@ class HuggingfaceEngine(BaseEngine): self.tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate) ) # must after fixing tokenizer to resize vocab self.generating_args = generating_args.to_dict() + self.semaphore = asyncio.Semaphore(int(os.environ.get("MAX_CONCURRENT", "1"))) @staticmethod def _process_args( @@ -259,9 +260,6 @@ class HuggingfaceEngine(BaseEngine): return scores - async def start(self) -> None: - self._semaphore = asyncio.Semaphore(int(os.environ.get("MAX_CONCURRENT", 1))) - async def chat( self, messages: Sequence[Dict[str, str]], @@ -286,7 +284,7 @@ class HuggingfaceEngine(BaseEngine): image, input_kwargs, ) - async with self._semaphore: + async with self.semaphore: with concurrent.futures.ThreadPoolExecutor() as pool: return await loop.run_in_executor(pool, self._chat, *input_args) @@ -314,7 +312,7 @@ class HuggingfaceEngine(BaseEngine): image, input_kwargs, ) - async with self._semaphore: + async with self.semaphore: with concurrent.futures.ThreadPoolExecutor() as pool: stream = self._stream_chat(*input_args) while True: @@ -333,6 +331,6 @@ class HuggingfaceEngine(BaseEngine): loop = asyncio.get_running_loop() input_args = (self.model, self.tokenizer, batch_input, input_kwargs) - async with self._semaphore: + async with self.semaphore: with concurrent.futures.ThreadPoolExecutor() as pool: return await loop.run_in_executor(pool, self._get_scores, *input_args) diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index d488a039..2626d612 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -183,9 +183,6 @@ class VllmEngine(BaseEngine): ) return result_generator - async def start(self) -> None: - pass - async def chat( self, messages: Sequence[Dict[str, str]], From c106cc24e4631c3bc313c21f4fa85309f49cc8b7 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 18 Jun 2024 22:27:24 +0800 Subject: [PATCH 070/160] Update requirements.txt Former-commit-id: da8684f9f0b0103d4fa81279343a48ecd0fcc0cd --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5c7202a5..f76524d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ accelerate>=0.30.1 peft>=0.11.1 trl>=0.8.6 gradio>=4.0.0 -pandas>=2.2.2 +pandas>=2.0.0 scipy einops sentencepiece From 4bc0bea0e9b6c5b01555f8768480b51be95f49be Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 18 Jun 2024 22:42:45 +0800 Subject: [PATCH 071/160] fix #4357 Former-commit-id: a6741bba8cebd16a6a3f97a2dc81057d0e27eb39 --- src/llamafactory/chat/hf_engine.py | 7 +++++++ src/llamafactory/hparams/parser.py | 1 + src/llamafactory/model/model_utils/rope.py | 4 ++-- 3 files changed, 10 insertions(+), 
2 deletions(-) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 30200456..9e60175b 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -59,6 +59,13 @@ class HuggingfaceEngine(BaseEngine): self.tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate) ) # must after fixing tokenizer to resize vocab self.generating_args = generating_args.to_dict() + try: + asyncio.get_event_loop() + except RuntimeError: + logger.warning("There is no current event loop, creating a new one.") + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + self.semaphore = asyncio.Semaphore(int(os.environ.get("MAX_CONCURRENT", "1"))) @staticmethod diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 467fc43d..9b305016 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -353,6 +353,7 @@ def get_infer_args(args: Optional[Dict[str, Any]] = None) -> _INFER_CLS: if model_args.export_dir is not None and model_args.export_device == "cpu": model_args.device_map = {"": torch.device("cpu")} + model_args.model_max_length = data_args.cutoff_len else: model_args.device_map = "auto" diff --git a/src/llamafactory/model/model_utils/rope.py b/src/llamafactory/model/model_utils/rope.py index 88303c4d..4373ee19 100644 --- a/src/llamafactory/model/model_utils/rope.py +++ b/src/llamafactory/model/model_utils/rope.py @@ -39,8 +39,8 @@ def configure_rope(config: "PretrainedConfig", model_args: "ModelArguments", is_ logger.warning("Current model does not support RoPE scaling.") return - if is_trainable: - if model_args.rope_scaling == "dynamic": + if model_args.model_max_length is not None: + if is_trainable and model_args.rope_scaling == "dynamic": logger.warning( "Dynamic NTK scaling may not work well with fine-tuning. 
" "See: https://github.com/huggingface/transformers/pull/24653" From 665df5d7339e4979fdb4a5c4f6e4ee6a620e27bc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 18 Jun 2024 22:53:54 +0800 Subject: [PATCH 072/160] add deepseek coder v2 #4346 Former-commit-id: d83d3846d8e3bf5c40d4b90c24e2c5909ec61864 --- README.md | 58 ++++++++++++++-------------- README_zh.md | 58 ++++++++++++++-------------- src/llamafactory/extras/constants.py | 12 ++++++ 3 files changed, 70 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 60045118..ca9e7e1d 100644 --- a/README.md +++ b/README.md @@ -151,35 +151,35 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Supported Models -| Model | Model size | Template | -| -------------------------------------------------------- | -------------------------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | -| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | -| [PaliGemma](https://huggingface.co/google) | 3B | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | -| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | -| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | +| Model | Model size | Template | +| --------------------------------------------------------- | -------------------------------- | --------- | +| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| 
[GLM4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | +| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] > For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models. diff --git a/README_zh.md b/README_zh.md index fc824561..2ede76ba 100644 --- a/README_zh.md +++ b/README_zh.md @@ -151,35 +151,35 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 模型 -| 模型名 | 模型大小 | Template | -| -------------------------------------------------------- | -------------------------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | -| [DeepSeek (MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | -| [PaliGemma](https://huggingface.co/google) | 3B | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | -| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | -| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | -| [XVERSE](https://huggingface.co/xverse) | 
7B/13B/65B | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | +| 模型名 | 模型大小 | Template | +| --------------------------------------------------------- | -------------------------------- | --------- | +| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | +| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | +| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | +| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] > 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 73a9969d..36265c8e 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -403,6 +403,18 @@ register_model_group( DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat", DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat", }, + "DeepSeek-MoE-Coder-16B-Base": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Lite-Base", + }, + "DeepSeek-MoE-Coder-236B-Base": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Base", + }, + "DeepSeek-MoE-Coder-16B-Chat": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", + }, + "DeepSeek-MoE-Coder-236B-Chat": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-Coder-V2-Instruct", + }, }, template="deepseek", ) From 85f3a09c83fc59cd8523ea3f9d47201a39e42e3d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 18 Jun 2024 23:32:18 +0800 Subject: [PATCH 073/160] tiny fix Former-commit-id: bb750fa3dde03ec024ae75596ecd4b884cb126c6 --- Dockerfile | 1 + README.md | 4 ++-- README_zh.md | 4 ++-- 3 files changed, 5 insertions(+), 4 
deletions(-) diff --git a/Dockerfile b/Dockerfile index 9c2f645c..61d58005 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,4 +43,5 @@ EXPOSE 7860 # Expose port 8000 for the API service EXPOSE 8000 +# Launch LLaMA Board CMD [ "llamafactory-cli", "webui" ] diff --git a/README.md b/README.md index 2d95583b..0e8e55f7 100644 --- a/README.md +++ b/README.md @@ -260,6 +260,8 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) - [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) @@ -270,8 +272,6 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) -- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) -- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
diff --git a/README_zh.md b/README_zh.md index ed5475d3..152cd6fa 100644 --- a/README_zh.md +++ b/README_zh.md @@ -260,6 +260,8 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [STEM (zh)](https://huggingface.co/datasets/hfl/stem_zh_instruction) - [Ruozhiba (zh)](https://huggingface.co/datasets/hfl/ruozhiba_gpt4_turbo) - [Neo-sft (zh)](https://huggingface.co/datasets/m-a-p/neo_sft_phase2) +- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) +- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered) - [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k) - [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de) - [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de) @@ -270,8 +272,6 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - [Booksum (de)](https://huggingface.co/datasets/mayflowergmbh/booksum_de) - [Airoboros (de)](https://huggingface.co/datasets/mayflowergmbh/airoboros-3.0_de) - [Ultrachat (de)](https://huggingface.co/datasets/mayflowergmbh/ultra-chat_de) -- [WebInstructSub (en)](https://huggingface.co/datasets/TIGER-Lab/WebInstructSub) -- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
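The Dockerfile hunk in this patch documents that the container launches LLaMA Board by default, with port 7860 exposed for the web UI and port 8000 for the API service. Below is a minimal usage sketch, assuming the image is built from the repository root on a host with the NVIDIA Container Toolkit; the image tag and port mappings are illustrative, not part of the patch.

```bash
# Build the image from the repository root (the tag name is illustrative).
docker build -t llamafactory:latest .

# Start the default command from the Dockerfile ("llamafactory-cli webui"),
# publishing the web UI port (7860) and the API port (8000).
docker run -it --gpus all -p 7860:7860 -p 8000:8000 llamafactory:latest
```

Port 8000 is exposed for the API service, which can presumably be started by overriding the default command (for example with `llamafactory-cli api`) instead of the web UI.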
From e36a994fe67b23569845479be54d0cc709be63be Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 03:23:51 +0800 Subject: [PATCH 074/160] fix tool formatter, allow parallel function #4362 Former-commit-id: b8f16c976db4ecec1cc8558851c8cbfb6a5b7e9c --- src/llamafactory/api/chat.py | 19 ++--- src/llamafactory/data/formatter.py | 120 ++++++++++++--------------- src/llamafactory/data/template.py | 16 +++- src/llamafactory/webui/chatter.py | 13 ++- tests/data/test_formatter.py | 125 +++++++++++++++++++++++++++++ 5 files changed, 207 insertions(+), 86 deletions(-) create mode 100644 tests/data/test_formatter.py diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 945856cb..2c7e11e2 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -92,9 +92,11 @@ def _process_request( raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls): - name = message.tool_calls[0].function.name - arguments = message.tool_calls[0].function.arguments - content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False) + tool_calls = [ + {"name": tool_call.function.name, "argument": tool_call.function.arguments} + for tool_call in message.tool_calls + ] + content = json.dumps(tool_calls, ensure_ascii=False) input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) elif isinstance(message.content, list): for input_item in message.content: @@ -118,7 +120,7 @@ def _process_request( if isinstance(tool_list, list) and len(tool_list): try: tools = json.dumps([dictify(tool.function) for tool in tool_list], ensure_ascii=False) - except Exception: + except json.JSONDecodeError: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") else: tools = None @@ -160,17 +162,16 @@ async def create_chat_completion_response( choices = [] for i, response in enumerate(responses): if tools: - result = chat_model.engine.template.format_tools.extract(response.response_text) + result = chat_model.engine.template.extract_tool(response.response_text) else: result = response.response_text if isinstance(result, list): tool_calls = [] for tool in result: - name, arguments = tool - function = Function(name=name, arguments=arguments) - tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function) - tool_calls.append(tool_call) + function = Function(name=tool[0], arguments=tool[1]) + tool_calls.append(FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function)) + response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=tool_calls) finish_reason = Finish.TOOL else: diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index fa35df5b..70be6a5a 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -22,29 +22,20 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, Set, Tuple, Uni SLOTS = Sequence[Union[str, Set[str], Dict[str, str]]] -JSON_FORMAT_PROMPT = ( - """, in a JSON format representing the kwargs (e.g. 
```{"input": "hello world", "num_beams": 5}```)""" -) - - -TOOL_SYSTEM_PROMPT = ( +DEFAULT_TOOL_PROMPT = ( "You have access to the following tools:\n{tool_text}" "Use the following format if using a tool:\n" "```\n" "Action: tool name (one of [{tool_names}]).\n" - "Action Input: the input to the tool{format_prompt}.\n" + "Action Input: the input to the tool, in a JSON format representing the kwargs " + """(e.g. ```{{"input": "hello world", "num_beams": 5}}```).\n""" "```\n" ) -GLM4_TOOL_SUFFIX_PROMPT = ( - "在调用上述函数时,请使用 Json 格式表示调用的参数。" -) - GLM4_TOOL_PROMPT = ( - "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持," - "{tool_text}" - + "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的," + "你的任务是针对用户的问题和要求提供适当的答复和支持。{tool_text}" ) @@ -73,32 +64,19 @@ def default_tool_formatter(tools: List[Dict[str, Any]]) -> str: ) tool_names.append(tool["name"]) - return TOOL_SYSTEM_PROMPT.format( - tool_text=tool_text, tool_names=", ".join(tool_names), format_prompt=JSON_FORMAT_PROMPT - ) + return DEFAULT_TOOL_PROMPT.format(tool_text=tool_text, tool_names=", ".join(tool_names)) -def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str: - tool_text = "" - for tool in tools: - tool_name = tool["name"] - tool_text += f"\n\n## {tool_name}\n\n{json.dumps(tool, ensure_ascii=False, indent=4)}\n{GLM4_TOOL_SUFFIX_PROMPT}" - return GLM4_TOOL_PROMPT.format(tool_text=tool_text) - - def default_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: - regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*({.*?})(?=\nAction:|\Z)", re.DOTALL) - action_match = re.findall(regex, content) + regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|$)", re.DOTALL) + action_match: List[Tuple[str, str]] = re.findall(regex, content) if not action_match: return content results = [] - for match in action_match: - tool_name, tool_input = match - tool_name = tool_name.strip() - tool_input = tool_input.strip().strip('"').strip("```") - + tool_name = match[0].strip() + tool_input = match[1].strip().strip('"').strip("```") try: arguments = json.loads(tool_input) results.append((tool_name, json.dumps(arguments, ensure_ascii=False))) @@ -108,19 +86,28 @@ def default_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: return results +def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str: + tool_text = "" + for tool in tools: + tool_text += "\n\n## {name}\n\n{body}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format( + name=tool["name"], body=json.dumps(tool, indent=4, ensure_ascii=False) + ) + + return GLM4_TOOL_PROMPT.format(tool_text=tool_text) + + def glm4_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: - lines = content.strip().split("\n") - if len(lines) != 2: + if "\n" not in content: return content - tool_name = lines[0].strip() - tool_input = lines[1].strip() + + tool_name, tool_input = content.split("\n", maxsplit=1) try: arguments = json.loads(tool_input) except json.JSONDecodeError: return content + return [(tool_name, json.dumps(arguments, ensure_ascii=False))] - @dataclass class Formatter(ABC): @@ -193,22 +180,28 @@ class FunctionFormatter(Formatter): def apply(self, **kwargs) -> SLOTS: content = kwargs.pop("content") + functions: List[Tuple[str, str]] = [] try: - function = json.loads(content) - name = function["name"] - arguments = json.dumps(function["arguments"], ensure_ascii=False) - except Exception: - name, arguments = "", "" + tool_calls = json.loads(content) + if not isinstance(tool_calls, list): # parallel 
function call + tool_calls = [tool_calls] + + for tool_call in tool_calls: + functions.append((tool_call["name"], json.dumps(tool_call["arguments"], ensure_ascii=False))) + + except json.JSONDecodeError: + functions = [] elements = [] - for slot in self.slots: - if isinstance(slot, str): - slot = slot.replace("{{name}}", name).replace("{{arguments}}", arguments) - elements.append(slot) - elif isinstance(slot, (dict, set)): - elements.append(slot) - else: - raise RuntimeError("Input must be string, set[str] or dict[str, str], got {}".format(type(slot))) + for name, arguments in functions: + for slot in self.slots: + if isinstance(slot, str): + slot = slot.replace("{{name}}", name).replace("{{arguments}}", arguments) + elements.append(slot) + elif isinstance(slot, (dict, set)): + elements.append(slot) + else: + raise RuntimeError("Input must be string, set[str] or dict[str, str], got {}".format(type(slot))) return elements @@ -216,29 +209,22 @@ class FunctionFormatter(Formatter): @dataclass class ToolFormatter(Formatter): def __post_init__(self): - if self.tool_format is None: + if self.tool_format == "default": + self._tool_formatter = default_tool_formatter + self._tool_extractor = default_tool_extractor + elif self.tool_format == "glm4": + self._tool_formatter = glm4_tool_formatter + self._tool_extractor = glm4_tool_extractor + else: raise ValueError("Tool format was not found.") def apply(self, **kwargs) -> SLOTS: content = kwargs.pop("content") try: tools = json.loads(content) - if not len(tools): - return [""] - - if self.tool_format == "default": - return [default_tool_formatter(tools)] - elif self.tool_format == "glm4": - return [glm4_tool_formatter(tools)] - else: - raise NotImplementedError - except Exception: + return [self._tool_formatter(tools) if len(tools) != 0 else ""] + except json.JSONDecodeError: return [""] def extract(self, content: str) -> Union[str, List[Tuple[str, str]]]: - if self.tool_format == "default": - return default_tool_extractor(content) - elif self.tool_format == "glm4": - return glm4_tool_extractor(content) - else: - raise NotImplementedError + return self._tool_extractor(content) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index d97699b0..77694c59 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -79,6 +79,12 @@ class Template: """ return self._encode(tokenizer, messages, system, tools, cutoff_len, reserved_label_len) + def extract_tool(self, content: str) -> Union[str, List[Tuple[str, str]]]: + r""" + Extracts tool message. 
+ """ + return self.format_tools.extract(content) + def _encode( self, tokenizer: "PreTrainedTokenizer", @@ -100,7 +106,8 @@ class Template: if i == 0 and (system or tools or self.force_system): tool_text = self.format_tools.apply(content=tools)[0] if tools else "" elements += self.format_system.apply(content=(system + tool_text)) - elif i > 0 and i % 2 == 0: + + if i > 0 and i % 2 == 0: elements += self.format_separator.apply() if message["role"] == Role.USER.value: @@ -191,7 +198,8 @@ class Llama2Template(Template): if i == 0 and (system or tools or self.force_system): tool_text = self.format_tools.apply(content=tools)[0] if tools else "" system_text = self.format_system.apply(content=(system + tool_text))[0] - elif i > 0 and i % 2 == 0: + + if i > 0 and i % 2 == 0: elements += self.format_separator.apply() if message["role"] == Role.USER.value: @@ -259,7 +267,9 @@ def _register_template( template_class = Llama2Template if name.startswith("llama2") else Template default_user_formatter = StringFormatter(slots=["{{content}}"]) default_assistant_formatter = StringFormatter(slots=["{{content}}"] + eos_slots) - default_function_formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}"] + eos_slots) + default_function_formatter = FunctionFormatter( + slots=["Action: {{name}}\nAction Input: {{arguments}}\n"] + eos_slots + ) default_tool_formatter = ToolFormatter(tool_format="default") default_separator_formatter = EmptyFormatter() TEMPLATES[name] = template_class( diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index 864c41c7..a2b54dce 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -140,16 +140,15 @@ class WebChatModel(ChatModel): ): response += new_text if tools: - result = self.engine.template.format_tools.extract(response) + result = self.engine.template.extract_tool(response) else: result = response - if isinstance(result, tuple): - name, arguments = result - arguments = json.loads(arguments) - tool_call = json.dumps({"name": name, "arguments": arguments}, ensure_ascii=False) - output_messages = messages + [{"role": Role.FUNCTION.value, "content": tool_call}] - bot_text = "```json\n" + tool_call + "\n```" + if isinstance(result, list): + tool_calls = [{"name": tool[0], "arguments": json.loads(tool[1])} for tool in result] + tool_calls = json.dumps(tool_calls, indent=4, ensure_ascii=False) + output_messages = messages + [{"role": Role.FUNCTION.value, "content": tool_calls}] + bot_text = "```json\n" + tool_calls + "\n```" else: output_messages = messages + [{"role": Role.ASSISTANT.value, "content": result}] bot_text = result diff --git a/tests/data/test_formatter.py b/tests/data/test_formatter.py new file mode 100644 index 00000000..430eb0e6 --- /dev/null +++ b/tests/data/test_formatter.py @@ -0,0 +1,125 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +from llamafactory.data.formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter + + +def test_empty_formatter(): + formatter = EmptyFormatter(slots=["\n"]) + assert formatter.apply() == ["\n"] + + +def test_string_formatter(): + formatter = StringFormatter(slots=["", "Human: {{content}}\nAssistant:"]) + assert formatter.apply(content="Hi") == ["", "Human: Hi\nAssistant:"] + + +def test_function_formatter(): + formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}\n"]) + tool_calls = json.dumps({"name": "tool_name", "arguments": {"foo": "bar", "size": 10}}) + assert formatter.apply(content=tool_calls) == [ + """Action: tool_name\nAction Input: {\"foo\": \"bar\", \"size\": 10}\n""" + ] + + +def test_multi_function_formatter(): + formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}\n"]) + tool_calls = json.dumps([{"name": "tool_name", "arguments": {"foo": "bar", "size": 10}}] * 2) + assert formatter.apply(content=tool_calls) == [ + """Action: tool_name\nAction Input: {\"foo\": \"bar\", \"size\": 10}\n""", + """Action: tool_name\nAction Input: {\"foo\": \"bar\", \"size\": 10}\n""", + ] + + +def test_default_tool_formatter(): + formatter = ToolFormatter(tool_format="default") + tools = [ + { + "name": "test_tool", + "description": "tool_desc", + "parameters": { + "type": "object", + "properties": { + "foo": {"type": "string", "description": "foo_desc"}, + "bar": {"type": "number", "description": "bar_desc"}, + }, + "required": ["foo"], + }, + } + ] + assert formatter.apply(content=json.dumps(tools)) == [ + "You have access to the following tools:\n" + "> Tool Name: test_tool\n" + "Tool Description: tool_desc\n" + "Tool Args:\n" + " - foo (string, required): foo_desc\n" + " - bar (number): bar_desc\n\n" + "Use the following format if using a tool:\n" + "```\n" + "Action: tool name (one of [test_tool]).\n" + "Action Input: the input to the tool, in a JSON format representing the kwargs " + """(e.g. 
```{"input": "hello world", "num_beams": 5}```).\n""" + "```\n" + ] + + +def test_default_tool_extractor(): + formatter = ToolFormatter(tool_format="default") + result = """Action: test_tool\nAction Input: {"foo": "bar", "size": 10}\n""" + assert formatter.extract(result) == [("test_tool", """{"foo": "bar", "size": 10}""")] + + +def test_default_multi_tool_extractor(): + formatter = ToolFormatter(tool_format="default") + result = ( + """Action: test_tool\nAction Input: {"foo": "bar", "size": 10}\n""" + """Action: another_tool\nAction Input: {"foo": "job", "size": 2}\n""" + ) + assert formatter.extract(result) == [ + ("test_tool", """{"foo": "bar", "size": 10}"""), + ("another_tool", """{"foo": "job", "size": 2}"""), + ] + + +def test_glm4_tool_formatter(): + formatter = ToolFormatter(tool_format="glm4") + tools = [ + { + "name": "test_tool", + "description": "tool_desc", + "parameters": { + "type": "object", + "properties": { + "foo": {"type": "string", "description": "foo_desc"}, + "bar": {"type": "number", "description": "bar_desc"}, + }, + "required": ["foo"], + }, + } + ] + assert formatter.apply(content=json.dumps(tools)) == [ + "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的," + "你的任务是针对用户的问题和要求提供适当的答复和支持。" + "\n\n## test_tool\n\n{}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format( + json.dumps(tools[0], indent=4) + ) + ] + + +def test_glm4_tool_extractor(): + formatter = ToolFormatter(tool_format="glm4") + result = """test_tool\n{"foo": "bar", "size": 10}\n""" + assert formatter.extract(result) == [("test_tool", """{"foo": "bar", "size": 10}""")] From 1ca639a77748b842e279f2baf2c8f301aa52272e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 03:39:52 +0800 Subject: [PATCH 075/160] use prefix to replace force system Former-commit-id: 731d9a964f1c3dbfb83825524d697831e691fb9d --- src/llamafactory/data/template.py | 75 +++++++++++++------------------ 1 file changed, 30 insertions(+), 45 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 77694c59..a12e9c88 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -38,12 +38,12 @@ class Template: format_observation: "Formatter" format_tools: "Formatter" format_separator: "Formatter" + format_prefix: "Formatter" default_system: str stop_words: List[str] image_token: str efficient_eos: bool replace_eos: bool - force_system: bool def encode_oneturn( self, @@ -102,8 +102,9 @@ class Template: system = system or self.default_system encoded_messages = [] for i, message in enumerate(messages): - elements = [] - if i == 0 and (system or tools or self.force_system): + elements = self.format_prefix.apply() + + if i == 0 and (system or tools): tool_text = self.format_tools.apply(content=tools)[0] if tools else "" elements += self.format_system.apply(content=(system + tool_text)) @@ -193,9 +194,10 @@ class Llama2Template(Template): system = system or self.default_system encoded_messages = [] for i, message in enumerate(messages): - elements = [] + elements = self.format_prefix.apply() + system_text = "" - if i == 0 and (system or tools or self.force_system): + if i == 0 and (system or tools): tool_text = self.format_tools.apply(content=tools)[0] if tools else "" system_text = self.format_system.apply(content=(system + tool_text))[0] @@ -230,12 +232,12 @@ def _register_template( format_observation: Optional["Formatter"] = None, format_tools: Optional["Formatter"] = None, format_separator: Optional["Formatter"] = None, + format_prefix: 
Optional["Formatter"] = None, default_system: str = "", stop_words: List[str] = [], image_token: str = "", efficient_eos: bool = False, replace_eos: bool = False, - force_system: bool = False, ) -> None: r""" Registers a chat template. @@ -272,6 +274,7 @@ def _register_template( ) default_tool_formatter = ToolFormatter(tool_format="default") default_separator_formatter = EmptyFormatter() + default_prefix_formatter = EmptyFormatter() TEMPLATES[name] = template_class( format_user=format_user or default_user_formatter, format_assistant=format_assistant or default_assistant_formatter, @@ -280,12 +283,12 @@ def _register_template( format_observation=format_observation or format_user or default_user_formatter, format_tools=format_tools or default_tool_formatter, format_separator=format_separator or default_separator_formatter, + format_prefix=format_prefix or default_prefix_formatter, default_system=default_system, stop_words=stop_words, image_token=image_token, efficient_eos=efficient_eos, replace_eos=replace_eos, - force_system=force_system, ) @@ -329,7 +332,7 @@ def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedTokenizer", pl def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer") -> str: - jinja_template = "" + jinja_template = _convert_slots_to_jinja(template.format_prefix.apply(), tokenizer) if template.default_system: jinja_template += "{% set system_message = '" + _jinja_escape(template.default_system) + "' %}" @@ -339,11 +342,7 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer") ) system_message = _convert_slots_to_jinja(template.format_system.apply(), tokenizer, placeholder="system_message") - if isinstance(template, Llama2Template): - pass - elif template.force_system: - jinja_template += "{{ " + system_message + " }}" - else: + if not isinstance(template, Llama2Template): jinja_template += "{% if system_message is defined %}{{ " + system_message + " }}{% endif %}" jinja_template += "{% for message in messages %}" @@ -459,9 +458,8 @@ _register_template( _register_template( name="belle", format_user=StringFormatter(slots=["Human: {{content}}\n\nBelle: "]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), format_separator=EmptyFormatter(slots=["\n\n"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -486,10 +484,9 @@ _register_template( _register_template( name="chatglm2", format_user=StringFormatter(slots=["[Round {{idx}}]\n\n问:{{content}}\n\n答:"]), - format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]), format_separator=EmptyFormatter(slots=["\n\n"]), + format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]), efficient_eos=True, - force_system=True, ) @@ -497,14 +494,14 @@ _register_template( name="chatglm3", format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]), format_assistant=StringFormatter(slots=["\n", "{{content}}"]), - format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]), + format_system=StringFormatter(slots=[{"token": "<|system|>"}, "\n", "{{content}}"]), format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), format_observation=StringFormatter( slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}] ), + format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]), stop_words=["<|user|>", "<|observation|>"], efficient_eos=True, - 
force_system=True, ) @@ -512,13 +509,12 @@ _register_template( name="chatglm3_system", format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]), format_assistant=StringFormatter(slots=["\n", "{{content}}"]), - format_system=StringFormatter( - slots=[{"token": "[gMASK]"}, {"token": "sop"}, {"token": "<|system|>"}, "\n", "{{content}}"] - ), + format_system=StringFormatter(slots=[{"token": "<|system|>"}, "\n", "{{content}}"]), format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), format_observation=StringFormatter( slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}] ), + format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]), default_system=( "You are ChatGLM3, a large language model trained by Zhipu.AI. " "Follow the user's instructions carefully. Respond using markdown." @@ -553,8 +549,7 @@ _register_template( _register_template( name="codegeex2", - format_system=StringFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]), ) @@ -581,8 +576,7 @@ _register_template( _register_template( name="cpm", format_user=StringFormatter(slots=["<用户>{{content}}"]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -615,8 +609,7 @@ _register_template( _register_template( name="deepseek", format_user=StringFormatter(slots=["User: {{content}}\n\nAssistant:"]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -648,9 +641,8 @@ _register_template( name="empty", format_user=StringFormatter(slots=["{{content}}"]), format_assistant=StringFormatter(slots=["{{content}}"]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, - force_system=True, ) @@ -672,13 +664,12 @@ _register_template( _register_template( name="gemma", format_user=StringFormatter(slots=["user\n{{content}}\nmodel\n"]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), format_observation=StringFormatter( slots=["tool\n{{content}}\nmodel\n"] ), format_separator=EmptyFormatter(slots=["\n"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, - force_system=True, ) @@ -686,13 +677,13 @@ _register_template( name="glm4", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]), format_assistant=StringFormatter(slots=["\n{{content}}"]), - format_system=StringFormatter(slots=["[gMASK]<|system|>\n{{content}}"]), + format_system=StringFormatter(slots=["<|system|>\n{{content}}"]), format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]), format_tools=ToolFormatter(tool_format="glm4"), + format_prefix=EmptyFormatter(slots=["[gMASK]"]), stop_words=["<|user|>", "<|observation|>"], efficient_eos=True, - force_system=True, ) @@ -768,24 +759,21 @@ _register_template( _register_template( name="mistral", format_user=StringFormatter(slots=["[INST] {{content}} [/INST]"]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) _register_template( name="olmo", 
format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>\n"]), - format_system=StringFormatter(slots=[{"eos_token"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"eos_token"}]), ) _register_template( name="openchat", format_user=StringFormatter(slots=["GPT4 Correct User: {{content}}", {"eos_token"}, "GPT4 Correct Assistant:"]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -799,18 +787,16 @@ _register_template( ) ] ), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|eot_id|>"], replace_eos=True, - force_system=True, ) _register_template( name="orion", format_user=StringFormatter(slots=["Human: {{content}}\n\nAssistant: ", {"eos_token"}]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - force_system=True, + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -852,7 +838,6 @@ _register_template( format_separator=EmptyFormatter(slots=["\n"]), stop_words=["<|end|>"], replace_eos=True, - force_system=True, ) From 235ed85b0f2fff6867f3d3528b367d6478405106 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 03:49:23 +0800 Subject: [PATCH 076/160] fix bug Former-commit-id: 412139eaa2fde98ba19e1257d21144382a59f0d6 --- src/llamafactory/data/template.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index a12e9c88..c9af9605 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -102,7 +102,10 @@ class Template: system = system or self.default_system encoded_messages = [] for i, message in enumerate(messages): - elements = self.format_prefix.apply() + elements = [] + + if i == 0: + elements += self.format_prefix.apply() if i == 0 and (system or tools): tool_text = self.format_tools.apply(content=tools)[0] if tools else "" @@ -194,7 +197,10 @@ class Llama2Template(Template): system = system or self.default_system encoded_messages = [] for i, message in enumerate(messages): - elements = self.format_prefix.apply() + elements = [] + + if i == 0: + elements += self.format_prefix.apply() system_text = "" if i == 0 and (system or tools): From fa3150548ed1eb5b3071cc0715d2c0a473c0075f Mon Sep 17 00:00:00 2001 From: Jonery Date: Wed, 19 Jun 2024 12:29:40 +0800 Subject: [PATCH 077/160] Cleaner integration. 
Former-commit-id: 26d4b05d424bd71f570195dd433258caf6465d92 --- src/llamafactory/hparams/parser.py | 7 ++----- src/llamafactory/train/dpo/trainer.py | 12 +++--------- src/llamafactory/train/kto/trainer.py | 12 +++--------- src/llamafactory/train/ppo/trainer.py | 12 +++--------- src/llamafactory/train/pt/trainer.py | 12 +++--------- src/llamafactory/train/rm/trainer.py | 12 +++--------- src/llamafactory/train/sft/trainer.py | 11 +++-------- src/llamafactory/train/trainer_utils.py | 10 ++++------ 8 files changed, 24 insertions(+), 64 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 680559ac..c7c17ae0 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -215,11 +215,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): if finetuning_args.badam_mode == "ratio": raise ValueError("Ratio-wise BAdam does not yet support distributed training, use layer-wise BAdam: --badam_mode layer") - if (finetuning_args.badam_mode == "layer" - and training_args.deepspeed_plugin is not None - and training_args.deepspeed_plugin.zero_stage < 3 - ): - raise ValueError(f"Layer-wise BAdam only supports DeepSpeed ZeRO 3 stage, got stage {training_args.deepspeed_plugin.zero_stage}") + if finetuning_args.badam_mode == "layer" and (not is_deepspeed_zero3_enabled()): + raise ValueError(f"Layer-wise BAdam only supports DeepSpeed ZeRO 3 stage.") if (finetuning_args.use_galore) and training_args.deepspeed is not None: raise ValueError("GaLore are incompatible with DeepSpeed yet.") diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index 284bf41a..a3e0e961 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -96,15 +96,9 @@ class CustomDPOTrainer(DPOTrainer): self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - if (self.args.deepspeed_plugin is not None - and self.args.deepspeed_plugin.zero_stage == 3 - ): - from badam.utils import BAdamZeRO3Callback - self.callback_handler.add_callback(BAdamZeRO3Callback) + from badam import clip_grad_norm_old_version, BAdamCallback + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) + self.callback_handler.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index d8b609e0..0d50987f 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -91,15 +91,9 @@ class CustomKTOTrainer(KTOTrainer): self.ref_model.eval() if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - if (self.args.deepspeed_plugin is not None - and self.args.deepspeed_plugin.zero_stage == 3 - ): - from badam.utils import BAdamZeRO3Callback - self.callback_handler.add_callback(BAdamZeRO3Callback) + from badam import clip_grad_norm_old_version, BAdamCallback + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) + self.callback_handler.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if 
self.optimizer is None: diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 5a18cbaa..f81831e6 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -166,15 +166,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True) if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - if (self.args.deepspeed_plugin is not None - and self.args.deepspeed_plugin.zero_stage == 3 - ): - from badam.utils import BAdamZeRO3Callback - self.callback_handler.add_callback(BAdamZeRO3Callback) + from badam import clip_grad_norm_old_version, BAdamCallback + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) + self.callback_handler.add_callback(BAdamCallback) def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None: r""" diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 1e5e9f6a..d3516b41 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -48,15 +48,9 @@ class CustomTrainer(Trainer): self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - if (self.args.deepspeed_plugin is not None - and self.args.deepspeed_plugin.zero_stage == 3 - ): - from badam.utils import BAdamZeRO3Callback - self.callback_handler.add_callback(BAdamZeRO3Callback) + from badam import clip_grad_norm_old_version, BAdamCallback + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) + self.callback_handler.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 5d0e6263..433251cf 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -72,15 +72,9 @@ class PairwiseTrainer(Trainer): self.processor = processor self.can_return_loss = True # override property to return eval_loss if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - if (self.args.deepspeed_plugin is not None - and self.args.deepspeed_plugin.zero_stage == 3 - ): - from badam.utils import BAdamZeRO3Callback - self.callback_handler.add_callback(BAdamZeRO3Callback) + from badam import clip_grad_norm_old_version, BAdamCallback + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) + self.callback_handler.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 9446d245..45799b96 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -56,14 +56,9 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - 
self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - if (self.args.deepspeed_plugin is not None - and self.args.deepspeed_plugin.zero_stage == 3 - ): - from badam.utils import BAdamZeRO3Callback - self.callback_handler.add_callback(BAdamZeRO3Callback) + from badam import clip_grad_norm_old_version, BAdamCallback + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) + self.callback_handler.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 98c38842..b57f5a6e 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -371,11 +371,8 @@ def _create_badam_optimizer( dict(params=decay_params, weight_decay=training_args.weight_decay), ] - ds_zero3_enabled = False - if hasattr(training_args, "deepspeed_plugin") and training_args.deepspeed_plugin is not None: - assert training_args.deepspeed_plugin.zero_stage == 3, f"BAdam only supports deepspeed ZeRO-3 stage, got {training_args.deepspeed_plugin.zero_stage}" - assert finetuning_args.badam_mode == "layer", "BAdam only supports layer-wise update in ZeRO-3 stage" - ds_zero3_enabled = True + from transformers.integrations import is_deepspeed_zero3_enabled + ds_zero3_enabled = is_deepspeed_zero3_enabled() if finetuning_args.badam_mode == "layer": from badam import BlockOptimizer @@ -400,6 +397,7 @@ def _create_badam_optimizer( elif finetuning_args.badam_mode == "ratio": from badam import BlockOptimizerRatio + assert not ds_zero3_enabled, "BAdam with ratio-based update does not support Deepspeed ZeRO-3 yet, use layer-wise update instead: --badam_mode layer." 
assert finetuning_args.badam_update_ratio > 1e-6 optimizer = BlockOptimizerRatio( param_groups=param_groups, @@ -411,7 +409,7 @@ def _create_badam_optimizer( **optim_kwargs, ) logger.info( - f"Using BAdam optimizer with ratio-wise update, update ratio is {finetuning_args.badam_update_ratio}, " + f"Using BAdam optimizer with ratio-based update, update ratio is {finetuning_args.badam_update_ratio}, " f"mask mode is {finetuning_args.badam_mask_mode}" ) From 3e0fa4a8dad19e0ead5c288d607dc6e9a60b6ff0 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 17:44:05 +0800 Subject: [PATCH 078/160] fix templates Former-commit-id: 6f357d59b73309c5955683008632e7f320e7dcb1 --- src/llamafactory/data/formatter.py | 2 +- src/llamafactory/data/template.py | 48 ++++++++----------------- src/llamafactory/train/trainer_utils.py | 3 +- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 70be6a5a..ed9ba8b8 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -68,7 +68,7 @@ def default_tool_formatter(tools: List[Dict[str, Any]]) -> str: def default_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: - regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|$)", re.DOTALL) + regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|\s*$)", re.DOTALL) action_match: List[Tuple[str, str]] = re.findall(regex, content) if not action_match: return content diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index c9af9605..3c4bc5ec 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -478,11 +478,7 @@ _register_template( _register_template( name="breeze", format_user=StringFormatter(slots=["[INST] {{content}} [/INST] "]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - default_system=( - "You are a helpful AI assistant built by MediaTek Research. " - "The user you are helping speaks Traditional Chinese and comes from Taiwan." - ), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, ) @@ -569,13 +565,8 @@ _register_template( ) ] ), - format_system=StringFormatter( - slots=[{"bos_token"}, "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"] - ), - default_system=( - "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users " - "by providing thorough responses. You are trained by Cohere." 
- ), + format_system=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -645,8 +636,6 @@ _register_template( _register_template( name="empty", - format_user=StringFormatter(slots=["{{content}}"]), - format_assistant=StringFormatter(slots=["{{content}}"]), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, ) @@ -695,25 +684,21 @@ _register_template( _register_template( name="intern", - format_user=StringFormatter(slots=["<|User|>:{{content}}", {"token": ""}, "\n<|Bot|>:"]), - format_separator=EmptyFormatter(slots=[{"token": ""}, "\n"]), + format_user=StringFormatter(slots=["<|User|>:{{content}}\n<|Bot|>:"]), + format_system=StringFormatter(slots=["<|System|>:{{content}}\n"]), + format_separator=EmptyFormatter(slots=["\n"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=[""], - efficient_eos=True, + efficient_eos=True, # internlm tokenizer cannot set eos_token_id ) _register_template( name="intern2", format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), - format_system=StringFormatter(slots=[{"bos_token"}, "<|im_start|>system\n{{content}}<|im_end|>\n"]), - format_separator=EmptyFormatter(slots=["\n"]), - default_system=( - "You are an AI assistant whose name is InternLM (书生·浦语).\n" - "- InternLM (书生·浦语) is a conversational language model that is developed " - "by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n" - "- InternLM (书生·浦语) can understand and communicate fluently in the language chosen " - "by the user such as English and 中文." - ), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), + format_separator=EmptyFormatter(slots=["<|im_end|>\n"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|im_end|>"], efficient_eos=True, # internlm2 tokenizer cannot set eos_token_id ) @@ -722,7 +707,6 @@ _register_template( _register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), - format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]), format_system=StringFormatter(slots=["<>\n{{content}}\n<>\n\n"]), ) @@ -745,9 +729,7 @@ _register_template( ) ] ), - format_system=StringFormatter( - slots=[{"bos_token"}, "<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"] - ), + format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]), format_observation=StringFormatter( slots=[ ( @@ -756,7 +738,7 @@ _register_template( ) ] ), - default_system="You are a helpful assistant.", + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|eot_id|>"], replace_eos=True, ) @@ -809,9 +791,9 @@ _register_template( _register_template( name="phi", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>\n"]), - format_system=StringFormatter(slots=[{"bos_token"}, "<|system|>\n{{content}}<|end|>\n"]), + format_system=StringFormatter(slots=["<|system|>\n{{content}}<|end|>\n"]), format_separator=EmptyFormatter(slots=["\n"]), - default_system="You are a helpful AI assistant.", + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|end|>"], replace_eos=True, ) diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 9052c96d..c1b90155 100644 --- a/src/llamafactory/train/trainer_utils.py +++ 
b/src/llamafactory/train/trainer_utils.py @@ -198,6 +198,7 @@ def convert_pissa_adapter( safe_serialization=training_args.save_safetensors, ) setattr(unwrapped_model.peft_config["default"], "init_lora_weights", init_lora_weights) + elif output_dir == training_args.output_dir: # at the end of training logger.info("Converted PiSSA adapter will be saved at: {}.".format(output_dir)) unwrapped_model = accelerator.unwrap_model(model) @@ -233,7 +234,7 @@ def _create_galore_optimizer( finetuning_args: "FinetuningArguments", ) -> "torch.optim.Optimizer": if len(finetuning_args.galore_target) == 1 and finetuning_args.galore_target[0] == "all": - galore_targets = find_all_linear_modules(model) + galore_targets = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) else: galore_targets = finetuning_args.galore_target From c65f7e9bd5b204d143eb7768a87857d619c92017 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 20:03:50 +0800 Subject: [PATCH 079/160] fix jinja template Former-commit-id: 0ebf2e2ee23918d28b0cbb20ba456732d6eedfbb --- src/llamafactory/data/template.py | 6 +++++- tests/data/test_supervised.py | 9 +++++--- tests/data/test_template.py | 35 +++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 tests/data/test_template.py diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3c4bc5ec..393ebfea 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -338,7 +338,11 @@ def _convert_slots_to_jinja(slots: "SLOTS", tokenizer: "PreTrainedTokenizer", pl def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer") -> str: - jinja_template = _convert_slots_to_jinja(template.format_prefix.apply(), tokenizer) + jinja_template = "" + + prefix = _convert_slots_to_jinja(template.format_prefix.apply(), tokenizer) + if prefix: + jinja_template += "{{ " + prefix + " }}" if template.default_system: jinja_template += "{% set system_message = '" + _jinja_escape(template.default_system) + "' %}" diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py index 9f7b2dbf..9cb49615 100644 --- a/tests/data/test_supervised.py +++ b/tests/data/test_supervised.py @@ -17,6 +17,7 @@ import random import pytest from datasets import load_dataset +from transformers import AutoTokenizer from llamafactory.data import get_dataset from llamafactory.hparams import get_train_args @@ -48,10 +49,11 @@ def test_supervised(num_samples: int): tokenizer = tokenizer_module["tokenizer"] tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module) + ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) + original_data = load_dataset(TRAIN_ARGS["dataset"], split="train") indexes = random.choices(range(len(original_data)), k=num_samples) for index in indexes: - decoded_result = tokenizer.decode(tokenized_data["input_ids"][index]) prompt = original_data[index]["instruction"] if original_data[index]["input"]: prompt += "\n" + original_data[index]["input"] @@ -60,5 +62,6 @@ def test_supervised(num_samples: int): {"role": "user", "content": prompt}, {"role": "assistant", "content": original_data[index]["output"]}, ] - templated_result = tokenizer.apply_chat_template(messages, tokenize=False) - assert decoded_result == templated_result + templated_result = ref_tokenizer.apply_chat_template(messages, tokenize=False) + decoded_result = tokenizer.decode(tokenized_data["input_ids"][index]) + assert templated_result == 
decoded_result diff --git a/tests/data/test_template.py b/tests/data/test_template.py new file mode 100644 index 00000000..9d73c116 --- /dev/null +++ b/tests/data/test_template.py @@ -0,0 +1,35 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from transformers import AutoTokenizer + +from llamafactory.data import get_template_and_fix_tokenizer + + +TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") + + +def test_jinja_template(): + tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) + ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) + get_template_and_fix_tokenizer(tokenizer, name="llama3") + assert tokenizer.chat_template != ref_tokenizer.chat_template + + messages = [ + {"role": "user", "content": "hi!"}, + {"role": "assistant", "content": "hello there"}, + ] + assert tokenizer.apply_chat_template(messages) == ref_tokenizer.apply_chat_template(messages) From b631bdc5b7b50bab61c479ad6ec0383b04d1dc2e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 20:42:09 +0800 Subject: [PATCH 080/160] release v0.8.2 Former-commit-id: 3050bbe51d46acd8473275d2713fc28932e4a3d3 --- .github/workflows/publish.yml | 39 ++++++++++++++++++++++++++++++++++ src/llamafactory/extras/env.py | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 00000000..bb39be3a --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,39 @@ +name: publish + +on: + release: + types: [published] + +jobs: + publish: + name: Upload release to PyPI + + runs-on: ubuntu-latest + + environment: + name: release + url: https://pypi.org/p/llamafactory + + permissions: + id-token: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.8" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install build + + - name: Build package + run: | + python -m build + + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index 586c24c0..ee224148 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -23,7 +23,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.2.dev0" +VERSION = "0.8.2" def print_env() -> None: From f0bff183245ae93ac6caff5515f711f758bb1aec Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 20:46:33 +0800 Subject: [PATCH 081/160] Update publish.yml Former-commit-id: 60b0633e29c9e701aa3813bd1fdc0282bd07f7c8 --- .github/workflows/publish.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index bb39be3a..15c7153e 100644 --- 
a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -2,7 +2,8 @@ name: publish on: release: - types: [published] + types: + - published jobs: publish: From a7d7f79855b0f89557ff74463cbcced8bd43e9a3 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 21:08:16 +0800 Subject: [PATCH 082/160] set dev version Former-commit-id: 221665345d97f839ce4ba8d54643da30c71b6083 --- src/llamafactory/extras/env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index ee224148..ab387231 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -23,7 +23,7 @@ import trl from transformers.utils import is_torch_cuda_available, is_torch_npu_available -VERSION = "0.8.2" +VERSION = "0.8.3.dev0" def print_env() -> None: From 5f5d4c1923b7477d7497c88cdf92f7649f6cf386 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 21:27:00 +0800 Subject: [PATCH 083/160] update patcher Former-commit-id: afb365e515d615dd62f791622450debab60ce5cc --- src/llamafactory/model/model_utils/checkpointing.py | 10 ++++------ src/llamafactory/model/patcher.py | 5 +++++ tests/model/model_utils/test_checkpointing.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py index f5314125..f4f3d8a5 100644 --- a/src/llamafactory/model/model_utils/checkpointing.py +++ b/src/llamafactory/model/model_utils/checkpointing.py @@ -78,9 +78,7 @@ def _fp32_forward_post_hook( return output.to(torch.float32) -def prepare_model_for_training( - model: "PreTrainedModel", model_args: "ModelArguments", output_layer_name: str = "lm_head" -) -> None: +def prepare_model_for_training(model: "PreTrainedModel", model_args: "ModelArguments") -> None: r""" Includes: (1) cast the layernorm in fp32 @@ -104,8 +102,8 @@ def prepare_model_for_training( setattr(model.config, "use_cache", False) # turn off when gradient checkpointing is enabled logger.info("Gradient checkpointing enabled.") - if hasattr(model, output_layer_name) and model_args.upcast_lmhead_output: - logger.info("Upcasting lm_head outputs in float32.") - output_layer = getattr(model, output_layer_name) + if model_args.upcast_lmhead_output: + output_layer = model.get_output_embeddings() if isinstance(output_layer, torch.nn.Linear) and output_layer.weight.dtype != torch.float32: + logger.info("Upcasting lm_head outputs in float32.") output_layer.register_forward_hook(_fp32_forward_post_hook) diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 8fa17d08..a53fde98 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -152,6 +152,10 @@ def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None: if isinstance(self.pretrained_model, PreTrainedModel): return self.pretrained_model.get_input_embeddings() + def get_output_embeddings(self: "AutoModelForCausalLMWithValueHead") -> torch.nn.Module: + if isinstance(self.pretrained_model, PreTrainedModel): + return self.pretrained_model.get_output_embeddings() + def create_or_update_model_card(self: "AutoModelForCausalLMWithValueHead", output_dir: str) -> None: if isinstance(self.pretrained_model, PeftModel): self.pretrained_model.create_or_update_model_card(output_dir) @@ -160,4 +164,5 @@ def patch_valuehead_model(model: "AutoModelForCausalLMWithValueHead") -> None: setattr(model, 
"_keys_to_ignore_on_save", ignore_modules) setattr(model, "tie_weights", MethodType(tie_weights, model)) setattr(model, "get_input_embeddings", MethodType(get_input_embeddings, model)) + setattr(model, "get_output_embeddings", MethodType(get_output_embeddings, model)) setattr(model, "create_or_update_model_card", MethodType(create_or_update_model_card, model)) diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py index 670e693d..9b6dfc9e 100644 --- a/tests/model/model_utils/test_checkpointing.py +++ b/tests/model/model_utils/test_checkpointing.py @@ -70,5 +70,5 @@ def test_upcast_lmhead_output(): tokenizer_module = load_tokenizer(model_args) model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True) inputs = torch.randn((1, 16), dtype=torch.float16, device=get_current_device()) - outputs: "torch.Tensor" = model.lm_head(inputs) + outputs: "torch.Tensor" = model.get_output_embeddings()(inputs) assert outputs.dtype == torch.float32 From b2f5c0e0db51ae1814615c11aaaec4def9cd2d83 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 23:22:28 +0800 Subject: [PATCH 084/160] fix llamaboard abort Former-commit-id: 9ef609a2c0185040e531dea3829a6f481539cdea --- src/llamafactory/webui/runner.py | 4 ++-- src/llamafactory/webui/utils.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 13dbba03..6cd21b07 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -24,7 +24,7 @@ from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir, load_config from .locales import ALERTS, LOCALES -from .utils import abort_leaf_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd +from .utils import abort_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd if is_gradio_available(): @@ -52,7 +52,7 @@ class Runner: def set_abort(self) -> None: self.aborted = True if self.trainer is not None: - abort_leaf_process(self.trainer.pid) + abort_process(self.trainer.pid) def _initialize(self, data: Dict["Component", Any], do_train: bool, from_preview: bool) -> str: get = lambda elem_id: data[self.manager.get_elem_by_id(elem_id)] diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 6ce2a8e7..a616bcba 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -33,16 +33,16 @@ if is_gradio_available(): import gradio as gr -def abort_leaf_process(pid: int) -> None: +def abort_process(pid: int) -> None: r""" - Aborts the leaf processes. + Aborts the processes recursively in a bottom-up way. 
""" children = psutil.Process(pid).children() if children: for child in children: - abort_leaf_process(child.pid) - else: - os.kill(pid, signal.SIGABRT) + abort_process(child.pid) + + os.kill(pid, signal.SIGABRT) def can_quantize(finetuning_type: str) -> "gr.Dropdown": From 0edccc11a5856cdfc76a1952e6d72626b96cab48 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 23:46:03 +0800 Subject: [PATCH 085/160] improve llamaboard Former-commit-id: e606ab35c0eced667dde7137c2d72848f264c96c --- src/llamafactory/webui/components/top.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index 2515a83d..09d43ac8 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -50,9 +50,13 @@ def create_top() -> Dict[str, "Component"]: visual_inputs = gr.Checkbox(scale=1) model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) - model_name.input(save_config, inputs=[lang, model_name], queue=False) + model_name.input(save_config, inputs=[lang, model_name], queue=False).then( + list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False + ) model_path.input(save_config, inputs=[lang, model_name, model_path], queue=False) - finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False) + finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False).then( + list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False + ) checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) return dict( From b232552d425a6c782f0c5310fbc1725b531fa7e1 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Thu, 20 Jun 2024 02:09:47 +0000 Subject: [PATCH 086/160] update dependencies Former-commit-id: 25164273d1ca7a8f6f99b41279e342906f6bc4d5 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f76524d8..7380add4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,4 @@ matplotlib>=3.7.0 fire packaging pyyaml +numpy<2.0.0 From af2cb33bb219feb0a9fc2595cb41f054196481a7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 20 Jun 2024 22:56:05 +0800 Subject: [PATCH 087/160] tiny fix Former-commit-id: 2d8d47f6126d68db1701ed18fc31310c6f14dd49 --- src/llamafactory/hparams/parser.py | 3 +++ src/llamafactory/model/adapter.py | 17 ++++++++--------- src/llamafactory/model/patcher.py | 4 ++-- src/llamafactory/webui/components/top.py | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 9b305016..a593bf45 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -199,6 +199,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if not is_torch_bf16_gpu_available(): raise ValueError("This device does not support `pure_bf16`.") + if training_args.deepspeed: + raise ValueError("`pure_bf16` is incompatible with DeepSpeed.") + if training_args.fp16 or training_args.bf16: raise ValueError("Turn off mixed precision training when using `pure_bf16`.") diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 34518878..7caef9cc 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -289,16 +289,15 @@ 
def init_adapter( raise ValueError("Cannot initialize PiSSA adapter on quantized models.") # cast trainable parameters to float32 if: - # 1. is_trainable and quantization_bit is not None (qlora) - # 2. is_trainable and not deepspeed zero3 and not fsdp (zero3 or fsdp already in float32) - # 3. is_trainable and not pure_bf16 and not badam + # 1. is_trainable and not pure_bf16 and not badam and quantization_bit is not None (qlora) + # 2. is_trainable and not pure_bf16 and not badam and not zero3 and not fsdp (zero3 or fsdp already in fp32) + cast_trainable_params_to_fp32 = False if not is_trainable: - cast_trainable_params_to_fp32 = False - elif model_args.quantization_bit is None and ( - is_deepspeed_zero3_enabled() or is_fsdp_enabled() or finetuning_args.pure_bf16 or finetuning_args.use_badam - ): - logger.info("ZeRO3/FSDP/PureBF16/BAdam detected, remaining trainable params as their original precision.") - cast_trainable_params_to_fp32 = False + pass + elif finetuning_args.pure_bf16 or finetuning_args.use_badam: + logger.info("Pure bf16 / BAdam detected, remaining trainable params in half precision.") + elif model_args.quantization_bit is None and (is_deepspeed_zero3_enabled() or is_fsdp_enabled()): + logger.info("ZeRO3 / FSDP detected, remaining trainable params in float32.") else: logger.info("Upcasting trainable params to float32.") cast_trainable_params_to_fp32 = True diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index a53fde98..35153649 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -91,8 +91,8 @@ def patch_config( # cast data type of the model if: # 1. not deepspeed zero3 and not fsdp (keep zero3 or fsdp in float32) - # 2. fsdp + qlora - if model_args.quantization_bit is not None or (not is_deepspeed_zero3_enabled() and not is_fsdp_enabled()): + # 2. 
quantization_bit is not None (qlora) + if (not is_deepspeed_zero3_enabled() and not is_fsdp_enabled()) or model_args.quantization_bit is not None: init_kwargs["torch_dtype"] = model_args.compute_dtype if init_kwargs["low_cpu_mem_usage"]: # device map requires low_cpu_mem_usage=True diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index 09d43ac8..18b9a7d2 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -49,10 +49,10 @@ def create_top() -> Dict[str, "Component"]: booster = gr.Radio(choices=["none", "flashattn2", "unsloth"], value="none", scale=3) visual_inputs = gr.Checkbox(scale=1) - model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False) - model_name.input(save_config, inputs=[lang, model_name], queue=False).then( + model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False).then( list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False ) + model_name.input(save_config, inputs=[lang, model_name], queue=False) model_path.input(save_config, inputs=[lang, model_name, model_path], queue=False) finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False).then( list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False From 05abe47c8bc2eb4af0b7431d7e6ef92a1ab43371 Mon Sep 17 00:00:00 2001 From: Erich Schubert Date: Fri, 21 Jun 2024 09:14:21 +0200 Subject: [PATCH 088/160] Print help if no arguments given Former-commit-id: 08dfb7ec636fd5bfbb30dac9d5fba6e32bfc6728 --- src/llamafactory/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index c7f136b3..af9dd5f5 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -74,7 +74,7 @@ class Command(str, Enum): def main(): - command = sys.argv.pop(1) + command = sys.argv.pop(1) if len(sys.argv) > 0 else Command.HELP if command == Command.API: run_api() elif command == Command.CHAT: From f29c1ac6e54617ded23a29f6b0d98018e272cf92 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 22 Jun 2024 00:00:38 +0800 Subject: [PATCH 089/160] fix api Former-commit-id: dcbd6d86dfc49f12529b02ec331e3e5c05740061 --- src/llamafactory/api/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index 2c7e11e2..72b2ae50 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -93,7 +93,7 @@ def _process_request( if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls): tool_calls = [ - {"name": tool_call.function.name, "argument": tool_call.function.arguments} + {"name": tool_call.function.name, "arguments": tool_call.function.arguments} for tool_call in message.tool_calls ] content = json.dumps(tool_calls, ensure_ascii=False) From 4513a2cc75f023ad6b5aaae3e98b584470bcbe1e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 22 Jun 2024 01:31:32 +0800 Subject: [PATCH 090/160] remove dup template Former-commit-id: 5fec12203b24608af4d4993f44a657eb5a0348e5 --- src/llamafactory/data/template.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 393ebfea..b5bf688c 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -511,25 +511,6 @@ 
_register_template( ) -_register_template( - name="chatglm3_system", - format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]), - format_assistant=StringFormatter(slots=["\n", "{{content}}"]), - format_system=StringFormatter(slots=[{"token": "<|system|>"}, "\n", "{{content}}"]), - format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), - format_observation=StringFormatter( - slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}] - ), - format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]), - default_system=( - "You are ChatGLM3, a large language model trained by Zhipu.AI. " - "Follow the user's instructions carefully. Respond using markdown." - ), - stop_words=["<|user|>", "<|observation|>"], - efficient_eos=True, -) - - _register_template( name="chatml", format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), From 43a065bb071217af77cbe8ecdd690a8168a79131 Mon Sep 17 00:00:00 2001 From: mMrBun <2015711377@qq.com> Date: Sat, 22 Jun 2024 02:00:13 +0800 Subject: [PATCH 091/160] Add tool_format to overwrite tool formatter template Former-commit-id: af08971ca50443fd5597e5e4412a3aa17214502f --- src/llamafactory/chat/hf_engine.py | 2 +- src/llamafactory/chat/vllm_engine.py | 2 +- src/llamafactory/data/template.py | 5 ++++- src/llamafactory/hparams/data_args.py | 4 ++++ 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 9e60175b..22a24339 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -54,7 +54,7 @@ class HuggingfaceEngine(BaseEngine): self.tokenizer = tokenizer_module["tokenizer"] self.processor = tokenizer_module["processor"] self.tokenizer.padding_side = "left" if self.can_generate else "right" - self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template) + self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template, data_args.tool_format) self.model = load_model( self.tokenizer, model_args, finetuning_args, is_trainable=False, add_valuehead=(not self.can_generate) ) # must after fixing tokenizer to resize vocab diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index 2626d612..f0d23676 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -59,7 +59,7 @@ class VllmEngine(BaseEngine): self.tokenizer = tokenizer_module["tokenizer"] self.processor = tokenizer_module["processor"] self.tokenizer.padding_side = "left" - self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template) + self.template = get_template_and_fix_tokenizer(self.tokenizer, data_args.template, data_args.tool_format) self.generating_args = generating_args.to_dict() engine_args = { diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index b5bf688c..3d8ded3b 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -379,6 +379,7 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer") def get_template_and_fix_tokenizer( tokenizer: "PreTrainedTokenizer", name: Optional[str] = None, + tool_format: Optional[str] = None, ) -> Template: if name is None: template = TEMPLATES["empty"] # placeholder @@ -386,6 +387,9 @@ def get_template_and_fix_tokenizer( template = TEMPLATES.get(name, None) if template is None: 
raise ValueError("Template {} does not exist.".format(name)) + + if tool_format: + template.format_tools = ToolFormatter(tool_format=tool_format) stop_words = template.stop_words if template.replace_eos: @@ -660,7 +664,6 @@ _register_template( format_system=StringFormatter(slots=["<|system|>\n{{content}}"]), format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]), - format_tools=ToolFormatter(tool_format="glm4"), format_prefix=EmptyFormatter(slots=["[gMASK]"]), stop_words=["<|user|>", "<|observation|>"], efficient_eos=True, diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 39290e21..959742e3 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -29,6 +29,10 @@ class DataArguments: default=None, metadata={"help": "Which template to use for constructing prompts in training and inference."}, ) + tool_format: Optional[str] = field( + default=None, + metadata={"help": "Specifies the tool format template for function calling ."}, + ) dataset: Optional[str] = field( default=None, metadata={"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."}, From 4c89aca243ffd5e2230437f0ac812c7048739a1a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 24 Jun 2024 18:22:12 +0800 Subject: [PATCH 092/160] update readme Former-commit-id: a1477208471039d3578980f929f1ca8c2a07aa96 --- CITATION.cff | 10 ++++-- README.md | 49 +++++++++++++++++++-------- README_zh.md | 49 +++++++++++++++++++-------- src/llamafactory/extras/constants.py | 2 +- src/llamafactory/train/ppo/trainer.py | 16 ++++----- 5 files changed, 85 insertions(+), 41 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 4caf3787..a572b5fa 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -12,12 +12,14 @@ authors: given-names: "Yanhan" - family-names: "Luo" given-names: "Zheyan" +- family-names: "Feng" + given-names: "Zhangchi" - family-names: "Ma" given-names: "Yongqiang" title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models" url: "https://arxiv.org/abs/2403.13372" preferred-citation: - type: article + type: conference-paper authors: - family-names: "Zheng" given-names: "Yaowei" @@ -29,9 +31,13 @@ preferred-citation: given-names: "Yanhan" - family-names: "Luo" given-names: "Zheyan" + - family-names: "Feng" + given-names: "Zhangchi" - family-names: "Ma" given-names: "Yongqiang" - journal: "arXiv preprint arXiv:2403.13372" + booktitle: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)" title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models" url: "https://arxiv.org/abs/2403.13372" year: 2024 + publisher: "Association for Computational Linguistics" + address: "Bangkok, Thailand" diff --git a/README.md b/README.md index 0e8e55f7..9462964c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub Code License](https://img.shields.io/github/license/hiyouga/LLaMA-Factory)](LICENSE) [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/) -[![Citation](https://img.shields.io/badge/citation-44-green)](#projects-using-llama-factory) 
+[![Citation](https://img.shields.io/badge/citation-63-green)](#projects-using-llama-factory) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) @@ -15,7 +15,7 @@ [![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535) -👋 Join our [WeChat](assets/wechat.jpg). +👋 Join our [WeChat](assets/wechat.jpg) or [NPU user group](assets/wechat_npu.jpg). \[ English | [中文](README_zh.md) \] @@ -360,8 +360,6 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
For Ascend NPU users -Join [NPU user group](assets/wechat_npu.jpg). - To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e '.[torch-npu,metrics]'`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: ```bash @@ -503,38 +501,55 @@ If you have a project that should be incorporated, please contact via email or c 1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526) 1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816) 1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710) -1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2401.04319) -1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2401.07286) +1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319) +1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286) 1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904) 1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625) 1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176) 1. Yang et al. LaCo: Large Language Model Pruning via Layer Collaps. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187) 1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746) 1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801) -1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. 2024. [[arxiv]](https://arxiv.org/abs/2402.11809) +1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809) 1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819) 1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204) 1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. 
[[arxiv]](https://arxiv.org/abs/2402.14714) -1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.15043) +1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043) 1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333) 1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419) 1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228) 1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073) 1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541) 1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246) -1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008) +1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008) 1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443) 1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604) 1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827) 1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167) -1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. 2024. [[arxiv]](https://arxiv.org/abs/2404.04316) +1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316) 1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084) 1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836) 1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581) 1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215) 1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621) -1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2404.17140) -1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2404.18585) +1. Zhang et al. 
Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140) +1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585) +1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760) +1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378) +1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055) +1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739) +1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816) +1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215) +1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30) +1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380) +1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106) +1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136) +1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496) +1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688) +1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955) +1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973) +1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115) +1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815) +1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099) 1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B. 1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge. 1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B. @@ -542,6 +557,8 @@ If you have a project that should be incorporated, please contact via email or c 1. 
**[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**: A series of MBTI Personality large language models, capable of giving any LLM 16 different personality types based on different datasets and training methods. 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**: A large language model specialized in generate metadata for stable diffusion. [[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) 1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**: A multimodal large language model specialized in Chinese medical domain, based on LLaVA-1.5-7B. +1. **[AutoRE](https://github.com/THUDM/AutoRE)**: A document-level relation extraction system based on large language models. +1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**: SDKs for fine-tuning LLMs on Windows PC for NVIDIA RTX.
@@ -556,10 +573,12 @@ Please follow the model licenses to use the corresponding model weights: [Baichu If this work is helpful, please kindly cite as: ```bibtex -@article{zheng2024llamafactory, +@inproceedings{zheng2024llamafactory, title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models}, - author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Yongqiang Ma}, - journal={arXiv preprint arXiv:2403.13372}, + author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma}, + booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)}, + address={Bangkok, Thailand}, + publisher={Association for Computational Linguistics}, year={2024}, url={http://arxiv.org/abs/2403.13372} } diff --git a/README_zh.md b/README_zh.md index 152cd6fa..2073ce17 100644 --- a/README_zh.md +++ b/README_zh.md @@ -15,7 +15,7 @@ [![GitHub Tread](https://trendshift.io/api/badge/repositories/4535)](https://trendshift.io/repositories/4535) -👋 加入我们的[微信群](assets/wechat.jpg)。 +👋 加入我们的[微信群](assets/wechat.jpg)或 [NPU 用户群](assets/wechat_npu.jpg)。 \[ [English](README.md) | 中文 \] @@ -360,8 +360,6 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
昇腾 NPU 用户指南 -加入 [NPU 用户群](assets/wechat_npu.jpg)。 - 在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e '.[torch-npu,metrics]'` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash @@ -503,38 +501,55 @@ run_name: test_run # 可选 1. Wang et al. UbiPhysio: Support Daily Functioning, Fitness, and Rehabilitation with Action Understanding and Feedback in Natural Language. 2023. [[arxiv]](https://arxiv.org/abs/2308.10526) 1. Luceri et al. Leveraging Large Language Models to Detect Influence Campaigns in Social Media. 2023. [[arxiv]](https://arxiv.org/abs/2311.07816) 1. Zhang et al. Alleviating Hallucinations of Large Language Models through Induced Hallucinations. 2023. [[arxiv]](https://arxiv.org/abs/2312.15710) -1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2401.04319) -1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2401.07286) +1. Wang et al. Know Your Needs Better: Towards Structured Understanding of Marketer Demands with Analogical Reasoning Augmented LLMs. KDD 2024. [[arxiv]](https://arxiv.org/abs/2401.04319) +1. Wang et al. CANDLE: Iterative Conceptualization and Instantiation Distillation from Large Language Models for Commonsense Reasoning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2401.07286) 1. Choi et al. FACT-GPT: Fact-Checking Augmentation via Claim Matching with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2402.05904) 1. Zhang et al. AutoMathText: Autonomous Data Selection with Language Models for Mathematical Texts. 2024. [[arxiv]](https://arxiv.org/abs/2402.07625) 1. Lyu et al. KnowTuning: Knowledge-aware Fine-tuning for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11176) 1. Yang et al. LaCo: Large Language Model Pruning via Layer Collaps. 2024. [[arxiv]](https://arxiv.org/abs/2402.11187) 1. Bhardwaj et al. Language Models are Homer Simpson! Safety Re-Alignment of Fine-tuned Language Models through Task Arithmetic. 2024. [[arxiv]](https://arxiv.org/abs/2402.11746) 1. Yang et al. Enhancing Empathetic Response Generation by Augmenting LLMs with Small-scale Empathetic Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11801) -1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. 2024. [[arxiv]](https://arxiv.org/abs/2402.11809) +1. Yi et al. Generation Meets Verification: Accelerating Large Language Model Inference with Smart Parallel Auto-Correct Decoding. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2402.11809) 1. Cao et al. Head-wise Shareable Attention for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.11819) 1. Zhang et al. Enhancing Multilingual Capabilities of Large Language Models through Self-Distillation from Resource-Rich Languages. 2024. [[arxiv]](https://arxiv.org/abs/2402.12204) 1. Kim et al. Efficient and Effective Vocabulary Expansion Towards Multilingual Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2402.14714) -1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. 2024. 
[[arxiv]](https://arxiv.org/abs/2402.15043) +1. Yu et al. KIEval: A Knowledge-grounded Interactive Evaluation Framework for Large Language Models. ACL 2024. [[arxiv]](https://arxiv.org/abs/2402.15043) 1. Huang et al. Key-Point-Driven Data Synthesis with its Enhancement on Mathematical Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2403.02333) 1. Duan et al. Negating Negatives: Alignment without Human Positive Samples via Distributional Dispreference Optimization. 2024. [[arxiv]](https://arxiv.org/abs/2403.03419) 1. Xie and Schwertfeger. Empowering Robotics with Large Language Models: osmAG Map Comprehension with LLMs. 2024. [[arxiv]](https://arxiv.org/abs/2403.08228) 1. Wu et al. Large Language Models are Parallel Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2403.09073) 1. Zhang et al. EDT: Improving Large Language Models' Generation by Entropy-based Dynamic Temperature Sampling. 2024. [[arxiv]](https://arxiv.org/abs/2403.14541) 1. Weller et al. FollowIR: Evaluating and Teaching Information Retrieval Models to Follow Instructions. 2024. [[arxiv]](https://arxiv.org/abs/2403.15246) -1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2403.16008) +1. Hongbin Na. CBT-LLM: A Chinese Large Language Model for Cognitive Behavioral Therapy-based Mental Health Question Answering. COLING 2024. [[arxiv]](https://arxiv.org/abs/2403.16008) 1. Zan et al. CodeS: Natural Language to Code Repository via Multi-Layer Sketch. 2024. [[arxiv]](https://arxiv.org/abs/2403.16443) 1. Liu et al. Extensive Self-Contrast Enables Feedback-Free Language Model Alignment. 2024. [[arxiv]](https://arxiv.org/abs/2404.00604) 1. Luo et al. BAdam: A Memory Efficient Full Parameter Training Method for Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.02827) 1. Du et al. Chinese Tiny LLM: Pretraining a Chinese-Centric Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2404.04167) -1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. 2024. [[arxiv]](https://arxiv.org/abs/2404.04316) +1. Ma et al. Parameter Efficient Quasi-Orthogonal Fine-Tuning via Givens Rotation. ICML 2024. [[arxiv]](https://arxiv.org/abs/2404.04316) 1. Liu et al. Dynamic Generation of Personalities with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.07084) 1. Shang et al. How Far Have We Gone in Stripped Binary Code Understanding Using Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.09836) 1. Huang et al. LLMTune: Accelerate Database Knob Tuning with Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2404.11581) 1. Deng et al. Text-Tuple-Table: Towards Information Integration in Text-to-Table Generation via Global Tuple Extraction. 2024. [[arxiv]](https://arxiv.org/abs/2404.14215) 1. Acikgoz et al. Hippocrates: An Open-Source Framework for Advancing Large Language Models in Healthcare. 2024. [[arxiv]](https://arxiv.org/abs/2404.16621) -1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. 2024. [[arxiv]](https://arxiv.org/abs/2404.17140) -1. Zhou et al. FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. 2024. [[arxiv]](https://arxiv.org/abs/2404.18585) +1. Zhang et al. Small Language Models Need Strong Verifiers to Self-Correct Reasoning. ACL 2024 Findings. [[arxiv]](https://arxiv.org/abs/2404.17140) +1. Zhou et al. 
FREB-TQA: A Fine-Grained Robustness Evaluation Benchmark for Table Question Answering. NAACL 2024. [[arxiv]](https://arxiv.org/abs/2404.18585) +1. Xu et al. Large Language Models for Cyber Security: A Systematic Literature Review. 2024. [[arxiv]](https://arxiv.org/abs/2405.04760) +1. Dammu et al. "They are uncultured": Unveiling Covert Harms and Social Threats in LLM Generated Conversations. 2024. [[arxiv]](https://arxiv.org/abs/2405.05378) +1. Yi et al. A safety realignment framework via subspace-oriented model fusion for large language models. 2024. [[arxiv]](https://arxiv.org/abs/2405.09055) +1. Lou et al. SPO: Multi-Dimensional Preference Sequential Alignment With Implicit Reward Modeling. 2024. [[arxiv]](https://arxiv.org/abs/2405.12739) +1. Zhang et al. Getting More from Less: Large Language Models are Good Spontaneous Multilingual Learners. 2024. [[arxiv]](https://arxiv.org/abs/2405.13816) +1. Zhang et al. TS-Align: A Teacher-Student Collaborative Framework for Scalable Iterative Finetuning of Large Language Models. 2024. [[arxiv]](https://arxiv.org/abs/2405.20215) +1. Zihong Chen. Sentence Segmentation and Sentence Punctuation Based on XunziALLM. 2024. [[paper]](https://aclanthology.org/2024.lt4hala-1.30) +1. Gao et al. The Best of Both Worlds: Toward an Honest and Helpful Large Language Model. 2024. [[arxiv]](https://arxiv.org/abs/2406.00380) +1. Wang and Song. MARS: Benchmarking the Metaphysical Reasoning Abilities of Language Models with a Multi-task Evaluation Dataset. 2024. [[arxiv]](https://arxiv.org/abs/2406.02106) +1. Hu et al. Computational Limits of Low-Rank Adaptation (LoRA) for Transformer-Based Models. 2024. [[arxiv]](https://arxiv.org/abs/2406.03136) +1. Ge et al. Time Sensitive Knowledge Editing through Efficient Finetuning. ACL 2024. [[arxiv]](https://arxiv.org/abs/2406.04496) +1. Tan et al. Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions. 2024. [[arxiv]](https://arxiv.org/abs/2406.05688) +1. Song et al. Turbo Sparse: Achieving LLM SOTA Performance with Minimal Activated Parameters. 2024. [[arxiv]](https://arxiv.org/abs/2406.05955) +1. Gu et al. RWKV-CLIP: A Robust Vision-Language Representation Learner. 2024. [[arxiv]](https://arxiv.org/abs/2406.06973) +1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115) +1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815) +1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099) 1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。 1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。 1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: 孙思邈中文医疗大模型 Sumsimiao,基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。 @@ -542,6 +557,8 @@ run_name: test_run # 可选 1. **[MachineMindset](https://github.com/PKU-YuanGroup/Machine-Mindset/)**:MBTI性格大模型项目,根据数据集与训练方式让任意 LLM 拥有 16 个不同的性格类型。 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) 1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。 +1. 
**[AutoRE](https://github.com/THUDM/AutoRE)**:基于大语言模型的文档级关系抽取系统。 +1. **[NVIDIA RTX AI Toolkit](https://github.com/NVIDIA/RTX-AI-Toolkit)**: 在 Windows 主机上利用英伟达 RTX 设备进行大型语言模型微调的开发包。
@@ -556,10 +573,12 @@ run_name: test_run # 可选 如果您觉得此项目有帮助,请考虑以下列格式引用 ```bibtex -@article{zheng2024llamafactory, - title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models}, - author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Yongqiang Ma}, - journal={arXiv preprint arXiv:2403.13372}, +@inproceedings{zheng2024llamafactory, + title={LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models}, + author={Yaowei Zheng and Richong Zhang and Junhao Zhang and Yanhan Ye and Zheyan Luo and Zhangchi Feng and Yongqiang Ma}, + booktitle={Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)}, + address={Bangkok, Thailand}, + publisher={Association for Computational Linguistics}, year={2024}, url={http://arxiv.org/abs/2403.13372} } diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 36265c8e..866f39d4 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -579,7 +579,7 @@ register_model_group( register_model_group( models={ - "Jambda-v0.1": { + "Jamba-v0.1": { DownloadSource.DEFAULT: "ai21labs/Jamba-v0.1", DownloadSource.MODELSCOPE: "AI-ModelScope/Jamba-v0.1", } diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index df4a37be..38f4c6c8 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -202,18 +202,18 @@ class CustomPPOTrainer(PPOTrainer, Trainer): if self.is_world_process_zero(): logger.info("***** Running training *****") - logger.info(" Num examples = {}".format(num_examples)) - logger.info(" Num Epochs = {}".format(num_train_epochs)) - logger.info(" Instantaneous batch size per device = {}".format(self.args.per_device_train_batch_size)) + logger.info(" Num examples = {:,}".format(num_examples)) + logger.info(" Num Epochs = {:,}".format(num_train_epochs)) + logger.info(" Instantaneous batch size per device = {:,}".format(self.args.per_device_train_batch_size)) logger.info( - " Total train batch size (w. parallel, buffer, distributed & accumulation) = {}".format( + " Total train batch size (w. 
parallel, buffer, distributed & accumulation) = {:,}".format( total_train_batch_size ) ) - logger.info(" Gradient Accumulation steps = {}".format(self.args.gradient_accumulation_steps)) - logger.info(" Num optimization epochs per batch = {}".format(self.finetuning_args.ppo_epochs)) - logger.info(" Total training steps = {}".format(max_steps)) - logger.info(" Number of trainable parameters = {}".format(count_parameters(self.model)[0])) + logger.info(" Gradient Accumulation steps = {:,}".format(self.args.gradient_accumulation_steps)) + logger.info(" Num optimization epochs per batch = {:,}".format(self.finetuning_args.ppo_epochs)) + logger.info(" Total training steps = {:,}".format(max_steps)) + logger.info(" Number of trainable parameters = {:,}".format(count_parameters(self.model)[0])) dataiter = iter(self.dataloader) loss_meter = AverageMeter() From 826d7808b427fb118914720a85e932bb26b8a940 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 24 Jun 2024 18:29:04 +0800 Subject: [PATCH 093/160] update readme Former-commit-id: 0775d56ee3cfde34e28a48cbf4a583f4530def19 --- CITATION.cff | 3 ++- README_zh.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index a572b5fa..01b4c9fd 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -20,6 +20,8 @@ title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models" url: "https://arxiv.org/abs/2403.13372" preferred-citation: type: conference-paper + conference: + name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)" authors: - family-names: "Zheng" given-names: "Yaowei" @@ -35,7 +37,6 @@ preferred-citation: given-names: "Zhangchi" - family-names: "Ma" given-names: "Yongqiang" - booktitle: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)" title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models" url: "https://arxiv.org/abs/2403.13372" year: 2024 diff --git a/README_zh.md b/README_zh.md index 2073ce17..8b77e91e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,7 +4,7 @@ [![GitHub Code License](https://img.shields.io/github/license/hiyouga/LLaMA-Factory)](LICENSE) [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/) -[![Citation](https://img.shields.io/badge/citation-44-green)](#使用了-llama-factory-的项目) +[![Citation](https://img.shields.io/badge/citation-63-green)](#使用了-llama-factory-的项目) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) From af2607de1a1b0259e250e77bd6a5f2c5c1b6f223 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Mon, 24 Jun 2024 10:57:36 +0000 Subject: [PATCH 094/160] update docker files 1. add docker-npu (Dockerfile and docker-compose.yml) 2. 
move cuda docker to docker-cuda and tiny changes to adapt to the new path Former-commit-id: 5431c1f18aadb072208efe7fd8e36fdcfbf807c2 --- README.md | 61 +++++++++++++++++-- README_zh.md | 58 ++++++++++++++++-- Dockerfile => docker/docker-cuda/Dockerfile | 10 +-- .../docker-cuda/docker-compose.yml | 6 +- docker/docker-npu/Dockerfile | 40 ++++++++++++ docker/docker-npu/docker-compose.yml | 31 ++++++++++ 6 files changed, 187 insertions(+), 19 deletions(-) rename Dockerfile => docker/docker-cuda/Dockerfile (87%) rename docker-compose.yml => docker/docker-cuda/docker-compose.yml (80%) create mode 100644 docker/docker-npu/Dockerfile create mode 100644 docker/docker-npu/docker-compose.yml diff --git a/README.md b/README.md index 9462964c..1107ae0b 100644 --- a/README.md +++ b/README.md @@ -383,10 +383,11 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh | torch-npu | 2.1.0 | 2.1.0.post3 | | deepspeed | 0.13.2 | 0.13.2 | -Docker image: +Docker users please refer to [Build Docker](#Build-Docker). -- 32GB: [Download page](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) -- 64GB: [Download page](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html) +**NOTE** + +The default docker image is [cosdt/cann:8.0.rc1-910b-ubuntu22.04](https://hub.docker.com/layers/cosdt/cann/8.0.rc1-910b-ubuntu22.04/images/sha256-29ef8aacf6b2babd292f06f00b9190c212e7c79a947411e213135e4d41a178a9?context=explore). More options can be found at [cosdt/cann](https://hub.docker.com/r/cosdt/cann/tags). Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to specify the device to use. @@ -426,7 +427,10 @@ llamafactory-cli webui #### Use Docker +
For NVIDIA GPU users: + ```bash +cd ./docker/docker-cuda docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ @@ -435,18 +439,63 @@ docker build -f ./Dockerfile \ -t llamafactory:latest . docker run -it --gpus=all \ - -v ./hf_cache:/root/.cache/huggingface/ \ - -v ./data:/app/data \ - -v ./output:/app/output \ + -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ + -v /$(dirname $(dirname "$PWD"))/data:/app/data \ + -v /$(dirname $(dirname "$PWD"))/output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ --shm-size 16G \ --name llamafactory \ llamafactory:latest ``` +
+ +
For Ascend NPU users: + +```bash +cd ./docker/docker-npu +docker build -f ./Dockerfile \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +# add --device for multi-npu usage +# or modify --device to change npu card +docker run -it \ + -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ + -v /$(dirname $(dirname "$PWD"))/data:/app/data \ + -v /$(dirname $(dirname "$PWD"))/output:/app/output \ + -v /usr/local/dcmi:/usr/local/dcmi \ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 \ + -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \ + -v /etc/ascend_install.info:/etc/ascend_install.info \ + -p 7860:7860 \ + -p 8000:8000 \ + --device /dev/davinci0 \ + --device /dev/davinci_manager \ + --device /dev/devmm_svm \ + --device /dev/hisi_hdc \ + --shm-size 16G \ + --name llamafactory \ + llamafactory:latest +``` +
#### Use Docker Compose +Firstly enter your docker path: + +```bash +# for NVIDIA GPU users +cd ./docker/docker-cuda + +# for Ascend NPU users +cd ./docker/docker-npu +``` + +Then run the following command to build docker image and start the container: + ```bash docker-compose up -d docker-compose exec llamafactory bash diff --git a/README_zh.md b/README_zh.md index 8b77e91e..6326c0b5 100644 --- a/README_zh.md +++ b/README_zh.md @@ -383,10 +383,11 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh | torch-npu | 2.1.0 | 2.1.0.post3 | | deepspeed | 0.13.2 | 0.13.2 | -Docker 镜像: +Docker用户请参考 [构建 Docker](#构建-Docker). -- 32GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) -- 64GB:[下载地址](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html) +**NOTE** + +默认镜像为 [cosdt/cann:8.0.rc1-910b-ubuntu22.04](https://hub.docker.com/layers/cosdt/cann/8.0.rc1-910b-ubuntu22.04/images/sha256-29ef8aacf6b2babd292f06f00b9190c212e7c79a947411e213135e4d41a178a9?context=explore). 更多选择见 [cosdt/cann](https://hub.docker.com/r/cosdt/cann/tags). 请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。 @@ -426,7 +427,10 @@ llamafactory-cli webui #### 使用 Docker +
NVIDIA GPU 用户: + ```bash +cd ./docker/docker-cuda docker build -f ./Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ @@ -435,18 +439,60 @@ docker build -f ./Dockerfile \ -t llamafactory:latest . docker run -it --gpus=all \ - -v ./hf_cache:/root/.cache/huggingface/ \ - -v ./data:/app/data \ - -v ./output:/app/output \ + -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ + -v /$(dirname $(dirname "$PWD"))/data:/app/data \ + -v /$(dirname $(dirname "$PWD"))/output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ --shm-size 16G \ --name llamafactory \ llamafactory:latest ``` +
+ +
Ascend NPU 用户: + +```bash +cd ./docker/docker-npu +docker build -f ./Dockerfile \ + --build-arg INSTALL_DEEPSPEED=false \ + --build-arg PIP_INDEX=https://pypi.org/simple \ + -t llamafactory:latest . + +# 增加 --device 来使用多卡 NPU 或修改第一个 --device 来更改 NPU 卡 +docker run -it \ + -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ + -v /$(dirname $(dirname "$PWD"))/data:/app/data \ + -v /$(dirname $(dirname "$PWD"))/output:/app/output \ + -v /usr/local/dcmi:/usr/local/dcmi \ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 \ + -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \ + -v /etc/ascend_install.info:/etc/ascend_install.info \ + -p 7860:7860 \ + -p 8000:8000 \ + --device /dev/davinci0 \ + --device /dev/davinci_manager \ + --device /dev/devmm_svm \ + --device /dev/hisi_hdc \ + --shm-size 16G \ + --name llamafactory \ + llamafactory:latest +``` +
#### 使用 Docker Compose +首先进入 docker 目录: +```bash +# NVIDIA GPU 用户 +cd ./docker/docker-cuda + +# Ascend NPU 用户 +cd ./docker/docker-npu +``` +然后运行以下命令创建 docker 镜像并启动容器: + ```bash docker-compose up -d docker-compose exec llamafactory bash diff --git a/Dockerfile b/docker/docker-cuda/Dockerfile similarity index 87% rename from Dockerfile rename to docker/docker-cuda/Dockerfile index 61d58005..72797a26 100644 --- a/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -9,16 +9,18 @@ ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory -WORKDIR /app +WORKDIR /app/LLaMA-Factory + +RUN cd /app && \ + git config --global http.version HTTP/1.1 && \ + git clone https://github.com/hiyouga/LLaMA-Factory.git && \ + cd /app/LLaMA-Factory # Install the requirements -COPY requirements.txt /app/ RUN pip config set global.index-url $PIP_INDEX RUN python -m pip install --upgrade pip RUN python -m pip install -r requirements.txt -# Copy the rest of the application into the image -COPY . /app/ # Install the LLaMA Factory RUN EXTRA_PACKAGES="metrics"; \ diff --git a/docker-compose.yml b/docker/docker-cuda/docker-compose.yml similarity index 80% rename from docker-compose.yml rename to docker/docker-cuda/docker-compose.yml index c5dc34e9..a470aa60 100644 --- a/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -10,9 +10,9 @@ services: PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ./hf_cache:/root/.cache/huggingface/ - - ./data:/app/data - - ./output:/app/output + - ../../hf_cache:/root/.cache/huggingface/ + - ../../data:/app/LLaMA-Factory/data + - ../../output:/app/LLaMA-Factory/output ports: - "7860:7860" - "8000:8000" diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile new file mode 100644 index 00000000..9456bcbf --- /dev/null +++ b/docker/docker-npu/Dockerfile @@ -0,0 +1,40 @@ +# Using ubuntu 22.04 images with cann 8.0.rc1 +# More options can be found at https://hub.docker.com/r/cosdt/cann/tags +FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Define installation arguments +ARG INSTALL_DEEPSPEED=false +ARG PIP_INDEX=https://pypi.org/simple + +# Set the working directory +WORKDIR /app/LLaMA-Factory + +RUN cd /app && \ + git config --global http.version HTTP/1.1 && \ + git clone https://github.com/hiyouga/LLaMA-Factory.git && \ + cd /app/LLaMA-Factory + +RUN pip config set global.index-url $PIP_INDEX +RUN python3 -m pip install --upgrade pip + +# Install the LLaMA Factory +RUN EXTRA_PACKAGES="torch-npu,metrics"; \ + if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ + fi; \ + pip install -e .[$EXTRA_PACKAGES] && \ + pip uninstall -y transformer-engine flash-attn + +# Set up volumes +VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] + +# Expose port 7860 for the LLaMA Board +EXPOSE 7860 + +# Expose port 8000 for the API service +EXPOSE 8000 + +# Launch LLaMA Board +CMD [ "llamafactory-cli", "webui" ] diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml new file mode 100644 index 00000000..93eb6718 --- /dev/null +++ b/docker/docker-npu/docker-compose.yml @@ -0,0 +1,31 @@ +services: + llamafactory: + build: + dockerfile: Dockerfile + context: . 
+ args: + INSTALL_DEEPSPEED: false + PIP_INDEX: https://pypi.org/simple + container_name: llamafactory + volumes: + - ../../hf_cache:/root/.cache/huggingface/ + - ../../data:/app/LLaMA-Factory/data + - ../../output:/app/LLaMA-Factory/output + - /usr/local/dcmi:/usr/local/dcmi + - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi + - /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 + - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /etc/ascend_install.info:/etc/ascend_install.info + ports: + - "7860:7860" + - "8000:8000" + ipc: host + tty: true + stdin_open: true + command: bash + devices: + - /dev/davinci0 + - /dev/davinci_manager + - /dev/devmm_svm + - /dev/hisi_hdc + restart: unless-stopped From 2926265a140b0a446cb0968a2151e7ae60a66b0b Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Mon, 24 Jun 2024 12:27:00 +0000 Subject: [PATCH 095/160] auto-label npu issue Former-commit-id: d19c9eac783377151e58731723fb7cbb2dab3323 --- .github/workflows/label_issue.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml index b9a5543c..352b4b25 100644 --- a/.github/workflows/label_issue.yml +++ b/.github/workflows/label_issue.yml @@ -13,5 +13,18 @@ jobs: - env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} ISSUE_URL: ${{ github.event.issue.html_url }} + ISSUE_TITLE: "${{ github.event.issue.title }}" run: | gh issue edit $ISSUE_URL --add-label "pending" + + # auto-add label for npu + NPU_KEYWORDS=("npu" "ascend" "昇腾") + LABEL_NPU="npu" + ISSUE_TITLE_LOWER=$(echo "$ISSUE_TITLE" | tr '[:upper:]' '[:lower:]') + + for keyword in "${NPU_KEYWORDS[@]}"; do + if [[ "$ISSUE_TITLE_LOWER" == *"$keyword"* ]] && [[ "$ISSUE_TITLE_LOWER" != *"input"* ]]; then + gh issue edit "$ISSUE_URL" --add-label "$LABEL_NPU" + break + fi + done From 16e950454e06322cb69668c5971376ddcf64fc45 Mon Sep 17 00:00:00 2001 From: stceum <50257864+stceum@users.noreply.github.com> Date: Mon, 24 Jun 2024 20:39:20 +0800 Subject: [PATCH 096/160] Bug Fix: `off` is parsed as `False` in yaml file, changed to `disabled` to avoid this. Former-commit-id: 171289d8e4c111fdca2b100282b64c74a04a4726 --- src/llamafactory/hparams/model_args.py | 2 +- src/llamafactory/hparams/parser.py | 4 ++++ src/llamafactory/model/model_utils/attention.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 996e9130..9b51c064 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -97,7 +97,7 @@ class ModelArguments: default=None, metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."}, ) - flash_attn: Literal["off", "sdpa", "fa2", "auto"] = field( + flash_attn: Literal["disabled", "sdpa", "fa2", "auto"] = field( default="auto", metadata={"help": "Enable FlashAttention for faster training and inference."}, ) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index a593bf45..9ef2d607 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -102,6 +102,10 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1: raise ValueError("Quantized model only accepts a single adapter. Merge them first.") + # In case that `flash_attn` is set to `off` in the yaml file, and parsed as `False` afterwards. 
From 16e950454e06322cb69668c5971376ddcf64fc45 Mon Sep 17 00:00:00 2001
From: stceum <50257864+stceum@users.noreply.github.com>
Date: Mon, 24 Jun 2024 20:39:20 +0800
Subject: [PATCH 096/160] Bug Fix: `off` is parsed as `False` in yaml file, changed to `disabled` to avoid this.

Former-commit-id: 171289d8e4c111fdca2b100282b64c74a04a4726
---
 src/llamafactory/hparams/model_args.py          | 2 +-
 src/llamafactory/hparams/parser.py              | 4 ++++
 src/llamafactory/model/model_utils/attention.py | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py
index 996e9130..9b51c064 100644
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -97,7 +97,7 @@ class ModelArguments:
         default=None,
         metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},
     )
-    flash_attn: Literal["off", "sdpa", "fa2", "auto"] = field(
+    flash_attn: Literal["disabled", "sdpa", "fa2", "auto"] = field(
         default="auto",
         metadata={"help": "Enable FlashAttention for faster training and inference."},
     )
diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index a593bf45..9ef2d607 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -102,6 +102,10 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin
     if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
         raise ValueError("Quantized model only accepts a single adapter. Merge them first.")
 
+    # In case that `flash_attn` is set to `off` in the yaml file, and parsed as `False` afterwards.
+    if model_args.flash_attn == False:
+        raise ValueError("flash_attn should be \"disabled\", \"sdpa\", \"fa2\" or \"auto\".")
+
 
 def _check_extra_dependencies(
     model_args: "ModelArguments",
diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py
index 8ff3807b..dfd90936 100644
--- a/src/llamafactory/model/model_utils/attention.py
+++ b/src/llamafactory/model/model_utils/attention.py
@@ -32,7 +32,7 @@ def configure_attn_implementation(config: "PretrainedConfig", model_args: "Model
     if model_args.flash_attn == "auto":
         return
 
-    elif model_args.flash_attn == "off":
+    elif model_args.flash_attn == "disabled":
         requested_attn_implementation = "eager"
 
     elif model_args.flash_attn == "sdpa":

From 5675c47a014c7679370bfd38ad40666b64c9bd89 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 21:35:34 +0800
Subject: [PATCH 097/160] Update test_attention.py

Former-commit-id: c2cc7a0f152aa14fc03ae413f4a9dc06742a29d7
---
 tests/model/model_utils/test_attention.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py
index 97ac9dcc..4cae3d7c 100644
--- a/tests/model/model_utils/test_attention.py
+++ b/tests/model/model_utils/test_attention.py
@@ -29,7 +29,7 @@ INFER_ARGS = {
 
 
 def test_attention():
-    attention_available = ["off"]
+    attention_available = ["disabled"]
     if is_torch_sdpa_available():
         attention_available.append("sdpa")
 
@@ -37,7 +37,7 @@ def test_attention():
         attention_available.append("fa2")
 
     llama_attention_classes = {
-        "off": "LlamaAttention",
+        "disabled": "LlamaAttention",
         "sdpa": "LlamaSdpaAttention",
         "fa2": "LlamaFlashAttention2",
     }

From d475dd3809682acd9d871cf74edf604593664b73 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 21:37:42 +0800
Subject: [PATCH 098/160] Update parser.py

Former-commit-id: 60e605cd9d399bd04432864ede9c84302890eac8
---
 src/llamafactory/hparams/parser.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py
index 9ef2d607..a593bf45 100644
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -102,10 +102,6 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin
     if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
         raise ValueError("Quantized model only accepts a single adapter. Merge them first.")
 
-    # In case that `flash_attn` is set to `off` in the yaml file, and parsed as `False` afterwards.
-    if model_args.flash_attn == False:
-        raise ValueError("flash_attn should be \"disabled\", \"sdpa\", \"fa2\" or \"auto\".")
-
 
 def _check_extra_dependencies(
     model_args: "ModelArguments",
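With the option renamed, a config that wants to turn FlashAttention off has to spell the value out instead of relying on the YAML keyword `off` (which YAML parses as `False`). A minimal illustrative training snippet; the model, dataset and output paths are placeholders, not values taken from the patches:

```yaml
### model (placeholder)
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
flash_attn: disabled  # accepted values per the change above: auto / disabled / sdpa / fa2

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset (placeholder)
dataset: identity
template: llama3
output_dir: saves/llama3-8b/lora/sft
```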
From 6b738d1c89fc52b9a6696016f9981cdd723fc64a Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 21:59:39 +0800
Subject: [PATCH 099/160] Update label_issue.yml

Former-commit-id: 90785a69c6210c3a02babb12c56fb7900095247c
---
 .github/workflows/label_issue.yml | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml
index 352b4b25..507b6be7 100644
--- a/.github/workflows/label_issue.yml
+++ b/.github/workflows/label_issue.yml
@@ -13,18 +13,15 @@ jobs:
     - env:
         GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         ISSUE_URL: ${{ github.event.issue.html_url }}
-        ISSUE_TITLE: "${{ github.event.issue.title }}"
+        ISSUE_TITLE: ${{ github.event.issue.title }}
       run: |
-        gh issue edit $ISSUE_URL --add-label "pending"
-
-        # auto-add label for npu
-        NPU_KEYWORDS=("npu" "ascend" "昇腾")
-        LABEL_NPU="npu"
-        ISSUE_TITLE_LOWER=$(echo "$ISSUE_TITLE" | tr '[:upper:]' '[:lower:]')
-
-        for keyword in "${NPU_KEYWORDS[@]}"; do
-          if [[ "$ISSUE_TITLE_LOWER" == *"$keyword"* ]] && [[ "$ISSUE_TITLE_LOWER" != *"input"* ]]; then
-            gh issue edit "$ISSUE_URL" --add-label "$LABEL_NPU"
-            break
-          fi
-        done
+        LABEL=pending
+        NPU_KEYWORDS=(npu ascend 昇腾)
+        ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]')
+        for KEYWORD in ${NPU_KEYWORDS[@]}; do
+          if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]] then
+            LABEL=pending,npu
+            break
+          fi
+        done
+        gh issue edit $ISSUE_URL --add-label $LABEL

From a4f2d5aa6f4b58bbe33e7a94c17582987109b97d Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 22:01:23 +0800
Subject: [PATCH 100/160] Update label_issue.yml

Former-commit-id: dc2f7998b4ae9d7223c7c16732d835cea2a28713
---
 .github/workflows/label_issue.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml
index 507b6be7..0e10f0b9 100644
--- a/.github/workflows/label_issue.yml
+++ b/.github/workflows/label_issue.yml
@@ -19,7 +19,7 @@ jobs:
         NPU_KEYWORDS=(npu ascend 昇腾)
         ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]')
         for KEYWORD in ${NPU_KEYWORDS[@]}; do
-          if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]] then
+          if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then
            LABEL=pending,npu
            break
          fi

From a79e93f3357b78d8576e516711cc721995ac23a4 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Mon, 24 Jun 2024 22:34:31 +0800
Subject: [PATCH 101/160] fix #4410

Former-commit-id: f49adc4ab5eade21d7a9e029212f17688ee9b0cf
---
 src/llamafactory/data/formatter.py         |  4 ++--
 src/llamafactory/hparams/model_args.py     |  2 +-
 src/llamafactory/model/patcher.py          |  6 +++---
 src/llamafactory/webui/chatter.py          |  1 +
 src/llamafactory/webui/components/infer.py | 17 ++++++++++++++---
 src/llamafactory/webui/locales.py          | 11 +++++++++++
 6 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py
index ed9ba8b8..b5dc57ff 100644
--- a/src/llamafactory/data/formatter.py
+++ b/src/llamafactory/data/formatter.py
@@ -34,8 +34,8 @@ DEFAULT_TOOL_PROMPT = (
 
 
 GLM4_TOOL_PROMPT = (
-    "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，"
-    "你的任务是针对用户的问题和要求提供适当的答复和支持。{tool_text}"
+    "你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，"
+    "你的任务是针对用户的问题和要求提供适当的答复和支持。# 可用工具{tool_text}"
 )
diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py
index 9b51c064..3f21145d 100644
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -97,7 +97,7 @@ class ModelArguments:
         default=None,
         metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},
     )
-    flash_attn: Literal["disabled", "sdpa", "fa2", "auto"] = field(
+    flash_attn: Literal["auto", "disabled", "sdpa", "fa2"] = field(
         default="auto",
         metadata={"help": "Enable FlashAttention for faster training and inference."},
     )
diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index 35153649..24cd2601 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -58,10 +58,10 @@ def patch_config(
     is_trainable: bool,
 ) -> None:
     if model_args.compute_dtype is None:  # priority: bf16 > fp16 > fp32
-        if model_args.infer_dtype == "auto":
-            model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))
-        else:
+        if model_args.infer_dtype != "auto" and not is_trainable:
             model_args.compute_dtype = getattr(torch, model_args.infer_dtype)
+        else:
+            model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))
 
     if is_torch_npu_available():
         use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"]
diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py
index a2b54dce..652c341c 100644
--- a/src/llamafactory/webui/chatter.py
+++ b/src/llamafactory/webui/chatter.py
@@ -87,6 +87,7 @@ class WebChatModel(ChatModel):
             visual_inputs=get("top.visual_inputs"),
             rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None,
             infer_backend=get("infer.infer_backend"),
+            infer_dtype=get("infer.infer_dtype"),
         )
 
         if checkpoint_path:
diff --git a/src/llamafactory/webui/components/infer.py b/src/llamafactory/webui/components/infer.py
index 03bccd7f..a0064479 100644
--- a/src/llamafactory/webui/components/infer.py
+++ b/src/llamafactory/webui/components/infer.py
@@ -32,15 +32,26 @@ def create_infer_tab(engine: "Engine") -> Dict[str, "Component"]:
     input_elems = engine.manager.get_base_elems()
     elem_dict = dict()
 
-    infer_backend = gr.Dropdown(choices=["huggingface", "vllm"], value="huggingface")
+    with gr.Row():
+        infer_backend = gr.Dropdown(choices=["huggingface", "vllm"], value="huggingface")
+        infer_dtype = gr.Dropdown(choices=["auto", "float16", "bfloat16", "float32"], value="auto")
+
     with gr.Row():
         load_btn = gr.Button()
         unload_btn = gr.Button()
 
     info_box = gr.Textbox(show_label=False, interactive=False)
 
-    input_elems.update({infer_backend})
-    elem_dict.update(dict(infer_backend=infer_backend, load_btn=load_btn, unload_btn=unload_btn, info_box=info_box))
+    input_elems.update({infer_backend, infer_dtype})
+    elem_dict.update(
+        dict(
+            infer_backend=infer_backend,
+            infer_dtype=infer_dtype,
+            load_btn=load_btn,
+            unload_btn=unload_btn,
+            info_box=info_box,
+        )
+    )
 
     chatbot, messages, chat_elems = create_chat_box(engine, visible=False)
     elem_dict.update(chat_elems)
diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py
index 8e8d6fce..cd166584 100644
--- a/src/llamafactory/webui/locales.py
+++ b/src/llamafactory/webui/locales.py
@@ -1206,6 +1206,17 @@ LOCALES = {
             "label": "推理引擎",
         },
     },
+    "infer_dtype": {
+        "en": {
+            "label": "Inference data type",
+        },
+        "ru": {
+            "label": "Тип данных для вывода",
+        },
+        "zh": {
+            "label": "推理数据类型",
+        },
+    },
     "load_btn": {
         "en": {
             "value": "Load model",
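The new WebUI dropdown feeds the same `infer_dtype` argument that the config files accept, so the equivalent non-WebUI setup is a small inference config. An illustrative sketch only; the model and template values are placeholders:

```yaml
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3
infer_backend: huggingface
infer_dtype: bfloat16  # auto / float16 / bfloat16 / float32, matching the dropdown choices above
```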
From 9a1ec1984566d9c94c5377377956e14c87fc021a Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 23:06:18 +0800
Subject: [PATCH 102/160] Update loader.py

Former-commit-id: afa59d61844595e6b615227e6bfdc0b16c8015dd
---
 src/llamafactory/data/loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py
index f44ef5de..8e7062db 100644
--- a/src/llamafactory/data/loader.py
+++ b/src/llamafactory/data/loader.py
@@ -148,7 +148,7 @@ def get_dataset(
     tokenizer: "PreTrainedTokenizer",
     processor: Optional["ProcessorMixin"] = None,
 ) -> Union["Dataset", "IterableDataset"]:
-    template = get_template_and_fix_tokenizer(tokenizer, data_args.template)
+    template = get_template_and_fix_tokenizer(tokenizer, data_args.template, data_args.tool_format)
     if data_args.train_on_prompt and template.efficient_eos:
         raise ValueError("Current template does not support `train_on_prompt`.")

From 5d6cf55208c59fb209cdb938a700e295e73b9496 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 23:12:59 +0800
Subject: [PATCH 103/160] Update template.py

Former-commit-id: d53517bff6f8734221d7df9982f3bdd4d2eb2cab
---
 src/llamafactory/data/template.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index 3d8ded3b..3a72a858 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -664,6 +664,7 @@ _register_template(
     format_system=StringFormatter(slots=["<|system|>\n{{content}}"]),
     format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]),
     format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]),
+    format_tools=ToolFormatter(tool_format="glm4"),
     format_prefix=EmptyFormatter(slots=["[gMASK]"]),
     stop_words=["<|user|>", "<|observation|>"],
     efficient_eos=True,

From 0b331a318baecee4c4e2af1e50a66d3e2bc299e3 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 23:14:36 +0800
Subject: [PATCH 104/160] Update test_formatter.py

Former-commit-id: d13ef043441734189b05e739dbbebb16077a6f0b
---
 tests/data/test_formatter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/data/test_formatter.py b/tests/data/test_formatter.py
index 430eb0e6..a01e8a7e 100644
--- a/tests/data/test_formatter.py
+++ b/tests/data/test_formatter.py
@@ -111,9 +111,9 @@ def test_glm4_tool_formatter():
         }
     ]
     assert formatter.apply(content=json.dumps(tools)) == [
-        "你是一个名为 GLM-4 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，"
-        "你的任务是针对用户的问题和要求提供适当的答复和支持。"
-        "\n\n## test_tool\n\n{}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format(
+        "你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，"
+        "你的任务是针对用户的问题和要求提供适当的答复和支持。# 可用工具\n\n"
+        "## test_tool\n\n{}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format(
             json.dumps(tools[0], indent=4)
         )
     ]

From 8bcdb6f52cda26b0b5580c1d3b26117c6745f1a3 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga
Date: Mon, 24 Jun 2024 23:21:10 +0800
Subject: [PATCH 105/160] Update cli.py

Former-commit-id: 9db6126496ec9e834541823715f700f92b3968c7
---
 src/llamafactory/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py
index af9dd5f5..e4846780 100644
--- a/src/llamafactory/cli.py
+++ b/src/llamafactory/cli.py
@@ -74,7 +74,7 @@ class Command(str, Enum):
 
 
 def main():
-    command = sys.argv.pop(1) if len(sys.argv) > 0 else Command.HELP
+
command = sys.argv.pop(1) if len(sys.argv) != 1 else Command.HELP if command == Command.API: run_api() elif command == Command.CHAT: From 2e5d521ed8fb012ec60222233cce1e8120e5f861 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Mon, 24 Jun 2024 23:41:35 +0800 Subject: [PATCH 106/160] Update Dockerfile Former-commit-id: 632681d8ece0eaac59bb364d971435a3bc6665a9 --- docker/docker-cuda/Dockerfile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 72797a26..61d58005 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -9,18 +9,16 @@ ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory -WORKDIR /app/LLaMA-Factory - -RUN cd /app && \ - git config --global http.version HTTP/1.1 && \ - git clone https://github.com/hiyouga/LLaMA-Factory.git && \ - cd /app/LLaMA-Factory +WORKDIR /app # Install the requirements +COPY requirements.txt /app/ RUN pip config set global.index-url $PIP_INDEX RUN python -m pip install --upgrade pip RUN python -m pip install -r requirements.txt +# Copy the rest of the application into the image +COPY . /app/ # Install the LLaMA Factory RUN EXTRA_PACKAGES="metrics"; \ From b5cdef43a1e46d00fbaef65ebf7f8ed35e049167 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 25 Jun 2024 00:46:08 +0800 Subject: [PATCH 107/160] Update Dockerfile Former-commit-id: c897a70501707c0f4c432bb8e9a9beeb4e8953a3 --- docker/docker-cuda/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 61d58005..2d20bfe4 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -12,13 +12,14 @@ ARG PIP_INDEX=https://pypi.org/simple WORKDIR /app # Install the requirements -COPY requirements.txt /app/ +COPY requirements.txt /app RUN pip config set global.index-url $PIP_INDEX +RUN pip config set global.extra-index-url $PIP_INDEX RUN python -m pip install --upgrade pip RUN python -m pip install -r requirements.txt # Copy the rest of the application into the image -COPY . /app/ +COPY . /app # Install the LLaMA Factory RUN EXTRA_PACKAGES="metrics"; \ @@ -38,10 +39,9 @@ RUN EXTRA_PACKAGES="metrics"; \ VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] # Expose port 7860 for the LLaMA Board +ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 # Expose port 8000 for the API service +ENV API_PORT 8000 EXPOSE 8000 - -# Launch LLaMA Board -CMD [ "llamafactory-cli", "webui" ] From 8360544d6579a26a43422a6642545c2ba0db288c Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 25 Jun 2024 00:46:47 +0800 Subject: [PATCH 108/160] Update docker-compose.yml Former-commit-id: 56af208074e6af5465183af85367e7edd89d5aa6 --- docker/docker-cuda/docker-compose.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index a470aa60..04d6531a 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -1,8 +1,8 @@ services: llamafactory: build: - dockerfile: Dockerfile - context: . + dockerfile: ./docker/docker-cuda/Dockerfile + context: ../.. 
args: INSTALL_BNB: false INSTALL_VLLM: false @@ -10,9 +10,9 @@ services: PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ../../hf_cache:/root/.cache/huggingface/ - - ../../data:/app/LLaMA-Factory/data - - ../../output:/app/LLaMA-Factory/output + - ./hf_cache:/root/.cache/huggingface/ + - ./data:/app/data + - ./output:/app/output ports: - "7860:7860" - "8000:8000" From 37013f80689d52c39aaccefd72b9e02be0feee20 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 25 Jun 2024 00:50:34 +0800 Subject: [PATCH 109/160] Update Dockerfile Former-commit-id: cdcd9455c19311394e148476a28ca75849c845b2 --- docker/docker-npu/Dockerfile | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index 9456bcbf..0fdd4472 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -1,5 +1,5 @@ -# Using ubuntu 22.04 images with cann 8.0.rc1 -# More options can be found at https://hub.docker.com/r/cosdt/cann/tags +# Use the Ubuntu 22.04 image with CANN 8.0.rc1 +# More versions can be found at https://hub.docker.com/r/cosdt/cann/tags FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 ENV DEBIAN_FRONTEND=noninteractive @@ -9,15 +9,17 @@ ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory -WORKDIR /app/LLaMA-Factory - -RUN cd /app && \ - git config --global http.version HTTP/1.1 && \ - git clone https://github.com/hiyouga/LLaMA-Factory.git && \ - cd /app/LLaMA-Factory +WORKDIR /app +# Install the requirements +COPY requirements.txt /app RUN pip config set global.index-url $PIP_INDEX -RUN python3 -m pip install --upgrade pip +RUN pip config set global.extra-index-url $PIP_INDEX +RUN python -m pip install --upgrade pip +RUN python -m pip install -r requirements.txt + +# Copy the rest of the application into the image +COPY . /app # Install the LLaMA Factory RUN EXTRA_PACKAGES="torch-npu,metrics"; \ @@ -31,10 +33,9 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \ VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] # Expose port 7860 for the LLaMA Board +ENV GRADIO_SERVER_PORT 7860 EXPOSE 7860 # Expose port 8000 for the API service +ENV API_PORT 8000 EXPOSE 8000 - -# Launch LLaMA Board -CMD [ "llamafactory-cli", "webui" ] From 8367ec03a7dc36b4bfcf6c1f0e9cc49166a6302c Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 25 Jun 2024 00:54:28 +0800 Subject: [PATCH 110/160] Update docker-compose.yml Former-commit-id: e038daf8dfa5d948b70c18469cb5a0be9aec464a --- docker/docker-npu/docker-compose.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml index 93eb6718..7fff6e73 100644 --- a/docker/docker-npu/docker-compose.yml +++ b/docker/docker-npu/docker-compose.yml @@ -1,20 +1,19 @@ services: llamafactory: build: - dockerfile: Dockerfile - context: . + dockerfile: ./docker/docker-npu/Dockerfile + context: ../.. 
args: INSTALL_DEEPSPEED: false PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ../../hf_cache:/root/.cache/huggingface/ - - ../../data:/app/LLaMA-Factory/data - - ../../output:/app/LLaMA-Factory/output + - ./hf_cache:/root/.cache/huggingface/ + - ./data:/app/data + - ./output:/app/output - /usr/local/dcmi:/usr/local/dcmi - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - - /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 - - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info + - /usr/local/Ascend/driver:/usr/local/Ascend/driver - /etc/ascend_install.info:/etc/ascend_install.info ports: - "7860:7860" From cec2f1fc00ae29f627c897343035752936344cd7 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 25 Jun 2024 01:03:38 +0800 Subject: [PATCH 111/160] Update README.md Former-commit-id: abe7aca5e133960da9200e3a036d9a550f474171 --- README.md | 84 ++++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 1107ae0b..a20b848b 100644 --- a/README.md +++ b/README.md @@ -383,12 +383,6 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh | torch-npu | 2.1.0 | 2.1.0.post3 | | deepspeed | 0.13.2 | 0.13.2 | -Docker users please refer to [Build Docker](#Build-Docker). - -**NOTE** - -The default docker image is [cosdt/cann:8.0.rc1-910b-ubuntu22.04](https://hub.docker.com/layers/cosdt/cann/8.0.rc1-910b-ubuntu22.04/images/sha256-29ef8aacf6b2babd292f06f00b9190c212e7c79a947411e213135e4d41a178a9?context=explore). More options can be found at [cosdt/cann](https://hub.docker.com/r/cosdt/cann/tags). - Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to specify the device to use. If you cannot infer model on NPU devices, try setting `do_sample: false` in the configurations. @@ -425,50 +419,62 @@ llamafactory-cli webui ### Build Docker -#### Use Docker - -
For NVIDIA GPU users: +For CUDA users: ```bash -cd ./docker/docker-cuda -docker build -f ./Dockerfile \ +docker-compose -f ./docker/docker-cuda/docker-compose.yml up -d +docker-compose exec llamafactory bash +``` + +For Ascend NPU users: + +```bash +docker-compose -f ./docker/docker-npu/docker-compose.yml up -d +docker-compose exec llamafactory bash +``` + +
Build without Docker Compose + +For CUDA users: + +```bash +docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . -docker run -it --gpus=all \ - -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ - -v /$(dirname $(dirname "$PWD"))/data:/app/data \ - -v /$(dirname $(dirname "$PWD"))/output:/app/output \ +docker run -dit --gpus=all \ + -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./data:/app/data \ + -v ./output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ --shm-size 16G \ --name llamafactory \ llamafactory:latest -``` -
-
For Ascend NPU users: +docker exec -it llamafactory bash +``` + +For Ascend NPU users: ```bash -cd ./docker/docker-npu -docker build -f ./Dockerfile \ +# Change docker image upon your environment +docker build -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . -# add --device for multi-npu usage -# or modify --device to change npu card -docker run -it \ - -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ - -v /$(dirname $(dirname "$PWD"))/data:/app/data \ - -v /$(dirname $(dirname "$PWD"))/output:/app/output \ +# Change `device` upon your resources +docker run -dit \ + -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./data:/app/data \ + -v ./output:/app/output \ -v /usr/local/dcmi:/usr/local/dcmi \ -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ - -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 \ - -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ -v /etc/ascend_install.info:/etc/ascend_install.info \ -p 7860:7860 \ -p 8000:8000 \ @@ -479,28 +485,12 @@ docker run -it \ --shm-size 16G \ --name llamafactory \ llamafactory:latest + +docker exec -it llamafactory bash ``` +
-#### Use Docker Compose - -Firstly enter your docker path: - -```bash -# for NVIDIA GPU users -cd ./docker/docker-cuda - -# for Ascend NPU users -cd ./docker/docker-npu -``` - -Then run the following command to build docker image and start the container: - -```bash -docker-compose up -d -docker-compose exec llamafactory bash -``` -
Details about volume - hf_cache: Utilize Hugging Face cache on the host machine. Reassignable if a cache already exists in a different directory. From b55eb3047407ec3e06fbaeaa2fb0e526e9219384 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Tue, 25 Jun 2024 01:06:59 +0800 Subject: [PATCH 112/160] Update README_zh.md Former-commit-id: f0c95160fea48b8c6291f42beb79ac089177fbb2 --- README_zh.md | 83 ++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/README_zh.md b/README_zh.md index 6326c0b5..3bed0846 100644 --- a/README_zh.md +++ b/README_zh.md @@ -360,7 +360,7 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
昇腾 NPU 用户指南 -在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e '.[torch-npu,metrics]'` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: +在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e ".[torch-npu,metrics]"` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash # 请替换 URL 为 CANN 版本和设备型号对应的 URL @@ -383,12 +383,6 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh | torch-npu | 2.1.0 | 2.1.0.post3 | | deepspeed | 0.13.2 | 0.13.2 | -Docker用户请参考 [构建 Docker](#构建-Docker). - -**NOTE** - -默认镜像为 [cosdt/cann:8.0.rc1-910b-ubuntu22.04](https://hub.docker.com/layers/cosdt/cann/8.0.rc1-910b-ubuntu22.04/images/sha256-29ef8aacf6b2babd292f06f00b9190c212e7c79a947411e213135e4d41a178a9?context=explore). 更多选择见 [cosdt/cann](https://hub.docker.com/r/cosdt/cann/tags). - 请使用 `ASCEND_RT_VISIBLE_DEVICES` 而非 `CUDA_VISIBLE_DEVICES` 来指定运算设备。 如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`。 @@ -425,49 +419,62 @@ llamafactory-cli webui ### 构建 Docker -#### 使用 Docker - -
NVIDIA GPU 用户: +CUDA 用户: ```bash -cd ./docker/docker-cuda -docker build -f ./Dockerfile \ +docker-compose -f ./docker/docker-cuda/docker-compose.yml up -d +docker-compose exec llamafactory bash +``` + +昇腾 NPU 用户: + +```bash +docker-compose -f ./docker/docker-npu/docker-compose.yml up -d +docker-compose exec llamafactory bash +``` + +
不使用 Docker Compose 构建 + +CUDA 用户: + +```bash +docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . -docker run -it --gpus=all \ - -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ - -v /$(dirname $(dirname "$PWD"))/data:/app/data \ - -v /$(dirname $(dirname "$PWD"))/output:/app/output \ +docker run -dit --gpus=all \ + -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./data:/app/data \ + -v ./output:/app/output \ -p 7860:7860 \ -p 8000:8000 \ --shm-size 16G \ --name llamafactory \ llamafactory:latest -``` -
-
Ascend NPU 用户: +docker exec -it llamafactory bash +``` + +昇腾 NPU 用户: ```bash -cd ./docker/docker-npu -docker build -f ./Dockerfile \ +# 根据您的环境选择镜像 +docker build -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . -# 增加 --device 来使用多卡 NPU 或修改第一个 --device 来更改 NPU 卡 -docker run -it \ - -v /$(dirname $(dirname "$PWD"))/hf_cache:/root/.cache/huggingface/ \ - -v /$(dirname $(dirname "$PWD"))/data:/app/data \ - -v /$(dirname $(dirname "$PWD"))/output:/app/output \ +# 根据您的资源更改 `device` +docker run -dit \ + -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./data:/app/data \ + -v ./output:/app/output \ -v /usr/local/dcmi:/usr/local/dcmi \ -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ - -v /usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64 \ - -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ -v /etc/ascend_install.info:/etc/ascend_install.info \ -p 7860:7860 \ -p 8000:8000 \ @@ -478,26 +485,12 @@ docker run -it \ --shm-size 16G \ --name llamafactory \ llamafactory:latest + +docker exec -it llamafactory bash ``` +
-#### 使用 Docker Compose - -首先进入 docker 目录: -```bash -# NVIDIA GPU 用户 -cd ./docker/docker-cuda - -# Ascend NPU 用户 -cd ./docker/docker-npu -``` -然后运行以下命令创建 docker 镜像并启动容器: - -```bash -docker-compose up -d -docker-compose exec llamafactory bash -``` -
数据卷详情 - hf_cache:使用宿主机的 Hugging Face 缓存文件夹,允许更改为新的目录。 From 135bfbf7c1eb7d0c214d6add293cbd18e865f900 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 25 Jun 2024 01:15:19 +0800 Subject: [PATCH 113/160] tiny fix Former-commit-id: bb57478366a70a0871af30ab31c890f471e27ff4 --- .dockerignore | 5 +++-- README.md | 10 ++++++---- README_zh.md | 6 ++++-- src/llamafactory/data/formatter.py | 2 +- src/llamafactory/data/template.py | 6 +++--- src/llamafactory/hparams/data_args.py | 8 ++++---- src/llamafactory/webui/components/train.py | 2 +- src/llamafactory/webui/runner.py | 2 +- 8 files changed, 23 insertions(+), 18 deletions(-) diff --git a/.dockerignore b/.dockerignore index 2ac0e11d..75cd2209 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,10 +4,11 @@ .venv cache data +docker +examples +saves hf_cache output -examples .dockerignore .gitattributes .gitignore -Dockerfile diff --git a/README.md b/README.md index a20b848b..e1b45236 100644 --- a/README.md +++ b/README.md @@ -360,7 +360,7 @@ To enable FlashAttention-2 on the Windows platform, you need to install the prec
For Ascend NPU users -To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e '.[torch-npu,metrics]'`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: +To install LLaMA Factory on Ascend NPU devices, please specify extra dependencies: `pip install -e ".[torch-npu,metrics]"`. Additionally, you need to install the **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**. Please follow the [installation tutorial](https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/softwareinstall/instg/atlasdeploy_03_0031.html) or use the following commands: ```bash # replace the url according to your CANN version and devices @@ -422,14 +422,16 @@ llamafactory-cli webui For CUDA users: ```bash -docker-compose -f ./docker/docker-cuda/docker-compose.yml up -d +cd docker/docker-cuda/ +docker-compose up -d docker-compose exec llamafactory bash ``` For Ascend NPU users: ```bash -docker-compose -f ./docker/docker-npu/docker-compose.yml up -d +cd docker/docker-npu/ +docker-compose up -d docker-compose exec llamafactory bash ``` @@ -461,7 +463,7 @@ docker exec -it llamafactory bash For Ascend NPU users: ```bash -# Change docker image upon your environment +# Choose docker image upon your environment docker build -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ diff --git a/README_zh.md b/README_zh.md index 3bed0846..32edb1f7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -422,14 +422,16 @@ llamafactory-cli webui CUDA 用户: ```bash -docker-compose -f ./docker/docker-cuda/docker-compose.yml up -d +cd docker/docker-cuda/ +docker-compose up -d docker-compose exec llamafactory bash ``` 昇腾 NPU 用户: ```bash -docker-compose -f ./docker/docker-npu/docker-compose.yml up -d +cd docker/docker-npu/ +docker-compose up -d docker-compose exec llamafactory bash ``` diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index b5dc57ff..88ebf682 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -216,7 +216,7 @@ class ToolFormatter(Formatter): self._tool_formatter = glm4_tool_formatter self._tool_extractor = glm4_tool_extractor else: - raise ValueError("Tool format was not found.") + raise NotImplementedError("Tool format {} was not found.".format(self.tool_format)) def apply(self, **kwargs) -> SLOTS: content = kwargs.pop("content") diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 3a72a858..193ff482 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -387,8 +387,9 @@ def get_template_and_fix_tokenizer( template = TEMPLATES.get(name, None) if template is None: raise ValueError("Template {} does not exist.".format(name)) - - if tool_format: + + if tool_format is not None: + logger.info("Using tool format: {}.".format(tool_format)) template.format_tools = ToolFormatter(tool_format=tool_format) stop_words = template.stop_words @@ -625,7 +626,6 @@ _register_template( _register_template( name="empty", - format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, ) diff --git 
a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index 959742e3..dad13820 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -29,10 +29,6 @@ class DataArguments: default=None, metadata={"help": "Which template to use for constructing prompts in training and inference."}, ) - tool_format: Optional[str] = field( - default=None, - metadata={"help": "Specifies the tool format template for function calling ."}, - ) dataset: Optional[str] = field( default=None, metadata={"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."}, @@ -105,6 +101,10 @@ class DataArguments: "help": "Whether or not to pack the sequences in training. Will automatically enable in pre-training." }, ) + tool_format: Optional[str] = field( + default=None, + metadata={"help": "Tool format to use for constructing function calling examples."}, + ) tokenized_path: Optional[str] = field( default=None, metadata={"help": "Path to save or load the tokenized datasets."}, diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 874f3c5e..f33c37ee 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -291,7 +291,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: with gr.Column(scale=1): loss_viewer = gr.Plot() - input_elems.update({output_dir, config_path, device_count, ds_stage, ds_offload}) + input_elems.update({output_dir, config_path, ds_stage, ds_offload}) elem_dict.update( dict( cmd_preview_btn=cmd_preview_btn, diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 6cd21b07..549ec765 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -306,7 +306,7 @@ class Runner: def _form_config_dict(self, data: Dict["Component", Any]) -> Dict[str, Any]: config_dict = {} - skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path", "train.device_count"] + skip_ids = ["top.lang", "top.model_path", "train.output_dir", "train.config_path"] for elem, value in data.items(): elem_id = self.manager.get_id_by_elem(elem) if elem_id not in skip_ids: From 98fb3d015a634122b84ea282d70e3dabdaa46dc1 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 25 Jun 2024 01:51:29 +0800 Subject: [PATCH 114/160] fix #4419 Former-commit-id: 15069c3ca814d5ac9beec77d914b71cde7ea0f47 --- README.md | 6 ++++-- README_zh.md | 6 ++++-- docker/docker-cuda/Dockerfile | 2 +- docker/docker-cuda/docker-compose.yml | 3 ++- docker/docker-npu/Dockerfile | 2 +- docker/docker-npu/docker-compose.yml | 3 ++- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e1b45236..4b42edd7 100644 --- a/README.md +++ b/README.md @@ -448,7 +448,8 @@ docker build -f ./docker/docker-cuda/Dockerfile \ -t llamafactory:latest . 
docker run -dit --gpus=all \ - -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./hf_cache:/root/.cache/huggingface \ + -v ./ms_cache:/root/.cache/modelscope \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ @@ -471,7 +472,8 @@ docker build -f ./docker/docker-npu/Dockerfile \ # Change `device` upon your resources docker run -dit \ - -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./hf_cache:/root/.cache/huggingface \ + -v ./ms_cache:/root/.cache/modelscope \ -v ./data:/app/data \ -v ./output:/app/output \ -v /usr/local/dcmi:/usr/local/dcmi \ diff --git a/README_zh.md b/README_zh.md index 32edb1f7..3926c09d 100644 --- a/README_zh.md +++ b/README_zh.md @@ -448,7 +448,8 @@ docker build -f ./docker/docker-cuda/Dockerfile \ -t llamafactory:latest . docker run -dit --gpus=all \ - -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./hf_cache:/root/.cache/huggingface \ + -v ./ms_cache:/root/.cache/modelscope \ -v ./data:/app/data \ -v ./output:/app/output \ -p 7860:7860 \ @@ -471,7 +472,8 @@ docker build -f ./docker/docker-npu/Dockerfile \ # 根据您的资源更改 `device` docker run -dit \ - -v ./hf_cache:/root/.cache/huggingface/ \ + -v ./hf_cache:/root/.cache/huggingface \ + -v ./ms_cache:/root/.cache/modelscope \ -v ./data:/app/data \ -v ./output:/app/output \ -v /usr/local/dcmi:/usr/local/dcmi \ diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 2d20bfe4..827b7b3c 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -36,7 +36,7 @@ RUN EXTRA_PACKAGES="metrics"; \ pip uninstall -y transformer-engine flash-attn # Set up volumes -VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] # Expose port 7860 for the LLaMA Board ENV GRADIO_SERVER_PORT 7860 diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 04d6531a..1c0a3c75 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -10,7 +10,8 @@ services: PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ./hf_cache:/root/.cache/huggingface/ + - ./hf_cache:/root/.cache/huggingface + - ./ms_cache:/root/.cache/modelscope - ./data:/app/data - ./output:/app/output ports: diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index 0fdd4472..08de626b 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -30,7 +30,7 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \ pip uninstall -y transformer-engine flash-attn # Set up volumes -VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ] +VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] # Expose port 7860 for the LLaMA Board ENV GRADIO_SERVER_PORT 7860 diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml index 7fff6e73..a6b878fd 100644 --- a/docker/docker-npu/docker-compose.yml +++ b/docker/docker-npu/docker-compose.yml @@ -8,7 +8,8 @@ services: PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ./hf_cache:/root/.cache/huggingface/ + - ./hf_cache:/root/.cache/huggingface + - ./ms_cache:/root/.cache/modelscope - ./data:/app/data - ./output:/app/output - /usr/local/dcmi:/usr/local/dcmi From 9fd7a410bb899f6aa28f2007bc7573b151f3dfe4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 25 Jun 2024 01:54:53 +0800 Subject: [PATCH 115/160] tiny fix about badam Former-commit-id: 
03f49267c7406e36aee35639f86e6e0383897090 --- .github/workflows/tests.yml | 2 +- .gitignore | 8 ++-- ...ma3_lora_sft.yaml => llama3_full_sft.yaml} | 2 +- ...adam_sft.yaml => llama3_full_sft_ds3.yaml} | 4 +- examples/extras/badam/train_single_gpu.sh | 37 ------------------ examples/extras/badam/train_zero3.sh | 39 ------------------- setup.py | 2 +- src/llamafactory/hparams/parser.py | 14 +++---- src/llamafactory/train/dpo/trainer.py | 3 +- src/llamafactory/train/kto/trainer.py | 3 +- src/llamafactory/train/ppo/trainer.py | 3 +- src/llamafactory/train/pt/trainer.py | 3 +- src/llamafactory/train/rm/trainer.py | 3 +- src/llamafactory/train/sft/trainer.py | 3 +- src/llamafactory/train/trainer_utils.py | 7 +--- 15 files changed, 31 insertions(+), 102 deletions(-) rename examples/extras/badam/{llama3_lora_sft.yaml => llama3_full_sft.yaml} (97%) rename examples/extras/badam/{llama3_badam_sft.yaml => llama3_full_sft_ds3.yaml} (89%) delete mode 100644 examples/extras/badam/train_single_gpu.sh delete mode 100644 examples/extras/badam/train_zero3.sh diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 98bd9455..73d77de5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install .[torch,dev] + python -m pip install ".[torch,dev]" - name: Check quality run: | diff --git a/.gitignore b/.gitignore index 2486e728..82e6e9e6 100644 --- a/.gitignore +++ b/.gitignore @@ -160,8 +160,8 @@ cython_debug/ .idea/ # custom .gitignore -user.config -saves/ cache/ -wandb -ds_badam_exp \ No newline at end of file +config/ +saves/ +output/ +wandb/ diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_full_sft.yaml similarity index 97% rename from examples/extras/badam/llama3_lora_sft.yaml rename to examples/extras/badam/llama3_full_sft.yaml index a78de2fa..31d61c33 100644 --- a/examples/extras/badam/llama3_lora_sft.yaml +++ b/examples/extras/badam/llama3_full_sft.yaml @@ -6,6 +6,7 @@ stage: sft do_train: true finetuning_type: full use_badam: true +badam_mode: layer badam_switch_mode: ascending badam_switch_interval: 50 badam_verbose: 2 @@ -32,7 +33,6 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -pure_bf16: true ### eval val_size: 0.1 diff --git a/examples/extras/badam/llama3_badam_sft.yaml b/examples/extras/badam/llama3_full_sft_ds3.yaml similarity index 89% rename from examples/extras/badam/llama3_badam_sft.yaml rename to examples/extras/badam/llama3_full_sft_ds3.yaml index f5adb220..f2d7309f 100644 --- a/examples/extras/badam/llama3_badam_sft.yaml +++ b/examples/extras/badam/llama3_full_sft_ds3.yaml @@ -6,9 +6,11 @@ stage: sft do_train: true finetuning_type: full use_badam: true +badam_mode: layer badam_switch_mode: ascending badam_switch_interval: 50 badam_verbose: 2 +deepspeed: examples/deepspeed/ds_z3_config.json ### dataset dataset: identity,alpaca_en_demo @@ -28,7 +30,7 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 1.0e-6 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 diff --git a/examples/extras/badam/train_single_gpu.sh b/examples/extras/badam/train_single_gpu.sh deleted file mode 100644 index 8af79007..00000000 --- a/examples/extras/badam/train_single_gpu.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -export CUDA_VISIBLE_DEVICES=0 - -cd ../../.. 
- -llamafactory-cli train \ - --stage sft \ - --do_train True \ - --model_name_or_path meta-llama/Llama-2-13b-hf \ - --preprocessing_num_workers 16 \ - --finetuning_type full \ - --template default \ - --flash_attn auto \ - --dataset_dir data \ - --dataset alpaca_en_demo \ - --cutoff_len 1024 \ - --learning_rate 1e-6 \ - --num_train_epochs 3.0 \ - --max_samples 100000 \ - --per_device_train_batch_size 1 \ - --gradient_accumulation_steps 8 \ - --lr_scheduler_type cosine \ - --max_grad_norm 1.0 \ - --logging_steps 5 \ - --save_steps 100 \ - --warmup_steps 0 \ - --optim adamw_torch \ - --packing False \ - --report_to none \ - --use_badam True \ - --output_dir saves/LLaMA2-13B/full/BAdam \ - --plot_loss True \ - --ddp_timeout 180000000 \ - --include_num_input_tokens_seen True \ - --badam_mode layer \ - --badam_switch_mode ascending \ - --badam_switch_interval 50 \ No newline at end of file diff --git a/examples/extras/badam/train_zero3.sh b/examples/extras/badam/train_zero3.sh deleted file mode 100644 index 3b182134..00000000 --- a/examples/extras/badam/train_zero3.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -export CUDA_VISIBLE_DEVICES=0,1,2,3 - -cd ../../.. - -llamafactory-cli train \ - --stage sft \ - --do_train True \ - --model_name_or_path meta-llama/Llama-2-13b-hf \ - --preprocessing_num_workers 16 \ - --finetuning_type full \ - --template default \ - --flash_attn auto \ - --dataset_dir data \ - --dataset alpaca_en_demo \ - --cutoff_len 1024 \ - --learning_rate 1e-6 \ - --num_train_epochs 3.0 \ - --max_samples 100000 \ - --per_device_train_batch_size 8 \ - --gradient_accumulation_steps 2 \ - --lr_scheduler_type cosine \ - --max_grad_norm 1.0 \ - --logging_steps 5 \ - --save_steps 100 \ - --warmup_steps 0 \ - --optim adamw_torch \ - --packing False \ - --report_to none \ - --use_badam True \ - --output_dir saves/LLaMA2-13B/full/BAdam \ - --fp16 True \ - --plot_loss True \ - --ddp_timeout 180000000 \ - --include_num_input_tokens_seen True \ - --badam_mode layer \ - --badam_switch_mode ascending \ - --badam_switch_interval 50 \ - --deepspeed cache/ds_z3_config.json \ No newline at end of file diff --git a/setup.py b/setup.py index 3d2ac921..64f50a87 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ extra_require = { "bitsandbytes": ["bitsandbytes>=0.39.0"], "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], - "badam": ["badam"], + "badam": ["badam>=1.2.1"], "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index f2ccd5e6..a4b7f7a5 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -121,7 +121,7 @@ def _check_extra_dependencies( require_version("galore_torch", "To fix: pip install galore_torch") if finetuning_args.use_badam: - require_version("badam", "To fix: pip install badam") + require_version("badam>=1.2.1", "To fix: pip install badam>=1.2.1") if finetuning_args.plot_loss: require_version("matplotlib", "To fix: pip install matplotlib") @@ -214,15 +214,15 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if ( finetuning_args.use_badam - and training_args.parallel_mode.value == "distributed" + and training_args.parallel_mode == ParallelMode.DISTRIBUTED ): if finetuning_args.badam_mode == "ratio": - raise ValueError("Ratio-wise BAdam does not yet support distributed training, use layer-wise BAdam: --badam_mode layer") - if finetuning_args.badam_mode == "layer" and (not 
is_deepspeed_zero3_enabled()): - raise ValueError(f"Layer-wise BAdam only supports DeepSpeed ZeRO 3 stage.") + raise ValueError("Radio-based BAdam does not yet support distributed training, use layer-wise BAdam.") + elif not is_deepspeed_zero3_enabled(): + raise ValueError("Layer-wise BAdam only supports DeepSpeed ZeRO-3 training.") - if (finetuning_args.use_galore) and training_args.deepspeed is not None: - raise ValueError("GaLore are incompatible with DeepSpeed yet.") + if finetuning_args.use_galore and training_args.deepspeed is not None: + raise ValueError("GaLore is incompatible with DeepSpeed yet.") if model_args.infer_backend == "vllm": raise ValueError("vLLM backend is only available for API, CLI and Web.") diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index a3e0e961..ed4fd5d9 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -96,7 +96,8 @@ class CustomDPOTrainer(DPOTrainer): self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: - from badam import clip_grad_norm_old_version, BAdamCallback + from badam import BAdamCallback, clip_grad_norm_old_version + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) self.callback_handler.add_callback(BAdamCallback) diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index 0d50987f..c2edf95a 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -91,7 +91,8 @@ class CustomKTOTrainer(KTOTrainer): self.ref_model.eval() if finetuning_args.use_badam: - from badam import clip_grad_norm_old_version, BAdamCallback + from badam import BAdamCallback, clip_grad_norm_old_version + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) self.callback_handler.add_callback(BAdamCallback) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 2d5d7ffc..70d01919 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -166,7 +166,8 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True) if finetuning_args.use_badam: - from badam import clip_grad_norm_old_version, BAdamCallback + from badam import BAdamCallback, clip_grad_norm_old_version + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) self.callback_handler.add_callback(BAdamCallback) diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index d3516b41..b6fb161d 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -48,7 +48,8 @@ class CustomTrainer(Trainer): self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: - from badam import clip_grad_norm_old_version, BAdamCallback + from badam import BAdamCallback, clip_grad_norm_old_version + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) self.callback_handler.add_callback(BAdamCallback) diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 433251cf..70c2e9a0 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -72,7 +72,8 @@ class PairwiseTrainer(Trainer): self.processor = processor self.can_return_loss = True # override 
property to return eval_loss if finetuning_args.use_badam: - from badam import clip_grad_norm_old_version, BAdamCallback + from badam import BAdamCallback, clip_grad_norm_old_version + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) self.callback_handler.add_callback(BAdamCallback) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 45799b96..8f18317f 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -56,7 +56,8 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: - from badam import clip_grad_norm_old_version, BAdamCallback + from badam import BAdamCallback, clip_grad_norm_old_version + self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) self.callback_handler.add_callback(BAdamCallback) diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 0206dcb6..21d41c36 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -23,6 +23,7 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch from peft import PeftModel from transformers import Trainer +from transformers.integrations import is_deepspeed_zero3_enabled from transformers.optimization import get_scheduler from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS from transformers.trainer_pt_utils import get_parameter_names @@ -372,9 +373,6 @@ def _create_badam_optimizer( dict(params=decay_params, weight_decay=training_args.weight_decay), ] - from transformers.integrations import is_deepspeed_zero3_enabled - ds_zero3_enabled = is_deepspeed_zero3_enabled() - if finetuning_args.badam_mode == "layer": from badam import BlockOptimizer @@ -387,7 +385,7 @@ def _create_badam_optimizer( start_block=finetuning_args.badam_start_block, switch_mode=finetuning_args.badam_switch_mode, verbose=finetuning_args.badam_verbose, - ds_zero3_enabled=ds_zero3_enabled + ds_zero3_enabled=is_deepspeed_zero3_enabled(), ) logger.info( f"Using BAdam optimizer with layer-wise update, switch mode is {finetuning_args.badam_switch_mode}, " @@ -398,7 +396,6 @@ def _create_badam_optimizer( elif finetuning_args.badam_mode == "ratio": from badam import BlockOptimizerRatio - assert not ds_zero3_enabled, "BAdam with ratio-based update does not support Deepspeed ZeRO-3 yet, use layer-wise update instead: --badam_mode layer." assert finetuning_args.badam_update_ratio > 1e-6 optimizer = BlockOptimizerRatio( param_groups=param_groups, From 0f82a55305edc6e18048c5ee40ec844d7a9c9f34 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 25 Jun 2024 02:31:44 +0800 Subject: [PATCH 116/160] fix #4379 Former-commit-id: 96bedb4b6445a04ff8b97fb2aadace50b2f882df --- src/llamafactory/train/tuner.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index 788b4c4f..a02fff22 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os
+import shutil
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import torch
@@ -19,6 +21,7 @@ from transformers import PreTrainedModel
 
 from ..data import get_template_and_fix_tokenizer
 from ..extras.callbacks import LogCallback
+from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
 from ..extras.logging import get_logger
 from ..hparams import get_infer_args, get_train_args
 from ..model import load_model, load_tokenizer
@@ -98,6 +101,25 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
         safe_serialization=(not model_args.export_legacy_format),
     )
 
+    if finetuning_args.stage == "rm":
+        if model_args.adapter_name_or_path is not None:
+            vhead_path = model_args.adapter_name_or_path[-1]
+        else:
+            vhead_path = model_args.model_name_or_path
+
+        if os.path.exists(os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME)):
+            shutil.copy(
+                os.path.join(vhead_path, V_HEAD_SAFE_WEIGHTS_NAME),
+                os.path.join(model_args.export_dir, V_HEAD_SAFE_WEIGHTS_NAME),
+            )
+            logger.info("Copied valuehead to {}.".format(model_args.export_dir))
+        elif os.path.exists(os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME)):
+            shutil.copy(
+                os.path.join(vhead_path, V_HEAD_WEIGHTS_NAME),
+                os.path.join(model_args.export_dir, V_HEAD_WEIGHTS_NAME),
+            )
+            logger.info("Copied valuehead to {}.".format(model_args.export_dir))
+
     try:
         tokenizer.padding_side = "left"  # restore padding side
         tokenizer.init_kwargs["padding_side"] = "left"
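For the value-head copy above to trigger, the export has to be run with the reward-model stage. A hedged sketch of such an export config; the model, adapter and output paths are placeholders rather than values taken from the repository:

```yaml
### model (placeholders)
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/reward
template: llama3
finetuning_type: lora
stage: rm  # triggers copying value_head.safetensors / value_head.bin into export_dir

### export
export_dir: models/llama3_reward
export_size: 2
export_legacy_format: false
```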
00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 25 Jun 2024 02:55:50 +0800 Subject: [PATCH 119/160] lint Former-commit-id: c9e424d2198b5872ce118a6ab4c109bf73be2bee --- src/llamafactory/hparams/parser.py | 5 +---- tests/data/test_formatter.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index a4b7f7a5..d4bcfbc6 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -212,10 +212,7 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: ): raise ValueError("Distributed training does not support layer-wise GaLore.") - if ( - finetuning_args.use_badam - and training_args.parallel_mode == ParallelMode.DISTRIBUTED - ): + if finetuning_args.use_badam and training_args.parallel_mode == ParallelMode.DISTRIBUTED: if finetuning_args.badam_mode == "ratio": raise ValueError("Radio-based BAdam does not yet support distributed training, use layer-wise BAdam.") elif not is_deepspeed_zero3_enabled(): diff --git a/tests/data/test_formatter.py b/tests/data/test_formatter.py index a01e8a7e..37b21dc5 100644 --- a/tests/data/test_formatter.py +++ b/tests/data/test_formatter.py @@ -113,9 +113,7 @@ def test_glm4_tool_formatter(): assert formatter.apply(content=json.dumps(tools)) == [ "你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的," "你的任务是针对用户的问题和要求提供适当的答复和支持。# 可用工具\n\n" - "## test_tool\n\n{}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format( - json.dumps(tools[0], indent=4) - ) + "## test_tool\n\n{}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format(json.dumps(tools[0], indent=4)) ] From a4f69d8914ac9b73997a452242f40e23f29605c3 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Tue, 25 Jun 2024 14:34:13 +0800 Subject: [PATCH 120/160] fix #4456 Former-commit-id: 920f4fa4ca9e08bcf0d16450e085ee0fa8b4e1c5 --- .dockerignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 75cd2209..23ad75a8 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,7 +5,6 @@ cache data docker -examples saves hf_cache output From daea86e0470b8b4c0cf31265798e670bac2f6a49 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Tue, 25 Jun 2024 15:13:07 +0800 Subject: [PATCH 121/160] support flash-attn in Dockerfile Former-commit-id: 0dba000aa178f915cea7d75bf0c9d47e671a21d2 --- docker/docker-cuda/Dockerfile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 827b7b3c..06a172f0 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -35,6 +35,11 @@ RUN EXTRA_PACKAGES="metrics"; \ pip install -e .[$EXTRA_PACKAGES] && \ pip uninstall -y transformer-engine flash-attn +# Rebuild flash-attn +RUN ninja --version || \ + (pip uninstall -y ninja && pip install ninja) && \ + MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation + # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] From e115d55585380fddb4695ec6be1d25aa2463d351 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Wed, 26 Jun 2024 02:15:00 +0000 Subject: [PATCH 122/160] fix docker-compose path Former-commit-id: 9de3c24aa2a8268be06c8fef8e47f4fb6715c7ec --- docker/docker-cuda/docker-compose.yml | 8 ++++---- docker/docker-npu/docker-compose.yml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 1c0a3c75..e2d1a5ad 100644 --- 
a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -10,10 +10,10 @@ services: PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ./hf_cache:/root/.cache/huggingface - - ./ms_cache:/root/.cache/modelscope - - ./data:/app/data - - ./output:/app/output + - ../../hf_cache:/root/.cache/huggingface + - ../../ms_cache:/root/.cache/modelscope + - ../../data:/app/data + - ../../output:/app/output ports: - "7860:7860" - "8000:8000" diff --git a/docker/docker-npu/docker-compose.yml b/docker/docker-npu/docker-compose.yml index a6b878fd..657cba9f 100644 --- a/docker/docker-npu/docker-compose.yml +++ b/docker/docker-npu/docker-compose.yml @@ -8,10 +8,10 @@ services: PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: - - ./hf_cache:/root/.cache/huggingface - - ./ms_cache:/root/.cache/modelscope - - ./data:/app/data - - ./output:/app/output + - ../../hf_cache:/root/.cache/huggingface + - ../../ms_cache:/root/.cache/modelscope + - ../../data:/app/data + - ../../output:/app/output - /usr/local/dcmi:/usr/local/dcmi - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi - /usr/local/Ascend/driver:/usr/local/Ascend/driver From d82d86e16dfbb9ec5e8cc48d3c79bf559b685fb3 Mon Sep 17 00:00:00 2001 From: fanjunliang Date: Wed, 26 Jun 2024 18:21:42 +0800 Subject: [PATCH 123/160] fix torch-npu dependency Former-commit-id: 7c8a8061d0cda6342f6c883748fb6bc6650df9f9 --- docker/docker-npu/Dockerfile | 9 +++++---- setup.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index 08de626b..0ec16107 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -7,16 +7,17 @@ ENV DEBIAN_FRONTEND=noninteractive # Define installation arguments ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple +ARG EXTRA_INDEX=https://download.pytorch.org/whl/cpu # Set the working directory WORKDIR /app # Install the requirements COPY requirements.txt /app -RUN pip config set global.index-url $PIP_INDEX -RUN pip config set global.extra-index-url $PIP_INDEX -RUN python -m pip install --upgrade pip -RUN python -m pip install -r requirements.txt +RUN pip config set global.index-url $PIP_INDEX && \ + pip config set global.extra-index-url $EXTRA_INDEX && \ + pip install --upgrade pip && \ + pip install -r requirements.txt # Copy the rest of the application into the image COPY . 
/app diff --git a/setup.py b/setup.py index 64f50a87..89301d1b 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], - "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu": ["torch==2.1.0+cpu", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], From cf2dc4c44462dd917b3ddd234399f35d21eef08f Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 26 Jun 2024 19:43:16 +0800 Subject: [PATCH 124/160] fix #4556 Former-commit-id: 81faa9a985c14e83e38f42aedd228edb676b0695 --- scripts/loftq_init.py | 8 ++++++-- scripts/pissa_init.py | 8 ++++++-- src/llamafactory/data/template.py | 2 +- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index b9506fa3..4d2c01b9 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -36,15 +36,19 @@ def quantize_loftq( lora_alpha: int = None, lora_rank: int = 16, lora_dropout: float = 0, - lora_target: str = "q_proj,v_proj", + lora_target: tuple = ("q_proj", "v_proj"), save_safetensors: bool = True, ): r""" Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir """ + if isinstance(lora_target, str): + lora_target = [name.strip() for name in lora_target.split(",")] + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") + loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter) lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, @@ -52,7 +56,7 @@ def quantize_loftq( r=lora_rank, lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, lora_dropout=lora_dropout, - target_modules=[name.strip() for name in lora_target.split(",")], + target_modules=lora_target, init_lora_weights="loftq", loftq_config=loftq_config, ) diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index 50239727..ad9d161c 100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -35,21 +35,25 @@ def quantize_pissa( lora_alpha: int = None, lora_rank: int = 16, lora_dropout: float = 0, - lora_target: str = "q_proj,v_proj", + lora_target: tuple = ("q_proj", "v_proj"), save_safetensors: bool = True, ): r""" Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA) Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir """ + if isinstance(lora_target, str): + lora_target = [name.strip() for name in lora_target.split(",")] + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") + lora_config = LoraConfig( task_type=TaskType.CAUSAL_LM, r=lora_rank, lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, lora_dropout=lora_dropout, - target_modules=[name.strip() for name in lora_target.split(",")], + target_modules=lora_target, init_lora_weights="pissa" if pissa_iter == -1 else "pissa_niter_{}".format(pissa_iter), ) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 193ff482..53f16df4 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -618,7 
+618,7 @@ _register_template( _register_template( name="default", - format_user=StringFormatter(slots=["Human: {{content}}\nAssistant: "]), + format_user=StringFormatter(slots=["Human: {{content}}\nAssistant:"]), format_system=StringFormatter(slots=["{{content}}\n"]), format_separator=EmptyFormatter(slots=["\n"]), ) From 72ba29d81a498e7284ac79b3ee7b294b443521be Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 26 Jun 2024 19:52:35 +0800 Subject: [PATCH 125/160] fix #4458 Former-commit-id: aab14b15268dbe74ded22549dbd3677474868cbb --- src/llamafactory/train/ppo/trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index 70d01919..c5f6e175 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -99,10 +99,10 @@ class CustomPPOTrainer(PPOTrainer, Trainer): ) # Add deepspeed config - ppo_config.accelerator_kwargs["kwargs_handlers"] = [ - DistributedDataParallelKwargs(find_unused_parameters=training_args.ddp_find_unused_parameters) - ] if training_args.deepspeed_plugin is not None: + ppo_config.accelerator_kwargs["kwargs_handlers"] = [ + DistributedDataParallelKwargs(find_unused_parameters=training_args.ddp_find_unused_parameters) + ] ppo_config.accelerator_kwargs["deepspeed_plugin"] = training_args.deepspeed_plugin # Create optimizer and scheduler From 08fa707085d0c1587a47dfa27dce42b37cab17aa Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 26 Jun 2024 22:11:44 +0800 Subject: [PATCH 126/160] improve autogptq integration Former-commit-id: d68408c7b123b8ff92014db35cac0b24b414a6f4 --- setup.py | 2 +- .../model/model_utils/quantization.py | 41 ++++++++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/setup.py b/setup.py index 64f50a87..8254b6d4 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ extra_require = { "vllm": ["vllm>=0.4.3"], "galore": ["galore-torch"], "badam": ["badam>=1.2.1"], - "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"], + "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], "qwen": ["transformers_stream_generator"], diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 5251f84f..fab61cb8 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -57,9 +57,9 @@ class QuantizationMethod(str, Enum): HQQ = "hqq" -def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[str]: +def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[Dict[str, Any]]: r""" - TODO: remove tokenizer.decode() https://github.com/huggingface/optimum/pull/1600 + Prepares the dataset to perform AutoGPTQ. 
""" if os.path.isfile(model_args.export_quantization_dataset): data_path = FILEEXT2TYPE.get(model_args.export_quantization_dataset.split(".")[-1], None) @@ -68,20 +68,32 @@ def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "Mod data_path = model_args.export_quantization_dataset data_files = None - dataset = load_dataset(path=data_path, data_files=data_files, split="train", cache_dir=model_args.cache_dir) - maxlen = model_args.export_quantization_maxlen + dataset = load_dataset( + path=data_path, + data_files=data_files, + split="train", + cache_dir=model_args.cache_dir, + token=model_args.hf_hub_token, + ) samples = [] + maxlen = model_args.export_quantization_maxlen for _ in range(model_args.export_quantization_nsamples): + n_try = 0 while True: + if n_try > 100: + raise ValueError("Cannot find satisfying example, considering decrease `export_quantization_maxlen`.") + sample_idx = random.randint(0, len(dataset) - 1) - sample: Dict[str, torch.Tensor] = tokenizer(dataset[sample_idx]["text"], return_tensors="pt") - if sample["input_ids"].size(1) >= maxlen: + sample: Dict[str, "torch.Tensor"] = tokenizer(dataset[sample_idx]["text"], return_tensors="pt") + n_try += 1 + if sample["input_ids"].size(1) > maxlen: break # TODO: fix large maxlen word_idx = random.randint(0, sample["input_ids"].size(1) - maxlen - 1) input_ids = sample["input_ids"][:, word_idx : word_idx + maxlen] - samples.append(tokenizer.decode(input_ids[0].tolist(), skip_special_tokens=True)) + attention_mask = sample["attention_mask"][:, word_idx : word_idx + maxlen] + samples.append({"input_ids": input_ids, "attention_mask": attention_mask}) return samples @@ -119,21 +131,20 @@ def configure_quantization( logger.info("Loading {}-bit {}-quantized model.".format(quant_bits, quant_method.upper())) elif model_args.export_quantization_bit is not None: # auto-gptq - require_version("optimum>=1.16.0", "To fix: pip install optimum>=1.16.0") + require_version("optimum>=1.17.0", "To fix: pip install optimum>=1.17.0") require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0") from accelerate.utils import get_max_memory if getattr(config, "model_type", None) == "chatglm": - raise ValueError("ChatGLM model is not supported.") + raise ValueError("ChatGLM model is not supported yet.") init_kwargs["quantization_config"] = GPTQConfig( bits=model_args.export_quantization_bit, - tokenizer=tokenizer, dataset=_get_quantization_dataset(tokenizer, model_args), ) init_kwargs["device_map"] = "auto" init_kwargs["max_memory"] = get_max_memory() - logger.info("Quantizing model to {} bit.".format(model_args.export_quantization_bit)) + logger.info("Quantizing model to {} bit with AutoGPTQ.".format(model_args.export_quantization_bit)) elif model_args.quantization_bit is not None: # bnb if model_args.quantization_bit == 8: @@ -150,9 +161,9 @@ def configure_quantization( bnb_4bit_quant_storage=model_args.compute_dtype, # crucial for fsdp+qlora ) - # assign device map if: - # 1. not deepspeed zero3 and not fsdp - # 2. not auto quantization device map + # Do not assign device map if: + # 1. deepspeed zero3 or fsdp (train) + # 2. 
auto quantization device map (inference) if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or model_args.quantization_device_map == "auto": if model_args.quantization_bit != 4: raise ValueError("Only 4-bit quantized model can use fsdp+qlora or auto device map.") @@ -161,4 +172,4 @@ def configure_quantization( else: init_kwargs["device_map"] = {"": get_current_device()} # change auto device map for inference - logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit)) + logger.info("Quantizing model to {} bit with bitsandbytes.".format(model_args.quantization_bit)) From b46bd07119a69111047c303c03db9b8c1bb94f39 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Thu, 27 Jun 2024 00:11:04 +0800 Subject: [PATCH 127/160] add flash-attn installation flag in Dockerfile Former-commit-id: 2535044e95f6df628bd1f01e0eecb02407105d79 --- README.md | 1 + README_zh.md | 1 + docker/docker-cuda/Dockerfile | 9 ++++++--- docker/docker-cuda/docker-compose.yml | 1 + 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4b42edd7..cdca8333 100644 --- a/README.md +++ b/README.md @@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ + --build-arg INSTALL_FLASH_ATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . diff --git a/README_zh.md b/README_zh.md index 3926c09d..d26c8268 100644 --- a/README_zh.md +++ b/README_zh.md @@ -444,6 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ + --build-arg INSTALL_FLASH_ATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . 
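
For context on the INSTALL_FLASH_ATTN build argument introduced in this patch: the flash-attn wheel compiled when the flag is true is consumed on the Python side by requesting FlashAttention-2 at model load time. A minimal sketch of that pattern with transformers (not the project's actual loader code; the model id is a placeholder):

from transformers import AutoModelForCausalLM
from transformers.utils import is_flash_attn_2_available

# Fall back to PyTorch SDPA when no flash-attn wheel is installed.
attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model id
    torch_dtype="auto",
    attn_implementation=attn_implementation,
)
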
diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 06a172f0..44aaf538 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -6,6 +6,7 @@ FROM nvcr.io/nvidia/pytorch:24.02-py3 ARG INSTALL_BNB=false ARG INSTALL_VLLM=false ARG INSTALL_DEEPSPEED=false +ARG INSTALL_FLASH_ATTN=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory @@ -36,9 +37,11 @@ RUN EXTRA_PACKAGES="metrics"; \ pip uninstall -y transformer-engine flash-attn # Rebuild flash-attn -RUN ninja --version || \ - (pip uninstall -y ninja && pip install ninja) && \ - MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation +RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \ + ninja --version || \ + (pip uninstall -y ninja && pip install ninja) && \ + MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation \ + fi; # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 1c0a3c75..ad269cb0 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -7,6 +7,7 @@ services: INSTALL_BNB: false INSTALL_VLLM: false INSTALL_DEEPSPEED: false + INSTALL_FLASH_ATTN: false PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: From 8aaf1185a53841110e3ff857a0d6cf5161ee72a6 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 00:29:42 +0800 Subject: [PATCH 128/160] support HQQ/EETQ #4113 Former-commit-id: b7cb51ddb394f04fe4646b2c297fc8d918c9979e --- README.md | 4 +- README_zh.md | 4 +- setup.py | 8 +- src/llamafactory/extras/env.py | 5 +- src/llamafactory/hparams/model_args.py | 7 +- src/llamafactory/model/__init__.py | 2 + src/llamafactory/model/loader.py | 4 +- .../model/model_utils/quantization.py | 75 ++++++++++++------- src/llamafactory/webui/chatter.py | 10 ++- src/llamafactory/webui/common.py | 2 + src/llamafactory/webui/components/export.py | 5 +- src/llamafactory/webui/components/top.py | 13 ++-- src/llamafactory/webui/locales.py | 20 ++++- src/llamafactory/webui/manager.py | 1 + src/llamafactory/webui/runner.py | 18 ++++- src/llamafactory/webui/utils.py | 13 ++++ 16 files changed, 134 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 4b42edd7..443c8cf7 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Choose your path: - **Various models**: LLaMA, LLaVA, Mistral, Mixtral-MoE, Qwen, Yi, Gemma, Baichuan, ChatGLM, Phi, etc. - **Integrated methods**: (Continuous) pre-training, (multimodal) supervised fine-tuning, reward modeling, PPO, DPO, KTO, ORPO, etc. -- **Scalable resources**: 32-bit full-tuning, 16-bit freeze-tuning, 16-bit LoRA and 2/4/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8. +- **Scalable resources**: 16-bit full-tuning, freeze-tuning, LoRA and 2/3/4/5/6/8-bit QLoRA via AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ. - **Advanced algorithms**: GaLore, BAdam, DoRA, LongLoRA, LLaMA Pro, Mixture-of-Depths, LoRA+, LoftQ, PiSSA and Agent tuning. - **Practical tricks**: FlashAttention-2, Unsloth, RoPE scaling, NEFTune and rsLoRA. - **Experiment monitors**: LlamaBoard, TensorBoard, Wandb, MLflow, etc. 
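
The three on-the-fly backends named in the updated feature list correspond to transformers quantization configs. A brief sketch mirroring the configs constructed later in this patch (not a verbatim excerpt; it assumes a transformers release that ships HqqConfig/EetqConfig and that the hqq and eetq packages are installed):

import torch
from transformers import BitsAndBytesConfig, EetqConfig, HqqConfig

# bitsandbytes: 4- or 8-bit QLoRA-style quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

# HQQ: 1/2/3/4/5/6/8-bit; axis=0 selects the faster ATEN kernel
hqq_config = HqqConfig(nbits=4, quant_zero=False, quant_scale=False, axis=0)

# EETQ: 8-bit only
eetq_config = EetqConfig()

Any one of these can be passed as quantization_config to AutoModelForCausalLM.from_pretrained.
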
@@ -341,7 +341,7 @@ cd LLaMA-Factory pip install -e ".[torch,metrics]" ``` -Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality +Extra dependencies available: torch, torch-npu, metrics, deepspeed, bitsandbytes, hqq, eetq, gptq, awq, aqlm, vllm, galore, badam, qwen, modelscope, quality > [!TIP] > Use `pip install --no-deps -e .` to resolve package conflicts. diff --git a/README_zh.md b/README_zh.md index 3926c09d..d5172a7d 100644 --- a/README_zh.md +++ b/README_zh.md @@ -48,7 +48,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd - **多种模型**:LLaMA、LLaVA、Mistral、Mixtral-MoE、Qwen、Yi、Gemma、Baichuan、ChatGLM、Phi 等等。 - **集成方法**:(增量)预训练、(多模态)指令监督微调、奖励模型训练、PPO 训练、DPO 训练、KTO 训练、ORPO 训练等等。 -- **多种精度**:32 比特全参数微调、16 比特冻结微调、16 比特 LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8 的 2/4/8 比特 QLoRA 微调。 +- **多种精度**:16 比特全参数微调、冻结微调、LoRA 微调和基于 AQLM/AWQ/GPTQ/LLM.int8/HQQ/EETQ 的 2/3/4/5/6/8 比特 QLoRA 微调。 - **先进算法**:GaLore、BAdam、DoRA、LongLoRA、LLaMA Pro、Mixture-of-Depths、LoRA+、LoftQ、PiSSA 和 Agent 微调。 - **实用技巧**:FlashAttention-2、Unsloth、RoPE scaling、NEFTune 和 rsLoRA。 - **实验监控**:LlamaBoard、TensorBoard、Wandb、MLflow 等等。 @@ -341,7 +341,7 @@ cd LLaMA-Factory pip install -e ".[torch,metrics]" ``` -可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality +可选的额外依赖项:torch、torch-npu、metrics、deepspeed、bitsandbytes、hqq、eetq、gptq、awq、aqlm、vllm、galore、badam、qwen、modelscope、quality > [!TIP] > 遇到包冲突时,可使用 `pip install --no-deps -e .` 解决。 diff --git a/setup.py b/setup.py index 8254b6d4..d43c311c 100644 --- a/setup.py +++ b/setup.py @@ -39,12 +39,14 @@ extra_require = { "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], - "vllm": ["vllm>=0.4.3"], - "galore": ["galore-torch"], - "badam": ["badam>=1.2.1"], + "hqq": ["hqq"], + "eetq": ["eetq"], "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"], "awq": ["autoawq"], "aqlm": ["aqlm[gpu]>=1.1.0"], + "vllm": ["vllm>=0.4.3"], + "galore": ["galore-torch"], + "badam": ["badam>=1.2.1"], "qwen": ["transformers_stream_generator"], "modelscope": ["modelscope"], "dev": ["ruff", "pytest"], diff --git a/src/llamafactory/extras/env.py b/src/llamafactory/extras/env.py index ab387231..14876048 100644 --- a/src/llamafactory/extras/env.py +++ b/src/llamafactory/extras/env.py @@ -1,4 +1,7 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by the HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/commands/env.py # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
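
The hparams and quantization changes that follow add a quantization_method field and validate the requested bit width per backend. Condensed as a sketch (the mapping is taken from the checks introduced in this patch; the helper itself is illustrative and not part of the codebase):

# Bit widths accepted by each on-the-fly backend, per the checks added below.
SUPPORTED_BITS = {
    "bitsandbytes": [8, 4],
    "hqq": [8, 6, 5, 4, 3, 2, 1],
    "eetq": [8],
}

def validate_quantization(method: str, bits: int) -> None:
    """Illustrative helper: reject unsupported method/bit combinations."""
    if bits not in SUPPORTED_BITS.get(method, []):
        raise ValueError(f"{method} does not support {bits}-bit quantization.")

The same mapping is surfaced in the web UI through the can_quantize_to helper further down in this patch.
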
diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py index 3f21145d..087c8c38 100644 --- a/src/llamafactory/hparams/model_args.py +++ b/src/llamafactory/hparams/model_args.py @@ -77,6 +77,10 @@ class ModelArguments: default=True, metadata={"help": "Whether or not to use memory-efficient model loading."}, ) + quantization_method: Literal["bitsandbytes", "hqq", "eetq"] = field( + default="bitsandbytes", + metadata={"help": "Quantization method to use for on-the-fly quantization."}, + ) quantization_bit: Optional[int] = field( default=None, metadata={"help": "The number of bits to quantize the model using bitsandbytes."}, @@ -235,9 +239,6 @@ class ModelArguments: if self.new_special_tokens is not None: # support multiple special tokens self.new_special_tokens = [token.strip() for token in self.new_special_tokens.split(",")] - assert self.quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." - assert self.export_quantization_bit in [None, 8, 4, 3, 2], "We only accept 2/3/4/8-bit quantization." - if self.export_quantization_bit is not None and self.export_quantization_dataset is None: raise ValueError("Quantization dataset is necessary for exporting.") diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 4abbaa1b..48cfe76c 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -14,10 +14,12 @@ from .loader import load_config, load_model, load_tokenizer from .model_utils.misc import find_all_linear_modules +from .model_utils.quantization import QuantizationMethod from .model_utils.valuehead import load_valuehead_params __all__ = [ + "QuantizationMethod", "load_config", "load_model", "load_tokenizer", diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index e1015821..1261d17a 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -186,11 +186,11 @@ def load_model( trainable_params, all_param = count_parameters(model) if is_trainable: - param_stats = "trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( + param_stats = "trainable params: {:,} || all params: {:,} || trainable%: {:.4f}".format( trainable_params, all_param, 100 * trainable_params / all_param ) else: - param_stats = "all params: {:d}".format(all_param) + param_stats = "all params: {:,}".format(all_param) logger.info(param_stats) diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index fab61cb8..3203b4aa 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, Any, Dict, List import torch from datasets import load_dataset -from transformers import BitsAndBytesConfig, GPTQConfig +from transformers import BitsAndBytesConfig, EetqConfig, GPTQConfig, HqqConfig from transformers.integrations import is_deepspeed_zero3_enabled from transformers.modeling_utils import is_fsdp_enabled from transformers.utils.versions import require_version @@ -59,7 +59,7 @@ class QuantizationMethod(str, Enum): def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "ModelArguments") -> List[Dict[str, Any]]: r""" - Prepares the dataset to perform AutoGPTQ. + Prepares the tokenized dataset to perform AutoGPTQ. Do not use tensor output for JSON serialization. 
""" if os.path.isfile(model_args.export_quantization_dataset): data_path = FILEEXT2TYPE.get(model_args.export_quantization_dataset.split(".")[-1], None) @@ -93,7 +93,7 @@ def _get_quantization_dataset(tokenizer: "PreTrainedTokenizer", model_args: "Mod word_idx = random.randint(0, sample["input_ids"].size(1) - maxlen - 1) input_ids = sample["input_ids"][:, word_idx : word_idx + maxlen] attention_mask = sample["attention_mask"][:, word_idx : word_idx + maxlen] - samples.append({"input_ids": input_ids, "attention_mask": attention_mask}) + samples.append({"input_ids": input_ids.tolist(), "attention_mask": attention_mask.tolist()}) return samples @@ -105,7 +105,7 @@ def configure_quantization( init_kwargs: Dict[str, Any], ) -> None: r""" - Priority: PTQ-quantized (training) > AutoGPTQ (export) > Bitsandbytes (training) + Priority: PTQ-quantized (train/infer) > AutoGPTQ (export) > On-the-fly quantization (train/infer) """ if getattr(config, "quantization_config", None): # ptq if is_deepspeed_zero3_enabled(): @@ -131,6 +131,9 @@ def configure_quantization( logger.info("Loading {}-bit {}-quantized model.".format(quant_bits, quant_method.upper())) elif model_args.export_quantization_bit is not None: # auto-gptq + if model_args.export_quantization_bit not in [8, 4, 3, 2]: + raise ValueError("AutoGPTQ only accepts 2/3/4/8-bit quantization.") + require_version("optimum>=1.17.0", "To fix: pip install optimum>=1.17.0") require_version("auto_gptq>=0.5.0", "To fix: pip install auto_gptq>=0.5.0") from accelerate.utils import get_max_memory @@ -146,30 +149,48 @@ def configure_quantization( init_kwargs["max_memory"] = get_max_memory() logger.info("Quantizing model to {} bit with AutoGPTQ.".format(model_args.export_quantization_bit)) - elif model_args.quantization_bit is not None: # bnb - if model_args.quantization_bit == 8: - require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0") - init_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True) + elif model_args.quantization_bit is not None: # on-the-fly + if model_args.quantization_method == QuantizationMethod.BITS_AND_BYTES.value: + if model_args.quantization_bit == 8: + require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0") + init_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True) + elif model_args.quantization_bit == 4: + require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0") + init_kwargs["quantization_config"] = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=model_args.compute_dtype, + bnb_4bit_use_double_quant=model_args.double_quantization, + bnb_4bit_quant_type=model_args.quantization_type, + bnb_4bit_quant_storage=model_args.compute_dtype, # crucial for fsdp+qlora + ) + else: + raise ValueError("Bitsandbytes only accepts 4-bit or 8-bit quantization.") - elif model_args.quantization_bit == 4: - require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0") - init_kwargs["quantization_config"] = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_compute_dtype=model_args.compute_dtype, - bnb_4bit_use_double_quant=model_args.double_quantization, - bnb_4bit_quant_type=model_args.quantization_type, - bnb_4bit_quant_storage=model_args.compute_dtype, # crucial for fsdp+qlora - ) + # Do not assign device map if: + # 1. deepspeed zero3 or fsdp (train) + # 2. 
auto quantization device map (inference) + if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or model_args.quantization_device_map == "auto": + if model_args.quantization_bit != 4: + raise ValueError("Only 4-bit quantized model can use fsdp+qlora or auto device map.") - # Do not assign device map if: - # 1. deepspeed zero3 or fsdp (train) - # 2. auto quantization device map (inference) - if is_deepspeed_zero3_enabled() or is_fsdp_enabled() or model_args.quantization_device_map == "auto": - if model_args.quantization_bit != 4: - raise ValueError("Only 4-bit quantized model can use fsdp+qlora or auto device map.") + require_version("bitsandbytes>=0.43.0", "To fix: pip install bitsandbytes>=0.43.0") + else: + init_kwargs["device_map"] = {"": get_current_device()} # change auto device map for inference - require_version("bitsandbytes>=0.43.0", "To fix: pip install bitsandbytes>=0.43.0") - else: - init_kwargs["device_map"] = {"": get_current_device()} # change auto device map for inference + logger.info("Quantizing model to {} bit with bitsandbytes.".format(model_args.quantization_bit)) + elif model_args.quantization_method == QuantizationMethod.HQQ.value: + if model_args.quantization_bit not in [8, 6, 5, 4, 3, 2, 1]: + raise ValueError("HQQ only accepts 1/2/3/4/5/6/8-bit quantization.") - logger.info("Quantizing model to {} bit with bitsandbytes.".format(model_args.quantization_bit)) + require_version("hqq", "To fix: pip install hqq") + init_kwargs["quantization_config"] = HqqConfig( + nbits=model_args.quantization_bit, quant_zero=False, quant_scale=False, axis=0 + ) # use ATEN kernel (axis=0) for performance + logger.info("Quantizing model to {} bit with HQQ.".format(model_args.quantization_bit)) + elif model_args.quantization_method == QuantizationMethod.EETQ.value: + if model_args.quantization_bit != 8: + raise ValueError("EETQ only accepts 8-bit quantization.") + + require_version("eetq", "To fix: pip install eetq") + init_kwargs["quantization_config"] = EetqConfig() + logger.info("Quantizing model to {} bit with EETQ.".format(model_args.quantization_bit)) diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index 652c341c..8abef920 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -23,7 +23,7 @@ from ..data import Role from ..extras.constants import PEFT_METHODS from ..extras.misc import torch_gc from ..extras.packages import is_gradio_available -from .common import get_save_dir +from .common import QUANTIZATION_BITS, get_save_dir from .locales import ALERTS @@ -76,11 +76,17 @@ class WebChatModel(ChatModel): yield error return + if get("top.quantization_bit") in QUANTIZATION_BITS: + quantization_bit = int(get("top.quantization_bit")) + else: + quantization_bit = None + yield ALERTS["info_loading"][lang] args = dict( model_name_or_path=model_path, finetuning_type=finetuning_type, - quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, + quantization_bit=quantization_bit, + quantization_method=get("top.quantization_method"), template=get("top.template"), flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto", use_unsloth=(get("top.booster") == "unsloth"), diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index 980428a4..bced18f0 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -47,6 +47,8 @@ DEFAULT_CONFIG_DIR = "config" DEFAULT_DATA_DIR = "data" DEFAULT_SAVE_DIR = "saves" USER_CONFIG = 
"user_config.yaml" +QUANTIZATION_BITS = ["8", "6", "5", "4", "3", "2", "1"] +GPTQ_BITS = ["8", "4", "3", "2"] def get_save_dir(*paths: str) -> os.PathLike: diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 14257949..0a938f02 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -18,7 +18,7 @@ from ...extras.constants import PEFT_METHODS from ...extras.misc import torch_gc from ...extras.packages import is_gradio_available from ...train.tuner import export_model -from ..common import get_save_dir +from ..common import GPTQ_BITS, get_save_dir from ..locales import ALERTS @@ -32,9 +32,6 @@ if TYPE_CHECKING: from ..engine import Engine -GPTQ_BITS = ["8", "4", "3", "2"] - - def can_quantize(checkpoint_path: Union[str, List[str]]) -> "gr.Dropdown": if isinstance(checkpoint_path, list) and len(checkpoint_path) != 0: return gr.Dropdown(value="none", interactive=False) diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index 18b9a7d2..e331d5e4 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -18,7 +18,7 @@ from ...data import TEMPLATES from ...extras.constants import METHODS, SUPPORTED_MODELS from ...extras.packages import is_gradio_available from ..common import get_model_info, list_checkpoints, save_config -from ..utils import can_quantize +from ..utils import can_quantize, can_quantize_to if is_gradio_available(): @@ -43,10 +43,11 @@ def create_top() -> Dict[str, "Component"]: with gr.Accordion(open=False) as advanced_tab: with gr.Row(): - quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", scale=2) - template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=2) - rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=3) - booster = gr.Radio(choices=["none", "flashattn2", "unsloth"], value="none", scale=3) + quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", scale=1) + quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=1) + template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=1) + rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=2) + booster = gr.Radio(choices=["auto", "flashattn2", "unsloth"], value="auto", scale=2) visual_inputs = gr.Checkbox(scale=1) model_name.change(get_model_info, [model_name], [model_path, template, visual_inputs], queue=False).then( @@ -58,6 +59,7 @@ def create_top() -> Dict[str, "Component"]: list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False ) checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False) + quantization_method.change(can_quantize_to, [quantization_method], [quantization_bit], queue=False) return dict( lang=lang, @@ -67,6 +69,7 @@ def create_top() -> Dict[str, "Component"]: checkpoint_path=checkpoint_path, advanced_tab=advanced_tab, quantization_bit=quantization_bit, + quantization_method=quantization_method, template=template, rope_scaling=rope_scaling, booster=booster, diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index cd166584..435876e7 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -85,15 +85,29 @@ LOCALES = { "quantization_bit": { "en": { "label": "Quantization bit", - "info": "Enable 4/8-bit 
model quantization (QLoRA).", + "info": "Enable quantization (QLoRA).", }, "ru": { "label": "Уровень квантования", - "info": "Включить 4/8-битное квантование модели (QLoRA).", + "info": "Включить квантование (QLoRA).", }, "zh": { "label": "量化等级", - "info": "启用 4/8 比特模型量化(QLoRA)。", + "info": "启用量化(QLoRA)。", + }, + }, + "quantization_method": { + "en": { + "label": "Quantization method", + "info": "Quantization algorithm to use.", + }, + "ru": { + "label": "Метод квантования", + "info": "Алгоритм квантования, который следует использовать.", + }, + "zh": { + "label": "量化方法", + "info": "使用的量化算法。", }, }, "template": { diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index 7e9b801a..ebe9f1b9 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -71,6 +71,7 @@ class Manager: self._id_to_elem["top.finetuning_type"], self._id_to_elem["top.checkpoint_path"], self._id_to_elem["top.quantization_bit"], + self._id_to_elem["top.quantization_method"], self._id_to_elem["top.template"], self._id_to_elem["top.rope_scaling"], self._id_to_elem["top.booster"], diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index 549ec765..f7fbac30 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -22,7 +22,7 @@ from transformers.trainer import TRAINING_ARGS_NAME from ..extras.constants import LLAMABOARD_CONFIG, PEFT_METHODS, TRAINING_STAGES from ..extras.misc import is_gpu_or_npu_available, torch_gc from ..extras.packages import is_gradio_available -from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir, load_config +from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, QUANTIZATION_BITS, get_save_dir, load_config from .locales import ALERTS, LOCALES from .utils import abort_process, gen_cmd, get_eval_results, get_trainer_info, load_args, save_args, save_cmd @@ -104,6 +104,11 @@ class Runner: model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type") user_config = load_config() + if get("top.quantization_bit") in QUANTIZATION_BITS: + quantization_bit = int(get("top.quantization_bit")) + else: + quantization_bit = None + args = dict( stage=TRAINING_STAGES[get("train.training_stage")], do_train=True, @@ -111,7 +116,8 @@ class Runner: cache_dir=user_config.get("cache_dir", None), preprocessing_num_workers=16, finetuning_type=finetuning_type, - quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, + quantization_bit=quantization_bit, + quantization_method=get("top.quantization_method"), template=get("top.template"), rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None, flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto", @@ -234,13 +240,19 @@ class Runner: model_name, finetuning_type = get("top.model_name"), get("top.finetuning_type") user_config = load_config() + if get("top.quantization_bit") in QUANTIZATION_BITS: + quantization_bit = int(get("top.quantization_bit")) + else: + quantization_bit = None + args = dict( stage="sft", model_name_or_path=get("top.model_path"), cache_dir=user_config.get("cache_dir", None), preprocessing_num_workers=16, finetuning_type=finetuning_type, - quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, + quantization_bit=quantization_bit, + quantization_method=get("top.quantization_method"), template=get("top.template"), rope_scaling=get("top.rope_scaling") 
if get("top.rope_scaling") in ["linear", "dynamic"] else None, flash_attn="fa2" if get("top.booster") == "flashattn2" else "auto", diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index a616bcba..4f313e4e 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -25,6 +25,7 @@ from yaml import safe_dump, safe_load from ..extras.constants import PEFT_METHODS, RUNNING_LOG, TRAINER_LOG, TRAINING_ARGS, TRAINING_STAGES from ..extras.packages import is_gradio_available, is_matplotlib_available from ..extras.ploting import gen_loss_plot +from ..model import QuantizationMethod from .common import DEFAULT_CACHE_DIR, DEFAULT_CONFIG_DIR, get_save_dir from .locales import ALERTS @@ -55,6 +56,18 @@ def can_quantize(finetuning_type: str) -> "gr.Dropdown": return gr.Dropdown(interactive=True) +def can_quantize_to(quantization_method: str) -> "gr.Dropdown": + r""" + Returns the available quantization bits. + """ + if quantization_method == QuantizationMethod.BITS_AND_BYTES.value: + return gr.Dropdown(choices=["none", "8", "4"]) + elif quantization_method == QuantizationMethod.HQQ.value: + return gr.Dropdown(choices=["none", "8", "6", "5", "4", "3", "2", "1"]) + elif quantization_method == QuantizationMethod.EETQ.value: + return gr.Dropdown(choices=["none", "8"]) + + def change_stage(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> Tuple[List[str], bool]: r""" Modifys states after changing the training stage. From d1cda4ec68026ab91d4be5b5b77f46ad5669bec7 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 00:36:04 +0800 Subject: [PATCH 129/160] tiny fix Former-commit-id: 69dac21ed9f07977b4540eb838a0ef93f3d3abc4 --- src/llamafactory/webui/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 4f313e4e..069a2a2a 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -61,11 +61,11 @@ def can_quantize_to(quantization_method: str) -> "gr.Dropdown": Returns the available quantization bits. 
""" if quantization_method == QuantizationMethod.BITS_AND_BYTES.value: - return gr.Dropdown(choices=["none", "8", "4"]) + return gr.Dropdown(choices=["none", "8", "4"], value="none") elif quantization_method == QuantizationMethod.HQQ.value: - return gr.Dropdown(choices=["none", "8", "6", "5", "4", "3", "2", "1"]) + return gr.Dropdown(choices=["none", "8", "6", "5", "4", "3", "2", "1"], value="none") elif quantization_method == QuantizationMethod.EETQ.value: - return gr.Dropdown(choices=["none", "8"]) + return gr.Dropdown(choices=["none", "8"], value="none") def change_stage(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> Tuple[List[str], bool]: From 5a7cb9af4ec2540adf233d6543a51fbd54d9d6b4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 00:46:41 +0800 Subject: [PATCH 130/160] tiny fix Former-commit-id: c6747a39dbbdda8decaa104499918bc7ac5f02e4 --- src/llamafactory/webui/components/top.py | 2 +- src/llamafactory/webui/utils.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index e331d5e4..9df3f062 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -43,7 +43,7 @@ def create_top() -> Dict[str, "Component"]: with gr.Accordion(open=False) as advanced_tab: with gr.Row(): - quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", scale=1) + quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True, scale=1) quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes", scale=1) template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default", scale=1) rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", scale=2) diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 069a2a2a..14616ac4 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -61,11 +61,13 @@ def can_quantize_to(quantization_method: str) -> "gr.Dropdown": Returns the available quantization bits. 
""" if quantization_method == QuantizationMethod.BITS_AND_BYTES.value: - return gr.Dropdown(choices=["none", "8", "4"], value="none") + available_bits = ["none", "8", "4"] elif quantization_method == QuantizationMethod.HQQ.value: - return gr.Dropdown(choices=["none", "8", "6", "5", "4", "3", "2", "1"], value="none") + available_bits = ["none", "8", "6", "5", "4", "3", "2", "1"] elif quantization_method == QuantizationMethod.EETQ.value: - return gr.Dropdown(choices=["none", "8"], value="none") + available_bits = ["none", "8"] + + return gr.Dropdown(choices=available_bits) def change_stage(training_stage: str = list(TRAINING_STAGES.keys())[0]) -> Tuple[List[str], bool]: From 8e5b4bddf4528e0a795de3663eb4d9696a81b551 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 00:53:33 +0800 Subject: [PATCH 131/160] update examples Former-commit-id: cce238f7d07919b79237bc9ab39265766c20f020 --- examples/README.md | 4 ++-- examples/README_zh.md | 4 ++-- examples/train_lora/llama3_lora_dpo.yaml | 2 +- ...3_lora_sft_bitsandbytes.yaml => llama3_lora_sft_otfq.yaml} | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) rename examples/train_qlora/{llama3_lora_sft_bitsandbytes.yaml => llama3_lora_sft_otfq.yaml} (88%) diff --git a/examples/README.md b/examples/README.md index 007a81ab..d5aca5ad 100644 --- a/examples/README.md +++ b/examples/README.md @@ -94,10 +94,10 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3. ### QLoRA Fine-Tuning -#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes Quantization (Recommended) +#### Supervised Fine-Tuning with 4/8-bit Bitsandbytes/HQQ/EETQ Quantization (Recommended) ```bash -llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml ``` #### Supervised Fine-Tuning with 4/8-bit GPTQ Quantization diff --git a/examples/README_zh.md b/examples/README_zh.md index b9d90f25..d96bf882 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -94,10 +94,10 @@ FORCE_TORCHRUN=1 llamafactory-cli train examples/train_lora/llama3_lora_sft_ds3. 
### QLoRA 微调 -#### 基于 4/8 比特 Bitsandbytes 量化进行指令监督微调(推荐) +#### 基于 4/8 比特 Bitsandbytes/HQQ/EETQ 量化进行指令监督微调(推荐) ```bash -llamafactory-cli train examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml +llamafactory-cli train examples/train_qlora/llama3_lora_sft_otfq.yaml ``` #### 基于 4/8 比特 GPTQ 量化进行指令监督微调 diff --git a/examples/train_lora/llama3_lora_dpo.yaml b/examples/train_lora/llama3_lora_dpo.yaml index db25fb51..188e5078 100644 --- a/examples/train_lora/llama3_lora_dpo.yaml +++ b/examples/train_lora/llama3_lora_dpo.yaml @@ -7,7 +7,7 @@ do_train: true finetuning_type: lora lora_target: all pref_beta: 0.1 -pref_loss: sigmoid # [sigmoid (dpo), orpo, simpo] +pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo] ### dataset dataset: dpo_en_demo diff --git a/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml b/examples/train_qlora/llama3_lora_sft_otfq.yaml similarity index 88% rename from examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml rename to examples/train_qlora/llama3_lora_sft_otfq.yaml index cc773991..9c73b439 100644 --- a/examples/train_qlora/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/train_qlora/llama3_lora_sft_otfq.yaml @@ -1,6 +1,7 @@ ### model model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct quantization_bit: 4 +quantization_method: bitsandbytes # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)] ### method stage: sft From d1aad72826f33333b768e8184a20f38405fb78a6 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 01:12:25 +0800 Subject: [PATCH 132/160] add quant checks Former-commit-id: 15bb053e3549739b1a2134640a659b0f35df7de7 --- src/llamafactory/model/model_utils/quantization.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py index 3203b4aa..317646e0 100644 --- a/src/llamafactory/model/model_utils/quantization.py +++ b/src/llamafactory/model/model_utils/quantization.py @@ -108,8 +108,11 @@ def configure_quantization( Priority: PTQ-quantized (train/infer) > AutoGPTQ (export) > On-the-fly quantization (train/infer) """ if getattr(config, "quantization_config", None): # ptq - if is_deepspeed_zero3_enabled(): - raise ValueError("DeepSpeed ZeRO-3 is incompatible with PTQ-quantized models.") + if model_args.quantization_bit is not None: + logger.warning("`quantization_bit` will not affect on the PTQ-quantized models.") + + if is_deepspeed_zero3_enabled() or is_fsdp_enabled(): + raise ValueError("DeepSpeed ZeRO-3 or FSDP is incompatible with PTQ-quantized models.") quantization_config: Dict[str, Any] = getattr(config, "quantization_config", None) quant_method = quantization_config.get("quant_method", "") @@ -182,6 +185,9 @@ def configure_quantization( if model_args.quantization_bit not in [8, 6, 5, 4, 3, 2, 1]: raise ValueError("HQQ only accepts 1/2/3/4/5/6/8-bit quantization.") + if is_deepspeed_zero3_enabled() or is_fsdp_enabled(): + raise ValueError("HQQ quantization is incompatible with DeepSpeed ZeRO-3 or FSDP.") + require_version("hqq", "To fix: pip install hqq") init_kwargs["quantization_config"] = HqqConfig( nbits=model_args.quantization_bit, quant_zero=False, quant_scale=False, axis=0 @@ -191,6 +197,9 @@ def configure_quantization( if model_args.quantization_bit != 8: raise ValueError("EETQ only accepts 8-bit quantization.") + if is_deepspeed_zero3_enabled() or is_fsdp_enabled(): + raise ValueError("EETQ quantization is incompatible with DeepSpeed ZeRO-3 or FSDP.") + 
require_version("eetq", "To fix: pip install eetq") init_kwargs["quantization_config"] = EetqConfig() logger.info("Quantizing model to {} bit with EETQ.".format(model_args.quantization_bit)) From 89d9dd5aa558a78d189462dc65549e6cddb16143 Mon Sep 17 00:00:00 2001 From: hzhaoy Date: Thu, 27 Jun 2024 13:49:57 +0800 Subject: [PATCH 133/160] fix #4579 Former-commit-id: 0fa298ff6a4febea36ea9f11c7594277a77e6e9b --- src/llamafactory/train/sft/trainer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 8f18317f..f0a86dff 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -53,6 +53,9 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): self.processor = processor if finetuning_args.pissa_convert: + if self.is_deepspeed_enabled: + self.accelerator.deepspeed_config = self.accelerator.state.deepspeed_plugin.deepspeed_config + self.deepspeed = self._wrap_model(self.model_wrapped) self.save_model(os.path.join(self.args.output_dir, "pissa_init")) if finetuning_args.use_badam: From 31fcd03f3ca5186b9ab120d2d692a04de1785640 Mon Sep 17 00:00:00 2001 From: fanjunliang Date: Thu, 27 Jun 2024 15:21:55 +0800 Subject: [PATCH 134/160] support docker-npu-[amd64|arm64] build Former-commit-id: 25f16f5e299c94175e62bac9f0da5b47a2bb31b7 --- README.md | 2 +- README_zh.md | 2 +- docker/docker-npu/Dockerfile | 12 +++++++++--- setup.py | 3 ++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4b42edd7..9c509ff0 100644 --- a/README.md +++ b/README.md @@ -465,7 +465,7 @@ For Ascend NPU users: ```bash # Choose docker image upon your environment -docker build -f ./docker/docker-npu/Dockerfile \ +docker build --platform linux/arm64 -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . diff --git a/README_zh.md b/README_zh.md index 3926c09d..c3fb6ecf 100644 --- a/README_zh.md +++ b/README_zh.md @@ -465,7 +465,7 @@ docker exec -it llamafactory bash ```bash # 根据您的环境选择镜像 -docker build -f ./docker/docker-npu/Dockerfile \ +docker build --platform linux/arm64 -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index 0ec16107..8d80397e 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -1,10 +1,11 @@ # Use the Ubuntu 22.04 image with CANN 8.0.rc1 # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags -FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 +FROM --platform=$TARGETPLATFORM cosdt/cann:8.0.rc1-910b-ubuntu22.04 ENV DEBIAN_FRONTEND=noninteractive # Define installation arguments +ARG TARGETPLATFORM ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple ARG EXTRA_INDEX=https://download.pytorch.org/whl/cpu @@ -15,7 +16,6 @@ WORKDIR /app # Install the requirements COPY requirements.txt /app RUN pip config set global.index-url $PIP_INDEX && \ - pip config set global.extra-index-url $EXTRA_INDEX && \ pip install --upgrade pip && \ pip install -r requirements.txt @@ -23,7 +23,13 @@ RUN pip config set global.index-url $PIP_INDEX && \ COPY . 
/app # Install the LLaMA Factory -RUN EXTRA_PACKAGES="torch-npu,metrics"; \ +RUN EXTRA_PACKAGES="metrics"; \ + if [ "$TARGETPLATFORM" == "linux/arm64" ]; then \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},torch-npu-arm64"; \ + else \ + pip config set global.extra-index-url $EXTRA_INDEX; \ + EXTRA_PACKAGES="${EXTRA_PACKAGES},torch-npu-amd64"; \ + fi; \ if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ diff --git a/setup.py b/setup.py index 89301d1b..594070cd 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,8 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], - "torch-npu": ["torch==2.1.0+cpu", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu-arm64": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu-amd64": ["torch==2.1.0+cpu", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], From 556eca918d025669a45804cad96a542dc1d663e0 Mon Sep 17 00:00:00 2001 From: faddddeout <39449491+injet-zhou@users.noreply.github.com> Date: Thu, 27 Jun 2024 09:58:00 +0000 Subject: [PATCH 135/160] Exit the process with the subprocess's return code when utilizing the CLI Former-commit-id: ab42a4e2501a80fba1704a506bd1209a441570fa --- src/llamafactory/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index e4846780..c92f556b 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -91,7 +91,7 @@ def main(): master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) - subprocess.run( + subproc = subprocess.run( ( "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} " "--master_addr {master_addr} --master_port {master_port} {file_name} {args}" @@ -106,6 +106,7 @@ def main(): ), shell=True, ) + sys.exit(subproc.returncode) else: run_exp() elif command == Command.WEBDEMO: From 4b123f49cbd6ac6bf507865374fcff8ed98d14da Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 27 Jun 2024 19:16:46 +0800 Subject: [PATCH 136/160] Update setup.py Former-commit-id: 42293ab26f7fd7ffb77b308655ccd47b7c2ffa84 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 594070cd..dd333c48 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], - "torch-npu-arm64": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], - "torch-npu-amd64": ["torch==2.1.0+cpu", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], + "torch-npu-amd": ["torch==2.1.0+cpu", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], From e930a42083d9e47fde89519345458a416ada4c85 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 27 Jun 2024 19:17:35 +0800 Subject: [PATCH 137/160] Update README.md Former-commit-id: 01869ccbb5af2704c9d5bfdd4f2ff30978fb466d --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c509ff0..4b42edd7 100644 --- a/README.md +++ b/README.md @@ -465,7 +465,7 @@ For Ascend NPU users: ```bash # Choose docker image upon your 
environment -docker build --platform linux/arm64 -f ./docker/docker-npu/Dockerfile \ +docker build -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . From a2ebdbc112dbdf640be9d7624c068994c57cd377 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 27 Jun 2024 19:17:52 +0800 Subject: [PATCH 138/160] Update README_zh.md Former-commit-id: 62f2e27f4355aa35c26e1146dbe90fac3b380118 --- README_zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh.md b/README_zh.md index c3fb6ecf..3926c09d 100644 --- a/README_zh.md +++ b/README_zh.md @@ -465,7 +465,7 @@ docker exec -it llamafactory bash ```bash # 根据您的环境选择镜像 -docker build --platform linux/arm64 -f ./docker/docker-npu/Dockerfile \ +docker build -f ./docker/docker-npu/Dockerfile \ --build-arg INSTALL_DEEPSPEED=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . From f6eda1c35df85b70b1aaa9b877cf4fd0333b0113 Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 27 Jun 2024 19:38:15 +0800 Subject: [PATCH 139/160] Update setup.py Former-commit-id: 544e1844fb237eed3eb621f4e6e355eac2ff7b85 --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index dd333c48..64f50a87 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,6 @@ def get_requires(): extra_require = { "torch": ["torch>=1.13.1"], "torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"], - "torch-npu-amd": ["torch==2.1.0+cpu", "torch-npu==2.1.0.post3", "decorator"], "metrics": ["nltk", "jieba", "rouge-chinese"], "deepspeed": ["deepspeed>=0.10.0"], "bitsandbytes": ["bitsandbytes>=0.39.0"], From 88018000ac7f2a042d424ad68f13ef9bdedd84ad Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 27 Jun 2024 19:51:25 +0800 Subject: [PATCH 140/160] Update Dockerfile Former-commit-id: 7dea6840256472f8aa2c642f11d9e30bfa0fb96f --- docker/docker-npu/Dockerfile | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index 8d80397e..d1d176e9 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -1,39 +1,38 @@ # Use the Ubuntu 22.04 image with CANN 8.0.rc1 # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags -FROM --platform=$TARGETPLATFORM cosdt/cann:8.0.rc1-910b-ubuntu22.04 +FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 +# Set env ENV DEBIAN_FRONTEND=noninteractive # Define installation arguments -ARG TARGETPLATFORM ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple -ARG EXTRA_INDEX=https://download.pytorch.org/whl/cpu +# x86 torch cpu index +ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu # Set the working directory WORKDIR /app # Install the requirements COPY requirements.txt /app -RUN pip config set global.index-url $PIP_INDEX && \ - pip install --upgrade pip && \ - pip install -r requirements.txt +RUN pip config set global.index-url "$PIP_INDEX" && \ + pip config set global.extra-index-url "$PIP_INDEX" && \ + python -m pip install --upgrade pip && \ + python -m pip install -r requirements.txt # Copy the rest of the application into the image COPY . 
/app # Install the LLaMA Factory -RUN EXTRA_PACKAGES="metrics"; \ - if [ "$TARGETPLATFORM" == "linux/arm64" ]; then \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},torch-npu-arm64"; \ - else \ - pip config set global.extra-index-url $EXTRA_INDEX; \ - EXTRA_PACKAGES="${EXTRA_PACKAGES},torch-npu-amd64"; \ - fi; \ - if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ +RUN EXTRA_PACKAGES="torch-npu,metrics"; \ + if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ - pip install -e .[$EXTRA_PACKAGES] && \ + if [ "$(uname -i)" != "aarch64" ]; then \ + pip config set global.extra-index-url "$TORCH_INDEX" \ + fi; \ + pip install -e ".[$EXTRA_PACKAGES]" && \ pip uninstall -y transformer-engine flash-attn # Set up volumes From 12a91774b00f4bfd8869112ed726ed3d09d1b4fa Mon Sep 17 00:00:00 2001 From: hoshi-hiyouga Date: Thu, 27 Jun 2024 19:57:40 +0800 Subject: [PATCH 141/160] Update Dockerfile Former-commit-id: a239f535a64378b74ef34799cd8e2e4a78f00f4c --- docker/docker-npu/Dockerfile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index d1d176e9..71ab3daf 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -2,13 +2,11 @@ # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 -# Set env ENV DEBIAN_FRONTEND=noninteractive # Define installation arguments ARG INSTALL_DEEPSPEED=false ARG PIP_INDEX=https://pypi.org/simple -# x86 torch cpu index ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu # Set the working directory @@ -17,7 +15,7 @@ WORKDIR /app # Install the requirements COPY requirements.txt /app RUN pip config set global.index-url "$PIP_INDEX" && \ - pip config set global.extra-index-url "$PIP_INDEX" && \ + pip config set global.extra-index-url "$TORCH_INDEX" && \ python -m pip install --upgrade pip && \ python -m pip install -r requirements.txt @@ -29,9 +27,6 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \ if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ - if [ "$(uname -i)" != "aarch64" ]; then \ - pip config set global.extra-index-url "$TORCH_INDEX" \ - fi; \ pip install -e ".[$EXTRA_PACKAGES]" && \ pip uninstall -y transformer-engine flash-attn From bf99223a80e7ba5d3f3513456207f4776e63e00e Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 20:14:48 +0800 Subject: [PATCH 142/160] tiny fix Former-commit-id: c1a78a3a9f8ab9d57577cee37f9c457d60863ba2 --- README.md | 2 +- README_zh.md | 2 +- docker/docker-cuda/Dockerfile | 36 ++++++++++++++------------- docker/docker-cuda/docker-compose.yml | 2 +- docker/docker-npu/Dockerfile | 4 +-- src/llamafactory/cli.py | 4 +-- src/llamafactory/hparams/parser.py | 4 +-- 7 files changed, 28 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 45ac23d8..44aed7e8 100644 --- a/README.md +++ b/README.md @@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASH_ATTN=false \ + --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . 
diff --git a/README_zh.md b/README_zh.md index c5fd4f69..7e3d51ad 100644 --- a/README_zh.md +++ b/README_zh.md @@ -444,7 +444,7 @@ docker build -f ./docker/docker-cuda/Dockerfile \ --build-arg INSTALL_BNB=false \ --build-arg INSTALL_VLLM=false \ --build-arg INSTALL_DEEPSPEED=false \ - --build-arg INSTALL_FLASH_ATTN=false \ + --build-arg INSTALL_FLASHATTN=false \ --build-arg PIP_INDEX=https://pypi.org/simple \ -t llamafactory:latest . diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 44aaf538..718390a8 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -2,11 +2,14 @@ # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html FROM nvcr.io/nvidia/pytorch:24.02-py3 +# Define environments +ENV MAX_JOBS=4 + # Define installation arguments ARG INSTALL_BNB=false ARG INSTALL_VLLM=false ARG INSTALL_DEEPSPEED=false -ARG INSTALL_FLASH_ATTN=false +ARG INSTALL_FLASHATTN=false ARG PIP_INDEX=https://pypi.org/simple # Set the working directory @@ -14,34 +17,33 @@ WORKDIR /app # Install the requirements COPY requirements.txt /app -RUN pip config set global.index-url $PIP_INDEX -RUN pip config set global.extra-index-url $PIP_INDEX -RUN python -m pip install --upgrade pip -RUN python -m pip install -r requirements.txt +RUN pip config set global.index-url "$PIP_INDEX" && \ + pip config set global.extra-index-url "$PIP_INDEX" && \ + python -m pip install --upgrade pip && \ + python -m pip install -r requirements.txt + +# Rebuild flash attention +RUN pip uninstall -y transformer-engine flash-attn && \ + if [ "$INSTALL_FLASHATTN" == "true" ]; then \ + pip uninstall -y ninja && pip install ninja && \ + pip install --no-cache-dir flash-attn --no-build-isolation \ + fi; # Copy the rest of the application into the image COPY . 
/app # Install the LLaMA Factory RUN EXTRA_PACKAGES="metrics"; \ - if [ "$INSTALL_BNB" = "true" ]; then \ + if [ "$INSTALL_BNB" == "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \ fi; \ - if [ "$INSTALL_VLLM" = "true" ]; then \ + if [ "$INSTALL_VLLM" == "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \ fi; \ - if [ "$INSTALL_DEEPSPEED" = "true" ]; then \ + if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ - pip install -e .[$EXTRA_PACKAGES] && \ - pip uninstall -y transformer-engine flash-attn - -# Rebuild flash-attn -RUN if [ "$INSTALL_FLASH_ATTN" = "true" ]; then \ - ninja --version || \ - (pip uninstall -y ninja && pip install ninja) && \ - MAX_JOBS=4 pip install --no-cache-dir flash-attn --no-build-isolation \ - fi; + pip install -e ".[$EXTRA_PACKAGES]" # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] diff --git a/docker/docker-cuda/docker-compose.yml b/docker/docker-cuda/docker-compose.yml index 4ccb0c04..16267dc3 100644 --- a/docker/docker-cuda/docker-compose.yml +++ b/docker/docker-cuda/docker-compose.yml @@ -7,7 +7,7 @@ services: INSTALL_BNB: false INSTALL_VLLM: false INSTALL_DEEPSPEED: false - INSTALL_FLASH_ATTN: false + INSTALL_FLASHATTN: false PIP_INDEX: https://pypi.org/simple container_name: llamafactory volumes: diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index 71ab3daf..e413d4e3 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -2,6 +2,7 @@ # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 +# Define environments ENV DEBIAN_FRONTEND=noninteractive # Define installation arguments @@ -27,8 +28,7 @@ RUN EXTRA_PACKAGES="torch-npu,metrics"; \ if [ "$INSTALL_DEEPSPEED" == "true" ]; then \ EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \ fi; \ - pip install -e ".[$EXTRA_PACKAGES]" && \ - pip uninstall -y transformer-engine flash-attn + pip install -e ".[$EXTRA_PACKAGES]" # Set up volumes VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ] diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index c92f556b..48eb2898 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -91,7 +91,7 @@ def main(): master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port)) - subproc = subprocess.run( + process = subprocess.run( ( "torchrun --nnodes {nnodes} --node_rank {node_rank} --nproc_per_node {nproc_per_node} " "--master_addr {master_addr} --master_port {master_port} {file_name} {args}" @@ -106,7 +106,7 @@ def main(): ), shell=True, ) - sys.exit(subproc.returncode) + sys.exit(process.returncode) else: run_exp() elif command == Command.WEBDEMO: diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index d4bcfbc6..6017907c 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -199,8 +199,8 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if not is_torch_bf16_gpu_available(): raise ValueError("This device does not support `pure_bf16`.") - if training_args.deepspeed: - raise ValueError("`pure_bf16` is incompatible with DeepSpeed.") + if is_deepspeed_zero3_enabled(): + raise ValueError("`pure_bf16` is incompatible with 
DeepSpeed ZeRO-3.") if training_args.fp16 or training_args.bf16: raise ValueError("Turn off mixed precision training when using `pure_bf16`.") From 95bf795de4fb5507fddf3c6807dab3662b034b17 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 27 Jun 2024 20:29:16 +0800 Subject: [PATCH 143/160] fix docker file Former-commit-id: 688f02decb1185deb74b26444f7643cab7d355c1 --- docker/docker-cuda/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index 718390a8..e4fac987 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -26,8 +26,8 @@ RUN pip config set global.index-url "$PIP_INDEX" && \ RUN pip uninstall -y transformer-engine flash-attn && \ if [ "$INSTALL_FLASHATTN" == "true" ]; then \ pip uninstall -y ninja && pip install ninja && \ - pip install --no-cache-dir flash-attn --no-build-isolation \ - fi; + pip install --no-cache-dir flash-attn --no-build-isolation; \ + fi # Copy the rest of the application into the image COPY . /app From 9103fdf8666ffc511af60f577a8d07945cc0d0af Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 00:41:58 +0800 Subject: [PATCH 144/160] fix #4549 Former-commit-id: c9fdef10de737d1f433209812ef73e29cb60490a --- src/llamafactory/hparams/parser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 6017907c..5c59fbe0 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -202,9 +202,6 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if is_deepspeed_zero3_enabled(): raise ValueError("`pure_bf16` is incompatible with DeepSpeed ZeRO-3.") - if training_args.fp16 or training_args.bf16: - raise ValueError("Turn off mixed precision training when using `pure_bf16`.") - if ( finetuning_args.use_galore and finetuning_args.galore_layerwise From 46f0189e88ec7fdfa13a8bb01446b4ec63c3fab5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 01:04:24 +0800 Subject: [PATCH 145/160] refactor pissa, improve llamaboard Former-commit-id: 619556e46c19718f702c97df5d570a2a4c5fb13a --- src/llamafactory/extras/misc.py | 72 ++-------- src/llamafactory/hparams/finetuning_args.py | 6 +- src/llamafactory/hparams/parser.py | 12 +- .../{extras => train}/callbacks.py | 135 +++++++++++++++++- src/llamafactory/train/dpo/trainer.py | 21 +-- src/llamafactory/train/kto/trainer.py | 13 +- src/llamafactory/train/ppo/trainer.py | 35 +++-- src/llamafactory/train/ppo/workflow.py | 4 +- src/llamafactory/train/pt/trainer.py | 23 ++- src/llamafactory/train/rm/trainer.py | 19 +-- src/llamafactory/train/rm/workflow.py | 6 +- src/llamafactory/train/sft/trainer.py | 23 +-- src/llamafactory/train/trainer_utils.py | 48 ------- src/llamafactory/train/tuner.py | 4 +- src/llamafactory/webui/runner.py | 1 + src/llamafactory/webui/utils.py | 13 +- 16 files changed, 219 insertions(+), 216 deletions(-) rename src/llamafactory/{extras => train}/callbacks.py (59%) diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 93153b3e..30c287bd 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -1,4 +1,7 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by the HuggingFace's PEFT library. 
+# https://github.com/huggingface/peft/blob/v0.10.0/src/peft/peft_model.py # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,15 +17,11 @@ import gc import os -from typing import TYPE_CHECKING, Dict, Tuple +from typing import TYPE_CHECKING, Tuple import torch -from peft import PeftModel -from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList, PreTrainedModel +from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList from transformers.utils import ( - SAFE_WEIGHTS_NAME, - WEIGHTS_NAME, - is_safetensors_available, is_torch_bf16_gpu_available, is_torch_cuda_available, is_torch_mps_available, @@ -31,15 +30,9 @@ from transformers.utils import ( ) from transformers.utils.versions import require_version -from .constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from .logging import get_logger -if is_safetensors_available(): - from safetensors import safe_open - from safetensors.torch import save_file - - _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available() try: _is_bf16_available = is_torch_bf16_gpu_available() @@ -48,8 +41,6 @@ except Exception: if TYPE_CHECKING: - from trl import AutoModelForCausalLMWithValueHead - from ..hparams import ModelArguments @@ -99,7 +90,7 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: if num_params == 0 and hasattr(param, "ds_numel"): num_params = param.ds_numel - # Due to the design of 4bit linear layers from bitsandbytes, multiply the number of parameters by 2 + # Due to the design of 4bit linear layers from bitsandbytes, multiply the number of parameters by itemsize if param.__class__.__name__ == "Params4bit": if hasattr(param, "quant_storage") and hasattr(param.quant_storage, "itemsize"): num_bytes = param.quant_storage.itemsize @@ -117,51 +108,6 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: return trainable_params, all_param -def fix_valuehead_checkpoint( - model: "AutoModelForCausalLMWithValueHead", output_dir: str, safe_serialization: bool -) -> None: - r""" - The model is already unwrapped. - - There are three cases: - 1. full tuning without ds_zero3: state_dict = {"model.layers.*": ..., "v_head.summary.*": ...} - 2. lora tuning without ds_zero3: state_dict = {"v_head.summary.*": ...} - 3. under deepspeed zero3: state_dict = {"pretrained_model.model.layers.*": ..., "v_head.summary.*": ...} - - We assume `stage3_gather_16bit_weights_on_model_save=true`. 
- """ - if not isinstance(model.pretrained_model, (PreTrainedModel, PeftModel)): - return - - if safe_serialization: - path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) - with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: - state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} - else: - path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME) - state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu") - - decoder_state_dict = {} - v_head_state_dict = {} - for name, param in state_dict.items(): - if name.startswith("v_head."): - v_head_state_dict[name] = param - else: - decoder_state_dict[name.replace("pretrained_model.", "")] = param - - os.remove(path_to_checkpoint) - model.pretrained_model.save_pretrained( - output_dir, state_dict=decoder_state_dict or None, safe_serialization=safe_serialization - ) - - if safe_serialization: - save_file(v_head_state_dict, os.path.join(output_dir, V_HEAD_SAFE_WEIGHTS_NAME), metadata={"format": "pt"}) - else: - torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME)) - - logger.info("Value head model saved at: {}".format(output_dir)) - - def get_current_device() -> torch.device: r""" Gets the current available device. @@ -201,7 +147,7 @@ def get_logits_processor() -> "LogitsProcessorList": return logits_processor -def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype: +def infer_optim_dtype(model_dtype: "torch.dtype") -> "torch.dtype": r""" Infers the optimal dtype according to the model_dtype and device compatibility. """ @@ -220,7 +166,7 @@ def is_gpu_or_npu_available() -> bool: return is_torch_npu_available() or is_torch_cuda_available() -def has_tokenized_data(path: os.PathLike) -> bool: +def has_tokenized_data(path: "os.PathLike") -> bool: r""" Checks if the path has a tokenized dataset. 
""" diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index b676891e..28da95ad 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -379,10 +379,10 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments, GaloreA if self.loraplus_lr_ratio is not None and self.finetuning_type != "lora": raise ValueError("`loraplus_lr_ratio` is only valid for LoRA training.") - if self.pissa_convert and self.finetuning_type != "lora": - raise ValueError("`pissa_convert` is only valid for LoRA training.") + if self.pissa_init and self.finetuning_type != "lora": + raise ValueError("`pissa_init` is only valid for LoRA training.") - if self.pissa_convert and (self.stage in ["rm", "ppo", "kto"] or self.use_ref_model): + if self.pissa_init and (self.stage in ["ppo", "kto"] or self.use_ref_model): raise ValueError("Cannot use PiSSA for current training stage.") if self.train_mm_proj_only and self.finetuning_type != "full": diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index 5c59fbe0..8b2ea4c1 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -83,9 +83,6 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora": raise ValueError("Adapter is only valid for the LoRA method.") - if model_args.use_unsloth and is_deepspeed_zero3_enabled(): - raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.") - if model_args.quantization_bit is not None: if finetuning_args.finetuning_type != "lora": raise ValueError("Quantization is only compatible with the LoRA method.") @@ -186,6 +183,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED: raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.") + if training_args.deepspeed and training_args.parallel_mode != ParallelMode.DISTRIBUTED: + raise ValueError("Please use `FORCE_TORCHRUN=1` to launch DeepSpeed training.") + if training_args.max_steps == -1 and data_args.streaming: raise ValueError("Please specify `max_steps` in streaming mode.") @@ -195,6 +195,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if training_args.do_train and model_args.quantization_device_map == "auto": raise ValueError("Cannot use device map for quantized models in training.") + if finetuning_args.pissa_init and is_deepspeed_zero3_enabled(): + raise ValueError("PiSSA is incompatible with DeepSpeed ZeRO-3.") + if finetuning_args.pure_bf16: if not is_torch_bf16_gpu_available(): raise ValueError("This device does not support `pure_bf16`.") @@ -224,6 +227,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS: if model_args.visual_inputs and data_args.packing: raise ValueError("Cannot use packing in MLLM fine-tuning.") + if model_args.use_unsloth and is_deepspeed_zero3_enabled(): + raise ValueError("Unsloth is incompatible with DeepSpeed ZeRO-3.") + _verify_model_args(model_args, finetuning_args) _check_extra_dependencies(model_args, finetuning_args, training_args) diff --git a/src/llamafactory/extras/callbacks.py b/src/llamafactory/train/callbacks.py similarity index 59% rename from src/llamafactory/extras/callbacks.py rename to src/llamafactory/train/callbacks.py index 0dff6a69..dc9c981e 100644 --- 
a/src/llamafactory/extras/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -1,4 +1,7 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# +# This code is inspired by the HuggingFace's transformers library. +# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,22 +25,78 @@ from concurrent.futures import ThreadPoolExecutor from datetime import timedelta from typing import TYPE_CHECKING, Any, Dict, Optional +import torch import transformers -from transformers import TrainerCallback +from peft import PeftModel +from transformers import PreTrainedModel, ProcessorMixin, TrainerCallback from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, has_length +from transformers.utils import ( + SAFE_WEIGHTS_NAME, + WEIGHTS_NAME, + is_safetensors_available, +) -from .constants import TRAINER_LOG -from .logging import LoggerHandler, get_logger -from .misc import fix_valuehead_checkpoint +from ..extras.constants import TRAINER_LOG, V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME +from ..extras.logging import LoggerHandler, get_logger +if is_safetensors_available(): + from safetensors import safe_open + from safetensors.torch import save_file + if TYPE_CHECKING: from transformers import TrainerControl, TrainerState, TrainingArguments + from trl import AutoModelForCausalLMWithValueHead logger = get_logger(__name__) +def fix_valuehead_checkpoint( + model: "AutoModelForCausalLMWithValueHead", output_dir: str, safe_serialization: bool +) -> None: + r""" + The model is already unwrapped. + + There are three cases: + 1. full tuning without ds_zero3: state_dict = {"model.layers.*": ..., "v_head.summary.*": ...} + 2. lora tuning without ds_zero3: state_dict = {"v_head.summary.*": ...} + 3. under deepspeed zero3: state_dict = {"pretrained_model.model.layers.*": ..., "v_head.summary.*": ...} + + We assume `stage3_gather_16bit_weights_on_model_save=true`. 
+ """ + if not isinstance(model.pretrained_model, (PreTrainedModel, PeftModel)): + return + + if safe_serialization: + path_to_checkpoint = os.path.join(output_dir, SAFE_WEIGHTS_NAME) + with safe_open(path_to_checkpoint, framework="pt", device="cpu") as f: + state_dict: Dict[str, torch.Tensor] = {key: f.get_tensor(key) for key in f.keys()} + else: + path_to_checkpoint = os.path.join(output_dir, WEIGHTS_NAME) + state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu") + + decoder_state_dict = {} + v_head_state_dict = {} + for name, param in state_dict.items(): + if name.startswith("v_head."): + v_head_state_dict[name] = param + else: + decoder_state_dict[name.replace("pretrained_model.", "")] = param + + os.remove(path_to_checkpoint) + model.pretrained_model.save_pretrained( + output_dir, state_dict=decoder_state_dict or None, safe_serialization=safe_serialization + ) + + if safe_serialization: + save_file(v_head_state_dict, os.path.join(output_dir, V_HEAD_SAFE_WEIGHTS_NAME), metadata={"format": "pt"}) + else: + torch.save(v_head_state_dict, os.path.join(output_dir, V_HEAD_WEIGHTS_NAME)) + + logger.info("Value head model saved at: {}".format(output_dir)) + + class FixValueHeadModelCallback(TrainerCallback): def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" @@ -51,8 +110,70 @@ class FixValueHeadModelCallback(TrainerCallback): ) +class SaveProcessorCallback(TrainerCallback): + def __init__(self, processor: "ProcessorMixin") -> None: + r""" + Initializes a callback for saving the processor. + """ + self.processor = processor + + def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): + r""" + Event called at the end of training. + """ + if args.should_save: + getattr(self.processor, "image_processor").save_pretrained(args.output_dir) + + +class PissaConvertCallback(TrainerCallback): + r""" + Initializes a callback for converting the PiSSA adapter to a normal one. + """ + + def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): + r""" + Event called at the beginning of training. + """ + if args.should_save: + model = kwargs.pop("model") + pissa_init_dir = os.path.join(args.output_dir, "pissa_init") + logger.info("Initial PiSSA adatper will be saved at: {}.".format(pissa_init_dir)) + if isinstance(model, PeftModel): + init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights") + setattr(model.peft_config["default"], "init_lora_weights", True) + model.save_pretrained(pissa_init_dir, safe_serialization=args.save_safetensors) + setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights) + + def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): + r""" + Event called at the end of training. + """ + if args.should_save: + model = kwargs.pop("model") + pissa_init_dir = os.path.join(args.output_dir, "pissa_init") + pissa_backup_dir = os.path.join(args.output_dir, "pissa_backup") + pissa_convert_dir = os.path.join(args.output_dir, "pissa_converted") + logger.info("Converted PiSSA adapter will be saved at: {}.".format(pissa_convert_dir)) + # 1. save a pissa backup with init_lora_weights: True + # 2. save a converted lora with init_lora_weights: pissa + # 3. load the pissa backup with init_lora_weights: True + # 4. 
delete the initial adapter and change init_lora_weights to pissa + if isinstance(model, PeftModel): + init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights") + setattr(model.peft_config["default"], "init_lora_weights", True) + model.save_pretrained(pissa_backup_dir, safe_serialization=args.save_safetensors) + setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights) + model.save_pretrained( + pissa_convert_dir, safe_serialization=args.save_safetensors, convert_pissa_to_lora=pissa_init_dir + ) + model.load_adapter(pissa_backup_dir, "default", is_trainable=True) + model.set_adapter("default") + model.delete_adapter("pissa_init") + setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights) + + class LogCallback(TrainerCallback): - def __init__(self, output_dir: str) -> None: + def __init__(self) -> None: r""" Initializes a callback for logging training and evaluation status. """ @@ -70,7 +191,7 @@ class LogCallback(TrainerCallback): self.webui_mode = os.environ.get("LLAMABOARD_ENABLED", "0").lower() in ["true", "1"] if self.webui_mode: signal.signal(signal.SIGABRT, self._set_abort) - self.logger_handler = LoggerHandler(output_dir) + self.logger_handler = LoggerHandler(os.environ.get("LLAMABOARD_WORKDIR")) logging.root.addHandler(self.logger_handler) transformers.logging.add_handler(self.logger_handler) diff --git a/src/llamafactory/train/dpo/trainer.py b/src/llamafactory/train/dpo/trainer.py index ed4fd5d9..e45467d6 100644 --- a/src/llamafactory/train/dpo/trainer.py +++ b/src/llamafactory/train/dpo/trainer.py @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import warnings from collections import defaultdict from contextlib import nullcontext @@ -29,7 +28,8 @@ from trl import DPOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX -from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler, get_batch_logps +from ..callbacks import PissaConvertCallback, SaveProcessorCallback +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps if TYPE_CHECKING: @@ -54,7 +54,6 @@ class CustomDPOTrainer(DPOTrainer): disable_dropout_in_model(ref_model) self.finetuning_args = finetuning_args - self.processor = processor self.reference_free = False self.use_dpo_data_collator = True # hack to avoid warning self.generate_during_eval = False # disable at evaluation @@ -92,14 +91,17 @@ class CustomDPOTrainer(DPOTrainer): self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) self.ref_model.eval() + if processor is not None: + self.add_callback(SaveProcessorCallback(processor)) + if finetuning_args.pissa_convert: - self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + self.callback_handler.add_callback(PissaConvertCallback) if finetuning_args.use_badam: from badam import BAdamCallback, clip_grad_norm_old_version self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) - self.callback_handler.add_callback(BAdamCallback) + self.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: @@ -112,15 +114,6 @@ class CustomDPOTrainer(DPOTrainer): create_custom_scheduler(self.args, num_training_steps, optimizer) return super().create_scheduler(num_training_steps, optimizer) - def _save(self, output_dir: Optional[str] = None, 
state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - output_dir = output_dir if output_dir is not None else self.args.output_dir - if self.finetuning_args.pissa_convert: - convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) - - if self.processor is not None: - getattr(self.processor, "image_processor").save_pretrained(output_dir) - def odds_ratio_loss(self, chosen_logps: "torch.Tensor", rejected_logps: "torch.Tensor") -> "torch.Tensor": r""" Computes ORPO's odds ratio (OR) loss for batched log probabilities of the policy model. diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py index c2edf95a..460311e4 100644 --- a/src/llamafactory/train/kto/trainer.py +++ b/src/llamafactory/train/kto/trainer.py @@ -27,6 +27,7 @@ from trl import KTOTrainer from trl.trainer import disable_dropout_in_model from ...extras.constants import IGNORE_INDEX +from ..callbacks import SaveProcessorCallback from ..trainer_utils import create_custom_optimzer, create_custom_scheduler, get_batch_logps @@ -53,7 +54,6 @@ class CustomKTOTrainer(KTOTrainer): disable_dropout_in_model(ref_model) self.finetuning_args = finetuning_args - self.processor = processor self.reference_free = False self.use_dpo_data_collator = True # hack to avoid warning self.generate_during_eval = False # disable at evaluation @@ -90,11 +90,14 @@ class CustomKTOTrainer(KTOTrainer): self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) self.ref_model.eval() + if processor is not None: + self.add_callback(SaveProcessorCallback(processor)) + if finetuning_args.use_badam: from badam import BAdamCallback, clip_grad_norm_old_version self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) - self.callback_handler.add_callback(BAdamCallback) + self.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: @@ -113,12 +116,6 @@ class CustomKTOTrainer(KTOTrainer): """ return Trainer._get_train_sampler(self) - def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - output_dir = output_dir if output_dir is not None else self.args.output_dir - if self.processor is not None: - getattr(self.processor, "image_processor").save_pretrained(output_dir) - def forward( self, model: "PreTrainedModel", batch: Dict[str, "torch.Tensor"], prefix: Literal["", "kl_"] = "" ) -> Tuple["torch.Tensor", "torch.Tensor"]: diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py index c5f6e175..57f0b848 100644 --- a/src/llamafactory/train/ppo/trainer.py +++ b/src/llamafactory/train/ppo/trainer.py @@ -27,6 +27,7 @@ from accelerate.utils import DistributedDataParallelKwargs from tqdm import tqdm from transformers import GenerationConfig, Trainer, TrainerControl, TrainerState from transformers.optimization import get_scheduler +from transformers.trainer_callback import CallbackHandler from transformers.trainer_pt_utils import remove_dummy_checkpoint from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR from transformers.utils import SAFE_WEIGHTS_NAME, WEIGHTS_NAME @@ -34,9 +35,9 @@ from trl import PPOConfig, PPOTrainer from trl.core import PPODecorators, logprobs_from_logits from trl.models.utils import unwrap_model_for_generation -from ...extras.callbacks import FixValueHeadModelCallback, LogCallback from 
...extras.logging import get_logger from ...extras.misc import AverageMeter, count_parameters, get_current_device, get_logits_processor +from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback from ..trainer_utils import create_custom_optimzer, create_custom_scheduler from .ppo_utils import dump_layernorm, get_rewards_from_server, replace_model, restore_layernorm @@ -131,7 +132,6 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.finetuning_args = finetuning_args self.reward_model = reward_model self.current_device = get_current_device() # patch for deepspeed training - self.processor = processor self.generation_config = GenerationConfig( pad_token_id=self.tokenizer.pad_token_id, @@ -143,8 +143,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.control = TrainerControl() self.is_deepspeed_enabled = getattr(self.accelerator.state, "deepspeed_plugin", None) is not None self.is_fsdp_enabled = getattr(self.accelerator.state, "fsdp_plugin", None) is not None - self.log_callback, self.save_callback = callbacks[0], callbacks[1] - assert isinstance(self.log_callback, LogCallback) and isinstance(self.save_callback, FixValueHeadModelCallback) + self.callback_handler = CallbackHandler( + [callbacks], self.accelerator.unwrap_model(self.model), self.tokenizer, self.optimizer, self.lr_scheduler + ) if self.args.max_steps > 0: logger.info("max_steps is given, it will override any value given in num_train_epochs") @@ -165,11 +166,16 @@ class CustomPPOTrainer(PPOTrainer, Trainer): else: self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True) + self.add_callback(FixValueHeadModelCallback) + + if processor is not None: + self.add_callback(SaveProcessorCallback(processor)) + if finetuning_args.use_badam: from badam import BAdamCallback, clip_grad_norm_old_version self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) - self.callback_handler.add_callback(BAdamCallback) + self.add_callback(BAdamCallback) def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None: r""" @@ -219,7 +225,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): dataiter = iter(self.dataloader) loss_meter = AverageMeter() reward_meter = AverageMeter() - self.log_callback.on_train_begin(self.args, self.state, self.control) + self.callback_handler.on_train_begin(self.args, self.state, self.control) for step in tqdm(range(max_steps), disable=not self.is_local_process_zero()): try: @@ -257,7 +263,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): logger.warning("Failed to save stats due to unknown errors.") self.state.global_step += 1 - self.log_callback.on_step_end(self.args, self.state, self.control) + self.callback_handler.on_step_end(self.args, self.state, self.control) if self.is_local_process_zero() and (step + 1) % self.args.logging_steps == 0: logs = dict( @@ -269,7 +275,7 @@ class CustomPPOTrainer(PPOTrainer, Trainer): tqdm.write(str(logs)) logs["step"] = step self.state.log_history.append(logs) - self.log_callback.on_log(self.args, self.state, self.control) + self.callback_handler.on_log(self.args, self.state, self.control, logs) loss_meter.reset() reward_meter.reset() @@ -277,17 +283,12 @@ class CustomPPOTrainer(PPOTrainer, Trainer): self.save_model( os.path.join(self.args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, self.state.global_step)) ) - self.save_callback.on_save( - self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model) - ) + self.callback_handler.on_save(self.args, self.state, 
self.control) if self.control.should_epoch_stop or self.control.should_training_stop: break - self.log_callback.on_train_end(self.args, self.state, self.control) - self.save_callback.on_train_end( - self.args, self.state, self.control, model=self.accelerator.unwrap_model(self.model) - ) + self.callback_handler.on_train_end(self.args, self.state, self.control) def create_optimizer( self, @@ -505,7 +506,3 @@ class CustomPPOTrainer(PPOTrainer, Trainer): elif self.args.should_save: self._save(output_dir) - - if self.processor is not None and self.args.should_save: - output_dir = output_dir if output_dir is not None else self.args.output_dir - getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py index 4f4d2820..651296f3 100644 --- a/src/llamafactory/train/ppo/workflow.py +++ b/src/llamafactory/train/ppo/workflow.py @@ -20,10 +20,9 @@ from typing import TYPE_CHECKING, List, Optional from transformers import DataCollatorWithPadding from ...data import get_dataset -from ...extras.callbacks import FixValueHeadModelCallback -from ...extras.misc import fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer +from ..callbacks import FixValueHeadModelCallback, fix_valuehead_checkpoint from ..trainer_utils import create_ref_model, create_reward_model from .trainer import CustomPPOTrainer @@ -75,6 +74,7 @@ def run_ppo( ppo_trainer.save_model() if training_args.should_save: fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors) + ppo_trainer.save_state() # must be called after save_model to have a folder if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss: plot_loss(training_args.output_dir, keys=["loss", "reward"]) diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index b6fb161d..e8f180a6 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os from types import MethodType -from typing import TYPE_CHECKING, Dict, Optional +from typing import TYPE_CHECKING, Optional from transformers import Trainer from ...extras.logging import get_logger -from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler +from ..callbacks import PissaConvertCallback, SaveProcessorCallback +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -42,16 +42,18 @@ class CustomTrainer(Trainer): ) -> None: super().__init__(**kwargs) self.finetuning_args = finetuning_args - self.processor = processor + + if processor is not None: + self.add_callback(SaveProcessorCallback(processor)) if finetuning_args.pissa_convert: - self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + self.add_callback(PissaConvertCallback) if finetuning_args.use_badam: from badam import BAdamCallback, clip_grad_norm_old_version self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) - self.callback_handler.add_callback(BAdamCallback) + self.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: @@ -63,12 +65,3 @@ class CustomTrainer(Trainer): ) -> "torch.optim.lr_scheduler.LRScheduler": create_custom_scheduler(self.args, num_training_steps, optimizer) return super().create_scheduler(num_training_steps, optimizer) - - def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - output_dir = output_dir if output_dir is not None else self.args.output_dir - if self.finetuning_args.pissa_convert: - convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) - - if self.processor is not None: - getattr(self.processor, "image_processor").save_pretrained(output_dir) diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 70c2e9a0..5eceead8 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -46,6 +46,7 @@ import torch from transformers import Trainer from ...extras.logging import get_logger +from ..callbacks import FixValueHeadModelCallback, PissaConvertCallback, SaveProcessorCallback from ..trainer_utils import create_custom_optimzer, create_custom_scheduler @@ -69,13 +70,20 @@ class PairwiseTrainer(Trainer): ) -> None: super().__init__(**kwargs) self.finetuning_args = finetuning_args - self.processor = processor self.can_return_loss = True # override property to return eval_loss + self.add_callback(FixValueHeadModelCallback) + + if processor is not None: + self.add_callback(SaveProcessorCallback(processor)) + + if finetuning_args.pissa_convert: + self.add_callback(PissaConvertCallback) + if finetuning_args.use_badam: from badam import BAdamCallback, clip_grad_norm_old_version self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) - self.callback_handler.add_callback(BAdamCallback) + self.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: @@ -88,12 +96,6 @@ class PairwiseTrainer(Trainer): create_custom_scheduler(self.args, num_training_steps, optimizer) return super().create_scheduler(num_training_steps, optimizer) - def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - output_dir = output_dir if 
output_dir is not None else self.args.output_dir - if self.processor is not None: - getattr(self.processor, "image_processor").save_pretrained(output_dir) - def compute_loss( self, model: "PreTrainedModel", inputs: Dict[str, torch.Tensor], return_outputs: bool = False ) -> Union[torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor]]]: @@ -164,4 +166,5 @@ class PairwiseTrainer(Trainer): res: List[str] = [] for c_score, r_score in zip(chosen_scores, rejected_scores): res.append(json.dumps({"chosen": round(float(c_score), 2), "rejected": round(float(r_score), 2)})) + writer.write("\n".join(res)) diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py index 6f24e964..e0b32b77 100644 --- a/src/llamafactory/train/rm/workflow.py +++ b/src/llamafactory/train/rm/workflow.py @@ -40,10 +40,9 @@ from typing import TYPE_CHECKING, List, Optional from ...data import PairwiseDataCollatorWithPadding, get_dataset, split_dataset -from ...extras.callbacks import FixValueHeadModelCallback -from ...extras.misc import fix_valuehead_checkpoint from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer +from ..callbacks import fix_valuehead_checkpoint from ..trainer_utils import create_modelcard_and_push from .metric import compute_accuracy from .trainer import PairwiseTrainer @@ -77,7 +76,7 @@ def run_rm( args=training_args, finetuning_args=finetuning_args, data_collator=data_collator, - callbacks=callbacks + [FixValueHeadModelCallback()], + callbacks=callbacks, compute_metrics=compute_accuracy, **tokenizer_module, **split_dataset(dataset, data_args, training_args), @@ -89,6 +88,7 @@ def run_rm( trainer.save_model() if training_args.should_save: fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors) + trainer.log_metrics("train", train_result.metrics) trainer.save_metrics("train", train_result.metrics) trainer.save_state() diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index f0a86dff..06bd2b6b 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -26,7 +26,8 @@ from transformers import Seq2SeqTrainer from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from ..trainer_utils import convert_pissa_adapter, create_custom_optimzer, create_custom_scheduler +from ..callbacks import PissaConvertCallback, SaveProcessorCallback +from ..trainer_utils import create_custom_optimzer, create_custom_scheduler if TYPE_CHECKING: @@ -50,19 +51,18 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): ) -> None: super().__init__(**kwargs) self.finetuning_args = finetuning_args - self.processor = processor + + if processor is not None: + self.add_callback(SaveProcessorCallback(processor)) if finetuning_args.pissa_convert: - if self.is_deepspeed_enabled: - self.accelerator.deepspeed_config = self.accelerator.state.deepspeed_plugin.deepspeed_config - self.deepspeed = self._wrap_model(self.model_wrapped) - self.save_model(os.path.join(self.args.output_dir, "pissa_init")) + self.add_callback(PissaConvertCallback) if finetuning_args.use_badam: from badam import BAdamCallback, clip_grad_norm_old_version self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator) - self.callback_handler.add_callback(BAdamCallback) + self.add_callback(BAdamCallback) def create_optimizer(self) -> "torch.optim.Optimizer": if self.optimizer is None: @@ -75,15 +75,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): 
create_custom_scheduler(self.args, num_training_steps, optimizer) return super().create_scheduler(num_training_steps, optimizer) - def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None: - super()._save(output_dir, state_dict) - output_dir = output_dir if output_dir is not None else self.args.output_dir - if self.finetuning_args.pissa_convert: - convert_pissa_adapter(output_dir, state_dict, self.accelerator, self.model, self.args) - - if self.processor is not None: - getattr(self.processor, "image_processor").save_pretrained(output_dir) - def prediction_step( self, model: "torch.nn.Module", diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 21d41c36..4b581691 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -17,11 +17,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import torch -from peft import PeftModel from transformers import Trainer from transformers.integrations import is_deepspeed_zero3_enabled from transformers.optimization import get_scheduler @@ -40,7 +38,6 @@ if is_galore_available(): if TYPE_CHECKING: - from accelerate import Accelerator from transformers import PreTrainedModel, Seq2SeqTrainingArguments from trl import AutoModelForCausalLMWithValueHead @@ -175,51 +172,6 @@ def create_reward_model( return reward_model -def convert_pissa_adapter( - output_dir: str, - state_dict: Dict[str, "torch.Tensor"], - accelerator: "Accelerator", - model: "PreTrainedModel", - training_args: "Seq2SeqTrainingArguments", -) -> None: - r""" - Converts the PiSSA adapter to a LoRA adapter. - """ - pissa_init_dir = os.path.join(training_args.output_dir, "pissa_init") - pissa_backup_dir = os.path.join(output_dir, "pissa_backup") - if output_dir == pissa_init_dir: - logger.info("Initial PiSSA adatper will be saved at: {}.".format(pissa_init_dir)) - unwrapped_model = accelerator.unwrap_model(model) - if isinstance(unwrapped_model, PeftModel): - init_lora_weights = getattr(unwrapped_model.peft_config["default"], "init_lora_weights") - setattr(unwrapped_model.peft_config["default"], "init_lora_weights", True) - unwrapped_model.save_pretrained( - output_dir, - state_dict=state_dict, - safe_serialization=training_args.save_safetensors, - ) - setattr(unwrapped_model.peft_config["default"], "init_lora_weights", init_lora_weights) - - elif output_dir == training_args.output_dir: # at the end of training - logger.info("Converted PiSSA adapter will be saved at: {}.".format(output_dir)) - unwrapped_model = accelerator.unwrap_model(model) - if isinstance(unwrapped_model, PeftModel): # backup the pissa adapter for further use - unwrapped_model.save_pretrained( - pissa_backup_dir, - state_dict=state_dict, - safe_serialization=training_args.save_safetensors, - ) - unwrapped_model.save_pretrained( - output_dir, - state_dict=state_dict, - safe_serialization=training_args.save_safetensors, - convert_pissa_to_lora=pissa_init_dir, - ) - # TODO: the model is applied pissa again unexpectedly - unwrapped_model.load_adapter(pissa_backup_dir, "default", is_trainable=True) - unwrapped_model.set_adapter("default") - - def _get_decay_parameter_names(model: "PreTrainedModel") -> List[str]: r""" Returns a list of names of parameters with weight decay. 
(weights in non-layernorm layers) diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index a02fff22..dc982e07 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -20,11 +20,11 @@ import torch from transformers import PreTrainedModel from ..data import get_template_and_fix_tokenizer -from ..extras.callbacks import LogCallback from ..extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME from ..extras.logging import get_logger from ..hparams import get_infer_args, get_train_args from ..model import load_model, load_tokenizer +from .callbacks import LogCallback from .dpo import run_dpo from .kto import run_kto from .ppo import run_ppo @@ -41,8 +41,8 @@ logger = get_logger(__name__) def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: List["TrainerCallback"] = []) -> None: + callbacks.append(LogCallback()) model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args) - callbacks.append(LogCallback(training_args.output_dir)) if finetuning_args.stage == "pt": run_pt(model_args, data_args, training_args, finetuning_args, callbacks) diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index f7fbac30..131d180d 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -310,6 +310,7 @@ class Runner: env = deepcopy(os.environ) env["LLAMABOARD_ENABLED"] = "1" + env["LLAMABOARD_WORKDIR"] = args["output_dir"] if args.get("deepspeed", None) is not None: env["FORCE_TORCHRUN"] = "1" diff --git a/src/llamafactory/webui/utils.py b/src/llamafactory/webui/utils.py index 14616ac4..6e5fdbe4 100644 --- a/src/llamafactory/webui/utils.py +++ b/src/llamafactory/webui/utils.py @@ -38,12 +38,15 @@ def abort_process(pid: int) -> None: r""" Aborts the processes recursively in a bottom-up way. 
""" - children = psutil.Process(pid).children() - if children: - for child in children: - abort_process(child.pid) + try: + children = psutil.Process(pid).children() + if children: + for child in children: + abort_process(child.pid) - os.kill(pid, signal.SIGABRT) + os.kill(pid, signal.SIGABRT) + except Exception: + pass def can_quantize(finetuning_type: str) -> "gr.Dropdown": From 024760f8664f6ff307937bc96a23d51f5b36ae10 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 01:17:07 +0800 Subject: [PATCH 146/160] update examples Former-commit-id: 66f248b90cfa2b29c73060459b2337b78154c47b --- examples/extras/fsdp_qlora/llama3_lora_sft.yaml | 2 +- examples/extras/llama_pro/llama3_freeze_sft.yaml | 2 +- examples/extras/loraplus/llama3_lora_sft.yaml | 2 +- examples/extras/pissa/llama3_lora_sft.yaml | 2 +- examples/train_full/llama3_full_sft_ds3.yaml | 2 +- examples/train_lora/llama3_lora_dpo.yaml | 2 +- examples/train_lora/llama3_lora_kto.yaml | 2 +- examples/train_lora/llama3_lora_ppo.yaml | 2 +- examples/train_lora/llama3_lora_pretrain.yaml | 2 +- examples/train_lora/llama3_lora_reward.yaml | 4 ++-- examples/train_lora/llama3_lora_sft.yaml | 2 +- examples/train_lora/llama3_lora_sft_ds0.yaml | 2 +- examples/train_lora/llama3_lora_sft_ds3.yaml | 2 +- examples/train_lora/llava1_5_lora_sft.yaml | 2 +- examples/train_qlora/llama3_lora_sft_aqlm.yaml | 2 +- examples/train_qlora/llama3_lora_sft_awq.yaml | 2 +- examples/train_qlora/llama3_lora_sft_gptq.yaml | 2 +- examples/train_qlora/llama3_lora_sft_otfq.yaml | 2 +- 18 files changed, 19 insertions(+), 19 deletions(-) diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index cc773991..6c80ef58 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -30,7 +30,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index f92d6945..5e7e90bb 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -31,7 +31,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index 57383ae0..062a312b 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -30,7 +30,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/extras/pissa/llama3_lora_sft.yaml b/examples/extras/pissa/llama3_lora_sft.yaml index fd4b9f1d..05077b6c 100644 --- a/examples/extras/pissa/llama3_lora_sft.yaml +++ b/examples/extras/pissa/llama3_lora_sft.yaml @@ -32,7 +32,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_full/llama3_full_sft_ds3.yaml b/examples/train_full/llama3_full_sft_ds3.yaml index 40afd2ee..c983ad5c 100644 --- a/examples/train_full/llama3_full_sft_ds3.yaml +++ b/examples/train_full/llama3_full_sft_ds3.yaml @@ -29,7 +29,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 
lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_dpo.yaml b/examples/train_lora/llama3_lora_dpo.yaml index 188e5078..d87c0669 100644 --- a/examples/train_lora/llama3_lora_dpo.yaml +++ b/examples/train_lora/llama3_lora_dpo.yaml @@ -31,7 +31,7 @@ learning_rate: 5.0e-6 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_kto.yaml b/examples/train_lora/llama3_lora_kto.yaml index f730c82e..08208c25 100644 --- a/examples/train_lora/llama3_lora_kto.yaml +++ b/examples/train_lora/llama3_lora_kto.yaml @@ -30,7 +30,7 @@ learning_rate: 5.0e-6 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_ppo.yaml b/examples/train_lora/llama3_lora_ppo.yaml index e574014e..512e90ea 100644 --- a/examples/train_lora/llama3_lora_ppo.yaml +++ b/examples/train_lora/llama3_lora_ppo.yaml @@ -30,7 +30,7 @@ learning_rate: 1.0e-5 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### generate diff --git a/examples/train_lora/llama3_lora_pretrain.yaml b/examples/train_lora/llama3_lora_pretrain.yaml index 839b3e51..5e8aaaef 100644 --- a/examples/train_lora/llama3_lora_pretrain.yaml +++ b/examples/train_lora/llama3_lora_pretrain.yaml @@ -28,7 +28,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_reward.yaml b/examples/train_lora/llama3_lora_reward.yaml index 79559d19..96c32238 100644 --- a/examples/train_lora/llama3_lora_reward.yaml +++ b/examples/train_lora/llama3_lora_reward.yaml @@ -25,11 +25,11 @@ overwrite_output_dir: true ### train per_device_train_batch_size: 1 gradient_accumulation_steps: 8 -learning_rate: 1.0e-5 +learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_sft.yaml b/examples/train_lora/llama3_lora_sft.yaml index fe30c575..55a8077e 100644 --- a/examples/train_lora/llama3_lora_sft.yaml +++ b/examples/train_lora/llama3_lora_sft.yaml @@ -29,7 +29,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_sft_ds0.yaml b/examples/train_lora/llama3_lora_sft_ds0.yaml index 08b638e6..f1442faa 100644 --- a/examples/train_lora/llama3_lora_sft_ds0.yaml +++ b/examples/train_lora/llama3_lora_sft_ds0.yaml @@ -30,7 +30,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llama3_lora_sft_ds3.yaml b/examples/train_lora/llama3_lora_sft_ds3.yaml index b7266d61..66e7007e 100644 --- a/examples/train_lora/llama3_lora_sft_ds3.yaml +++ b/examples/train_lora/llama3_lora_sft_ds3.yaml @@ -30,7 +30,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_lora/llava1_5_lora_sft.yaml b/examples/train_lora/llava1_5_lora_sft.yaml index 55ac31fa..ec03f82c 100644 --- a/examples/train_lora/llava1_5_lora_sft.yaml 
+++ b/examples/train_lora/llava1_5_lora_sft.yaml @@ -30,7 +30,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_qlora/llama3_lora_sft_aqlm.yaml b/examples/train_qlora/llama3_lora_sft_aqlm.yaml index 7b6767d5..3519d46b 100644 --- a/examples/train_qlora/llama3_lora_sft_aqlm.yaml +++ b/examples/train_qlora/llama3_lora_sft_aqlm.yaml @@ -29,7 +29,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_qlora/llama3_lora_sft_awq.yaml b/examples/train_qlora/llama3_lora_sft_awq.yaml index a2a26e4b..df48669b 100644 --- a/examples/train_qlora/llama3_lora_sft_awq.yaml +++ b/examples/train_qlora/llama3_lora_sft_awq.yaml @@ -29,7 +29,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_qlora/llama3_lora_sft_gptq.yaml b/examples/train_qlora/llama3_lora_sft_gptq.yaml index ad3d854c..61fa9bb4 100644 --- a/examples/train_qlora/llama3_lora_sft_gptq.yaml +++ b/examples/train_qlora/llama3_lora_sft_gptq.yaml @@ -29,7 +29,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval diff --git a/examples/train_qlora/llama3_lora_sft_otfq.yaml b/examples/train_qlora/llama3_lora_sft_otfq.yaml index 9c73b439..80a05768 100644 --- a/examples/train_qlora/llama3_lora_sft_otfq.yaml +++ b/examples/train_qlora/llama3_lora_sft_otfq.yaml @@ -31,7 +31,7 @@ learning_rate: 1.0e-4 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -fp16: true +bf16: true ddp_timeout: 180000000 ### eval From 42e7489713ac0f464695283e05d0b86a450d8a37 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 01:26:50 +0800 Subject: [PATCH 147/160] add Gemma2 models Former-commit-id: 8fc5a248ecfd6861cb90dac6c14fe89cdeaf8921 --- README.md | 2 +- README_zh.md | 2 +- src/llamafactory/extras/constants.py | 12 ++++++++++++ src/llamafactory/train/rm/trainer.py | 2 +- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 44aed7e8..27d1e98b 100644 --- a/README.md +++ b/README.md @@ -160,7 +160,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | -| [Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | diff --git a/README_zh.md b/README_zh.md index 7e3d51ad..9fb56a4e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -160,7 +160,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | | [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | | [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | -| 
[Gemma/CodeGemma](https://huggingface.co/google) | 2B/7B | gemma | +| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | | [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | | [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | | [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 866f39d4..3ea21f76 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -507,6 +507,18 @@ register_model_group( "Gemma-1.1-7B-Chat": { DownloadSource.DEFAULT: "google/gemma-1.1-7b-it", }, + "Gemma-2-9B": { + DownloadSource.DEFAULT: "google/gemma-2-9b", + }, + "Gemma-2-27B": { + DownloadSource.DEFAULT: "google/gemma-2-27b", + }, + "Gemma-2-9B-Chat": { + DownloadSource.DEFAULT: "google/gemma-2-9b-it", + }, + "Gemma-2-27B-Chat": { + DownloadSource.DEFAULT: "google/gemma-2-27b-it", + }, }, template="gemma", ) diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py index 5eceead8..accc877d 100644 --- a/src/llamafactory/train/rm/trainer.py +++ b/src/llamafactory/train/rm/trainer.py @@ -105,7 +105,7 @@ class PairwiseTrainer(Trainer): Subclass and override to inject custom behavior. Note that the first element will be removed from the output tuple. - See: https://github.com/huggingface/transformers/blob/v4.39.1/src/transformers/trainer.py#L3777 + See: https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py#L3842 """ # Compute rewards _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) From a1437c15f76aa8b14a8fd287e22e5aa163775541 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 01:28:59 +0800 Subject: [PATCH 148/160] fix docker flashattn Former-commit-id: 0966f5d4616a3877a6b921976dc39e8799831d36 --- docker/docker-cuda/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/docker-cuda/Dockerfile b/docker/docker-cuda/Dockerfile index e4fac987..d94aa970 100644 --- a/docker/docker-cuda/Dockerfile +++ b/docker/docker-cuda/Dockerfile @@ -4,6 +4,7 @@ FROM nvcr.io/nvidia/pytorch:24.02-py3 # Define environments ENV MAX_JOBS=4 +ENV FLASH_ATTENTION_FORCE_BUILD=TRUE # Define installation arguments ARG INSTALL_BNB=false From cfdf5a5a787191624c711b2a8adddb4aba0f6edc Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 03:18:54 +0800 Subject: [PATCH 149/160] increase pissa_iter for stability Former-commit-id: 03f8d9b0fb10ae58e7f68508197330d616957899 --- src/llamafactory/hparams/finetuning_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index 28da95ad..3867c0ec 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -113,7 +113,7 @@ class LoraArguments: metadata={"help": "Whether or not to initialize a PiSSA adapter."}, ) pissa_iter: int = field( - default=4, + default=16, metadata={"help": "The number of iteration steps performed by FSVD in PiSSA. 
Use -1 to disable it."}, ) pissa_convert: bool = field( From fda2cf677b366816fb2088b969834dd09d71b57a Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 06:00:26 +0800 Subject: [PATCH 150/160] bf16 by default, gemma2 attns Gemma2 finetuning cannot work until merging https://github.com/huggingface/transformers/pull/31674 Former-commit-id: da66c32c7be0adc28d2185b23e9f62d56acb961c --- src/llamafactory/model/model_utils/attention.py | 8 +++++++- src/llamafactory/model/patcher.py | 2 +- src/llamafactory/webui/components/train.py | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index dfd90936..9021d277 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -28,7 +28,13 @@ if TYPE_CHECKING: logger = get_logger(__name__) -def configure_attn_implementation(config: "PretrainedConfig", model_args: "ModelArguments") -> None: +def configure_attn_implementation( + config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool +) -> None: + if getattr(config, "model_type", None) == "gemma2" and is_trainable: # gemma2 adopts soft-cap attention + logger.warning("Gemma-2 models should use eager attention in training, change `flash_attn` to disabled.") + model_args.flash_attn = "disabled" + if model_args.flash_attn == "auto": return diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index 24cd2601..4eae0bb4 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -67,7 +67,7 @@ def patch_config( use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"] torch.npu.set_compile_mode(jit_compile=use_jit_compile) - configure_attn_implementation(config, model_args) + configure_attn_implementation(config, model_args, is_trainable) configure_rope(config, model_args, is_trainable) configure_longlora(config, model_args, is_trainable) configure_quantization(config, tokenizer, model_args, init_kwargs) diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index f33c37ee..d4832dd3 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -54,7 +54,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: num_train_epochs = gr.Textbox(value="3.0") max_grad_norm = gr.Textbox(value="1.0") max_samples = gr.Textbox(value="100000") - compute_type = gr.Dropdown(choices=["fp16", "bf16", "fp32", "pure_bf16"], value="fp16") + compute_type = gr.Dropdown(choices=["bf16", "fp16", "fp32", "pure_bf16"], value="bf16") input_elems.update({learning_rate, num_train_epochs, max_grad_norm, max_samples, compute_type}) elem_dict.update( From 6a75d5706030f7eadf5756e81eb1f1a702d06b79 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Fri, 28 Jun 2024 06:55:19 +0800 Subject: [PATCH 151/160] update readme Former-commit-id: 9f809c311af373508cb51b204ae54b047729a9dc --- README.md | 57 +++++++++++++++++++++++++--------------------------- README_zh.md | 57 +++++++++++++++++++++++++--------------------------- 2 files changed, 54 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 27d1e98b..6c6a48d9 100644 --- a/README.md +++ b/README.md @@ -151,35 +151,32 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Supported Models -| Model | Model size | Template | -| 
--------------------------------------------------------- | -------------------------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | -| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | -| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | -| [PaliGemma](https://huggingface.co/google) | 3B | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | -| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | -| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | +| Model | Model size | Template | +| ------------------------------------------------------------ | -------------------------------- | --------- | +| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | +| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [Llama 2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | +| [Llama 3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen/Qwen1.5/Qwen2 (Code/MoE)](https://huggingface.co/Qwen) | 
0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen | +| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi/Yi-1.5](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] > For the "base" models, the `template` argument can be chosen from `default`, `alpaca`, `vicuna` etc. But make sure to use the **corresponding template** for the "instruct/chat" models. @@ -610,7 +607,7 @@ If you have a project that should be incorporated, please contact via email or c This repository is licensed under the [Apache-2.0 License](LICENSE). -Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +Please follow the model licenses to use the corresponding model weights: [Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder 
2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## Citation diff --git a/README_zh.md b/README_zh.md index 9fb56a4e..f695646a 100644 --- a/README_zh.md +++ b/README_zh.md @@ -151,35 +151,32 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 模型 -| 模型名 | 模型大小 | Template | -| --------------------------------------------------------- | -------------------------------- | --------- | -| [Baichuan2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | -| [BLOOM](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | -| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | -| [Command-R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | -| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | -| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | -| [Gemma/Gemma2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | -| [GLM4](https://huggingface.co/THUDM) | 9B | glm4 | -| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | -| [LLaMA](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | -| [LLaMA-2](https://huggingface.co/meta-llama) | 7B/13B/70B | llama2 | -| [LLaMA-3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | -| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | -| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | -| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | -| [PaliGemma](https://huggingface.co/google) | 3B | gemma | -| [Phi-1.5/2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | -| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | -| [Qwen](https://huggingface.co/Qwen) | 1.8B/7B/14B/72B | qwen | -| [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | qwen | -| [Qwen2 (MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/7B/57B/72B | qwen | -| [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | - | -| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | -| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | yi | -| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | -| [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | +| 模型名 | 模型大小 | Template | +| ------------------------------------------------------------ | -------------------------------- | --------- | +| [Baichuan 2](https://huggingface.co/baichuan-inc) | 7B/13B | baichuan2 | +| [BLOOM/BLOOMZ](https://huggingface.co/bigscience) | 560M/1.1B/1.7B/3B/7.1B/176B | - | +| [ChatGLM3](https://huggingface.co/THUDM) | 6B | chatglm3 | +| [Command R](https://huggingface.co/CohereForAI) | 35B/104B | cohere | +| [DeepSeek (Code/MoE)](https://huggingface.co/deepseek-ai) | 7B/16B/67B/236B | deepseek | +| [Falcon](https://huggingface.co/tiiuae) | 7B/11B/40B/180B | falcon | +| [Gemma/Gemma 2/CodeGemma](https://huggingface.co/google) | 2B/7B/9B/27B | gemma | +| [GLM-4](https://huggingface.co/THUDM) | 9B | glm4 | +| [InternLM2](https://huggingface.co/internlm) | 7B/20B | intern2 | +| [Llama](https://github.com/facebookresearch/llama) | 7B/13B/33B/65B | - | +| [Llama 2](https://huggingface.co/meta-llama) | 
7B/13B/70B | llama2 | +| [Llama 3](https://huggingface.co/meta-llama) | 8B/70B | llama3 | +| [LLaVA-1.5](https://huggingface.co/llava-hf) | 7B/13B | vicuna | +| [Mistral/Mixtral](https://huggingface.co/mistralai) | 7B/8x7B/8x22B | mistral | +| [OLMo](https://huggingface.co/allenai) | 1B/7B | - | +| [PaliGemma](https://huggingface.co/google) | 3B | gemma | +| [Phi-1.5/Phi-2](https://huggingface.co/microsoft) | 1.3B/2.7B | - | +| [Phi-3](https://huggingface.co/microsoft) | 4B/7B/14B | phi | +| [Qwen/Qwen1.5/Qwen2 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.5B/4B/7B/14B/32B/72B/110B | qwen | +| [StarCoder 2](https://huggingface.co/bigcode) | 3B/7B/15B | - | +| [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | xverse | +| [Yi/Yi-1.5](https://huggingface.co/01-ai) | 6B/9B/34B | yi | +| [Yi-VL](https://huggingface.co/01-ai) | 6B/34B | yi_vl | +| [Yuan 2](https://huggingface.co/IEITYuan) | 2B/51B/102B | yuan | > [!NOTE] > 对于所有“基座”(Base)模型,`template` 参数可以是 `default`, `alpaca`, `vicuna` 等任意值。但“对话”(Instruct/Chat)模型请务必使用**对应的模板**。 @@ -610,7 +607,7 @@ run_name: test_run # 可选 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。 -使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +使用模型权重时,请遵循对应的模型协议:[Baichuan 2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [GLM-4](https://huggingface.co/THUDM/glm-4-9b/blob/main/LICENSE) / [InternLM2](https://github.com/InternLM/InternLM#license) / [Llama](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [Llama 2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [Llama 3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / 
[Phi-1.5/Phi-2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder 2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan 2](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## 引用 From 3c4f8eaa55be246546e7c4ed6aedfe9dd08f6785 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sat, 29 Jun 2024 01:42:14 +0800 Subject: [PATCH 152/160] loose gemma2 attention Former-commit-id: a0b645017a2de3d58b6cbc71bd91ec96fc7a818b --- src/llamafactory/model/model_utils/attention.py | 10 ++++++++-- src/llamafactory/train/callbacks.py | 5 +---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 9021d277..80d9d4b8 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -32,8 +32,14 @@ def configure_attn_implementation( config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool ) -> None: if getattr(config, "model_type", None) == "gemma2" and is_trainable: # gemma2 adopts soft-cap attention - logger.warning("Gemma-2 models should use eager attention in training, change `flash_attn` to disabled.") - model_args.flash_attn = "disabled" + if model_args.flash_attn == "auto": + logger.warning("Gemma-2 models should use eager attention in training, change `flash_attn` to disabled.") + model_args.flash_attn = "disabled" + else: + logger.warning( + "Gemma-2 models should use eager attention in training, but you set `flash_attn: {}`. " + "Will proceed at your own risk.".format(model_args.flash_attn) + ) if model_args.flash_attn == "auto": return diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index dc9c981e..4d024278 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -1,7 +1,4 @@ -# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. -# -# This code is inspired by the HuggingFace's transformers library. -# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/trainer.py +# Copyright 2024 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
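The Gemma-2 handling added in the two patches above boils down to a small decision rule: during training, `flash_attn: auto` is downgraded to eager attention because Gemma-2's attention logit soft-capping was not yet supported by FlashAttention, while an explicit user choice is kept and only warned about. A minimal standalone sketch of that rule follows; it is an illustration, not code from the repository, and the `Args` dataclass plus the final value mapping are simplified stand-ins for the project's `ModelArguments` and its `flash_attn` → `attn_implementation` translation.

```python
# Sketch of the Gemma-2 attention fallback described in PATCH 150/152 (illustrative only).
from dataclasses import dataclass


@dataclass
class Args:
    flash_attn: str = "auto"  # assumed choices: auto, disabled, sdpa, fa2


def configure_attn(model_type: str, args: Args, is_trainable: bool) -> str:
    """Return the attention implementation to request from Transformers."""
    if model_type == "gemma2" and is_trainable:
        # Gemma-2 uses logit soft-capping, which FlashAttention did not support
        # at the time of this patch, so training falls back to eager attention.
        if args.flash_attn == "auto":
            args.flash_attn = "disabled"
        else:
            print(f"Warning: keeping flash_attn={args.flash_attn} for Gemma-2 training at your own risk.")

    # Assumed mapping from the user-facing flag to the Transformers argument.
    return {"disabled": "eager", "sdpa": "sdpa", "fa2": "flash_attention_2"}.get(args.flash_attn, "auto")


# With the default "auto" setting, a Gemma-2 training run ends up requesting eager attention.
assert configure_attn("gemma2", Args(), is_trainable=True) == "eager"
```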
From 889c042ecd3a8d2c05dbba80056ef807fcf89619 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 30 Jun 2024 21:05:31 +0800 Subject: [PATCH 153/160] update npu docker Former-commit-id: 2f4d5174205605b8821d4fb626283e07694ecf80 --- docker/docker-npu/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/docker-npu/Dockerfile b/docker/docker-npu/Dockerfile index e413d4e3..34cf9616 100644 --- a/docker/docker-npu/Dockerfile +++ b/docker/docker-npu/Dockerfile @@ -1,6 +1,9 @@ # Use the Ubuntu 22.04 image with CANN 8.0.rc1 # More versions can be found at https://hub.docker.com/r/cosdt/cann/tags +# FROM cosdt/cann:8.0.rc1-910-ubuntu22.04 FROM cosdt/cann:8.0.rc1-910b-ubuntu22.04 +# FROM cosdt/cann:8.0.rc1-910-openeuler22.03 +# FROM cosdt/cann:8.0.rc1-910b-openeuler22.03 # Define environments ENV DEBIAN_FRONTEND=noninteractive From 188b4be64d6a57479657d47adb0039ce8b6cc7c5 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 30 Jun 2024 21:28:51 +0800 Subject: [PATCH 154/160] fix #4398 #4592 Former-commit-id: 8c92d268903c00392c8bd75a731daa1f107d6202 --- README.md | 2 ++ README_zh.md | 4 +++- src/llamafactory/extras/misc.py | 24 +++++++++++++++++------- src/llamafactory/model/loader.py | 3 ++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6c6a48d9..6299650b 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,8 @@ Remember to use `ASCEND_RT_VISIBLE_DEVICES` instead of `CUDA_VISIBLE_DEVICES` to If you cannot infer model on NPU devices, try setting `do_sample: false` in the configurations. +Download the pre-built Docker images: [32GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) | [64GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html) +
### Data Preparation diff --git a/README_zh.md b/README_zh.md index f695646a..efc647a7 100644 --- a/README_zh.md +++ b/README_zh.md @@ -357,7 +357,7 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
昇腾 NPU 用户指南 -在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e ".[torch-npu,metrics]"` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit and Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: +在昇腾 NPU 设备上安装 LLaMA Factory 时,需要指定额外依赖项,使用 `pip install -e ".[torch-npu,metrics]"` 命令安装。此外,还需要安装 **[Ascend CANN Toolkit 与 Kernels](https://www.hiascend.com/developer/download/community/result?module=cann)**,安装方法请参考[安装教程](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC2alpha002/quickstart/quickstart/quickstart_18_0004.html)或使用以下命令: ```bash # 请替换 URL 为 CANN 版本和设备型号对应的 URL @@ -384,6 +384,8 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh 如果遇到无法正常推理的情况,请尝试设置 `do_sample: false`。 +下载预构建 Docker 镜像:[32GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/130.html) | [64GB](http://mirrors.cn-central-221.ovaijisuan.com/detail/131.html) +
### 数据准备 diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 30c287bd..20c752c5 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -20,7 +20,9 @@ import os from typing import TYPE_CHECKING, Tuple import torch +import transformers.dynamic_module_utils from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList +from transformers.dynamic_module_utils import get_relative_imports from transformers.utils import ( is_torch_bf16_gpu_available, is_torch_cuda_available, @@ -69,6 +71,9 @@ class AverageMeter: def check_dependencies() -> None: + r""" + Checks the version of the required packages. + """ if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]: logger.warning("Version checking has been disabled, may lead to unexpected behaviors.") else: @@ -79,7 +84,7 @@ def check_dependencies() -> None: require_version("trl>=0.8.6", "To fix: pip install trl>=0.8.6") -def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: +def count_parameters(model: "torch.nn.Module") -> Tuple[int, int]: r""" Returns the number of trainable parameters and number of all parameters in the model. """ @@ -108,7 +113,7 @@ def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: return trainable_params, all_param -def get_current_device() -> torch.device: +def get_current_device() -> "torch.device": r""" Gets the current available device. """ @@ -147,6 +152,13 @@ def get_logits_processor() -> "LogitsProcessorList": return logits_processor +def has_tokenized_data(path: "os.PathLike") -> bool: + r""" + Checks if the path has a tokenized dataset. + """ + return os.path.isdir(path) and len(os.listdir(path)) > 0 + + def infer_optim_dtype(model_dtype: "torch.dtype") -> "torch.dtype": r""" Infers the optimal dtype according to the model_dtype and device compatibility. @@ -166,11 +178,9 @@ def is_gpu_or_npu_available() -> bool: return is_torch_npu_available() or is_torch_cuda_available() -def has_tokenized_data(path: "os.PathLike") -> bool: - r""" - Checks if the path has a tokenized dataset. - """ - return os.path.isdir(path) and len(os.listdir(path)) > 0 +def skip_check_imports() -> None: + if os.environ.get("FORCE_CHECK_IMPORTS", "0").lower() not in ["true", "1"]: + transformers.dynamic_module_utils.check_imports = get_relative_imports def torch_gc() -> None: diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 1261d17a..fe700d53 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -19,7 +19,7 @@ from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Se from trl import AutoModelForCausalLMWithValueHead from ..extras.logging import get_logger -from ..extras.misc import count_parameters, try_download_model_from_ms +from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_ms from .adapter import init_adapter from .model_utils.misc import register_autoclass from .model_utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model @@ -48,6 +48,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]: Note: including inplace operation of model_args. 
""" + skip_check_imports() model_args.model_name_or_path = try_download_model_from_ms(model_args) return { "trust_remote_code": True, From b670fb57db89aa5ac66da1e6288c69a9c213dac2 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 1 Jul 2024 00:22:52 +0800 Subject: [PATCH 155/160] update readme Former-commit-id: 7998d969bf942c91cf41a189e3941f6e04c81c6f --- README.md | 10 +++++++++- README_zh.md | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6299650b..3d3feae5 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub Code License](https://img.shields.io/github/license/hiyouga/LLaMA-Factory)](LICENSE) [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/) -[![Citation](https://img.shields.io/badge/citation-63-green)](#projects-using-llama-factory) +[![Citation](https://img.shields.io/badge/citation-71-green)](#projects-using-llama-factory) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) @@ -593,6 +593,14 @@ If you have a project that should be incorporated, please contact via email or c 1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115) 1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815) 1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099) +1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173) +1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074) +1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408) +1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546) +1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695) +1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233) +1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069) +1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburgh’s Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25) 1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: A large language model for Astronomy, based on ChatGLM2-6B and Qwen-14B. 1. 
**[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: A large language model specialized in Chinese legal domain, based on Baichuan-13B, is capable of retrieving and reasoning on legal knowledge. 1. **[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: A large language model specialized in Chinese medical domain, based on Baichuan-7B and ChatGLM-6B. diff --git a/README_zh.md b/README_zh.md index efc647a7..cb5a42e4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,7 +4,7 @@ [![GitHub Code License](https://img.shields.io/github/license/hiyouga/LLaMA-Factory)](LICENSE) [![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/LLaMA-Factory)](https://github.com/hiyouga/LLaMA-Factory/commits/main) [![PyPI](https://img.shields.io/pypi/v/llamafactory)](https://pypi.org/project/llamafactory/) -[![Citation](https://img.shields.io/badge/citation-63-green)](#使用了-llama-factory-的项目) +[![Citation](https://img.shields.io/badge/citation-71-green)](#使用了-llama-factory-的项目) [![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)](https://github.com/hiyouga/LLaMA-Factory/pulls) [![Discord](https://dcbadge.vercel.app/api/server/rKfvV9r9FK?compact=true&style=flat)](https://discord.gg/rKfvV9r9FK) [![Twitter](https://img.shields.io/twitter/follow/llamafactory_ai)](https://twitter.com/llamafactory_ai) @@ -593,6 +593,14 @@ run_name: test_run # 可选 1. Chen et al. Advancing Tool-Augmented Large Language Models: Integrating Insights from Errors in Inference Trees. 2024. [[arxiv]](https://arxiv.org/abs/2406.07115) 1. Zhu et al. Are Large Language Models Good Statisticians?. 2024. [[arxiv]](https://arxiv.org/abs/2406.07815) 1. Li et al. Know the Unknown: An Uncertainty-Sensitive Method for LLM Instruction Tuning. 2024. [[arxiv]](https://arxiv.org/abs/2406.10099) +1. Ding et al. IntentionQA: A Benchmark for Evaluating Purchase Intention Comprehension Abilities of Language Models in E-commerce. 2024. [[arxiv]](https://arxiv.org/abs/2406.10173) +1. He et al. COMMUNITY-CROSS-INSTRUCT: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. 2024. [[arxiv]](https://arxiv.org/abs/2406.12074) +1. Lin et al. FVEL: Interactive Formal Verification Environment with Large Language Models via Theorem Proving. 2024. [[arxiv]](https://arxiv.org/abs/2406.14408) +1. Treutlein et al. Connecting the Dots: LLMs can Infer and Verbalize Latent Structure from Disparate Training Data. 2024. [[arxiv]](https://arxiv.org/abs/2406.14546) +1. Feng et al. SS-Bench: A Benchmark for Social Story Generation and Evaluation. 2024. [[arxiv]](https://arxiv.org/abs/2406.15695) +1. Feng et al. Self-Constructed Context Decompilation with Fined-grained Alignment Enhancement. 2024. [[arxiv]](https://arxiv.org/abs/2406.17233) +1. Liu et al. Large Language Models for Cuffless Blood Pressure Measurement From Wearable Biosignals. 2024. [[arxiv]](https://arxiv.org/abs/2406.18069) +1. Iyer et al. Exploring Very Low-Resource Translation with LLMs: The University of Edinburgh’s Submission to AmericasNLP 2024 Translation Task. AmericasNLP 2024. [[paper]](https://aclanthology.org/2024.americasnlp-1.25) 1. **[StarWhisper](https://github.com/Yu-Yang-Li/StarWhisper)**: 天文大模型 StarWhisper,基于 ChatGLM2-6B 和 Qwen-14B 在天文数据上微调而得。 1. **[DISC-LawLLM](https://github.com/FudanDISC/DISC-LawLLM)**: 中文法律领域大模型 DISC-LawLLM,基于 Baichuan-13B 微调而得,具有法律推理和知识检索能力。 1. 
**[Sunsimiao](https://github.com/X-D-Lab/Sunsimiao)**: 孙思邈中文医疗大模型 Sumsimiao,基于 Baichuan-7B 和 ChatGLM-6B 在中文医疗数据上微调而得。 From 67d2eb6b2a1169c69a49e6511e4a6ac7d0b0566b Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 1 Jul 2024 01:19:27 +0800 Subject: [PATCH 156/160] fix #4402 #4617 Deprecate reserved_label_len arg Former-commit-id: 4b6568984c0be4b31e7aa91b7c0d52b7f7b12b0b --- src/llamafactory/data/data_utils.py | 12 +- src/llamafactory/data/formatter.py | 116 ++------------- src/llamafactory/data/processors/feedback.py | 16 +- src/llamafactory/data/processors/pairwise.py | 17 ++- .../data/processors/processor_utils.py | 15 +- .../data/processors/supervised.py | 15 +- .../data/processors/unsupervised.py | 9 +- src/llamafactory/data/template.py | 114 +++++--------- src/llamafactory/data/tool_utils.py | 140 ++++++++++++++++++ src/llamafactory/hparams/data_args.py | 7 - tests/data/test_formatter.py | 4 +- tests/data/test_processor.py | 32 ++++ tests/data/test_template.py | 55 ++++++- 13 files changed, 329 insertions(+), 223 deletions(-) create mode 100644 src/llamafactory/data/tool_utils.py create mode 100644 tests/data/test_processor.py diff --git a/src/llamafactory/data/data_utils.py b/src/llamafactory/data/data_utils.py index cc9761b1..76ded47e 100644 --- a/src/llamafactory/data/data_utils.py +++ b/src/llamafactory/data/data_utils.py @@ -13,7 +13,7 @@ # limitations under the License. from enum import Enum, unique -from typing import TYPE_CHECKING, Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Sequence, Set, Union from datasets import concatenate_datasets, interleave_datasets @@ -30,6 +30,9 @@ if TYPE_CHECKING: logger = get_logger(__name__) +SLOTS = Sequence[Union[str, Set[str], Dict[str, str]]] + + @unique class Role(str, Enum): USER = "user" @@ -39,13 +42,6 @@ class Role(str, Enum): OBSERVATION = "observation" -def infer_max_len(source_len: int, target_len: int, max_len: int, reserved_label_len: int) -> Tuple[int, int]: - max_target_len = int(max_len * (target_len / (source_len + target_len))) - max_target_len = max(max_target_len, reserved_label_len) - max_source_len = max_len - min(max_target_len, target_len) - return max_source_len, max_target_len - - def merge_dataset( all_datasets: List[Union["Dataset", "IterableDataset"]], data_args: "DataArguments", diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 88ebf682..c1653a76 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -16,97 +16,10 @@ import json import re from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Any, Dict, List, Literal, Optional, Sequence, Set, Tuple, Union +from typing import List, Literal, Optional, Tuple, Union - -SLOTS = Sequence[Union[str, Set[str], Dict[str, str]]] - - -DEFAULT_TOOL_PROMPT = ( - "You have access to the following tools:\n{tool_text}" - "Use the following format if using a tool:\n" - "```\n" - "Action: tool name (one of [{tool_names}]).\n" - "Action Input: the input to the tool, in a JSON format representing the kwargs " - """(e.g. 
```{{"input": "hello world", "num_beams": 5}}```).\n""" - "```\n" -) - - -GLM4_TOOL_PROMPT = ( - "你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的," - "你的任务是针对用户的问题和要求提供适当的答复和支持。# 可用工具{tool_text}" -) - - -def default_tool_formatter(tools: List[Dict[str, Any]]) -> str: - tool_text = "" - tool_names = [] - for tool in tools: - param_text = "" - for name, param in tool["parameters"]["properties"].items(): - required = ", required" if name in tool["parameters"].get("required", []) else "" - enum = ", should be one of [{}]".format(", ".join(param["enum"])) if param.get("enum", None) else "" - items = ( - ", where each item should be {}".format(param["items"].get("type", "")) if param.get("items") else "" - ) - param_text += " - {name} ({type}{required}): {desc}{enum}{items}\n".format( - name=name, - type=param.get("type", ""), - required=required, - desc=param.get("description", ""), - enum=enum, - items=items, - ) - - tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format( - name=tool["name"], desc=tool.get("description", ""), args=param_text - ) - tool_names.append(tool["name"]) - - return DEFAULT_TOOL_PROMPT.format(tool_text=tool_text, tool_names=", ".join(tool_names)) - - -def default_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: - regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|\s*$)", re.DOTALL) - action_match: List[Tuple[str, str]] = re.findall(regex, content) - if not action_match: - return content - - results = [] - for match in action_match: - tool_name = match[0].strip() - tool_input = match[1].strip().strip('"').strip("```") - try: - arguments = json.loads(tool_input) - results.append((tool_name, json.dumps(arguments, ensure_ascii=False))) - except json.JSONDecodeError: - return content - - return results - - -def glm4_tool_formatter(tools: List[Dict[str, Any]]) -> str: - tool_text = "" - for tool in tools: - tool_text += "\n\n## {name}\n\n{body}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format( - name=tool["name"], body=json.dumps(tool, indent=4, ensure_ascii=False) - ) - - return GLM4_TOOL_PROMPT.format(tool_text=tool_text) - - -def glm4_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: - if "\n" not in content: - return content - - tool_name, tool_input = content.split("\n", maxsplit=1) - try: - arguments = json.loads(tool_input) - except json.JSONDecodeError: - return content - - return [(tool_name, json.dumps(arguments, ensure_ascii=False))] +from .data_utils import SLOTS +from .tool_utils import DefaultToolUtils, GLM4ToolUtils @dataclass @@ -168,15 +81,12 @@ class StringFormatter(Formatter): @dataclass class FunctionFormatter(Formatter): def __post_init__(self): - has_name, has_args = False, False - for slot in filter(lambda s: isinstance(s, str), self.slots): - if "{{name}}" in slot: - has_name = True - if "{{arguments}}" in slot: - has_args = True - - if not has_name or not has_args: - raise ValueError("Name and arguments placeholders are required in the function formatter.") + if self.tool_format == "default": + self.slots = DefaultToolUtils.get_function_slots() + self.slots + elif self.tool_format == "glm4": + self.slots = GLM4ToolUtils.get_function_slots() + self.slots + else: + raise NotImplementedError("Tool format {} was not found.".format(self.tool_format)) def apply(self, **kwargs) -> SLOTS: content = kwargs.pop("content") @@ -210,11 +120,11 @@ class FunctionFormatter(Formatter): class ToolFormatter(Formatter): def __post_init__(self): if self.tool_format == "default": 
- self._tool_formatter = default_tool_formatter - self._tool_extractor = default_tool_extractor + self._tool_formatter = DefaultToolUtils.tool_formatter + self._tool_extractor = DefaultToolUtils.tool_extractor elif self.tool_format == "glm4": - self._tool_formatter = glm4_tool_formatter - self._tool_extractor = glm4_tool_extractor + self._tool_formatter = GLM4ToolUtils.tool_formatter + self._tool_extractor = GLM4ToolUtils.tool_extractor else: raise NotImplementedError("Tool format {} was not found.".format(self.tool_format)) diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index 219ab353..7ba05e23 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -16,7 +16,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .processor_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, infer_seqlen if TYPE_CHECKING: @@ -55,12 +55,8 @@ def _encode_feedback_example( else: kl_messages = prompt + [kl_response[1]] - prompt_ids, response_ids = template.encode_oneturn( - tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len - ) - _, kl_response_ids = template.encode_oneturn( - tokenizer, kl_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len - ) + prompt_ids, response_ids = template.encode_oneturn(tokenizer, messages, system, tools) + _, kl_response_ids = template.encode_oneturn(tokenizer, kl_messages, system, tools) if template.efficient_eos: response_ids += [tokenizer.eos_token_id] @@ -70,6 +66,12 @@ def _encode_feedback_example( image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids + # do not consider the kl_response + source_len, target_len = infer_seqlen(len(prompt_ids), len(response_ids), data_args.cutoff_len) + prompt_ids = prompt_ids[:source_len] + response_ids = response_ids[:target_len] + kl_response_ids = kl_response_ids[:target_len] + input_ids = prompt_ids + response_ids labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids kl_input_ids = prompt_ids + kl_response_ids diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index b2939348..c6001e6e 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -16,7 +16,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .processor_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, infer_seqlen if TYPE_CHECKING: @@ -44,12 +44,8 @@ def _encode_pairwise_example( chosen_messages = prompt + [response[0]] rejected_messages = prompt + [response[1]] - prompt_ids, chosen_ids = template.encode_oneturn( - tokenizer, chosen_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len - ) - _, rejected_ids = template.encode_oneturn( - tokenizer, rejected_messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len - ) + prompt_ids, chosen_ids = template.encode_oneturn(tokenizer, chosen_messages, system, tools) + _, rejected_ids = 
template.encode_oneturn(tokenizer, rejected_messages, system, tools) if template.efficient_eos: chosen_ids += [tokenizer.eos_token_id] @@ -59,6 +55,13 @@ def _encode_pairwise_example( image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) prompt_ids = [image_token_id] * getattr(processor, "image_seq_length") + prompt_ids + source_len, target_len = infer_seqlen( + len(prompt_ids), max(len(chosen_ids), len(rejected_ids)), data_args.cutoff_len + ) # consider the response is more important + prompt_ids = prompt_ids[:source_len] + chosen_ids = chosen_ids[:target_len] + rejected_ids = rejected_ids[:target_len] + chosen_input_ids = prompt_ids + chosen_ids chosen_labels = [IGNORE_INDEX] * len(prompt_ids) + chosen_ids rejected_input_ids = prompt_ids + rejected_ids diff --git a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index 93df0cd5..455908ae 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -13,7 +13,7 @@ # limitations under the License. import bisect -from typing import TYPE_CHECKING, List, Sequence +from typing import TYPE_CHECKING, List, Sequence, Tuple from ...extras.packages import is_pillow_available @@ -76,3 +76,16 @@ def get_paligemma_token_type_ids(input_len: int, processor: "ProcessorMixin") -> """ image_seq_length = getattr(processor, "image_seq_length") return [0] * image_seq_length + [1] * (input_len - image_seq_length) + + +def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]: + if target_len * 2 < cutoff_len: # truncate source + max_target_len = cutoff_len + elif source_len * 2 < cutoff_len: # truncate target + max_target_len = cutoff_len - source_len + else: # truncate both + max_target_len = int(cutoff_len * (target_len / (source_len + target_len))) + + new_target_len = min(max_target_len, target_len) + new_source_len = max(cutoff_len - new_target_len, 0) + return new_source_len, new_target_len diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index eb5ffb1a..b283542d 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -17,7 +17,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.constants import IGNORE_INDEX from ...extras.logging import get_logger -from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, greedy_knapsack, infer_seqlen if TYPE_CHECKING: @@ -51,10 +51,17 @@ def _encode_supervised_example( input_ids += [image_token_id] * getattr(processor, "image_seq_length") labels += [IGNORE_INDEX] * getattr(processor, "image_seq_length") - encoded_pairs = template.encode_multiturn( - tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len - ) + encoded_pairs = template.encode_multiturn(tokenizer, messages, system, tools) + total_length = 1 if template.efficient_eos else 0 for turn_idx, (source_ids, target_ids) in enumerate(encoded_pairs): + if total_length >= data_args.cutoff_len: + break + + source_len, target_len = infer_seqlen(len(source_ids), len(target_ids), data_args.cutoff_len - total_length) + source_ids = source_ids[:source_len] + target_ids = target_ids[:target_len] + total_length += source_len + target_len + if data_args.train_on_prompt: source_mask = source_ids elif turn_idx 
!= 0 and template.efficient_eos: diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index 75ad4d51..b3fc85c9 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -16,7 +16,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple from ...extras.logging import get_logger from ..data_utils import Role -from .processor_utils import get_paligemma_token_type_ids, get_pixel_values +from .processor_utils import get_paligemma_token_type_ids, get_pixel_values, infer_seqlen if TYPE_CHECKING: @@ -47,9 +47,7 @@ def _encode_unsupervised_example( else: messages = prompt + [{"role": Role.ASSISTANT.value, "content": ""}] - input_ids, labels = template.encode_oneturn( - tokenizer, messages, system, tools, data_args.cutoff_len, data_args.reserved_label_len - ) + input_ids, labels = template.encode_oneturn(tokenizer, messages, system, tools) if template.efficient_eos: labels += [tokenizer.eos_token_id] @@ -57,6 +55,9 @@ def _encode_unsupervised_example( image_token_id = tokenizer.convert_tokens_to_ids(template.image_token) input_ids = [image_token_id] * getattr(processor, "image_seq_length") + input_ids + source_len, target_len = infer_seqlen(len(input_ids), len(labels), data_args.cutoff_len) + input_ids = input_ids[:source_len] + labels = labels[:target_len] return input_ids, labels diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index 53f16df4..aefd5195 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -16,7 +16,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, Union from ..extras.logging import get_logger -from .data_utils import Role, infer_max_len +from .data_utils import Role from .formatter import EmptyFormatter, FunctionFormatter, StringFormatter, ToolFormatter @@ -48,36 +48,33 @@ class Template: def encode_oneturn( self, tokenizer: "PreTrainedTokenizer", - messages: List[Dict[str, str]], + messages: Sequence[Dict[str, str]], system: Optional[str] = None, tools: Optional[str] = None, - cutoff_len: int = 1_000_000, - reserved_label_len: int = 1, ) -> Tuple[List[int], List[int]]: r""" Returns a single pair of token ids representing prompt and response respectively. """ - encoded_pairs = self._encode(tokenizer, messages, system, tools, cutoff_len, reserved_label_len) + encoded_messages = self._encode(tokenizer, messages, system, tools) prompt_ids = [] - for query_ids, resp_ids in encoded_pairs[:-1]: - prompt_ids += query_ids + resp_ids - prompt_ids = prompt_ids + encoded_pairs[-1][0] - answer_ids = encoded_pairs[-1][1] + for encoded_ids in encoded_messages[:-1]: + prompt_ids += encoded_ids + + answer_ids = encoded_messages[-1] return prompt_ids, answer_ids def encode_multiturn( self, tokenizer: "PreTrainedTokenizer", - messages: List[Dict[str, str]], + messages: Sequence[Dict[str, str]], system: Optional[str] = None, tools: Optional[str] = None, - cutoff_len: int = 1_000_000, - reserved_label_len: int = 1, - ) -> Sequence[Tuple[List[int], List[int]]]: + ) -> List[Tuple[List[int], List[int]]]: r""" Returns multiple pairs of token ids representing prompts and responses respectively. 
""" - return self._encode(tokenizer, messages, system, tools, cutoff_len, reserved_label_len) + encoded_messages = self._encode(tokenizer, messages, system, tools) + return [(encoded_messages[i], encoded_messages[i + 1]) for i in range(0, len(encoded_messages), 2)] def extract_tool(self, content: str) -> Union[str, List[Tuple[str, str]]]: r""" @@ -88,16 +85,14 @@ class Template: def _encode( self, tokenizer: "PreTrainedTokenizer", - messages: List[Dict[str, str]], + messages: Sequence[Dict[str, str]], system: Optional[str], tools: Optional[str], - cutoff_len: int, - reserved_label_len: int, - ) -> Sequence[Tuple[List[int], List[int]]]: + ) -> List[List[int]]: r""" Encodes formatted inputs to pairs of token ids. - Turn 0: system + query resp - Turn t: sep + query resp + Turn 0: prefix + system + query resp + Turn t: sep + query resp """ system = system or self.default_system encoded_messages = [] @@ -106,10 +101,9 @@ class Template: if i == 0: elements += self.format_prefix.apply() - - if i == 0 and (system or tools): - tool_text = self.format_tools.apply(content=tools)[0] if tools else "" - elements += self.format_system.apply(content=(system + tool_text)) + if system or tools: + tool_text = self.format_tools.apply(content=tools)[0] if tools else "" + elements += self.format_system.apply(content=(system + tool_text)) if i > 0 and i % 2 == 0: elements += self.format_separator.apply() @@ -127,11 +121,9 @@ class Template: encoded_messages.append(self._convert_elements_to_ids(tokenizer, elements)) - return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len) + return encoded_messages - def _convert_elements_to_ids( - self, tokenizer: "PreTrainedTokenizer", elements: List[Union[str, Dict[str, str]]] - ) -> List[int]: + def _convert_elements_to_ids(self, tokenizer: "PreTrainedTokenizer", elements: "SLOTS") -> List[int]: r""" Converts elements to token ids. """ @@ -152,60 +144,32 @@ class Template: return token_ids - def _make_pairs( - self, - encoded_messages: Sequence[List[int]], - cutoff_len: int, - reserved_label_len: int, - ) -> Sequence[Tuple[List[int], List[int]]]: - encoded_pairs = [] - total_length = 0 - for i in range(0, len(encoded_messages), 2): - if total_length >= cutoff_len: - break - - max_source_len, max_target_len = infer_max_len( - source_len=len(encoded_messages[i]), - target_len=len(encoded_messages[i + 1]), - max_len=(cutoff_len - total_length), - reserved_label_len=reserved_label_len, - ) - source_ids = encoded_messages[i][:max_source_len] - target_ids = encoded_messages[i + 1][:max_target_len] - total_length += len(source_ids) + len(target_ids) - encoded_pairs.append((source_ids, target_ids)) - - return encoded_pairs - @dataclass class Llama2Template(Template): def _encode( self, tokenizer: "PreTrainedTokenizer", - messages: List[Dict[str, str]], + messages: Sequence[Dict[str, str]], system: str, tools: str, - cutoff_len: int, - reserved_label_len: int, - ) -> Sequence[Tuple[List[int], List[int]]]: + ) -> List[List[int]]: r""" Encodes formatted inputs to pairs of token ids. 
- Turn 0: system + query resp - Turn t: sep + query resp + Turn 0: prefix + system + query resp + Turn t: sep + query resp """ system = system or self.default_system encoded_messages = [] for i, message in enumerate(messages): elements = [] + system_text = "" if i == 0: elements += self.format_prefix.apply() - - system_text = "" - if i == 0 and (system or tools): - tool_text = self.format_tools.apply(content=tools)[0] if tools else "" - system_text = self.format_system.apply(content=(system + tool_text))[0] + if system or tools: + tool_text = self.format_tools.apply(content=tools)[0] if tools else "" + system_text = self.format_system.apply(content=(system + tool_text))[0] if i > 0 and i % 2 == 0: elements += self.format_separator.apply() @@ -223,7 +187,7 @@ class Llama2Template(Template): encoded_messages.append(self._convert_elements_to_ids(tokenizer, elements)) - return self._make_pairs(encoded_messages, cutoff_len, reserved_label_len) + return encoded_messages TEMPLATES: Dict[str, Template] = {} @@ -240,7 +204,7 @@ def _register_template( format_separator: Optional["Formatter"] = None, format_prefix: Optional["Formatter"] = None, default_system: str = "", - stop_words: List[str] = [], + stop_words: Sequence[str] = [], image_token: str = "", efficient_eos: bool = False, replace_eos: bool = False, @@ -275,9 +239,7 @@ def _register_template( template_class = Llama2Template if name.startswith("llama2") else Template default_user_formatter = StringFormatter(slots=["{{content}}"]) default_assistant_formatter = StringFormatter(slots=["{{content}}"] + eos_slots) - default_function_formatter = FunctionFormatter( - slots=["Action: {{name}}\nAction Input: {{arguments}}\n"] + eos_slots - ) + default_function_formatter = FunctionFormatter(slots=eos_slots, tool_format="default") default_tool_formatter = ToolFormatter(tool_format="default") default_separator_formatter = EmptyFormatter() default_prefix_formatter = EmptyFormatter() @@ -390,7 +352,9 @@ def get_template_and_fix_tokenizer( if tool_format is not None: logger.info("Using tool format: {}.".format(tool_format)) + eos_slots = [] if template.efficient_eos else [{"eos_token"}] template.format_tools = ToolFormatter(tool_format=tool_format) + template.format_function = FunctionFormatter(slots=eos_slots, tool_format=tool_format) stop_words = template.stop_words if template.replace_eos: @@ -506,10 +470,11 @@ _register_template( format_user=StringFormatter(slots=[{"token": "<|user|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}]), format_assistant=StringFormatter(slots=["\n", "{{content}}"]), format_system=StringFormatter(slots=[{"token": "<|system|>"}, "\n", "{{content}}"]), - format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), + format_function=FunctionFormatter(slots=[], tool_format="glm4"), format_observation=StringFormatter( slots=[{"token": "<|observation|>"}, "\n", "{{content}}", {"token": "<|assistant|>"}] ), + format_tools=ToolFormatter(tool_format="glm4"), format_prefix=EmptyFormatter(slots=[{"token": "[gMASK]"}, {"token": "sop"}]), stop_words=["<|user|>", "<|observation|>"], efficient_eos=True, @@ -603,16 +568,15 @@ _register_template( _register_template( name="deepseekcoder", format_user=StringFormatter(slots=["### Instruction:\n{{content}}\n### Response:"]), - format_assistant=StringFormatter(slots=["\n", "{{content}}"]), - format_separator=EmptyFormatter(slots=["\n<|EOT|>\n"]), + format_assistant=StringFormatter(slots=["\n{{content}}\n"]), + format_separator=EmptyFormatter(slots=["\n"]), + 
format_prefix=EmptyFormatter(slots=[{"bos_token"}]), default_system=( "You are an AI programming assistant, utilizing the Deepseek Coder model, " "developed by Deepseek Company, and you only answer questions related to computer science. " "For politically sensitive questions, security and privacy issues, " "and other non-computer science questions, you will refuse to answer\n" ), - stop_words=["<|EOT|>"], - efficient_eos=True, ) @@ -662,7 +626,7 @@ _register_template( format_user=StringFormatter(slots=["<|user|>\n{{content}}<|assistant|>"]), format_assistant=StringFormatter(slots=["\n{{content}}"]), format_system=StringFormatter(slots=["<|system|>\n{{content}}"]), - format_function=FunctionFormatter(slots=["{{name}}\n{{arguments}}"]), + format_function=FunctionFormatter(slots=[], tool_format="glm4"), format_observation=StringFormatter(slots=["<|observation|>\n{{content}}<|assistant|>"]), format_tools=ToolFormatter(tool_format="glm4"), format_prefix=EmptyFormatter(slots=["[gMASK]"]), diff --git a/src/llamafactory/data/tool_utils.py b/src/llamafactory/data/tool_utils.py new file mode 100644 index 00000000..ac5565d5 --- /dev/null +++ b/src/llamafactory/data/tool_utils.py @@ -0,0 +1,140 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any, Dict, List, Tuple, Union + +from .data_utils import SLOTS + + +DEFAULT_TOOL_PROMPT = ( + "You have access to the following tools:\n{tool_text}" + "Use the following format if using a tool:\n" + "```\n" + "Action: tool name (one of [{tool_names}]).\n" + "Action Input: the input to the tool, in a JSON format representing the kwargs " + """(e.g. ```{{"input": "hello world", "num_beams": 5}}```).\n""" + "```\n" +) + + +GLM4_TOOL_PROMPT = ( + "你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的," + "你的任务是针对用户的问题和要求提供适当的答复和支持。# 可用工具{tool_text}" +) + + +@dataclass +class ToolUtils(ABC): + @staticmethod + @abstractmethod + def get_function_slots() -> SLOTS: ... + + @staticmethod + @abstractmethod + def tool_formatter(tools: List[Dict[str, Any]]) -> str: ... + + @staticmethod + @abstractmethod + def tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: ... 
+ + +class DefaultToolUtils(ToolUtils): + @staticmethod + def get_function_slots() -> SLOTS: + return ["Action: {{name}}\nAction Input: {{arguments}}\n"] + + @staticmethod + def tool_formatter(tools: List[Dict[str, Any]]) -> str: + tool_text = "" + tool_names = [] + for tool in tools: + param_text = "" + for name, param in tool["parameters"]["properties"].items(): + required, enum, items = "", "", "" + if name in tool["parameters"].get("required", []): + required = ", required" + + if param.get("enum", None): + enum = ", should be one of [{}]".format(", ".join(param["enum"])) + + if param.get("items", None): + items = ", where each item should be {}".format(param["items"].get("type", "")) + + param_text += " - {name} ({type}{required}): {desc}{enum}{items}\n".format( + name=name, + type=param.get("type", ""), + required=required, + desc=param.get("description", ""), + enum=enum, + items=items, + ) + + tool_text += "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}\n".format( + name=tool["name"], desc=tool.get("description", ""), args=param_text + ) + tool_names.append(tool["name"]) + + return DEFAULT_TOOL_PROMPT.format(tool_text=tool_text, tool_names=", ".join(tool_names)) + + @staticmethod + def tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: + regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|\s*$)", re.DOTALL) + action_match: List[Tuple[str, str]] = re.findall(regex, content) + if not action_match: + return content + + results = [] + for match in action_match: + tool_name = match[0].strip() + tool_input = match[1].strip().strip('"').strip("```") + try: + arguments = json.loads(tool_input) + results.append((tool_name, json.dumps(arguments, ensure_ascii=False))) + except json.JSONDecodeError: + return content + + return results + + +class GLM4ToolUtils(ToolUtils): + @staticmethod + def get_function_slots() -> SLOTS: + return ["{{name}}\n{{arguments}}"] + + @staticmethod + def tool_formatter(tools: List[Dict[str, Any]]) -> str: + tool_text = "" + for tool in tools: + tool_text += "\n\n## {name}\n\n{body}\n在调用上述函数时,请使用 Json 格式表示调用的参数。".format( + name=tool["name"], body=json.dumps(tool, indent=4, ensure_ascii=False) + ) + + return GLM4_TOOL_PROMPT.format(tool_text=tool_text) + + @staticmethod + def tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: + if "\n" not in content: + return content + + tool_name, tool_input = content.split("\n", maxsplit=1) + try: + arguments = json.loads(tool_input) + except json.JSONDecodeError: + return content + + return [(tool_name, json.dumps(arguments, ensure_ascii=False))] diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py index dad13820..880be84a 100644 --- a/src/llamafactory/hparams/data_args.py +++ b/src/llamafactory/hparams/data_args.py @@ -45,10 +45,6 @@ class DataArguments: default=1024, metadata={"help": "The cutoff length of the tokenized inputs in the dataset."}, ) - reserved_label_len: int = field( - default=1, - metadata={"help": "The minimum cutoff length reserved for the tokenized labels in the dataset."}, - ) train_on_prompt: bool = field( default=False, metadata={"help": "Whether to disable the mask on the prompt or not."}, @@ -111,9 +107,6 @@ class DataArguments: ) def __post_init__(self): - if self.reserved_label_len >= self.cutoff_len: - raise ValueError("`reserved_label_len` must be smaller than `cutoff_len`.") - if self.streaming and self.val_size > 1e-6 and self.val_size < 1: raise ValueError("Streaming mode should 
have an integer val size.") diff --git a/tests/data/test_formatter.py b/tests/data/test_formatter.py index 37b21dc5..1845df24 100644 --- a/tests/data/test_formatter.py +++ b/tests/data/test_formatter.py @@ -28,7 +28,7 @@ def test_string_formatter(): def test_function_formatter(): - formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}\n"]) + formatter = FunctionFormatter(slots=[], tool_format="default") tool_calls = json.dumps({"name": "tool_name", "arguments": {"foo": "bar", "size": 10}}) assert formatter.apply(content=tool_calls) == [ """Action: tool_name\nAction Input: {\"foo\": \"bar\", \"size\": 10}\n""" @@ -36,7 +36,7 @@ def test_function_formatter(): def test_multi_function_formatter(): - formatter = FunctionFormatter(slots=["Action: {{name}}\nAction Input: {{arguments}}\n"]) + formatter = FunctionFormatter(slots=[], tool_format="default") tool_calls = json.dumps([{"name": "tool_name", "arguments": {"foo": "bar", "size": 10}}] * 2) assert formatter.apply(content=tool_calls) == [ """Action: tool_name\nAction Input: {\"foo\": \"bar\", \"size\": 10}\n""", diff --git a/tests/data/test_processor.py b/tests/data/test_processor.py new file mode 100644 index 00000000..fa8f7172 --- /dev/null +++ b/tests/data/test_processor.py @@ -0,0 +1,32 @@ +# Copyright 2024 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Tuple + +import pytest + +from llamafactory.data.processors.processor_utils import infer_seqlen + + +@pytest.mark.parametrize( + "test_input,test_output", + [ + ((3000, 2000, 1000), (600, 400)), + ((2000, 3000, 1000), (400, 600)), + ((1000, 100, 1000), (900, 100)), + ((100, 1000, 1000), (100, 900)), + ], +) +def test_infer_seqlen(test_input: Tuple[int, int, int], test_output: Tuple[int, int]): + assert test_output == infer_seqlen(*test_input) diff --git a/tests/data/test_template.py b/tests/data/test_template.py index 9d73c116..e4728a84 100644 --- a/tests/data/test_template.py +++ b/tests/data/test_template.py @@ -21,15 +21,60 @@ from llamafactory.data import get_template_and_fix_tokenizer TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3") +MESSAGES = [ + {"role": "user", "content": "How are you"}, + {"role": "assistant", "content": "I am fine!"}, + {"role": "user", "content": "你好"}, + {"role": "assistant", "content": "很高兴认识你!"}, +] + + +def test_encode_oneturn(): + tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) + template = get_template_and_fix_tokenizer(tokenizer, name="llama3") + prompt_ids, answer_ids = template.encode_oneturn(tokenizer, MESSAGES) + assert tokenizer.decode(prompt_ids) == ( + "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\nI am fine!<|eot_id|>" + "<|start_header_id|>user<|end_header_id|>\n\n你好<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\n" + ) + assert tokenizer.decode(answer_ids) == "很高兴认识你!<|eot_id|>" + + +def test_encode_multiturn(): + tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) + template = get_template_and_fix_tokenizer(tokenizer, name="llama3") + encoded_pairs = template.encode_multiturn(tokenizer, MESSAGES) + assert tokenizer.decode(encoded_pairs[0][0]) == ( + "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\n" + ) + assert tokenizer.decode(encoded_pairs[0][1]) == "I am fine!<|eot_id|>" + assert tokenizer.decode(encoded_pairs[1][0]) == ( + "<|start_header_id|>user<|end_header_id|>\n\n你好<|eot_id|>" + "<|start_header_id|>assistant<|end_header_id|>\n\n" + ) + assert tokenizer.decode(encoded_pairs[1][1]) == "很高兴认识你!<|eot_id|>" + def test_jinja_template(): tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA) get_template_and_fix_tokenizer(tokenizer, name="llama3") assert tokenizer.chat_template != ref_tokenizer.chat_template + assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES) - messages = [ - {"role": "user", "content": "hi!"}, - {"role": "assistant", "content": "hello there"}, - ] - assert tokenizer.apply_chat_template(messages) == ref_tokenizer.apply_chat_template(messages) + +def test_qwen_template(): + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct") + template = get_template_and_fix_tokenizer(tokenizer, name="qwen") + prompt_ids, answer_ids = template.encode_oneturn(tokenizer, MESSAGES) + assert tokenizer.decode(prompt_ids) == ( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + "<|im_start|>user\nHow are you<|im_end|>\n" + "<|im_start|>assistant\nI am fine!<|im_end|>\n" + "<|im_start|>user\n你好<|im_end|>\n" + "<|im_start|>assistant\n" + ) + assert tokenizer.decode(answer_ids) == "很高兴认识你!<|im_end|>" From 38c94d2e9c656902b86e0b76d913024772e9c7f1 Mon Sep 17 00:00:00 2001 
From: hiyouga <467089858@qq.com> Date: Mon, 1 Jul 2024 01:29:09 +0800 Subject: [PATCH 157/160] Update label_issue.yml Former-commit-id: fffa3defdda02ad579cb703c0704f94bad94f21a --- .github/workflows/label_issue.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/label_issue.yml b/.github/workflows/label_issue.yml index 0e10f0b9..ffd644a7 100644 --- a/.github/workflows/label_issue.yml +++ b/.github/workflows/label_issue.yml @@ -16,7 +16,7 @@ jobs: ISSUE_TITLE: ${{ github.event.issue.title }} run: | LABEL=pending - NPU_KEYWORDS=(npu ascend 昇腾) + NPU_KEYWORDS=(npu ascend huawei 华为 昇腾) ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]') for KEYWORD in ${NPU_KEYWORDS[@]}; do if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then From 884b49e662ed23fbe9be63062a1f3a0385bff0a4 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 1 Jul 2024 03:51:20 +0800 Subject: [PATCH 158/160] add eval acc Former-commit-id: 7ffde76fbfb6192e3aac31ccc098f31ce89181ae --- src/llamafactory/train/sft/metric.py | 24 +++++++++++++++++++++--- src/llamafactory/train/sft/trainer.py | 17 ++++++----------- src/llamafactory/train/sft/workflow.py | 7 ++++--- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 95bfcb69..72faef0a 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -17,9 +17,11 @@ # limitations under the License. from dataclasses import dataclass -from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union +from typing import TYPE_CHECKING, Dict import numpy as np +import torch +from transformers import EvalPrediction from transformers.utils import is_jieba_available, is_nltk_available from ...extras.constants import IGNORE_INDEX @@ -42,6 +44,22 @@ if is_rouge_available(): from rouge_chinese import Rouge +def compute_accuracy(eval_preds: "EvalPrediction") -> Dict[str, float]: + preds, labels = eval_preds.predictions, eval_preds.label_ids + accuracies = [] + for i in range(len(preds)): + pred, label = preds[i, 1:], labels[i, :-1] + label_mask = label != IGNORE_INDEX + accuracies.append(np.mean(pred[label_mask] == label[label_mask])) + + return {"accuracy": float(np.mean(accuracies))} + + +def eval_logit_processor(logits: "torch.Tensor", labels: "torch.Tensor") -> "torch.Tensor": + logits = logits[0] if isinstance(logits, (list, tuple)) else logits + return torch.argmax(logits, dim=-1) + + @dataclass class ComputeMetrics: r""" @@ -50,11 +68,11 @@ class ComputeMetrics: tokenizer: "PreTrainedTokenizer" - def __call__(self, eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]: + def __call__(self, eval_preds: "EvalPrediction") -> Dict[str, float]: r""" Uses the model predictions to compute metrics. 
""" - preds, labels = eval_preds + preds, labels = eval_preds.predictions, eval_preds.label_ids score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []} preds = np.where(preds != IGNORE_INDEX, preds, self.tokenizer.pad_token_id) diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py index 06bd2b6b..954bb69f 100644 --- a/src/llamafactory/train/sft/trainer.py +++ b/src/llamafactory/train/sft/trainer.py @@ -135,21 +135,16 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer): for i in range(len(preds)): pad_len = np.nonzero(preds[i] != self.tokenizer.pad_token_id)[0] - if len(pad_len): - preds[i] = np.concatenate( - (preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1 - ) # move pad token to last + if len(pad_len): # move pad token to last + preds[i] = np.concatenate((preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1) - decoded_inputs = self.tokenizer.batch_decode( - dataset["input_ids"], skip_special_tokens=True, clean_up_tokenization_spaces=False - ) - decoded_labels = self.tokenizer.batch_decode( - labels, skip_special_tokens=True, clean_up_tokenization_spaces=False - ) - decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True, clean_up_tokenization_spaces=True) + decoded_inputs = self.tokenizer.batch_decode(dataset["input_ids"], skip_special_tokens=True) + decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True) + decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True) with open(output_prediction_file, "w", encoding="utf-8") as writer: res: List[str] = [] for text, label, pred in zip(decoded_inputs, decoded_labels, decoded_preds): res.append(json.dumps({"prompt": text, "label": label, "predict": pred}, ensure_ascii=False)) + writer.write("\n".join(res)) diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py index 885bc7ac..0c3f9b11 100644 --- a/src/llamafactory/train/sft/workflow.py +++ b/src/llamafactory/train/sft/workflow.py @@ -25,7 +25,7 @@ from ...extras.misc import get_logits_processor from ...extras.ploting import plot_loss from ...model import load_model, load_tokenizer from ..trainer_utils import create_modelcard_and_push -from .metric import ComputeMetrics +from .metric import ComputeMetrics, compute_accuracy, eval_logit_processor from .trainer import CustomSeq2SeqTrainer @@ -72,7 +72,8 @@ def run_sft( finetuning_args=finetuning_args, data_collator=data_collator, callbacks=callbacks, - compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else None, + compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else compute_accuracy, + preprocess_logits_for_metrics=None if training_args.predict_with_generate else eval_logit_processor, **tokenizer_module, **split_dataset(dataset, data_args, training_args), ) @@ -91,7 +92,7 @@ def run_sft( trainer.save_metrics("train", train_result.metrics) trainer.save_state() if trainer.is_world_process_zero() and finetuning_args.plot_loss: - plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) + plot_loss(training_args.output_dir, keys=["loss", "eval_loss", "eval_accuracy"]) # Evaluation if training_args.do_eval: From 4357e4239171ad69f95dcf2cf12b6cf894a48450 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 1 Jul 2024 03:55:20 +0800 Subject: [PATCH 159/160] tiny fix Former-commit-id: 19e43c3a9ed771e991cb273d394ab28fb923f868 --- src/llamafactory/model/model_utils/attention.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 80d9d4b8..4bed7e21 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -35,7 +35,7 @@ def configure_attn_implementation( if model_args.flash_attn == "auto": logger.warning("Gemma-2 models should use eager attention in training, change `flash_attn` to disabled.") model_args.flash_attn = "disabled" - else: + elif model_args.flash_attn != "disabled": logger.warning( "Gemma-2 models should use eager attention in training, but you set `flash_attn: {}`. " "Will proceed at your own risk.".format(model_args.flash_attn) From 973cf8e980ab8154939a41b963bb782385458563 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Mon, 1 Jul 2024 05:43:17 +0800 Subject: [PATCH 160/160] tiny fix Former-commit-id: 5dd2e5c3323f56420b5845a5ed28bcd9d4da5e41 --- src/llamafactory/train/sft/metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py index 72faef0a..c69608c0 100644 --- a/src/llamafactory/train/sft/metric.py +++ b/src/llamafactory/train/sft/metric.py @@ -48,7 +48,7 @@ def compute_accuracy(eval_preds: "EvalPrediction") -> Dict[str, float]: preds, labels = eval_preds.predictions, eval_preds.label_ids accuracies = [] for i in range(len(preds)): - pred, label = preds[i, 1:], labels[i, :-1] + pred, label = preds[i, :-1], labels[i, 1:] label_mask = label != IGNORE_INDEX accuracies.append(np.mean(pred[label_mask] == label[label_mask]))
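
The final hunk above corrects the alignment used by compute_accuracy: eval_logit_processor takes the argmax of the causal-LM logits, so the prediction stored at position t is the model's guess for the token at position t + 1. Pairing preds[i, :-1] with labels[i, 1:] therefore compares each prediction with the token it was actually predicting, whereas the earlier preds[i, 1:] / labels[i, :-1] pairing was off by two positions. A minimal NumPy sketch of the two alignments, assuming the usual -100 sentinel for IGNORE_INDEX and a hand-made toy batch (not taken from the repository):

import numpy as np

IGNORE_INDEX = -100  # value used for masked label positions

# Toy batch: a perfect model's argmax at position t equals the token at position t + 1.
labels = np.array([[IGNORE_INDEX, 11, 12, 13]])
preds  = np.array([[11, 12, 13, 99]])  # the last position predicts past the end of the labels

pred, label = preds[0, :-1], labels[0, 1:]   # corrected alignment
mask = label != IGNORE_INDEX
print(np.mean(pred[mask] == label[mask]))    # 1.0: each prediction matches its next token

pred, label = preds[0, 1:], labels[0, :-1]   # pre-fix alignment, shifted two positions apart
mask = label != IGNORE_INDEX
print(np.mean(pred[mask] == label[mask]))    # 0.0: nothing lines up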
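
For reference, the infer_seqlen helper introduced in processor_utils.py splits the cutoff_len budget between prompt and response: a short response keeps its full length and the prompt absorbs the truncation, a short prompt does the opposite, and when both are long the budget is divided proportionally. A standalone sketch of that logic, checked against the worked cases from tests/data/test_processor.py:

from typing import Tuple

def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
    """Split a cutoff budget between the prompt (source) and the response (target)."""
    if target_len * 2 < cutoff_len:      # short response: keep it whole, truncate the prompt
        max_target_len = cutoff_len
    elif source_len * 2 < cutoff_len:    # short prompt: keep it whole, truncate the response
        max_target_len = cutoff_len - source_len
    else:                                # both long: split the budget proportionally
        max_target_len = int(cutoff_len * (target_len / (source_len + target_len)))

    new_target_len = min(max_target_len, target_len)
    new_source_len = max(cutoff_len - new_target_len, 0)
    return new_source_len, new_target_len

# Worked cases mirroring tests/data/test_processor.py:
assert infer_seqlen(3000, 2000, 1000) == (600, 400)  # both long: 1000 * 2000/5000 = 400 for the response
assert infer_seqlen(1000, 100, 1000) == (900, 100)   # short response kept whole, prompt cut to 900
assert infer_seqlen(100, 1000, 1000) == (100, 900)   # short prompt kept whole, response cut to 900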