diff --git a/scripts/api_example/test_image.py b/scripts/api_example/test_image.py index 34a937e5..77d6d7c8 100644 --- a/scripts/api_example/test_image.py +++ b/scripts/api_example/test_image.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/api_example/test_toolcall.py b/scripts/api_example/test_toolcall.py index 04410db0..6a0a6f38 100644 --- a/scripts/api_example/test_toolcall.py +++ b/scripts/api_example/test_toolcall.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/convert_ckpt/llamafy_baichuan2.py b/scripts/convert_ckpt/llamafy_baichuan2.py index 077703f6..75e849b2 100644 --- a/scripts/convert_ckpt/llamafy_baichuan2.py +++ b/scripts/convert_ckpt/llamafy_baichuan2.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,15 +19,10 @@ from typing import Any, Dict import fire import torch +from huggingface_hub import split_torch_state_dict_into_shards from safetensors.torch import save_file from tqdm import tqdm -from transformers.modeling_utils import ( - SAFE_WEIGHTS_INDEX_NAME, - SAFE_WEIGHTS_NAME, - WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - shard_checkpoint, -) +from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME CONFIG_NAME = "config.json" @@ -40,34 +35,42 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu") baichuan2_state_dict.update(shard_weight) - llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() + llama_state_dict: Dict[str, torch.Tensor] = OrderedDict() for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"): if "W_pack" in key: proj_size = value.size(0) // 3 - llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] - llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :] - llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :] + llama_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] + llama_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :] + llama_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :] elif "lm_head" in key: - llama2_state_dict[key] = torch.nn.functional.normalize(value) + llama_state_dict[key] = torch.nn.functional.normalize(value) else: - llama2_state_dict[key] = value + llama_state_dict[key] = value weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME - shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) - - for shard_file, shard in tqdm(shards.items(), desc="Save weights"): + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + state_dict_split = split_torch_state_dict_into_shards( + llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size + ) + for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"): + shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors} if save_safetensors: save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - if index is None: - print(f"Model weights saved in {os.path.join(output_dir, WEIGHTS_NAME)}") + if not state_dict_split.is_sharded: + print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") else: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: json.dump(index, f, indent=2, sort_keys=True) - print(f"Model weights saved in {output_dir}") + + print(f"Model weights saved in {output_dir}.") def save_config(input_dir: str, output_dir: str): @@ -81,6 +84,7 @@ def save_config(input_dir: str, output_dir: str): with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: json.dump(llama2_config_dict, f, indent=2) + print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}") diff --git a/scripts/convert_ckpt/llamafy_qwen.py b/scripts/convert_ckpt/llamafy_qwen.py index beabe4b1..bb3fe519 100644 --- a/scripts/convert_ckpt/llamafy_qwen.py +++ b/scripts/convert_ckpt/llamafy_qwen.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,11 @@ from typing import Any, Dict import fire import torch +from huggingface_hub import split_torch_state_dict_into_shards from safetensors import safe_open from safetensors.torch import save_file from tqdm import tqdm -from transformers.modeling_utils import ( - SAFE_WEIGHTS_INDEX_NAME, - SAFE_WEIGHTS_NAME, - WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - shard_checkpoint, -) +from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME from transformers.utils import check_min_version @@ -49,60 +44,68 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso for key in f.keys(): qwen_state_dict[key] = f.get_tensor(key) - llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() + llama_state_dict: Dict[str, torch.Tensor] = OrderedDict() torch_dtype = None for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"): if torch_dtype is None: torch_dtype = value.dtype if "wte" in key: - llama2_state_dict["model.embed_tokens.weight"] = value + llama_state_dict["model.embed_tokens.weight"] = value elif "ln_f" in key: - llama2_state_dict["model.norm.weight"] = value + llama_state_dict["model.norm.weight"] = value else: key = key.replace("transformer.h", "model.layers") if "attn.c_attn" in key: proj_size = value.size(0) // 3 - llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] - llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[ + llama_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] + llama_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[ proj_size : 2 * proj_size, ... ] - llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...] + llama_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...] elif "attn.c_proj" in key: - llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value - llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like( + llama_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value + llama_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like( value[:, 0] ).squeeze() elif "ln_1" in key: - llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value + llama_state_dict[key.replace("ln_1", "input_layernorm")] = value elif "ln_2" in key: - llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value + llama_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value elif "mlp.w1" in key: - llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value + llama_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value elif "mlp.w2" in key: - llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value + llama_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value elif "mlp.c_proj" in key: - llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value + llama_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value elif "lm_head" in key: - llama2_state_dict[key] = value + llama_state_dict[key] = value else: raise KeyError(f"Unable to process key {key}") weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME - shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) - - for shard_file, shard in tqdm(shards.items(), desc="Save weights"): + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + state_dict_split = split_torch_state_dict_into_shards( + llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size + ) + for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"): + shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors} if save_safetensors: save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - if index is None: - print(f"Model weights saved in {os.path.join(output_dir, weights_name)}") + if not state_dict_split.is_sharded: + print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") else: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: json.dump(index, f, indent=2, sort_keys=True) - print(f"Model weights saved in {output_dir}") + + print(f"Model weights saved in {output_dir}.") return str(torch_dtype).replace("torch.", "") @@ -134,6 +137,7 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str): with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: json.dump(llama2_config_dict, f, indent=2) + print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}") diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 447890f4..dd10b525 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,4 +1,4 @@ -# Copyright 2024 Tencent Inc. and the LlamaFactory team. +# Copyright 2025 Tencent Inc. and the LlamaFactory team. # # This code is inspired by the Tencent's LLaMA-Pro library. # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py @@ -18,20 +18,15 @@ import json import os from collections import OrderedDict -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Dict import fire import torch +from huggingface_hub import split_torch_state_dict_into_shards from safetensors.torch import save_file from tqdm import tqdm from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel -from transformers.modeling_utils import ( - SAFE_WEIGHTS_INDEX_NAME, - SAFE_WEIGHTS_NAME, - WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - shard_checkpoint, -) +from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME if TYPE_CHECKING: @@ -46,41 +41,36 @@ def block_expansion( model_name_or_path: str, output_dir: str, num_expand: int, - shard_size: str = "2GB", + shard_size: str = "5GB", save_safetensors: bool = True, ): r""" - Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. + Performs block expansion for LLaMA, Mistral, Qwen2 or Yi models. Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 """ - config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) + config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) num_layers = getattr(config, "num_hidden_layers") - setattr(config, "num_hidden_layers", num_layers + num_expand) - config.save_pretrained(output_dir) - - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - tokenizer.save_pretrained(output_dir) - - config = AutoConfig.from_pretrained(model_name_or_path) # load the original one - if save_safetensors: - setattr(config, "tie_word_embeddings", False) # safetensors does not allow shared weights - - model = AutoModelForCausalLM.from_pretrained( - model_name_or_path, - config=config, - torch_dtype="auto", - trust_remote_code=True, - low_cpu_mem_usage=True, - ) - assert isinstance(model, PreTrainedModel) # type hint - state_dict = model.state_dict() - if num_layers % num_expand != 0: raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.") + setattr(config, "num_hidden_layers", num_layers + num_expand) + config.save_pretrained(output_dir) + + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) + tokenizer.save_pretrained(output_dir) + + print(f"Expanding model of {num_layers} layers to {num_layers + num_expand} layers.") + model = AutoModelForCausalLM.from_pretrained( + model_name_or_path, torch_dtype="auto", device_map="cpu", trust_remote_code=True, low_cpu_mem_usage=True + ) + assert isinstance(model, PreTrainedModel) # type hint + if save_safetensors and getattr(model.config, "tie_word_embeddings", False): + del model.lm_head # safetensors does not allow shared weights + split = num_layers // num_expand layer_cnt = 0 - output_state_dict = OrderedDict() + state_dict = model.state_dict() + output_state_dict: Dict[str, "torch.Tensor"] = OrderedDict() for i in range(num_layers): for key, value in state_dict.items(): if f".{i:d}." in key: @@ -104,17 +94,24 @@ def block_expansion( output_state_dict[key] = value weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME - shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name) - - for shard_file, shard in tqdm(shards.items(), desc="Save weights"): + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + state_dict_split = split_torch_state_dict_into_shards( + output_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size + ) + for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"): + shard = {tensor: output_state_dict[tensor].contiguous() for tensor in tensors} if save_safetensors: save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - if index is None: + if not state_dict_split.is_sharded: print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") else: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: json.dump(index, f, indent=2, sort_keys=True) diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 5e640843..83e38e88 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -1,4 +1,4 @@ -# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# Copyright 2025 HuggingFace Inc. and the LlamaFactory team. # # This code is based on the HuggingFace's PEFT library. # https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index f7092f0e..3be11fbf 100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -1,4 +1,4 @@ -# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# Copyright 2025 HuggingFace Inc. and the LlamaFactory team. # # This code is based on the HuggingFace's PEFT library. # https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py diff --git a/scripts/stat_utils/cal_flops.py b/scripts/stat_utils/cal_flops.py index 2bcf697b..a9eb033f 100644 --- a/scripts/stat_utils/cal_flops.py +++ b/scripts/stat_utils/cal_flops.py @@ -1,4 +1,4 @@ -# Copyright 2024 Microsoft Corporation and the LlamaFactory team. +# Copyright 2025 Microsoft Corporation and the LlamaFactory team. # # This code is inspired by the Microsoft's DeepSpeed library. # https://www.deepspeed.ai/tutorials/flops-profiler/ diff --git a/scripts/stat_utils/cal_lr.py b/scripts/stat_utils/cal_lr.py index 21206a28..85921d90 100644 --- a/scripts/stat_utils/cal_lr.py +++ b/scripts/stat_utils/cal_lr.py @@ -1,4 +1,4 @@ -# Copyright 2024 imoneoi and the LlamaFactory team. +# Copyright 2025 imoneoi and the LlamaFactory team. # # This code is inspired by the imoneoi's OpenChat library. # https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py diff --git a/scripts/stat_utils/cal_mfu.py b/scripts/stat_utils/cal_mfu.py index edc6e234..ef5672d2 100644 --- a/scripts/stat_utils/cal_mfu.py +++ b/scripts/stat_utils/cal_mfu.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/stat_utils/cal_ppl.py b/scripts/stat_utils/cal_ppl.py index 32d50e64..768202f9 100644 --- a/scripts/stat_utils/cal_ppl.py +++ b/scripts/stat_utils/cal_ppl.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/stat_utils/length_cdf.py b/scripts/stat_utils/length_cdf.py index 5cf25347..275549ba 100644 --- a/scripts/stat_utils/length_cdf.py +++ b/scripts/stat_utils/length_cdf.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/vllm_infer.py b/scripts/vllm_infer.py index 796d5b98..2550f5ba 100644 --- a/scripts/vllm_infer.py +++ b/scripts/vllm_infer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup.py b/setup.py index 908552da..0802efbe 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/api.py b/src/api.py index ad2e8cbb..61215459 100644 --- a/src/api.py +++ b/src/api.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 0b5d8cce..9b807697 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/app.py b/src/llamafactory/api/app.py index 50b53e9e..b3c136de 100644 --- a/src/llamafactory/api/app.py +++ b/src/llamafactory/api/app.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index c467a3e6..6959f4d8 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/common.py b/src/llamafactory/api/common.py index d1ac94de..59c84de6 100644 --- a/src/llamafactory/api/common.py +++ b/src/llamafactory/api/common.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index c6fe6f75..310e743e 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/__init__.py b/src/llamafactory/chat/__init__.py index 07276d48..15d8b9ba 100644 --- a/src/llamafactory/chat/__init__.py +++ b/src/llamafactory/chat/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/base_engine.py b/src/llamafactory/chat/base_engine.py index 700e1eef..60a8e007 100644 --- a/src/llamafactory/chat/base_engine.py +++ b/src/llamafactory/chat/base_engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 7b5e3bbe..88e22aaa 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index ee9c4c8c..86c36f0a 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 72085e2d..5ce7a964 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index ea1a02f2..247d8cf0 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 82bbfafb..aafe1536 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/data_utils.py b/src/llamafactory/data/data_utils.py index bd5d3587..9c2f527c 100644 --- a/src/llamafactory/data/data_utils.py +++ b/src/llamafactory/data/data_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index f6c24468..ac558770 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 3c7e34a4..7e972c88 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 709d0c90..493eba68 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index c5a10ec9..27363791 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index b670da44..37b8154d 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index a594c984..242febfc 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index 8e13d100..95198623 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 83bd8ba2..d7dc7379 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index e21ebd42..d85f28dd 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index e87643da..5615557c 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -642,7 +642,7 @@ _register_template( _register_template( name="empty", - efficient_eos=True, + format_assistant=StringFormatter(slots=["{{content}}"]), ) diff --git a/src/llamafactory/data/tool_utils.py b/src/llamafactory/data/tool_utils.py index 44cf86cc..6132e982 100644 --- a/src/llamafactory/data/tool_utils.py +++ b/src/llamafactory/data/tool_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 82e37f41..99758dd2 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # This code is inspired by the Dan's test library. # https://github.com/hendrycks/test/blob/master/evaluate_flan.py diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index e6ab46a5..e1454097 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index d570cc7a..90cd21e6 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/extras/ploting.py b/src/llamafactory/extras/ploting.py index 3e372a38..d05970d2 100644 --- a/src/llamafactory/extras/ploting.py +++ b/src/llamafactory/extras/ploting.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/__init__.py b/src/llamafactory/hparams/__init__.py index 254a845e..9bcc4295 100644 --- a/src/llamafactory/hparams/__init__.py +++ b/src/llamafactory/hparams/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/evaluation_args.py b/src/llamafactory/hparams/evaluation_args.py index a7f221ca..ec1867e8 100644 --- a/src/llamafactory/hparams/evaluation_args.py +++ b/src/llamafactory/hparams/evaluation_args.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index fa71907e..52e37a3b 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/generating_args.py b/src/llamafactory/hparams/generating_args.py index 377fea15..db3306d6 100644 --- a/src/llamafactory/hparams/generating_args.py +++ b/src/llamafactory/hparams/generating_args.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py index b93f2ad1..169b042a 100644 --- a/src/llamafactory/launcher.py +++ b/src/llamafactory/launcher.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 48cfe76c..1957ff8d 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 9edd87dd..2602d5a3 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 113ddafa..19e57099 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 8ec74351..3302de2e 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py index 497bac16..199b53c3 100644 --- a/src/llamafactory/model/model_utils/embedding.py +++ b/src/llamafactory/model/model_utils/embedding.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/liger_kernel.py b/src/llamafactory/model/model_utils/liger_kernel.py index 294e828c..16623873 100644 --- a/src/llamafactory/model/model_utils/liger_kernel.py +++ b/src/llamafactory/model/model_utils/liger_kernel.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/misc.py b/src/llamafactory/model/model_utils/misc.py index f3228638..e5f8ce5f 100644 --- a/src/llamafactory/model/model_utils/misc.py +++ b/src/llamafactory/model/model_utils/misc.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/mod.py b/src/llamafactory/model/model_utils/mod.py index ec73af00..5f67cd50 100644 --- a/src/llamafactory/model/model_utils/mod.py +++ b/src/llamafactory/model/model_utils/mod.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index cd5558d3..4e520d5c 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 4f4fd499..899cc971 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ b/src/llamafactory/model/model_utils/unsloth.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py index a1eed179..ace90f75 100644 --- a/src/llamafactory/model/model_utils/valuehead.py +++ b/src/llamafactory/model/model_utils/valuehead.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index cf110af9..aab2052d 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index 5906a4a6..41c83819 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/dpo/__init__.py b/src/llamafactory/train/dpo/__init__.py index 9ce0d089..73c1a4a6 100644 --- a/src/llamafactory/train/dpo/__init__.py +++ b/src/llamafactory/train/dpo/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/kto/__init__.py b/src/llamafactory/train/kto/__init__.py index a1900368..491b067e 100644 --- a/src/llamafactory/train/kto/__init__.py +++ b/src/llamafactory/train/kto/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/ppo/__init__.py b/src/llamafactory/train/ppo/__init__.py index 161f6f5d..ed9bc4d2 100644 --- a/src/llamafactory/train/ppo/__init__.py +++ b/src/llamafactory/train/ppo/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index 11ce25a9..55b79b4e 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/pt/__init__.py b/src/llamafactory/train/pt/__init__.py index d80e6f22..1f5c2898 100644 --- a/src/llamafactory/train/pt/__init__.py +++ b/src/llamafactory/train/pt/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 1e692204..3024004d 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/rm/__init__.py b/src/llamafactory/train/rm/__init__.py index 48278315..f0e8a45c 100644 --- a/src/llamafactory/train/rm/__init__.py +++ b/src/llamafactory/train/rm/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/rm/metric.py b/src/llamafactory/train/rm/metric.py index 17e47cc1..6f08b107 100644 --- a/src/llamafactory/train/rm/metric.py +++ b/src/llamafactory/train/rm/metric.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/sft/__init__.py b/src/llamafactory/train/sft/__init__.py index 475dfe5f..6107a9ae 100644 --- a/src/llamafactory/train/sft/__init__.py +++ b/src/llamafactory/train/sft/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/test_utils.py b/src/llamafactory/train/test_utils.py index 55e6c199..9404c249 100644 --- a/src/llamafactory/train/test_utils.py +++ b/src/llamafactory/train/test_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index bbbef1cf..4e60e2f0 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index 3c598cf9..39602947 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index e8f1e097..dc30196a 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/__init__.py b/src/llamafactory/webui/components/__init__.py index 715fb6e4..eb3c9d4c 100644 --- a/src/llamafactory/webui/components/__init__.py +++ b/src/llamafactory/webui/components/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/chatbot.py b/src/llamafactory/webui/components/chatbot.py index 840c190d..5c2e7586 100644 --- a/src/llamafactory/webui/components/chatbot.py +++ b/src/llamafactory/webui/components/chatbot.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/data.py b/src/llamafactory/webui/components/data.py index e62e1823..1dbc68d5 100644 --- a/src/llamafactory/webui/components/data.py +++ b/src/llamafactory/webui/components/data.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 39a12026..7be0a5b4 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 7f4b46e6..c5034222 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/infer.py b/src/llamafactory/webui/components/infer.py index 6647cd7a..c2d63ee6 100644 --- a/src/llamafactory/webui/components/infer.py +++ b/src/llamafactory/webui/components/infer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index 467e3112..978f93cd 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 28aa3a8c..4f037313 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/control.py b/src/llamafactory/webui/control.py index b8087af6..abadd045 100644 --- a/src/llamafactory/webui/control.py +++ b/src/llamafactory/webui/control.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/css.py b/src/llamafactory/webui/css.py index c4445e8e..a48a21c5 100644 --- a/src/llamafactory/webui/css.py +++ b/src/llamafactory/webui/css.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index 3b18eeb9..2708139d 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/interface.py b/src/llamafactory/webui/interface.py index d6387339..6dff348e 100644 --- a/src/llamafactory/webui/interface.py +++ b/src/llamafactory/webui/interface.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index f7846c53..30f4517b 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index 18332ac0..3b6f5a9a 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index cc8c6cc5..a587befd 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/train.py b/src/train.py index 6703ffdb..eba846a0 100644 --- a/src/train.py +++ b/src/train.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/webui.py b/src/webui.py index d0f00ea6..16b8335b 100644 --- a/src/webui.py +++ b/src/webui.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_feedback.py b/tests/data/processors/test_feedback.py index c04e823b..a70c6e1d 100644 --- a/tests/data/processors/test_feedback.py +++ b/tests/data/processors/test_feedback.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_pairwise.py b/tests/data/processors/test_pairwise.py index da50ca24..7602d070 100644 --- a/tests/data/processors/test_pairwise.py +++ b/tests/data/processors/test_pairwise.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_processor_utils.py b/tests/data/processors/test_processor_utils.py index 692fcaa1..9cf31220 100644 --- a/tests/data/processors/test_processor_utils.py +++ b/tests/data/processors/test_processor_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_supervised.py b/tests/data/processors/test_supervised.py index 965429a6..2a988e84 100644 --- a/tests/data/processors/test_supervised.py +++ b/tests/data/processors/test_supervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_unsupervised.py b/tests/data/processors/test_unsupervised.py index c59fa5b2..c3f3159f 100644 --- a/tests/data/processors/test_unsupervised.py +++ b/tests/data/processors/test_unsupervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_collator.py b/tests/data/test_collator.py index dcb53d6b..23a045ae 100644 --- a/tests/data/test_collator.py +++ b/tests/data/test_collator.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_formatter.py b/tests/data/test_formatter.py index 4eb2072b..2aaf48a2 100644 --- a/tests/data/test_formatter.py +++ b/tests/data/test_formatter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py index c9084af0..7dce8118 100644 --- a/tests/data/test_mm_plugin.py +++ b/tests/data/test_mm_plugin.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_template.py b/tests/data/test_template.py index e6b6ed2b..d6eaeda3 100644 --- a/tests/data/test_template.py +++ b/tests/data/test_template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/e2e/test_chat.py b/tests/e2e/test_chat.py index b95646d7..98818f27 100644 --- a/tests/e2e/test_chat.py +++ b/tests/e2e/test_chat.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/e2e/test_train.py b/tests/e2e/test_train.py index 71cda495..46d7813c 100644 --- a/tests/e2e/test_train.py +++ b/tests/e2e/test_train.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py index f85d9d57..eddc1640 100644 --- a/tests/eval/test_eval_template.py +++ b/tests/eval/test_eval_template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 35f3284d..a3deda29 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py index cdf62807..ef38d0d5 100644 --- a/tests/model/model_utils/test_checkpointing.py +++ b/tests/model/model_utils/test_checkpointing.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_misc.py b/tests/model/model_utils/test_misc.py new file mode 100644 index 00000000..b2c8b3bf --- /dev/null +++ b/tests/model/model_utils/test_misc.py @@ -0,0 +1,43 @@ +# Copyright 2025 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from transformers import AutoConfig, AutoModelForCausalLM + +from llamafactory.model.model_utils.misc import find_expanded_modules + + +HF_TOKEN = os.getenv("HF_TOKEN") + + +@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.") +def test_expanded_modules(): + config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") + with torch.device("meta"): + model = AutoModelForCausalLM.from_config(config) + + expanded_modules = find_expanded_modules(model, ["q_proj", "v_proj"], num_layer_trainable=4) + assert expanded_modules == [ + "model.layers.7.self_attn.q_proj", + "model.layers.7.self_attn.v_proj", + "model.layers.15.self_attn.q_proj", + "model.layers.15.self_attn.v_proj", + "model.layers.23.self_attn.q_proj", + "model.layers.23.self_attn.v_proj", + "model.layers.31.self_attn.q_proj", + "model.layers.31.self_attn.v_proj", + ] diff --git a/tests/model/model_utils/test_packing.py b/tests/model/model_utils/test_packing.py index bee21691..81e0d66a 100644 --- a/tests/model/model_utils/test_packing.py +++ b/tests/model/model_utils/test_packing.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_visual.py b/tests/model/model_utils/test_visual.py index b4e23def..66d91ca6 100644 --- a/tests/model/model_utils/test_visual.py +++ b/tests/model/model_utils/test_visual.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 6b6aa8b8..9e8c5048 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index 964f52c9..cd9fc61c 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 6990a0e9..3bd9c9e8 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index e1d2148e..2a4177ce 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 7bfdac51..d1e4114c 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/train/test_sft_trainer.py b/tests/train/test_sft_trainer.py index 75231d20..bb67a31e 100644 --- a/tests/train/test_sft_trainer.py +++ b/tests/train/test_sft_trainer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.