Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-23 22:32:54 +08:00)

Merge branch 'hiyouga:main' into pixtral-patch
Former-commit-id: 93a441a6b746e9a933dad8c45553fb5b68bf2b34
Commit: 5523a6fd2c
@@ -30,6 +30,9 @@ Choose your path:
 - **Local machine**: Please refer to [usage](#getting-started)
 - **Documentation (WIP)**: https://llamafactory.readthedocs.io/zh-cn/latest/

+> [!NOTE]
+> Except for the above links, all other websites are unauthorized third-party websites. Please carefully use them.
+
 ## Table of Contents

 - [Features](#features)
@@ -31,6 +31,9 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 - **Getting started tutorial**: https://zhuanlan.zhihu.com/p/695287607
 - **Framework documentation**: https://llamafactory.readthedocs.io/zh-cn/latest/

+> [!NOTE]
+> Except for the links above, all other websites are unauthorized third-party websites; please screen them carefully.
+
 ## Table of Contents

 - [Features](#项目特色)
(Two binary image assets changed, not shown: 164 KiB → 199 KiB and 167 KiB → 168 KiB.)
@@ -1,4 +1,4 @@
-transformers>=4.41.2,<=4.45.0
+transformers>=4.41.2,<=4.45.2
 datasets>=2.16.0,<=2.21.0
 accelerate>=0.30.1,<=0.34.2
 peft>=0.11.1,<=0.12.0
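As a quick way to verify that an existing environment already satisfies these pins, a small standalone sketch (not part of the repository) using `importlib.metadata` and `packaging`:

```python
from importlib.metadata import PackageNotFoundError, version

from packaging.version import Version

# Lower/upper bounds copied from the requirements above.
PINS = {
    "transformers": ("4.41.2", "4.45.2"),
    "datasets": ("2.16.0", "2.21.0"),
    "accelerate": ("0.30.1", "0.34.2"),
    "peft": ("0.11.1", "0.12.0"),
}

for pkg, (low, high) in PINS.items():
    try:
        got = Version(version(pkg))
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
        continue
    ok = Version(low) <= got <= Version(high)
    print(f"{pkg}: {got} ({'ok' if ok else 'outside the pinned range'})")
```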
setup.py (2 changed lines):
@@ -54,7 +54,7 @@ extra_require = {
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.6.0"],
+    "vllm": ["vllm>=0.4.3,<=0.6.2"],
     "galore": ["galore-torch"],
     "badam": ["badam>=1.2.1"],
     "adam-mini": ["adam-mini"],
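These keys are optional extras fed to `setup()` via `extras_require` and installed with e.g. `pip install -e ".[vllm]"`. A minimal sketch of the wiring; the package name and the trimmed-down extras table here are illustrative, not the repository's actual metadata:

```python
from setuptools import find_packages, setup

# Illustrative only: the real setup.py defines the full metadata and many more extras.
setup(
    name="example-package",
    version="0.0.1",
    packages=find_packages(),
    extras_require={
        "vllm": ["vllm>=0.4.3,<=0.6.2"],
        "galore": ["galore-torch"],
    },
)
```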
@@ -20,7 +20,7 @@ Level:

 Dependency graph:
 main:
-  transformers>=4.41.2,<=4.45.0
+  transformers>=4.41.2,<=4.45.2
   datasets>=2.16.0,<=2.21.0
   accelerate>=0.30.1,<=0.34.2
   peft>=0.11.1,<=0.12.0
@@ -28,9 +28,9 @@ Dependency graph:
 attention:
   transformers>=4.42.4 (gemma+fa2)
 longlora:
-  transformers>=4.41.2,<=4.45.0
+  transformers>=4.41.2,<=4.45.2
 packing:
-  transformers>=4.41.2,<=4.45.0
+  transformers>=4.41.2,<=4.45.2

 Disable version checking: DISABLE_VERSION_CHECK=1
 Enable VRAM recording: RECORD_VRAM=1
@@ -357,9 +357,7 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
     Gets chat template and fixes the tokenizer.
     """
     if data_args.template in ["llava", "paligemma", "qwen2_vl"]:
-        require_version(
-            "transformers>=4.45.0.dev0", "To fix: pip install git+https://github.com/huggingface/transformers.git"
-        )
+        require_version("transformers>=4.45.0", "To fix: pip install transformers>=4.45.0")
         require_version("accelerate>=0.34.0", "To fix: pip install accelerate>=0.34.0")

     if data_args.template is None:
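This drops the dependency on a development build of transformers (`4.45.0.dev0` installed from git) in favor of the released `4.45.0` for the multimodal templates. A standalone sketch of how such a gate fails, assuming `require_version` raises an `ImportError`-compatible exception when the installed version falls outside the requirement:

```python
# Standalone sketch; assumes a version mismatch surfaces as an ImportError-compatible exception.
from transformers.utils.versions import require_version

try:
    require_version("transformers>=4.45.0", "To fix: pip install transformers>=4.45.0")
    print("multimodal templates are usable")
except ImportError as err:
    print(f"multimodal templates unavailable: {err}")
```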
@@ -79,7 +79,7 @@ def check_dependencies() -> None:
     if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]:
         logger.warning("Version checking has been disabled, may lead to unexpected behaviors.")
     else:
-        require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
+        require_version("transformers>=4.41.2,<=4.45.2", "To fix: pip install transformers>=4.41.2,<=4.45.2")
         require_version("datasets>=2.16.0,<=2.21.0", "To fix: pip install datasets>=2.16.0,<=2.21.0")
         require_version("accelerate>=0.30.1,<=0.34.2", "To fix: pip install accelerate>=0.30.1,<=0.34.2")
         require_version("peft>=0.11.1,<=0.12.0", "To fix: pip install peft>=0.11.1,<=0.12.0")
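As the hunk shows, the whole check is skipped whenever `DISABLE_VERSION_CHECK` parses as truthy. A tiny sketch of the accepted values, mirroring the condition above:

```python
import os


def version_check_disabled() -> bool:
    # Mirrors the condition above: "1", "true", "True", "TRUE" all disable the check.
    return os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]


os.environ["DISABLE_VERSION_CHECK"] = "True"
assert version_check_disabled()

os.environ["DISABLE_VERSION_CHECK"] = "0"
assert not version_check_disabled()
```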
@@ -57,7 +57,7 @@ def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = Non
     if args is not None:
         return parser.parse_dict(args)

-    if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"):
+    if len(sys.argv) == 2 and (sys.argv[1].endswith(".yaml") or sys.argv[1].endswith(".yml")):
         return parser.parse_yaml_file(os.path.abspath(sys.argv[1]))

     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
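With this change a config file ending in `.yml` is parsed the same way as one ending in `.yaml`. A self-contained sketch of the dispatch; the dataclass and its fields are hypothetical stand-ins for the project's own argument dataclasses:

```python
import os
import sys
from dataclasses import dataclass

from transformers import HfArgumentParser


@dataclass
class ExampleArguments:  # hypothetical; stands in for the project's argument dataclasses
    model_name_or_path: str = "meta-llama/Meta-Llama-3-8B-Instruct"
    learning_rate: float = 1.0e-4


def parse(argv=None):
    argv = argv if argv is not None else sys.argv
    parser = HfArgumentParser(ExampleArguments)
    if len(argv) == 2 and argv[1].endswith((".yaml", ".yml")):
        return parser.parse_yaml_file(os.path.abspath(argv[1]))
    if len(argv) == 2 and argv[1].endswith(".json"):
        return parser.parse_json_file(os.path.abspath(argv[1]))
    return parser.parse_args_into_dataclasses(argv[1:])
```

Assuming the CLI forwards `sys.argv` unchanged, `llamafactory-cli train config.yml` should now behave like the `.yaml` spelling.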
@@ -123,7 +123,7 @@ def _check_extra_dependencies(
         require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6")

     if model_args.infer_backend == "vllm":
-        require_version("vllm>=0.4.3,<=0.6.0", "To fix: pip install vllm>=0.4.3,<=0.6.0")
+        require_version("vllm>=0.4.3,<=0.6.2", "To fix: pip install vllm>=0.4.3,<=0.6.2")

     if finetuning_args.use_galore:
         require_version("galore_torch", "To fix: pip install galore_torch")
@@ -82,6 +82,8 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
             padding_side="right",
             **init_kwargs,
         )
+    except Exception as e:
+        raise OSError("Failed to load tokenizer.") from e

     if model_args.new_special_tokens is not None:
         num_added_tokens = tokenizer.add_special_tokens(
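The new `except` clause turns any tokenizer-loading failure into an `OSError` while keeping the original cause attached through exception chaining. A minimal sketch of that pattern:

```python
from transformers import AutoTokenizer


def load_tokenizer_strict(path: str):
    # Sketch of the behavior above: any failure surfaces as OSError, and the
    # original exception stays visible in the traceback via "from e".
    try:
        return AutoTokenizer.from_pretrained(path, padding_side="right")
    except Exception as e:
        raise OSError("Failed to load tokenizer.") from e
```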
@@ -97,12 +99,13 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
     try:
         processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
         patch_processor(processor, config, tokenizer, model_args)
-    except Exception:
+    except Exception as e:
+        logger.warning("Processor was not found: {}.".format(e))
         processor = None

     # Avoid load tokenizer, see:
     # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/auto/processing_auto.py#L324
-    if "Processor" not in processor.__class__.__name__:
+    if processor is not None and "Processor" not in processor.__class__.__name__:
         processor = None

     return {"tokenizer": tokenizer, "processor": processor}
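Two fixes here: the warning now records why the processor could not be loaded, and the class-name check is guarded with `processor is not None`, so text-only models (which never get a processor) no longer hit an `AttributeError` on `processor.__class__`. A small sketch of the guarded check:

```python
def normalize_processor(processor):
    # Sketch of the logic above: keep the object only if it is a real *Processor;
    # the None guard matters when loading already failed and processor is None.
    if processor is not None and "Processor" not in processor.__class__.__name__:
        processor = None
    return processor


assert normalize_processor(None) is None
assert normalize_processor("not a processor") is None
```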
@@ -353,7 +353,7 @@ def llama_sdpa_attention_forward(


 def _apply_llama_patch() -> None:
-    require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
+    require_version("transformers>=4.41.2,<=4.45.2", "To fix: pip install transformers>=4.41.2,<=4.45.2")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
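The patch works by reassigning `forward` on the attention classes, so every existing and future instance picks up the replacement at once. A toy sketch of that monkey-patching pattern (the classes here are stand-ins, not the real LLaMA modules):

```python
class ToyAttention:  # stand-in for LlamaAttention
    def forward(self, x):
        return x


def patched_forward(self, x):
    # Stand-in for the patched attention variant.
    return 2 * x


ToyAttention.forward = patched_forward  # the patch applies to all instances at once
assert ToyAttention().forward(3) == 6
```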
@@ -114,7 +114,7 @@ def get_unpad_data(attention_mask: "torch.Tensor") -> Tuple["torch.Tensor", "tor


 def _patch_for_block_diag_attn(model_type: str) -> None:
-    require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
+    require_version("transformers>=4.41.2,<=4.45.2", "To fix: pip install transformers>=4.41.2,<=4.45.2")
     if is_transformers_version_greater_than_4_43():
         import transformers.modeling_flash_attention_utils

@@ -110,6 +110,9 @@ def patch_config(
     if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
         setattr(config, "use_cache", False)  # qwen2 does not support use_cache when using flash attn

+    if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
+        raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
+
     # deepspeed zero3 is not compatible with low_cpu_mem_usage
     init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())

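The new guard rejects checkpoints in the original LLaVA layout by inspecting the `architectures` field of the config. A quick sketch for checking a checkpoint before training; the model id is only an example of the HF-compatible format the error message points to, and the printed value is what such checkpoints are expected to report:

```python
from transformers import AutoConfig

# Example id; any checkpoint from https://huggingface.co/llava-hf should work here.
config = AutoConfig.from_pretrained("llava-hf/llava-1.5-7b-hf")
# Expected to list something like "LlavaForConditionalGeneration",
# not the rejected "LlavaLlamaForCausalLM".
print(getattr(config, "architectures", []))
```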
@@ -139,5 +139,5 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
                 model_args.export_hub_model_id, token=model_args.hf_hub_token
             )

-    except Exception:
-        logger.warning("Cannot save tokenizer, please copy the files manually.")
+    except Exception as e:
+        logger.warning("Cannot save tokenizer, please copy the files manually: {}.".format(e))