diff --git a/README.md b/README.md
index c02c455e..b891004f 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,9 @@ Choose your path:
 - **Local machine**: Please refer to [usage](#getting-started)
 - **Documentation (WIP)**: https://llamafactory.readthedocs.io/zh-cn/latest/
 
+> [!NOTE]
+> Except for the above links, all other websites are unauthorized third-party websites. Please carefully use them.
+
 ## Table of Contents
 
 - [Features](#features)
diff --git a/README_zh.md b/README_zh.md
index 4dff0be7..7cb93eb7 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -31,6 +31,9 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
 - **入门教程**:https://zhuanlan.zhihu.com/p/695287607
 - **框架文档**:https://llamafactory.readthedocs.io/zh-cn/latest/
 
+> [!NOTE]
+> 除上述链接以外的其他网站均为未经许可的第三方网站,请小心甄别。
+
 ## 目录
 
 - [项目特色](#项目特色)
diff --git a/assets/wechat.jpg b/assets/wechat.jpg
index f2d57406..caebd4d8 100644
Binary files a/assets/wechat.jpg and b/assets/wechat.jpg differ
diff --git a/assets/wechat_npu.jpg b/assets/wechat_npu.jpg
index 7708e35a..e30714f7 100644
Binary files a/assets/wechat_npu.jpg and b/assets/wechat_npu.jpg differ
diff --git a/requirements.txt b/requirements.txt
index e913c58d..69489bec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-transformers>=4.41.2,<=4.45.0
+transformers>=4.41.2,<=4.45.2
 datasets>=2.16.0,<=2.21.0
 accelerate>=0.30.1,<=0.34.2
 peft>=0.11.1,<=0.12.0
diff --git a/setup.py b/setup.py
index a80cb81b..f42cf7d1 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ extra_require = {
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.6.0"],
+    "vllm": ["vllm>=0.4.3,<=0.6.2"],
     "galore": ["galore-torch"],
     "badam": ["badam>=1.2.1"],
     "adam-mini": ["adam-mini"],
diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py
index 5293c512..ffc8c9ad 100644
--- a/src/llamafactory/__init__.py
+++ b/src/llamafactory/__init__.py
@@ -20,7 +20,7 @@ Level:
 
 Dependency graph:
   main:
-    transformers>=4.41.2,<=4.45.0
+    transformers>=4.41.2,<=4.45.2
     datasets>=2.16.0,<=2.21.0
     accelerate>=0.30.1,<=0.34.2
     peft>=0.11.1,<=0.12.0
@@ -28,9 +28,9 @@ Dependency graph:
   attention:
     transformers>=4.42.4 (gemma+fa2)
   longlora:
-    transformers>=4.41.2,<=4.45.0
+    transformers>=4.41.2,<=4.45.2
   packing:
-    transformers>=4.41.2,<=4.45.0
+    transformers>=4.41.2,<=4.45.2
 
 Disable version checking: DISABLE_VERSION_CHECK=1
 Enable VRAM recording: RECORD_VRAM=1
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index bf30245d..28ad2295 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -357,9 +357,7 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args:
     Gets chat template and fixes the tokenizer.
""" if data_args.template in ["llava", "paligemma", "qwen2_vl"]: - require_version( - "transformers>=4.45.0.dev0", "To fix: pip install git+https://github.com/huggingface/transformers.git" - ) + require_version("transformers>=4.45.0", "To fix: pip install transformers>=4.45.0") require_version("accelerate>=0.34.0", "To fix: pip install accelerate>=0.34.0") if data_args.template is None: diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index de034ef6..7d0a457a 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -79,7 +79,7 @@ def check_dependencies() -> None: if os.environ.get("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"]: logger.warning("Version checking has been disabled, may lead to unexpected behaviors.") else: - require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0") + require_version("transformers>=4.41.2,<=4.45.2", "To fix: pip install transformers>=4.41.2,<=4.45.2") require_version("datasets>=2.16.0,<=2.21.0", "To fix: pip install datasets>=2.16.0,<=2.21.0") require_version("accelerate>=0.30.1,<=0.34.2", "To fix: pip install accelerate>=0.30.1,<=0.34.2") require_version("peft>=0.11.1,<=0.12.0", "To fix: pip install peft>=0.11.1,<=0.12.0") diff --git a/src/llamafactory/hparams/parser.py b/src/llamafactory/hparams/parser.py index fd112607..68a5d2f6 100644 --- a/src/llamafactory/hparams/parser.py +++ b/src/llamafactory/hparams/parser.py @@ -57,7 +57,7 @@ def _parse_args(parser: "HfArgumentParser", args: Optional[Dict[str, Any]] = Non if args is not None: return parser.parse_dict(args) - if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"): + if len(sys.argv) == 2 and (sys.argv[1].endswith(".yaml") or sys.argv[1].endswith(".yml")): return parser.parse_yaml_file(os.path.abspath(sys.argv[1])) if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): @@ -123,7 +123,7 @@ def _check_extra_dependencies( require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6") if model_args.infer_backend == "vllm": - require_version("vllm>=0.4.3,<=0.6.0", "To fix: pip install vllm>=0.4.3,<=0.6.0") + require_version("vllm>=0.4.3,<=0.6.2", "To fix: pip install vllm>=0.4.3,<=0.6.2") if finetuning_args.use_galore: require_version("galore_torch", "To fix: pip install galore_torch") diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index c9e10e40..911d01bf 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -82,6 +82,8 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule": padding_side="right", **init_kwargs, ) + except Exception as e: + raise OSError("Failed to load tokenizer.") from e if model_args.new_special_tokens is not None: num_added_tokens = tokenizer.add_special_tokens( @@ -97,12 +99,13 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule": try: processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs) patch_processor(processor, config, tokenizer, model_args) - except Exception: + except Exception as e: + logger.warning("Processor was not found: {}.".format(e)) processor = None # Avoid load tokenizer, see: # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/auto/processing_auto.py#L324 - if "Processor" not in processor.__class__.__name__: + if processor is not None and "Processor" not in processor.__class__.__name__: processor = None return {"tokenizer": tokenizer, "processor": processor} 
diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py
index b341653a..e87e5b8b 100644
--- a/src/llamafactory/model/model_utils/longlora.py
+++ b/src/llamafactory/model/model_utils/longlora.py
@@ -353,7 +353,7 @@ def llama_sdpa_attention_forward(
 
 
 def _apply_llama_patch() -> None:
-    require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
+    require_version("transformers>=4.41.2,<=4.45.2", "To fix: pip install transformers>=4.41.2,<=4.45.2")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
diff --git a/src/llamafactory/model/model_utils/packing.py b/src/llamafactory/model/model_utils/packing.py
index d52731b8..2ae3a6ff 100644
--- a/src/llamafactory/model/model_utils/packing.py
+++ b/src/llamafactory/model/model_utils/packing.py
@@ -114,7 +114,7 @@ def get_unpad_data(attention_mask: "torch.Tensor") -> Tuple["torch.Tensor", "tor
 
 
 def _patch_for_block_diag_attn(model_type: str) -> None:
-    require_version("transformers>=4.41.2,<=4.45.0", "To fix: pip install transformers>=4.41.2,<=4.45.0")
+    require_version("transformers>=4.41.2,<=4.45.2", "To fix: pip install transformers>=4.41.2,<=4.45.2")
     if is_transformers_version_greater_than_4_43():
         import transformers.modeling_flash_attention_utils
 
diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index 06d41af5..126e9723 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -110,6 +110,9 @@ def patch_config(
     if getattr(config, "model_type", None) == "qwen2" and is_trainable and model_args.flash_attn == "fa2":
         setattr(config, "use_cache", False)  # qwen2 does not support use_cache when using flash attn
 
+    if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
+        raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
+
     # deepspeed zero3 is not compatible with low_cpu_mem_usage
     init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())
 
diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py
index 9928e7b9..1fd202aa 100644
--- a/src/llamafactory/train/tuner.py
+++ b/src/llamafactory/train/tuner.py
@@ -139,5 +139,5 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None:
                 model_args.export_hub_model_id, token=model_args.hf_hub_token
             )
 
-    except Exception:
-        logger.warning("Cannot save tokenizer, please copy the files manually.")
+    except Exception as e:
+        logger.warning("Cannot save tokenizer, please copy the files manually: {}.".format(e))
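For completeness, a standalone sketch of the architecture guard added in patcher.py. `DummyConfig` and `reject_non_hf_llava` are hypothetical names used only for illustration; the `getattr(config, "architectures", [])` membership test and the error message come from the diff.

```python
# Standalone illustration of the llava-format guard added in patcher.py.
class DummyConfig:
    # Hypothetical stand-in for a transformers PretrainedConfig belonging to
    # an original-format (non hf-compatible) llava checkpoint.
    architectures = ["LlavaLlamaForCausalLM"]


def reject_non_hf_llava(config) -> None:
    # Fail fast with a pointer to the hf-compatible weights instead of
    # letting model loading fail later with a less obvious error.
    if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
        raise ValueError(
            "Please download llava models with hf-compatible format: https://huggingface.co/llava-hf"
        )


if __name__ == "__main__":
    try:
        reject_non_hf_llava(DummyConfig())
    except ValueError as err:
        print(err)  # the guard triggers for the original llava architecture name
```

Raising while the config is being patched keeps the error close to its cause; by the time `from_pretrained` failed on the original checkpoint layout, the message would be much harder to act on.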