mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-16 11:50:35 +08:00
Support InternLM3 Dense 8B Model (#6640)
* support internlm3
* update
* update
* update
* add hint

Former-commit-id: 24ab7ae0944c5f373e9cac60f0332e704824a057
This commit is contained in:
@@ -24,6 +24,7 @@ from transformers.modeling_utils import is_fsdp_enabled
|
||||
|
||||
from ..extras import logging
|
||||
from ..extras.misc import infer_optim_dtype
|
||||
from ..extras.packages import is_transformers_version_greater_than
|
||||
from .model_utils.attention import configure_attn_implementation, print_attn_implementation
|
||||
from .model_utils.checkpointing import prepare_model_for_training
|
||||
from .model_utils.embedding import resize_embedding_layer
|
||||
@@ -117,6 +118,9 @@ def patch_config(
|
||||
if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
|
||||
raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
|
||||
|
||||
if getattr(config, "model_type", None) == "internlm3" and not is_transformers_version_greater_than("4.47.1"):
|
||||
raise RuntimeError("InternLM3 model requires transformers >= 4.47.1, please upgrade it.")
|
||||
|
||||
# deepspeed zero3 is not compatible with low_cpu_mem_usage
|
||||
init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())
|
||||
|
||||
|
||||
Reference in New Issue
Block a user