mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-11-05 10:22:15 +08:00
Compare commits
No commits in common. "a442fa90ad4b567990ec511e7e774f074ef479e4" and "47a7dc16986031f56ed4e2360ddeead5acfafad8" have entirely different histories.
a442fa90ad
...
47a7dc1698
@ -283,7 +283,7 @@ Choose your path:
|
|||||||
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
||||||
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||||
| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
|
| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
|
||||||
| [Hunyuan (MT)](https://huggingface.co/tencent/) | 7B | hunyuan |
|
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
||||||
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
||||||
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
||||||
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
|
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
|
||||||
|
|||||||
@ -285,7 +285,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
|
|||||||
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
| [GPT-OSS](https://huggingface.co/openai) | 20B/120B | gpt |
|
||||||
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
| [Granite 3.0-3.3](https://huggingface.co/ibm-granite) | 1B/2B/3B/8B | granite3 |
|
||||||
| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
|
| [Granite 4](https://huggingface.co/ibm-granite) | 7B | granite4 |
|
||||||
| [Hunyuan (MT)](https://huggingface.co/tencent/) | 7B | hunyuan |
|
| [Hunyuan](https://huggingface.co/tencent/) | 7B | hunyuan |
|
||||||
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
| [Index](https://huggingface.co/IndexTeam) | 1.9B | index |
|
||||||
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
| [InternLM 2-3](https://huggingface.co/internlm) | 7B/8B/20B | intern2 |
|
||||||
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
|
| [InternVL 2.5-3.5](https://huggingface.co/OpenGVLab) | 1B/2B/4B/8B/14B/30B/38B/78B/241B | intern_vl |
|
||||||
|
|||||||
@ -1201,10 +1201,10 @@ register_template(
|
|||||||
|
|
||||||
register_template(
|
register_template(
|
||||||
name="hunyuan",
|
name="hunyuan",
|
||||||
format_user=StringFormatter(slots=["{{content}}<|extra_0|>"]),
|
format_user=StringFormatter(slots=["<|bos|>user\n{{content}}<|eos|>\n<|bos|>assistant\n"]),
|
||||||
format_assistant=StringFormatter(slots=["{{content}}<|eos|>"]),
|
format_assistant=StringFormatter(slots=["{{content}}<|eos|>\n"]),
|
||||||
format_system=StringFormatter(slots=["{{content}}<|extra_4|>"]),
|
format_system=StringFormatter(slots=["<|bos|>system\n{{content}}<|eos|>\n"]),
|
||||||
format_prefix=EmptyFormatter(slots=["<|startoftext|>"]),
|
format_prefix=EmptyFormatter(slots=["<|bos|>"]),
|
||||||
stop_words=["<|eos|>"],
|
stop_words=["<|eos|>"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -1152,10 +1152,6 @@ register_model_group(
|
|||||||
DownloadSource.DEFAULT: "tencent/Hunyuan-7B-Instruct",
|
DownloadSource.DEFAULT: "tencent/Hunyuan-7B-Instruct",
|
||||||
DownloadSource.MODELSCOPE: "AI-ModelScope/Hunyuan-7B-Instruct",
|
DownloadSource.MODELSCOPE: "AI-ModelScope/Hunyuan-7B-Instruct",
|
||||||
},
|
},
|
||||||
"Hunyuan-MT-7B-Instruct": {
|
|
||||||
DownloadSource.DEFAULT: "tencent/Hunyuan-MT-7B",
|
|
||||||
DownloadSource.MODELSCOPE: "Tencent-Hunyuan/Hunyuan-MT-7B",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
template="hunyuan",
|
template="hunyuan",
|
||||||
)
|
)
|
||||||
|
|||||||
@ -14,6 +14,8 @@
|
|||||||
|
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
|
||||||
|
|
||||||
from ...extras import logging
|
from ...extras import logging
|
||||||
from ...extras.constants import AttentionFunction
|
from ...extras.constants import AttentionFunction
|
||||||
|
|
||||||
@ -28,8 +30,6 @@ logger = logging.get_logger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def configure_attn_implementation(config: "PretrainedConfig", model_args: "ModelArguments") -> None:
|
def configure_attn_implementation(config: "PretrainedConfig", model_args: "ModelArguments") -> None:
|
||||||
from transformers.utils import is_flash_attn_2_available
|
|
||||||
|
|
||||||
if getattr(config, "model_type", None) == "gemma2":
|
if getattr(config, "model_type", None) == "gemma2":
|
||||||
if model_args.flash_attn == AttentionFunction.AUTO or model_args.flash_attn == AttentionFunction.FA2:
|
if model_args.flash_attn == AttentionFunction.AUTO or model_args.flash_attn == AttentionFunction.FA2:
|
||||||
if is_flash_attn_2_available():
|
if is_flash_attn_2_available():
|
||||||
@ -51,8 +51,6 @@ def configure_attn_implementation(config: "PretrainedConfig", model_args: "Model
|
|||||||
requested_attn_implementation = "eager"
|
requested_attn_implementation = "eager"
|
||||||
|
|
||||||
elif model_args.flash_attn == AttentionFunction.SDPA:
|
elif model_args.flash_attn == AttentionFunction.SDPA:
|
||||||
from transformers.utils import is_torch_sdpa_available
|
|
||||||
|
|
||||||
if not is_torch_sdpa_available():
|
if not is_torch_sdpa_available():
|
||||||
logger.warning_rank0("torch>=2.1.1 is required for SDPA attention.")
|
logger.warning_rank0("torch>=2.1.1 is required for SDPA attention.")
|
||||||
return
|
return
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user