Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-12-14 10:56:56 +08:00)
```diff
@@ -1215,6 +1215,9 @@ register_model_group(
        "YiVL-6B-Chat": {
            DownloadSource.DEFAULT: "BUAADreamer/Yi-VL-6B-hf",
        },
        "YiVL-34B-Chat": {
            DownloadSource.DEFAULT: "BUAADreamer/Yi-VL-34B-hf",
        },
    },
    template="yi_vl",
    vision=True,
```
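For orientation: `register_model_group` maps user-facing model names to download repositories and records each group's chat template and whether it accepts visual inputs. Below is a minimal sketch of what such a registry could look like; the table names (`SUPPORTED_MODELS`, `DEFAULT_TEMPLATE`, `VISION_MODELS`) and the function body are assumptions for illustration, not quoted from the repo.

```python
from collections import OrderedDict
from enum import Enum, unique
from typing import Dict, Optional

@unique
class DownloadSource(str, Enum):
    DEFAULT = "hf"     # Hugging Face Hub
    MODELSCOPE = "ms"  # ModelScope mirror

# Hypothetical registry tables, for illustration only.
SUPPORTED_MODELS: "OrderedDict[str, Dict[DownloadSource, str]]" = OrderedDict()
DEFAULT_TEMPLATE: Dict[str, str] = {}
VISION_MODELS: set = set()

def register_model_group(models: dict, template: Optional[str] = None, vision: bool = False) -> None:
    # Record each model's download sources, remember the group's chat
    # template, and flag multimodal groups.
    for name, sources in models.items():
        SUPPORTED_MODELS[name] = sources
        if template is not None:
            DEFAULT_TEMPLATE[name] = template
        if vision:
            VISION_MODELS.add(name)

register_model_group(
    models={"YiVL-6B-Chat": {DownloadSource.DEFAULT: "BUAADreamer/Yi-VL-6B-hf"}},
    template="yi_vl",
    vision=True,
)
print(SUPPORTED_MODELS["YiVL-6B-Chat"][DownloadSource.DEFAULT])  # BUAADreamer/Yi-VL-6B-hf
```

Under this sketch, the hunk above registers "YiVL-6B-Chat" and "YiVL-34B-Chat" with the `yi_vl` chat template and marks both as vision models.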
```diff
@@ -78,8 +78,15 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
     patch_tokenizer(tokenizer)
 
     if model_args.visual_inputs:
-        processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
-        setattr(processor, "tokenizer", tokenizer)
+        try:
+            processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
+            setattr(processor, "tokenizer", tokenizer)
+        except Exception:
+            raise ValueError(
+                "This multimodal LLM is not supported.\n"
+                "Download LLaVA-1.5 models from: https://huggingface.co/llava-hf\n"
+                "Download Yi-VL models from: https://huggingface.co/BUAADreamer"
+            )
     else:
         processor = None
```
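This change wraps processor loading in a try/except so that an unsupported multimodal checkpoint fails with an actionable message instead of a raw `AutoProcessor` traceback. A quick way to sanity-check a checkpoint outside the trainer, using only public `transformers` APIs; the model id is one of the repos the error message points to, and the `print` is illustrative:

```python
from transformers import AutoProcessor, AutoTokenizer

# Illustrative smoke test against a known-good multimodal checkpoint.
model_id = "llava-hf/llava-1.5-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

# Mirror the patch: attach the (separately patched) tokenizer to the
# processor so downstream code can reach both through one object.
setattr(processor, "tokenizer", tokenizer)
print(type(processor).__name__)  # e.g. LlavaProcessor
```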
```diff
@@ -58,7 +58,7 @@ class LlavaMultiModalProjectorForYiVLForVLLM(LlavaMultiModalProjectorForYiVL):
         self.linear_2 = torch.nn.LayerNorm(text_hidden_size, bias=True)
         self.linear_3 = torch.nn.Linear(text_hidden_size, text_hidden_size, bias=True)
         self.linear_4 = torch.nn.LayerNorm(text_hidden_size, bias=True)
-        self.act = torch.nn.GELU()
+        self.act = ACT2FN[projector_hidden_act]
 
 
 def autocast_projector_dtype(
```
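This one-line change swaps a hard-coded GELU for a lookup in transformers' `ACT2FN` table, so the projector's activation follows the checkpoint's `projector_hidden_act` setting instead of always being GELU. A minimal sketch of a projector in this style; the class name and the assumed Linear, LayerNorm, activation, Linear, LayerNorm forward order are illustrative, not quoted from the repo:

```python
import torch
from transformers.activations import ACT2FN  # maps names like "gelu" to activation modules

class YiVLProjectorSketch(torch.nn.Module):
    """Illustrative stand-in for the Yi-VL multimodal projector."""

    def __init__(self, vision_hidden_size: int, text_hidden_size: int, projector_hidden_act: str) -> None:
        super().__init__()
        self.linear_1 = torch.nn.Linear(vision_hidden_size, text_hidden_size, bias=True)
        self.linear_2 = torch.nn.LayerNorm(text_hidden_size, bias=True)
        self.linear_3 = torch.nn.Linear(text_hidden_size, text_hidden_size, bias=True)
        self.linear_4 = torch.nn.LayerNorm(text_hidden_size, bias=True)
        # The patched line: resolve the activation from its config name
        # instead of hard-coding torch.nn.GELU().
        self.act = ACT2FN[projector_hidden_act]

    def forward(self, image_features: torch.Tensor) -> torch.Tensor:
        h = self.linear_2(self.linear_1(image_features))
        h = self.act(h)
        return self.linear_4(self.linear_3(h))

# Quick shape check with made-up sizes:
proj = YiVLProjectorSketch(1024, 4096, "gelu")
print(proj(torch.randn(1, 576, 1024)).shape)  # torch.Size([1, 576, 4096])
```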