Mirror of https://github.com/hiyouga/LLaMA-Factory.git

commit b9eeaa9706
parent a0188a430f

    add some

    Former-commit-id: 785cc70ff205f5962c3ca67f453589e4a471ba8c
@@ -366,6 +366,7 @@ class CpmOPlugin(BasePlugin):
                 position_ids_ = list(range(input_ids_.size(0)))
                 # print(input_ids_.shape, len(position_ids_))
                 position_ids.append(position_ids_)
+            # TODO: add pad
             position_ids = torch.tensor(position_ids, dtype=torch.int64)
             mm_inputs.update({
                 "image_bound": image_bounds_list,
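The added TODO flags a real constraint here: torch.tensor can only stack the per-sample position id lists once they all share one length, so variable-length samples need padding first. A minimal sketch of what that padding could look like (the helper name and pad value are assumptions, not part of the commit):

    import torch

    # Hypothetical helper, not in the commit: right-pad each per-sample
    # position id list to the batch maximum so torch.tensor can stack them.
    def pad_position_ids(position_ids, pad_value=0):
        max_len = max(len(ids) for ids in position_ids)
        padded = [ids + [pad_value] * (max_len - len(ids)) for ids in position_ids]
        return torch.tensor(padded, dtype=torch.int64)

    print(pad_position_ids([[0, 1, 2], [0, 1]]))
    # tensor([[0, 1, 2],
    #         [0, 1, 0]])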
@@ -142,6 +142,10 @@ def get_forbidden_modules(config: "PretrainedConfig", finetuning_args: "Finetuni
             forbidden_modules.update({"visual.patch_embed", "visual.blocks", "model", "lm_head"})
         elif finetuning_args.freeze_vision_tower:
             forbidden_modules.add("visual")

+    elif model_type == "minicpmv":
+        if finetuning_args.freeze_vision_tower:
+            forbidden_modules.add("vpm")
+
     return forbidden_modules
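The new branch mirrors the qwen2_vl handling above it: MiniCPM-V keeps its vision tower under the vpm attribute, so freezing it means excluding every parameter under that prefix. A hedged sketch of how a forbidden-module set like this is typically consumed (the loop is illustrative, not the repository's code):

    # Illustrative only: freeze every parameter whose qualified name falls
    # under a forbidden module prefix, e.g. "vpm" for MiniCPM-V.
    def apply_forbidden_modules(model, forbidden_modules):
        for name, param in model.named_parameters():
            if any(name == m or name.startswith(m + ".") for m in forbidden_modules):
                param.requires_grad_(False)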
@@ -196,6 +200,8 @@ def patch_target_modules(
             return "^(?!.*vision_model).*(?:{}).*".format("|".join(target_modules))
         elif model_type == "qwen2_vl":
             return "^(?!.*visual).*(?:{}).*".format("|".join(target_modules))
+        elif model_type == "minicpmv":
+            return "^(?!.*vpm).*(?:{}).*".format("|".join(target_modules))
         else:
             return target_modules
     else:
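The returned pattern relies on a negative lookahead: LoRA target matching skips any module whose qualified name mentions vpm while still catching the usual projection layers elsewhere in the model. A quick check of that behavior (the module names below are made up for illustration):

    import re

    pattern = "^(?!.*vpm).*(?:{}).*".format("|".join(["q_proj", "v_proj"]))
    print(bool(re.match(pattern, "llm.model.layers.0.self_attn.q_proj")))   # True
    print(bool(re.match(pattern, "vpm.encoder.layers.0.self_attn.q_proj")))  # False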
@@ -138,12 +138,13 @@ def patch_model(
     add_valuehead: bool,
 ) -> None:
     gen_config = model.generation_config  # check and fix generation config
-    if not gen_config.do_sample and (
-        (gen_config.temperature is not None and gen_config.temperature != 1.0)
-        or (gen_config.top_p is not None and gen_config.top_p != 1.0)
-        or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
-    ):
-        gen_config.do_sample = True
+    if gen_config is not None:
+        if not gen_config.do_sample and (
+            (gen_config.temperature is not None and gen_config.temperature != 1.0)
+            or (gen_config.top_p is not None and gen_config.top_p != 1.0)
+            or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
+        ):
+            gen_config.do_sample = True

     if "GenerationMixin" not in str(model.generate.__func__):
         model.generate = MethodType(PreTrainedModel.generate, model)
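This hunk wraps the existing sampling fix-up in a None guard: some remote-code checkpoints can surface without a usable generation_config, and the old code would raise AttributeError on the first attribute access. The inner logic is unchanged: sampling knobs set to non-default values while do_sample is False is an inconsistent state, so do_sample is flipped on. A standalone illustration of that reconciliation (the constructed config values are made up):

    from transformers import GenerationConfig

    gen_config = GenerationConfig(temperature=0.7, do_sample=False)
    if gen_config is not None:
        if not gen_config.do_sample and (
            (gen_config.temperature is not None and gen_config.temperature != 1.0)
            or (gen_config.top_p is not None and gen_config.top_p != 1.0)
        ):
            gen_config.do_sample = True  # temperature was set, so enable sampling
    print(gen_config.do_sample)  # True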
@@ -24,6 +24,7 @@ import numpy as np
 import torch
 from transformers import Seq2SeqTrainer
 from typing_extensions import override
+import copy

 from ...extras import logging
 from ...extras.constants import IGNORE_INDEX
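Nothing in this excerpt shows the call site of the new import, but a deep copy is the usual tool when per-step trainer code mutates shared gen_kwargs or inputs. A generic sketch of that pattern (the dictionary and names are hypothetical):

    import copy

    gen_kwargs = {"max_new_tokens": 128, "do_sample": False}
    step_kwargs = copy.deepcopy(gen_kwargs)  # mutate a private copy per step
    step_kwargs["do_sample"] = True
    print(gen_kwargs["do_sample"])  # False: the shared dict is untouched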
@@ -122,7 +123,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
             labels = inputs.pop("labels", None)
         else:
             labels = inputs.get("labels")
-
         loss, generated_tokens, _ = super().prediction_step(
             model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys, **gen_kwargs
         )
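For context, the surrounding logic distinguishes dict.pop from dict.get on purpose: when generating, labels must be removed so they are never passed to model.generate, while in the loss-only path they stay inside inputs for the superclass to consume. A toy rendering of the difference (the flag below stands in for the trainer's actual condition):

    inputs = {"input_ids": [0, 1, 2], "labels": [0, 1, 2]}
    predict_with_generate = True  # stands in for self.args.predict_with_generate

    if predict_with_generate:
        labels = inputs.pop("labels", None)  # removed: generate() must not see them
    else:
        labels = inputs.get("labels")        # read but left in place for the loss
    print("labels" in inputs)  # False when generating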