From 994049380d4cb18b23733cc2ecaa4caa4ddc46a9 Mon Sep 17 00:00:00 2001
From: fzc8578 <1428195643@qq.com>
Date: Fri, 10 Jan 2025 20:55:52 +0800
Subject: [PATCH] fix some

Former-commit-id: 15bbcdf8d3265f4154d3937719da5e54a5963355
---
 src/llamafactory/data/mm_plugin.py     |  3 ++-
 src/llamafactory/model/loader.py       |  2 +-
 src/llamafactory/model/patcher.py      | 13 ++++++-------
 src/llamafactory/train/sft/trainer.py  |  1 +
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py
index 8c2a4dd0..3a102d59 100644
--- a/src/llamafactory/data/mm_plugin.py
+++ b/src/llamafactory/data/mm_plugin.py
@@ -281,7 +281,7 @@ class CpmOPlugin(BasePlugin):
         mm_inputs = self._get_mm_inputs(images, videos, processor)
 
         pattern = "(<image>./</image>)"
-        images, image_sizes, _ = mm_inputs["pixel_values"], mm_inputs["image_sizes"], mm_inputs["tgt_sizes"]
+        images, image_sizes = mm_inputs["pixel_values"], mm_inputs["image_sizes"]
 
         image_index = 0
         for index, message in enumerate(messages):
@@ -334,6 +334,7 @@ class CpmOPlugin(BasePlugin):
                 new_images.append(images[idx : idx + valid_image_nums])
                 idx += valid_image_nums
             images = new_images
+
         image_inputs = image_processor(
             images, do_pad=True, max_slice_nums=image_processor.max_slice_nums, return_tensors="pt"
         )
diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py
index 6aadb866..022cce06 100644
--- a/src/llamafactory/model/loader.py
+++ b/src/llamafactory/model/loader.py
@@ -100,7 +100,7 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
         processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
         patch_processor(processor, config, tokenizer, model_args)
     except Exception as e:
-        logger.info(f"Processor was not found: {e}.")
+        logger.debug(f"Processor was not found: {e}.")
         processor = None
 
     # Avoid load tokenizer, see:
diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py
index 7fe8c023..2ce84e86 100644
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -138,13 +138,12 @@ def patch_model(
     add_valuehead: bool,
 ) -> None:
     gen_config = model.generation_config  # check and fix generation config
-    if gen_config is not None:
-        if not gen_config.do_sample and (
-            (gen_config.temperature is not None and gen_config.temperature != 1.0)
-            or (gen_config.top_p is not None and gen_config.top_p != 1.0)
-            or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
-        ):
-            gen_config.do_sample = True
+    if not gen_config.do_sample and (
+        (gen_config.temperature is not None and gen_config.temperature != 1.0)
+        or (gen_config.top_p is not None and gen_config.top_p != 1.0)
+        or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
+    ):
+        gen_config.do_sample = True
 
     if "GenerationMixin" not in str(model.generate.__func__):
         model.generate = MethodType(PreTrainedModel.generate, model)
diff --git a/src/llamafactory/train/sft/trainer.py b/src/llamafactory/train/sft/trainer.py
index bfe693d9..28ec25eb 100644
--- a/src/llamafactory/train/sft/trainer.py
+++ b/src/llamafactory/train/sft/trainer.py
@@ -122,6 +122,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
             labels = inputs.pop("labels", None)
         else:
             labels = inputs.get("labels")
+
         loss, generated_tokens, _ = super().prediction_step(
             model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys, **gen_kwargs
         )
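
Reviewer note, not part of the patch: the second mm_plugin.py hunk only adds a blank line, but the surrounding loop is the interesting part. It partitions the flat `images` list into per-sample sublists according to `valid_image_nums_ls` before handing them to the image processor. A minimal, self-contained sketch of that grouping logic, with toy values invented for illustration:

# Sketch of the grouping loop in CpmOPlugin (toy data, not the real inputs).
# `valid_image_nums_ls` holds how many images belong to each sample.
images = ["img_a", "img_b", "img_c", "img_d"]
valid_image_nums_ls = [1, 3]

new_images = []
idx = 0
for valid_image_nums in valid_image_nums_ls:
    # Slice this sample's images out of the flat list.
    new_images.append(images[idx : idx + valid_image_nums])
    idx += valid_image_nums
images = new_images

print(images)  # [['img_a'], ['img_b', 'img_c', 'img_d']]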
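
Reviewer note, not part of the patch: the patcher.py hunk drops the `if gen_config is not None:` guard, presumably because every model loaded through patch_model here carries a generation config. The check it keeps repairs an inconsistent config: `do_sample=False` combined with non-default sampling knobs (temperature, top_p, typical_p) draws warnings from recent transformers releases at generate() time. A minimal sketch of the rule, using a standalone `transformers.GenerationConfig` as a stand-in for `model.generation_config`:

# Demonstrates the consistency fix applied by patch_model.
from transformers import GenerationConfig

# do_sample=False while sampling-only parameters are set: inconsistent.
gen_config = GenerationConfig(do_sample=False, temperature=0.7, top_p=0.9)

if not gen_config.do_sample and (
    (gen_config.temperature is not None and gen_config.temperature != 1.0)
    or (gen_config.top_p is not None and gen_config.top_p != 1.0)
    or (gen_config.typical_p is not None and gen_config.typical_p != 1.0)
):
    gen_config.do_sample = True  # make the config self-consistent

print(gen_config.do_sample)  # True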