From 549f35b1fd04f98ca9f46e2f904262f75dfeed49 Mon Sep 17 00:00:00 2001
From: BUAADreamer <1428195643@qq.com>
Date: Thu, 25 Apr 2024 21:27:48 +0800
Subject: [PATCH] modify style

Former-commit-id: fc0fa9f048aa7ac2f74da15c0250bae5c46cf386
---
 src/llmtuner/hparams/model_args.py   |  14 +---
 src/llmtuner/train/sftmm/__init__.py |   4 --
 src/llmtuner/train/sftmm/metric.py   |  61 ----------------
 src/llmtuner/train/sftmm/trainer.py  |  39 -----------
 src/llmtuner/train/sftmm/workflow.py | 101 ---------------------------
 5 files changed, 2 insertions(+), 217 deletions(-)
 delete mode 100644 src/llmtuner/train/sftmm/__init__.py
 delete mode 100644 src/llmtuner/train/sftmm/metric.py
 delete mode 100644 src/llmtuner/train/sftmm/trainer.py
 delete mode 100644 src/llmtuner/train/sftmm/workflow.py

diff --git a/src/llmtuner/hparams/model_args.py b/src/llmtuner/hparams/model_args.py
index df1a5ec0..97b908e4 100644
--- a/src/llmtuner/hparams/model_args.py
+++ b/src/llmtuner/hparams/model_args.py
@@ -188,18 +188,8 @@ class ModelArguments:
         if self.new_special_tokens is not None:  # support multiple special tokens
             self.new_special_tokens = [token.strip() for token in self.new_special_tokens.split(",")]
 
-        assert self.quantization_bit in [
-            None,
-            8,
-            4,
-        ], "We only accept 4-bit or 8-bit quantization."
-        assert self.export_quantization_bit in [
-            None,
-            8,
-            4,
-            3,
-            2,
-        ], "We only accept 2/3/4/8-bit quantization."
+        assert self.quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization."
+        assert self.export_quantization_bit in [None, 8, 4, 3, 2], "We only accept 2/3/4/8-bit quantization."
 
         if self.export_quantization_bit is not None and self.export_quantization_dataset is None:
             raise ValueError("Quantization dataset is necessary for exporting.")
diff --git a/src/llmtuner/train/sftmm/__init__.py b/src/llmtuner/train/sftmm/__init__.py
deleted file mode 100644
index 9ebdf821..00000000
--- a/src/llmtuner/train/sftmm/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .workflow import run_sft_mm
-
-
-__all__ = ["run_sft_mm"]
diff --git a/src/llmtuner/train/sftmm/metric.py b/src/llmtuner/train/sftmm/metric.py
deleted file mode 100644
index d1af4c17..00000000
--- a/src/llmtuner/train/sftmm/metric.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union
-
-import numpy as np
-
-from ...extras.constants import IGNORE_INDEX
-from ...extras.packages import is_jieba_available, is_nltk_available, is_rouge_available
-
-
-if TYPE_CHECKING:
-    from transformers.tokenization_utils import PreTrainedTokenizer
-
-if is_jieba_available():
-    import jieba  # type: ignore
-
-if is_nltk_available():
-    from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
-
-if is_rouge_available():
-    from rouge_chinese import Rouge
-
-
-@dataclass
-class ComputeMetrics:
-    r"""
-    Wraps the tokenizer into metric functions, used in Seq2SeqPeftTrainer.
-    """
-
-    tokenizer: "PreTrainedTokenizer"
-
-    def __call__(self, eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]:
-        r"""
-        Uses the model predictions to compute metrics.
- """ - preds, labels = eval_preds - score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []} - - preds = np.where(preds != IGNORE_INDEX, preds, self.tokenizer.pad_token_id) - labels = np.where(labels != IGNORE_INDEX, labels, self.tokenizer.pad_token_id) - - decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True) - decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True) - - for pred, label in zip(decoded_preds, decoded_labels): - hypothesis = list(jieba.cut(pred)) - reference = list(jieba.cut(label)) - - if len(" ".join(hypothesis).split()) == 0 or len(" ".join(reference).split()) == 0: - result = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}} - else: - rouge = Rouge() - scores = rouge.get_scores(" ".join(hypothesis), " ".join(reference)) - result = scores[0] - - for k, v in result.items(): - score_dict[k].append(round(v["f"] * 100, 4)) - - bleu_score = sentence_bleu([list(label)], list(pred), smoothing_function=SmoothingFunction().method3) - score_dict["bleu-4"].append(round(bleu_score * 100, 4)) - - return {k: float(np.mean(v)) for k, v in score_dict.items()} diff --git a/src/llmtuner/train/sftmm/trainer.py b/src/llmtuner/train/sftmm/trainer.py deleted file mode 100644 index 270e7169..00000000 --- a/src/llmtuner/train/sftmm/trainer.py +++ /dev/null @@ -1,39 +0,0 @@ -from types import MethodType -from typing import TYPE_CHECKING, Optional - -import torch -from transformers import Seq2SeqTrainer - -from ...extras.logging import get_logger -from ..utils import create_custom_optimzer, create_custom_scheduler - - -if TYPE_CHECKING: - from ...hparams import FinetuningArguments - -logger = get_logger(__name__) - - -class CustomSeq2SeqTrainer(Seq2SeqTrainer): - r""" - Inherits Seq2SeqTrainer to compute generative metrics such as BLEU and ROUGE. 
- """ - - def __init__(self, finetuning_args: "FinetuningArguments", **kwargs) -> None: - super().__init__(**kwargs) - self.finetuning_args = finetuning_args - if finetuning_args.use_badam: - from badam import clip_grad_norm_for_sparse_tensor - - self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator) - - def create_optimizer(self) -> "torch.optim.Optimizer": - if self.optimizer is None: - self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args) - return super().create_optimizer() - - def create_scheduler( - self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None - ) -> "torch.optim.lr_scheduler.LRScheduler": - create_custom_scheduler(self.args, num_training_steps, optimizer) - return super().create_scheduler(num_training_steps, optimizer) diff --git a/src/llmtuner/train/sftmm/workflow.py b/src/llmtuner/train/sftmm/workflow.py deleted file mode 100644 index dbda2d05..00000000 --- a/src/llmtuner/train/sftmm/workflow.py +++ /dev/null @@ -1,101 +0,0 @@ -# Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/summarization/run_summarization.py -from typing import TYPE_CHECKING, List, Optional - -from transformers import DataCollatorForSeq2Seq - -from ...data import get_dataset -from ...extras.constants import IGNORE_INDEX -from ...extras.misc import get_logits_processor -from ...extras.ploting import plot_loss -from ...model import load_model, load_processor -from ..sft.metric import ComputeMetrics -from ..utils import create_modelcard_and_push -from .trainer import CustomSeq2SeqTrainer - - -if TYPE_CHECKING: - from transformers import Seq2SeqTrainingArguments, TrainerCallback - - from ...hparams import ( - DataArguments, - FinetuningArguments, - GeneratingArguments, - ModelArguments, - ) - - -def run_sft_mm( - model_args: "ModelArguments", - data_args: "DataArguments", - training_args: "Seq2SeqTrainingArguments", - finetuning_args: "FinetuningArguments", - generating_args: "GeneratingArguments", - callbacks: Optional[List["TrainerCallback"]] = None, -): - processor = load_processor(model_args) - tokenizer = processor.tokenizer - dataset = get_dataset(tokenizer, model_args, data_args, training_args, "sft", processor) - model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train) - if getattr(model, "is_quantized", False) and not training_args.do_train: - setattr(model, "_hf_peft_config_loaded", True) # hack here: make model compatible with prediction - train_dataset = dataset - eval_dataset = dataset - data_collator = DataCollatorForSeq2Seq( - tokenizer=tokenizer, - pad_to_multiple_of=(8 if tokenizer.padding_side == "right" else None), # for shift short attention - label_pad_token_id=(IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id), - ) - - # Override the decoding parameters of Seq2SeqTrainer - training_args.generation_max_length = training_args.generation_max_length or data_args.cutoff_len - training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams - training_args.remove_unused_columns = False - - # Initialize our Trainer - trainer = CustomSeq2SeqTrainer( - model=model, - args=training_args, - finetuning_args=finetuning_args, - tokenizer=tokenizer, - data_collator=data_collator, - callbacks=callbacks, - compute_metrics=(ComputeMetrics(tokenizer) if training_args.predict_with_generate else None), - train_dataset=train_dataset, - eval_dataset=eval_dataset, - ) - - # 
Keyword arguments for `model.generate` - gen_kwargs = generating_args.to_dict() - gen_kwargs["eos_token_id"] = [tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids - gen_kwargs["pad_token_id"] = tokenizer.pad_token_id - gen_kwargs["logits_processor"] = get_logits_processor() - - # Training - if training_args.do_train: - train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) - trainer.save_model() - trainer.log_metrics("train", train_result.metrics) - trainer.save_metrics("train", train_result.metrics) - trainer.save_state() - if trainer.is_world_process_zero() and finetuning_args.plot_loss: - plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) - - # Evaluation - if training_args.do_eval: - metrics = trainer.evaluate(metric_key_prefix="eval", **gen_kwargs) - if training_args.predict_with_generate: # eval_loss will be wrong if predict_with_generate is enabled - metrics.pop("eval_loss", None) - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - # Predict - if training_args.do_predict: - predict_results = trainer.predict(dataset, metric_key_prefix="predict", **gen_kwargs) - if training_args.predict_with_generate: # predict_loss will be wrong if predict_with_generate is enabled - predict_results.metrics.pop("predict_loss", None) - trainer.log_metrics("predict", predict_results.metrics) - trainer.save_metrics("predict", predict_results.metrics) - trainer.save_predictions(predict_results) - - # Create model card - create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)