diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py
index 3406576b..35640174 100644
--- a/src/llamafactory/data/processors/supervised.py
+++ b/src/llamafactory/data/processors/supervised.py
@@ -10,7 +10,7 @@
 if TYPE_CHECKING:
     from transformers import ProcessorMixin
     from transformers.tokenization_utils import PreTrainedTokenizer
 
-    from ...hparams import DataArguments, FinetuningArguments
+    from ...hparams import DataArguments
     from ..template import Template
 
diff --git a/src/llamafactory/model/model_utils/packing.py b/src/llamafactory/model/model_utils/packing.py
index fe718ebb..9b7359be 100644
--- a/src/llamafactory/model/model_utils/packing.py
+++ b/src/llamafactory/model/model_utils/packing.py
@@ -239,7 +239,7 @@ def configure_packing(config: "PretrainedConfig") -> None:
     attn_implementation = getattr(config, "_attn_implementation", None)
     if attn_implementation != "flash_attention_2":
-        raise ValueError("Efficient packing only supports for flash_attention_2. Please set config `flash_attn` is fa2" + " " + attn_implementation)
+        raise ValueError("Efficient packing only supports for flash_attention_2. Please set config `flash_attn` is fa2")
 
 
 logger = get_logger(__name__)
diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py
index f003e157..c79b160b 100644
--- a/src/llamafactory/train/kto/workflow.py
+++ b/src/llamafactory/train/kto/workflow.py
@@ -24,7 +24,7 @@ def run_kto(
 ):
     tokenizer_module = load_tokenizer(model_args)
     tokenizer = tokenizer_module["tokenizer"]
-    dataset = get_dataset(model_args, data_args, training_args, finetuning_args, stage="kto", **tokenizer_module)
+    dataset = get_dataset(model_args, data_args, training_args, stage="kto", **tokenizer_module)
     model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
     data_collator = KTODataCollatorWithPadding(
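
For context (not part of the diff): `configure_packing` only inspects `config._attn_implementation`, which transformers sets when a model is loaded with the FlashAttention-2 backend. Below is a minimal sketch of how that attribute ends up as `"flash_attention_2"`, assuming transformers >= 4.36 with the flash-attn package installed; the model id is a placeholder.

```python
# Minimal sketch (not from this PR): load a model so that
# config._attn_implementation == "flash_attention_2", which is the condition
# configure_packing() checks before enabling efficient packing.
# Requires a CUDA GPU and the flash-attn package; the model id is a placeholder.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",               # placeholder model id
    attn_implementation="flash_attention_2",  # roughly what `flash_attn: fa2` requests
    torch_dtype=torch.bfloat16,               # FlashAttention-2 needs fp16/bf16 weights
)
assert model.config._attn_implementation == "flash_attention_2"
```

With any other backend (e.g. eager or sdpa), the `getattr` check in the packing.py hunk above raises the ValueError shown in the diff.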