From 3b244a69dce7157d69ed3a047e81bc0d9f5286a1 Mon Sep 17 00:00:00 2001
From: hiyouga <467089858@qq.com>
Date: Mon, 10 Jun 2024 21:24:15 +0800
Subject: [PATCH] fix #2666

Former-commit-id: c907d816670975daa900898660d3503708b7fc37
---
 src/llamafactory/model/adapter.py         |  2 +-
 tests/data/test_supervised.py             | 32 ++++++++++++++---------
 tests/model/model_utils/test_attention.py | 15 +++++------
 tests/model/test_freeze.py                | 19 ++++----------
 tests/model/test_full.py                  |  8 +++---
 tests/model/test_lora.py                  | 19 ++++----------
 6 files changed, 41 insertions(+), 54 deletions(-)

diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py
index f4e501a7..34b9eda6 100644
--- a/src/llamafactory/model/adapter.py
+++ b/src/llamafactory/model/adapter.py
@@ -209,6 +209,7 @@ def _setup_lora_tuning(
             "lora_alpha": finetuning_args.lora_alpha,
             "lora_dropout": finetuning_args.lora_dropout,
             "use_rslora": finetuning_args.use_rslora,
+            "use_dora": finetuning_args.use_dora,
             "modules_to_save": finetuning_args.additional_target,
         }

@@ -218,7 +219,6 @@
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
             inference_mode=False,
-            use_dora=finetuning_args.use_dora,
             **peft_kwargs,
         )
         model = get_peft_model(model, lora_config)
diff --git a/tests/data/test_supervised.py b/tests/data/test_supervised.py
index bb7f71df..63a3453f 100644
--- a/tests/data/test_supervised.py
+++ b/tests/data/test_supervised.py
@@ -1,4 +1,5 @@
 import os
+import random

 import pytest
 from datasets import load_dataset
@@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_tokenizer


-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
-    "cutoff_len": 1024,
+    "cutoff_len": 8192,
     "overwrite_cache": True,
     "output_dir": "dummy_dir",
     "overwrite_output_dir": True,
@@ -26,19 +27,24 @@ TRAINING_ARGS = {
 }


-@pytest.mark.parametrize("test_num", [5])
-def test_supervised(test_num: int):
-    model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS)
+@pytest.mark.parametrize("num_samples", [10])
+def test_supervised(num_samples: int):
+    model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     tokenizer = tokenizer_module["tokenizer"]
     tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)

-    original_data = load_dataset(TRAINING_ARGS["dataset"], split="train")
-    for test_idx in range(test_num):
-        decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx])
+    original_data = load_dataset(TRAIN_ARGS["dataset"], split="train")
+    indexes = random.choices(range(len(original_data)), k=num_samples)
+    for index in indexes:
+        decoded_result = tokenizer.decode(tokenized_data["input_ids"][index])
+        prompt = original_data[index]["instruction"]
+        if original_data[index]["input"]:
+            prompt += "\n" + original_data[index]["input"]
+
         messages = [
-            {"role": "user", "content": original_data[test_idx]["instruction"]},
-            {"role": "assistant", "content": original_data[test_idx]["output"]},
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": original_data[index]["output"]},
         ]
         templated_result = tokenizer.apply_chat_template(messages, tokenize=False)
-        assert decode_result == templated_result
+        assert decoded_result == templated_result
diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py
index 4d414289..751adda4 100644
--- a/tests/model/model_utils/test_attention.py
+++ b/tests/model/model_utils/test_attention.py
@@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_model, load_tokenizer


-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+
+INFER_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "template": "llama3",
+}


 def test_attention():
@@ -23,13 +28,7 @@
         "fa2": "LlamaFlashAttention2",
     }
     for requested_attention in attention_available:
-        model_args, _, finetuning_args, _ = get_infer_args(
-            {
-                "model_name_or_path": TINY_LLAMA,
-                "template": "llama2",
-                "flash_attn": requested_attention,
-            }
-        )
+        model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})
         tokenizer_module = load_tokenizer(model_args)
         model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)
         for module in model.modules():
diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py
index c6cdec78..97800696 100644
--- a/tests/model/test_freeze.py
+++ b/tests/model/test_freeze.py
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_model, load_tokenizer


-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "freeze",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
     "cutoff_len": 1024,
@@ -25,12 +25,7 @@ TRAINING_ARGS = {


 def test_freeze_all_modules():
-    model_args, _, _, finetuning_args, _ = get_train_args(
-        {
-            "freeze_trainable_layers": 1,
-            **TRAINING_ARGS,
-        }
-    )
+    model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS})
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
     for name, param in model.named_parameters():
@@ -44,11 +39,7 @@

 def test_freeze_extra_modules():
     model_args, _, _, finetuning_args, _ = get_train_args(
-        {
-            "freeze_trainable_layers": 1,
-            "freeze_extra_modules": "embed_tokens,lm_head",
-            **TRAINING_ARGS,
-        }
+        {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS}
     )
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
diff --git a/tests/model/test_full.py b/tests/model/test_full.py
index ef57a980..6cb78f37 100644
--- a/tests/model/test_full.py
+++ b/tests/model/test_full.py
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_model, load_tokenizer


-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
     "cutoff_len": 1024,
@@ -25,7 +25,7 @@ TRAINING_ARGS = {


 def test_full():
-    model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS)
+    model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
     for param in model.parameters():
diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py
index 1f2c02ae..2e2b89d9 100644
--- a/tests/model/test_lora.py
+++ b/tests/model/test_lora.py
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_model, load_tokenizer


-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "lora",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
     "cutoff_len": 1024,
@@ -25,12 +25,7 @@ TRAINING_ARGS = {


 def test_lora_all_modules():
-    model_args, _, _, finetuning_args, _ = get_train_args(
-        {
-            "lora_target": "all",
-            **TRAINING_ARGS,
-        }
-    )
+    model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS})
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
     linear_modules = set()
@@ -48,11 +43,7 @@

 def test_lora_extra_modules():
     model_args, _, _, finetuning_args, _ = get_train_args(
-        {
-            "lora_target": "all",
-            "additional_target": "embed_tokens,lm_head",
-            **TRAINING_ARGS,
-        }
+        {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS}
     )
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
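
Note, not part of the patch: the adapter.py hunks above move the use_dora flag out of the explicit LoraConfig(...) call and into the shared peft_kwargs dict, so every code path that builds an adapter from peft_kwargs picks it up. Below is a minimal, hypothetical sketch of the resulting pattern; it assumes peft>=0.9 (where LoraConfig accepts use_dora), and the model name and hyperparameter values are illustrative only, not LLaMA-Factory defaults.

# Standalone sketch of the post-patch pattern: all LoRA options, including
# use_dora, live in one peft_kwargs dict that is unpacked into LoraConfig.
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("llamafactory/tiny-random-Llama-3")  # illustrative model

peft_kwargs = {
    "r": 8,                                 # illustrative value for lora_rank
    "target_modules": ["q_proj", "v_proj"],  # illustrative target modules
    "lora_alpha": 16,
    "lora_dropout": 0.0,
    "use_rslora": False,
    "use_dora": True,   # previously passed to LoraConfig directly; now part of peft_kwargs
    "modules_to_save": None,
}

# Same construction as in _setup_lora_tuning after the change.
lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, **peft_kwargs)
model = get_peft_model(model, lora_config)
print(model.peft_config["default"].use_dora)  # True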