Mirror of https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-02 11:42:49 +08:00
parent af9ef037dd
commit 8524dcaa4a

.gitignore (vendored)
@@ -172,6 +172,3 @@ saves/
 output/
 wandb/
 generated_predictions.jsonl
-
-# unittest
-dummy_dir/

@@ -15,6 +15,8 @@
 from dataclasses import asdict, dataclass, field
 from typing import Any, Dict, Optional
 
+from transformers import GenerationConfig
+
 
 @dataclass
 class GeneratingArguments:
@@ -69,10 +71,17 @@ class GeneratingArguments:
         metadata={"help": "Whether or not to remove special tokens in the decoding."},
     )
 
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self, obey_generation_config: bool = False) -> Dict[str, Any]:
         args = asdict(self)
         if args.get("max_new_tokens", -1) > 0:
             args.pop("max_length", None)
         else:
             args.pop("max_new_tokens", None)
+
+        if obey_generation_config:
+            generation_config = GenerationConfig()
+            for key in list(args.keys()):
+                if not hasattr(generation_config, key):
+                    args.pop(key)
+
         return args
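
Note: a minimal sketch of what the new `obey_generation_config=True` branch does, using an illustrative kwargs dict (not project code): every key that a default `GenerationConfig` does not recognize is dropped, so decode-only options such as `skip_special_tokens` never reach `model.generate`.

from transformers import GenerationConfig

# Illustrative kwargs; `skip_special_tokens` is a decode-time flag, not a generation argument.
args = {"max_new_tokens": 128, "temperature": 0.7, "skip_special_tokens": True}

generation_config = GenerationConfig()
for key in list(args.keys()):
    if not hasattr(generation_config, key):
        args.pop(key)  # removes "skip_special_tokens"

print(args)  # {'max_new_tokens': 128, 'temperature': 0.7}
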
@@ -151,7 +151,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
         return padded_tensor.contiguous()  # in contiguous memory
 
     def save_predictions(
-        self, dataset: "Dataset", predict_results: "PredictionOutput", gen_kwargs: Dict[str, Any]
+        self, dataset: "Dataset", predict_results: "PredictionOutput", skip_special_tokens: bool = True
     ) -> None:
         r"""
         Saves model predictions to `output_dir`.
@@ -179,12 +179,8 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
             preds[i] = np.concatenate((preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1)
 
         decoded_inputs = self.processing_class.batch_decode(dataset["input_ids"], skip_special_tokens=False)
-        decoded_preds = self.processing_class.batch_decode(
-            preds, skip_special_tokens=gen_kwargs["skip_special_tokens"]
-        )
-        decoded_labels = self.processing_class.batch_decode(
-            labels, skip_special_tokens=gen_kwargs["skip_special_tokens"]
-        )
+        decoded_preds = self.processing_class.batch_decode(preds, skip_special_tokens=skip_special_tokens)
+        decoded_labels = self.processing_class.batch_decode(labels, skip_special_tokens=skip_special_tokens)
 
         with open(output_prediction_file, "w", encoding="utf-8") as f:
             for text, pred, label in zip(decoded_inputs, decoded_preds, decoded_labels):
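
Note: what the forwarded flag changes at decode time, shown with a throwaway tokenizer (the model name here is chosen only for illustration, not taken from the repository):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # any tokenizer works for this demo
ids = tokenizer("hello world").input_ids + [tokenizer.eos_token_id]

print(tokenizer.batch_decode([ids], skip_special_tokens=False))  # ['hello world<|endoftext|>']
print(tokenizer.batch_decode([ids], skip_special_tokens=True))   # ['hello world']
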
@@ -91,7 +91,7 @@ def run_sft(
     )
 
     # Keyword arguments for `model.generate`
-    gen_kwargs = generating_args.to_dict()
+    gen_kwargs = generating_args.to_dict(obey_generation_config=True)
     gen_kwargs["eos_token_id"] = [tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids
     gen_kwargs["pad_token_id"] = tokenizer.pad_token_id
     gen_kwargs["logits_processor"] = get_logits_processor()
@@ -130,7 +130,7 @@ def run_sft(
         predict_results.metrics.pop("predict_loss", None)
         trainer.log_metrics("predict", predict_results.metrics)
         trainer.save_metrics("predict", predict_results.metrics)
-        trainer.save_predictions(dataset_module["eval_dataset"], predict_results, gen_kwargs)
+        trainer.save_predictions(dataset_module["eval_dataset"], predict_results, generating_args.skip_special_tokens)
 
     # Create model card
     create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
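
Note: a sketch of how the pieces fit after this change (identifiers follow the diff; the `trainer.predict` call is assumed rather than shown in these hunks): generate-time options are filtered against `GenerationConfig` before reaching the trainer, while the decode-time flag travels separately from `GeneratingArguments`.

# Sketch only, not the exact project code.
gen_kwargs = generating_args.to_dict(obey_generation_config=True)  # only keys GenerationConfig knows
gen_kwargs["eos_token_id"] = [tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids
gen_kwargs["pad_token_id"] = tokenizer.pad_token_id

predict_results = trainer.predict(dataset_module["eval_dataset"], metric_key_prefix="predict", **gen_kwargs)

# The decode-time option is passed on its own instead of hiding inside gen_kwargs.
trainer.save_predictions(dataset_module["eval_dataset"], predict_results, generating_args.skip_special_tokens)
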
@@ -60,12 +60,12 @@ OS_NAME = os.getenv("OS_NAME", "")
     ],
 )
 def test_run_exp(stage: str, dataset: str):
-    output_dir = os.path.join("output", f"dummy_dir/train_{stage}")
+    output_dir = os.path.join("output", f"train_{stage}")
     run_exp({"stage": stage, "dataset": dataset, "output_dir": output_dir, **TRAIN_ARGS})
     assert os.path.exists(output_dir)
 
 
 def test_export():
-    export_dir = os.path.join("output", "dummy_dir/llama3_export")
+    export_dir = os.path.join("output", "llama3_export")
     export_model({"export_dir": export_dir, **INFER_ARGS})
     assert os.path.exists(export_dir)

@@ -58,7 +58,11 @@ class DataCollatorWithVerbose(DataCollatorWithPadding):
 @pytest.mark.parametrize("disable_shuffling", [False, True])
 def test_shuffle(disable_shuffling: bool):
     model_args, data_args, training_args, finetuning_args, _ = get_train_args(
-        {"output_dir": f"dummy_dir/{disable_shuffling}", "disable_shuffling": disable_shuffling, **TRAIN_ARGS}
+        {
+            "output_dir": os.path.join("output", f"shuffle{str(disable_shuffling).lower()}"),
+            "disable_shuffling": disable_shuffling,
+            **TRAIN_ARGS,
+        }
     )
     tokenizer_module = load_tokenizer(model_args)
     tokenizer = tokenizer_module["tokenizer"]