add pre-training script

This commit is contained in:
hiyouga
2023-05-29 21:37:22 +08:00
parent c0e5df92d6
commit 8ff96509fa
7 changed files with 169 additions and 21 deletions

View File

@@ -18,14 +18,14 @@ from utils import (
def main():
-# prepare pretrained model and dataset
+# Prepare pretrained model and dataset
model_args, data_args, training_args, finetuning_args = prepare_args(stage="rm")
dataset = prepare_data(model_args, data_args)
model, tokenizer = load_pretrained(model_args, finetuning_args, training_args.do_train, stage="rm")
dataset = preprocess_data(dataset, tokenizer, data_args, training_args, stage="rm")
data_collator = PairwiseDataCollatorForLLaMA(tokenizer, model.pretrained_model)
-training_args.remove_unused_columns = False # Important for pairwise dataset
+training_args.remove_unused_columns = False # important for pairwise dataset
# Split the dataset
if training_args.do_train: