mirror of https://github.com/hiyouga/LLaMA-Factory.git
add pre-training script
@@ -18,14 +18,14 @@ from utils import (

 def main():

-    # prepare pretrained model and dataset
+    # Prepare pretrained model and dataset
     model_args, data_args, training_args, finetuning_args = prepare_args(stage="rm")
     dataset = prepare_data(model_args, data_args)
     model, tokenizer = load_pretrained(model_args, finetuning_args, training_args.do_train, stage="rm")
     dataset = preprocess_data(dataset, tokenizer, data_args, training_args, stage="rm")
     data_collator = PairwiseDataCollatorForLLaMA(tokenizer, model.pretrained_model)

-    training_args.remove_unused_columns = False # Important for pairwise dataset
+    training_args.remove_unused_columns = False # important for pairwise dataset

     # Split the dataset
     if training_args.do_train:
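
For context on `PairwiseDataCollatorForLLaMA`: its implementation is not part of this hunk, but a pairwise collator for reward modeling typically stacks every chosen response followed by every rejected response into one padded batch, so the reward model can score both halves of each preference pair in a single forward pass. Below is a minimal sketch of that idea, not the repository's code; the `accept_ids`/`reject_ids` feature names and the `SimplePairwiseCollator` class are hypothetical.

```python
import torch
from typing import Any, Dict, List, Sequence


class SimplePairwiseCollator:
    """Pads chosen and rejected sequences into one batch: chosen first, rejected second."""

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
        # Concatenate all chosen sequences, then all rejected ones, so that
        # row i and row i + len(features) form one preference pair.
        sequences: List[List[int]] = (
            [f["accept_ids"] for f in features] + [f["reject_ids"] for f in features]
        )
        pad_id = self.tokenizer.pad_token_id
        max_len = max(len(seq) for seq in sequences)
        input_ids = torch.tensor(
            [seq + [pad_id] * (max_len - len(seq)) for seq in sequences],
            dtype=torch.long,
        )
        # Simplification: treats every pad_id position as padding.
        attention_mask = (input_ids != pad_id).long()
        return {"input_ids": input_ids, "attention_mask": attention_mask}
```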
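The `remove_unused_columns = False` line is load-bearing: by default, `transformers.Trainer` inspects the model's `forward()` signature and drops every dataset column whose name does not appear there, which would silently strip the chosen/rejected pair columns before the collator ever sees them. The same setting can also be fixed when the arguments are constructed; a sketch with a hypothetical output path:

```python
from transformers import TrainingArguments

# With the default remove_unused_columns=True, Trainer drops dataset columns
# not named in the model's forward() signature. The pairwise columns the
# collator needs would be removed before collation, so keep them explicitly.
training_args = TrainingArguments(
    output_dir="path_to_rm_checkpoint",  # hypothetical path
    remove_unused_columns=False,
)
```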