#!/bin/bash
#
# Runs `llamafactory-cli train` (stage: sft) over the alpaca_gpt4_en and
# glaive_toolcall datasets with `--tokenized_path ../../saves/datasets/sft`.
# To load the resulting data later, pass the same `--tokenized_path` in the
# training script.
#
# Notes:
#   - CUDA_VISIBLE_DEVICES is deliberately set to the empty string for this
#     one command only; presumably this keeps the run off the GPUs, since it
#     is meant for data preparation -- confirm before relying on it.
#   - All paths are relative, so they resolve against the directory the
#     script is invoked from, not the directory the script lives in.
#   - The option list is one backslash-continued command; a comment cannot be
#     placed between continuation lines, so the options are documented here.

CUDA_VISIBLE_DEVICES="" llamafactory-cli train \
  --stage sft \
  --do_train \
  --model_name_or_path meta-llama/Llama-2-7b-hf \
  --dataset alpaca_gpt4_en,glaive_toolcall \
  --dataset_dir ../../data \
  --template default \
  --finetuning_type lora \
  --lora_target q_proj,v_proj \
  --output_dir ../../saves/LLaMA2-7B/lora/sft \
  --overwrite_cache \
  --overwrite_output_dir \
  --cutoff_len 1024 \
  --preprocessing_num_workers 16 \
  --max_samples 3000 \
  --tokenized_path ../../saves/datasets/sft