LLaMA-Factory/examples/extras/nlg_eval/llama3_lora_predict.yaml
hiyouga c1768cfb14 support batch infer in vllm
Former-commit-id: 3ef5ed3b9a44eed2f7e3ff221dfc343d0a97c0b5
2024-12-04 13:50:00 +00:00

29 lines
623 B
YAML

# The batch generation can be SLOW using this config.
# For faster inference, we recommend to use `scripts/vllm_infer.py`.
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
### method
stage: sft
do_predict: true
finetuning_type: lora
### dataset
eval_dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/predict
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 1
predict_with_generate: true
ddp_timeout: 180000000