Support batch inference in vLLM

2026-03-10 05:35:59 +08:00 · 2024-12-04 13:50:00 +00:00
parent dc78355002
commit 1324d158f9
29 changed files with 148 additions and 407 deletions
--- a/examples/inference/llama3.yaml
+++ b/examples/inference/llama3.yaml
@@ -1,2 +1,3 @@
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 template: llama3
+infer_backend: huggingface  # choices: [huggingface, vllm]