From cef3a0b2e25558e02573df4d0ce26a00c152e2a1 Mon Sep 17 00:00:00 2001
From: Kingsley <82590017+Kuangdd01@users.noreply.github.com>
Date: Fri, 9 May 2025 21:16:52 +0800
Subject: [PATCH] [scripts] add video params for vllm infer (#7992)

---
 scripts/vllm_infer.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/vllm_infer.py b/scripts/vllm_infer.py
index 53391eec..29ea8425 100644
--- a/scripts/vllm_infer.py
+++ b/scripts/vllm_infer.py
@@ -51,6 +51,8 @@ def vllm_infer(
     pipeline_parallel_size: int = 1,
     image_max_pixels: int = 768 * 768,
     image_min_pixels: int = 32 * 32,
+    video_fps: float = 2.0,
+    video_maxlen: int = 128,
 ):
     r"""Perform batch generation using vLLM engine, which supports tensor parallelism.
 
@@ -96,7 +98,11 @@ def vllm_infer(
         elif sample["videos"]:
             multi_modal_data = {
                 "video": template_obj.mm_plugin._regularize_videos(
-                    sample["videos"], image_max_pixels=image_max_pixels, image_min_pixels=image_min_pixels
+                    sample["videos"],
+                    image_max_pixels=image_max_pixels,
+                    image_min_pixels=image_min_pixels,
+                    video_fps=video_fps,
+                    video_maxlen=video_maxlen,
                 )["videos"]
             }
         elif sample["audios"]: