Merge pull request #4246 from hzhaoy/adapt-vllm-v0.5.0

adapt vllm==0.5.0

Former-commit-id: 1f23f25226f43bb2b2116474b9c6ab2d72e7e260
This commit is contained in:
hoshi-hiyouga 2024-06-13 01:54:02 +08:00 committed by GitHub
commit 7366647b43

View File

@@ -1,10 +1,12 @@
 import uuid
 from typing import TYPE_CHECKING, AsyncGenerator, AsyncIterator, Dict, List, Optional, Sequence, Union
+
+from packaging import version
 
 from ..data import get_template_and_fix_tokenizer
 from ..extras.logging import get_logger
 from ..extras.misc import get_device_count
-from ..extras.packages import is_vllm_available
+from ..extras.packages import is_vllm_available, _get_package_version
 from ..model import load_config, load_tokenizer
 from ..model.model_utils.visual import LlavaMultiModalProjectorForYiVLForVLLM
 from .base_engine import BaseEngine, Response
@@ -14,10 +16,10 @@ if is_vllm_available():
     from vllm import AsyncEngineArgs, AsyncLLMEngine, RequestOutput, SamplingParams
     from vllm.lora.request import LoRARequest
 
-    try:
-        from vllm.multimodal import MultiModalData  # type: ignore (for vllm>=0.5.0)
-    except ImportError:
-        from vllm.sequence import MultiModalData  # for vllm<0.5.0
+    if _get_package_version("vllm") >= version.parse("0.5.0"):
+        from vllm.multimodal.image import ImagePixelData
+    else:
+        from vllm.sequence import MultiModalData
 
 
 if TYPE_CHECKING:
@@ -110,6 +112,9 @@ class VllmEngine(BaseEngine):
         if self.processor is not None and image is not None:  # add image features
             image_processor: "BaseImageProcessor" = getattr(self.processor, "image_processor")
             pixel_values = image_processor(image, return_tensors="pt")["pixel_values"]
-            multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values)
+            if _get_package_version("vllm") >= version.parse("0.5.0"):
+                multi_modal_data = ImagePixelData(pixel_values)
+            else:
+                multi_modal_data = MultiModalData(type=MultiModalData.Type.IMAGE, data=pixel_values)
         else:
             multi_modal_data = None