[model] add smollm2 and medgemma (#8161)

This commit is contained in:
hoshi-hiyouga 2025-05-26 23:19:58 +08:00 committed by GitHub
parent dc8cca11b3
commit f3a1dc8483
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 54 additions and 15 deletions

View File

@@ -186,7 +186,6 @@ async def create_chat_completion_response(
) -> "ChatCompletionResponse":
completion_id = f"chatcmpl-{uuid.uuid4().hex}"
input_messages, system, tools, images, videos, audios = _process_request(request)
repetition_penalty = request.presence_penalty if request.presence_penalty else request.repetition_penalty
responses = await chat_model.achat(
input_messages,
system,
@@ -199,8 +198,8 @@ async def create_chat_completion_response(
top_p=request.top_p,
max_new_tokens=request.max_tokens,
num_return_sequences=request.n,
repetition_penalty=request.presence_penalty,
stop=request.stop,
repetition_penalty=repetition_penalty,
)
prompt_length, response_length = 0, 0
@@ -250,7 +249,6 @@ async def create_stream_chat_completion_response(
yield _create_stream_chat_completion_chunk(
completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="")
)
repetition_penalty = request.presence_penalty if request.presence_penalty else request.repetition_penalty
async for new_token in chat_model.astream_chat(
input_messages,
system,
@@ -262,8 +260,8 @@ async def create_stream_chat_completion_response(
temperature=request.temperature,
top_p=request.top_p,
max_new_tokens=request.max_tokens,
repetition_penalty=request.presence_penalty,
stop=request.stop,
repetition_penalty=repetition_penalty,
):
if len(new_token) != 0:
yield _create_stream_chat_completion_chunk(

View File

@@ -103,11 +103,10 @@ class ChatCompletionRequest(BaseModel):
temperature: Optional[float] = None
top_p: Optional[float] = None
n: int = 1
presence_penalty: Optional[float] = None
max_tokens: Optional[int] = None
stop: Optional[Union[str, list[str]]] = None
stream: bool = False
presence_penalty: Optional[float] = None
repetition_penalty: Optional[float] = None
class ChatCompletionResponseChoice(BaseModel):

View File

@@ -1686,15 +1686,9 @@ register_template(
register_template(
name="smollm",
format_system=StringFormatter(
slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]
),
format_user=StringFormatter(
slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]
),
format_assistant=StringFormatter(
slots=["<|im_start|>assistant\n{{content}}<|im_end|>\n"]
),
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
stop_words=["<|im_end|>"],
)

View File

@@ -684,6 +684,10 @@ register_model_group(
DownloadSource.DEFAULT: "google/gemma-3-1b-it",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-1b-it",
},
"MedGemma-27B-Instruct": {
DownloadSource.DEFAULT: "google/medgemma-27b-text-it",
DownloadSource.MODELSCOPE: "google/medgemma-27b-text-it",
},
},
template="gemma",
)
@@ -715,6 +719,14 @@ register_model_group(
DownloadSource.DEFAULT: "google/gemma-3-27b-it",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-27b-it",
},
"MedGemma-4B": {
DownloadSource.DEFAULT: "google/medgemma-4b-pt",
DownloadSource.MODELSCOPE: "google/medgemma-4b-pt",
},
"MedGemma-4B-Instruct": {
DownloadSource.DEFAULT: "google/medgemma-4b-it",
DownloadSource.MODELSCOPE: "google/medgemma-4b-it",
},
},
template="gemma3",
multimodal=True,
@@ -2721,6 +2733,18 @@ register_model_group(
register_model_group(
models={
"SmolLM-135M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-135M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-135M",
},
"SmolLM-360M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-360M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-360M",
},
"SmolLM-1.7B": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-1.7B",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-1.7B",
},
"SmolLM-135M-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-135M-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-135M-Instruct",
@@ -2733,6 +2757,30 @@ register_model_group(
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-1.7B-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-1.7B-Instruct",
},
"SmolLM2-135M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-135M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-135M",
},
"SmolLM2-360M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-360M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-360M",
},
"SmolLM2-1.7B": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-1.7B",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-1.7B",
},
"SmolLM2-135M-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-135M-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-135M-Instruct",
},
"SmolLM2-360M-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-360M-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-360M-Instruct",
},
"SmolLM2-1.7B-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
},
},
template="smollm",
)