[model] add smollm2 and medgemma (#8161)

This commit is contained in:
hoshi-hiyouga 2025-05-26 23:19:58 +08:00 committed by GitHub
parent dc8cca11b3
commit f3a1dc8483
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 54 additions and 15 deletions

View File

@@ -186,7 +186,6 @@ async def create_chat_completion_response(
) -> "ChatCompletionResponse":
completion_id = f"chatcmpl-{uuid.uuid4().hex}"
input_messages, system, tools, images, videos, audios = _process_request(request)
repetition_penalty = request.presence_penalty if request.presence_penalty else request.repetition_penalty
responses = await chat_model.achat(
input_messages,
system,
@@ -199,8 +198,8 @@ async def create_chat_completion_response(
top_p=request.top_p,
max_new_tokens=request.max_tokens,
num_return_sequences=request.n,
repetition_penalty=request.presence_penalty,
stop=request.stop,
repetition_penalty=repetition_penalty,
)
prompt_length, response_length = 0, 0
@@ -250,7 +249,6 @@ async def create_stream_chat_completion_response(
yield _create_stream_chat_completion_chunk(
completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="")
)
repetition_penalty = request.presence_penalty if request.presence_penalty else request.repetition_penalty
async for new_token in chat_model.astream_chat(
input_messages,
system,
@@ -262,8 +260,8 @@ async def create_stream_chat_completion_response(
temperature=request.temperature,
top_p=request.top_p,
max_new_tokens=request.max_tokens,
repetition_penalty=request.presence_penalty,
stop=request.stop,
repetition_penalty=repetition_penalty,
):
if len(new_token) != 0:
yield _create_stream_chat_completion_chunk(

View File

@@ -103,11 +103,10 @@ class ChatCompletionRequest(BaseModel):
temperature: Optional[float] = None
top_p: Optional[float] = None
n: int = 1
presence_penalty: Optional[float] = None
max_tokens: Optional[int] = None
stop: Optional[Union[str, list[str]]] = None
stream: bool = False
presence_penalty: Optional[float] = None
repetition_penalty: Optional[float] = None
class ChatCompletionResponseChoice(BaseModel):

View File

@@ -1686,15 +1686,9 @@ register_template(
register_template(
name="smollm",
format_system=StringFormatter(
slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]
),
format_user=StringFormatter(
slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]
),
format_assistant=StringFormatter(
slots=["<|im_start|>assistant\n{{content}}<|im_end|>\n"]
),
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
stop_words=["<|im_end|>"],
)

View File

@@ -684,6 +684,10 @@ register_model_group(
DownloadSource.DEFAULT: "google/gemma-3-1b-it",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-1b-it",
},
"MedGemma-27B-Instruct": {
DownloadSource.DEFAULT: "google/medgemma-27b-text-it",
DownloadSource.MODELSCOPE: "google/medgemma-27b-text-it",
},
},
template="gemma",
)
@@ -715,6 +719,14 @@ register_model_group(
DownloadSource.DEFAULT: "google/gemma-3-27b-it",
DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-27b-it",
},
"MedGemma-4B": {
DownloadSource.DEFAULT: "google/medgemma-4b-pt",
DownloadSource.MODELSCOPE: "google/medgemma-4b-pt",
},
"MedGemma-4B-Instruct": {
DownloadSource.DEFAULT: "google/medgemma-4b-it",
DownloadSource.MODELSCOPE: "google/medgemma-4b-it",
},
},
template="gemma3",
multimodal=True,
@@ -2721,6 +2733,18 @@ register_model_group(
register_model_group(
models={
"SmolLM-135M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-135M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-135M",
},
"SmolLM-360M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-360M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-360M",
},
"SmolLM-1.7B": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-1.7B",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-1.7B",
},
"SmolLM-135M-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-135M-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-135M-Instruct",
@@ -2733,6 +2757,30 @@ register_model_group(
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-1.7B-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-1.7B-Instruct",
},
"SmolLM2-135M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-135M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-135M",
},
"SmolLM2-360M": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-360M",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-360M",
},
"SmolLM2-1.7B": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-1.7B",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-1.7B",
},
"SmolLM2-135M-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-135M-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-135M-Instruct",
},
"SmolLM2-360M-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-360M-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-360M-Instruct",
},
"SmolLM2-1.7B-Instruct": {
DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
},
},
template="smollm",
)