diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py
index 8228d588..304c8cdc 100644
--- a/src/llamafactory/api/chat.py
+++ b/src/llamafactory/api/chat.py
@@ -186,7 +186,6 @@ async def create_chat_completion_response(
 ) -> "ChatCompletionResponse":
     completion_id = f"chatcmpl-{uuid.uuid4().hex}"
     input_messages, system, tools, images, videos, audios = _process_request(request)
-    repetition_penalty = request.presence_penalty if request.presence_penalty else request.repetition_penalty
     responses = await chat_model.achat(
         input_messages,
         system,
@@ -199,8 +198,8 @@ async def create_chat_completion_response(
         top_p=request.top_p,
         max_new_tokens=request.max_tokens,
         num_return_sequences=request.n,
+        repetition_penalty=request.presence_penalty,
         stop=request.stop,
-        repetition_penalty=repetition_penalty,
     )
 
     prompt_length, response_length = 0, 0
@@ -250,7 +249,6 @@ async def create_stream_chat_completion_response(
     yield _create_stream_chat_completion_chunk(
         completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="")
     )
-    repetition_penalty = request.presence_penalty if request.presence_penalty else request.repetition_penalty
     async for new_token in chat_model.astream_chat(
         input_messages,
         system,
@@ -262,8 +260,8 @@ async def create_stream_chat_completion_response(
         temperature=request.temperature,
         top_p=request.top_p,
         max_new_tokens=request.max_tokens,
+        repetition_penalty=request.presence_penalty,
         stop=request.stop,
-        repetition_penalty=repetition_penalty,
     ):
         if len(new_token) != 0:
             yield _create_stream_chat_completion_chunk(
diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py
index 2c5520ea..889d938e 100644
--- a/src/llamafactory/api/protocol.py
+++ b/src/llamafactory/api/protocol.py
@@ -103,11 +103,10 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = None
     top_p: Optional[float] = None
     n: int = 1
+    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
     stop: Optional[Union[str, list[str]]] = None
     stream: bool = False
-    presence_penalty: Optional[float] = None
-    repetition_penalty: Optional[float] = None
 
 
 class ChatCompletionResponseChoice(BaseModel):
diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py
index f88d60bb..c64734ef 100644
--- a/src/llamafactory/data/template.py
+++ b/src/llamafactory/data/template.py
@@ -1686,15 +1686,9 @@ register_template(
 
 register_template(
     name="smollm",
-    format_system=StringFormatter(
-        slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]
-    ),
-    format_user=StringFormatter(
-        slots=["<|im_start|>user\n{{content}}<|im_end|>\n"]
-    ),
-    format_assistant=StringFormatter(
-        slots=["<|im_start|>assistant\n{{content}}<|im_end|>\n"]
-    ),
+    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
+    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
     stop_words=["<|im_end|>"],
 )
 
diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py
index b06da885..53363e93 100644
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -684,6 +684,10 @@ register_model_group(
             DownloadSource.DEFAULT: "google/gemma-3-1b-it",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-1b-it",
         },
+        "MedGemma-27B-Instruct": {
+            DownloadSource.DEFAULT: "google/medgemma-27b-text-it",
+            DownloadSource.MODELSCOPE: "google/medgemma-27b-text-it",
+        },
     },
     template="gemma",
 )
@@ -715,6 +719,14 @@ register_model_group(
             DownloadSource.DEFAULT: "google/gemma-3-27b-it",
             DownloadSource.MODELSCOPE: "LLM-Research/gemma-3-27b-it",
         },
+        "MedGemma-4B": {
+            DownloadSource.DEFAULT: "google/medgemma-4b-pt",
+            DownloadSource.MODELSCOPE: "google/medgemma-4b-pt",
+        },
+        "MedGemma-4B-Instruct": {
+            DownloadSource.DEFAULT: "google/medgemma-4b-it",
+            DownloadSource.MODELSCOPE: "google/medgemma-4b-it",
+        },
     },
     template="gemma3",
     multimodal=True,
 )
@@ -2721,6 +2733,18 @@ register_model_group(
 
 register_model_group(
     models={
+        "SmolLM-135M": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-135M",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-135M",
+        },
+        "SmolLM-360M": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-360M",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-360M",
+        },
+        "SmolLM-1.7B": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-1.7B",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-1.7B",
+        },
         "SmolLM-135M-Instruct": {
             DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-135M-Instruct",
             DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-135M-Instruct",
@@ -2733,6 +2757,30 @@ register_model_group(
             DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM-1.7B-Instruct",
             DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM-1.7B-Instruct",
         },
+        "SmolLM2-135M": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-135M",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-135M",
+        },
+        "SmolLM2-360M": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-360M",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-360M",
+        },
+        "SmolLM2-1.7B": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-1.7B",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-1.7B",
+        },
+        "SmolLM2-135M-Instruct": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-135M-Instruct",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-135M-Instruct",
+        },
+        "SmolLM2-360M-Instruct": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-360M-Instruct",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-360M-Instruct",
+        },
+        "SmolLM2-1.7B-Instruct": {
+            DownloadSource.DEFAULT: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+            DownloadSource.MODELSCOPE: "HuggingFaceTB/SmolLM2-1.7B-Instruct",
+        },
     },
     template="smollm",
 )