diff --git a/src/llmtuner/api/chat.py b/src/llmtuner/api/chat.py index fa2f0d03..972ee906 100644 --- a/src/llmtuner/api/chat.py +++ b/src/llmtuner/api/chat.py @@ -103,6 +103,7 @@ async def create_chat_completion_response( top_p=request.top_p, max_new_tokens=request.max_tokens, num_return_sequences=request.n, + stop=request.stop ) prompt_length, response_length = 0, 0 @@ -155,6 +156,7 @@ async def create_stream_chat_completion_response( temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens, + stop=request.stop ): if len(new_token) != 0: yield _create_stream_chat_completion_chunk(