mirror of https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-16 11:50:35 +08:00
support vllm
@@ -1,4 +1,5 @@
+import json
 import os
 from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Sequence, Tuple

 import gradio as gr
@@ -7,12 +8,12 @@ from gradio.components import Component  # cannot use TYPE_CHECKING here
 from ..chat import ChatModel
 from ..data import Role
 from ..extras.misc import torch_gc
-from ..hparams import GeneratingArguments
 from .common import get_save_dir
 from .locales import ALERTS


 if TYPE_CHECKING:
+    from ..chat import BaseEngine
     from .manager import Manager


@@ -22,29 +23,19 @@ class WebChatModel(ChatModel):
     ) -> None:
         self.manager = manager
         self.demo_mode = demo_mode
-        self.model = None
-        self.tokenizer = None
-        self.generating_args = GeneratingArguments()
+        self.engine: Optional["BaseEngine"] = None

         if not lazy_init:  # read arguments from command line
             super().__init__()

-        if demo_mode:  # load demo_config.json if exists
-            import json
-
-            try:
-                with open("demo_config.json", "r", encoding="utf-8") as f:
-                    args = json.load(f)
-                assert args.get("model_name_or_path", None) and args.get("template", None)
-                super().__init__(args)
-            except AssertionError:
-                print("Please provided model name and template in `demo_config.json`.")
-            except Exception:
-                print("Cannot find `demo_config.json` at current directory.")
+        if demo_mode and os.environ.get("DEMO_MODEL") and os.environ.get("DEMO_TEMPLATE"):  # load demo model
+            model_name_or_path = os.environ.get("DEMO_MODEL")
+            template = os.environ.get("DEMO_TEMPLATE")
+            super().__init__(dict(model_name_or_path=model_name_or_path, template=template))

     @property
     def loaded(self) -> bool:
-        return self.model is not None
+        return self.engine is not None

     def load_model(self, data: Dict[Component, Any]) -> Generator[str, None, None]:
         get = lambda name: data[self.manager.get_elem_by_name(name)]
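With vLLM as an alternative backend, the chatter no longer holds a model/tokenizer pair directly: all backend state moves behind a single `self.engine` handle, and the demo path now reads `DEMO_MODEL` and `DEMO_TEMPLATE` from the environment instead of a `demo_config.json` file. A minimal sketch of the indirection, under assumed names (`FakeEngine` and `ChatState` are illustrative, not the repo's classes):

# Sketch only (assumed names): shows why `loaded` reduces to one engine check
# once model/tokenizer state moves behind an engine object, whether that
# engine wraps HuggingFace transformers or vLLM.
from typing import Optional


class FakeEngine:
    """Stand-in for a HuggingFace- or vLLM-backed engine."""

    def __init__(self, model_name_or_path: str) -> None:
        self.model_name_or_path = model_name_or_path


class ChatState:
    def __init__(self) -> None:
        self.engine: Optional[FakeEngine] = None  # single handle, no model/tokenizer pair

    @property
    def loaded(self) -> bool:
        return self.engine is not None


state = ChatState()
assert not state.loaded
state.engine = FakeEngine("dummy-model")  # hypothetical identifier
assert state.loaded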
@@ -98,8 +89,7 @@ class WebChatModel(ChatModel):
             return

         yield ALERTS["info_unloading"][lang]
-        self.model = None
-        self.tokenizer = None
+        self.engine = None
         torch_gc()
         yield ALERTS["info_unloaded"][lang]

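Unloading now just drops the engine reference and calls `torch_gc()`. Dropping the Python reference alone does not hand VRAM back to the driver, which is why the explicit collection step stays. A sketch of what a `torch_gc`-style helper typically does (the real one is imported from the repo's `extras.misc` module and may differ in detail):

import gc

import torch


def torch_gc() -> None:
    # free Python-side garbage first so tensor references actually die
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # hand cached CUDA blocks back to the driver
        torch.cuda.ipc_collect()  # release CUDA IPC memory from dead processes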
@@ -123,7 +113,7 @@ class WebChatModel(ChatModel):
         ):
             response += new_text
             if tools:
-                result = self.template.format_tools.extract(response)
+                result = self.engine.template.format_tools.extract(response)
             else:
                 result = response

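The chat template now travels with the engine, so tool extraction is looked up via `self.engine.template`. As a rough illustration of the kind of work a `format_tools.extract`-style call performs, here is a toy JSON-based extractor; the repo's actual parsing rules are template-specific, and this sketch is only an assumption:

import json
from typing import Tuple, Union


def extract_tool_call(response: str) -> Union[str, Tuple[str, str]]:
    # Toy rule: a response that parses as a JSON object with "name" and
    # "arguments" keys is treated as a tool call; anything else is plain text.
    try:
        tool = json.loads(response.strip())
        return tool["name"], json.dumps(tool["arguments"])
    except (json.JSONDecodeError, KeyError, TypeError):
        return response


print(extract_tool_call('{"name": "search", "arguments": {"q": "vllm"}}'))
# -> ('search', '{"q": "vllm"}')
print(extract_tool_call("a plain text answer"))
# -> 'a plain text answer'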
@@ -28,10 +28,9 @@ def create_chat_box(
             submit_btn = gr.Button(variant="primary")

         with gr.Column(scale=1):
-            gen_kwargs = engine.chatter.generating_args
-            max_new_tokens = gr.Slider(10, 2048, value=gen_kwargs.max_new_tokens, step=1)
-            top_p = gr.Slider(0.01, 1, value=gen_kwargs.top_p, step=0.01)
-            temperature = gr.Slider(0.01, 1.5, value=gen_kwargs.temperature, step=0.01)
+            max_new_tokens = gr.Slider(8, 4096, value=512, step=1)
+            top_p = gr.Slider(0.01, 1.0, value=0.7, step=0.01)
+            temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01)
             clear_btn = gr.Button()

     tools.input(check_json_schema, [tools, engine.manager.get_elem_by_name("top.lang")])
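Because the chatter no longer instantiates `GeneratingArguments` eagerly, the sliders cannot read their defaults from `engine.chatter.generating_args` at build time and get static defaults instead (values as in the hunk above). A minimal, self-contained sketch of the same wiring; `fake_chat` is a placeholder callback, not the repo's streaming handler:

import gradio as gr


def fake_chat(message: str, max_new_tokens: int, top_p: float, temperature: float) -> str:
    # stand-in for the real streaming generate call
    return f"(would sample with max_new_tokens={max_new_tokens}, top_p={top_p}, temperature={temperature})"


with gr.Blocks() as demo:
    msg = gr.Textbox()
    out = gr.Textbox()
    max_new_tokens = gr.Slider(8, 4096, value=512, step=1)
    top_p = gr.Slider(0.01, 1.0, value=0.7, step=0.01)
    temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01)
    msg.submit(fake_chat, [msg, max_new_tokens, top_p, temperature], out)

# demo.launch()  # uncomment to serve locally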
@@ -16,8 +16,8 @@ class Engine:
         self.demo_mode = demo_mode
         self.pure_chat = pure_chat
         self.manager = Manager()
-        self.runner = Runner(self.manager, demo_mode=demo_mode)
-        self.chatter = WebChatModel(manager=self.manager, demo_mode=demo_mode, lazy_init=(not pure_chat))
+        self.runner = Runner(self.manager, demo_mode)
+        self.chatter = WebChatModel(self.manager, demo_mode, lazy_init=(not pure_chat))

     def _form_dict(self, resume_dict: Dict[str, Dict[str, Any]]):
         return {self.manager.get_elem_by_name(k): gr.update(**v) for k, v in resume_dict.items()}
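The `Runner` and `WebChatModel` constructor calls are simplified from keyword to positional arguments. A stubbed sketch of how the web UI composes these objects after the change (constructor bodies are stand-ins; only the call shapes mirror the diff):

from typing import Optional


class Manager:
    pass


class Runner:
    def __init__(self, manager: Manager, demo_mode: Optional[bool] = False) -> None:
        self.manager, self.demo_mode = manager, demo_mode


class WebChatModel:
    def __init__(
        self, manager: Manager, demo_mode: Optional[bool] = False, lazy_init: Optional[bool] = True
    ) -> None:
        self.manager, self.demo_mode, self.lazy_init = manager, demo_mode, lazy_init


class Engine:
    def __init__(self, demo_mode: Optional[bool] = False, pure_chat: Optional[bool] = False) -> None:
        self.manager = Manager()
        self.runner = Runner(self.manager, demo_mode)  # positional, as in the diff
        self.chatter = WebChatModel(self.manager, demo_mode, lazy_init=(not pure_chat))


engine = Engine(pure_chat=True)
assert engine.chatter.lazy_init is False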