mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-09-13 16:42:48 +08:00
support multiturn training like FastChat
Former-commit-id: b6faf0207d5b637722a1fd45984d27b3ac095fd4
This commit is contained in:
parent
ca90a1e6d9
commit
3419396945
@ -50,6 +50,9 @@ async def create_item(request: Request):
|
|||||||
json_post_list = json.loads(json_post)
|
json_post_list = json.loads(json_post)
|
||||||
prompt = json_post_list.get("prompt")
|
prompt = json_post_list.get("prompt")
|
||||||
history = json_post_list.get("history")
|
history = json_post_list.get("history")
|
||||||
|
max_new_tokens = json_post_list.get("max_new_tokens", None)
|
||||||
|
top_p = json_post_list.get("top_p", None)
|
||||||
|
temperature = json_post_list.get("temperature", None)
|
||||||
|
|
||||||
# Tokenize the input prompt
|
# Tokenize the input prompt
|
||||||
input_ids = tokenizer([prompt_template.get_prompt(prompt, history)], return_tensors="pt")["input_ids"]
|
input_ids = tokenizer([prompt_template.get_prompt(prompt, history)], return_tensors="pt")["input_ids"]
|
||||||
@ -59,6 +62,9 @@ async def create_item(request: Request):
|
|||||||
gen_kwargs = generating_args.to_dict()
|
gen_kwargs = generating_args.to_dict()
|
||||||
gen_kwargs["input_ids"] = input_ids
|
gen_kwargs["input_ids"] = input_ids
|
||||||
gen_kwargs["logits_processor"] = get_logits_processor()
|
gen_kwargs["logits_processor"] = get_logits_processor()
|
||||||
|
gen_kwargs["max_new_tokens"] = max_new_tokens if max_new_tokens else gen_kwargs["max_new_tokens"]
|
||||||
|
gen_kwargs["top_p"] = top_p if top_p else gen_kwargs["top_p"]
|
||||||
|
gen_kwargs["temperature"] = temperature if temperature else gen_kwargs["temperature"]
|
||||||
|
|
||||||
# Generate response
|
# Generate response
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
|
@ -16,23 +16,27 @@ from transformers import TextIteratorStreamer
|
|||||||
def main():
|
def main():
|
||||||
|
|
||||||
model_args, data_args, finetuning_args, generating_args = prepare_infer_args()
|
model_args, data_args, finetuning_args, generating_args = prepare_infer_args()
|
||||||
model_name = "BLOOM" if "bloom" in model_args.model_name_or_path else "LLaMA"
|
|
||||||
model, tokenizer = load_pretrained(model_args, finetuning_args)
|
model, tokenizer = load_pretrained(model_args, finetuning_args)
|
||||||
|
|
||||||
|
model_name = "BLOOM" if "bloom" in model_args.model_name_or_path else "LLaMA"
|
||||||
prompt_template = Template(data_args.prompt_template)
|
prompt_template = Template(data_args.prompt_template)
|
||||||
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
|
||||||
|
|
||||||
def predict_and_print(query, history: list):
|
def predict_and_print(query, history: list) -> list:
|
||||||
input_ids = tokenizer([prompt_template.get_prompt(query, history)], return_tensors="pt")["input_ids"]
|
input_ids = tokenizer([prompt_template.get_prompt(query, history)], return_tensors="pt")["input_ids"]
|
||||||
input_ids = input_ids.to(model.device)
|
input_ids = input_ids.to(model.device)
|
||||||
|
|
||||||
|
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
||||||
|
|
||||||
gen_kwargs = generating_args.to_dict()
|
gen_kwargs = generating_args.to_dict()
|
||||||
gen_kwargs["input_ids"] = input_ids
|
gen_kwargs["input_ids"] = input_ids
|
||||||
gen_kwargs["logits_processor"] = get_logits_processor()
|
gen_kwargs["logits_processor"] = get_logits_processor()
|
||||||
gen_kwargs["streamer"] = streamer
|
gen_kwargs["streamer"] = streamer
|
||||||
|
|
||||||
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
|
print("{}: ".format(model_name), end="", flush=True)
|
||||||
response = ""
|
response = ""
|
||||||
print("{}: ".format(model_name), end="")
|
|
||||||
for new_text in streamer:
|
for new_text in streamer:
|
||||||
print(new_text, end="", flush=True)
|
print(new_text, end="", flush=True)
|
||||||
response += new_text
|
response += new_text
|
||||||
|
@ -421,18 +421,17 @@ def preprocess_data(
|
|||||||
prompt_template = Template(data_args.prompt_template)
|
prompt_template = Template(data_args.prompt_template)
|
||||||
|
|
||||||
# support question with a single answer or multiple answers
|
# support question with a single answer or multiple answers
|
||||||
def format_example(examples):
|
def get_dialog(examples):
|
||||||
for i in range(len(examples["prompt"])):
|
for i in range(len(examples["prompt"])):
|
||||||
if examples["prompt"][i] and examples["response"][i]:
|
if examples["prompt"][i] and examples["response"][i]:
|
||||||
query, answer = examples["prompt"][i], examples["response"][i]
|
query, answer = examples["prompt"][i], examples["response"][i]
|
||||||
if examples["query"][i]:
|
query = query + "\n" + examples["query"][i] if examples["query"][i] else query
|
||||||
query += "\n" + examples["query"][i]
|
dialog = prompt_template.get_dialog(query, answer, examples["history"][i], prefix)
|
||||||
prompt = prompt_template.get_prompt(query, examples["history"][i], prefix)
|
yield dialog
|
||||||
yield prompt, answer
|
|
||||||
|
|
||||||
def preprocess_pretrain_dataset(examples):
|
def preprocess_pretrain_dataset(examples):
|
||||||
# build grouped texts with format `<s> X1 X2 X3 ...` (without </s>)
|
# build grouped texts with format `X1 X2 X3 ...` (without [BOS] and [EOS])
|
||||||
text_ids = tokenizer(examples["prompt"])["input_ids"]
|
text_ids = tokenizer(examples["prompt"], add_special_tokens=False)["input_ids"]
|
||||||
concatenated_ids = list(chain(*text_ids))
|
concatenated_ids = list(chain(*text_ids))
|
||||||
total_length = len(concatenated_ids)
|
total_length = len(concatenated_ids)
|
||||||
# we drop the small remainder, and if the total_length < block_size, we exclude this batch
|
# we drop the small remainder, and if the total_length < block_size, we exclude this batch
|
||||||
@ -446,28 +445,29 @@ def preprocess_data(
|
|||||||
}
|
}
|
||||||
|
|
||||||
def preprocess_supervised_dataset(examples):
|
def preprocess_supervised_dataset(examples):
|
||||||
# build inputs with format `X <s> Y </s>` and labels with format `<ignore> ... <ignore> <s> Y </s>`
|
# build inputs with format `X [BOS] Y [EOS]` and labels with format `[IGNORE] ... [IGNORE] Y [EOS]`
|
||||||
|
# for input with history, we build multiple input-label pairs just like:
|
||||||
|
# https://github.com/lm-sys/FastChat/blob/f17c092f64840fa6354ed52789dccb2daa793d0b/fastchat/train/train.py#L112
|
||||||
model_inputs = {"input_ids": [], "labels": []}
|
model_inputs = {"input_ids": [], "labels": []}
|
||||||
for prompt, answer in format_example(examples):
|
for dialog in get_dialog(examples):
|
||||||
source_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
|
input_ids, labels = [], []
|
||||||
target_ids = tokenizer.encode(text=answer, add_special_tokens=False)
|
|
||||||
|
|
||||||
if len(source_ids) > data_args.max_source_length - 1: # bos token
|
for i in range(len(dialog) // 2):
|
||||||
source_ids = source_ids[:data_args.max_source_length - 1]
|
source_ids = tokenizer.encode(text=dialog[2*i], add_special_tokens=False)
|
||||||
if len(target_ids) > data_args.max_target_length - 1: # eos token
|
target_ids = tokenizer.encode(text=dialog[2*i+1], add_special_tokens=False)
|
||||||
target_ids = target_ids[:data_args.max_target_length - 1]
|
input_ids += source_ids + [tokenizer.bos_token_id] + target_ids + [tokenizer.eos_token_id]
|
||||||
|
labels += [IGNORE_INDEX] * (len(source_ids) + 1) + target_ids + [tokenizer.eos_token_id]
|
||||||
|
|
||||||
input_ids = source_ids + [tokenizer.bos_token_id] + target_ids + [tokenizer.eos_token_id]
|
model_inputs["input_ids"].append(input_ids[:data_args.max_source_length + data_args.max_target_length])
|
||||||
labels = [IGNORE_INDEX] * len(source_ids) + [tokenizer.bos_token_id] + target_ids + [tokenizer.eos_token_id]
|
model_inputs["labels"].append(labels[:data_args.max_source_length + data_args.max_target_length])
|
||||||
|
|
||||||
model_inputs["input_ids"].append(input_ids)
|
|
||||||
model_inputs["labels"].append(labels)
|
|
||||||
return model_inputs
|
return model_inputs
|
||||||
|
|
||||||
def preprocess_unsupervised_dataset(examples):
|
def preprocess_unsupervised_dataset(examples):
|
||||||
# build inputs with format `X <s>` and labels with format `Y <s>`
|
# build inputs with format `X [BOS]` and labels with format `Y [BOS]`
|
||||||
model_inputs = {"input_ids": [], "labels": []}
|
model_inputs = {"input_ids": [], "labels": []}
|
||||||
for prompt, answer in format_example(examples):
|
for dialog in get_dialog(examples):
|
||||||
|
prompt, answer = "".join(dialog[:-1]), dialog[-1]
|
||||||
|
|
||||||
source_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
|
source_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
|
||||||
target_ids = tokenizer.encode(text=answer, add_special_tokens=False)
|
target_ids = tokenizer.encode(text=answer, add_special_tokens=False)
|
||||||
|
|
||||||
@ -484,9 +484,11 @@ def preprocess_data(
|
|||||||
return model_inputs
|
return model_inputs
|
||||||
|
|
||||||
def preprocess_pairwise_dataset(examples):
|
def preprocess_pairwise_dataset(examples):
|
||||||
# build input pairs with format `X <s> Y1 </s>` and `X <s> Y2 </s>`
|
# build input pairs with format `X [BOS] Y1 [EOS]` and `X [BOS] Y2 [EOS]`
|
||||||
model_inputs = {"accept_ids": [], "reject_ids": []}
|
model_inputs = {"accept_ids": [], "reject_ids": []}
|
||||||
for prompt, answer in format_example(examples):
|
for dialog in get_dialog(examples):
|
||||||
|
prompt, answer = "".join(dialog[:-1]), dialog[-1]
|
||||||
|
|
||||||
source_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
|
source_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
|
||||||
accept_ids = tokenizer.encode(text=answer[0], add_special_tokens=False)
|
accept_ids = tokenizer.encode(text=answer[0], add_special_tokens=False)
|
||||||
reject_ids = tokenizer.encode(text=answer[1], add_special_tokens=False)
|
reject_ids = tokenizer.encode(text=answer[1], add_special_tokens=False)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from typing import Optional
|
from typing import List, Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
@ -8,89 +8,131 @@ class Template:
|
|||||||
name: str
|
name: str
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
assert hasattr(self, "_format_{}".format(self.name)), "Template {} does not exist.".format(self.name)
|
|
||||||
|
if self.name == "vanilla":
|
||||||
|
r"""
|
||||||
|
Supports language model inference without histories.
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="",
|
||||||
|
prompt="",
|
||||||
|
sep="",
|
||||||
|
use_history=False
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "alpaca":
|
||||||
|
r"""
|
||||||
|
Supports: https://huggingface.co/tatsu-lab/alpaca-7b-wdiff
|
||||||
|
https://github.com/ymcui/Chinese-LLaMA-Alpaca
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="Below is an instruction that describes a task. "
|
||||||
|
"Write a response that appropriately completes the request.\n\n",
|
||||||
|
prompt="### Instruction:\n{query}\n\n### Response:\n",
|
||||||
|
sep="\n\n",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "vicuna":
|
||||||
|
r"""
|
||||||
|
Supports: https://huggingface.co/lmsys/vicuna-7b-delta-v1.1
|
||||||
|
https://huggingface.co/lmsys/vicuna-13b-delta-v1.1
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="A chat between a curious user and an artificial intelligence assistant. "
|
||||||
|
"The assistant gives helpful, detailed, and polite answers to the user's questions.",
|
||||||
|
prompt="USER: {query} ASSISTANT: ",
|
||||||
|
sep="</s>",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "belle":
|
||||||
|
r"""
|
||||||
|
Supports: https://huggingface.co/BelleGroup/BELLE-LLaMA-EXT-13B
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="",
|
||||||
|
prompt="Human: {query}\n\nBelle: ",
|
||||||
|
sep="\n\n",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "linly":
|
||||||
|
r"""
|
||||||
|
Supports: https://github.com/CVI-SZU/Linly
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="",
|
||||||
|
prompt="User: {query}\nBot: ",
|
||||||
|
sep="\n",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "billa":
|
||||||
|
r"""
|
||||||
|
Supports: https://github.com/Neutralzz/BiLLa
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="",
|
||||||
|
prompt="Human: {query}\nAssistant: ",
|
||||||
|
sep="\n",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "ziya":
|
||||||
|
r"""
|
||||||
|
Supports: https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="",
|
||||||
|
prompt="<human>:{query}\n<bot>:",
|
||||||
|
sep="\n",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
elif self.name == "aquila":
|
||||||
|
r"""
|
||||||
|
Supports: https://huggingface.co/qhduan/aquilachat-7b
|
||||||
|
"""
|
||||||
|
self._register_template(
|
||||||
|
prefix="A chat between a curious human and an artificial intelligence assistant. "
|
||||||
|
"The assistant gives helpful, detailed, and polite answers to the human's questions.",
|
||||||
|
prompt="Human: {query}\nAssistant: ",
|
||||||
|
sep="###",
|
||||||
|
use_history=True
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("Template {} does not exist.".format(self.name))
|
||||||
|
|
||||||
def get_prompt(self, query: str, history: Optional[list] = None, prefix: Optional[str] = "") -> str:
|
def get_prompt(self, query: str, history: Optional[list] = None, prefix: Optional[str] = "") -> str:
|
||||||
return getattr(self, "_format_{}".format(self.name))(query, history, prefix)
|
|
||||||
|
|
||||||
def _format_vanilla(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
|
||||||
r"""
|
r"""
|
||||||
Use for language model inference without histories.
|
Returns a string containing prompt without response.
|
||||||
"""
|
"""
|
||||||
return query
|
return "".join(self._format_example(query, history, prefix))
|
||||||
|
|
||||||
def _format_alpaca(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
def get_dialog(self, query: str, resp: str, history: Optional[list] = None, prefix: Optional[str] = "") -> List[str]:
|
||||||
r"""
|
r"""
|
||||||
Supports: https://huggingface.co/tatsu-lab/alpaca-7b-wdiff
|
Returns a list containing 2 * n elements where the 2k-th is a query and the (2k+1)-th is a response.
|
||||||
https://github.com/ymcui/Chinese-LLaMA-Alpaca
|
|
||||||
"""
|
"""
|
||||||
if prefix:
|
return self._format_example(query, history, prefix) + [resp]
|
||||||
prompt = prefix
|
|
||||||
else:
|
|
||||||
prompt = "Below is an instruction that describes a task. "
|
|
||||||
prompt += "Write a response that appropriately completes the request.\n\n"
|
|
||||||
if history:
|
|
||||||
for old_query, response in history:
|
|
||||||
prompt += "### Instruction:\n{}\n\n### Response:\n{}\n\n".format(old_query, response)
|
|
||||||
prompt += "### Instruction:\n{}\n\n### Response:\n".format(query)
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
def _format_vicuna(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
def _register_template(self, prefix: str, prompt: str, sep: str, use_history: Optional[bool] = True) -> None:
|
||||||
r"""
|
self.prefix = prefix
|
||||||
Supports: https://huggingface.co/lmsys/vicuna-7b-delta-v1.1
|
self.prompt = prompt
|
||||||
https://huggingface.co/lmsys/vicuna-13b-delta-v1.1
|
self.sep = sep
|
||||||
"""
|
self.use_history = use_history
|
||||||
if prefix:
|
|
||||||
prompt = prefix
|
|
||||||
else:
|
|
||||||
prompt = "A chat between a curious user and an artificial intelligence assistant. "
|
|
||||||
prompt += "The assistant gives helpful, detailed, and polite answers to the user's questions. "
|
|
||||||
if history:
|
|
||||||
for old_query, response in history:
|
|
||||||
prompt += "USER: {} ASSISTANT: {}</s>".format(old_query, response)
|
|
||||||
prompt += "USER: {} ASSISTANT: ".format(query)
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
def _format_belle(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
def _format_example(self, query: str, history: Optional[list] = None, prefix: Optional[str] = "") -> List[str]:
|
||||||
r"""
|
prefix = prefix if prefix else self.prefix
|
||||||
Supports: https://huggingface.co/BelleGroup/BELLE-LLaMA-EXT-13B
|
history = history if (history and self.use_history) else []
|
||||||
"""
|
history = history + [(query, "<dummy>")]
|
||||||
prompt = prefix
|
convs = []
|
||||||
if history:
|
for turn_idx, (user_query, bot_resp) in enumerate(history):
|
||||||
for old_query, response in history:
|
if turn_idx == 0:
|
||||||
prompt += "Human: {}\n\nBelle: {}\n\n".format(old_query, response)
|
convs.append(prefix + self.prompt.format(query=user_query))
|
||||||
prompt += "Human: {}\n\nBelle: ".format(query)
|
convs.append(bot_resp)
|
||||||
return prompt
|
else:
|
||||||
|
convs.append(self.sep + self.prompt.format(query=user_query))
|
||||||
def _format_linly(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
convs.append(bot_resp)
|
||||||
r"""
|
return convs[:-1] # drop last
|
||||||
Supports: https://github.com/CVI-SZU/Linly
|
|
||||||
"""
|
|
||||||
prompt = prefix
|
|
||||||
if history:
|
|
||||||
for old_query, response in history:
|
|
||||||
prompt += "User: {}\nBot: {}\n".format(old_query, response)
|
|
||||||
prompt += "User: {}\nBot: ".format(query)
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
def _format_billa(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
|
||||||
r"""
|
|
||||||
Supports: https://github.com/Neutralzz/BiLLa
|
|
||||||
"""
|
|
||||||
prompt = prefix
|
|
||||||
if history:
|
|
||||||
for old_query, response in history:
|
|
||||||
prompt += "Human: {}\nAssistant: {}\n".format(old_query, response)
|
|
||||||
prompt += "Human: {}\nAssistant: ".format(query)
|
|
||||||
return prompt
|
|
||||||
|
|
||||||
def _format_ziya(self, query: str, history: Optional[list], prefix: Optional[str] = "") -> str:
|
|
||||||
r"""
|
|
||||||
Supports: https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-13B-v1
|
|
||||||
"""
|
|
||||||
prompt = prefix
|
|
||||||
if history:
|
|
||||||
for old_query, response in history:
|
|
||||||
prompt += "<human>:{}\n<bot>:{}\n".format(old_query, response)
|
|
||||||
prompt += "<human>:{}\n<bot>:".format(query)
|
|
||||||
return prompt
|
|
||||||
|
@ -25,7 +25,6 @@ model_args, data_args, finetuning_args, generating_args = prepare_infer_args()
|
|||||||
model, tokenizer = load_pretrained(model_args, finetuning_args)
|
model, tokenizer = load_pretrained(model_args, finetuning_args)
|
||||||
|
|
||||||
prompt_template = Template(data_args.prompt_template)
|
prompt_template = Template(data_args.prompt_template)
|
||||||
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
|
||||||
|
|
||||||
|
|
||||||
def postprocess(self, y):
|
def postprocess(self, y):
|
||||||
@ -82,9 +81,12 @@ def predict(query, chatbot, max_length, top_p, temperature, history):
|
|||||||
|
|
||||||
input_ids = tokenizer([prompt_template.get_prompt(query, history)], return_tensors="pt")["input_ids"]
|
input_ids = tokenizer([prompt_template.get_prompt(query, history)], return_tensors="pt")["input_ids"]
|
||||||
input_ids = input_ids.to(model.device)
|
input_ids = input_ids.to(model.device)
|
||||||
|
|
||||||
|
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
||||||
|
|
||||||
gen_kwargs = {
|
gen_kwargs = {
|
||||||
"input_ids": input_ids,
|
"input_ids": input_ids,
|
||||||
"do_sample": True,
|
"do_sample": generating_args.do_sample,
|
||||||
"top_p": top_p,
|
"top_p": top_p,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"num_beams": generating_args.num_beams,
|
"num_beams": generating_args.num_beams,
|
||||||
@ -93,8 +95,10 @@ def predict(query, chatbot, max_length, top_p, temperature, history):
|
|||||||
"logits_processor": get_logits_processor(),
|
"logits_processor": get_logits_processor(),
|
||||||
"streamer": streamer
|
"streamer": streamer
|
||||||
}
|
}
|
||||||
|
|
||||||
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
thread = Thread(target=model.generate, kwargs=gen_kwargs)
|
||||||
thread.start()
|
thread.start()
|
||||||
|
|
||||||
response = ""
|
response = ""
|
||||||
for new_text in streamer:
|
for new_text in streamer:
|
||||||
response += new_text
|
response += new_text
|
||||||
|
Loading…
x
Reference in New Issue
Block a user