Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-08-23 06:12:50 +08:00
fix API
Former-commit-id: 29af67b015ff92e5dd9bf2985ce7723dc036d989
parent af37ac077c
commit 18656a6316
@@ -10,7 +10,7 @@ rouge-chinese
 nltk
 gradio>=3.36.0
 uvicorn
-pydantic
-fastapi
+pydantic==1.10.11
+fastapi==0.95.1
 sse-starlette
 matplotlib
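The pins are tied to the API fix below: the streaming handler serializes chunks with pydantic 1.x's .json(..., ensure_ascii=False), which pydantic 2 no longer accepts, and fastapi 0.95.1 still targets pydantic 1. A minimal sketch of the behavior the pin preserves (the Delta class is a hypothetical stand-in, not the project's schema):

    from pydantic import BaseModel

    # Minimal sketch, assuming pydantic 1.x semantics (hence the ==1.10.11 pin).
    # "Delta" is a hypothetical stand-in model, for illustration only.
    class Delta(BaseModel):
        role: str = "assistant"
        content: str = ""

    d = Delta(content="你好")
    # pydantic 1.x forwards ensure_ascii to json.dumps; pydantic 2.x rejects such kwargs.
    print(d.json(exclude_unset=True, ensure_ascii=False))  # {"content": "你好"}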
@@ -4,4 +4,4 @@ from llmtuner.tuner import get_train_args, get_infer_args, load_model_and_tokeni
 from llmtuner.webui import create_ui


-__version__ = "0.1.0"
+__version__ = "0.1.1"
@@ -1,4 +1,3 @@
-import json
 import uvicorn
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
@@ -96,7 +95,7 @@ def create_app():
             finish_reason=None
         )
         chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
-        yield json.dumps(chunk, ensure_ascii=False)
+        yield chunk.json(exclude_unset=True, ensure_ascii=False)

         for new_text in chat_model.stream_chat(
             query, history, prefix, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens
@@ -110,7 +109,7 @@ def create_app():
                 finish_reason=None
             )
             chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
-            yield json.dumps(chunk, ensure_ascii=False)
+            yield chunk.json(exclude_unset=True, ensure_ascii=False)

         choice_data = ChatCompletionResponseStreamChoice(
             index=0,
@@ -118,7 +117,7 @@ def create_app():
             finish_reason=Finish.STOP
         )
         chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
-        yield json.dumps(chunk, ensure_ascii=False)
+        yield chunk.json(exclude_unset=True, ensure_ascii=False)
         yield "[DONE]"

     return app
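The serialization change is the core of the fix: ChatCompletionStreamResponse is a pydantic model, so the old json.dumps(chunk, ...) call raised a TypeError instead of producing the JSON string each SSE event needs. A minimal sketch with simplified stand-in models (not the project's actual protocol classes):

    import json
    from typing import List, Optional
    from pydantic import BaseModel

    # Simplified stand-ins for the response models used above, for illustration only.
    class ChatCompletionResponseStreamChoice(BaseModel):
        index: int
        delta: dict
        finish_reason: Optional[str] = None

    class ChatCompletionStreamResponse(BaseModel):
        model: str
        choices: List[ChatCompletionResponseStreamChoice]

    chunk = ChatCompletionStreamResponse(
        model="demo",
        choices=[ChatCompletionResponseStreamChoice(index=0, delta={"content": "你好"})]
    )

    try:
        json.dumps(chunk, ensure_ascii=False)   # old code: pydantic models are not JSON-serializable
    except TypeError as err:
        print("json.dumps fails:", err)

    # New code: pydantic 1.x renders the model itself and drops unset fields.
    print(chunk.json(exclude_unset=True, ensure_ascii=False))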
@@ -107,7 +107,11 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer):
             # Compute rewards
             replace_model(unwrapped_model, target="reward")
             with torch.no_grad():
-                _, _, values = self.model(**self.prepare_model_inputs(queries, responses))
+                _, _, values = self.model(
+                    **self.prepare_model_inputs(queries, responses),
+                    output_hidden_states=True,
+                    return_dict=True
+                )
                 rewards = [reward for reward in values[:, -1].to(torch.float32)] # use float32 type
             replace_model(unwrapped_model, target="default")

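The two extra keyword arguments matter because the reward is read off the model's last hidden state: without output_hidden_states=True and return_dict=True the value head has nothing to consume. A minimal sketch of that dependency, assuming a trl-style value head on top of a causal LM (the tiny GPT-2 checkpoint and the bare Linear head are stand-ins for the demo, not the project's setup):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Stand-ins: a tiny public checkpoint and a plain linear value head.
    tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
    model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
    value_head = torch.nn.Linear(model.config.hidden_size, 1)

    inputs = tokenizer("Hello there", return_tensors="pt")
    with torch.no_grad():
        # return_dict=True yields a ModelOutput; output_hidden_states=True populates .hidden_states
        outputs = model(**inputs, output_hidden_states=True, return_dict=True)
        last_hidden = outputs.hidden_states[-1]        # (batch, seq_len, hidden_size)
        values = value_head(last_hidden).squeeze(-1)   # (batch, seq_len)
    reward = values[:, -1].to(torch.float32)           # last-token value, as in the diff above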
@@ -32,7 +32,7 @@ class PairwisePeftTrainer(PeftTrainer):
         See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509
         """
         batch_size = inputs["input_ids"].size(0) // 2
-        _, _, values = model(**inputs)
+        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
         r_accept, r_reject = values[:, -1].split(batch_size, dim=0)
         loss = -torch.log(torch.sigmoid(r_accept - r_reject)).mean()
         return (loss, [loss, r_accept, r_reject]) if return_outputs else loss
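For reference, the pairwise loss above assumes the batch stacks accepted responses in the first half and rejected responses in the second half, scoring each sequence by its last-token value. A small self-contained rerun of that arithmetic (the numbers are made up for illustration):

    import torch

    # values as the trainer sees them: last-token scores for 2 accepted + 2 rejected responses.
    values = torch.tensor([[0.2, 1.5],
                           [0.1, 0.9],
                           [0.3, 0.4],
                           [0.2, -0.1]])
    batch_size = values.size(0) // 2
    r_accept, r_reject = values[:, -1].split(batch_size, dim=0)   # ([1.5, 0.9], [0.4, -0.1])
    loss = -torch.log(torch.sigmoid(r_accept - r_reject)).mean()
    print(loss)  # ~0.30; shrinks as accepted scores pull further ahead of rejected ones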