@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple
 
 from transformers import TrainerState, TrainerControl
 from trl import PPOTrainer
-from trl.core import LengthSampler
+from trl.core import LengthSampler, PPODecorators, logprobs_from_logits
 
 from llmtuner.extras.logging import get_logger
 from llmtuner.extras.misc import AverageMeter, count_parameters, get_logits_processor
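
The two imports added above back the `batched_forward_pass` override further down: `PPODecorators.empty_cuda_cache()` frees cached CUDA memory after the decorated call, and `logprobs_from_logits` extracts per-token log-probabilities. A minimal sketch of what that helper computes (an assumption about its behavior, not trl's exact code):

```python
import torch
import torch.nn.functional as F

def logprobs_from_logits_sketch(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    # log-softmax over the vocabulary, then gather the log-prob of each target token
    logp = F.log_softmax(logits, dim=-1)                           # (batch, seq, vocab)
    return torch.gather(logp, 2, labels.unsqueeze(2)).squeeze(-1)  # (batch, seq)
```
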
@@ -35,6 +35,7 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer):
         finetuning_args: "FinetuningArguments",
         generating_args: "GeneratingArguments",
         callbacks: List["LogCallback"],
+        compute_dtype: torch.dtype,
         **kwargs
     ):
         PPOTrainer.__init__(self, **kwargs)
@@ -42,6 +43,7 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer):
         self.finetuning_args = finetuning_args
         self.generating_args = generating_args
         self.log_callback = callbacks[0]
+        self.compute_dtype = compute_dtype
         self.state = TrainerState()
         self.control = TrainerControl()
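
Storing `compute_dtype` on the trainer is what later lets the reward and forward passes run under autocast in half precision. A toy, self-contained illustration of the mechanism (the linear layer and shapes are made up for the example; requires a CUDA device):

```python
import torch
import torch.nn as nn

compute_dtype = torch.bfloat16  # e.g. when the run was launched with --bf16

model = nn.Linear(16, 16).cuda()
x = torch.randn(4, 16, device="cuda")

# activations are computed in bf16 while the module's weights stay in fp32
with torch.cuda.amp.autocast(dtype=compute_dtype):
    y = model(x)

print(y.dtype)  # torch.bfloat16
```
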
@@ -74,7 +76,7 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer):
 
         # Keyword arguments for `model.generate`
         gen_kwargs = self.generating_args.to_dict()
-        gen_kwargs["eos_token_id"] = [self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids
+        gen_kwargs["eos_token_id"] = list(set([self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids))
        gen_kwargs["pad_token_id"] = self.tokenizer.pad_token_id
         gen_kwargs["logits_processor"] = get_logits_processor()
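
The `eos_token_id` change wraps the list in `list(set(...))` so a terminator id is not passed to `model.generate` twice when the eos token also appears among the additional special tokens; `set` drops the ordering, which is harmless because `generate` only tests membership. A small example with made-up token ids:

```python
eos_token_id = 2
additional_special_tokens_ids = [2, 32000, 32001]  # hypothetical ids; 2 duplicates the eos id

eos_ids = list(set([eos_token_id] + additional_special_tokens_ids))
print(sorted(eos_ids))  # [2, 32000, 32001] -- each id appears exactly once
```
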
@@ -183,12 +185,74 @@ class PPOPeftTrainer(PPOTrainer, PeftTrainer):
         replace_model(unwrapped_model, target="reward")
         batch = self.prepare_model_inputs(queries, responses)
         _, _, values = self.model(**batch, output_hidden_states=True, return_dict=True)
-        if values.size(0) != batch["input_ids"].size(0):
+        if values.size(0) != batch["input_ids"].size(0): # adapt chatglm2
             values = torch.transpose(values, 0, 1)
         rewards = [reward for reward in values[:, -1].float().detach().cpu()] # use fp32 type
         replace_model(unwrapped_model, target="default")
         return rewards
 
+    @PPODecorators.empty_cuda_cache()
+    def batched_forward_pass(
+        self,
+        model: "AutoModelForCausalLMWithValueHead",
+        queries: torch.Tensor,
+        responses: torch.Tensor,
+        model_inputs: dict,
+        return_logits: Optional[bool] = False
+    ):
+        r"""
+        Calculates model outputs in multiple batches.
+
+        Subclass and override to inject custom behavior.
+        """
+        bs = len(queries)
+        fbs = self.config.mini_batch_size
+        all_logprobs = []
+        all_logits = []
+        all_masks = []
+        all_values = []
+
+        for i in range(math.ceil(bs / fbs)):
+            input_kwargs = {key: value[i * fbs : (i + 1) * fbs] for key, value in model_inputs.items()}
+            query_batch = queries[i * fbs : (i + 1) * fbs]
+            response_batch = responses[i * fbs : (i + 1) * fbs]
+            input_ids = input_kwargs["input_ids"]
+            attention_mask = input_kwargs["attention_mask"]
+
+            with torch.cuda.amp.autocast(dtype=self.compute_dtype): # support bf16
+                logits, _, values = model(**input_kwargs)
+
+            if values.size(0) != input_ids.size(0): # adapt chatglm2
+                values = torch.transpose(values, 0, 1)
+
+            logprobs = logprobs_from_logits(logits[:, :-1, :], input_ids[:, 1:])
+            masks = torch.zeros_like(attention_mask)
+            masks[:, :-1] = attention_mask[:, 1:]
+
+            for j in range(len(query_batch)):
+                start = len(query_batch[j]) - 1
+                if attention_mask[j, 0] == 0: # offset left padding
+                    start += attention_mask[j, :].nonzero()[0]
+                end = start + len(response_batch[j])
+
+                masks[j, :start] = 0
+                masks[j, end:] = 0
+
+            if return_logits:
+                all_logits.append(logits)
+            else:
+                del logits
+
+            all_values.append(values)
+            all_logprobs.append(logprobs)
+            all_masks.append(masks)
+
+        return (
+            torch.cat(all_logprobs),
+            torch.cat(all_logits)[:, :-1] if return_logits else None,
+            torch.cat(all_values)[:, :-1],
+            torch.cat(all_masks)[:, :-1],
+        )
+
     def save_model(self, output_dir: Optional[str] = None) -> None:
         r"""
         Saves model checkpoint.