This commit is contained in:
hiyouga
2024-12-19 12:16:30 +00:00
parent ffbb4dbdb0
commit d4c1fda1ad
6 changed files with 22 additions and 16 deletions

View File

@@ -151,7 +151,7 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
return padded_tensor.contiguous() # in contiguous memory
def save_predictions(
-self, dataset: "Dataset", predict_results: "PredictionOutput", gen_kwargs: Dict[str, Any]
+self, dataset: "Dataset", predict_results: "PredictionOutput", skip_special_tokens: bool = True
) -> None:
r"""
Saves model predictions to `output_dir`.
@@ -179,12 +179,8 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
preds[i] = np.concatenate((preds[i][pad_len[0] :], preds[i][: pad_len[0]]), axis=-1)
decoded_inputs = self.processing_class.batch_decode(dataset["input_ids"], skip_special_tokens=False)
-decoded_preds = self.processing_class.batch_decode(
-preds, skip_special_tokens=gen_kwargs["skip_special_tokens"]
-)
-decoded_labels = self.processing_class.batch_decode(
-labels, skip_special_tokens=gen_kwargs["skip_special_tokens"]
-)
+decoded_preds = self.processing_class.batch_decode(preds, skip_special_tokens=skip_special_tokens)
+decoded_labels = self.processing_class.batch_decode(labels, skip_special_tokens=skip_special_tokens)
with open(output_prediction_file, "w", encoding="utf-8") as f:
for text, pred, label in zip(decoded_inputs, decoded_preds, decoded_labels):