Former-commit-id: d2ebd225db
This commit is contained in:
hiyouga
2023-09-28 01:02:11 +08:00
parent 755e3e49b4
commit 8a8ba08bf7
2 changed files with 2 additions and 2 deletions

View File

@@ -100,7 +100,7 @@ def preprocess_dataset(
return model_inputs
def preprocess_packed_supervised_dataset(examples: Dict[str, List[Any]]) -> Dict[str, Any]:
-# build inputs with format `<bos> X Y <eos>` and labels with format `<ignore> ... <ignore> Y <eos>`
+# build inputs with format `<bos> X Y <eos>` and labels with format `<bos> X Y <eos>`
# we do not mask the inputs in packed training.
model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
input_ids, labels = [], []