This commit is contained in:
hiyouga
2024-08-30 03:21:50 +08:00
parent 8b588c7224
commit bee1bd43b9
8 changed files with 24 additions and 13 deletions

View File

@@ -62,7 +62,7 @@ def preprocess_unsupervised_dataset(
tokenizer: "PreTrainedTokenizer",
processor: Optional["ProcessorMixin"],
data_args: "DataArguments",
) -> Dict[str, List[List[int]]]:
) -> Dict[str, List[Any]]:
# build inputs with format `<bos> X` and labels with format `Y <eos>`
model_inputs = defaultdict(list)
for i in range(len(examples["prompt"])):