support interleave probs

Former-commit-id: 69744c17e8
This commit is contained in:
hiyouga
2023-08-04 21:27:35 +08:00
parent 44823ec2c7
commit b32ed1d7be
3 changed files with 17 additions and 7 deletions

View File

@@ -111,6 +111,6 @@ def get_dataset(
if not data_args.streaming:
logger.warning("We recommend using `mix_strategy=concat` in non-streaming mode.")
stopping_strategy = "first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted"
-return interleave_datasets(all_datasets, stopping_strategy=stopping_strategy)
+return interleave_datasets(all_datasets, data_args.interleave_probs, stopping_strategy=stopping_strategy)
else:
raise ValueError("Unknown mixing strategy.")