Add code for reading from multiple files in one directory

Former-commit-id: 9b80cf08b9f0d4aee896b228fb76399e9a7c9d8b
This commit is contained in:
BUAADreamer 2023-06-10 16:27:30 +08:00
parent ef6c5ae18a
commit 5b93ca6c39

View File

@@ -358,12 +358,10 @@ def prepare_data(
elif dataset_attr.load_from == "file":
data_file = os.path.join(data_args.dataset_dir, dataset_attr.file_name)
extension = dataset_attr.file_name.split(".")[-1]
if dataset_attr.file_sha1 is not None:
checksum(data_file, dataset_attr.file_sha1)
else:
logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.")
print(extension)
raw_datasets = load_dataset(
extension if extension in ["csv", "json"] else "text",
data_files=data_file,