mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-16 00:28:10 +08:00
Add code for reading multiple files from one directory
Former-commit-id: 9b80cf08b9f0d4aee896b228fb76399e9a7c9d8b
This commit is contained in:
parent
ef6c5ae18a
commit
5b93ca6c39
@ -358,12 +358,10 @@ def prepare_data(
|
||||
elif dataset_attr.load_from == "file":
|
||||
data_file = os.path.join(data_args.dataset_dir, dataset_attr.file_name)
|
||||
extension = dataset_attr.file_name.split(".")[-1]
|
||||
|
||||
if dataset_attr.file_sha1 is not None:
|
||||
checksum(data_file, dataset_attr.file_sha1)
|
||||
else:
|
||||
logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.")
|
||||
print(extension)
|
||||
raw_datasets = load_dataset(
|
||||
extension if extension in ["csv", "json"] else "text",
|
||||
data_files=data_file,
|
||||
|
Loading…
x
Reference in New Issue
Block a user