mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 10:56:56 +08:00
Add code for reading from multiple files in one directory
Former-commit-id: a2af9df5a9
This commit is contained in:
@@ -358,12 +358,10 @@ def prepare_data(
|
||||
elif dataset_attr.load_from == "file":
|
||||
data_file = os.path.join(data_args.dataset_dir, dataset_attr.file_name)
|
||||
extension = dataset_attr.file_name.split(".")[-1]
|
||||
|
||||
if dataset_attr.file_sha1 is not None:
|
||||
checksum(data_file, dataset_attr.file_sha1)
|
||||
else:
|
||||
logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.")
|
||||
print(extension)
|
||||
raw_datasets = load_dataset(
|
||||
extension if extension in ["csv", "json"] else "text",
|
||||
data_files=data_file,
|
||||
|
||||
Reference in New Issue
Block a user