mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-03 04:02:49 +08:00
[data] support loading folder from remote (#8078)
This commit is contained in:
parent
e8a18c17e9
commit
130bfaf8e3
@ -169,11 +169,22 @@ def read_cloud_json(cloud_path):
|
|||||||
try:
|
try:
|
||||||
# Try with anonymous access first
|
# Try with anonymous access first
|
||||||
fs = setup_fs(cloud_path, anon=True)
|
fs = setup_fs(cloud_path, anon=True)
|
||||||
return _read_json_with_fs(fs, cloud_path, lines=cloud_path.endswith(".jsonl"))
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# Try again with credentials
|
# Try again with credentials
|
||||||
fs = setup_fs(cloud_path)
|
fs = setup_fs(cloud_path)
|
||||||
return _read_json_with_fs(fs, cloud_path, lines=cloud_path.endswith(".jsonl"))
|
|
||||||
|
if fs.isdir(cloud_path):
|
||||||
|
files = [x["Key"] for x in fs.listdir(cloud_path)]
|
||||||
|
else:
|
||||||
|
files = [cloud_path]
|
||||||
|
# filter out non-JSON files
|
||||||
|
files = [file for file in files if file.endswith(".json") or file.endswith(".jsonl")]
|
||||||
|
if not files:
|
||||||
|
raise ValueError(f"No JSON/JSONL files found in the specified path: {cloud_path}")
|
||||||
|
data = []
|
||||||
|
for file in files:
|
||||||
|
data.extend(_read_json_with_fs(fs, file, lines=file.endswith(".jsonl")))
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
def _read_json_with_fs(fs, path, lines=True):
|
def _read_json_with_fs(fs, path, lines=True):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user