[ray] add storage filesystem to ray config (#7854)

This commit is contained in:
Eric Tang 2025-04-27 07:12:40 -07:00 committed by GitHub
parent 036a76e9cb
commit b4407e4b0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 1 deletions

View File

@ -34,6 +34,10 @@ class RayArguments:
default="./saves",
metadata={"help": "The storage path to save training results to"},
)
# Remote storage backend, given as one of the strings "s3", "gs" or "gcs".
# The default None means no filesystem has been selected.
# NOTE(review): code further down (the block that checks
# `self.ray_storage_filesystem`) overwrites this string in place with a pyarrow
# filesystem object -- fs.S3FileSystem() for "s3", fs.GcsFileSystem() for
# "gs"/"gcs" -- so once that code has run the attribute no longer matches the
# annotation below. Confirm before relying on the declared type.
ray_storage_filesystem: Optional[Literal["s3", "gs", "gcs"]] = field(
default=None,
metadata={"help": "The storage filesystem to use. If None specified, local filesystem will be used."},
)
ray_num_workers: int = field(
default=1,
metadata={"help": "The number of workers for Ray training. Default is 1 worker."},
@ -55,6 +59,17 @@ class RayArguments:
self.use_ray = use_ray()
if isinstance(self.resources_per_worker, str) and self.resources_per_worker.startswith("{"):
self.resources_per_worker = _convert_str_dict(json.loads(self.resources_per_worker))
if self.ray_storage_filesystem is not None:
if self.ray_storage_filesystem not in ["s3", "gs", "gcs"]:
raise ValueError(
f"ray_storage_filesystem must be one of ['s3', 'gs', 'gcs'], got {self.ray_storage_filesystem}"
)
import pyarrow.fs as fs
if self.ray_storage_filesystem == "s3":
self.ray_storage_filesystem = fs.S3FileSystem()
elif self.ray_storage_filesystem == "gs" or self.ray_storage_filesystem == "gcs":
self.ray_storage_filesystem = fs.GcsFileSystem()
@dataclass

View File

@ -680,6 +680,12 @@ def get_ray_trainer(
if ray_args.ray_init_kwargs is not None:
ray.init(**ray_args.ray_init_kwargs)
if ray_args.ray_storage_filesystem is not None:
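# A storage filesystem (S3/GCS) is configured, so pass the storage path through unchanged rather than resolving it to an absolute local path.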
storage_path = ray_args.ray_storage_path
else:
storage_path = Path(ray_args.ray_storage_path).absolute().as_posix()
trainer = TorchTrainer(
training_function,
train_loop_config=train_loop_config,
@ -691,7 +697,8 @@ def get_ray_trainer(
),
run_config=RunConfig(
name=ray_args.ray_run_name,
storage_path=Path(ray_args.ray_storage_path).absolute().as_posix(),
storage_filesystem=ray_args.ray_storage_filesystem,
storage_path=storage_path,
),
)
return trainer