From d8a5571be7fcdc6f9e2442a832252d507f58c862 Mon Sep 17 00:00:00 2001 From: Eric Tang <46737979+erictang000@users.noreply.github.com> Date: Thu, 20 Mar 2025 19:56:47 -0700 Subject: [PATCH] [3rdparty] fix redundant process group destroy for ray (#7395) * fix redundant process group destroy for ray * Update tuner.py --------- Co-authored-by: hoshi-hiyouga --- src/llamafactory/train/tuner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index c5d926ac..3adb382b 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -38,6 +38,7 @@ from .trainer_utils import get_ray_trainer, get_swanlab_callback if is_ray_available(): + import ray from ray.train.huggingface.transformers import RayTrainReportCallback @@ -77,6 +78,9 @@ def _training_function(config: dict[str, Any]) -> None: else: raise ValueError(f"Unknown task: {finetuning_args.stage}.") + if is_ray_available() and ray.is_initialized(): + return # if ray is initialized it will destroy the process group on return + try: if dist.is_initialized(): dist.destroy_process_group()