From fd2c64315be7c7f26090cec719404db3152e928d Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Thu, 6 Jun 2024 23:44:58 +0800 Subject: [PATCH] add DISABLE_TORCHRUN option Former-commit-id: 45d8be8f93188e6e9d1e0841c2736dce0c1ef7d2 --- src/llamafactory/cli.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 8ae3d6a8..092f4cf7 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -72,7 +72,12 @@ def main(): elif command == Command.EXPORT: export_model() elif command == Command.TRAIN: - if get_device_count() > 0: + disable_torchrun = os.environ.get("DISABLE_TORCHRUN", "0").lower() in ["true", "1"] + if disable_torchrun and get_device_count() > 1: + logger.warning("`torchrun` cannot be disabled when device count > 1.") + disable_torchrun = False + + if (not disable_torchrun) and (get_device_count() > 0): master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1") master_port = os.environ.get("MASTER_PORT", str(random.randint(20001, 29999))) logger.info("Initializing distributed tasks at: {}:{}".format(master_addr, master_port))