From 128eea5def050dc301b480341d44283ab08353a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?X=CE=BBRI-U5?= Date: Tue, 30 Jan 2024 14:58:00 +0700 Subject: [PATCH] Update distributed.py --- src/nanotron/distributed.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/nanotron/distributed.py b/src/nanotron/distributed.py index 889b2330..238dca9b 100644 --- a/src/nanotron/distributed.py +++ b/src/nanotron/distributed.py @@ -259,22 +259,7 @@ def initialize_torch_distributed(): backend = "gloo" # Call the init process. - pytest_worker_id = os.environ.get("PYTEST_XDIST_WORKER") - if worker_id is not None: - port = find_free_port() - else: - def string_to_unique_number(s, min_port=2000, max_port=65000): - import hashlib - # Hash the string - hash_object = hashlib.sha256(s.encode()) - hash_number = int(hash_object.hexdigest(), base=16) - - # Map the hash to the specified range - range_size = min_port - max_port - return range_start + (hash_number % range_size) - - port = string_to_unique_number(pytest_worker_id) - + port = find_free_port() init_method = f"tcp://localhost:{port}" dist.init_process_group(init_method=init_method, backend=backend, world_size=world_size, rank=rank, timeout=dist.default_pg_timeout) return True