Skip to content

Commit

Permalink
fix master bug
Browse files Browse the repository at this point in the history
  • Loading branch information
caozhou committed Jun 5, 2024
1 parent 0fb14a3 commit 53e9dcc
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
container-test-job:
runs-on: self-hosted
container:
image: localhost:5000/flagscale_cicd:v1.2
image: localhost:5000/flagscale_cicd:v1.3
ports:
- 80
options: --gpus all --hostname flagscale_cicd
Expand Down
38 changes: 15 additions & 23 deletions flagscale/auto_tuner/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,58 +78,50 @@ def is_ip_addr(master):

if not isinstance(master, str):
return False
pattern = r"^((25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)$"
pattern = (
r"^((25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)\.){3}(25[0-5]|2[0-4]\d|1\d{2}|[1-9]?\d)$"
)
result = re.match(pattern, master)
if result:
return True
else:
return False


def get_ip_addr():
    """Get ip address.

    Resolves the local hostname (via its fully qualified name) to an IPv4
    address string.

    Returns:
        str: The resolved address, or ``"127.0.0.1"`` when resolution fails.
    """
    try:
        hostname = socket.gethostname()
        ip = socket.gethostbyname(socket.getfqdn(hostname))
    except (OSError, UnicodeError):
        # Best-effort lookup: resolver failures (socket.gaierror/herror are
        # OSError subclasses) and un-encodable hostnames fall back to
        # loopback, while KeyboardInterrupt/SystemExit still propagate.
        ip = "127.0.0.1"
    return ip


def is_master(config):
    """Check if current node is master.

    The hostfile path comes from ``config.experiment.runner["hostfile"]``.
    When the ``AIRS_SWITCH`` environment variable is set, a non-empty
    ``AIRS_HOSTFILE_PATH`` overrides it. The first entry of the parsed
    hostfile is treated as the master node.

    Args:
        config: Experiment configuration exposing ``experiment.runner`` with
            dict-style ``get`` and item access.

    Returns:
        bool: ``True`` when no resources are listed (local host scene), or
        when this node's IP address / hostname equals the first hostfile
        entry; ``False`` otherwise.

    Raises:
        ValueError: If ``nnodes`` is greater than 1 but no resources were
            parsed from the hostfile.
        subprocess.CalledProcessError: If the ``hostname`` command fails.
    """
    runner = config.experiment.runner
    nnodes = runner.get("nnodes", 1)

    hostfile = None
    if runner.get("hostfile", None):
        hostfile = runner["hostfile"]
    if os.environ.get("AIRS_SWITCH", None):
        if os.environ.get("AIRS_HOSTFILE_PATH", None):
            hostfile = os.environ["AIRS_HOSTFILE_PATH"]

    # NOTE(review): assumes parse_hostfile tolerates hostfile=None and returns
    # a mapping keyed by host (or a falsy value) -- confirm against its
    # definition elsewhere in this module.
    resources = parse_hostfile(hostfile)
    if not resources and nnodes > 1:
        raise ValueError("In the multi-node mode, please set the hostfile")

    if resources:
        # dict views are not subscriptable on Python 3, so take the first key
        # through the iterator instead of ``resources.keys()[0]``.
        master = next(iter(resources))
        if is_ip_addr(master):
            return get_ip_addr() == master
        # The master is named by hostname: compare with this node's hostname.
        # No shell is needed for a fixed, argument-less command.
        output = subprocess.run(
            ["hostname"], check=True, text=True, capture_output=True
        )
        return output.stdout.strip() == master

    # Local host scene
    return True

0 comments on commit 53e9dcc

Please sign in to comment.