Skip to content

Commit

Permalink
Merge pull request #272 from jameshcorbett/rabbitmapping-by-jobid
Browse files Browse the repository at this point in the history
rabbitmapping: fetch by jobid
  • Loading branch information
mergify[bot] authored Feb 13, 2025
2 parents 199be4f + 4dda552 commit 8abdb50
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 33 deletions.
95 changes: 64 additions & 31 deletions src/cmd/flux-getrabbit.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@
import json

import flux
from flux.job import JobID
from flux.hostlist import Hostlist


def main():
"""Create a JSON file mapping compute nodes <-> rabbits.
Fetch the SystemConfiguration from kubernetes and use that for the mapping.
Also fetch Storage resources from kubernetes to populate the JSON file with
capacity data.
"""
def read_args():
"""Read in command-line args."""
parser = argparse.ArgumentParser(
formatter_class=flux.util.help_formatter(),
description=("Map compute nodes to rabbits and vice versa."),
Expand All @@ -27,56 +23,93 @@ def main():
nargs="+",
metavar="HOSTS",
type=Hostlist,
help="One or more hostlists of compute nodes",
help="hostlists of compute nodes, to map to rabbits",
)
parser.add_argument(
"--jobids",
"-j",
nargs="+",
metavar="JOBID",
help="jobids, to map to rabbits",
)
parser.add_argument(
"rabbits",
nargs="*",
metavar="RABBITS",
type=Hostlist,
help="One or more hostlists of rabbit nodes",
help="hostlists of rabbits, to map to compute nodes",
)
# validate args
args = parser.parse_args()
if args.computes and args.rabbits:
return parser.parse_args()


def map_to_rabbits(args, handle, mapping):
    """Map compute nodes to rabbit nodes, and print the resulting hostlist.

    The compute nodes are taken from ``args.computes`` and from the nodelists
    of the jobs named in ``args.jobids``; either attribute may be unset.

    Args:
        args: parsed command-line namespace. Only ``computes`` and ``jobids``
            are read; the namespace is left unmodified.
        handle: Flux handle passed to ``flux.job.job_list_id`` for job lookups.
        mapping: loaded mapping data; ``mapping["computes"]`` is indexed by
            compute hostname to obtain the corresponding rabbit.

    Raises:
        SystemExit: (via ``sys.exit``) if a job lookup fails or a compute node
            is not present in ``mapping["computes"]``.
    """
    # Resolve every job ID first so that a bad job ID aborts before any
    # mapping lookup happens. The nodelists are kept in a local list rather
    # than being appended to ``args.computes``, so the caller's namespace is
    # not mutated as a side effect.
    job_nodelists = []
    for jobid in args.jobids or []:
        try:
            nodelist = (
                flux.job.job_list_id(handle, JobID(jobid), ["nodelist"])
                .get_jobinfo()
                .nodelist
            )
        except FileNotFoundError:
            sys.exit(f"Could not find job {jobid}")
        except Exception as exc:
            # Deliberately broad: this is a CLI boundary, and any failure
            # (including a job ID that JobID() rejects) becomes an exit message.
            sys.exit(f"Lookup of job {jobid} failed: {exc}")
        job_nodelists.append(nodelist)

    # Explicitly requested computes come first, then the job nodelists.
    aggregated_computes = Hostlist()
    for computes in [*(args.computes or []), *job_nodelists]:
        aggregated_computes.append(computes)
    aggregated_computes.uniq()

    hlist = Hostlist()
    for hostname in aggregated_computes:
        try:
            rabbit = mapping["computes"][hostname]
        except KeyError:
            sys.exit(f"Could not find compute {hostname}")
        hlist.append(rabbit)
    print(hlist.uniq().encode())


def main():
"""Construct a hostlist of rabbits or compute nodes, depending on arguments."""
args = read_args()
if args.rabbits and (args.computes or args.jobids):
sys.exit(
"Both rabbits and computes (with '--computes') cannot be "
"looked up at the same time"
"Both rabbits and computes or jobids cannot be looked up at the same time"
)
# load the mapping file
handle = flux.Flux()
path = handle.conf_get("rabbit.mapping")
if path is None:
sys.exit("Flux is misconfigured, 'rabbit.mapping' key not set")
try:
with open(path, "r", encoding="utf8") as fd:
mapping = json.load(fd)
with open(path, "r", encoding="utf8") as json_fd:
mapping = json.load(json_fd)
except FileNotFoundError:
sys.exit(
f"Could not find file {path!r} specified under "
"'rabbit.mapping' config key, Flux may be misconfigured"
)
except json.JSONDecodeError as jexc:
sys.exit(f"File {path!r} could not be parsed as JSON: {jexc}")
# construct and print the hostlist of rabbits
# construct and print the hostlist of rabbits or compute nodes,
# depending on arguments
hlist = Hostlist()
if not args.computes and not args.rabbits:
# print out all rabbits
if not args.computes and not args.rabbits and not args.jobids:
# no arguments: print out all rabbits
hlist.append(mapping["rabbits"].keys())
print(hlist.uniq().encode())
return
if args.computes:
aggregated_computes = Hostlist()
for computes in args.computes:
aggregated_computes.append(computes)
aggregated_computes.uniq()
for hostname in aggregated_computes:
try:
rabbit = mapping["computes"][hostname]
except KeyError:
sys.exit(f"Could not find compute {hostname}")
hlist.append(rabbit)
print(hlist.uniq().encode())
if args.jobids or args.computes:
# construct and print the hostlist of rabbits
map_to_rabbits(args, handle, mapping)
return
# construct and print the hostlist of compute nodes
aggregated_rabbits = Hostlist()
Expand Down
19 changes: 17 additions & 2 deletions t/t2001-getrabbit.t
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@ test_expect_success 'flux rabbitmapping works on computes' '
'

test_expect_success 'flux rabbitmapping parses arguments correctly' '
test_must_fail $CMD --computes &&
test_must_fail $CMD rzadams201 -c rzadams1001
test_must_fail $CMD --computes &&
test_must_fail $CMD rzadams201 -c rzadams1001 &&
test_must_fail $CMD rzadams201 -j foobar &&
test_must_fail $CMD -j foobar &&
test_must_fail $CMD -j 124.469
'

test_expect_success 'flux rabbitmapping works with second mapping' '
Expand All @@ -62,4 +65,16 @@ test_expect_success 'flux rabbitmapping works with no arguments' '
test $($CMD) = tuolumne[201-272]
'

# Look up rabbits by job ID: write a one-entry mapping file keyed on the local
# hostname, load a config whose rabbit.mapping points at it, submit a one-node
# job, and check that the lookup by job ID (alone, and combined with -c for
# the same host) prints rabbit101.
# NOTE(review): the lookup runs right after `flux submit` with no explicit
# wait for the job to start -- confirm the job's nodelist is reliably
# available by then.
test_expect_success 'flux rabbitmapping works on jobids' '
echo "{\"computes\": {\"$(hostname)\": \"rabbit101\"}}" > local_rabbitmapping &&
echo "
[rabbit]
mapping = \"$(pwd)/local_rabbitmapping\"
" | flux config load &&
jobid=$(flux submit -n1 hostname) &&
$CMD -j ${jobid} &&
test $($CMD -j ${jobid}) = rabbit101 &&
test $($CMD -j ${jobid} -c $(hostname)) = rabbit101
'

test_done

0 comments on commit 8abdb50

Please sign in to comment.