Skip to content

Commit

Permalink
refactor(slurmctld): improve comments and function names
Browse files (browse the repository at this point in the history)
  • Loading branch information
dsloanm committed Jan 8, 2025
1 parent 5ae35e1 commit 29e1169
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 17 deletions.
27 changes: 13 additions & 14 deletions charms/slurmctld/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def _on_resume_nodes_action(self, event: ActionEvent) -> None:
event.fail(message=f"Error resuming {nodes}: {e.output}")

def _on_slurmd_available(self, event: SlurmdAvailableEvent) -> None:
self._add_to_gres_conf(event)
self._update_gres_conf(event)
self._on_write_slurm_conf(event)

def _on_slurmd_departed(self, event: SlurmdDepartedEvent) -> None:
Expand All @@ -224,12 +224,13 @@ def _on_slurmd_departed(self, event: SlurmdDepartedEvent) -> None:
self._write_gres_conf(event)
self._on_write_slurm_conf(event)

def _add_to_gres_conf(self, event: SlurmdAvailableEvent) -> None:
"""Write new nodes to gres.conf configuration file for Generic Resource scheduling."""
# This function does not perform an "scontrol reconfigure". It is expected
# _on_write_slurm_conf() is called immediately following to do this.
def _update_gres_conf(self, event: SlurmdAvailableEvent) -> None:
"""Write new nodes to gres.conf configuration file for Generic Resource scheduling.
# Only the leader should write the config.
Warnings:
* This function does not perform an `scontrol reconfigure`. It is expected
that the function `_on_write_slurm_conf()` is called immediately following to do this.
"""
if not self.model.unit.is_leader():
return

Expand All @@ -238,31 +239,29 @@ def _add_to_gres_conf(self, event: SlurmdAvailableEvent) -> None:
return

if gres_info := event.gres_info:
# Build list of GRESNodes expected by slurmutils
gres_nodes = []
for resource in gres_info:
node = GRESNode(NodeName=str(event.node_name), **resource)
gres_nodes.append(node)

# Update gres.conf
with self._slurmctld.gres.edit() as config:
config.nodes[event.node_name] = gres_nodes

def _write_gres_conf(self, event: SlurmdDepartedEvent) -> None:
"""Write out current gres.conf configuration file for Generic Resource scheduling."""
# This function does not perform an "scontrol reconfigure". It is expected
# _on_write_slurm_conf() is called immediately following to do this.
"""Write out current gres.conf configuration file for Generic Resource scheduling.
# Only the leader should write the config.
Warnings:
* This function does not perform an `scontrol reconfigure`. It is expected
that the function `_on_write_slurm_conf()` is called immediately following to do this.
"""
if not self.model.unit.is_leader():
return

if not self._check_status():
event.defer()
return

# Get current GRES state for all available nodes and write to gres.conf.
gres_all_nodes = self._slurmd.get_gres()
gres_all_nodes = self._slurmd.get_all_gres_info()
gres_conf = GRESConfig(Nodes=gres_all_nodes)
self._slurmctld.gres.dump(gres_conf)

Expand Down
5 changes: 2 additions & 3 deletions charms/slurmctld/src/interface_slurmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,8 @@ def get_new_nodes_and_nodes_and_partitions(self) -> Dict[str, Any]:
)
return {"DownNodes": new_node_down_nodes, "Nodes": nodes, "Partitions": partitions}

def get_gres(self) -> Dict[str, Any]:
def get_all_gres_info(self) -> Dict[str, Any]:
"""Return GRES configuration for all currently related compute nodes."""
# Loop over all relation units, gathering GRES info.
gres_info = {}
if relations := self.framework.model.relations.get(self._relation_name):
for relation in relations:
Expand All @@ -281,7 +280,7 @@ def get_gres(self) -> Dict[str, Any]:
):

node_name = node_config["NodeName"]
# slurmutils expects NodeName in values.
# Add NodeName to each GRES device to match the format required by slurmutils.
for device in gres:
device["NodeName"] = node_name
gres_info[node_name] = gres
Expand Down

0 comments on commit 29e1169

Please sign in to comment.