Skip to content

Commit

Permalink
chore(slurmctld): use peer relation for ingress ip
Browse files Browse the repository at this point in the history
These changes add a peer relation to the slurmctld charm and obtain
the ingress_address from the new slurmctld-peer relation instead of
from the slurmd interface.

The reason for this change is that we do not want to depend on
the existence of the slurmd relation in order to know our ip.

With a peer relation, the ingress address is always resolvable so long as
Juju knows the IP address of the unit.
  • Loading branch information
jamesbeedy authored and NucciTheBoss committed Nov 25, 2024
1 parent 60eae44 commit 70657d6
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 8 deletions.
4 changes: 4 additions & 0 deletions charms/slurmctld/charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ provides:
interface: cos_agent
limit: 1

peers:
slurmctld-peer:
interface: slurmctld-peer

assumes:
- juju

Expand Down
18 changes: 10 additions & 8 deletions charms/slurmctld/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import subprocess
from typing import Any, Dict, List, Optional, Union

from constants import CHARM_MAINTAINED_SLURM_CONF_PARAMETERS
from constants import CHARM_MAINTAINED_SLURM_CONF_PARAMETERS, PEER_RELATION
from exceptions import IngressAddressUnavailableError
from interface_slurmd import (
PartitionAvailableEvent,
PartitionUnavailableEvent,
Expand Down Expand Up @@ -303,7 +304,7 @@ def _assemble_slurmctld_parameters() -> dict[str, Any]:

slurm_conf = SlurmConfig(
ClusterName=self._cluster_name,
SlurmctldAddr=self._slurmd_ingress_address,
SlurmctldAddr=self._ingress_address,
SlurmctldHost=[self._slurmctld.hostname],
SlurmctldParameters=_assemble_slurmctld_parameters(),
ProctrackType="proctrack/linuxproc" if is_container() else "proctrack/cgroup",
Expand Down Expand Up @@ -403,12 +404,13 @@ def hostname(self) -> str:
return self._slurmctld.hostname

@property
def _slurmd_ingress_address(self) -> str:
"""Return the ingress_address from the slurmd relation if it exists."""
ingress_address = ""
if binding := self.model.get_binding("slurmd"):
ingress_address = f"{binding.network.ingress_address}"
return ingress_address
def _ingress_address(self) -> str:
    """Return this unit's ingress address taken from the peer relation binding.

    Returns:
        The ingress address formatted as a string.

    Raises:
        IngressAddressUnavailableError: If no binding exists for the peer
            relation, or the binding does not report an ingress address.
    """
    peer_binding = self.model.get_binding(PEER_RELATION)
    if peer_binding is None:
        raise IngressAddressUnavailableError("Ingress address unavailable")

    address = peer_binding.network.ingress_address
    if address is None:
        # NOTE(review): ops documents `ingress_address` as optional. Formatting
        # `None` would return the literal string "None" to callers, so treat it
        # as unavailable instead.
        raise IngressAddressUnavailableError("Ingress address unavailable")

    ingress_address = f"{address}"
    logger.debug("Slurmctld ingress_address: %s", ingress_address)
    return ingress_address

@property
def slurm_installed(self) -> bool:
Expand Down
2 changes: 2 additions & 0 deletions charms/slurmctld/src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

"""This module provides constants for the slurmctld-operator charm."""

PEER_RELATION = "slurmctld-peer"

CHARM_MAINTAINED_SLURM_CONF_PARAMETERS = {
"AuthAltParameters": {"jwt_key": "/var/lib/slurm/checkpoint/jwt_hs256.key"},
"AuthAltTypes": ["auth/jwt"],
Expand Down
13 changes: 13 additions & 0 deletions charms/slurmctld/src/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024 Omnivector Corp
# See LICENSE file for licensing details.

"""Custom exceptions for the slurmctld operator."""


class IngressAddressUnavailableError(Exception):
    """Exception raised when the ingress address is unavailable."""

    @property
    def message(self) -> str:
        """Return the message passed as the first argument to the exception.

        Returns an empty string when the exception was created without
        arguments, rather than failing with an IndexError.
        """
        return self.args[0] if self.args else ""
1 change: 1 addition & 0 deletions charms/slurmctld/tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def test_on_slurmdbd_unavailable(self) -> None:
def test_get_user_supplied_parameters(self, *_) -> None:
"""Test that user supplied parameters are parsed correctly."""
self.harness.add_relation("slurmd", "slurmd")
self.harness.add_relation("slurmctld-peer", self.harness.charm.app.name)
self.harness.update_config(
{"slurm-conf-parameters": "JobAcctGatherFrequency=task=30,network=40"}
)
Expand Down

0 comments on commit 70657d6

Please sign in to comment.