diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c1014b1..237d7d7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -65,12 +65,16 @@ jobs: integration-test: strategy: - fail-fast: true + fail-fast: false matrix: - bases: + bases: - ubuntu@22.04 - name: Integration tests (LXD) | ${{ matrix.bases }} + local: [true, false] + name: Integration tests (LXD) ${{ matrix.local && '|' || '| Charmhub (edge) |'}} ${{ matrix.bases }} runs-on: ubuntu-latest + # Testing against Charmhub will probably yield errors when doing breaking changes, so don't + # block CI on that. + continue-on-error: ${{ !matrix.local }} needs: - inclusive-naming-check - lint @@ -79,10 +83,21 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + with: + path: main + - name: Fetch slurmctld + uses: actions/checkout@v4 + if: ${{ matrix.local }} + with: + repository: charmed-hpc/slurmctld-operator + path: slurmctld-operator - name: Setup operator environment uses: charmed-kubernetes/actions-operator@main with: provider: lxd juju-channel: 3.1/stable - name: Run tests - run: tox run -e integration -- --charm-base=${{ matrix.bases }} + run: | + cd main && tox run -e integration -- \ + --charm-base=${{ matrix.bases }} \ + ${{ matrix.local && '--use-local' || '' }} diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index dc0a8a2..f060d34 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -32,10 +32,10 @@ jobs: - name: Checkout uses: actions/checkout@v3 - name: Select charmhub channel - uses: canonical/charming-actions/channel@2.2.0 + uses: canonical/charming-actions/channel@2.5.0-rc id: channel - name: Upload charm to charmhub - uses: canonical/charming-actions/upload-charm@2.2.0 + uses: canonical/charming-actions/upload-charm@2.5.0-rc with: credentials: "${{ secrets.CHARMCRAFT_AUTH }}" github-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index d49a38a..b48f0a3 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -62,11 +62,13 @@ def _on_install(self, _) -> None: "ConfigurationManager", "ServiceType", "SlurmManagerBase", + "SlurmOpsError", ] import json import logging import re +import socket import subprocess from collections.abc import Mapping from enum import Enum @@ -82,7 +84,7 @@ def _on_install(self, _) -> None: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 3 +LIBPATCH = 5 # Charm library dependencies to fetch during `charmcraft pack`. PYDEPS = ["pyyaml>=6.0.1"] @@ -131,7 +133,8 @@ def install() -> None: def version() -> str: """Get the current version of Slurm installed on the system.""" info = yaml.safe_load(_snap("info", "slurm")) - ver: str = info["installed"] + if (ver := info.get("installed")) is None: + raise SlurmOpsError("unable to retrive snap info. Ensure slurm is correctly installed") return ver.split(maxsplit=1)[0] @@ -177,6 +180,7 @@ class ServiceType(Enum): """Type of Slurm service to manage.""" MUNGED = "munged" + PROMETHEUS_EXPORTER = "slurm-prometheus-exporter" SLURMD = "slurmd" SLURMCTLD = "slurmctld" SLURMDBD = "slurmdbd" @@ -208,6 +212,17 @@ def restart(self) -> None: """Restart service.""" _snap("restart", f"slurm.{self._service.value}") + def active(self) -> bool: + """Return True if the service is active.""" + info = yaml.safe_load(_snap("info", "slurm")) + if (services := info.get("services")) is None: + raise SlurmOpsError("unable to retrive snap info. Ensure slurm is correctly installed") + + # Assume `services` contains the service, since `ServiceManager` is not exposed as a + # public interface for now. + # We don't do `"active" in state` because the word "active" is also part of "inactive" :) + return "inactive" not in services[f"slurm.{self._service.value}"] + class ConfigurationManager: """Control configuration of a Slurm component.""" @@ -271,6 +286,13 @@ def generate_key(self) -> None: _mungectl("key", "generate") +class PrometheusExporterManager(ServiceManager): + """Manage `slurm-prometheus-exporter` service operations.""" + + def __init__(self) -> None: + self._service = ServiceType.PROMETHEUS_EXPORTER + + class SlurmManagerBase(ServiceManager): """Base manager for Slurm services.""" @@ -278,3 +300,9 @@ def __init__(self, service: ServiceType) -> None: self._service = service self.config = ConfigurationManager(service.config_name) self.munge = MungeManager() + self.exporter = PrometheusExporterManager() + + @property + def hostname(self) -> str: + """The hostname where this manager is running.""" + return socket.gethostname().split(".")[0] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 148c6d2..bb64c01 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -73,23 +73,3 @@ async def slurmctld_charm(request, ops_test: OpsTest) -> Union[str, Path]: ) return "slurmctld" - - -@pytest.fixture(scope="module") -async def slurmdbd_charm(request, ops_test: OpsTest) -> Union[str, Path]: - """Pack slurmdbd charm to use for integration tests when --use-local is specified. - - Returns: - `str` "slurmdbd" if --use-local not specified or if SLURMDBD_DIR does not exist. - """ - if request.config.option.use_local: - logger.info("Using local slurmdbd operator rather than pulling from Charmhub") - if SLURMDBD_DIR.exists(): - return await ops_test.build_charm(SLURMDBD_DIR) - else: - logger.warning( - f"{SLURMDBD_DIR} not found. " - f"Defaulting to latest/edge slurmdbd operator from Charmhub" - ) - - return "slurmdbd" diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index bfa1eef..2204b70 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -35,14 +35,12 @@ @pytest.mark.skip_if_deployed @pytest.mark.order(1) async def test_build_and_deploy( - ops_test: OpsTest, charm_base: str, slurmd_charm, slurmctld_charm, slurmdbd_charm + ops_test: OpsTest, charm_base: str, slurmd_charm, slurmctld_charm ) -> None: """Test that the slurmd charm can stabilize against slurmctld, slurmdbd and MySQL.""" - logger.info(f"Deploying {SLURMD} against {SLURMCTLD}, {SLURMDBD}, and {DATABASE}") + logger.info(f"Deploying {SLURMD} against {SLURMCTLD}") # Pack charms and download NHC resource for slurmd operator. - slurmd, slurmctld, slurmdbd = await asyncio.gather( - slurmd_charm, slurmctld_charm, slurmdbd_charm - ) + slurmd, slurmctld = await asyncio.gather(slurmd_charm, slurmctld_charm) # Deploy the test Charmed SLURM cloud. await asyncio.gather( ops_test.model.deploy( @@ -58,33 +56,9 @@ async def test_build_and_deploy( num_units=1, base=charm_base, ), - ops_test.model.deploy( - str(slurmdbd), - application_name=SLURMDBD, - channel="edge" if isinstance(slurmdbd, str) else None, - num_units=1, - base=charm_base, - ), - ops_test.model.deploy( - ROUTER, - application_name=f"{SLURMDBD}-{ROUTER}", - channel="dpe/edge", - num_units=0, - base=charm_base, - ), - ops_test.model.deploy( - DATABASE, - application_name=DATABASE, - channel="8.0/edge", - num_units=1, - base="ubuntu@22.04", - ), ) # Set relations for charmed applications. await ops_test.model.integrate(f"{SLURMD}:{SLURMCTLD}", f"{SLURMCTLD}:{SLURMD}") - await ops_test.model.integrate(f"{SLURMDBD}:{SLURMCTLD}", f"{SLURMCTLD}:{SLURMDBD}") - await ops_test.model.integrate(f"{SLURMDBD}-{ROUTER}:backend-database", f"{DATABASE}:database") - await ops_test.model.integrate(f"{SLURMDBD}:database", f"{SLURMDBD}-{ROUTER}:database") # Reduce the update status frequency to accelerate the triggering of deferred events. async with ops_test.fast_forward(): await ops_test.model.wait_for_idle(apps=[SLURMD], status="active", timeout=1000) diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 783c38a..48aeff4 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -56,6 +56,7 @@ def test_install_fail(self, _, defer) -> None: defer.assert_called() @patch("slurmd_ops.SlurmdManager.install") + @patch("slurmd_ops.SlurmdManager.version", return_value="23.11.7") @patch("pathlib.Path.read_text", return_value="v1.0.0") @patch("ops.model.Unit.set_workload_version") @patch("ops.model.Resources.fetch") diff --git a/tests/unit/utils/test_slurmd.py b/tests/unit/utils/test_slurmd.py index 548086c..9c70eb2 100644 --- a/tests/unit/utils/test_slurmd.py +++ b/tests/unit/utils/test_slurmd.py @@ -26,18 +26,6 @@ class TestSlurmd(unittest.TestCase): """Unit tests for methods in slurmd utility module.""" - @patch("charms.operator_libs_linux.v1.systemd._systemctl") - def test_start(self, _) -> None: - slurmd.start() - - @patch("charms.operator_libs_linux.v1.systemd._systemctl") - def test_stop(self, _) -> None: - slurmd.stop() - - @patch("charms.operator_libs_linux.v1.systemd._systemctl") - def test_restart(self, _) -> None: - slurmd.restart() - @patch("pathlib.Path.write_text") def test_overwrite_default(self, _) -> None: slurmd.override_default("127.0.0.1") diff --git a/tox.ini b/tox.ini index bc09f5c..fd54f39 100644 --- a/tox.ini +++ b/tox.ini @@ -78,5 +78,5 @@ commands = -s \ --tb native \ --log-cli-level=INFO \ - {[vars]tst_path}integration \ + {[vars]tst_path}/integration \ {posargs}