diff --git a/sunbeam-python/sunbeam/jobs/checks.py b/sunbeam-python/sunbeam/jobs/checks.py index d9b61fe0..db1e1f8a 100644 --- a/sunbeam-python/sunbeam/jobs/checks.py +++ b/sunbeam-python/sunbeam/jobs/checks.py @@ -14,6 +14,7 @@ # limitations under the License. import base64 +import enum import json import logging import os @@ -58,11 +59,19 @@ def run(self) -> bool: return True +class DiagnosticResultType(enum.Enum): + """Enum for diagnostic result types.""" + + SUCCESS = "success" + WARNING = "warning" + FAILURE = "failure" + + class DiagnosticsResult: def __init__( self, name: str, - passed: bool, + passed: DiagnosticResultType, message: str | None = None, diagnostics: str | None = None, **details: dict, @@ -77,7 +86,7 @@ def to_dict(self) -> dict: """Return the result as a dictionary.""" result = { "name": self.name, - "passed": self.passed, + "passed": self.passed.value, **self.details, } if self.message: @@ -95,7 +104,7 @@ def fail( **details: dict, ): """Helper to create a failed result.""" - return cls(name, False, message, diagnostics, **details) + return cls(name, DiagnosticResultType.FAILURE, message, diagnostics, **details) @classmethod def success( @@ -106,7 +115,28 @@ def success( **details: dict, ): """Helper to create a successful result.""" - return cls(name, True, message, diagnostics, **details) + return cls(name, DiagnosticResultType.SUCCESS, message, diagnostics, **details) + + @classmethod + def warn( + cls, + name: str, + message: str | None = None, + diagnostics: str | None = None, + **details: dict, + ): + """Helper to create a warning result.""" + return cls(name, DiagnosticResultType.WARNING, message, diagnostics, **details) + + @staticmethod + def coalesce_type(results: list["DiagnosticsResult"]) -> DiagnosticResultType: + """Coalesce results into a single result.""" + types = [result.passed for result in results] + if DiagnosticResultType.FAILURE in types: + return DiagnosticResultType.FAILURE + if DiagnosticResultType.WARNING in types: + return DiagnosticResultType.WARNING + return DiagnosticResultType.SUCCESS class DiagnosticsCheck: diff --git a/sunbeam-python/sunbeam/jobs/common.py b/sunbeam-python/sunbeam/jobs/common.py index 2209b55f..3277473e 100644 --- a/sunbeam-python/sunbeam/jobs/common.py +++ b/sunbeam-python/sunbeam/jobs/common.py @@ -47,6 +47,7 @@ CLICK_OK = "[green]OK[/green]" CLICK_FAIL = "[red]FAIL[/red]" +CLICK_WARN = "[yellow]WARN[/yellow]" DEFAULT_JUJU_NO_PROXY_SETTINGS = "127.0.0.1,localhost,::1" K8S_CLUSTER_SERVICE_CIDR = "10.152.183.0/24" diff --git a/sunbeam-python/sunbeam/provider/maas/commands.py b/sunbeam-python/sunbeam/provider/maas/commands.py index fa762bf2..1fbf2341 100644 --- a/sunbeam-python/sunbeam/provider/maas/commands.py +++ b/sunbeam-python/sunbeam/provider/maas/commands.py @@ -83,6 +83,7 @@ ) from sunbeam.commands.terraform import TerraformInitStep from sunbeam.jobs.checks import ( + DiagnosticResultType, DiagnosticsCheck, DiagnosticsResult, JujuSnapCheck, @@ -93,6 +94,7 @@ from sunbeam.jobs.common import ( CLICK_FAIL, CLICK_OK, + CLICK_WARN, CONTEXT_SETTINGS, FORMAT_TABLE, FORMAT_YAML, @@ -1078,6 +1080,18 @@ def list_networks_cmd(ctx: click.Context, format: str): console.print(yaml.dump(mapping), end="") +def _colorize_result(result: DiagnosticsResult) -> str: + """Return a colorize string of the result status.""" + match result.passed: + case DiagnosticResultType.SUCCESS: + status = CLICK_OK + case DiagnosticResultType.FAILURE: + status = CLICK_FAIL + case DiagnosticResultType.WARNING: + status = CLICK_WARN + return status + + def _run_maas_checks(checks: list[DiagnosticsCheck], console: Console) -> list[dict]: """Run checks sequentially. @@ -1097,12 +1111,13 @@ def _run_maas_checks(checks: list[DiagnosticsCheck], console: Console) -> list[d results = [results] for result in results: - LOG.debug(f"{result.name=!r}, {result.passed=!r}, {result.message=!r}") + passed = result.passed.value + LOG.debug(f"{result.name=!r}, {passed=!r}, {result.message=!r}") console.print( message, result.message, "-", - CLICK_OK if result.passed else CLICK_FAIL, + _colorize_result(result), ) check_results.append(result.to_dict()) return check_results @@ -1129,7 +1144,7 @@ def _run_maas_meta_checks( results = [results] for result in results: check_results.append(result.to_dict()) - console.print(message, CLICK_OK if results[-1].passed else CLICK_FAIL) + console.print(message, _colorize_result(results[-1])) return check_results diff --git a/sunbeam-python/sunbeam/provider/maas/steps.py b/sunbeam-python/sunbeam/provider/maas/steps.py index 571d4d32..3bd89b42 100644 --- a/sunbeam-python/sunbeam/provider/maas/steps.py +++ b/sunbeam-python/sunbeam/provider/maas/steps.py @@ -226,17 +226,15 @@ def run(self) -> DiagnosticsResult: assigned_roles = self.machine["roles"] LOG.debug(f"{self.machine['hostname']=!r} assigned roles: {assigned_roles!r}") if not assigned_roles: - return DiagnosticsResult( + return DiagnosticsResult.fail( self.name, - False, "machine has no role assigned.", diagnostics=ROLES_NEEDED_ERROR, machine=self.machine["hostname"], ) - return DiagnosticsResult( + return DiagnosticsResult.success( self.name, - True, ", ".join(self.machine["roles"]), machine=self.machine["hostname"], ) @@ -452,7 +450,7 @@ def run(self) -> DiagnosticsResult: ) if "ssd" not in root_disk["tags"]: - return DiagnosticsResult.fail( + return DiagnosticsResult.warn( self.name, "root disk is not a SSD", "A machine root disk needs to be an SSD to be" @@ -462,7 +460,7 @@ def run(self) -> DiagnosticsResult: ) if root_disk["root_partition"]["size"] < 500 * 1024**3: - return DiagnosticsResult.fail( + return DiagnosticsResult.warn( self.name, "root disk is too small", "A machine root disk needs to be at least 500GB" @@ -496,7 +494,7 @@ def run(self) -> DiagnosticsResult: memory_min = RAM_32_GB_IN_MB core_min = 16 if self.machine["memory"] < memory_min or self.machine["cores"] < core_min: - return DiagnosticsResult.fail( + return DiagnosticsResult.warn( self.name, "machine does not meet requirements", textwrap.dedent( @@ -528,7 +526,8 @@ def _run_check_list(checks: list[DiagnosticsCheck]) -> list[DiagnosticsResult]: if isinstance(results, DiagnosticsResult): results = [results] for result in results: - LOG.debug(f"{result.name=!r}, {result.passed=!r}, {result.message=!r}") + passed = result.passed.value + LOG.debug(f"{result.name=!r}, {passed=!r}, {result.message=!r}") check_results.extend(results) return check_results @@ -567,7 +566,7 @@ def run(self) -> list[DiagnosticsResult]: checks.append(MachineRequirementsCheck(machine)) results = _run_check_list(checks) results.append( - DiagnosticsResult(self.name, all(result.passed for result in results)) + DiagnosticsResult(self.name, DiagnosticsResult.coalesce_type(results)) ) return results @@ -593,21 +592,32 @@ def run(self) -> DiagnosticsResult: for machine in self.machines: if self.role_tag in machine["roles"]: machines += 1 - if machines < self.min_count: + failure_diagnostics = textwrap.dedent( + """\ + A deployment needs to have at least {min_count} {role_name} to be + a part of an openstack deployment. You need to add more {role_name} + to the deployment using {role_tag} tag. + More on using tags: https://maas.io/docs/how-to-use-machine-tags + """ + ) + if machines == 0: return DiagnosticsResult.fail( + self.name, + "no machine with role: " + self.role_name, + failure_diagnostics.format( + min_count=self.min_count, + role_name=self.role_name, + role_tag=self.role_tag, + ), + ) + if machines < self.min_count: + return DiagnosticsResult.warn( self.name, "less than 3 " + self.role_name, - textwrap.dedent( - """\ - A deployment needs to have at least {min_count} {role_name} to be - a part of an openstack deployment. You need to add more {role_name} - to the deployment using {role_tag} tag. - More on using tags: https://maas.io/docs/how-to-use-machine-tags - """.format( - min_count=self.min_count, - role_name=self.role_name, - role_tag=self.role_tag, - ) + failure_diagnostics.format( + min_count=self.min_count, + role_name=self.role_name, + role_tag=self.role_tag, ), ) return DiagnosticsResult.success( @@ -628,15 +638,20 @@ def __init__(self, zones: list[str]): def run(self) -> DiagnosticsResult: """Checks deployment zones.""" - if len(self.zones) in (0, 2): + nb_zones = len(self.zones) + diagnostics = textwrap.dedent( + f"""\ + A deployment needs to have either 1 zone or more than 2 zones. + Current zones: {', '.join(self.zones)} + """ + ) + if nb_zones == 0: return DiagnosticsResult.fail( - self.name, - "deployment has 0 or 2 zones", - textwrap.dedent( - f"""\ - A deployment needs to have either 1 zone or more than 2 zones. - Current zones: {', '.join(self.zones)}""" - ), + self.name, "deployment has no zone", diagnostics + ) + if len(self.zones) == 2: + return DiagnosticsResult.warn( + self.name, "deployment has 2 zones", diagnostics ) return DiagnosticsResult.success( self.name, @@ -688,7 +703,7 @@ def run(self) -> DiagnosticsResult: """ ) diagnostics += distribution - return DiagnosticsResult.fail( + return DiagnosticsResult.warn( self.name, f"{', '.join(unbalanced_roles)} distribution is unbalanced", diagnostics, @@ -852,7 +867,7 @@ def run(self) -> list[DiagnosticsResult]: results = _run_check_list(checks) results.append( - DiagnosticsResult(self.name, all(result.passed for result in results)) + DiagnosticsResult(self.name, DiagnosticsResult.coalesce_type(results)) ) return results @@ -879,7 +894,7 @@ def run(self) -> list[DiagnosticsResult]: results = _run_check_list(checks) results.append( - DiagnosticsResult(self.name, all(result.passed for result in results)) + DiagnosticsResult(self.name, DiagnosticsResult.coalesce_type(results)) ) return results diff --git a/sunbeam-python/tests/unit/sunbeam/provider/maas/test_maas.py b/sunbeam-python/tests/unit/sunbeam/provider/maas/test_maas.py index 1c1cb0b5..252266af 100644 --- a/sunbeam-python/tests/unit/sunbeam/provider/maas/test_maas.py +++ b/sunbeam-python/tests/unit/sunbeam/provider/maas/test_maas.py @@ -20,6 +20,7 @@ from maas.client.bones import CallError import sunbeam.provider.maas.steps as maas_steps +from sunbeam.jobs.checks import DiagnosticResultType from sunbeam.jobs.deployment import Networks from sunbeam.jobs.deployments import DeploymentsConfig from sunbeam.jobs.juju import ControllerNotFoundException @@ -134,14 +135,14 @@ def test_run_with_no_assigned_roles(self): machine = {"hostname": "test_machine", "roles": []} check = MachineRolesCheck(machine) result = check.run() - assert result.passed is False + assert result.passed == DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" def test_run_with_assigned_roles(self): machine = {"hostname": "test_machine", "roles": ["role1", "role2"]} check = MachineRolesCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" @@ -158,7 +159,7 @@ def test_run_with_incomplete_network_mapping(self, mocker): } check = MachineNetworkCheck(snap, machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "network mapping" in result.message @@ -171,7 +172,7 @@ def test_run_with_no_assigned_roles(self, mocker): machine = {"hostname": "test_machine", "roles": [], "spaces": []} check = MachineNetworkCheck(snap, machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "no role assigned" in result.message @@ -191,7 +192,7 @@ def test_run_with_missing_spaces(self, mocker): } check = MachineNetworkCheck(snap, machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "missing beta" in result.message @@ -208,7 +209,7 @@ def test_run_with_successful_check(self, mocker): } check = MachineNetworkCheck(snap, machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" @@ -217,7 +218,7 @@ def test_run_with_no_assigned_roles(self): machine = {"hostname": "test_machine", "roles": [], "storage": {}} check = MachineStorageCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "machine has no role assigned" in result.message @@ -229,7 +230,7 @@ def test_run_with_not_storage_node(self): } check = MachineStorageCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" assert result.message == "not a storage node." @@ -241,7 +242,7 @@ def test_run_with_no_ceph_storage(self): } check = MachineStorageCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "storage node has no ceph storage" in result.message assert result.diagnostics @@ -255,7 +256,7 @@ def test_run_with_ceph_storage(self): } check = MachineStorageCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" assert result.message and StorageTags.CEPH.value in result.message @@ -265,7 +266,7 @@ def test_run_with_no_assigned_roles(self): machine = {"hostname": "test_machine", "roles": [], "nics": []} check = MachineComputeNicCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "machine has no role assigned" in result.message @@ -277,7 +278,7 @@ def test_run_with_not_compute_node(self): } check = MachineComputeNicCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" assert result.message == "not a compute node." @@ -289,7 +290,7 @@ def test_run_with_no_compute_nic(self): } check = MachineComputeNicCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "no compute nic found" in result.message assert result.diagnostics @@ -303,7 +304,7 @@ def test_run_with_compute_nic(self): } check = MachineComputeNicCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" assert result.message and NicTags.COMPUTE.value in result.message @@ -313,7 +314,7 @@ def test_run_with_no_root_disk(self): machine = {"hostname": "test_machine"} check = MachineRootDiskCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.details["machine"] == "test_machine" assert result.message and "could not determine" in result.message @@ -321,7 +322,7 @@ def test_run_with_no_ssd_tag(self): machine = {"hostname": "test_machine", "root_disk": {"tags": []}} check = MachineRootDiskCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.WARNING assert result.details["machine"] == "test_machine" assert result.message and "is not a SSD" in result.message @@ -332,7 +333,7 @@ def test_run_with_not_enough_space(self): } check = MachineRootDiskCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.WARNING assert result.details["machine"] == "test_machine" assert result.message and "is too small" in result.message @@ -343,7 +344,7 @@ def test_run_with_valid_root_disk(self): } check = MachineRootDiskCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" assert result.message and "is a SSD and is large enough" in result.message @@ -358,7 +359,7 @@ def test_run_with_insufficient_memory(self): } check = MachineRequirementsCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.WARNING assert result.details["machine"] == "test_machine" assert result.message and "machine does not meet requirements" in result.message @@ -371,7 +372,7 @@ def test_run_with_insufficient_cores(self): } check = MachineRequirementsCheck(machine) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.WARNING assert result.details["machine"] == "test_machine" assert result.message and "machine does not meet requirements" in result.message @@ -384,7 +385,7 @@ def test_run_with_sufficient_resources(self): } check = MachineRequirementsCheck(machine) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.details["machine"] == "test_machine" @@ -397,7 +398,7 @@ def test_run_with_insufficient_roles(self): ] check = DeploymentRolesCheck(machines, "Role", "role1", min_count=3) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.WARNING assert result.message and "less than 3 Role" in result.message def test_run_with_sufficient_roles(self): @@ -408,7 +409,7 @@ def test_run_with_sufficient_roles(self): ] check = DeploymentRolesCheck(machines, "Role", "role1", min_count=3) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.message == "Role: 3" @@ -417,21 +418,21 @@ def test_run_with_one_zone(self): zones = ["zone1"] check = ZonesCheck(zones) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.message == "1 zone(s)" def test_run_with_two_zones(self): zones = ["zone1", "zone2"] check = ZonesCheck(zones) result = check.run() - assert result.passed is False - assert result.message == "deployment has 0 or 2 zones" + assert result.passed is DiagnosticResultType.WARNING + assert result.message == "deployment has 2 zones" def test_run_with_three_zones(self): zones = ["zone1", "zone2", "zone3"] check = ZonesCheck(zones) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.message == "3 zone(s)" @@ -449,7 +450,7 @@ def test_run_with_balanced_roles(self): } check = ZoneBalanceCheck(machines) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS assert result.message == "deployment is balanced" def test_run_with_unbalanced_roles(self): @@ -465,7 +466,7 @@ def test_run_with_unbalanced_roles(self): } check = ZoneBalanceCheck(machines) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.WARNING assert result.message and "compute distribution is unbalanced" in result.message @@ -476,7 +477,7 @@ def test_run_with_missing_network_mapping(self, mocker): deployment.network_mapping = {} check = IpRangesCheck(client, deployment) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.diagnostics and "network mapping" in result.diagnostics def test_run_with_missing_public_ip_ranges(self, mocker): @@ -496,7 +497,7 @@ def test_run_with_missing_public_ip_ranges(self, mocker): ) check = IpRangesCheck(client, deployment) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert result.diagnostics and deployment.public_api_label in result.diagnostics get_ip_ranges_from_space_mock.assert_any_call(client, "public_space") @@ -530,7 +531,7 @@ def test_run_with_missing_internal_ip_ranges(self, mocker): ) check = IpRangesCheck(client, deployment) result = check.run() - assert result.passed is False + assert result.passed is DiagnosticResultType.FAILURE assert ( result.diagnostics and deployment.internal_api_label in result.diagnostics ) @@ -576,7 +577,7 @@ def test_run_with_successful_check(self, mocker): ) check = IpRangesCheck(client, deployment) result = check.run() - assert result.passed is True + assert result.passed is DiagnosticResultType.SUCCESS get_ip_ranges_from_space_mock.assert_any_call(client, "public_space") get_ip_ranges_from_space_mock.assert_any_call(client, "internal_space")