From ceed7ac315a3bfb12af609005402cfdb875ab49c Mon Sep 17 00:00:00 2001 From: jneo8 Date: Thu, 9 Jan 2025 15:02:31 +0800 Subject: [PATCH 1/4] feat: Add core watcher module - Add core watcher module - Refactor guests_on_hypervisor --- sunbeam-python/pyproject.toml | 4 + sunbeam-python/requirements.txt | 3 + sunbeam-python/sunbeam/core/openstack_api.py | 7 +- sunbeam-python/sunbeam/core/watcher.py | 168 ++++++++++++ sunbeam-python/test-requirements.txt | 1 + .../unit/sunbeam/core/test_openstack_api.py | 8 +- .../tests/unit/sunbeam/core/test_watcher.py | 258 ++++++++++++++++++ 7 files changed, 443 insertions(+), 6 deletions(-) create mode 100644 sunbeam-python/sunbeam/core/watcher.py create mode 100644 sunbeam-python/tests/unit/sunbeam/core/test_watcher.py diff --git a/sunbeam-python/pyproject.toml b/sunbeam-python/pyproject.toml index 46c4fe6d..c30b4cbb 100644 --- a/sunbeam-python/pyproject.toml +++ b/sunbeam-python/pyproject.toml @@ -98,3 +98,7 @@ min-file-size = 1 [tool.ruff.lint.mccabe] max-complexity = 15 + +[[tool.mypy.overrides]] +module = ["watcherclient.*", "timeout_decorator"] +follow_untyped_imports = true diff --git a/sunbeam-python/requirements.txt b/sunbeam-python/requirements.txt index 7f64c379..01e61437 100644 --- a/sunbeam-python/requirements.txt +++ b/sunbeam-python/requirements.txt @@ -45,3 +45,6 @@ python-libmaas # Faillible management tenacity + +# timeout +timeout-decorator diff --git a/sunbeam-python/sunbeam/core/openstack_api.py b/sunbeam-python/sunbeam/core/openstack_api.py index f42db3ca..c19a5ce9 100644 --- a/sunbeam-python/sunbeam/core/openstack_api.py +++ b/sunbeam-python/sunbeam/core/openstack_api.py @@ -43,7 +43,7 @@ def get_admin_connection(jhelper: JujuHelper) -> openstack.connection.Connection def guests_on_hypervisor( - hypervisor_name: str, jhelper: JujuHelper + hypervisor_name: str, conn: openstack.connection.Connection ) -> list[openstack.compute.v2.server.Server]: """Return a list of guests that run on the given hypervisor. @@ -51,8 +51,9 @@ def guests_on_hypervisor( :param jhelper: Juju helpers for retrieving admin credentials :raises: openstack.exceptions.SDKException """ - conn = get_admin_connection(jhelper) - return list(conn.compute.servers(all_projects=True, host=hypervisor_name)) + return list( + conn.compute.servers(all_projects=True, hypervisor_hostname=hypervisor_name) + ) def remove_compute_service( diff --git a/sunbeam-python/sunbeam/core/watcher.py b/sunbeam-python/sunbeam/core/watcher.py new file mode 100644 index 00000000..9acf4002 --- /dev/null +++ b/sunbeam-python/sunbeam/core/watcher.py @@ -0,0 +1,168 @@ +# Copyright (c) 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import time +from typing import Any + +import timeout_decorator +from watcherclient import v1 as watcher +from watcherclient.common.apiclient.exceptions import NotFound +from watcherclient.v1 import client as watcher_client + +from sunbeam.core.common import SunbeamException +from sunbeam.core.juju import JujuHelper +from sunbeam.core.openstack_api import get_admin_connection + +LOG = logging.getLogger(__name__) + +TIMEOUT = 60 * 3 +SLEEP_INTERVAL = 5 +ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Maintaining Template" +ENABLE_MAINTENANCE_STRATEGY_NAME = "host_maintenance" +ENABLE_MAINTENANCE_GOAL_NAME = "cluster_maintaining" + +WORKLOAD_BALANCING_GOAL_NAME = "workload_balancing" +WORKLOAD_BALANCING_STRATEGY_NAME = "workload_stabilization" +WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Workload Balancing Template" + + +def get_watcher_client(jhelper: JujuHelper) -> watcher_client.Client: + conn = get_admin_connection(jhelper=jhelper) + watcher_endpoint = conn.session.get_endpoint( + service_type="infra-optim", + # TODO: get region + region_name="RegionOne", + ) + return watcher_client.Client(session=conn.session, endpoint=watcher_endpoint) + + +def _create_host_maintenance_audit_template( + client: watcher_client.Client, +) -> watcher.AuditTemplate: + template = client.audit_template.create( + name=ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME, + description="Audit template for cluster maintaining", + goal=ENABLE_MAINTENANCE_GOAL_NAME, + strategy=ENABLE_MAINTENANCE_STRATEGY_NAME, + ) + return template + + +def _create_workload_balancing_audit_template( + client: watcher_client.Client, +) -> watcher.AuditTemplate: + template = client.audit_template.create( + name=WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME, + description="Audit template for workload balancing", + goal=WORKLOAD_BALANCING_GOAL_NAME, + strategy=WORKLOAD_BALANCING_STRATEGY_NAME, + ) + return template + + +def get_enable_maintenance_audit_template( + client: watcher_client.Client, +) -> watcher.AuditTemplate: + try: + template = client.audit_template.get(ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME) + except NotFound: + template = _create_host_maintenance_audit_template(client=client) + return template + + +def get_workload_balancing_audit_template( + client: watcher_client.Client, +) -> watcher.AuditTemplate: + try: + template = client.audit_template.get(WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME) + except NotFound: + template = _create_workload_balancing_audit_template(client=client) + return template + + +@timeout_decorator.timeout(TIMEOUT) +def create_audit( + client: watcher_client.Client, + template: watcher.AuditTemplate, + audit_type: str = "ONESHOT", + parameters: dict[str, Any] = {}, +) -> watcher.Audit: + audit = client.audit.create( + audit_template_uuid=template.uuid, + audit_type=audit_type, + parameters=parameters, + ) + while True: + audit_details = client.audit.get(audit.uuid) + if audit_details.state in ["SUCCEEDED", "FAILED"]: + break + time.sleep(SLEEP_INTERVAL) + if audit_details.state == "SUCCEEDED": + LOG.debug(f"Create Watcher audit {audit.uuid} successfully") + else: + LOG.debug(f"Create Watcher audit {audit.uuid} failed") + raise SunbeamException( + f"Create watcher audit failed, template: {template.name}" + ) + + _check_audit_plans_recommended(client=client, audit=audit) + return audit + + +def _check_audit_plans_recommended(client: watcher_client.Client, audit: watcher.Audit): + action_plans = client.action_plan.list(audit=audit.uuid) + # Verify all the action_plan's state is RECOMMENDED + if not all(plan.state in ["RECOMMENDED", "SUCCEEDED"] for plan in action_plans): + raise SunbeamException( + f"Not all action plan for audit({audit.uuid}) is RECOMMENDED" + ) + + +def get_actions( + client: watcher_client.Client, audit: watcher.Audit +) -> list[watcher.Action]: + """Get list of actions by audit.""" + return client.action.list(audit=audit.uuid, detail=True) + + +def exec_audit(client: watcher_client.Client, audit: watcher.Audit): + """Run audit's action plans.""" + action_plans = client.action_plan.list(audit=audit.uuid) + for action_plan in action_plans: + _exec_plan(client=client, action_plan=action_plan) + LOG.info(f"All Action plan for Audit {audit.uuid} execution successfully") + + +@timeout_decorator.timeout(TIMEOUT) +def _exec_plan(client: watcher_client.Client, action_plan: watcher.ActionPlan): + """Run action plan.""" + if action_plan.state == "SUCCEEDED": + LOG.debug(f"action plan {action_plan.uuid} state is SUCCEEDED, skip execution") + return + client.action_plan.start(action_plan_id=action_plan.uuid) + + _action_plan: watcher.ActionPlan + while True: + _action_plan = client.action_plan.get(action_plan_id=action_plan.uuid) + if _action_plan.state in ["SUCCEEDED", "FAILED"]: + break + time.sleep(SLEEP_INTERVAL) + + if _action_plan.state == "SUCCEEDED": + LOG.debug(f"Action plan {action_plan.uuid} execution successfully") + else: + LOG.debug(f"Action plan {action_plan.uuid} execution failed") + raise SunbeamException(f"Action plan {action_plan.uuid} execution failed") diff --git a/sunbeam-python/test-requirements.txt b/sunbeam-python/test-requirements.txt index c85c4710..bcc5b0a6 100644 --- a/sunbeam-python/test-requirements.txt +++ b/sunbeam-python/test-requirements.txt @@ -29,3 +29,4 @@ croniter ruff mypy +python-watcherclient diff --git a/sunbeam-python/tests/unit/sunbeam/core/test_openstack_api.py b/sunbeam-python/tests/unit/sunbeam/core/test_openstack_api.py index 20dd2e37..4eb4d0e0 100644 --- a/sunbeam-python/tests/unit/sunbeam/core/test_openstack_api.py +++ b/sunbeam-python/tests/unit/sunbeam/core/test_openstack_api.py @@ -75,12 +75,14 @@ def test_get_admin_connection(self, retrieve_admin_credentials, os_connect): project_domain_name=FAKE_CREDS.get("OS_PROJECT_DOMAIN_NAME"), ) - def test_guests_on_hypervisor(self, get_admin_connection): + def test_guests_on_hypervisor(self): conn = Mock() get_admin_connection.return_value = conn conn.compute.servers.return_value = [1] - assert sunbeam.core.openstack_api.guests_on_hypervisor("hyper1", None) == [1] - conn.compute.servers.assert_called_once_with(all_projects=True, host="hyper1") + assert sunbeam.core.openstack_api.guests_on_hypervisor("hyper1", conn) == [1] + conn.compute.servers.assert_called_once_with( + all_projects=True, hypervisor_hostname="hyper1" + ) def test_remove_compute_service(self): service1 = Mock(binary="nova-compute", host="hyper1") diff --git a/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py b/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py new file mode 100644 index 00000000..d2996aa3 --- /dev/null +++ b/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py @@ -0,0 +1,258 @@ +# Copyright (c) 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest.mock import Mock, call, patch + +import pytest +from watcherclient.common.apiclient.exceptions import NotFound + +import sunbeam.core.watcher as watcher_helper +from sunbeam.core.common import SunbeamException +from sunbeam.core.juju import JujuHelper + + +@patch("sunbeam.core.watcher.get_admin_connection") +@patch("sunbeam.core.watcher.watcher_client.Client") +def test_get_watcher_client(mock_watcher_client, mock_get_admin_connection): + mock_conn = Mock() + mock_conn.session.get_endpoint.return_value = "fake_endpoint" + mock_get_admin_connection.return_value = mock_conn + mock_jhelper = Mock(spec=JujuHelper) + + client = watcher_helper.get_watcher_client(mock_jhelper) + + mock_get_admin_connection.assert_called_once_with(jhelper=mock_jhelper) + mock_conn.session.get_endpoint.assert_called_once_with( + service_type="infra-optim", + region_name="RegionOne", + ) + mock_watcher_client.assert_called_once_with( + session=mock_conn.session, + endpoint="fake_endpoint", + ) + assert client == mock_watcher_client.return_value + + +def test_create_host_maintenance_audit_template(): + mock_client = Mock() + result = watcher_helper._create_host_maintenance_audit_template(mock_client) + assert result == mock_client.audit_template.create.return_value + mock_client.audit_template.create.assert_called_once_with( + name="Sunbeam Cluster Maintaining Template", + description="Audit template for cluster maintaining", + goal="cluster_maintaining", + strategy="host_maintenance", + ) + + +def test_create_workload_balancing_audit_template(): + mock_client = Mock() + result = watcher_helper._create_workload_balancing_audit_template(mock_client) + assert result == mock_client.audit_template.create.return_value + mock_client.audit_template.create.assert_called_once_with( + name="Sunbeam Cluster Workload Balancing Template", + description="Audit template for workload balancing", + goal="workload_balancing", + strategy="workload_stabilization", + ) + + +def test_get_enable_maintenance_audit_template(): + mock_client = Mock() + + result = watcher_helper.get_enable_maintenance_audit_template(mock_client) + assert result == mock_client.audit_template.get.return_value + mock_client.audit_template.get.assert_called_once_with( + "Sunbeam Cluster Maintaining Template" + ) + + +@patch("sunbeam.core.watcher._create_host_maintenance_audit_template") +def test_get_enable_maintenance_audit_template_not_found(mock_create_template_func): + mock_client = Mock() + mock_client.audit_template.get.side_effect = NotFound + + result = watcher_helper.get_enable_maintenance_audit_template(mock_client) + assert result == mock_create_template_func.return_value + mock_create_template_func.assert_called_once_with(client=mock_client) + + +def test_get_workload_balancing_audit_template(): + mock_client = Mock() + + result = watcher_helper.get_workload_balancing_audit_template(mock_client) + assert result == mock_client.audit_template.get.return_value + mock_client.audit_template.get.assert_called_once_with( + "Sunbeam Cluster Workload Balancing Template" + ) + + +@patch("sunbeam.core.watcher._create_workload_balancing_audit_template") +def test_get_workload_balancing_audit_template_not_found(mock_create_template_func): + mock_client = Mock() + mock_client.audit_template.get.side_effect = NotFound + + result = watcher_helper.get_workload_balancing_audit_template(mock_client) + assert result == mock_create_template_func.return_value + mock_create_template_func.assert_called_once_with(client=mock_client) + + +@patch("sunbeam.core.watcher.time") +@patch("sunbeam.core.watcher._check_audit_plans_recommended") +def test_create_audit(mock_check_audit_plans_recommended, mock_time): + mock_client = Mock() + mock_template = Mock() + fake_audit_type = "fake_audit_type" + fake_parameters = {"fake_parameter_a": "a", "fake_parameter_b": "b"} + mock_audit = Mock() + + audit_details = [Mock(), Mock(), Mock()] + audit_details[-1].state = "SUCCEEDED" + + mock_client.audit.create.return_value = mock_audit + mock_client.audit.get.side_effect = audit_details + + result = watcher_helper.create_audit( + mock_client, mock_template, fake_audit_type, fake_parameters + ) + + assert result == mock_audit + + mock_client.audit.create.assert_called_once_with( + audit_template_uuid=mock_template.uuid, + audit_type=fake_audit_type, + parameters=fake_parameters, + ) + mock_check_audit_plans_recommended.assert_called_once_with( + client=mock_client, audit=mock_audit + ) + mock_time.sleep.assert_has_calls([call(5), call(5)]) + + +@patch("sunbeam.core.watcher._check_audit_plans_recommended") +def test_create_audit_failed(mock_check_audit_plan_recommended): + mock_client = Mock() + mock_template = Mock() + fake_audit_type = "fake_audit_type" + fake_parameters = {"fake_parameter_a": "a", "fake_parameter_b": "b"} + mock_audit = Mock() + mock_audit_detail = Mock() + mock_audit_detail.state = "FAILED" + + mock_client.audit.create.return_value = mock_audit + mock_client.audit.get.return_value = mock_audit_detail + + with pytest.raises(SunbeamException): + watcher_helper.create_audit( + mock_client, mock_template, fake_audit_type, fake_parameters + ) + + mock_client.audit.create.assert_called_once_with( + audit_template_uuid=mock_template.uuid, + audit_type=fake_audit_type, + parameters=fake_parameters, + ) + + +def test_check_audit_plans_recommended(): + mock_client = Mock() + mock_audit = Mock() + mock_action_plans = [Mock(), Mock()] + mock_action_plans[0].state = "RECOMMENDED" + mock_action_plans[1].state = "SUCCEEDED" + mock_client.action_plan.list.return_value = mock_action_plans + + watcher_helper._check_audit_plans_recommended(mock_client, mock_audit) + mock_client.action_plan.list.assert_called_once_with(audit=mock_audit.uuid) + + +def test_check_audit_plans_recommended_failed(): + mock_client = Mock() + mock_audit = Mock() + mock_action_plans = [Mock(), Mock()] + mock_action_plans[0].state = "RECOMMENDED" + mock_action_plans[1].state = "FAILED" + mock_client.action_plan.list.return_value = mock_action_plans + + with pytest.raises(SunbeamException): + watcher_helper._check_audit_plans_recommended(mock_client, mock_audit) + mock_client.action_plan.list.assert_called_once_with(audit=mock_audit.uuid) + + +def test_get_actions(): + mock_client = Mock() + mock_audit = Mock() + result = watcher_helper.get_actions(mock_client, mock_audit) + assert result == mock_client.action.list.return_value + mock_client.action.list.assert_called_once_with(audit=mock_audit.uuid, detail=True) + + +@patch("sunbeam.core.watcher._exec_plan") +def test_exec_audit(mock_exec_plan): + mock_client = Mock() + mock_audit = Mock() + mock_action_plans = [Mock(), Mock()] + mock_client.action_plan.list.return_value = mock_action_plans + + watcher_helper.exec_audit(mock_client, mock_audit) + mock_client.action_plan.list.assert_called_once_with(audit=mock_audit.uuid) + mock_exec_plan.assert_has_calls( + [ + call(client=mock_client, action_plan=mock_action_plans[0]), + call(client=mock_client, action_plan=mock_action_plans[1]), + ] + ) + + +def test_exec_plan_state_succeeded(): + mock_client = Mock() + mock_action_plan = Mock() + mock_action_plan.state = "SUCCEEDED" + + watcher_helper._exec_plan(mock_client, mock_action_plan) + mock_client.action_plan.start.assert_not_called() + + +@patch("sunbeam.core.watcher.time") +def test_exec_plan_state_pending(mock_time): + mock_client = Mock() + mock_action_plan = Mock() + mock_action_plan.state = "PENDING" + + action_plans = [Mock(), Mock(), Mock()] + action_plans[-1].state = "SUCCEEDED" + mock_client.action_plan.get.side_effect = action_plans + + watcher_helper._exec_plan(mock_client, mock_action_plan) + mock_client.action_plan.start.assert_called_once_with( + action_plan_id=mock_action_plan.uuid + ) + mock_time.sleep.assert_has_calls([call(5), call(5)]) + + +def test_exec_plan_state_pending_failed(): + mock_client = Mock() + mock_action_plan = Mock() + mock_action_plan.state = "PENDING" + + mock_client.action_plan.get.return_value = Mock() + mock_client.action_plan.get.return_value.state = "FAILED" + + with pytest.raises(SunbeamException): + watcher_helper._exec_plan(mock_client, mock_action_plan) + mock_client.action_plan.start.assert_called_once_with( + action_plan_id=mock_action_plan.uuid + ) + mock_client.action_plan.get.assert_called_once_with( + action_plan_id=mock_action_plan.uuid + ) From 2ed5ac408baa5fa2a44469a55a2cfc1a67e957f4 Mon Sep 17 00:00:00 2001 From: jneo8 Date: Fri, 10 Jan 2025 16:23:32 +0800 Subject: [PATCH 2/4] fix: Read region configuration to create watcher client - fix/Read watcher configuration from clusterdb to build watcher client - doc/Update guests_on_hypervisor func docstring - doc/Add comment for TIMEOUT and TIMEOUT_INTERVAL variables --- sunbeam-python/sunbeam/core/openstack_api.py | 2 +- sunbeam-python/sunbeam/core/watcher.py | 17 ++++++++---- .../tests/unit/sunbeam/core/test_watcher.py | 27 ++++++++++++++----- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/sunbeam-python/sunbeam/core/openstack_api.py b/sunbeam-python/sunbeam/core/openstack_api.py index c19a5ce9..a4ccb251 100644 --- a/sunbeam-python/sunbeam/core/openstack_api.py +++ b/sunbeam-python/sunbeam/core/openstack_api.py @@ -48,7 +48,7 @@ def guests_on_hypervisor( """Return a list of guests that run on the given hypervisor. :param hypervisor_name: Name of hypervisor - :param jhelper: Juju helpers for retrieving admin credentials + :param conn: Admin connection :raises: openstack.exceptions.SDKException """ return list( diff --git a/sunbeam-python/sunbeam/core/watcher.py b/sunbeam-python/sunbeam/core/watcher.py index 9acf4002..06c31539 100644 --- a/sunbeam-python/sunbeam/core/watcher.py +++ b/sunbeam-python/sunbeam/core/watcher.py @@ -22,13 +22,17 @@ from watcherclient.common.apiclient.exceptions import NotFound from watcherclient.v1 import client as watcher_client -from sunbeam.core.common import SunbeamException +from sunbeam.core.common import SunbeamException, read_config +from sunbeam.core.deployment import Deployment from sunbeam.core.juju import JujuHelper from sunbeam.core.openstack_api import get_admin_connection +from sunbeam.steps.openstack import REGION_CONFIG_KEY LOG = logging.getLogger(__name__) +# Timeout while waiting for the watcher resource to reach the target state. TIMEOUT = 60 * 3 +# Sleep interval between querying watcher resources. SLEEP_INTERVAL = 5 ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Maintaining Template" ENABLE_MAINTENANCE_STRATEGY_NAME = "host_maintenance" @@ -39,12 +43,15 @@ WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Workload Balancing Template" -def get_watcher_client(jhelper: JujuHelper) -> watcher_client.Client: - conn = get_admin_connection(jhelper=jhelper) +def get_watcher_client(deployment: Deployment) -> watcher_client.Client: + region = read_config(deployment.get_client(), REGION_CONFIG_KEY)["region"] + conn = get_admin_connection( + jhelper=JujuHelper(deployment.get_connected_controller()) + ) + watcher_endpoint = conn.session.get_endpoint( service_type="infra-optim", - # TODO: get region - region_name="RegionOne", + region_name=region, ) return watcher_client.Client(session=conn.session, endpoint=watcher_endpoint) diff --git a/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py b/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py index d2996aa3..1c4e2f65 100644 --- a/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py +++ b/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py @@ -18,23 +18,38 @@ import sunbeam.core.watcher as watcher_helper from sunbeam.core.common import SunbeamException -from sunbeam.core.juju import JujuHelper +from sunbeam.core.deployment import Deployment +@patch("sunbeam.core.watcher.read_config") +@patch("sunbeam.core.watcher.JujuHelper") @patch("sunbeam.core.watcher.get_admin_connection") @patch("sunbeam.core.watcher.watcher_client.Client") -def test_get_watcher_client(mock_watcher_client, mock_get_admin_connection): +def test_get_watcher_client( + mock_watcher_client, + mock_get_admin_connection, + mock_jhelper, + mock_read_config, +): mock_conn = Mock() mock_conn.session.get_endpoint.return_value = "fake_endpoint" + mock_read_config.return_value = {"region": "fake_region"} mock_get_admin_connection.return_value = mock_conn - mock_jhelper = Mock(spec=JujuHelper) + mock_deployment = Mock(spec=Deployment) - client = watcher_helper.get_watcher_client(mock_jhelper) + client = watcher_helper.get_watcher_client(mock_deployment) + + mock_read_config.assert_called_once_with( + mock_deployment.get_client.return_value, "Region" + ) + mock_jhelper.assert_called_once_with( + mock_deployment.get_connected_controller.return_value + ) + mock_get_admin_connection.assert_called_once_with(jhelper=mock_jhelper.return_value) - mock_get_admin_connection.assert_called_once_with(jhelper=mock_jhelper) mock_conn.session.get_endpoint.assert_called_once_with( service_type="infra-optim", - region_name="RegionOne", + region_name="fake_region", ) mock_watcher_client.assert_called_once_with( session=mock_conn.session, From 5dfecff94655ac5cfa4d46c96b39f70834ab5a5f Mon Sep 17 00:00:00 2001 From: jneo8 Date: Tue, 14 Jan 2025 17:21:13 +0800 Subject: [PATCH 3/4] refactor: Remove timeout decorator and raise exception on failed watcher action - Refactor: Replace timeout decorator by tenacity - Fix: Raise exception on failed watcher action --- sunbeam-python/requirements.txt | 3 - sunbeam-python/sunbeam/core/watcher.py | 78 ++++++++++++++----- .../tests/unit/sunbeam/core/test_watcher.py | 69 +++++++++------- 3 files changed, 97 insertions(+), 53 deletions(-) diff --git a/sunbeam-python/requirements.txt b/sunbeam-python/requirements.txt index 01e61437..7f64c379 100644 --- a/sunbeam-python/requirements.txt +++ b/sunbeam-python/requirements.txt @@ -45,6 +45,3 @@ python-libmaas # Faillible management tenacity - -# timeout -timeout-decorator diff --git a/sunbeam-python/sunbeam/core/watcher.py b/sunbeam-python/sunbeam/core/watcher.py index 06c31539..3225afa3 100644 --- a/sunbeam-python/sunbeam/core/watcher.py +++ b/sunbeam-python/sunbeam/core/watcher.py @@ -14,10 +14,9 @@ # limitations under the License. import logging -import time from typing import Any -import timeout_decorator +import tenacity from watcherclient import v1 as watcher from watcherclient.common.apiclient.exceptions import NotFound from watcherclient.v1 import client as watcher_client @@ -31,9 +30,9 @@ LOG = logging.getLogger(__name__) # Timeout while waiting for the watcher resource to reach the target state. -TIMEOUT = 60 * 3 +WAIT_TIMEOUT = 60 * 3 # Sleep interval between querying watcher resources. -SLEEP_INTERVAL = 5 +WAIT_SLEEP_INTERVAL = 5 ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Maintaining Template" ENABLE_MAINTENANCE_STRATEGY_NAME = "host_maintenance" ENABLE_MAINTENANCE_GOAL_NAME = "cluster_maintaining" @@ -100,7 +99,23 @@ def get_workload_balancing_audit_template( return template -@timeout_decorator.timeout(TIMEOUT) +@tenacity.retry( + reraise=True, + stop=tenacity.stop_after_delay(WAIT_TIMEOUT), + wait=tenacity.wait_fixed(WAIT_SLEEP_INTERVAL), +) +def _wait_resource_in_target_state( + client: watcher_client.Client, + resource_name: str, + resource_uuid: str, + states: list[str] = ["SUCCEEDED", "FAILED"], +) -> watcher.Audit: + src = getattr(client, resource_name).get(resource_uuid) + if src.state not in states: + raise SunbeamException(f"{resource_name} {resource_uuid} not in target state") + return src + + def create_audit( client: watcher_client.Client, template: watcher.AuditTemplate, @@ -112,11 +127,12 @@ def create_audit( audit_type=audit_type, parameters=parameters, ) - while True: - audit_details = client.audit.get(audit.uuid) - if audit_details.state in ["SUCCEEDED", "FAILED"]: - break - time.sleep(SLEEP_INTERVAL) + audit_details = _wait_resource_in_target_state( + client=client, + resource_name="audit", + resource_uuid=audit.uuid, + ) + if audit_details.state == "SUCCEEDED": LOG.debug(f"Create Watcher audit {audit.uuid} successfully") else: @@ -131,7 +147,9 @@ def create_audit( def _check_audit_plans_recommended(client: watcher_client.Client, audit: watcher.Audit): action_plans = client.action_plan.list(audit=audit.uuid) - # Verify all the action_plan's state is RECOMMENDED + # Verify all the action_plan's state is RECOMMENDED. + # In case there is not action been generated, the action plan state + # will be SUCCEEDED at the beginning. if not all(plan.state in ["RECOMMENDED", "SUCCEEDED"] for plan in action_plans): raise SunbeamException( f"Not all action plan for audit({audit.uuid}) is RECOMMENDED" @@ -153,7 +171,6 @@ def exec_audit(client: watcher_client.Client, audit: watcher.Audit): LOG.info(f"All Action plan for Audit {audit.uuid} execution successfully") -@timeout_decorator.timeout(TIMEOUT) def _exec_plan(client: watcher_client.Client, action_plan: watcher.ActionPlan): """Run action plan.""" if action_plan.state == "SUCCEEDED": @@ -161,15 +178,36 @@ def _exec_plan(client: watcher_client.Client, action_plan: watcher.ActionPlan): return client.action_plan.start(action_plan_id=action_plan.uuid) - _action_plan: watcher.ActionPlan - while True: - _action_plan = client.action_plan.get(action_plan_id=action_plan.uuid) - if _action_plan.state in ["SUCCEEDED", "FAILED"]: - break - time.sleep(SLEEP_INTERVAL) + action_plan_details = _wait_resource_in_target_state( + client=client, + resource_name="action_plan", + resource_uuid=action_plan.uuid, + ) - if _action_plan.state == "SUCCEEDED": + if action_plan_details.state == "SUCCEEDED": LOG.debug(f"Action plan {action_plan.uuid} execution successfully") else: LOG.debug(f"Action plan {action_plan.uuid} execution failed") - raise SunbeamException(f"Action plan {action_plan.uuid} execution failed") + + # Even if an action fails, the action plan can still be in the SUCCEEDED state. + # To handle this, we check if there are any failed actions at this point. + _raise_on_failed_action(client=client, action_plan=action_plan) + + +def _raise_on_failed_action( + client: watcher_client.Client, action_plan: watcher.ActionPlan +): + """Raise exception on failed action.""" + actions = client.action.list(action_plan=action_plan.uuid, detail=True) + info = {} + for action in actions: + if not action.state == "FAILED": + continue + info[action.uuid] = { + "action": action.action_type, + "updated-at": action.updated_at, + "description": action.description, + "input_parameters": action.input_parameters, + } + if len(info) > 0: + raise SunbeamException(f"Actions in FAILED state. {info}") diff --git a/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py b/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py index 1c4e2f65..ee8c5493 100644 --- a/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py +++ b/sunbeam-python/tests/unit/sunbeam/core/test_watcher.py @@ -14,6 +14,7 @@ from unittest.mock import Mock, call, patch import pytest +import tenacity from watcherclient.common.apiclient.exceptions import NotFound import sunbeam.core.watcher as watcher_helper @@ -122,20 +123,22 @@ def test_get_workload_balancing_audit_template_not_found(mock_create_template_fu mock_create_template_func.assert_called_once_with(client=mock_client) -@patch("sunbeam.core.watcher.time") +@patch("sunbeam.core.watcher._wait_resource_in_target_state") @patch("sunbeam.core.watcher._check_audit_plans_recommended") -def test_create_audit(mock_check_audit_plans_recommended, mock_time): +def test_create_audit( + mock_check_audit_plans_recommended, mock_wait_resource_in_target_state +): mock_client = Mock() mock_template = Mock() fake_audit_type = "fake_audit_type" fake_parameters = {"fake_parameter_a": "a", "fake_parameter_b": "b"} mock_audit = Mock() - audit_details = [Mock(), Mock(), Mock()] - audit_details[-1].state = "SUCCEEDED" - mock_client.audit.create.return_value = mock_audit - mock_client.audit.get.side_effect = audit_details + + mock_audit_detail = Mock() + mock_audit_detail.state = "SUCCEEDED" + mock_wait_resource_in_target_state.return_value = mock_audit_detail result = watcher_helper.create_audit( mock_client, mock_template, fake_audit_type, fake_parameters @@ -148,10 +151,14 @@ def test_create_audit(mock_check_audit_plans_recommended, mock_time): audit_type=fake_audit_type, parameters=fake_parameters, ) + mock_wait_resource_in_target_state.assert_called_once_with( + client=mock_client, + resource_name="audit", + resource_uuid=mock_audit.uuid, + ) mock_check_audit_plans_recommended.assert_called_once_with( client=mock_client, audit=mock_audit ) - mock_time.sleep.assert_has_calls([call(5), call(5)]) @patch("sunbeam.core.watcher._check_audit_plans_recommended") @@ -238,36 +245,38 @@ def test_exec_plan_state_succeeded(): mock_client.action_plan.start.assert_not_called() -@patch("sunbeam.core.watcher.time") -def test_exec_plan_state_pending(mock_time): +@patch("sunbeam.core.watcher._wait_resource_in_target_state.retry.sleep") +def test_wait_resource_in_target_state_pending(mock_func_sleep): mock_client = Mock() - mock_action_plan = Mock() - mock_action_plan.state = "PENDING" - action_plans = [Mock(), Mock(), Mock()] - action_plans[-1].state = "SUCCEEDED" - mock_client.action_plan.get.side_effect = action_plans + fake_resource = [Mock() for i in range(5)] + fake_resource[-1].state = "SUCCEEDED" + mock_client.fake_resource.get.side_effect = fake_resource - watcher_helper._exec_plan(mock_client, mock_action_plan) - mock_client.action_plan.start.assert_called_once_with( - action_plan_id=mock_action_plan.uuid + watcher_helper._wait_resource_in_target_state( + mock_client, + "fake_resource", + "fake-uuid", + ) + mock_client.fake_resource.get.assert_has_calls( + [call("fake-uuid") for _ in range(len(fake_resource))] ) - mock_time.sleep.assert_has_calls([call(5), call(5)]) -def test_exec_plan_state_pending_failed(): +@patch( + "sunbeam.core.watcher._wait_resource_in_target_state.retry.stop", + return_value=tenacity.stop_after_attempt(10), +) +@patch("sunbeam.core.watcher._wait_resource_in_target_state.retry.sleep") +def test_wait_resource_in_target_state_failed(mock_retry_sleep, mock_retry_stop): mock_client = Mock() - mock_action_plan = Mock() - mock_action_plan.state = "PENDING" - mock_client.action_plan.get.return_value = Mock() - mock_client.action_plan.get.return_value.state = "FAILED" + fake_resource = [Mock() for i in range(5)] + mock_client.fake_resource.get.side_effect = fake_resource with pytest.raises(SunbeamException): - watcher_helper._exec_plan(mock_client, mock_action_plan) - mock_client.action_plan.start.assert_called_once_with( - action_plan_id=mock_action_plan.uuid - ) - mock_client.action_plan.get.assert_called_once_with( - action_plan_id=mock_action_plan.uuid - ) + watcher_helper._wait_resource_in_target_state( + mock_client, + "fake_resource", + "fake-uuid", + ) From d0e418f9e586cb0566a02eeea130954ce257fd98 Mon Sep 17 00:00:00 2001 From: jneo8 Date: Thu, 16 Jan 2025 15:02:10 +0800 Subject: [PATCH 4/4] docs: Update comment for watcher's wait timeout and wait interval --- sunbeam-python/sunbeam/core/watcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sunbeam-python/sunbeam/core/watcher.py b/sunbeam-python/sunbeam/core/watcher.py index 3225afa3..9fc617d1 100644 --- a/sunbeam-python/sunbeam/core/watcher.py +++ b/sunbeam-python/sunbeam/core/watcher.py @@ -29,9 +29,9 @@ LOG = logging.getLogger(__name__) -# Timeout while waiting for the watcher resource to reach the target state. +# Timeout of seconds while waiting for the watcher resource to reach the target state. WAIT_TIMEOUT = 60 * 3 -# Sleep interval between querying watcher resources. +# Sleep interval (in seconds) between querying watcher resources. WAIT_SLEEP_INTERVAL = 5 ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Maintaining Template" ENABLE_MAINTENANCE_STRATEGY_NAME = "host_maintenance"