Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add core watcher module #397

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sunbeam-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,7 @@ min-file-size = 1

[tool.ruff.lint.mccabe]
max-complexity = 15

[[tool.mypy.overrides]]
module = ["watcherclient.*", "timeout_decorator"]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

timeout_decorator is not need here

follow_untyped_imports = true
3 changes: 3 additions & 0 deletions sunbeam-python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,6 @@ python-libmaas

# Faillible management
tenacity

# timeout
timeout-decorator
jneo8 marked this conversation as resolved.
Show resolved Hide resolved
9 changes: 5 additions & 4 deletions sunbeam-python/sunbeam/core/openstack_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@ def get_admin_connection(jhelper: JujuHelper) -> openstack.connection.Connection


def guests_on_hypervisor(
hypervisor_name: str, jhelper: JujuHelper
hypervisor_name: str, conn: openstack.connection.Connection
jneo8 marked this conversation as resolved.
Show resolved Hide resolved
) -> list[openstack.compute.v2.server.Server]:
"""Return a list of guests that run on the given hypervisor.

:param hypervisor_name: Name of hypervisor
:param jhelper: Juju helpers for retrieving admin credentials
:param conn: Admin connection
:raises: openstack.exceptions.SDKException
"""
conn = get_admin_connection(jhelper)
return list(conn.compute.servers(all_projects=True, host=hypervisor_name))
return list(
conn.compute.servers(all_projects=True, hypervisor_hostname=hypervisor_name)
)


def remove_compute_service(
Expand Down
175 changes: 175 additions & 0 deletions sunbeam-python/sunbeam/core/watcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# Copyright (c) 2024 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import time
from typing import Any

import timeout_decorator
from watcherclient import v1 as watcher
from watcherclient.common.apiclient.exceptions import NotFound
from watcherclient.v1 import client as watcher_client

from sunbeam.core.common import SunbeamException, read_config
from sunbeam.core.deployment import Deployment
from sunbeam.core.juju import JujuHelper
from sunbeam.core.openstack_api import get_admin_connection
from sunbeam.steps.openstack import REGION_CONFIG_KEY

LOG = logging.getLogger(__name__)

# Timeout while waiting for the watcher resource to reach the target state.
TIMEOUT = 60 * 3
# Sleep interval between querying watcher resources.
SLEEP_INTERVAL = 5
jneo8 marked this conversation as resolved.
Show resolved Hide resolved
ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Maintaining Template"
ENABLE_MAINTENANCE_STRATEGY_NAME = "host_maintenance"
ENABLE_MAINTENANCE_GOAL_NAME = "cluster_maintaining"

WORKLOAD_BALANCING_GOAL_NAME = "workload_balancing"
WORKLOAD_BALANCING_STRATEGY_NAME = "workload_stabilization"
WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME = "Sunbeam Cluster Workload Balancing Template"


def get_watcher_client(deployment: Deployment) -> watcher_client.Client:
region = read_config(deployment.get_client(), REGION_CONFIG_KEY)["region"]
conn = get_admin_connection(
jhelper=JujuHelper(deployment.get_connected_controller())
)

watcher_endpoint = conn.session.get_endpoint(
service_type="infra-optim",
region_name=region,
)
return watcher_client.Client(session=conn.session, endpoint=watcher_endpoint)


def _create_host_maintenance_audit_template(
client: watcher_client.Client,
) -> watcher.AuditTemplate:
template = client.audit_template.create(
name=ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME,
description="Audit template for cluster maintaining",
goal=ENABLE_MAINTENANCE_GOAL_NAME,
strategy=ENABLE_MAINTENANCE_STRATEGY_NAME,
)
return template


def _create_workload_balancing_audit_template(
client: watcher_client.Client,
) -> watcher.AuditTemplate:
template = client.audit_template.create(
name=WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME,
description="Audit template for workload balancing",
goal=WORKLOAD_BALANCING_GOAL_NAME,
strategy=WORKLOAD_BALANCING_STRATEGY_NAME,
)
return template


def get_enable_maintenance_audit_template(
client: watcher_client.Client,
) -> watcher.AuditTemplate:
try:
template = client.audit_template.get(ENABLE_MAINTENANCE_AUDIT_TEMPLATE_NAME)
except NotFound:
template = _create_host_maintenance_audit_template(client=client)
return template


def get_workload_balancing_audit_template(
client: watcher_client.Client,
) -> watcher.AuditTemplate:
try:
template = client.audit_template.get(WORKLOAD_BALANCING_AUDIT_TEMPLATE_NAME)
except NotFound:
template = _create_workload_balancing_audit_template(client=client)
return template


@timeout_decorator.timeout(TIMEOUT)
def create_audit(
client: watcher_client.Client,
template: watcher.AuditTemplate,
audit_type: str = "ONESHOT",
parameters: dict[str, Any] = {},
) -> watcher.Audit:
audit = client.audit.create(
audit_template_uuid=template.uuid,
audit_type=audit_type,
parameters=parameters,
)
while True:
audit_details = client.audit.get(audit.uuid)
if audit_details.state in ["SUCCEEDED", "FAILED"]:
break
time.sleep(SLEEP_INTERVAL)
if audit_details.state == "SUCCEEDED":
jneo8 marked this conversation as resolved.
Show resolved Hide resolved
LOG.debug(f"Create Watcher audit {audit.uuid} successfully")
else:
LOG.debug(f"Create Watcher audit {audit.uuid} failed")
raise SunbeamException(
f"Create watcher audit failed, template: {template.name}"
)

_check_audit_plans_recommended(client=client, audit=audit)
return audit


def _check_audit_plans_recommended(client: watcher_client.Client, audit: watcher.Audit):
action_plans = client.action_plan.list(audit=audit.uuid)
# Verify all the action_plan's state is RECOMMENDED
if not all(plan.state in ["RECOMMENDED", "SUCCEEDED"] for plan in action_plans):
raise SunbeamException(
f"Not all action plan for audit({audit.uuid}) is RECOMMENDED"
)
jneo8 marked this conversation as resolved.
Show resolved Hide resolved


def get_actions(
client: watcher_client.Client, audit: watcher.Audit
) -> list[watcher.Action]:
"""Get list of actions by audit."""
return client.action.list(audit=audit.uuid, detail=True)
Copy link
Collaborator

@hemanthnakkina hemanthnakkina Jan 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



def exec_audit(client: watcher_client.Client, audit: watcher.Audit):
"""Run audit's action plans."""
action_plans = client.action_plan.list(audit=audit.uuid)
for action_plan in action_plans:
_exec_plan(client=client, action_plan=action_plan)
LOG.info(f"All Action plan for Audit {audit.uuid} execution successfully")


@timeout_decorator.timeout(TIMEOUT)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each actionplan corresponds to multiple actions and each action can be migration. If there are more VMs to be migrated, then the actionplan may take more than TIMEOUT i.e., 3 minutes.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The expected behavior is as follows:

  • If the migration times out, the command should fail.
  • When the user re-runs the command, it should fail if any VMs are still migrating, displaying a message indicating that some instances are in the migration process.
  • Once all migration watcher actions are complete, re-running the command should result in success.

def _exec_plan(client: watcher_client.Client, action_plan: watcher.ActionPlan):
"""Run action plan."""
if action_plan.state == "SUCCEEDED":
LOG.debug(f"action plan {action_plan.uuid} state is SUCCEEDED, skip execution")
return
client.action_plan.start(action_plan_id=action_plan.uuid)

_action_plan: watcher.ActionPlan
while True:
_action_plan = client.action_plan.get(action_plan_id=action_plan.uuid)
if _action_plan.state in ["SUCCEEDED", "FAILED"]:
break
time.sleep(SLEEP_INTERVAL)

if _action_plan.state == "SUCCEEDED":
LOG.debug(f"Action plan {action_plan.uuid} execution successfully")
else:
LOG.debug(f"Action plan {action_plan.uuid} execution failed")
raise SunbeamException(f"Action plan {action_plan.uuid} execution failed")
1 change: 1 addition & 0 deletions sunbeam-python/test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ croniter

ruff
mypy
python-watcherclient
8 changes: 5 additions & 3 deletions sunbeam-python/tests/unit/sunbeam/core/test_openstack_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,14 @@ def test_get_admin_connection(self, retrieve_admin_credentials, os_connect):
project_domain_name=FAKE_CREDS.get("OS_PROJECT_DOMAIN_NAME"),
)

def test_guests_on_hypervisor(self, get_admin_connection):
def test_guests_on_hypervisor(self):
conn = Mock()
get_admin_connection.return_value = conn
conn.compute.servers.return_value = [1]
assert sunbeam.core.openstack_api.guests_on_hypervisor("hyper1", None) == [1]
conn.compute.servers.assert_called_once_with(all_projects=True, host="hyper1")
assert sunbeam.core.openstack_api.guests_on_hypervisor("hyper1", conn) == [1]
conn.compute.servers.assert_called_once_with(
all_projects=True, hypervisor_hostname="hyper1"
)

def test_remove_compute_service(self):
service1 = Mock(binary="nova-compute", host="hyper1")
Expand Down
Loading
Loading