From 8ab18168ef9410654e7070824d6c9ebbe7f2c0a5 Mon Sep 17 00:00:00 2001 From: Michael Dmitry <33381599+michaeldmitry@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:02:50 +0200 Subject: [PATCH] Implement grafana_datasource_exchange (#654) * add send-datasource endpoint * fix UTs * fetch lib * remove juju top lib * add raise_on_error=false * incr idle period * PR comments * remove unit_name --- metadata.yaml | 6 +- requirements.txt | 5 +- src/charm.py | 42 +++++++++- tests/integration/helpers.py | 6 +- tests/interface/conftest.py | 49 ++++++++++-- .../test_grafana_datasource_exchange.py | 13 ++++ tests/scenario/conftest.py | 15 +++- tests/scenario/helpers.py | 44 ++++++----- .../scenario/test_alert_expression_labels.py | 6 +- tests/scenario/test_brute_isolated.py | 8 +- tests/scenario/test_datasource_exchange.py | 78 +++++++++++++++++++ tests/scenario/test_server_scheme.py | 8 +- tests/unit/test_charm.py | 4 +- tox.ini | 8 +- 14 files changed, 236 insertions(+), 56 deletions(-) create mode 100644 tests/interface/test_grafana_datasource_exchange.py create mode 100644 tests/scenario/test_datasource_exchange.py diff --git a/metadata.yaml b/metadata.yaml index f77f7ef4..b94ccc0c 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -41,7 +41,11 @@ provides: interface: grafana_dashboard receive-remote-write: interface: prometheus_remote_write - + send-datasource: + interface: grafana_datasource_exchange + description: | + Integration to share with other COS components this charm's grafana datasources, and receive theirs. + requires: metrics-endpoint: interface: prometheus_scrape diff --git a/requirements.txt b/requirements.txt index 7c4fe61a..08a78f21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ -cosl +cosl>=0.0.46 cryptography jsonschema -# pinned to 2.15 as 2.16 breaks our scenario tests and 2.17 breaks our unittests -ops == 2.15 +ops pyaml requests lightkube >= 0.11 diff --git a/src/charm.py b/src/charm.py index 355b6474..0fa4a399 100755 --- a/src/charm.py +++ b/src/charm.py @@ -10,7 +10,7 @@ import socket import subprocess from pathlib import Path -from typing import Dict, Optional, Tuple, TypedDict, cast +from typing import Dict, List, Optional, Tuple, TypedDict, cast from urllib.parse import urlparse import yaml @@ -43,6 +43,7 @@ IngressPerUnitRevokedForUnitEvent, ) from cosl import JujuTopology +from cosl.interfaces.datasource_exchange import DatasourceDict, DatasourceExchange from lightkube.core.client import Client from lightkube.core.exceptions import ApiError as LightkubeApiError from lightkube.resources.core_v1 import PersistentVolumeClaim, Pod @@ -232,6 +233,11 @@ def __init__(self, *args): self.charm_tracing_endpoint, self.server_cert = charm_tracing_config( self.charm_tracing, self._ca_cert_path ) + self.datasource_exchange = DatasourceExchange( + self, + provider_endpoint="send-datasource", + requirer_endpoint=None, + ) self.framework.observe(self.on.prometheus_pebble_ready, self._on_pebble_ready) self.framework.observe(self.on.config_changed, self._configure) @@ -246,8 +252,23 @@ def __init__(self, *args): self.framework.observe(self.alertmanager_consumer.on.cluster_changed, self._configure) self.framework.observe(self.resources_patch.on.patch_failed, self._on_k8s_patch_failed) self.framework.observe(self.on.validate_configuration_action, self._on_validate_config) + self.framework.observe( + self.on.send_datasource_relation_changed, self._on_grafana_source_changed + ) + self.framework.observe( + self.on.send_datasource_relation_departed, self._on_grafana_source_changed + ) + self.framework.observe( + self.on.grafana_source_relation_changed, self._on_grafana_source_changed + ) + self.framework.observe( + self.on.grafana_source_relation_departed, self._on_grafana_source_changed + ) self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status) + def _on_grafana_source_changed(self, _): + self._update_datasource_exchange() + def _on_collect_unit_status(self, event: CollectStatusEvent): # "Pull" statuses retention_time = self.model.config.get("metrics_retention_time", "") @@ -1106,6 +1127,25 @@ def _push(self, path, contents): """Push file to container, creating subdirs as necessary.""" self.container.push(path, contents, make_dirs=True, encoding="utf-8") + def _update_datasource_exchange(self) -> None: + """Update the grafana-datasource-exchange relations.""" + if not self.unit.is_leader(): + return + + # we might have multiple grafana-source relations, this method collects them all and returns a mapping from + # the `grafana_uid` to the contents of the `datasource_uids` field + # for simplicity, we assume that we're sending the same data to different grafanas. + # read more in https://discourse.charmhub.io/t/tempo-ha-docs-correlating-traces-metrics-logs/16116 + grafana_uids_to_units_to_uids = self.grafana_source_provider.get_source_uids() + raw_datasources: List[DatasourceDict] = [] + + for grafana_uid, ds_uids in grafana_uids_to_units_to_uids.items(): + for _, ds_uid in ds_uids.items(): + raw_datasources.append( + {"type": "prometheus", "uid": ds_uid, "grafana_uid": grafana_uid} + ) + self.datasource_exchange.publish(datasources=raw_datasources) + @property def workload_tracing_endpoint(self) -> Optional[str]: """Tempo endpoint for workload tracing.""" diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 550553a7..7fe7e06e 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -292,7 +292,9 @@ async def deploy_and_configure_minio(ops_test: OpsTest) -> None: "secret-key": "secretkey", } await ops_test.model.deploy("minio", channel="edge", trust=True, config=config) - await ops_test.model.wait_for_idle(apps=["minio"], status="active", timeout=2000) + await ops_test.model.wait_for_idle( + apps=["minio"], status="active", timeout=2000, idle_period=45 + ) minio_addr = await unit_address(ops_test, "minio", 0) mc_client = Minio( @@ -350,6 +352,8 @@ async def deploy_tempo_cluster(ops_test: OpsTest): status="active", timeout=2000, idle_period=30, + # TODO: remove when https://github.com/canonical/tempo-coordinator-k8s-operator/issues/90 is fixed + raise_on_error=False, ) diff --git a/tests/interface/conftest.py b/tests/interface/conftest.py index f2bbbac4..5be81991 100644 --- a/tests/interface/conftest.py +++ b/tests/interface/conftest.py @@ -2,11 +2,13 @@ # # See LICENSE file for licensing details. # from unittest.mock import patch +import json from unittest.mock import patch import pytest +from charms.tempo_coordinator_k8s.v0.charm_tracing import charm_tracing_disabled from interface_tester import InterfaceTester -from scenario import Container, ExecOutput, State +from scenario import Container, Exec, Relation, State from charm import PrometheusCharm @@ -27,16 +29,34 @@ def prometheus_charm(): _promtool_check_config=lambda *_: ("stdout", ""), _prometheus_version="0.1.0", ): - yield PrometheusCharm + with charm_tracing_disabled(): + yield PrometheusCharm + + +prometheus_container = Container( + name="prometheus", + can_connect=True, + execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")}, +) + +grafana_source_relation = Relation( + "grafana-source", + remote_app_data={ + "datasource_uids": json.dumps({"prometheus/0": "01234"}), + "grafana_uid": "5678", + }, +) + +grafana_datasource_exchange_relation = Relation( + "send-datasource", + remote_app_data={ + "datasources": json.dumps([{"type": "prometheus", "uid": "01234", "grafana_uid": "5678"}]) + }, +) def begin_with_initial_hooks_isolated() -> State: - container = Container( - "prometheus", - can_connect=True, - exec_mock={("update-ca-certificates", "--fresh"): ExecOutput(return_code=0, stdout="")}, - ) - state = State(containers=[container], leader=True) + state = State(containers=[prometheus_container], leader=True) return state @@ -47,3 +67,16 @@ def interface_tester(interface_tester: InterfaceTester, prometheus_charm): state_template=begin_with_initial_hooks_isolated(), ) yield interface_tester + + +@pytest.fixture +def grafana_datasource_exchange_tester(interface_tester: InterfaceTester, prometheus_charm): + interface_tester.configure( + charm_type=prometheus_charm, + state_template=State( + leader=True, + containers=[prometheus_container], + relations=[grafana_source_relation, grafana_datasource_exchange_relation], + ), + ) + yield interface_tester diff --git a/tests/interface/test_grafana_datasource_exchange.py b/tests/interface/test_grafana_datasource_exchange.py new file mode 100644 index 00000000..b2301dee --- /dev/null +++ b/tests/interface/test_grafana_datasource_exchange.py @@ -0,0 +1,13 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +from interface_tester import InterfaceTester + + +def test_grafana_datasource_exchange_v0_interface( + grafana_datasource_exchange_tester: InterfaceTester, +): + grafana_datasource_exchange_tester.configure( + interface_name="grafana_datasource_exchange", + interface_version=0, + ) + grafana_datasource_exchange_tester.run() diff --git a/tests/scenario/conftest.py b/tests/scenario/conftest.py index 10927833..3c3caf9e 100644 --- a/tests/scenario/conftest.py +++ b/tests/scenario/conftest.py @@ -4,7 +4,8 @@ from unittest.mock import patch import pytest -from scenario import Context +from charms.tempo_coordinator_k8s.v0.charm_tracing import charm_tracing_disabled +from scenario import Container, Context, Exec from charm import PrometheusCharm @@ -25,9 +26,19 @@ def prometheus_charm(): _promtool_check_config=lambda *_: ("stdout", ""), _prometheus_version="0.1.0", ): - yield PrometheusCharm + with charm_tracing_disabled(): + yield PrometheusCharm @pytest.fixture(scope="function") def context(prometheus_charm): return Context(charm_type=prometheus_charm, juju_version="3.0.3") + + +@pytest.fixture(scope="function") +def prometheus_container(): + return Container( + "prometheus", + can_connect=True, + execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")}, + ) diff --git a/tests/scenario/helpers.py b/tests/scenario/helpers.py index 492adb8a..36dee796 100644 --- a/tests/scenario/helpers.py +++ b/tests/scenario/helpers.py @@ -1,37 +1,39 @@ # Copyright 2023 Canonical Ltd. # See LICENSE file for licensing details. -from scenario import Container, Context, ExecOutput, Network, PeerRelation, Relation, State +import dataclasses + +from scenario import Container, Context, Exec, PeerRelation, Relation, State def begin_with_initial_hooks_isolated(context: Context, *, leader: bool = True) -> State: container = Container( "prometheus", can_connect=False, - exec_mock={("update-ca-certificates", "--fresh"): ExecOutput(return_code=0, stdout="")}, + execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")}, ) state = State(containers=[container]) peer_rel = PeerRelation("prometheus-peers") - state = context.run("install", state) + state = context.run(context.on.install(), state) - state = state.replace(relations=[peer_rel]) - state = context.run(peer_rel.created_event, state) + state = dataclasses.replace(state, relations=[peer_rel]) + state = context.run(context.on.relation_created(peer_rel), state) if leader: - state = state.replace(leader=True) - state = context.run("leader-elected", state) + state = dataclasses.replace(state, leader=True) + state = context.run(context.on.leader_elected(), state) else: - state = state.replace(leader=False) - state = context.run("leader-settings-changed", state) + state = dataclasses.replace(state, leader=False) + state = context.run(context.on.leader_elected(), state) - state = context.run("config-changed", state) + state = context.run(context.on.config_changed(), state) - container = container.replace(can_connect=True) - state = state.replace(containers=[container]) - state = context.run(container.pebble_ready_event, state) + container = dataclasses.replace(container, can_connect=True) + state = dataclasses.replace(state, containers=[container]) + state = context.run(context.on.pebble_ready(container), state) - state = context.run("start", state) + state = context.run(context.on.start(), state) return state @@ -39,20 +41,22 @@ def begin_with_initial_hooks_isolated(context: Context, *, leader: bool = True) def add_relation_sequence(context: Context, state: State, relation: Relation): """Helper to simulate a relation-added sequence.""" # TODO consider adding to scenario.sequences - state_with_relation = state.replace( - relations=state.relations + [relation], - networks=state.networks + [Network.default(relation.endpoint)], + state_with_relation = dataclasses.replace( + state, + relations=state.relations.union([relation]), + ) + state_after_relation_created = context.run( + context.on.relation_created(relation), state_with_relation ) - state_after_relation_created = context.run(relation.created_event, state_with_relation) # relation is not mutated! relation_1 = state_after_relation_created.get_relations(relation.endpoint)[0] state_after_relation_joined = context.run( - relation_1.joined_event, state_after_relation_created + context.on.relation_joined(relation_1), state_after_relation_created ) relation_2 = state_after_relation_joined.get_relations(relation.endpoint)[0] state_after_relation_changed = context.run( - relation_2.changed_event, state_after_relation_joined + context.on.relation_changed(relation_2), state_after_relation_joined ) return state_after_relation_changed diff --git a/tests/scenario/test_alert_expression_labels.py b/tests/scenario/test_alert_expression_labels.py index 3c77759d..10de975d 100644 --- a/tests/scenario/test_alert_expression_labels.py +++ b/tests/scenario/test_alert_expression_labels.py @@ -4,7 +4,7 @@ import json import yaml -from scenario import Container, ExecOutput, Relation, State +from scenario import Container, Exec, Relation, State def test_alert_expression_labels(context): @@ -42,10 +42,10 @@ def test_alert_expression_labels(context): container = Container( name="prometheus", can_connect=True, - exec_mock={("update-ca-certificates", "--fresh"): ExecOutput(return_code=0, stdout="")}, + execs={Exec(["update-ca-certificates", "--fresh"], return_code=0, stdout="")}, ) state = State(containers=[container], relations=[remote_write_relation]) - context.run(event=remote_write_relation.changed_event, state=state) + context.run(context.on.relation_changed(remote_write_relation), state=state) rules_file = ( container.get_filesystem(context) / "etc/prometheus/rules/juju_foobar-model_d07df316_remote-app.rules" diff --git a/tests/scenario/test_brute_isolated.py b/tests/scenario/test_brute_isolated.py index a7d074f8..04942265 100644 --- a/tests/scenario/test_brute_isolated.py +++ b/tests/scenario/test_brute_isolated.py @@ -10,10 +10,10 @@ def test_startup_shutdown_sequence(context: Context): state = begin_with_initial_hooks_isolated(context) - state = context.run("update-status", state) + state = context.run(context.on.update_status(), state) for peer_rel in state.get_relations("replicas"): - state = context.run(peer_rel.departed_event, state) + state = context.run(context.on.relation_departed(peer_rel), state) - state = context.run("stop", state) - context.run("remove", state) + state = context.run(context.on.stop(), state) + context.run(context.on.remove(), state) diff --git a/tests/scenario/test_datasource_exchange.py b/tests/scenario/test_datasource_exchange.py new file mode 100644 index 00000000..cc65ac51 --- /dev/null +++ b/tests/scenario/test_datasource_exchange.py @@ -0,0 +1,78 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +import json + +import pytest +from cosl.interfaces.datasource_exchange import ( + DatasourceExchange, + DSExchangeAppData, + GrafanaDatasource, +) +from scenario import Relation, State + +from charm import PrometheusCharm + +ds_tempo = [ + {"type": "tempo", "uid": "3", "grafana_uid": "4"}, +] + +ds_loki = [ + {"type": "loki", "uid": "8", "grafana_uid": "9"}, +] + +loki_dsx = Relation( + "send-datasource", + remote_app_data=DSExchangeAppData(datasources=json.dumps(ds_loki)).dump(), +) +tempo_dsx = Relation( + "send-datasource", + remote_app_data=DSExchangeAppData(datasources=json.dumps(ds_tempo)).dump(), +) + +ds = Relation( + "grafana-source", + remote_app_data={ + "grafana_uid": "9", + "datasource_uids": json.dumps({"prometheus/0": "1234"}), + }, +) + + +@pytest.mark.parametrize("event_type", ("changed", "created", "joined")) +@pytest.mark.parametrize("relation_to_observe", (ds, loki_dsx, tempo_dsx)) +def test_datasource_send(context, prometheus_container, relation_to_observe, event_type): + + state_in = State( + relations=[ + ds, + loki_dsx, + tempo_dsx, + ], + containers=[prometheus_container], + leader=True, + ) + + # WHEN we receive a datasource-related event + with context( + getattr(context.on, f"relation_{event_type}")(relation_to_observe), state_in + ) as mgr: + charm: PrometheusCharm = mgr.charm + # THEN we can find all received datasource uids + dsx: DatasourceExchange = charm.datasource_exchange + received = dsx.received_datasources + assert received == ( + GrafanaDatasource(type="tempo", uid="3", grafana_uid="4"), + GrafanaDatasource(type="loki", uid="8", grafana_uid="9"), + ) + state_out = mgr.run() + + # AND THEN we publish our own datasource information to mimir and tempo + published_dsx_loki = state_out.get_relation(loki_dsx.id).local_app_data + published_dsx_tempo = state_out.get_relation(tempo_dsx.id).local_app_data + assert published_dsx_tempo == published_dsx_loki + assert json.loads(published_dsx_tempo["datasources"])[0] == { + "type": "prometheus", + "uid": "1234", + "grafana_uid": "9", + } diff --git a/tests/scenario/test_server_scheme.py b/tests/scenario/test_server_scheme.py index c70db15b..317220ad 100644 --- a/tests/scenario/test_server_scheme.py +++ b/tests/scenario/test_server_scheme.py @@ -34,9 +34,9 @@ def initial_state(self, context, fqdn, leader) -> State: # Add relations rels = [ - Relation("self-metrics-endpoint", relation_id=10), # external self-monitoring - Relation("grafana-source", relation_id=11), # grafana - Relation("receive-remote-write", relation_id=12), # grafana-agent + Relation("self-metrics-endpoint", id=10), # external self-monitoring + Relation("grafana-source", id=11), # grafana + Relation("receive-remote-write", id=12), # grafana-agent ] for rel in rels: state = add_relation_sequence(context, state, rel) @@ -56,7 +56,7 @@ def test_pebble_layer_scheme_becomes_https_if_tls_relation_added( # WHEN a tls_certificates relation joins ca = Relation( "certificates", - relation_id=100, + id=100, remote_app_data={ "certificates": json.dumps( [ diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 19741d2c..19535058 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -582,7 +582,7 @@ def service(self): @k8s_resource_multipatch @patch("lightkube.core.client.GenericSyncClient") @patch.multiple( - "ops.testing._TestingPebbleClient", + "ops._private.harness._TestingPebbleClient", start_services=raise_if_called, stop_services=raise_if_called, restart_services=raise_if_called, @@ -617,7 +617,7 @@ def test_no_restart_nor_reload_when_nothing_changes(self, reload_config_patch, * @k8s_resource_multipatch @patch("lightkube.core.client.GenericSyncClient") @patch.multiple( - "ops.testing._TestingPebbleClient", + "ops._private.harness._TestingPebbleClient", start_services=raise_if_called, stop_services=raise_if_called, restart_services=raise_if_called, diff --git a/tox.ini b/tox.ini index 34701189..b1c393ee 100644 --- a/tox.ini +++ b/tox.ini @@ -96,14 +96,9 @@ allowlist_externals = [testenv:scenario] description = Scenario tests deps = - cosl - ops >= 2.5.0 pytest - ops-scenario >=5.1,<6.0 + ops[testing] -r{toxinidir}/requirements.txt - opentelemetry-exporter-otlp-proto-http==1.21.0 # PYDEPS for tracing - importlib-metadata==6.0.0 # PYDEPS for tracing - pydantic>=2 # PYDEPS for tracing commands = pytest -v --tb native {[vars]tst_path}/scenario --log-cli-level=INFO -s {posargs} @@ -112,7 +107,6 @@ commands = description = Run interface tests deps = pytest - ops-scenario>=5.3.1 pytest-interface-tester > 2.0.0 -r{toxinidir}/requirements.txt commands =