diff --git a/lib/charms/observability_libs/v0/kubernetes_compute_resources_patch.py b/lib/charms/observability_libs/v0/kubernetes_compute_resources_patch.py index 2ab8a22c..34dd0264 100644 --- a/lib/charms/observability_libs/v0/kubernetes_compute_resources_patch.py +++ b/lib/charms/observability_libs/v0/kubernetes_compute_resources_patch.py @@ -4,7 +4,7 @@ """# KubernetesComputeResourcesPatch Library. This library is designed to enable developers to more simply patch the Kubernetes compute resource -limits and requests created by Juju during the deployment of a sidecar charm. +limits and requests created by Juju during the deployment of a charm. When initialised, this library binds a handler to the parent charm's `config-changed` event. The config-changed event is used because it is guaranteed to fire on startup, on upgrade and on @@ -76,6 +76,17 @@ def _resource_spec_from_config(self) -> ResourceRequirements: return ResourceRequirements(limits=spec, requests=spec) ``` +If you wish to pull the state of the resources patch operation and set the charm unit status based on that patch result, +you can achieve that using `get_status()` function. +```python +class SomeCharm(CharmBase): + def __init__(self, *args): + #... + self.framework.observe(self.on.collect_unit_status, self._on_collect_unit_status) + #... + def _on_collect_unit_status(self, event: CollectStatusEvent): + event.add_status(self.resources_patch.get_status()) +``` Additionally, you may wish to use mocks in your charm's unit testing to ensure that the library does not try to make any API calls, or open any files during testing that are unlikely to be @@ -83,12 +94,14 @@ def _resource_spec_from_config(self) -> ResourceRequirements: ```python # ... +from ops import ActiveStatus @patch.multiple( "charm.KubernetesComputeResourcesPatch", _namespace="test-namespace", _is_patched=lambda *a, **kw: True, is_ready=lambda *a, **kw: True, + get_status=lambda _: ActiveStatus(), ) @patch("lightkube.core.client.GenericSyncClient") def setUp(self, *unused): @@ -105,8 +118,9 @@ def setUp(self, *unused): import logging from decimal import Decimal from math import ceil, floor -from typing import Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +import tenacity from lightkube import ApiError, Client # pyright: ignore from lightkube.core import exceptions from lightkube.models.apps_v1 import StatefulSetSpec @@ -120,8 +134,10 @@ def setUp(self, *unused): from lightkube.resources.core_v1 import Pod from lightkube.types import PatchType from lightkube.utils.quantity import equals_canonically, parse_quantity +from ops import ActiveStatus, BlockedStatus, WaitingStatus from ops.charm import CharmBase from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents +from ops.model import StatusBase logger = logging.getLogger(__name__) @@ -133,14 +149,16 @@ def setUp(self, *unused): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 +LIBPATCH = 8 _Decimal = Union[Decimal, float, str, int] # types that are potentially convertible to Decimal def adjust_resource_requirements( - limits: Optional[dict], requests: Optional[dict], adhere_to_requests: bool = True + limits: Optional[Dict[Any, Any]], + requests: Optional[Dict[Any, Any]], + adhere_to_requests: bool = True, ) -> ResourceRequirements: """Adjust resource limits so that `limits` and `requests` are consistent with each other. @@ -289,6 +307,18 @@ def sanitize_resource_spec_dict(spec: Optional[dict]) -> Optional[dict]: return d +def _retry_on_condition(exception): + """Retry if the exception is an ApiError with a status code != 403. + + Returns: a boolean value to indicate whether to retry or not. + """ + if isinstance(exception, ApiError) and str(exception.status.code) != "403": + return True + if isinstance(exception, exceptions.ConfigError) or isinstance(exception, ValueError): + return True + return False + + class K8sResourcePatchFailedEvent(EventBase): """Emitted when patching fails.""" @@ -385,27 +415,132 @@ def get_actual(self, pod_name: str) -> Optional[ResourceRequirements]: ) return podspec.resources + def is_failed( + self, resource_reqs_func: Callable[[], ResourceRequirements] + ) -> Tuple[bool, str]: + """Returns a tuple indicating whether a patch operation has failed along with a failure message. + + Implementation is based on dry running the patch operation to catch if there would be failures (e.g: Wrong spec and Auth errors). + """ + try: + resource_reqs = resource_reqs_func() + limits = resource_reqs.limits + requests = resource_reqs.requests + except ValueError as e: + msg = f"Failed obtaining resource limit spec: {e}" + logger.error(msg) + return True, msg + + # Dry run does not catch negative values for resource requests and limits. + if not is_valid_spec(limits) or not is_valid_spec(requests): + msg = f"Invalid resource requirements specs: {limits}, {requests}" + logger.error(msg) + return True, msg + + resource_reqs = ResourceRequirements( + limits=sanitize_resource_spec_dict(limits), # type: ignore[arg-type] + requests=sanitize_resource_spec_dict(requests), # type: ignore[arg-type] + ) + + try: + self.apply(resource_reqs, dry_run=True) + except ApiError as e: + if e.status.code == 403: + msg = f"Kubernetes resources patch failed: `juju trust` this application. {e}" + else: + msg = f"Kubernetes resources patch failed: {e}" + return True, msg + except ValueError as e: + msg = f"Kubernetes resources patch failed: {e}" + return True, msg + + return False, "" + + def is_in_progress(self) -> bool: + """Returns a boolean to indicate whether a patch operation is in progress. + + Implementation follows a similar approach to `kubectl rollout status statefulset` to track the progress of a rollout. + Reference: https://github.com/kubernetes/kubectl/blob/kubernetes-1.31.0/pkg/polymorphichelpers/rollout_status.go + """ + try: + sts = self.client.get( + StatefulSet, name=self.statefulset_name, namespace=self.namespace + ) + except (ValueError, ApiError) as e: + # Assumption: if there was a persistent issue, it'd have been caught in `is_failed` + # Wait until next run to try again. + logger.error(f"Failed to fetch statefulset from K8s api: {e}") + return False + + if sts.status is None or sts.spec is None: + logger.debug("status/spec are not yet available") + return False + if sts.status.observedGeneration == 0 or ( + sts.metadata + and sts.status.observedGeneration + and sts.metadata.generation + and sts.metadata.generation > sts.status.observedGeneration + ): + logger.debug("waiting for statefulset spec update to be observed...") + return True + if ( + sts.spec.replicas is not None + and sts.status.readyReplicas is not None + and sts.status.readyReplicas < sts.spec.replicas + ): + logger.debug( + f"Waiting for {sts.spec.replicas-sts.status.readyReplicas} pods to be ready..." + ) + return True + + if ( + sts.spec.updateStrategy + and sts.spec.updateStrategy.type == "rollingUpdate" + and sts.spec.updateStrategy.rollingUpdate is not None + ): + if ( + sts.spec.replicas is not None + and sts.spec.updateStrategy.rollingUpdate.partition is not None + ): + if sts.status.updatedReplicas and sts.status.updatedReplicas < ( + sts.spec.replicas - sts.spec.updateStrategy.rollingUpdate.partition + ): + logger.debug( + f"Waiting for partitioned roll out to finish: {sts.status.updatedReplicas} out of {sts.spec.replicas - sts.spec.updateStrategy.rollingUpdate.partition} new pods have been updated..." + ) + return True + logger.debug( + f"partitioned roll out complete: {sts.status.updatedReplicas} new pods have been updated..." + ) + return False + + if sts.status.updateRevision != sts.status.currentRevision: + logger.debug( + f"waiting for statefulset rolling update to complete {sts.status.updatedReplicas} pods at revision {sts.status.updateRevision}..." + ) + return True + + logger.debug( + f"statefulset rolling update complete pods at revision {sts.status.currentRevision}" + ) + return False + def is_ready(self, pod_name, resource_reqs: ResourceRequirements): """Reports if the resource patch has been applied and is in effect. Returns: bool: A boolean indicating if the service patch has been applied and is in effect. """ - logger.info( - "reqs=%s, templated=%s, actual=%s", - resource_reqs, - self.get_templated(), - self.get_actual(pod_name), - ) return self.is_patched(resource_reqs) and equals_canonically( # pyright: ignore resource_reqs, self.get_actual(pod_name) # pyright: ignore ) - def apply(self, resource_reqs: ResourceRequirements) -> None: + def apply(self, resource_reqs: ResourceRequirements, dry_run=False) -> None: """Patch the Kubernetes resources created by Juju to limit cpu or mem.""" # Need to ignore invalid input, otherwise the StatefulSet gives "FailedCreate" and the # charm would be stuck in unknown/lost. - if self.is_patched(resource_reqs): + if not dry_run and self.is_patched(resource_reqs): + logger.debug(f"Resource requests are already patched: {resource_reqs}") return self.client.patch( @@ -415,6 +550,7 @@ def apply(self, resource_reqs: ResourceRequirements) -> None: namespace=self.namespace, patch_type=PatchType.APPLY, field_manager=self.__class__.__name__, + dry_run=dry_run, ) @@ -422,6 +558,9 @@ class KubernetesComputeResourcesPatch(Object): """A utility for patching the Kubernetes compute resources set up by Juju.""" on = K8sResourcePatchEvents() # pyright: ignore + PATCH_RETRY_STOP = tenacity.stop_after_delay(20) + PATCH_RETRY_WAIT = tenacity.wait_fixed(5) + PATCH_RETRY_IF = tenacity.retry_if_exception(_retry_on_condition) def __init__( self, @@ -468,7 +607,11 @@ def _on_config_changed(self, _): self._patch() def _patch(self) -> None: - """Patch the Kubernetes resources created by Juju to limit cpu or mem.""" + """Patch the Kubernetes resources created by Juju to limit cpu or mem. + + This method will keep on retrying to patch the kubernetes resource for a default duration of 20 seconds + if the patching failure is due to a recoverable error (e.g: Network Latency). + """ try: resource_reqs = self.resource_reqs_func() limits = resource_reqs.limits @@ -492,7 +635,18 @@ def _patch(self) -> None: ) try: - self.patcher.apply(resource_reqs) + for attempt in tenacity.Retrying( + retry=self.PATCH_RETRY_IF, + stop=self.PATCH_RETRY_STOP, + wait=self.PATCH_RETRY_WAIT, + # if you don't succeed raise the last caught exception when you're done + reraise=True, + ): + with attempt: + logger.debug( + f"attempt #{attempt.retry_state.attempt_number} to patch resource limits" + ) + self.patcher.apply(resource_reqs) except exceptions.ConfigError as e: msg = f"Error creating k8s client: {e}" @@ -503,6 +657,7 @@ def _patch(self) -> None: except ApiError as e: if e.status.code == 403: msg = f"Kubernetes resources patch failed: `juju trust` this application. {e}" + else: msg = f"Kubernetes resources patch failed: {e}" @@ -554,6 +709,29 @@ def is_ready(self) -> bool: self.on.patch_failed.emit(message=msg) return False + def get_status(self) -> StatusBase: + """Return the status of patching the resource limits in a `StatusBase` format. + + Returns: + StatusBase: There is a 1:1 mapping between the state of the patching operation and a `StatusBase` value that the charm can be set to. + Possible values are: + - ActiveStatus: The patch was applied successfully. + - BlockedStatus: The patch failed and requires a human intervention. + - WaitingStatus: The patch is still in progress. + + Example: + - ActiveStatus("Patch applied successfully") + - BlockedStatus("Failed due to missing permissions") + - WaitingStatus("Patch is in progress") + """ + failed, msg = self.patcher.is_failed(self.resource_reqs_func) + if failed: + return BlockedStatus(msg) + if self.patcher.is_in_progress(): + return WaitingStatus("waiting for resources patch to apply") + # patch successful or nothing has been patched yet + return ActiveStatus() + @property def _app(self) -> str: """Name of the current Juju application. diff --git a/lib/charms/observability_libs/v1/cert_handler.py b/lib/charms/observability_libs/v1/cert_handler.py index 3b87ad46..4a1940b9 100644 --- a/lib/charms/observability_libs/v1/cert_handler.py +++ b/lib/charms/observability_libs/v1/cert_handler.py @@ -26,7 +26,7 @@ self.framework.observe(self.cert_handler.on.cert_changed, self._on_server_cert_changed) container.push(keypath, self.cert_handler.private_key) -container.push(certpath, self.cert_handler.servert_cert) +container.push(certpath, self.cert_handler.server_cert) ``` Since this library uses [Juju Secrets](https://juju.is/docs/juju/secret) it requires Juju >= 3.0.3. @@ -59,7 +59,7 @@ import logging from ops.charm import CharmBase -from ops.framework import EventBase, EventSource, Object, ObjectEvents +from ops.framework import BoundEvent, EventBase, EventSource, Object, ObjectEvents, StoredState from ops.jujuversion import JujuVersion from ops.model import Relation, Secret, SecretNotFoundError @@ -67,7 +67,7 @@ LIBID = "b5cd5cd580f3428fa5f59a8876dcbe6a" LIBAPI = 1 -LIBPATCH = 11 +LIBPATCH = 13 VAULT_SECRET_LABEL = "cert-handler-private-vault" @@ -273,6 +273,7 @@ class CertHandler(Object): """A wrapper for the requirer side of the TLS Certificates charm library.""" on = CertHandlerEvents() # pyright: ignore + _stored = StoredState() def __init__( self, @@ -283,6 +284,7 @@ def __init__( peer_relation_name: str = "peers", cert_subject: Optional[str] = None, sans: Optional[List[str]] = None, + refresh_events: Optional[List[BoundEvent]] = None, ): """CertHandler is used to wrap TLS Certificates management operations for charms. @@ -299,8 +301,17 @@ def __init__( Must match metadata.yaml. cert_subject: Custom subject. Name collisions are under the caller's responsibility. sans: DNS names. If none are given, use FQDN. + refresh_events: an optional list of bound events which + will be observed to replace the current CSR with a new one + if there are changes in the CSR's DNS SANs or IP SANs. + Then, subsequently, replace its corresponding certificate with a new one. """ super().__init__(charm, key) + # use StoredState to store the hash of the CSR + # to potentially trigger a CSR renewal on `refresh_events` + self._stored.set_default( + csr_hash=None, + ) self.charm = charm # We need to sanitize the unit name, otherwise route53 complains: @@ -355,6 +366,15 @@ def __init__( self._on_upgrade_charm, ) + if refresh_events: + for ev in refresh_events: + self.framework.observe(ev, self._on_refresh_event) + + def _on_refresh_event(self, _): + """Replace the latest current CSR with a new one if there are any SANs changes.""" + if self._stored.csr_hash != self._csr_hash: + self._generate_csr(renew=True) + def _on_upgrade_charm(self, _): has_privkey = self.vault.get_value("private-key") @@ -419,6 +439,20 @@ def enabled(self) -> bool: return True + @property + def _csr_hash(self) -> int: + """A hash of the config that constructs the CSR. + + Only include here the config options that, should they change, should trigger a renewal of + the CSR. + """ + return hash( + ( + tuple(self.sans_dns), + tuple(self.sans_ip), + ) + ) + @property def available(self) -> bool: """Return True if all certs are available in relation data; False otherwise.""" @@ -484,6 +518,8 @@ def _generate_csr( ) self.certificates.request_certificate_creation(certificate_signing_request=csr) + self._stored.csr_hash = self._csr_hash + if clear_cert: self.vault.clear() @@ -548,9 +584,19 @@ def server_cert(self) -> Optional[str]: @property def chain(self) -> Optional[str]: - """Return the ca chain bundled as a single PEM string.""" + """Return the entire chain bundled as a single PEM string. This includes, if available, the certificate, intermediate CAs, and the root CA. + + If the server certificate is not set in the chain by the provider, we'll add it + to the top of the chain so that it could be used by a server. + """ cert = self.get_cert() - return cert.chain_as_pem() if cert else None + if not cert: + return None + chain = cert.chain_as_pem() + if cert.certificate not in chain: + # add server cert to chain + chain = cert.certificate + "\n\n" + chain + return chain def _on_certificate_expiring( self, event: Union[CertificateExpiringEvent, CertificateInvalidatedEvent] diff --git a/lib/charms/tempo_k8s/v1/charm_tracing.py b/lib/charms/tempo_k8s/v1/charm_tracing.py index dc84e3f4..2dbdddd6 100644 --- a/lib/charms/tempo_k8s/v1/charm_tracing.py +++ b/lib/charms/tempo_k8s/v1/charm_tracing.py @@ -9,21 +9,57 @@ This means that, if your charm is related to, for example, COS' Tempo charm, you will be able to inspect in real time from the Grafana dashboard the execution flow of your charm. -To start using this library, you need to do two things: +# Quickstart +Fetch the following charm libs (and ensure the minimum version/revision numbers are satisfied): + + charmcraft fetch-lib charms.tempo_k8s.v2.tracing # >= 1.10 + charmcraft fetch-lib charms.tempo_k8s.v1.charm_tracing # >= 2.7 + +Then edit your charm code to include: + +```python +# import the necessary charm libs +from charms.tempo_k8s.v2.tracing import TracingEndpointRequirer, charm_tracing_config +from charms.tempo_k8s.v1.charm_tracing import charm_tracing + +# decorate your charm class with charm_tracing: +@charm_tracing( + # forward-declare the instance attributes that the instrumentor will look up to obtain the + # tempo endpoint and server certificate + tracing_endpoint="tracing_endpoint", + server_cert="server_cert" +) +class MyCharm(CharmBase): + _path_to_cert = "/path/to/cert.crt" + # path to cert file **in the charm container**. Its presence will be used to determine whether + # the charm is ready to use tls for encrypting charm traces. If your charm does not support tls, + # you can ignore this and pass None to charm_tracing_config. + # If you do support TLS, you'll need to make sure that the server cert is copied to this location + # and kept up to date so the instrumentor can use it. + + def __init__(self, ...): + ... + self.tracing = TracingEndpointRequirer(self, ...) + self.tracing_endpoint, self.server_cert = charm_tracing_config(self.tracing, self._path_to_cert) +``` + +# Detailed usage +To use this library, you need to do two things: 1) decorate your charm class with `@trace_charm(tracing_endpoint="my_tracing_endpoint")` -2) add to your charm a "my_tracing_endpoint" (you can name this attribute whatever you like) **property** -that returns an otlp http/https endpoint url. If you are using the `TracingEndpointProvider` as -`self.tracing = TracingEndpointProvider(self)`, the implementation could be: +2) add to your charm a "my_tracing_endpoint" (you can name this attribute whatever you like) +**property**, **method** or **instance attribute** that returns an otlp http/https endpoint url. +If you are using the ``charms.tempo_k8s.v2.tracing.TracingEndpointRequirer`` as +``self.tracing = TracingEndpointRequirer(self)``, the implementation could be: ``` @property def my_tracing_endpoint(self) -> Optional[str]: '''Tempo endpoint for charm tracing''' if self.tracing.is_ready(): - return self.tracing.otlp_http_endpoint() + return self.tracing.get_endpoint("otlp_http") else: return None ``` @@ -33,19 +69,52 @@ def my_tracing_endpoint(self) -> Optional[str]: - every event as a span (including custom events) - every charm method call (except dunders) as a span -if you wish to add more fine-grained information to the trace, you can do so by getting a hold of the tracer like so: + +## TLS support +If your charm integrates with a TLS provider which is also trusted by the tracing provider (the Tempo charm), +you can configure ``charm_tracing`` to use TLS by passing a ``server_cert`` parameter to the decorator. + +If your charm is not trusting the same CA as the Tempo endpoint it is sending traces to, +you'll need to implement a cert-transfer relation to obtain the CA certificate from the same +CA that Tempo is using. + +For example: +``` +from charms.tempo_k8s.v1.charm_tracing import trace_charm +@trace_charm( + tracing_endpoint="my_tracing_endpoint", + server_cert="_server_cert" +) +class MyCharm(CharmBase): + self._server_cert = "/path/to/server.crt" + ... + + def on_tls_changed(self, e) -> Optional[str]: + # update the server cert on the charm container for charm tracing + Path(self._server_cert).write_text(self.get_server_cert()) + + def on_tls_broken(self, e) -> Optional[str]: + # remove the server cert so charm_tracing won't try to use tls anymore + Path(self._server_cert).unlink() +``` + + +## More fine-grained manual instrumentation +if you wish to add more spans to the trace, you can do so by getting a hold of the tracer like so: ``` import opentelemetry ... - @property - def tracer(self) -> opentelemetry.trace.Tracer: - return opentelemetry.trace.get_tracer(type(self).__name__) +def get_tracer(self) -> opentelemetry.trace.Tracer: + return opentelemetry.trace.get_tracer(type(self).__name__) ``` By default, the tracer is named after the charm type. If you wish to override that, you can pass -a different `service_name` argument to `trace_charm`. +a different ``service_name`` argument to ``trace_charm``. + +See the official opentelemetry Python SDK documentation for usage: +https://opentelemetry-python.readthedocs.io/en/latest/ -*Upgrading from `v0`:* +## Upgrading from `v0` If you are upgrading from `charm_tracing` v0, you need to take the following steps (assuming you already have the newest version of the library in your charm): @@ -55,8 +124,9 @@ def tracer(self) -> opentelemetry.trace.Tracer: `opentelemetry-exporter-otlp-proto-http>=1.21.0`. -2) Update the charm method referenced to from `@trace` and `@trace_charm`, -to return from `TracingEndpointRequirer.otlp_http_endpoint()` instead of `grpc_http`. For example: +2) Update the charm method referenced to from ``@trace`` and ``@trace_charm``, +to return from ``TracingEndpointRequirer.get_endpoint("otlp_http")`` instead of ``grpc_http``. +For example: ``` from charms.tempo_k8s.v0.charm_tracing import trace_charm @@ -72,7 +142,7 @@ class MyCharm(CharmBase): def my_tracing_endpoint(self) -> Optional[str]: '''Tempo endpoint for charm tracing''' if self.tracing.is_ready(): - return self.tracing.otlp_grpc_endpoint() + return self.tracing.otlp_grpc_endpoint() # OLD API, DEPRECATED. else: return None ``` @@ -93,15 +163,67 @@ class MyCharm(CharmBase): def my_tracing_endpoint(self) -> Optional[str]: '''Tempo endpoint for charm tracing''' if self.tracing.is_ready(): - return self.tracing.otlp_http_endpoint() + return self.tracing.get_endpoint("otlp_http") # NEW API, use this. else: return None ``` -3) If you were passing a certificate using `server_cert`, you need to change it to provide an *absolute* path to -the certificate file. +3) If you were passing a certificate (str) using `server_cert`, you need to change it to +provide an *absolute* path to the certificate file instead. """ + +def _remove_stale_otel_sdk_packages(): + """Hack to remove stale opentelemetry sdk packages from the charm's python venv. + + See https://github.com/canonical/grafana-agent-operator/issues/146 and + https://bugs.launchpad.net/juju/+bug/2058335 for more context. This patch can be removed after + this juju issue is resolved and sufficient time has passed to expect most users of this library + have migrated to the patched version of juju. When this patch is removed, un-ignore rule E402 for this file in the pyproject.toml (see setting + [tool.ruff.lint.per-file-ignores] in pyproject.toml). + + This only has an effect if executed on an upgrade-charm event. + """ + # all imports are local to keep this function standalone, side-effect-free, and easy to revert later + import os + + if os.getenv("JUJU_DISPATCH_PATH") != "hooks/upgrade-charm": + return + + import logging + import shutil + from collections import defaultdict + + from importlib_metadata import distributions + + otel_logger = logging.getLogger("charm_tracing_otel_patcher") + otel_logger.debug("Applying _remove_stale_otel_sdk_packages patch on charm upgrade") + # group by name all distributions starting with "opentelemetry_" + otel_distributions = defaultdict(list) + for distribution in distributions(): + name = distribution._normalized_name # type: ignore + if name.startswith("opentelemetry_"): + otel_distributions[name].append(distribution) + + otel_logger.debug(f"Found {len(otel_distributions)} opentelemetry distributions") + + # If we have multiple distributions with the same name, remove any that have 0 associated files + for name, distributions_ in otel_distributions.items(): + if len(distributions_) <= 1: + continue + + otel_logger.debug(f"Package {name} has multiple ({len(distributions_)}) distributions.") + for distribution in distributions_: + if not distribution.files: # Not None or empty list + path = distribution._path # type: ignore + otel_logger.info(f"Removing empty distribution of {name} at {path}.") + shutil.rmtree(path) + + otel_logger.debug("Successfully applied _remove_stale_otel_sdk_packages patch. ") + + +_remove_stale_otel_sdk_packages() + import functools import inspect import logging @@ -122,18 +244,20 @@ def my_tracing_endpoint(self) -> Optional[str]: ) import opentelemetry +import ops from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import Span, TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor -from opentelemetry.trace import INVALID_SPAN, Tracer -from opentelemetry.trace import get_current_span as otlp_get_current_span from opentelemetry.trace import ( + INVALID_SPAN, + Tracer, get_tracer, get_tracer_provider, set_span_in_context, set_tracer_provider, ) +from opentelemetry.trace import get_current_span as otlp_get_current_span from ops.charm import CharmBase from ops.framework import Framework @@ -146,14 +270,22 @@ def my_tracing_endpoint(self) -> Optional[str]: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 10 +LIBPATCH = 15 PYDEPS = ["opentelemetry-exporter-otlp-proto-http==1.21.0"] logger = logging.getLogger("tracing") +dev_logger = logging.getLogger("tracing-dev") +# set this to 0 if you are debugging/developing this library source +dev_logger.setLevel(logging.CRITICAL) + +_CharmType = Type[CharmBase] # the type CharmBase and any subclass thereof +_C = TypeVar("_C", bound=_CharmType) +_T = TypeVar("_T", bound=type) +_F = TypeVar("_F", bound=Type[Callable]) tracer: ContextVar[Tracer] = ContextVar("tracer") -_GetterType = Union[Callable[[CharmBase], Optional[str]], property] +_GetterType = Union[Callable[[_CharmType], Optional[str]], property] CHARM_TRACING_ENABLED = "CHARM_TRACING_ENABLED" @@ -199,9 +331,22 @@ def _get_tracer() -> Optional[Tracer]: try: return tracer.get() except LookupError: + # fallback: this course-corrects for a user error where charm_tracing symbols are imported + # from different paths (typically charms.tempo_k8s... and lib.charms.tempo_k8s...) try: ctx: Context = copy_context() if context_tracer := _get_tracer_from_context(ctx): + logger.warning( + "Tracer not found in `tracer` context var. " + "Verify that you're importing all `charm_tracing` symbols from the same module path. \n" + "For example, DO" + ": `from charms.lib...charm_tracing import foo, bar`. \n" + "DONT: \n" + " \t - `from charms.lib...charm_tracing import foo` \n" + " \t - `from lib...charm_tracing import bar` \n" + "For more info: https://python-notes.curiousefficiency.org/en/latest/python" + "_concepts/import_traps.html#the-double-import-trap" + ) return context_tracer.get() else: return None @@ -219,11 +364,6 @@ def _span(name: str) -> Generator[Optional[Span], Any, Any]: yield None -_C = TypeVar("_C", bound=Type[CharmBase]) -_T = TypeVar("_T", bound=type) -_F = TypeVar("_F", bound=Type[Callable]) - - class TracingError(RuntimeError): """Base class for errors raised by this module.""" @@ -232,60 +372,78 @@ class UntraceableObjectError(TracingError): """Raised when an object you're attempting to instrument cannot be autoinstrumented.""" -def _get_tracing_endpoint(tracing_endpoint_getter, self, charm): - if isinstance(tracing_endpoint_getter, property): - tracing_endpoint = tracing_endpoint_getter.__get__(self) - else: # method or callable - tracing_endpoint = tracing_endpoint_getter(self) +class TLSError(TracingError): + """Raised when the tracing endpoint is https but we don't have a cert yet.""" + + +def _get_tracing_endpoint( + tracing_endpoint_attr: str, + charm_instance: object, + charm_type: type, +): + _tracing_endpoint = getattr(charm_instance, tracing_endpoint_attr) + if callable(_tracing_endpoint): + tracing_endpoint = _tracing_endpoint() + else: + tracing_endpoint = _tracing_endpoint if tracing_endpoint is None: - logger.debug( - f"{charm}.{tracing_endpoint_getter} returned None; quietly disabling " - f"charm_tracing for the run." - ) return + elif not isinstance(tracing_endpoint, str): raise TypeError( - f"{charm}.{tracing_endpoint_getter} should return a tempo endpoint (string); " + f"{charm_type.__name__}.{tracing_endpoint_attr} should resolve to a tempo endpoint (string); " f"got {tracing_endpoint} instead." ) - else: - logger.debug(f"Setting up span exporter to endpoint: {tracing_endpoint}/v1/traces") + + dev_logger.debug(f"Setting up span exporter to endpoint: {tracing_endpoint}/v1/traces") return f"{tracing_endpoint}/v1/traces" -def _get_server_cert(server_cert_getter, self, charm): - if isinstance(server_cert_getter, property): - server_cert = server_cert_getter.__get__(self) - else: # method or callable - server_cert = server_cert_getter(self) +def _get_server_cert( + server_cert_attr: str, + charm_instance: ops.CharmBase, + charm_type: Type[ops.CharmBase], +): + _server_cert = getattr(charm_instance, server_cert_attr) + if callable(_server_cert): + server_cert = _server_cert() + else: + server_cert = _server_cert if server_cert is None: logger.warning( - f"{charm}.{server_cert_getter} returned None; sending traces over INSECURE connection." + f"{charm_type}.{server_cert_attr} is None; sending traces over INSECURE connection." ) return elif not Path(server_cert).is_absolute(): raise ValueError( - f"{charm}.{server_cert_getter} should return a valid tls cert absolute path (string | Path)); " + f"{charm_type}.{server_cert_attr} should resolve to a valid tls cert absolute path (string | Path)); " f"got {server_cert} instead." ) return server_cert def _setup_root_span_initializer( - charm: Type[CharmBase], - tracing_endpoint_getter: _GetterType, - server_cert_getter: Optional[_GetterType], + charm_type: _CharmType, + tracing_endpoint_attr: str, + server_cert_attr: Optional[str], service_name: Optional[str] = None, ): """Patch the charm's initializer.""" - original_init = charm.__init__ + original_init = charm_type.__init__ @functools.wraps(original_init) def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): + # we're using 'self' here because this is charm init code, makes sense to read what's below + # from the perspective of the charm. Self.unit.name... + original_init(self, framework, *args, **kwargs) + # we call this from inside the init context instead of, say, _autoinstrument, because we want it to + # be checked on a per-charm-instantiation basis, not on a per-type-declaration one. if not is_enabled(): + # this will only happen during unittesting, hopefully, so it's fine to log a + # bit more verbosely logger.info("Tracing DISABLED: skipping root span initialization") return @@ -298,6 +456,9 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): _service_name = service_name or f"{self.app.name}-charm" unit_name = self.unit.name + # apply hacky patch to remove stale opentelemetry sdk packages on upgrade-charm. + # it could be trouble if someone ever decides to implement their own tracer parallel to + # ours and before the charm has inited. We assume they won't. resource = Resource.create( attributes={ "service.name": _service_name, @@ -311,25 +472,24 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): } ) provider = TracerProvider(resource=resource) - try: - tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_getter, self, charm) - except Exception: - # if anything goes wrong with retrieving the endpoint, we go on with tracing disabled. - # better than breaking the charm. - logger.exception( - f"exception retrieving the tracing " - f"endpoint from {charm}.{tracing_endpoint_getter}; " - f"proceeding with charm_tracing DISABLED. " - ) - return + + # if anything goes wrong with retrieving the endpoint, we let the exception bubble up. + tracing_endpoint = _get_tracing_endpoint(tracing_endpoint_attr, self, charm_type) if not tracing_endpoint: + # tracing is off if tracing_endpoint is None return server_cert: Optional[Union[str, Path]] = ( - _get_server_cert(server_cert_getter, self, charm) if server_cert_getter else None + _get_server_cert(server_cert_attr, self, charm_type) if server_cert_attr else None ) + if tracing_endpoint.startswith("https://") and not server_cert: + raise TLSError( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr." + ) + exporter = OTLPSpanExporter( endpoint=tracing_endpoint, certificate_file=str(Path(server_cert).absolute()) if server_cert else None, @@ -361,6 +521,7 @@ def wrap_init(self: CharmBase, framework: Framework, *args, **kwargs): @contextmanager def wrap_event_context(event_name: str): + dev_logger.info(f"entering event context: {event_name}") # when the framework enters an event context, we create a span. with _span("event: " + event_name) as event_context_span: if event_context_span: @@ -374,6 +535,7 @@ def wrap_event_context(event_name: str): @functools.wraps(original_close) def wrap_close(): + dev_logger.info("tearing down tracer and flushing traces") span.end() opentelemetry.context.detach(span_token) # type: ignore tracer.reset(_tracer_token) @@ -385,7 +547,7 @@ def wrap_close(): framework.close = wrap_close return - charm.__init__ = wrap_init + charm_type.__init__ = wrap_init # type: ignore def trace_charm( @@ -393,7 +555,7 @@ def trace_charm( server_cert: Optional[str] = None, service_name: Optional[str] = None, extra_types: Sequence[type] = (), -): +) -> Callable[[_T], _T]: """Autoinstrument the decorated charm with tracing telemetry. Use this function to get out-of-the-box traces for all events emitted on this charm and all @@ -401,7 +563,7 @@ def trace_charm( Usage: >>> from charms.tempo_k8s.v1.charm_tracing import trace_charm - >>> from charms.tempo_k8s.v1.tracing import TracingEndpointProvider + >>> from charms.tempo_k8s.v1.tracing import TracingEndpointRequirer >>> from ops import CharmBase >>> >>> @trace_charm( @@ -411,7 +573,7 @@ def trace_charm( >>> >>> def __init__(self, framework: Framework): >>> ... - >>> self.tracing = TracingEndpointProvider(self) + >>> self.tracing = TracingEndpointRequirer(self) >>> >>> @property >>> def tempo_otlp_http_endpoint(self) -> Optional[str]: @@ -420,24 +582,28 @@ def trace_charm( >>> else: >>> return None >>> - :param server_cert: method or property on the charm type that returns an - optional absolute path to a tls certificate to be used when sending traces to a remote server. - If it returns None, an _insecure_ connection will be used. - :param tracing_endpoint: name of a property on the charm type that returns an - optional (fully resolvable) tempo url. If None, tracing will be effectively disabled. Else, traces will be - pushed to that endpoint. + + :param tracing_endpoint: name of a method, property or attribute on the charm type that returns an + optional (fully resolvable) tempo url to which the charm traces will be pushed. + If None, tracing will be effectively disabled. + :param server_cert: name of a method, property or attribute on the charm type that returns an + optional absolute path to a CA certificate file to be used when sending traces to a remote server. + If it returns None, an _insecure_ connection will be used. To avoid errors in transient + situations where the endpoint is already https but there is no certificate on disk yet, it + is recommended to disable tracing (by returning None from the tracing_endpoint) altogether + until the cert has been written to disk. :param service_name: service name tag to attach to all traces generated by this charm. Defaults to the juju application name this charm is deployed under. :param extra_types: pass any number of types that you also wish to autoinstrument. For example, charm libs, relation endpoint wrappers, workload abstractions, ... """ - def _decorator(charm_type: Type[CharmBase]): + def _decorator(charm_type: _T) -> _T: """Autoinstrument the wrapped charmbase type.""" _autoinstrument( charm_type, - tracing_endpoint_getter=getattr(charm_type, tracing_endpoint), - server_cert_getter=getattr(charm_type, server_cert) if server_cert else None, + tracing_endpoint_attr=tracing_endpoint, + server_cert_attr=server_cert, service_name=service_name, extra_types=extra_types, ) @@ -447,12 +613,12 @@ def _decorator(charm_type: Type[CharmBase]): def _autoinstrument( - charm_type: Type[CharmBase], - tracing_endpoint_getter: _GetterType, - server_cert_getter: Optional[_GetterType] = None, + charm_type: _T, + tracing_endpoint_attr: str, + server_cert_attr: Optional[str] = None, service_name: Optional[str] = None, extra_types: Sequence[type] = (), -) -> Type[CharmBase]: +) -> _T: """Set up tracing on this charm class. Use this function to get out-of-the-box traces for all events emitted on this charm and all @@ -464,29 +630,32 @@ def _autoinstrument( >>> from ops.main import main >>> _autoinstrument( >>> MyCharm, - >>> tracing_endpoint_getter=MyCharm.tempo_otlp_http_endpoint, + >>> tracing_endpoint_attr="tempo_otlp_http_endpoint", >>> service_name="MyCharm", >>> extra_types=(Foo, Bar) >>> ) >>> main(MyCharm) :param charm_type: the CharmBase subclass to autoinstrument. - :param server_cert_getter: method or property on the charm type that returns an - optional absolute path to a tls certificate to be used when sending traces to a remote server. - This needs to be a valid path to a certificate. - :param tracing_endpoint_getter: method or property on the charm type that returns an - optional tempo url. If None, tracing will be effectively disabled. Else, traces will be - pushed to that endpoint. + :param tracing_endpoint_attr: name of a method, property or attribute on the charm type that returns an + optional (fully resolvable) tempo url to which the charm traces will be pushed. + If None, tracing will be effectively disabled. + :param server_cert_attr: name of a method, property or attribute on the charm type that returns an + optional absolute path to a CA certificate file to be used when sending traces to a remote server. + If it returns None, an _insecure_ connection will be used. To avoid errors in transient + situations where the endpoint is already https but there is no certificate on disk yet, it + is recommended to disable tracing (by returning None from the tracing_endpoint) altogether + until the cert has been written to disk. :param service_name: service name tag to attach to all traces generated by this charm. Defaults to the juju application name this charm is deployed under. :param extra_types: pass any number of types that you also wish to autoinstrument. For example, charm libs, relation endpoint wrappers, workload abstractions, ... """ - logger.info(f"instrumenting {charm_type}") + dev_logger.info(f"instrumenting {charm_type}") _setup_root_span_initializer( charm_type, - tracing_endpoint_getter, - server_cert_getter=server_cert_getter, + tracing_endpoint_attr, + server_cert_attr=server_cert_attr, service_name=service_name, ) trace_type(charm_type) @@ -503,46 +672,66 @@ def trace_type(cls: _T) -> _T: It assumes that this class is only instantiated after a charm type decorated with `@trace_charm` has been instantiated. """ - logger.info(f"instrumenting {cls}") + dev_logger.info(f"instrumenting {cls}") for name, method in inspect.getmembers(cls, predicate=inspect.isfunction): - logger.info(f"discovered {method}") + dev_logger.info(f"discovered {method}") if method.__name__.startswith("__"): - logger.info(f"skipping {method} (dunder)") + dev_logger.info(f"skipping {method} (dunder)") continue - new_method = trace_method(method) - if isinstance(inspect.getattr_static(cls, method.__name__), staticmethod): + # the span title in the general case should be: + # method call: MyCharmWrappedMethods.b + # if the method has a name (functools.wrapped or regular method), let + # _trace_callable use its default algorithm to determine what name to give the span. + trace_method_name = None + try: + qualname_c0 = method.__qualname__.split(".")[0] + if not hasattr(cls, method.__name__): + # if the callable doesn't have a __name__ (probably a decorated method), + # it probably has a bad qualname too (such as my_decorator..wrapper) which is not + # great for finding out what the trace is about. So we use the method name instead and + # add a reference to the decorator name. Result: + # method call: @my_decorator(MyCharmWrappedMethods.b) + trace_method_name = f"@{qualname_c0}({cls.__name__}.{name})" + except Exception: # noqa: failsafe + pass + + new_method = trace_method(method, name=trace_method_name) + + if isinstance(inspect.getattr_static(cls, name), staticmethod): new_method = staticmethod(new_method) setattr(cls, name, new_method) return cls -def trace_method(method: _F) -> _F: +def trace_method(method: _F, name: Optional[str] = None) -> _F: """Trace this method. A span will be opened when this method is called and closed when it returns. """ - return _trace_callable(method, "method") + return _trace_callable(method, "method", name=name) -def trace_function(function: _F) -> _F: +def trace_function(function: _F, name: Optional[str] = None) -> _F: """Trace this function. A span will be opened when this function is called and closed when it returns. """ - return _trace_callable(function, "function") + return _trace_callable(function, "function", name=name) -def _trace_callable(callable: _F, qualifier: str) -> _F: - logger.info(f"instrumenting {callable}") +def _trace_callable(callable: _F, qualifier: str, name: Optional[str] = None) -> _F: + dev_logger.info(f"instrumenting {callable}") # sig = inspect.signature(callable) @functools.wraps(callable) def wrapped_function(*args, **kwargs): # type: ignore - name = getattr(callable, "__qualname__", getattr(callable, "__name__", str(callable))) - with _span(f"{qualifier} call: {name}"): # type: ignore + name_ = name or getattr( + callable, "__qualname__", getattr(callable, "__name__", str(callable)) + ) + with _span(f"{qualifier} call: {name_}"): # type: ignore return callable(*args, **kwargs) # type: ignore # wrapped_function.__signature__ = sig diff --git a/lib/charms/tempo_k8s/v2/tracing.py b/lib/charms/tempo_k8s/v2/tracing.py index 8b9fb4f3..d3a7c943 100644 --- a/lib/charms/tempo_k8s/v2/tracing.py +++ b/lib/charms/tempo_k8s/v2/tracing.py @@ -97,7 +97,7 @@ def __init__(self, *args): ) from ops.framework import EventSource, Object from ops.model import ModelError, Relation -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, Field # The unique Charmhub library identifier, never change it LIBID = "12977e9aa0b34367903d8afeb8c3d85d" @@ -107,7 +107,7 @@ def __init__(self, *args): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 +LIBPATCH = 9 PYDEPS = ["pydantic"] @@ -116,14 +116,13 @@ def __init__(self, *args): DEFAULT_RELATION_NAME = "tracing" RELATION_INTERFACE_NAME = "tracing" +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 ReceiverProtocol = Literal[ "zipkin", - "kafka", - "opencensus", - "tempo_http", - "tempo_grpc", "otlp_grpc", "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", ] RawReceiver = Tuple[ReceiverProtocol, str] @@ -141,14 +140,12 @@ class TransportProtocolType(str, enum.Enum): grpc = "grpc" -receiver_protocol_to_transport_protocol = { +receiver_protocol_to_transport_protocol: Dict[ReceiverProtocol, TransportProtocolType] = { "zipkin": TransportProtocolType.http, - "kafka": TransportProtocolType.http, - "opencensus": TransportProtocolType.http, - "tempo_http": TransportProtocolType.http, - "tempo_grpc": TransportProtocolType.grpc, "otlp_grpc": TransportProtocolType.grpc, "otlp_http": TransportProtocolType.http, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, } """A mapping between telemetry protocols and their corresponding transport protocol. """ @@ -341,7 +338,7 @@ class Config: class ProtocolType(BaseModel): """Protocol Type.""" - model_config = ConfigDict( + model_config = ConfigDict( # type: ignore # Allow serializing enum values. use_enum_values=True ) @@ -928,7 +925,7 @@ def get_endpoint( def charm_tracing_config( endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] ) -> Tuple[Optional[str], Optional[str]]: - """Utility function to determine the charm_tracing config you will likely want. + """Return the charm_tracing config you likely want. If no endpoint is provided: disable charm tracing. diff --git a/lib/charms/tls_certificates_interface/v3/tls_certificates.py b/lib/charms/tls_certificates_interface/v3/tls_certificates.py index 33f34b62..da7fa95e 100644 --- a/lib/charms/tls_certificates_interface/v3/tls_certificates.py +++ b/lib/charms/tls_certificates_interface/v3/tls_certificates.py @@ -277,13 +277,13 @@ def _on_all_certificates_invalidated(self, event: AllCertificatesInvalidatedEven """ # noqa: D405, D410, D411, D214, D416 import copy +import ipaddress import json import logging import uuid from contextlib import suppress from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from ipaddress import IPv4Address from typing import List, Literal, Optional, Union from cryptography import x509 @@ -305,6 +305,7 @@ def _on_all_certificates_invalidated(self, event: AllCertificatesInvalidatedEven ModelError, Relation, RelationDataContent, + Secret, SecretNotFoundError, Unit, ) @@ -317,7 +318,7 @@ def _on_all_certificates_invalidated(self, event: AllCertificatesInvalidatedEven # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 15 +LIBPATCH = 20 PYDEPS = ["cryptography", "jsonschema"] @@ -735,16 +736,16 @@ def calculate_expiry_notification_time( """ if provider_recommended_notification_time is not None: provider_recommended_notification_time = abs(provider_recommended_notification_time) - provider_recommendation_time_delta = ( - expiry_time - timedelta(hours=provider_recommended_notification_time) + provider_recommendation_time_delta = expiry_time - timedelta( + hours=provider_recommended_notification_time ) if validity_start_time < provider_recommendation_time_delta: return provider_recommendation_time_delta if requirer_recommended_notification_time is not None: requirer_recommended_notification_time = abs(requirer_recommended_notification_time) - requirer_recommendation_time_delta = ( - expiry_time - timedelta(hours=requirer_recommended_notification_time) + requirer_recommendation_time_delta = expiry_time - timedelta( + hours=requirer_recommended_notification_time ) if validity_start_time < requirer_recommendation_time_delta: return requirer_recommendation_time_delta @@ -1077,7 +1078,7 @@ def generate_csr( # noqa: C901 if sans_oid: _sans.extend([x509.RegisteredID(x509.ObjectIdentifier(san)) for san in sans_oid]) if sans_ip: - _sans.extend([x509.IPAddress(IPv4Address(san)) for san in sans_ip]) + _sans.extend([x509.IPAddress(ipaddress.ip_address(san)) for san in sans_ip]) if sans: _sans.extend([x509.DNSName(san) for san in sans]) if sans_dns: @@ -1109,25 +1110,16 @@ def csr_matches_certificate(csr: str, cert: str) -> bool: Returns: bool: True/False depending on whether the CSR matches the certificate. """ - try: - csr_object = x509.load_pem_x509_csr(csr.encode("utf-8")) - cert_object = x509.load_pem_x509_certificate(cert.encode("utf-8")) - - if csr_object.public_key().public_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PublicFormat.SubjectPublicKeyInfo, - ) != cert_object.public_key().public_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PublicFormat.SubjectPublicKeyInfo, - ): - return False - if ( - csr_object.public_key().public_numbers().n # type: ignore[union-attr] - != cert_object.public_key().public_numbers().n # type: ignore[union-attr] - ): - return False - except ValueError: - logger.warning("Could not load certificate or CSR.") + csr_object = x509.load_pem_x509_csr(csr.encode("utf-8")) + cert_object = x509.load_pem_x509_certificate(cert.encode("utf-8")) + + if csr_object.public_key().public_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PublicFormat.SubjectPublicKeyInfo, + ) != cert_object.public_key().public_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PublicFormat.SubjectPublicKeyInfo, + ): return False return True @@ -1457,18 +1449,31 @@ def _revoke_certificates_for_which_no_csr_exists(self, relation_id: int) -> None Returns: None """ - provider_certificates = self.get_provider_certificates(relation_id) - requirer_csrs = self.get_requirer_csrs(relation_id) + provider_certificates = self.get_unsolicited_certificates(relation_id=relation_id) + for provider_certificate in provider_certificates: + self.on.certificate_revocation_request.emit( + certificate=provider_certificate.certificate, + certificate_signing_request=provider_certificate.csr, + ca=provider_certificate.ca, + chain=provider_certificate.chain, + ) + self.remove_certificate(certificate=provider_certificate.certificate) + + def get_unsolicited_certificates( + self, relation_id: Optional[int] = None + ) -> List[ProviderCertificate]: + """Return provider certificates for which no certificate requests exists. + + Those certificates should be revoked. + """ + unsolicited_certificates: List[ProviderCertificate] = [] + provider_certificates = self.get_provider_certificates(relation_id=relation_id) + requirer_csrs = self.get_requirer_csrs(relation_id=relation_id) list_of_csrs = [csr.csr for csr in requirer_csrs] for certificate in provider_certificates: if certificate.csr not in list_of_csrs: - self.on.certificate_revocation_request.emit( - certificate=certificate.certificate, - certificate_signing_request=certificate.csr, - ca=certificate.ca, - chain=certificate.chain, - ) - self.remove_certificate(certificate=certificate.certificate) + unsolicited_certificates.append(certificate) + return unsolicited_certificates def get_outstanding_certificate_requests( self, relation_id: Optional[int] = None @@ -1886,8 +1891,7 @@ def _on_relation_changed(self, event: RelationChangedEvent) -> None: "Removing secret with label %s", f"{LIBID}-{csr_in_sha256_hex}", ) - secret = self.model.get_secret( - label=f"{LIBID}-{csr_in_sha256_hex}") + secret = self.model.get_secret(label=f"{LIBID}-{csr_in_sha256_hex}") secret.remove_all_revisions() self.on.certificate_invalidated.emit( reason="revoked", @@ -1975,9 +1979,10 @@ def _on_secret_expired(self, event: SecretExpiredEvent) -> None: Args: event (SecretExpiredEvent): Juju event """ - if not event.secret.label or not event.secret.label.startswith(f"{LIBID}-"): + csr = self._get_csr_from_secret(event.secret) + if not csr: + logger.error("Failed to get CSR from secret %s", event.secret.label) return - csr = event.secret.get_content()["csr"] provider_certificate = self._find_certificate_in_relation_data(csr) if not provider_certificate: # A secret expired but we did not find matching certificate. Cleaning up @@ -2017,3 +2022,18 @@ def _find_certificate_in_relation_data(self, csr: str) -> Optional[ProviderCerti continue return provider_certificate return None + + def _get_csr_from_secret(self, secret: Secret) -> str: + """Extract the CSR from the secret label or content. + + This function is a workaround to maintain backwards compatibility + and fix the issue reported in + https://github.com/canonical/tls-certificates-interface/issues/228 + """ + if not (csr := secret.get_content().get("csr", "")): + # In versions <14 of the Lib we were storing the CSR in the label of the secret + # The CSR now is stored int the content of the secret, which was a breaking change + # Here we get the CSR if the secret was created by an app using libpatch 14 or lower + if secret.label and secret.label.startswith(f"{LIBID}-"): + csr = secret.label[len(f"{LIBID}-") :] + return csr