Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workload tracing #645

Merged
merged 10 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,16 @@ requires:
- (client) scraping targets for self-monitoring
- (client) posting alerts to alertmanager server
- (server) serving data to grafana
tracing:
charm-tracing:
interface: tracing
limit: 1
description: |
Enables sending charm traces to a distributed tracing backend such as Tempo.
workload-tracing:
interface: tracing
limit: 1
description: |
Enables sending workload traces (internal Prometheus traces) to a distributed tracing backend such as Tempo.

peers:
prometheus-peers:
Expand Down
51 changes: 43 additions & 8 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
PrometheusRemoteWriteProvider,
)
from charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm
from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer
from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer, charm_tracing_config
from charms.traefik_k8s.v1.ingress_per_unit import (
IngressPerUnitReadyForUnitEvent,
IngressPerUnitRequirer,
Expand Down Expand Up @@ -120,8 +120,8 @@ def to_status(tpl: Tuple[str, str]) -> StatusBase:


@trace_charm(
tracing_endpoint="tracing_endpoint",
server_cert="server_ca_cert_path",
tracing_endpoint="charm_tracing_endpoint",
server_cert="server_cert",
extra_types=[
KubernetesComputeResourcesPatch,
CertHandler,
Expand Down Expand Up @@ -222,7 +222,16 @@ def __init__(self, *args):
)

self.catalogue = CatalogueConsumer(charm=self, item=self._catalogue_item)
self.tracing = TracingEndpointRequirer(self, protocols=["otlp_http"])
self.charm_tracing = TracingEndpointRequirer(
self, relation_name="charm-tracing", protocols=["otlp_http"]
)
self.workload_tracing = TracingEndpointRequirer(
self, relation_name="workload-tracing", protocols=["otlp_grpc"]
)

self.charm_tracing_endpoint, self.server_cert = charm_tracing_config(
self.charm_tracing, self._ca_cert_path
)

self.framework.observe(self.on.prometheus_pebble_ready, self._on_pebble_ready)
self.framework.observe(self.on.config_changed, self._configure)
Expand Down Expand Up @@ -404,6 +413,12 @@ def _prometheus_layer(self) -> Layer:
a Pebble layer specification for the Prometheus workload container.
"""
logger.debug("Building pebble layer")
environment = {}
if self.workload_tracing_endpoint:
# tracing is ready to serve traffic, so we can add the topology.
environment["OTEL_RESOURCE_ATTRIBUTES"] = (
f"juju_application={self._topology.application},juju_model={self._topology.model},juju_model_uuid={self._topology.model_uuid},juju_unit={self._topology.unit},juju_charm={self._topology.charm_name}"
)
layer_config = {
"summary": "Prometheus layer",
"description": "Pebble layer configuration for Prometheus",
Expand All @@ -413,6 +428,7 @@ def _prometheus_layer(self) -> Layer:
"summary": "prometheus daemon",
"command": self._generate_command(),
"startup": "enabled",
"environment": environment,
}
},
}
Expand Down Expand Up @@ -947,6 +963,22 @@ def _alerting_config(self) -> dict:
)
return alerting_config

def _tracing_config(self) -> dict:
    """Assemble the tracing settings dictionary for the workload.

    The endpoint is looked up from the ``workload_tracing`` requirer using the
    ``otlp_grpc`` protocol, and the sampling fraction is fixed at ``1``.

    When ``self.server_cert`` is truthy the connection is marked as secure
    (``insecure`` is ``False``) and a ``tls_config`` entry is added that
    points at ``self.server_cert``, ``CERT_PATH`` and ``KEY_PATH``.
    Otherwise ``insecure`` is ``True`` and no ``tls_config`` entry is emitted.

    Returns:
        A dict with the keys ``endpoint``, ``sampling_fraction``, ``insecure``
        and, only in the secure case, ``tls_config``.
    """
    otlp_grpc_endpoint = self.workload_tracing.get_endpoint("otlp_grpc")
    ca_file = self.server_cert

    tracing_settings = {
        "endpoint": otlp_grpc_endpoint,
        "sampling_fraction": 1,
        # Without a CA file there is nothing to verify the backend against.
        "insecure": not ca_file,
    }

    if not ca_file:
        return tracing_settings

    tracing_settings["tls_config"] = {
        "ca_file": ca_file,
        "cert_file": CERT_PATH,
        "key_file": KEY_PATH,
    }
    return tracing_settings

def _generate_prometheus_config(self) -> bool:
"""Construct Prometheus configuration and write to filesystem.

Expand Down Expand Up @@ -974,6 +1006,9 @@ def _generate_prometheus_config(self) -> bool:

web_config = self._web_config()

if self.workload_tracing_endpoint:
prometheus_config["tracing"] = self._tracing_config()

# Check if config changed, using its hash
config_hash = sha256(
yaml.safe_dump(
Expand Down Expand Up @@ -1072,10 +1107,10 @@ def _push(self, path, contents):
self.container.push(path, contents, make_dirs=True, encoding="utf-8")

@property
def tracing_endpoint(self) -> Optional[str]:
"""Tempo endpoint for charm tracing."""
if self.tracing.is_ready():
return self.tracing.get_endpoint("otlp_http")
def workload_tracing_endpoint(self) -> Optional[str]:
"""Tempo endpoint for workload tracing."""
if self.workload_tracing.is_ready():
return self.workload_tracing.get_endpoint("otlp_grpc")
return None

@property
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ deps =
pytest-operator
prometheus-api-client
tenacity
websockets < 14.0
# https://github.com/juju/python-libjuju/issues/1184
commands =
pytest -vv --tb native --log-cli-level=INFO --color=yes -s {posargs} {toxinidir}/tests/integration
allowlist_externals =
Expand Down
Loading