Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Traces host info metric #1689

Merged
merged 6 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions pkg/export/otel/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
SpanMetricsCalls = "traces_spanmetrics_calls_total"
SpanMetricsSizes = "traces_spanmetrics_size_total"
TracesTargetInfo = "traces_target_info"
TracesHostInfo = "traces_host_info"
ServiceGraphClient = "traces_service_graph_request_client"
ServiceGraphServer = "traces_service_graph_request_server"
ServiceGraphFailed = "traces_service_graph_request_failed_total"
Expand Down Expand Up @@ -248,6 +249,13 @@
if err != nil {
return nil, fmt.Errorf("instantiating OTEL metrics reporter: %w", err)
}

if mr.cfg.SpanMetricsEnabled() || mr.cfg.ServiceGraphMetricsEnabled() {
hostMetrics := mr.newMetricsInstance(nil)
hostMeter := hostMetrics.provider.Meter(reporterName)
mr.setupHostInfoMeter(hostMeter)

Check failure on line 256 in pkg/export/otel/metrics.go

View workflow job for this annotation

GitHub Actions / test (1.23)

Error return value of `mr.setupHostInfoMeter` is not checked (errcheck)
}

return mr.reportMetrics, nil
}
}
Expand Down Expand Up @@ -536,6 +544,21 @@
return nil
}

func (mr *MetricsReporter) setupHostInfoMeter(meter instrument.Meter) error {
if !mr.cfg.SpanMetricsEnabled() || !mr.cfg.ServiceGraphMetricsEnabled() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check duplicates the one already performed before invoking setupHostInfoMeter, and it is also inconsistent with it. Shouldn't it be:

if !mr.cfg.SpanMetricsEnabled() && !mr.cfg.ServiceGraphMetricsEnabled() {

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, you are right, I'll remove it.

return nil
}

tracesHostInfo, err := meter.Int64Gauge(TracesHostInfo)
if err != nil {
return fmt.Errorf("creating span metric traces host info: %w", err)
}
attrOpt := instrument.WithAttributeSet(mr.metricHostAttributes())
tracesHostInfo.Record(mr.ctx, 1, attrOpt)

return nil
}

func (mr *MetricsReporter) setupGraphMeters(m *Metrics, meter instrument.Meter) error {
if !mr.cfg.ServiceGraphMetricsEnabled() {
return nil
Expand Down Expand Up @@ -583,10 +606,14 @@
return nil
}

func (mr *MetricsReporter) newMetricSet(service *svc.Attrs) (*Metrics, error) {
mlog := mlog().With("service", service)
func (mr *MetricsReporter) newMetricsInstance(service *svc.Attrs) Metrics {
mlog := mlog()
var resourceAttributes []attribute.KeyValue
if service != nil {
mlog = mlog.With("service", service)
resourceAttributes = append(getAppResourceAttrs(mr.hostID, service), ResourceAttrsFromEnv(service)...)
}
mlog.Debug("creating new Metrics reporter")
resourceAttributes := append(getAppResourceAttrs(mr.hostID, service), ResourceAttrsFromEnv(service)...)
resources := resource.NewWithAttributes(semconv.SchemaURL, resourceAttributes...)

opts := []metric.Option{
Expand All @@ -599,13 +626,18 @@
opts = append(opts, mr.spanMetricOptions(mlog)...)
opts = append(opts, mr.graphMetricOptions(mlog)...)

m := Metrics{
return Metrics{
ctx: mr.ctx,
service: service,
provider: metric.NewMeterProvider(
opts...,
),
}
}

func (mr *MetricsReporter) newMetricSet(service *svc.Attrs) (*Metrics, error) {
m := mr.newMetricsInstance(service)

// time units for HTTP and GRPC durations are in seconds, according to the OTEL specification:
// https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions
// TODO: set ExplicitBucketBoundaries here and in prometheus from the previous specification
Expand Down Expand Up @@ -764,6 +796,14 @@
return attribute.NewSet(attrs...)
}

// metricHostAttributes returns the attribute set recorded with the traces host
// info metric. The set holds a single entry: the semconv.GrafanaHostID
// attribute built from the reporter's hostID.
func (mr *MetricsReporter) metricHostAttributes() attribute.Set {
	hostID := semconv.GrafanaHostID(mr.hostID)
	return attribute.NewSet(hostID)
}

// spanMetricAttributes follow a given specification, so their attribute getters are predefined and can't be
// selected by the user
func (mr *MetricsReporter) spanMetricAttributes() []attributes.Field[*request.Span, attribute.KeyValue] {
Expand Down
19 changes: 17 additions & 2 deletions pkg/export/prom/prom.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const (
SpanMetricsCalls = "traces_spanmetrics_calls_total"
SpanMetricsSizes = "traces_spanmetrics_size_total"
TracesTargetInfo = "traces_target_info"
TracesHostInfo = "traces_host_info"
TargetInfo = "target_info"

ServiceGraphClient = "traces_service_graph_request_client_seconds"
Expand All @@ -45,8 +46,9 @@ const (
serviceKey = "service"
serviceNamespaceKey = "service_namespace"

hostIDKey = "host_id"
hostNameKey = "host_name"
hostIDKey = "host_id"
hostNameKey = "host_name"
grafanaHostIDKey = "grafana_host_id"

k8sNamespaceName = "k8s_namespace_name"
k8sPodName = "k8s_pod_name"
Expand Down Expand Up @@ -91,6 +93,7 @@ const (

// not adding version, as it is a fixed value
var beylaInfoLabelNames = []string{LanguageLabel}
var hostInfoLabelNames = []string{grafanaHostIDKey}

// TODO: TLS
type PrometheusConfig struct {
Expand Down Expand Up @@ -188,6 +191,7 @@ type metricsReporter struct {
spanMetricsCallsTotal *Expirer[prometheus.Counter]
spanMetricsSizeTotal *Expirer[prometheus.Counter]
tracesTargetInfo *Expirer[prometheus.Gauge]
tracesHostInfo *Expirer[prometheus.Gauge]

// trace service graph
serviceGraphClient *Expirer[prometheus.Histogram]
Expand Down Expand Up @@ -453,6 +457,12 @@ func newReporter(
Help: "target service information in trace span metric format",
}, labelNamesTargetInfo(kubeEnabled)).MetricVec, clock.Time, cfg.TTL)
}),
tracesHostInfo: optionalGaugeProvider(cfg.SpanMetricsEnabled() || cfg.ServiceGraphMetricsEnabled(), func() *Expirer[prometheus.Gauge] {
return NewExpirer[prometheus.Gauge](prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: TracesHostInfo,
Help: "A metric with a constant '1' value labeled by the host id ",
}, hostInfoLabelNames).MetricVec, clock.Time, cfg.TTL)
}),
serviceGraphClient: optionalHistogramProvider(cfg.ServiceGraphMetricsEnabled(), func() *Expirer[prometheus.Histogram] {
return NewExpirer[prometheus.Histogram](prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: ServiceGraphClient,
Expand Down Expand Up @@ -565,6 +575,10 @@ func newReporter(
)
}

if cfg.SpanMetricsEnabled() || cfg.ServiceGraphMetricsEnabled() {
registeredMetrics = append(registeredMetrics, mr.tracesHostInfo)
}

if is.GPUEnabled() {
registeredMetrics = append(registeredMetrics,
mr.gpuKernelCallsTotal,
Expand Down Expand Up @@ -636,6 +650,7 @@ func (r *metricsReporter) observe(span *request.Span) {
}
t := span.Timings()
r.beylaInfo.WithLabelValues(span.Service.SDKLanguage.String()).metric.Set(1.0)
r.tracesHostInfo.WithLabelValues(r.hostID).metric.Set(1.0)
duration := t.End.Sub(t.RequestStart).Seconds()

targetInfoLabelValues := r.labelValuesTargetInfo(span.Service)
Expand Down
1 change: 1 addition & 0 deletions test/integration/docker-compose-client.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ services:
BEYLA_INTERNAL_METRICS_PROMETHEUS_PORT: 8999
BEYLA_PROCESSES_INTERVAL: "100ms"
BEYLA_HOSTNAME: "beyla"
BEYLA_PROMETHEUS_FEATURES: "application,application_span,application_process,application_service_graph"
BEYLA_OTEL_METRIC_FEATURES: "application,application_process"
ports:
- "8999:8999" # Prometheus scrape port, if enabled via config
Expand Down
11 changes: 11 additions & 0 deletions test/integration/red_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,17 @@ func testPrometheusBeylaBuildInfo(t *testing.T) {
})
}

// testHostInfo checks that the traces_host_info metric is exposed through
// Prometheus. The query is retried via test.Eventually (bounded by
// testTimeout) until it succeeds and returns at least one result.
func testHostInfo(t *testing.T) {
	client := prom.Client{HostPort: prometheusHostPort}
	test.Eventually(t, testTimeout, func(t require.TestingT) {
		hostInfo, err := client.Query(`traces_host_info{}`)
		require.NoError(t, err)
		require.NotEmpty(t, hostInfo)
	})
}

func testPrometheusBPFMetrics(t *testing.T) {
t.Skip("BPF metrics are not available in the test environment")
pq := prom.Client{HostPort: prometheusHostPort}
Expand Down
2 changes: 2 additions & 0 deletions test/integration/suites_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func TestSuite(t *testing.T) {
t.Run("GRPC TLS RED metrics", testREDMetricsGRPCTLS)
t.Run("Internal Prometheus metrics", testInternalPrometheusExport)
t.Run("Exemplars exist", testExemplarsExist)
t.Run("Testing Host Info metric", testHostInfo)

require.NoError(t, compose.Close())
}
Expand Down Expand Up @@ -80,6 +81,7 @@ func TestSuiteClientPromScrape(t *testing.T) {
require.NoError(t, compose.Up())
t.Run("Client RED metrics", testREDMetricsForClientHTTPLibraryNoTraces)
t.Run("Testing Beyla Build Info metric", testPrometheusBeylaBuildInfo)
t.Run("Testing Host Info metric", testHostInfo)
t.Run("Testing process-level metrics", testProcesses(map[string]string{
"process_executable_name": "pingclient",
"process_executable_path": "/pingclient",
Expand Down
9 changes: 9 additions & 0 deletions vendor/go.opentelemetry.io/otel/semconv/v1.19.0/resource.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading