Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Traces host info metric #1689

Merged
merged 6 commits into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 43 additions & 4 deletions pkg/export/otel/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ const (
SpanMetricsCalls = "traces_spanmetrics_calls_total"
SpanMetricsSizes = "traces_spanmetrics_size_total"
TracesTargetInfo = "traces_target_info"
TracesHostInfo = "traces_host_info"
ServiceGraphClient = "traces_service_graph_request_client"
ServiceGraphServer = "traces_service_graph_request_server"
ServiceGraphFailed = "traces_service_graph_request_failed_total"
Expand Down Expand Up @@ -248,6 +249,16 @@ func ReportMetrics(
if err != nil {
return nil, fmt.Errorf("instantiating OTEL metrics reporter: %w", err)
}

if mr.cfg.SpanMetricsEnabled() || mr.cfg.ServiceGraphMetricsEnabled() {
hostMetrics := mr.newMetricsInstance(nil)
hostMeter := hostMetrics.provider.Meter(reporterName)
err := mr.setupHostInfoMeter(hostMeter)
if err != nil {
return nil, fmt.Errorf("setting up host metrics: %w", err)
}
}

return mr.reportMetrics, nil
}
}
Expand Down Expand Up @@ -536,6 +547,17 @@ func (mr *MetricsReporter) setupSpanMeters(m *Metrics, meter instrument.Meter) e
return nil
}

// setupHostInfoMeter registers the traces_host_info Int64 gauge on the given
// meter and records a single sample with value 1, labeled with the attribute
// set returned by metricHostAttributes.
//
// It returns a wrapped error if the gauge instrument cannot be created.
func (mr *MetricsReporter) setupHostInfoMeter(meter instrument.Meter) error {
	hostInfoGauge, err := meter.Int64Gauge(TracesHostInfo)
	if err != nil {
		return fmt.Errorf("creating span metric traces host info: %w", err)
	}

	// The gauge carries no measurement of its own: the constant 1 only
	// serves as a carrier for the host attributes.
	hostInfoGauge.Record(mr.ctx, 1, instrument.WithAttributeSet(mr.metricHostAttributes()))
	return nil
}

func (mr *MetricsReporter) setupGraphMeters(m *Metrics, meter instrument.Meter) error {
if !mr.cfg.ServiceGraphMetricsEnabled() {
return nil
Expand Down Expand Up @@ -583,10 +605,14 @@ func (mr *MetricsReporter) setupGraphMeters(m *Metrics, meter instrument.Meter)
return nil
}

func (mr *MetricsReporter) newMetricSet(service *svc.Attrs) (*Metrics, error) {
mlog := mlog().With("service", service)
func (mr *MetricsReporter) newMetricsInstance(service *svc.Attrs) Metrics {
mlog := mlog()
var resourceAttributes []attribute.KeyValue
if service != nil {
mlog = mlog.With("service", service)
resourceAttributes = append(getAppResourceAttrs(mr.hostID, service), ResourceAttrsFromEnv(service)...)
}
mlog.Debug("creating new Metrics reporter")
resourceAttributes := append(getAppResourceAttrs(mr.hostID, service), ResourceAttrsFromEnv(service)...)
resources := resource.NewWithAttributes(semconv.SchemaURL, resourceAttributes...)

opts := []metric.Option{
Expand All @@ -599,13 +625,18 @@ func (mr *MetricsReporter) newMetricSet(service *svc.Attrs) (*Metrics, error) {
opts = append(opts, mr.spanMetricOptions(mlog)...)
opts = append(opts, mr.graphMetricOptions(mlog)...)

m := Metrics{
return Metrics{
ctx: mr.ctx,
service: service,
provider: metric.NewMeterProvider(
opts...,
),
}
}

func (mr *MetricsReporter) newMetricSet(service *svc.Attrs) (*Metrics, error) {
m := mr.newMetricsInstance(service)

// time units for HTTP and GRPC durations are in seconds, according to the OTEL specification:
// https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions
// TODO: set ExplicitBucketBoundaries here and in prometheus from the previous specification
Expand Down Expand Up @@ -764,6 +795,14 @@ func (mr *MetricsReporter) metricResourceAttributes(service *svc.Attrs) attribut
return attribute.NewSet(attrs...)
}

// metricHostAttributes returns the attribute set attached to the host info
// metric. It contains a single attribute: the Grafana host ID built from
// mr.hostID.
func (mr *MetricsReporter) metricHostAttributes() attribute.Set {
	return attribute.NewSet(semconv.GrafanaHostID(mr.hostID))
}

// spanMetricAttributes follow a given specification, so their attribute getters are predefined and can't be
// selected by the user
func (mr *MetricsReporter) spanMetricAttributes() []attributes.Field[*request.Span, attribute.KeyValue] {
Expand Down
21 changes: 19 additions & 2 deletions pkg/export/prom/prom.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const (
SpanMetricsCalls = "traces_spanmetrics_calls_total"
SpanMetricsSizes = "traces_spanmetrics_size_total"
TracesTargetInfo = "traces_target_info"
TracesHostInfo = "traces_host_info"
TargetInfo = "target_info"

ServiceGraphClient = "traces_service_graph_request_client_seconds"
Expand All @@ -45,8 +46,9 @@ const (
serviceKey = "service"
serviceNamespaceKey = "service_namespace"

hostIDKey = "host_id"
hostNameKey = "host_name"
hostIDKey = "host_id"
hostNameKey = "host_name"
grafanaHostIDKey = "grafana_host_id"

k8sNamespaceName = "k8s_namespace_name"
k8sPodName = "k8s_pod_name"
Expand Down Expand Up @@ -91,6 +93,7 @@ const (

// not adding version, as it is a fixed value
var beylaInfoLabelNames = []string{LanguageLabel}
var hostInfoLabelNames = []string{grafanaHostIDKey}

// TODO: TLS
type PrometheusConfig struct {
Expand Down Expand Up @@ -188,6 +191,7 @@ type metricsReporter struct {
spanMetricsCallsTotal *Expirer[prometheus.Counter]
spanMetricsSizeTotal *Expirer[prometheus.Counter]
tracesTargetInfo *Expirer[prometheus.Gauge]
tracesHostInfo *Expirer[prometheus.Gauge]

// trace service graph
serviceGraphClient *Expirer[prometheus.Histogram]
Expand Down Expand Up @@ -453,6 +457,12 @@ func newReporter(
Help: "target service information in trace span metric format",
}, labelNamesTargetInfo(kubeEnabled)).MetricVec, clock.Time, cfg.TTL)
}),
tracesHostInfo: optionalGaugeProvider(cfg.SpanMetricsEnabled() || cfg.ServiceGraphMetricsEnabled(), func() *Expirer[prometheus.Gauge] {
return NewExpirer[prometheus.Gauge](prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: TracesHostInfo,
Help: "A metric with a constant '1' value labeled by the host id ",
}, hostInfoLabelNames).MetricVec, clock.Time, cfg.TTL)
}),
serviceGraphClient: optionalHistogramProvider(cfg.ServiceGraphMetricsEnabled(), func() *Expirer[prometheus.Histogram] {
return NewExpirer[prometheus.Histogram](prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: ServiceGraphClient,
Expand Down Expand Up @@ -565,6 +575,10 @@ func newReporter(
)
}

if cfg.SpanMetricsEnabled() || cfg.ServiceGraphMetricsEnabled() {
registeredMetrics = append(registeredMetrics, mr.tracesHostInfo)
}

if is.GPUEnabled() {
registeredMetrics = append(registeredMetrics,
mr.gpuKernelCallsTotal,
Expand Down Expand Up @@ -636,6 +650,9 @@ func (r *metricsReporter) observe(span *request.Span) {
}
t := span.Timings()
r.beylaInfo.WithLabelValues(span.Service.SDKLanguage.String()).metric.Set(1.0)
if r.cfg.SpanMetricsEnabled() || r.cfg.ServiceGraphMetricsEnabled() {
r.tracesHostInfo.WithLabelValues(r.hostID).metric.Set(1.0)
}
duration := t.End.Sub(t.RequestStart).Seconds()

targetInfoLabelValues := r.labelValuesTargetInfo(span.Service)
Expand Down
1 change: 1 addition & 0 deletions test/integration/docker-compose-client.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ services:
BEYLA_INTERNAL_METRICS_PROMETHEUS_PORT: 8999
BEYLA_PROCESSES_INTERVAL: "100ms"
BEYLA_HOSTNAME: "beyla"
BEYLA_PROMETHEUS_FEATURES: "application,application_span,application_process,application_service_graph"
BEYLA_OTEL_METRIC_FEATURES: "application,application_process"
ports:
- "8999:8999" # Prometheus scrape port, if enabled via config
Expand Down
11 changes: 11 additions & 0 deletions test/integration/red_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,17 @@ func testPrometheusBeylaBuildInfo(t *testing.T) {
})
}

// testHostInfo verifies that the traces_host_info metric is reported to
// Prometheus. Inside test.Eventually (bounded by testTimeout) it runs the
// query and requires that it succeeds and returns at least one result.
func testHostInfo(t *testing.T) {
	client := prom.Client{HostPort: prometheusHostPort}
	test.Eventually(t, testTimeout, func(t require.TestingT) {
		results, err := client.Query(`traces_host_info{}`)
		require.NoError(t, err)
		require.NotEmpty(t, results)
	})
}

func testPrometheusBPFMetrics(t *testing.T) {
t.Skip("BPF metrics are not available in the test environment")
pq := prom.Client{HostPort: prometheusHostPort}
Expand Down
2 changes: 2 additions & 0 deletions test/integration/suites_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func TestSuite(t *testing.T) {
t.Run("GRPC TLS RED metrics", testREDMetricsGRPCTLS)
t.Run("Internal Prometheus metrics", testInternalPrometheusExport)
t.Run("Exemplars exist", testExemplarsExist)
t.Run("Testing Host Info metric", testHostInfo)

require.NoError(t, compose.Close())
}
Expand Down Expand Up @@ -80,6 +81,7 @@ func TestSuiteClientPromScrape(t *testing.T) {
require.NoError(t, compose.Up())
t.Run("Client RED metrics", testREDMetricsForClientHTTPLibraryNoTraces)
t.Run("Testing Beyla Build Info metric", testPrometheusBeylaBuildInfo)
t.Run("Testing Host Info metric", testHostInfo)
t.Run("Testing process-level metrics", testProcesses(map[string]string{
"process_executable_name": "pingclient",
"process_executable_path": "/pingclient",
Expand Down
9 changes: 9 additions & 0 deletions vendor/go.opentelemetry.io/otel/semconv/v1.19.0/resource.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading