Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for the BMS monitored resource to the Prometheus receiver #1471

Merged
merged 9 commits into from Dec 5, 2023
Merged
2 changes: 1 addition & 1 deletion cmd/google_cloud_ops_agent_diagnostics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func getUserAndMergedConfigs(ctx context.Context, userConfPath string) (*confgen
func main() {
defer func() {
if r := recover(); r != nil {
log.Fatal("Recovered in run", r)
log.Fatalf("Recovering from a panic due to %v", r)
}
}()
if err := run(context.Background()); err != nil {
Expand Down
66 changes: 12 additions & 54 deletions confgenerator/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ import (

"github.com/GoogleCloudPlatform/ops-agent/confgenerator/otel"
"github.com/GoogleCloudPlatform/ops-agent/confgenerator/resourcedetector"
"github.com/GoogleCloudPlatform/ops-agent/internal/platform"
"github.com/go-playground/validator/v10"
yaml "github.com/goccy/go-yaml"
commonconfig "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
promconfig "github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
_ "github.com/prometheus/prometheus/discovery/install" // init() of this package registers service discovery impl.
strutil "github.com/prometheus/prometheus/util/strutil"
)

var (
Expand All @@ -60,13 +60,15 @@ func (r PrometheusMetrics) Type() string {
return "prometheus"
}

func (r PrometheusMetrics) Pipelines(_ context.Context) []otel.ReceiverPipeline {
// Get the resource metadata for the instance we're running on.
if gceMetadata, ok := MetadataResource.(resourcedetector.GCEResource); ok {
// Create a prometheus style mapping for the GCE metadata.
gceMetadataMap := createPrometheusStyleGCEMetadata(gceMetadata)

// Add the GCE metadata to the prometheus config.
func (r PrometheusMetrics) Pipelines(ctx context.Context) []otel.ReceiverPipeline {
resource := MetadataResource
if p := platform.FromContext(ctx).ResourceOverride; p != nil {
resource = p
}
if resource != nil {
// Get the resource metadata for the instance we're running on.
resourceMetadataMap := resource.PrometheusStyleMetadata()
// Add the resource metadata to the prometheus config.
for i := range r.PromConfig.ScrapeConfigs {
// Iterate over the static configs.
for j := range r.PromConfig.ScrapeConfigs[i].ServiceDiscoveryConfigs {
Expand All @@ -76,8 +78,8 @@ func (r PrometheusMetrics) Pipelines(_ context.Context) []otel.ReceiverPipeline
if labels == nil {
labels = model.LabelSet{}
}
for k, v := range gceMetadataMap {
// If there are conflicts, the GCE metadata should take precedence.
for k, v := range resourceMetadataMap {
// If there are conflicts, the resource metadata should take precedence.
labels[model.LabelName(k)] = model.LabelValue(v)
}

Expand Down Expand Up @@ -152,50 +154,6 @@ func deepCopy(config promconfig.Config) (promconfig.Config, error) {
return copyConfig, nil
}

// createPrometheusStyleGCEMetadata builds the label map that is attached to
// Prometheus scrape targets from the metadata of the GCE instance described
// by gceMetadata.
//
// The result holds the "__meta_gce_*" labels for the instance fields, one
// label per instance metadata entry and per IPv4 interface, the
// location/namespace/cluster labels, and the curated instance_name and
// machine_type labels.
func createPrometheusStyleGCEMetadata(gceMetadata resourcedetector.GCEResource) map[string]string {
	const gcePrefix = "__meta_gce_"

	labels := map[string]string{
		"__meta_gce_instance_id":   gceMetadata.InstanceID,
		"__meta_gce_instance_name": gceMetadata.InstanceName,
		"__meta_gce_project":       gceMetadata.Project,
		"__meta_gce_zone":          gceMetadata.Zone,
		"__meta_gce_network":       gceMetadata.Network,
		// TODO(b/246995894): Add support for subnetwork label.
		// "__meta_gce_subnetwork": gceMetadata.Subnetwork,
		"__meta_gce_public_ip":    gceMetadata.PublicIP,
		"__meta_gce_private_ip":   gceMetadata.PrivateIP,
		"__meta_gce_tags":         gceMetadata.Tags,
		"__meta_gce_machine_type": gceMetadata.MachineType,

		// Location, namespace and cluster labels.
		"location":  gceMetadata.Zone,
		"namespace": gceMetadata.InstanceID,
		"cluster":   "__gce__",

		// Curated labels.
		"instance_name": gceMetadata.InstanceName,
		"machine_type":  gceMetadata.MachineType,
	}

	// One label per instance metadata entry. Every "$" in the value is doubled;
	// presumably so a literal "$" survives config expansion downstream — TODO confirm.
	for name, value := range gceMetadata.Metadata {
		key := gcePrefix + "metadata_" + strutil.SanitizeLabelName(name)
		labels[key] = strings.ReplaceAll(value, "$", "$$")
	}

	// Labels are not available using the GCE metadata API.
	// TODO(b/246995462): Add support for labels.
	//
	// for k, v := range gceMetadata.Label {
	// 	labels[gcePrefix+"label_"+k] = v
	// }

	// One label per network interface IPv4 address.
	for nic, addr := range gceMetadata.InterfaceIPv4 {
		key := gcePrefix + "interface_ipv4_nic" + strutil.SanitizeLabelName(nic)
		labels[key] = addr
	}

	return labels
}

func validatePrometheusConfig(sl validator.StructLevel) {
promConfig := sl.Current().Interface().(promconfig.Config)

Expand Down
13 changes: 13 additions & 0 deletions confgenerator/resourcedetector/bms_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ func (r BMSResource) MonitoredResource() *monitoredres.MonitoredResource {
}
}

// PrometheusStyleMetadata returns the BMS resource's identifying fields as a
// map of Prometheus-style labels: the "__meta_bms_*" labels plus the
// location, namespace and cluster labels.
func (r BMSResource) PrometheusStyleMetadata() map[string]string {
	return map[string]string{
		"__meta_bms_instance_id": r.InstanceID,
		"__meta_bms_project":     r.Project,
		"__meta_bms_location":    r.Location,

		// Location, namespace and cluster labels.
		"location":  r.Location,
		"namespace": r.InstanceID,
		"cluster":   "__bms__",
	}
}

// OnBMS reports whether the BMS project ID, location and instance ID
// environment variables are all set to non-empty values.
func OnBMS() bool {
	required := []string{bmsProjectIDEnv, bmsLocationEnv, bmsInstanceIDEnv}
	for _, name := range required {
		if os.Getenv(name) == "" {
			return false
		}
	}
	return true
}
Expand Down
5 changes: 5 additions & 0 deletions confgenerator/resourcedetector/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type Resource interface {
MonitoredResource() *monitoredres.MonitoredResource
OTelResourceAttributes() map[string]string
ProjectName() string
PrometheusStyleMetadata() map[string]string
}

// Get a resource instance for the current environment;
Expand Down Expand Up @@ -59,6 +60,10 @@ func (UnrecognizedPlatformResource) MonitoredResource() *monitoredres.MonitoredR
return nil
}

// PrometheusStyleMetadata returns nil: an unrecognized platform contributes no
// Prometheus-style metadata labels. Ranging over the nil map is a no-op.
func (UnrecognizedPlatformResource) PrometheusStyleMetadata() map[string]string {
return nil
}

// GetUnrecognizedPlatformResource returns an empty UnrecognizedPlatformResource
// as a Resource. The returned error is always nil.
func GetUnrecognizedPlatformResource() (Resource, error) {
return UnrecognizedPlatformResource{}, nil
}
51 changes: 50 additions & 1 deletion confgenerator/resourcedetector/gce_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@

package resourcedetector

import "google.golang.org/genproto/googleapis/api/monitoredres"
import (
"strings"

"github.com/prometheus/prometheus/util/strutil"
"google.golang.org/genproto/googleapis/api/monitoredres"
)

type gceAttribute int

Expand Down Expand Up @@ -128,6 +133,50 @@ func (r GCEResource) MonitoredResource() *monitoredres.MonitoredResource {
}
}

// PrometheusStyleMetadata returns the GCE instance's metadata as a map of
// Prometheus-style labels: "__meta_gce_*" entries for the instance fields,
// one entry per instance metadata key and per IPv4 interface, the
// location/namespace/cluster labels, and the curated instance_name and
// machine_type labels.
func (r GCEResource) PrometheusStyleMetadata() map[string]string {
quentinmit marked this conversation as resolved.
Show resolved Hide resolved
metaLabels := map[string]string{
"__meta_gce_instance_id": r.InstanceID,
"__meta_gce_instance_name": r.InstanceName,
"__meta_gce_project": r.Project,
"__meta_gce_zone": r.Zone,
"__meta_gce_network": r.Network,
// TODO(b/246995894): Add support for subnetwork label.
igorpeshansky marked this conversation as resolved.
Show resolved Hide resolved
// "__meta_gce_subnetwork": r.Subnetwork,
"__meta_gce_public_ip": r.PublicIP,
"__meta_gce_private_ip": r.PrivateIP,
"__meta_gce_tags": r.Tags,
"__meta_gce_machine_type": r.MachineType,
}
prefix := "__meta_gce_"
// One label per instance metadata entry, with the key sanitized into a valid
// label name. Every "$" in the value is doubled; presumably so a literal "$"
// survives config expansion downstream — TODO confirm.
for k, v := range r.Metadata {
sanitizedKey := "metadata_" + strutil.SanitizeLabelName(k)
metaLabels[prefix+sanitizedKey] = strings.ReplaceAll(v, "$", "$$")
}

// Labels are not available using the GCE metadata API.
// TODO(b/246995462): Add support for labels.
//
// for k, v := range r.Label {
// metaLabels[prefix+"label_"+k] = v
// }

// One label per network interface IPv4 address, keyed by the sanitized
// interface key.
for k, v := range r.InterfaceIPv4 {
sanitizedKey := "interface_ipv4_nic" + strutil.SanitizeLabelName(k)
metaLabels[prefix+sanitizedKey] = v
}

// Set the location, namespace and cluster labels.
metaLabels["location"] = r.Zone
metaLabels["namespace"] = r.InstanceID
metaLabels["cluster"] = "__gce__"

// Set some curated labels.
metaLabels["instance_name"] = r.InstanceName
metaLabels["machine_type"] = r.MachineType

return metaLabels
}

type GCEResourceBuilderInterface interface {
GetResource() (Resource, error)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlecloud/otel:
metric:
Expand All @@ -16,10 +14,10 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlemanagedprometheus:
metric:
add_metric_suffixes: false
retry_on_failure:
enabled: false
untyped_double_export: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlecloud/otel:
metric:
Expand All @@ -16,8 +14,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlecloud/otel:
metric:
Expand All @@ -16,10 +14,10 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlemanagedprometheus:
metric:
add_metric_suffixes: false
retry_on_failure:
enabled: false
untyped_double_export: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlecloud/otel:
metric:
Expand All @@ -16,10 +14,10 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlemanagedprometheus:
metric:
add_metric_suffixes: false
retry_on_failure:
enabled: false
untyped_double_export: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlecloud/otel:
metric:
Expand All @@ -16,10 +14,10 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlemanagedprometheus:
metric:
add_metric_suffixes: false
retry_on_failure:
enabled: false
untyped_double_export: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
googlemanagedprometheus:
metric:
add_metric_suffixes: false
retry_on_failure:
enabled: false
untyped_double_export: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ exporters:
resource_filters: []
service_resource_labels: false
skip_create_descriptor: true
retry_on_failure:
enabled: false
user_agent: Google-Cloud-Ops-Agent-Metrics/latest (BuildDistro=build_distro;Platform=linux;ShortName=linux_platform;ShortVersion=linux_platform_version)
processors:
agentmetrics/hostmetrics_0:
Expand Down
Loading
Loading