From 1d9d0ed95baf06f6380f016ba204bb93e9e56c74 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 20 Feb 2025 15:01:50 -0500 Subject: [PATCH] Change Spanner default metrics to go to OTEL Also allows for disabling the native metrics entirely; otherwise you get an error if permissions are not properly configured --- internal/datastore/spanner/options.go | 34 +++++++++++++++++++++-- internal/datastore/spanner/spanner.go | 14 ++++++++-- pkg/cmd/datastore/datastore.go | 22 ++++++++++----- pkg/cmd/datastore/zz_generated.options.go | 9 ++++++ 4 files changed, 66 insertions(+), 13 deletions(-) diff --git a/internal/datastore/spanner/options.go b/internal/datastore/spanner/options.go index ce0ebffbf3..7d17405cfc 100644 --- a/internal/datastore/spanner/options.go +++ b/internal/datastore/spanner/options.go @@ -10,6 +10,33 @@ import ( log "github.com/authzed/spicedb/internal/logging" ) +// DatastoreMetricsOption is an option for configuring the metrics that are emitted +// by the Spanner datastore. +type DatastoreMetricsOption string + +const ( + // DatastoreMetricsOptionNone disables all metrics. + DatastoreMetricsOptionNone DatastoreMetricsOption = "none" + + // DatastoreMetricsOptionNative enables the native metrics that are emitted + // by the Spanner datastore. These metrics are emitted to GCP and require + // a ServiceAccount with the appropriate permissions to be configured. + // See: https://cloud.google.com/spanner/docs/view-manage-client-side-metrics + DatastoreMetricsOptionNative = "native" + + // DatastoreMetricsOptionOpenTelemetry enables the OpenTelemetry metrics that are emitted + // by the Spanner datastore. These metrics are emitted to the configured + // OpenTelemetry collector. + // This option is enabled by default. + DatastoreMetricsOptionOpenTelemetry = "otel" + + // DatastoreMetricsOptionLegacyPrometheus enables the legacy Prometheus metrics that are emitted + // by the Spanner datastore. 
These metrics are emitted to the configured + // Prometheus server. + // This option is deprecated and will be removed in a future release. + DatastoreMetricsOptionLegacyPrometheus = "deprecated-prometheus" +) + type spannerOptions struct { watchBufferLength uint16 watchBufferWriteTimeout time.Duration @@ -29,7 +56,7 @@ type spannerOptions struct { filterMaximumIDCount uint16 columnOptimizationOption common.ColumnOptimizationOption expirationDisabled bool - enableDatastoreMetrics bool + datastoreMetricsOption DatastoreMetricsOption } type migrationPhase uint8 @@ -177,9 +204,10 @@ func EmulatorHost(uri string) Option { } } -func EnableDatastoreMetrics(enable bool) Option { +// WithDatastoreMetricsOption configures the metrics that are emitted by the Spanner datastore. +func WithDatastoreMetricsOption(opt DatastoreMetricsOption) Option { return func(po *spannerOptions) { - po.enableDatastoreMetrics = enable + po.datastoreMetricsOption = opt } } diff --git a/internal/datastore/spanner/spanner.go b/internal/datastore/spanner/spanner.go index ed54c657da..ad3369c3f8 100644 --- a/internal/datastore/spanner/spanner.go +++ b/internal/datastore/spanner/spanner.go @@ -121,8 +121,13 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) ( log.Info().Str("spanner-emulator-host", os.Getenv("SPANNER_EMULATOR_HOST")).Msg("running against spanner emulator") } - // TODO(jschorr): Replace with OpenTelemetry instrumentation once available. 
- if config.enableDatastoreMetrics { + if config.datastoreMetricsOption == DatastoreMetricsOptionOpenTelemetry { + log.Info().Msg("enabling OpenTelemetry metrics for Spanner datastore") + spanner.EnableOpenTelemetryMetrics() + } + + if config.datastoreMetricsOption == DatastoreMetricsOptionLegacyPrometheus { + log.Info().Msg("enabling legacy Prometheus metrics for Spanner datastore") err = spanner.EnableStatViews() // nolint: staticcheck if err != nil { return nil, fmt.Errorf("failed to enable spanner session metrics: %w", err) @@ -168,7 +173,10 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) ( client, err := spanner.NewClientWithConfig( context.Background(), database, - spanner.ClientConfig{SessionPoolConfig: cfg}, + spanner.ClientConfig{ + SessionPoolConfig: cfg, + DisableNativeMetrics: config.datastoreMetricsOption != DatastoreMetricsOptionNative, + }, spannerOpts..., ) if err != nil { diff --git a/pkg/cmd/datastore/datastore.go b/pkg/cmd/datastore/datastore.go index deb43e0839..e7357e65d9 100644 --- a/pkg/cmd/datastore/datastore.go +++ b/pkg/cmd/datastore/datastore.go @@ -147,11 +147,12 @@ type Config struct { GCMaxOperationTime time.Duration `debugmap:"visible"` // Spanner - SpannerCredentialsFile string `debugmap:"visible"` - SpannerCredentialsJSON []byte `debugmap:"sensitive"` - SpannerEmulatorHost string `debugmap:"visible"` - SpannerMinSessions uint64 `debugmap:"visible"` - SpannerMaxSessions uint64 `debugmap:"visible"` + SpannerCredentialsFile string `debugmap:"visible"` + SpannerCredentialsJSON []byte `debugmap:"sensitive"` + SpannerEmulatorHost string `debugmap:"visible"` + SpannerMinSessions uint64 `debugmap:"visible"` + SpannerMaxSessions uint64 `debugmap:"visible"` + SpannerDatastoreMetricsOption string `debugmap:"visible"` // MySQL TablePrefix string `debugmap:"visible"` @@ -237,7 +238,7 @@ func RegisterDatastoreFlagsWithPrefix(flagSet *pflag.FlagSet, prefix string, opt 
flagSet.DurationVar(&opts.RequestHedgingInitialSlowValue, flagName("datastore-request-hedging-initial-slow-value"), defaults.RequestHedgingInitialSlowValue, "initial value to use for slow datastore requests, before statistics have been collected") flagSet.Uint64Var(&opts.RequestHedgingMaxRequests, flagName("datastore-request-hedging-max-requests"), defaults.RequestHedgingMaxRequests, "maximum number of historical requests to consider") flagSet.Float64Var(&opts.RequestHedgingQuantile, flagName("datastore-request-hedging-quantile"), defaults.RequestHedgingQuantile, "quantile of historical datastore request time over which a request will be considered slow") - flagSet.BoolVar(&opts.EnableDatastoreMetrics, flagName("datastore-prometheus-metrics"), defaults.EnableDatastoreMetrics, "set to false to disabled prometheus metrics from the datastore") + flagSet.BoolVar(&opts.EnableDatastoreMetrics, flagName("datastore-prometheus-metrics"), defaults.EnableDatastoreMetrics, "set to false to disabled metrics from the datastore (do not use for Spanner; setting to false will disable metrics to the configured metrics store in Spanner)") // See crdb doc for info about follower reads and how it is configured: https://www.cockroachlabs.com/docs/stable/follower-reads.html flagSet.DurationVar(&opts.FollowerReadDelay, flagName("datastore-follower-read-delay-duration"), DefaultFollowerReadDelay, "amount of time to subtract from non-sync revision timestamps to ensure they are sufficiently in the past to enable follower reads (cockroach and spanner drivers only) or read replicas (postgres and mysql drivers only)") flagSet.IntVar(&opts.MaxRetries, flagName("datastore-max-tx-retries"), 10, "number of times a retriable transaction should be retried") @@ -249,6 +250,7 @@ func RegisterDatastoreFlagsWithPrefix(flagSet *pflag.FlagSet, prefix string, opt flagSet.StringVar(&opts.SpannerEmulatorHost, flagName("datastore-spanner-emulator-host"), "", "URI of spanner emulator instance used for 
development and testing (e.g. localhost:9010)") flagSet.Uint64Var(&opts.SpannerMinSessions, flagName("datastore-spanner-min-sessions"), 100, "minimum number of sessions across all Spanner gRPC connections the client can have at a given time") flagSet.Uint64Var(&opts.SpannerMaxSessions, flagName("datastore-spanner-max-sessions"), 400, "maximum number of sessions across all Spanner gRPC connections the client can have at a given time") + flagSet.StringVar(&opts.SpannerDatastoreMetricsOption, flagName("datastore-spanner-metrics"), "otel", `configure the metrics that are emitted by the Spanner datastore ("none", "native", "otel", "deprecated-prometheus")`) flagSet.StringVar(&opts.TablePrefix, flagName("datastore-mysql-table-prefix"), "", "prefix to add to the name of all SpiceDB database tables") flagSet.StringVar(&opts.MigrationPhase, flagName("datastore-migration-phase"), "", "datastore-specific flag that should be used to signal to a datastore which phase of a multi-step migration it is in") flagSet.StringArrayVar(&opts.AllowedMigrations, flagName("datastore-allowed-migrations"), []string{}, "migration levels that will not fail the health check (in addition to the current head migration)") @@ -322,6 +324,7 @@ func DefaultDatastoreConfig() *Config { SpannerMinSessions: 100, SpannerMaxSessions: 400, FilterMaximumIDCount: 100, + SpannerDatastoreMetricsOption: spanner.DatastoreMetricsOptionOpenTelemetry, RelationshipIntegrityEnabled: false, RelationshipIntegrityCurrentKey: RelIntegrityKey{}, RelationshipIntegrityExpiredKeys: []string{}, @@ -639,6 +642,11 @@ func newSpannerDatastore(ctx context.Context, opts Config) (datastore.Datastore, return nil, errors.New("read replicas are not supported for the Spanner datastore engine") } + metricsOption := spanner.DatastoreMetricsOption(opts.SpannerDatastoreMetricsOption) + if !opts.EnableDatastoreMetrics { + metricsOption = spanner.DatastoreMetricsOptionNone + } + return spanner.NewSpannerDatastore( ctx, opts.URI, @@ -651,7 
+659,7 @@ func newSpannerDatastore(ctx context.Context, opts Config) (datastore.Datastore, spanner.WatchBufferWriteTimeout(opts.WatchBufferWriteTimeout), spanner.EmulatorHost(opts.SpannerEmulatorHost), spanner.DisableStats(opts.DisableStats), - spanner.EnableDatastoreMetrics(opts.EnableDatastoreMetrics), + spanner.WithDatastoreMetricsOption(metricsOption), spanner.ReadConnsMaxOpen(opts.ReadConnPool.MaxOpenConns), spanner.WriteConnsMaxOpen(opts.WriteConnPool.MaxOpenConns), spanner.MinSessionCount(opts.SpannerMinSessions), diff --git a/pkg/cmd/datastore/zz_generated.options.go b/pkg/cmd/datastore/zz_generated.options.go index 4e0c537307..656419a7bc 100644 --- a/pkg/cmd/datastore/zz_generated.options.go +++ b/pkg/cmd/datastore/zz_generated.options.go @@ -69,6 +69,7 @@ func (c *Config) ToOption() ConfigOption { to.SpannerEmulatorHost = c.SpannerEmulatorHost to.SpannerMinSessions = c.SpannerMinSessions to.SpannerMaxSessions = c.SpannerMaxSessions + to.SpannerDatastoreMetricsOption = c.SpannerDatastoreMetricsOption to.TablePrefix = c.TablePrefix to.RelationshipIntegrityEnabled = c.RelationshipIntegrityEnabled to.RelationshipIntegrityCurrentKey = c.RelationshipIntegrityCurrentKey @@ -123,6 +124,7 @@ func (c Config) DebugMap() map[string]any { debugMap["SpannerEmulatorHost"] = helpers.DebugValue(c.SpannerEmulatorHost, false) debugMap["SpannerMinSessions"] = helpers.DebugValue(c.SpannerMinSessions, false) debugMap["SpannerMaxSessions"] = helpers.DebugValue(c.SpannerMaxSessions, false) + debugMap["SpannerDatastoreMetricsOption"] = helpers.DebugValue(c.SpannerDatastoreMetricsOption, false) debugMap["TablePrefix"] = helpers.DebugValue(c.TablePrefix, false) debugMap["RelationshipIntegrityEnabled"] = helpers.DebugValue(c.RelationshipIntegrityEnabled, false) debugMap["RelationshipIntegrityCurrentKey"] = helpers.DebugValue(c.RelationshipIntegrityCurrentKey, false) @@ -447,6 +449,13 @@ func WithSpannerMaxSessions(spannerMaxSessions uint64) ConfigOption { } } +// 
WithSpannerDatastoreMetricsOption returns an option that can set SpannerDatastoreMetricsOption on a Config +func WithSpannerDatastoreMetricsOption(spannerDatastoreMetricsOption string) ConfigOption { + return func(c *Config) { + c.SpannerDatastoreMetricsOption = spannerDatastoreMetricsOption + } +} + // WithTablePrefix returns an option that can set TablePrefix on a Config func WithTablePrefix(tablePrefix string) ConfigOption { return func(c *Config) {