Skip to content

Commit

Permalink
Change Spanner default metrics to go to OTEL
Browse files Browse the repository at this point in the history
Also allows the native metrics to be disabled entirely; otherwise, an error occurs if permissions are not properly configured.
  • Loading branch information
josephschorr committed Feb 20, 2025
1 parent b09d565 commit 1d9d0ed
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 13 deletions.
34 changes: 31 additions & 3 deletions internal/datastore/spanner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,33 @@ import (
log "github.com/authzed/spicedb/internal/logging"
)

// DatastoreMetricsOption selects which family of metrics the Spanner
// datastore emits.
type DatastoreMetricsOption string

// Supported DatastoreMetricsOption values. Only DatastoreMetricsOptionNone
// carries an explicit type; the remaining names are untyped string constants,
// so they can be used both as a DatastoreMetricsOption and as a plain string.
const (
	// DatastoreMetricsOptionNone turns every metric off.
	DatastoreMetricsOptionNone DatastoreMetricsOption = "none"

	// DatastoreMetricsOptionNative turns on the native metrics of the Spanner
	// datastore. They are sent to GCP, which requires a ServiceAccount
	// configured with the appropriate permissions.
	// See: https://cloud.google.com/spanner/docs/view-manage-client-side-metrics
	DatastoreMetricsOptionNative = "native"

	// DatastoreMetricsOptionOpenTelemetry turns on the OpenTelemetry metrics
	// of the Spanner datastore, which are sent to the configured
	// OpenTelemetry collector.
	// This is the default option.
	DatastoreMetricsOptionOpenTelemetry = "otel"

	// DatastoreMetricsOptionLegacyPrometheus turns on the legacy Prometheus
	// metrics of the Spanner datastore, which are sent to the configured
	// Prometheus server.
	// This option is deprecated and will be removed in a future release.
	DatastoreMetricsOptionLegacyPrometheus = "deprecated-prometheus"
)

type spannerOptions struct {
watchBufferLength uint16
watchBufferWriteTimeout time.Duration
Expand All @@ -29,7 +56,7 @@ type spannerOptions struct {
filterMaximumIDCount uint16
columnOptimizationOption common.ColumnOptimizationOption
expirationDisabled bool
enableDatastoreMetrics bool
datastoreMetricsOption DatastoreMetricsOption
}

type migrationPhase uint8
Expand Down Expand Up @@ -177,9 +204,10 @@ func EmulatorHost(uri string) Option {
}
}

func EnableDatastoreMetrics(enable bool) Option {
// WithDatastoreMetricsOption configures the metrics that are emitted by the Spanner datastore.
// The returned Option stores opt in the datastoreMetricsOption field of spannerOptions.
func WithDatastoreMetricsOption(opt DatastoreMetricsOption) Option {
return func(po *spannerOptions) {
po.enableDatastoreMetrics = enable
po.datastoreMetricsOption = opt
}
}

Expand Down
14 changes: 11 additions & 3 deletions internal/datastore/spanner/spanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,13 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
log.Info().Str("spanner-emulator-host", os.Getenv("SPANNER_EMULATOR_HOST")).Msg("running against spanner emulator")
}

// TODO(jschorr): Replace with OpenTelemetry instrumentation once available.
if config.enableDatastoreMetrics {
if config.datastoreMetricsOption == DatastoreMetricsOptionOpenTelemetry {
log.Info().Msg("enabling OpenTelemetry metrics for Spanner datastore")
spanner.EnableOpenTelemetryMetrics()
}

if config.datastoreMetricsOption == DatastoreMetricsOptionLegacyPrometheus {
log.Info().Msg("enabling legacy Prometheus metrics for Spanner datastore")
err = spanner.EnableStatViews() // nolint: staticcheck
if err != nil {
return nil, fmt.Errorf("failed to enable spanner session metrics: %w", err)
Expand Down Expand Up @@ -168,7 +173,10 @@ func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (
client, err := spanner.NewClientWithConfig(
context.Background(),
database,
spanner.ClientConfig{SessionPoolConfig: cfg},
spanner.ClientConfig{
SessionPoolConfig: cfg,
DisableNativeMetrics: config.datastoreMetricsOption != DatastoreMetricsOptionNative,
},
spannerOpts...,
)
if err != nil {
Expand Down
22 changes: 15 additions & 7 deletions pkg/cmd/datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,12 @@ type Config struct {
GCMaxOperationTime time.Duration `debugmap:"visible"`

// Spanner
SpannerCredentialsFile string `debugmap:"visible"`
SpannerCredentialsJSON []byte `debugmap:"sensitive"`
SpannerEmulatorHost string `debugmap:"visible"`
SpannerMinSessions uint64 `debugmap:"visible"`
SpannerMaxSessions uint64 `debugmap:"visible"`
SpannerCredentialsFile string `debugmap:"visible"`
SpannerCredentialsJSON []byte `debugmap:"sensitive"`
SpannerEmulatorHost string `debugmap:"visible"`
SpannerMinSessions uint64 `debugmap:"visible"`
SpannerMaxSessions uint64 `debugmap:"visible"`
SpannerDatastoreMetricsOption string `debugmap:"visible"`

// MySQL
TablePrefix string `debugmap:"visible"`
Expand Down Expand Up @@ -237,7 +238,7 @@ func RegisterDatastoreFlagsWithPrefix(flagSet *pflag.FlagSet, prefix string, opt
flagSet.DurationVar(&opts.RequestHedgingInitialSlowValue, flagName("datastore-request-hedging-initial-slow-value"), defaults.RequestHedgingInitialSlowValue, "initial value to use for slow datastore requests, before statistics have been collected")
flagSet.Uint64Var(&opts.RequestHedgingMaxRequests, flagName("datastore-request-hedging-max-requests"), defaults.RequestHedgingMaxRequests, "maximum number of historical requests to consider")
flagSet.Float64Var(&opts.RequestHedgingQuantile, flagName("datastore-request-hedging-quantile"), defaults.RequestHedgingQuantile, "quantile of historical datastore request time over which a request will be considered slow")
flagSet.BoolVar(&opts.EnableDatastoreMetrics, flagName("datastore-prometheus-metrics"), defaults.EnableDatastoreMetrics, "set to false to disabled prometheus metrics from the datastore")
flagSet.BoolVar(&opts.EnableDatastoreMetrics, flagName("datastore-prometheus-metrics"), defaults.EnableDatastoreMetrics, "set to false to disabled metrics from the datastore (do not use for Spanner; setting to false will disable metrics to the configured metrics store in Spanner)")
// See crdb doc for info about follower reads and how it is configured: https://www.cockroachlabs.com/docs/stable/follower-reads.html
flagSet.DurationVar(&opts.FollowerReadDelay, flagName("datastore-follower-read-delay-duration"), DefaultFollowerReadDelay, "amount of time to subtract from non-sync revision timestamps to ensure they are sufficiently in the past to enable follower reads (cockroach and spanner drivers only) or read replicas (postgres and mysql drivers only)")
flagSet.IntVar(&opts.MaxRetries, flagName("datastore-max-tx-retries"), 10, "number of times a retriable transaction should be retried")
Expand All @@ -249,6 +250,7 @@ func RegisterDatastoreFlagsWithPrefix(flagSet *pflag.FlagSet, prefix string, opt
flagSet.StringVar(&opts.SpannerEmulatorHost, flagName("datastore-spanner-emulator-host"), "", "URI of spanner emulator instance used for development and testing (e.g. localhost:9010)")
flagSet.Uint64Var(&opts.SpannerMinSessions, flagName("datastore-spanner-min-sessions"), 100, "minimum number of sessions across all Spanner gRPC connections the client can have at a given time")
flagSet.Uint64Var(&opts.SpannerMaxSessions, flagName("datastore-spanner-max-sessions"), 400, "maximum number of sessions across all Spanner gRPC connections the client can have at a given time")
flagSet.StringVar(&opts.SpannerDatastoreMetricsOption, flagName("datastore-spanner-metrics"), "otel", `configure the metrics that are emitted by the Spanner datastore ("none", "native", "otel", "deprecated-prometheus")`)
flagSet.StringVar(&opts.TablePrefix, flagName("datastore-mysql-table-prefix"), "", "prefix to add to the name of all SpiceDB database tables")
flagSet.StringVar(&opts.MigrationPhase, flagName("datastore-migration-phase"), "", "datastore-specific flag that should be used to signal to a datastore which phase of a multi-step migration it is in")
flagSet.StringArrayVar(&opts.AllowedMigrations, flagName("datastore-allowed-migrations"), []string{}, "migration levels that will not fail the health check (in addition to the current head migration)")
Expand Down Expand Up @@ -322,6 +324,7 @@ func DefaultDatastoreConfig() *Config {
SpannerMinSessions: 100,
SpannerMaxSessions: 400,
FilterMaximumIDCount: 100,
SpannerDatastoreMetricsOption: spanner.DatastoreMetricsOptionOpenTelemetry,
RelationshipIntegrityEnabled: false,
RelationshipIntegrityCurrentKey: RelIntegrityKey{},
RelationshipIntegrityExpiredKeys: []string{},
Expand Down Expand Up @@ -639,6 +642,11 @@ func newSpannerDatastore(ctx context.Context, opts Config) (datastore.Datastore,
return nil, errors.New("read replicas are not supported for the Spanner datastore engine")
}

metricsOption := spanner.DatastoreMetricsOption(opts.SpannerDatastoreMetricsOption)
if !opts.EnableDatastoreMetrics {
metricsOption = spanner.DatastoreMetricsOptionNone
}

return spanner.NewSpannerDatastore(
ctx,
opts.URI,
Expand All @@ -651,7 +659,7 @@ func newSpannerDatastore(ctx context.Context, opts Config) (datastore.Datastore,
spanner.WatchBufferWriteTimeout(opts.WatchBufferWriteTimeout),
spanner.EmulatorHost(opts.SpannerEmulatorHost),
spanner.DisableStats(opts.DisableStats),
spanner.EnableDatastoreMetrics(opts.EnableDatastoreMetrics),
spanner.WithDatastoreMetricsOption(metricsOption),
spanner.ReadConnsMaxOpen(opts.ReadConnPool.MaxOpenConns),
spanner.WriteConnsMaxOpen(opts.WriteConnPool.MaxOpenConns),
spanner.MinSessionCount(opts.SpannerMinSessions),
Expand Down
9 changes: 9 additions & 0 deletions pkg/cmd/datastore/zz_generated.options.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1d9d0ed

Please sign in to comment.