From 9d4ffe70b200329f9e2e681c89ca1d3f4f8a04be Mon Sep 17 00:00:00 2001 From: Tanjin Xu <109303790+tanjinx@users.noreply.github.com> Date: Tue, 9 May 2023 11:01:47 -0700 Subject: [PATCH] `slack-vitess-r14.0.5:`cherrypick: backport rdonly patch (#77) * backport pr 29 * fix missing variable * fix missing patch * update help text --- go/flags/endtoend/vtgate.txt | 1 + go/vt/vtgate/tabletgateway.go | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index cbbe3215fa9..580c677fbd8 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -32,6 +32,7 @@ Usage of vtgate: --gate_query_cache_memory int gate server query cache size in bytes, maximum amount of memory to be cached. vtgate analyzes every incoming query and generate a query plan, these plans are being cached in a lru cache. This config controls the capacity of the lru cache. (default 33554432) --gate_query_cache_size int gate server query cache size, maximum number of queries to be cached. vtgate analyzes every incoming query and generate a query plan, these plans are being cached in a cache. This config controls the expected amount of unique entries in the cache. (default 5000) --gateway_initial_tablet_timeout duration At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type (default 30s) + --gateway_route_replica_to_rdonly route REPLICA queries to RDONLY tablets as well as REPLICA tablets --grpc-use-effective-groups If set, and SSL is not used, will set the immediate caller's security groups from the effective caller id's groups. --grpc_auth_mode string Which auth plugin implementation to use (eg: static) --grpc_auth_mtls_allowed_substrings string List of substrings of at least one of the client certificate names (separated by colon). diff --git a/go/vt/vtgate/tabletgateway.go b/go/vt/vtgate/tabletgateway.go index 011b2e09b7a..306e0f4c510 100644 --- a/go/vt/vtgate/tabletgateway.go +++ b/go/vt/vtgate/tabletgateway.go @@ -51,7 +51,8 @@ var ( bufferImplementation = "keyspace_events" initialTabletTimeout = 30 * time.Second // retryCount is the number of times a query will be retried on error - retryCount = 2 + retryCount = 2 + routeReplicaToRdonly bool ) func init() { @@ -60,6 +61,7 @@ func init() { fs.StringVar(&bufferImplementation, "buffer_implementation", "keyspace_events", "Allowed values: healthcheck (legacy implementation), keyspace_events (default)") fs.DurationVar(&initialTabletTimeout, "gateway_initial_tablet_timeout", 30*time.Second, "At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type") fs.IntVar(&retryCount, "retry-count", 2, "retry count") + fs.BoolVar(&routeReplicaToRdonly, "gateway_route_replica_to_rdonly", false, "route REPLICA queries to RDONLY tablets as well as REPLICA tablets") }) } @@ -293,6 +295,20 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, } tablets := gw.hc.GetHealthyTabletStats(target) + + // temporary hack to enable REPLICA type queries to address both REPLICA tablets and RDONLY tablets + // original commit - https://github.com/tinyspeck/vitess/pull/166/commits/2552b4ce25a9fdb41ff07fa69f2ccf485fea83ac + // discoverygateway patch - https://github.com/slackhq/vitess/commit/47adb7c8fc720cb4cb7a090530b3e88d310ff6d3 + if routeReplicaToRdonly && target.TabletType == topodatapb.TabletType_REPLICA { + // Create a new target for the same original keyspace/shard, but RDONLY tablet type. + rdonlyTarget := &querypb.Target{ + Keyspace: target.Keyspace, + Shard: target.Shard, + TabletType: topodatapb.TabletType_RDONLY, + } + tablets = append(tablets, gw.hc.GetHealthyTabletStats(rdonlyTarget)...) + } + if len(tablets) == 0 { // if we have a keyspace event watcher, check if the reason why our primary is not available is that it's currently being resharded // or if a reparent operation is in progress. @@ -341,7 +357,11 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, startTime := time.Now() var canRetry bool - canRetry, err = inner(ctx, target, th.Conn) + if routeReplicaToRdonly && target.TabletType == topodatapb.TabletType_REPLICA { + canRetry, err = inner(ctx, th.Target, th.Conn) + } else { + canRetry, err = inner(ctx, target, th.Conn) + } gw.updateStats(target, startTime, err) if canRetry { invalidTablets[topoproto.TabletAliasString(tabletLastUsed.Alias)] = true