From f02cb5b14179787b4cd0dbd8e35d086931ca6a7d Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 26 Feb 2024 14:49:29 -0500 Subject: [PATCH 01/23] Fix global behavior `ResetTime` bug. Every call to `GetRateLimits` would reset the `ResetTime` and not the `Remaining` counter. This would cause counters to eventually deplete and never fully reset. --- algorithms.go | 30 ++++++++------ functional_test.go | 35 ++++++++++------- global.go | 11 ++++-- gubernator.go | 8 +++- peer_client.go | 8 ++-- peers.pb.go | 72 +++++++++++++++++++++++----------- peers.proto | 36 ++++++++++------- python/gubernator/peers_pb2.py | 12 +++--- workers.go | 18 +++++---- 9 files changed, 143 insertions(+), 87 deletions(-) diff --git a/algorithms.go b/algorithms.go index f2ed4a82..a9937c59 100644 --- a/algorithms.go +++ b/algorithms.go @@ -18,6 +18,7 @@ package gubernator import ( "context" + "time" "github.com/mailgun/holster/v4/clock" "github.com/prometheus/client_golang/prometheus" @@ -34,8 +35,7 @@ import ( // with 100 emails and the request will succeed. You can override this default behavior with `DRAIN_OVER_LIMIT` // Implements token bucket algorithm for rate limiting. https://en.wikipedia.org/wiki/Token_bucket -func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - +func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { tokenBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("tokenBucket")) defer tokenBucketTimer.ObserveDuration() @@ -100,7 +100,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, requestTime) } // Update the limit if it changed. @@ -133,7 +133,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // If our new duration means we are currently expired. - now := MillisecondNow() + now := EpochMillis(requestTime) if expire <= now { // Renew item. span.AddEvent("Limit has expired") @@ -196,12 +196,12 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // Item is not found in cache or store, create new. - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, requestTime) } // Called by tokenBucket() when adding a new item in the store. 
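The essence of the fix above is that the bucket's clock now comes from the request rather than from a fresh wall-clock read on every call, so a read-only check can no longer push the reset deadline forward. A minimal, self-contained sketch of that idea, using simplified types rather than Gubernator's actual TokenBucketItem:

package main

import (
	"fmt"
	"time"
)

// bucket is a simplified stand-in for a token-bucket cache item.
type bucket struct {
	limit     int64
	remaining int64
	resetAt   int64 // epoch milliseconds when the window expires
	duration  int64 // window length in milliseconds
}

// take applies hits using the caller-supplied request time. The reset
// deadline is only recomputed when the window has actually expired, so
// polling the limit (hits == 0) never extends resetAt.
func (b *bucket) take(hits int64, requestTime time.Time) bool {
	now := requestTime.UnixNano() / 1_000_000
	if now >= b.resetAt {
		b.resetAt = now + b.duration
		b.remaining = b.limit
	}
	if hits > b.remaining {
		return false // over limit
	}
	b.remaining -= hits
	return true
}

func main() {
	b := &bucket{limit: 5, remaining: 5, duration: 60_000}
	fmt.Println(b.take(1, time.Now()), b.remaining) // true 4
	fmt.Println(b.take(0, time.Now()), b.remaining) // true 4: a read does not reset the window
}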
-func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - now := MillisecondNow() +func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { + now := EpochMillis(requestTime) expire := now + r.Duration t := &TokenBucketItem{ @@ -252,7 +252,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) } // Implements leaky bucket algorithm for rate limiting https://en.wikipedia.org/wiki/Leaky_bucket -func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { leakyBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getRateLimit_leakyBucket")) defer leakyBucketTimer.ObserveDuration() @@ -260,7 +260,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * r.Burst = r.Limit } - now := MillisecondNow() + now := EpochMillis(requestTime) // Get rate limit from cache. hashKey := r.HashKey() @@ -309,7 +309,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, requestTime) } if HasBehavior(r.Behavior, Behavior_RESET_REMAINING) { @@ -421,12 +421,12 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * return rl, nil } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, requestTime) } // Called by leakyBucket() when adding a new item in the store. -func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { - now := MillisecondNow() +func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { + now := EpochMillis(requestTime) duration := r.Duration rate := float64(duration) / float64(r.Limit) if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { @@ -480,3 +480,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) return &rl, nil } + +func EpochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} diff --git a/functional_test.go b/functional_test.go index 2d365b13..e3b906d6 100644 --- a/functional_test.go +++ b/functional_test.go @@ -971,7 +971,7 @@ func TestGlobalRateLimits(t *testing.T) { peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) - sendHit := func(client guber.V1Client, status guber.Status, hits int64, remain int64) { + sendHit := func(client guber.V1Client, status guber.Status, hits, expectRemaining, expectResetTime int64) int64 { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ @@ -988,16 +988,21 @@ func TestGlobalRateLimits(t *testing.T) { }, }) require.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].Error) - assert.Equal(t, remain, resp.Responses[0].Remaining) - assert.Equal(t, status, resp.Responses[0].Status) - assert.Equal(t, int64(5), resp.Responses[0].Limit) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + assert.Equal(t, expectRemaining, item.Remaining) + assert.Equal(t, status, item.Status) + assert.Equal(t, int64(5), item.Limit) + if expectResetTime != 
0 { + assert.Equal(t, expectResetTime, item.ResetTime) + } + return item.ResetTime } // Our first hit should create the request on the peer and queue for async forward - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1, 4) + _ = sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1, 4, 0) // Our second should be processed as if we own it since the async forward hasn't occurred yet - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 2, 2) + _ = sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 2, 2, 0) testutil.UntilPass(t, 20, clock.Millisecond*200, func(t testutil.TestingT) { // Inspect peers metrics, ensure the peer sent the global rate limit to the owner @@ -1009,19 +1014,21 @@ func TestGlobalRateLimits(t *testing.T) { owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) + // Get the ResetTime from owner. + expectResetTime := sendHit(owner.MustClient(), guber.Status_UNDER_LIMIT, 0, 2, 0) require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) // Check different peers, they should have gotten the broadcast from the owner - sendHit(peers[1].MustClient(), guber.Status_UNDER_LIMIT, 0, 2) - sendHit(peers[2].MustClient(), guber.Status_UNDER_LIMIT, 0, 2) + sendHit(peers[1].MustClient(), guber.Status_UNDER_LIMIT, 0, 2, expectResetTime) + sendHit(peers[2].MustClient(), guber.Status_UNDER_LIMIT, 0, 2, expectResetTime) // Non owning peer should calculate the rate limit remaining before forwarding // to the owner. - sendHit(peers[3].MustClient(), guber.Status_UNDER_LIMIT, 2, 0) + sendHit(peers[3].MustClient(), guber.Status_UNDER_LIMIT, 2, 0, expectResetTime) require.NoError(t, waitForBroadcast(clock.Second*3, owner, 2)) - sendHit(peers[4].MustClient(), guber.Status_OVER_LIMIT, 1, 0) + sendHit(peers[4].MustClient(), guber.Status_OVER_LIMIT, 1, 0, expectResetTime) } // Ensure global broadcast updates all peers when GetRateLimits is called on @@ -1034,6 +1041,8 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { // Determine owner and non-owner peers. ownerPeerInfo, err := cluster.FindOwningPeer(name, key) require.NoError(t, err) + ownerDaemon, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) owner := ownerPeerInfo.GRPCAddress nonOwner := cluster.PeerAt(0).GRPCAddress if nonOwner == owner { @@ -1078,9 +1087,7 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { // deplete the limit consistently. sendHit(guber.Status_UNDER_LIMIT, 1) sendHit(guber.Status_UNDER_LIMIT, 2) - - // Sleep to ensure the global broadcast occurs (every 100ms). - time.Sleep(150 * time.Millisecond) + require.NoError(t, waitForBroadcast(clock.Second*3, ownerDaemon, 1)) // All successive hits should return OVER_LIMIT. 
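The swap from a fixed time.Sleep to waitForBroadcast above follows a general pattern: poll an observable condition under a deadline instead of guessing how long asynchronous work takes. A generic sketch of that pattern; the condition function and timings here are placeholders, not Gubernator APIs:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// waitFor polls cond until it returns true or the timeout elapses.
// It avoids the flakiness of a fixed sleep: fast runs return early,
// slow runs still get the full deadline.
func waitFor(timeout time.Duration, cond func() bool) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	for {
		if cond() {
			return nil
		}
		select {
		case <-time.After(100 * time.Millisecond):
		case <-ctx.Done():
			return errors.New("condition not met before timeout")
		}
	}
}

func main() {
	start := time.Now()
	// Example condition standing in for "the broadcast counter reached its target".
	err := waitFor(3*time.Second, func() bool { return time.Since(start) > 250*time.Millisecond })
	fmt.Println("waited:", time.Since(start).Round(50*time.Millisecond), "err:", err)
}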
for i := 2; i <= 10; i++ { diff --git a/global.go b/global.go index b1f652ae..6f6e924b 100644 --- a/global.go +++ b/global.go @@ -18,6 +18,7 @@ package gubernator import ( "context" + "time" "github.com/mailgun/holster/v4/syncutil" "github.com/prometheus/client_golang/prometheus" @@ -73,11 +74,13 @@ func (gm *globalManager) QueueHit(r *RateLimitReq) { gm.hitsQueue <- r } -func (gm *globalManager) QueueUpdate(req *RateLimitReq, resp *RateLimitResp) { +func (gm *globalManager) QueueUpdate(req *RateLimitReq, resp *RateLimitResp, requestTime time.Time) { gm.broadcastQueue <- &UpdatePeerGlobal{ - Key: req.HashKey(), - Algorithm: req.Algorithm, - Status: resp, + Key: req.HashKey(), + Algorithm: req.Algorithm, + Duration: req.Duration, + Status: resp, + RequestTime: EpochMillis(requestTime), } } diff --git a/gubernator.go b/gubernator.go index f33fa48c..bde63652 100644 --- a/gubernator.go +++ b/gubernator.go @@ -23,6 +23,7 @@ import ( "sync" "github.com/mailgun/errors" + "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/syncutil" "github.com/mailgun/holster/v4/tracing" "github.com/prometheus/client_golang/prometheus" @@ -423,6 +424,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals item.Value = &LeakyBucketItem{ Remaining: float64(g.Status.Remaining), Limit: g.Status.Limit, + Duration: g.Duration, Burst: g.Status.Limit, UpdatedAt: now, } @@ -430,6 +432,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals item.Value = &TokenBucketItem{ Status: g.Status.Status, Limit: g.Status.Limit, + Duration: g.Duration, Remaining: g.Status.Remaining, CreatedAt: now, } @@ -572,7 +575,8 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - resp, err := s.workerPool.GetRateLimit(ctx, r) + requestTime := clock.Now() + resp, err := s.workerPool.GetRateLimit(ctx, r, requestTime) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } @@ -580,7 +584,7 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ metricGetRateLimitCounter.WithLabelValues("local").Inc() // If global behavior, then broadcast update to all peers. 
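For context on the QueueUpdate change above: the broadcast payload now has to carry everything a non-owner needs to rebuild the bucket deterministically, which is why the duration and the original request time travel alongside the status. A rough sketch of that shape; the field names are illustrative, not the generated protobuf types:

package main

import (
	"fmt"
	"time"
)

// peerUpdate mirrors the idea behind UpdatePeerGlobal: ship enough state
// that every peer derives the same remaining count and reset deadline.
type peerUpdate struct {
	Key         string
	Limit       int64
	Remaining   int64
	Duration    int64 // window length in milliseconds
	RequestTime int64 // epoch ms of the request that produced this state
}

// apply reconstructs a local view of the bucket from the owner's update.
func apply(u peerUpdate) (remaining, resetAt int64) {
	return u.Remaining, u.RequestTime + u.Duration
}

func main() {
	u := peerUpdate{
		Key:         "account:123445",
		Limit:       5,
		Remaining:   2,
		Duration:    60_000,
		RequestTime: time.Now().UnixNano() / 1_000_000,
	}
	remaining, resetAt := apply(u)
	fmt.Printf("key=%s remaining=%d resetAt=%d\n", u.Key, remaining, resetAt)
}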
if HasBehavior(r.Behavior, Behavior_GLOBAL) { - s.global.QueueUpdate(r, resp) + s.global.QueueUpdate(r, resp, requestTime) } return resp, nil diff --git a/peer_client.go b/peer_client.go index a39d9f02..98d08f41 100644 --- a/peer_client.go +++ b/peer_client.go @@ -21,6 +21,7 @@ import ( "crypto/tls" "fmt" "sync" + "time" "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/collections" @@ -71,9 +72,10 @@ type response struct { } type request struct { - request *RateLimitReq - resp chan *response - ctx context.Context + request *RateLimitReq + resp chan *response + ctx context.Context + requestTime time.Time } type PeerConfig struct { diff --git a/peers.pb.go b/peers.pb.go index a805b29a..d4100832 100644 --- a/peers.pb.go +++ b/peers.pb.go @@ -185,9 +185,17 @@ type UpdatePeerGlobal struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` - Status *RateLimitResp `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` - Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" json:"algorithm,omitempty"` + // Uniquely identifies this rate limit IE: 'ip:10.2.10.7' or 'account:123445' + Key string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Status *RateLimitResp `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` + // The algorithm used to calculate the rate limit. The algorithm may change on + // subsequent requests, when this occurs any previous rate limit hit counts are reset. + Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" json:"algorithm,omitempty"` + // The duration of the rate limit in milliseconds + Duration int64 `protobuf:"varint,4,opt,name=duration,proto3" json:"duration,omitempty"` + // Time of original GetRateLimits request so that ExpiresAt timestamps can be + // synchronized. 
+ RequestTime int64 `protobuf:"varint,5,opt,name=request_time,json=requestTime,proto3" json:"request_time,omitempty"` } func (x *UpdatePeerGlobal) Reset() { @@ -243,6 +251,20 @@ func (x *UpdatePeerGlobal) GetAlgorithm() Algorithm { return Algorithm_TOKEN_BUCKET } +func (x *UpdatePeerGlobal) GetDuration() int64 { + if x != nil { + return x.Duration + } + return 0 +} + +func (x *UpdatePeerGlobal) GetRequestTime() int64 { + if x != nil { + return x.RequestTime + } + return 0 +} + type UpdatePeerGlobalsResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -302,7 +324,7 @@ var file_peers_proto_rawDesc = []byte{ 0x39, 0x0a, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, - 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0x92, 0x01, 0x0a, 0x10, 0x55, + 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0xd1, 0x01, 0x0a, 0x10, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x34, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, @@ -311,25 +333,29 @@ var file_peers_proto_rawDesc = []byte{ 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x36, 0x0a, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x67, 0x6f, 0x72, - 0x69, 0x74, 0x68, 0x6d, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x22, - 0x17, 0x0a, 0x15, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, - 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, - 0x72, 0x73, 0x56, 0x31, 0x12, 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, - 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, - 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, - 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, - 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, - 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, - 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, - 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, - 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, - 0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, - 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, - 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, - 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x69, 
0x74, 0x68, 0x6d, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, + 0x1a, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x72, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x22, 0x17, + 0x0a, 0x15, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, + 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, 0x72, + 0x73, 0x56, 0x31, 0x12, 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, + 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, + 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, + 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, + 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, + 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, + 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, + 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, + 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, + 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, } var ( diff --git a/peers.proto b/peers.proto index 1ce2a431..f97f4ead 100644 --- a/peers.proto +++ b/peers.proto @@ -26,32 +26,40 @@ import "gubernator.proto"; // NOTE: For use by gubernator peers only service PeersV1 { - // Used by peers to relay batches of requests to an owner peer - rpc GetPeerRateLimits (GetPeerRateLimitsReq) returns (GetPeerRateLimitsResp) {} + // Used by peers to relay batches of requests to an owner peer + rpc GetPeerRateLimits (GetPeerRateLimitsReq) returns (GetPeerRateLimitsResp) {} - // Used by owner peers to send global rate limit updates to non-owner peers - rpc UpdatePeerGlobals (UpdatePeerGlobalsReq) returns (UpdatePeerGlobalsResp) {} + // Used by owner peers to send global rate limit updates to non-owner peers + rpc UpdatePeerGlobals (UpdatePeerGlobalsReq) returns (UpdatePeerGlobalsResp) {} } message GetPeerRateLimitsReq { - // Must specify at least one RateLimit. The peer that recives this request MUST be authoritative for - // each rate_limit[x].unique_key provided, as the peer will not forward the request to any other peers - repeated RateLimitReq requests = 1; + // Must specify at least one RateLimit. 
The peer that recives this request MUST be authoritative for + // each rate_limit[x].unique_key provided, as the peer will not forward the request to any other peers + repeated RateLimitReq requests = 1; } message GetPeerRateLimitsResp { - // Responses are in the same order as they appeared in the PeerRateLimitRequests - repeated RateLimitResp rate_limits = 1; + // Responses are in the same order as they appeared in the PeerRateLimitRequests + repeated RateLimitResp rate_limits = 1; } message UpdatePeerGlobalsReq { - // Must specify at least one RateLimit - repeated UpdatePeerGlobal globals = 1; + // Must specify at least one RateLimit + repeated UpdatePeerGlobal globals = 1; } message UpdatePeerGlobal { - string key = 1; - RateLimitResp status = 2; - Algorithm algorithm = 3; + // Uniquely identifies this rate limit IE: 'ip:10.2.10.7' or 'account:123445' + string key = 1; + RateLimitResp status = 2; + // The algorithm used to calculate the rate limit. The algorithm may change on + // subsequent requests, when this occurs any previous rate limit hit counts are reset. + Algorithm algorithm = 3; + // The duration of the rate limit in milliseconds + int64 duration = 4; + // Time of original GetRateLimits request so that ExpiresAt timestamps can be + // synchronized. + int64 request_time = 5; } message UpdatePeerGlobalsResp {} diff --git a/python/gubernator/peers_pb2.py b/python/gubernator/peers_pb2.py index b1451c7a..9619dda8 100644 --- a/python/gubernator/peers_pb2.py +++ b/python/gubernator/peers_pb2.py @@ -15,7 +15,7 @@ import gubernator_pb2 as gubernator__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\x92\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\xd1\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x1a\n\x08\x64uration\x18\x04 \x01(\x03R\x08\x64uration\x12!\n\x0crequest_time\x18\x05 
\x01(\x03R\x0brequestTime\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -30,9 +30,9 @@ _globals['_UPDATEPEERGLOBALSREQ']._serialized_start=217 _globals['_UPDATEPEERGLOBALSREQ']._serialized_end=298 _globals['_UPDATEPEERGLOBAL']._serialized_start=301 - _globals['_UPDATEPEERGLOBAL']._serialized_end=447 - _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=449 - _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=472 - _globals['_PEERSV1']._serialized_start=475 - _globals['_PEERSV1']._serialized_end=680 + _globals['_UPDATEPEERGLOBAL']._serialized_end=510 + _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=512 + _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=535 + _globals['_PEERSV1']._serialized_start=538 + _globals['_PEERSV1']._serialized_end=743 # @@protoc_insertion_point(module_scope) diff --git a/workers.go b/workers.go index 07ba177f..04557f76 100644 --- a/workers.go +++ b/workers.go @@ -42,6 +42,7 @@ import ( "strconv" "sync" "sync/atomic" + "time" "github.com/OneOfOne/xxhash" "github.com/mailgun/holster/v4/errors" @@ -199,7 +200,7 @@ func (p *WorkerPool) dispatch(worker *Worker) { } resp := new(response) - resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, worker.cache) + resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, req.requestTime, worker.cache) select { case req.resp <- resp: // Success. @@ -258,16 +259,17 @@ func (p *WorkerPool) dispatch(worker *Worker) { } // GetRateLimit sends a GetRateLimit request to worker pool. -func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) (retval *RateLimitResp, reterr error) { +func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, requestTime time.Time) (*RateLimitResp, error) { // Delegate request to assigned channel based on request key. worker := p.getWorker(rlRequest.HashKey()) queueGauge := metricWorkerQueue.WithLabelValues("GetRateLimit", worker.name) queueGauge.Inc() defer queueGauge.Dec() handlerRequest := request{ - ctx: ctx, - resp: make(chan *response, 1), - request: rlRequest, + ctx: ctx, + resp: make(chan *response, 1), + request: rlRequest, + requestTime: requestTime, } // Send request. @@ -289,14 +291,14 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) } // Handle request received by worker. 
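The workers.go change above threads the request time through the pool's request struct rather than re-reading the clock inside the worker. A stripped-down sketch of that hand-off pattern, using a toy pool rather than the real WorkerPool:

package main

import (
	"fmt"
	"time"
)

// job carries everything the worker needs, including the timestamp the
// caller resolved, so the worker never consults the clock itself.
type job struct {
	key         string
	requestTime time.Time
	resp        chan string
}

func worker(jobs <-chan job) {
	for j := range jobs {
		// Use the caller's timestamp for all time-based decisions.
		j.resp <- fmt.Sprintf("%s handled at %d", j.key, j.requestTime.UnixNano()/1_000_000)
	}
}

func main() {
	jobs := make(chan job)
	go worker(jobs)

	j := job{key: "ip:10.2.10.7", requestTime: time.Now(), resp: make(chan string, 1)}
	jobs <- j
	fmt.Println(<-j.resp)
	close(jobs)
}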
-func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, cache Cache) (*RateLimitResp, error) { +func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, requestTime time.Time, cache Cache) (*RateLimitResp, error) { defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("Worker.handleGetRateLimit")).ObserveDuration() var rlResponse *RateLimitResp var err error switch req.Algorithm { case Algorithm_TOKEN_BUCKET: - rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req) + rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, requestTime) if err != nil { msg := "Error in tokenBucket" countError(err, msg) @@ -305,7 +307,7 @@ func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, } case Algorithm_LEAKY_BUCKET: - rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req) + rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, requestTime) if err != nil { msg := "Error in leakyBucket" countError(err, msg) From a665c3c6c11b34c73cac1128b6bb269efd7f7891 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Wed, 28 Feb 2024 17:13:00 -0500 Subject: [PATCH 02/23] Refine request time propagation. Request time is resolved at first call to `getLocalRateLimit()`, then is propagated across peer-to-peer for global behavior. --- Makefile | 2 +- gubernator.go | 9 +- gubernator.pb.go | 142 ++++++++++++++++------------ gubernator.proto | 5 + python/gubernator/gubernator_pb2.py | 40 ++++---- 5 files changed, 113 insertions(+), 85 deletions(-) diff --git a/Makefile b/Makefile index 192ed39c..3bdd5c12 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ $(GOLANGCI_LINT): ## Download Go linter .PHONY: lint lint: $(GOLANGCI_LINT) ## Run Go linter - $(GOLANGCI_LINT) run -v --fix -c .golangci.yml ./... + $(GOLANGCI_LINT) run -v -c .golangci.yml ./... .PHONY: test test: ## Run unit tests and measure code coverage diff --git a/gubernator.go b/gubernator.go index bde63652..dd2faca9 100644 --- a/gubernator.go +++ b/gubernator.go @@ -21,6 +21,7 @@ import ( "fmt" "strings" "sync" + "time" "github.com/mailgun/errors" "github.com/mailgun/holster/v4/clock" @@ -575,7 +576,13 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - requestTime := clock.Now() + var requestTime time.Time + if r.RequestTime != nil { + requestTime = time.UnixMilli(*r.RequestTime) + } + if requestTime.IsZero() { + requestTime = clock.Now() + } resp, err := s.workerPool.GetRateLimit(ctx, r, requestTime) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") diff --git a/gubernator.pb.go b/gubernator.pb.go index 808a8814..5c669457 100644 --- a/gubernator.pb.go +++ b/gubernator.pb.go @@ -374,6 +374,10 @@ type RateLimitReq struct { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. Metadata map[string]string `protobuf:"bytes,9,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // The exact time of request in Epoch milliseconds. + // The is intended to be used for peer-to-peer requests to preserve + // timestamps. 
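The second patch's core idea is "resolve the timestamp once at the first hop, then trust it downstream". A small sketch of that resolution logic against an optional epoch-millisecond field, using plain time.Now in place of holster's clock package:

package main

import (
	"fmt"
	"time"
)

// resolveRequestTime uses a peer-supplied timestamp when present and only
// falls back to the local clock on the first hop, so every peer handling
// the same logical request computes with the same "now".
func resolveRequestTime(requestTimeMillis *int64) time.Time {
	if requestTimeMillis != nil {
		return time.UnixMilli(*requestTimeMillis)
	}
	return time.Now()
}

func main() {
	// First hop: no timestamp on the request yet.
	first := resolveRequestTime(nil)
	ms := first.UnixMilli()

	// Forwarded hop: reuse the timestamp stamped at the first hop.
	forwarded := resolveRequestTime(&ms)
	fmt.Println(first.UnixMilli() == forwarded.UnixMilli()) // true
}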
+ RequestTime *int64 `protobuf:"varint,10,opt,name=request_time,json=requestTime,proto3,oneof" json:"request_time,omitempty"` } func (x *RateLimitReq) Reset() { @@ -471,6 +475,13 @@ func (x *RateLimitReq) GetMetadata() map[string]string { return nil } +func (x *RateLimitReq) GetRequestTime() int64 { + if x != nil && x.RequestTime != nil { + return *x.RequestTime + } + return 0 +} + type RateLimitResp struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -684,7 +695,7 @@ var file_gubernator_proto_rawDesc = []byte{ 0x70, 0x12, 0x3a, 0x0a, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0x8e, 0x03, + 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0xc7, 0x03, 0x0a, 0x0c, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x5f, 0x6b, 0x65, 0x79, @@ -706,68 +717,72 @@ var file_gubernator_proto_rawDesc = []byte{ 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xac, - 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, - 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, - 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, - 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, - 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, - 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x74, 0x69, 0x6d, - 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, 0x74, 0x54, 0x69, - 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x62, 0x2e, - 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, - 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x1a, 0x3b, 0x0a, 0x0d, 
0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x10, 0x0a, - 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x22, - 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, - 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, - 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, 0x72, 0x43, 0x6f, - 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, - 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, - 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, 0x55, 0x43, 0x4b, - 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, 0x76, 0x69, 0x6f, - 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, - 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x01, - 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, - 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, 0x52, 0x45, 0x47, - 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x53, 0x45, 0x54, - 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, 0x10, 0x0a, 0x0c, - 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x14, - 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, - 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0f, - 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x00, 0x12, - 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x01, 0x32, - 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, - 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, - 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, - 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, - 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, - 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, 0xd3, 0xe4, 0x93, - 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, 0x74, 0x52, 0x61, - 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, - 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, - 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x1e, 
0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, - 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, - 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x11, 0x12, 0x0f, - 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x42, - 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, - 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x12, 0x26, 0x0a, 0x0c, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x74, 0x69, 0x6d, + 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x88, 0x01, 0x01, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, + 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x0f, 0x0a, 0x0d, 0x5f, 0x72, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x22, 0xac, 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, + 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, + 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, + 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, + 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, + 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, + 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, + 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, + 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, + 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, + 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x10, 0x0a, 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x22, 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, + 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, + 0x74, 
0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, + 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, + 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, + 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, + 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, + 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, 0x76, 0x69, 0x6f, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, + 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, + 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, + 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, + 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, 0x52, 0x45, 0x47, 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, + 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x53, 0x45, 0x54, 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, + 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, 0x10, 0x0a, 0x0c, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, + 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x14, 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, + 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, + 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, + 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, + 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x01, 0x32, 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, + 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, + 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, + 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, + 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, + 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, + 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, + 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, + 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, + 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, + 0x1e, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, + 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, + 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x11, 0x12, 0x0f, 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, + 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, + 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, + 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -895,6 +910,7 @@ func file_gubernator_proto_init() { } } } + 
file_gubernator_proto_msgTypes[2].OneofWrappers = []interface{}{} type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/gubernator.proto b/gubernator.proto index fea99a22..8b6611a7 100644 --- a/gubernator.proto +++ b/gubernator.proto @@ -168,6 +168,11 @@ message RateLimitReq { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. map metadata = 9; + + // The exact time of request in Epoch milliseconds. + // The is intended to be used for peer-to-peer requests to preserve + // timestamps. + optional int64 request_time = 10; } enum Status { diff --git a/python/gubernator/gubernator_pb2.py b/python/gubernator/gubernator_pb2.py index 17351bb6..0bd92d08 100644 --- a/python/gubernator/gubernator_pb2.py +++ b/python/gubernator/gubernator_pb2.py @@ -15,7 +15,7 @@ from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\x8e\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a 
.pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\xc7\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x12&\n\x0crequest_time\x18\n \x01(\x03H\x00R\x0brequestTime\x88\x01\x01\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0f\n\r_request_time\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -31,28 +31,28 @@ _globals['_V1'].methods_by_name['GetRateLimits']._serialized_options = b'\202\323\344\223\002\026\"\021/v1/GetRateLimits:\001*' _globals['_V1'].methods_by_name['HealthCheck']._options = None _globals['_V1'].methods_by_name['HealthCheck']._serialized_options = b'\202\323\344\223\002\021\022\017/v1/HealthCheck' - _globals['_ALGORITHM']._serialized_start=1045 - _globals['_ALGORITHM']._serialized_end=1092 
- _globals['_BEHAVIOR']._serialized_start=1095 - _globals['_BEHAVIOR']._serialized_end=1236 - _globals['_STATUS']._serialized_start=1238 - _globals['_STATUS']._serialized_end=1279 + _globals['_ALGORITHM']._serialized_start=1102 + _globals['_ALGORITHM']._serialized_end=1149 + _globals['_BEHAVIOR']._serialized_start=1152 + _globals['_BEHAVIOR']._serialized_end=1293 + _globals['_STATUS']._serialized_start=1295 + _globals['_STATUS']._serialized_end=1336 _globals['_GETRATELIMITSREQ']._serialized_start=65 _globals['_GETRATELIMITSREQ']._serialized_end=140 _globals['_GETRATELIMITSRESP']._serialized_start=142 _globals['_GETRATELIMITSRESP']._serialized_end=221 _globals['_RATELIMITREQ']._serialized_start=224 - _globals['_RATELIMITREQ']._serialized_end=622 - _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=563 - _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=622 - _globals['_RATELIMITRESP']._serialized_start=625 - _globals['_RATELIMITRESP']._serialized_end=925 - _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=563 - _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=622 - _globals['_HEALTHCHECKREQ']._serialized_start=927 - _globals['_HEALTHCHECKREQ']._serialized_end=943 - _globals['_HEALTHCHECKRESP']._serialized_start=945 - _globals['_HEALTHCHECKRESP']._serialized_end=1043 - _globals['_V1']._serialized_start=1282 - _globals['_V1']._serialized_end=1503 + _globals['_RATELIMITREQ']._serialized_end=679 + _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=603 + _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=662 + _globals['_RATELIMITRESP']._serialized_start=682 + _globals['_RATELIMITRESP']._serialized_end=982 + _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=603 + _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=662 + _globals['_HEALTHCHECKREQ']._serialized_start=984 + _globals['_HEALTHCHECKREQ']._serialized_end=1000 + _globals['_HEALTHCHECKRESP']._serialized_start=1002 + _globals['_HEALTHCHECKRESP']._serialized_end=1100 + _globals['_V1']._serialized_start=1339 + _globals['_V1']._serialized_end=1560 # @@protoc_insertion_point(module_scope) From 57a5c9772b61dc2fb6b3d37ce2ff5e0776d487f1 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Wed, 6 Mar 2024 17:21:13 -0500 Subject: [PATCH 03/23] Fix race condition in global behavior. QueueUpdate() allowed for sending request/response when local ratelimits are updated. However, the order they get called to QueueUpdate() is not guaranteed to be chronological. This causes stale updates to propagate, causing lost hits. Instead, QueueUpdate() will only pass the request. The current ratelimit state will be retrieved immediately before propagation. Rigorous functional tests added around global behavior. 
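To make the race concrete before the diff: if the broadcast queue stores a response snapshot, two updates for the same key can be flushed out of order and the stale snapshot wins; queuing only the key and reading the live state immediately before sending makes the broadcast order-independent. An illustrative sketch only, with a map standing in for the local cache:

package main

import (
	"fmt"
	"sync"
)

// cache is a stand-in for the local rate-limit store.
var (
	mu    sync.Mutex
	cache = map[string]int64{"account:123445": 1000}
)

// hit consumes from the limit and returns only the key to broadcast.
func hit(key string, hits int64) string {
	mu.Lock()
	defer mu.Unlock()
	cache[key] -= hits
	return key // queue the key, not a snapshot of remaining
}

// broadcast reads the state at send time, so it is never stale.
func broadcast(key string) int64 {
	mu.Lock()
	defer mu.Unlock()
	return cache[key]
}

func main() {
	q := []string{hit("account:123445", 1), hit("account:123445", 2)}
	// Whatever order the queue drains in, the broadcast value is current.
	for _, key := range q {
		fmt.Println(key, "remaining:", broadcast(key))
	}
}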
--- functional_test.go | 490 ++++++++++++++++++++++++++++++++++++++++++++- global.go | 58 ++++-- go.mod | 13 +- go.sum | 26 +-- gubernator.go | 6 +- interval_test.go | 2 +- 6 files changed, 550 insertions(+), 45 deletions(-) diff --git a/functional_test.go b/functional_test.go index ca15490c..c1aa2d16 100644 --- a/functional_test.go +++ b/functional_test.go @@ -24,7 +24,10 @@ import ( "math/rand" "net/http" "os" + "sort" "strings" + "sync" + "sync/atomic" "testing" "time" @@ -36,6 +39,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" json "google.golang.org/protobuf/encoding/protojson" @@ -60,6 +64,13 @@ func TestMain(m *testing.M) { fmt.Println(err) os.Exit(1) } + + // Populate peer clients. Avoids data races when goroutines conflict trying + // to instantiate client singletons. + for _, peer := range cluster.GetDaemons() { + _ = peer.MustClient() + } + code := m.Run() cluster.Stop() @@ -1827,9 +1838,9 @@ func getBroadcastCount(d *guber.Daemon) (int, error) { return int(m.Value), nil } -// waitForBroadcast waits until the broadcast count for the daemon passed -// changes to the expected value. Returns an error if the expected value is -// not found before the context is cancelled. +// waitForBroadcast waits until the broadcast count for the daemon changes to +// the expected value. Returns an error if the expected value is not found +// before the context is cancelled. func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() @@ -1844,15 +1855,482 @@ func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error // It's possible a broadcast occurred twice if waiting for multiple peer to // forward updates to the owner. if int(m.Value) >= expect { - // Give the nodes some time to process the broadcasts - clock.Sleep(clock.Millisecond * 500) return nil } select { - case <-clock.After(time.Millisecond * 800): + case <-clock.After(time.Millisecond * 100): + case <-ctx.Done(): + return ctx.Err() + } + } +} + +// waitForUpdate waits until the global update count for the daemon changes to +// the expected value. Returns an error if the expected value is not found +// before the context is cancelled. +func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + for { + m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), + "gubernator_global_send_duration_count") + if err != nil { + return err + } + + // It's possible a broadcast occurred twice if waiting for multiple peer to + // forward updates to the owner. + if int(m.Value) >= expect { + return nil + } + + select { + case <-clock.After(time.Millisecond * 100): case <-ctx.Done(): return ctx.Err() } } } + +func getMetricValue(t *testing.T, d *guber.Daemon, name string) float64 { + m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), + name) + require.NoError(t, err) + if m == nil { + return 0 + } + return float64(m.Value) +} + +// Get metric counter values on each peer. 
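The helpers above feed a snapshot-and-delta pattern used throughout the new tests: capture each peer's counters, run the scenario, then assert exact increments instead of absolute values. A compact sketch of the delta check, with plain maps in place of Prometheus scraping:

package main

import "fmt"

// delta reports how much each counter moved between two snapshots.
func delta(before, after map[string]int) map[string]int {
	d := make(map[string]int, len(after))
	for name, v := range after {
		d[name] = v - before[name]
	}
	return d
}

func main() {
	before := map[string]int{"peer-1": 4, "peer-2": 7, "owner": 9}
	// ... run the scenario under test ...
	after := map[string]int{"peer-1": 5, "peer-2": 8, "owner": 9}

	// Expect exactly one broadcast received by each non-owner peer.
	for name, d := range delta(before, after) {
		fmt.Printf("%s: +%d\n", name, d)
	}
}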
+func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]int { + counters := make(map[string]int) + for _, peer := range peers { + counters[peer.InstanceID] = int(getMetricValue(t, peer, name)) + } + return counters +} + +func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatus guber.Status, expectRemaining int64) { + if req.Hits != 0 { + t.Logf("Sending %d hits to peer %s", req.Hits, d.InstanceID) + } + client := d.MustClient() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{req}, + }) + require.NoError(t, err) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + if expectRemaining >= 0 { + assert.Equal(t, expectRemaining, item.Remaining) + } + assert.Equal(t, expectStatus, item.Status) + assert.Equal(t, req.Limit, item.Limit) +} + +func TestGlobalBehavior(t *testing.T) { + const limit = 1000 + broadcastTimeout := 400 * time.Millisecond + + makeReq := func(name, key string, hits int64) *guber.RateLimitReq { + return &guber.RateLimitReq{ + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 3, + Hits: hits, + Limit: limit, + } + } + + t.Run("Hits on owner peer", func(t *testing.T) { + testCases := []struct { + Name string + Hits int64 + }{ + {Name: "Single hit", Hits: 1}, + {Name: "Multiple hits", Hits: 10}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + t.Logf("Owner peer: %s", owner.InstanceID) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + + // When + for i := int64(0); i < testCase.Hits; i++ { + sendHit(t, owner, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, 999-i) + } + + // Then + // Expect a single global broadcast to all non-owner peers. + t.Log("Waiting for global broadcasts") + var wg sync.WaitGroup + var didOwnerBroadcast, didNonOwnerBroadcast int + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + didOwnerBroadcast++ + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + didNonOwnerBroadcast++ + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, 1, didOwnerBroadcast) + assert.Zero(t, didNonOwnerBroadcast) + + // Check for global hits update from non-owner to owner peer. + // Expect no global hits update because the hits were given + // directly to the owner peer. 
+ t.Log("Waiting for global broadcasts") + var didOwnerUpdate, didNonOwnerUpdate int + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + didOwnerUpdate++ + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + didNonOwnerUpdate++ + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Zero(t, didNonOwnerUpdate) + + // Assert UpdatePeerGlobals endpoint called once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.Equal(t, expected, upgCounters2[peer.InstanceID]) + } + + // Assert PeerGetRateLimits endpoint not called. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + assert.Equal(t, expected, gprlCounters2[peer.InstanceID]) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, limit-testCase.Hits) + } + }) + } + }) + + t.Run("Hits on non-owner peer", func(t *testing.T) { + testCases := []struct { + Name string + Hits int64 + }{ + {Name: "Single hit", Hits: 1}, + {Name: "Multiple htis", Hits: 10}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + t.Logf("Owner peer: %s", owner.InstanceID) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + + // When + for i := int64(0); i < testCase.Hits; i++ { + sendHit(t, peers[0], makeReq(name, key, 1), guber.Status_UNDER_LIMIT, 999-i) + } + + // Then + // Check for global hits update from non-owner to owner peer. + // Expect single global hits update from non-owner peer that received hits. 
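+				// Only peers[0] received hits, so exactly one non-owner update is expected.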
+ t.Log("Waiting for global hits updates") + var wg sync.WaitGroup + var didOwnerUpdate int + var didNonOwnerUpdate []string + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + didOwnerUpdate++ + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + didNonOwnerUpdate = append(didNonOwnerUpdate, peer.InstanceID) + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Len(t, didNonOwnerUpdate, 1) + assert.Equal(t, []string{peers[0].InstanceID}, didNonOwnerUpdate) + + // Expect a single global broadcast to all non-owner peers. + t.Log("Waiting for global broadcasts") + var didOwnerBroadcast, didNonOwnerBroadcast int + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + didOwnerBroadcast++ + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + didNonOwnerBroadcast++ + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, 1, didOwnerBroadcast) + assert.Empty(t, didNonOwnerBroadcast) + + // Assert UpdatePeerGlobals endpoint called once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.Equal(t, expected, upgCounters2[peer.InstanceID], "upgCounter %s", peer.InstanceID) + } + + // Assert PeerGetRateLimits endpoint called once on owner. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + if peer.InstanceID == owner.InstanceID { + expected++ + } + assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, limit-testCase.Hits) + } + }) + } + }) + + // Distribute hits across all non-owner peers. 
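+	// Each local non-owner peer gets a share of the hits and is expected to
+	// send its own hits update to the owner.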
+ t.Run("Distributed hits", func(t *testing.T) { + testCases := []struct { + Name string + Hits int + }{ + {Name: "2 hits", Hits: 2}, + {Name: "10 hits", Hits: 10}, + {Name: "100 hits", Hits: 100}, + } + + for _, testCase := range testCases { + t.Run(testCase.Name, func(t *testing.T) { + name := t.Name() + key := fmt.Sprintf("account:%08x", rand.Int()) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + var localPeers []*guber.Daemon + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + localPeers = append(localPeers, peer) + } + } + t.Logf("Owner peer: %s", owner.InstanceID) + + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") + updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") + upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + gprlCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + expectUpdate := make(map[string]struct{}) + var wg sync.WaitGroup + var mutex sync.Mutex + + // When + wg.Add(testCase.Hits) + for i := 0; i < testCase.Hits; i++ { + peer := localPeers[i%len(localPeers)] + go func(peer *guber.Daemon) { + sendHit(t, peer, makeReq(name, key, 1), guber.Status_UNDER_LIMIT, -1) + if peer.InstanceID != owner.InstanceID { + mutex.Lock() + expectUpdate[peer.InstanceID] = struct{}{} + mutex.Unlock() + } + wg.Done() + }(peer) + } + wg.Wait() + + // Then + // Check for global hits update from non-owner to owner peer. + // Expect single update from each non-owner peer that received + // hits. + t.Log("Waiting for global hits updates") + var didOwnerUpdate int64 + var didNonOwnerUpdate []string + wg.Add(len(peers) + 1) + go func() { + expected := updateCounters[owner.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, owner, expected); err == nil { + atomic.AddInt64(&didOwnerUpdate, 1) + t.Log("Global hits update from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := updateCounters[peer.InstanceID] + 1 + if err := waitForUpdate(broadcastTimeout, peer, expected); err == nil { + mutex.Lock() + didNonOwnerUpdate = append(didNonOwnerUpdate, peer.InstanceID) + mutex.Unlock() + t.Logf("Global hits update from peer %s", peer.InstanceID) + } + wg.Done() + + }(peer) + } + wg.Wait() + assert.Zero(t, didOwnerUpdate) + assert.Len(t, didNonOwnerUpdate, len(expectUpdate)) + expectedNonOwnerUpdate := maps.Keys(expectUpdate) + sort.Strings(expectedNonOwnerUpdate) + sort.Strings(didNonOwnerUpdate) + assert.Equal(t, expectedNonOwnerUpdate, didNonOwnerUpdate) + + // Expect a single global broadcast to all non-owner peers. 
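+				// The wait goroutines below run concurrently, so the broadcast
+				// counters are incremented atomically.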
+ t.Log("Waiting for global broadcasts") + var didOwnerBroadcast, didNonOwnerBroadcast int64 + wg.Add(len(peers) + 1) + go func() { + expected := broadcastCounters[owner.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, owner, expected); err == nil { + atomic.AddInt64(&didOwnerBroadcast, 1) + t.Log("Global broadcast from owner") + } + wg.Done() + }() + for _, peer := range peers { + go func(peer *guber.Daemon) { + expected := broadcastCounters[peer.InstanceID] + 1 + if err := waitForBroadcast(broadcastTimeout, peer, expected); err == nil { + atomic.AddInt64(&didNonOwnerBroadcast, 1) + t.Logf("Global broadcast from peer %s", peer.InstanceID) + } + wg.Done() + }(peer) + } + wg.Wait() + assert.Equal(t, int64(1), didOwnerBroadcast) + assert.Empty(t, didNonOwnerBroadcast) + + // Assert UpdatePeerGlobals endpoint called at least + // once on each peer except owner. + // Used by global broadcast. + upgCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") + for _, peer := range cluster.GetDaemons() { + expected := upgCounters[peer.InstanceID] + if peer.PeerInfo.DataCenter == cluster.DataCenterNone && peer.InstanceID != owner.InstanceID { + expected++ + } + assert.GreaterOrEqual(t, upgCounters2[peer.InstanceID], expected, "upgCounter %s", peer.InstanceID) + } + + // Assert PeerGetRateLimits endpoint called on owner + // for each non-owner that received hits. + // Used by global hits update. + gprlCounters2 := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/GetPeerRateLimits\"}") + for _, peer := range cluster.GetDaemons() { + expected := gprlCounters[peer.InstanceID] + if peer.InstanceID == owner.InstanceID { + expected += len(expectUpdate) + } + assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) + } + + // Verify all peers report consistent remaining value value. + for _, peer := range cluster.GetDaemons() { + if peer.PeerInfo.DataCenter != cluster.DataCenterNone { + continue + } + sendHit(t, peer, makeReq(name, key, 0), guber.Status_UNDER_LIMIT, int64(limit-testCase.Hits)) + } + }) + } + }) +} diff --git a/global.go b/global.go index 72993bb3..2f40eac4 100644 --- a/global.go +++ b/global.go @@ -29,7 +29,7 @@ import ( // the cluster periodically when a global rate limit we own updates. 
type globalManager struct { hitsQueue chan *RateLimitReq - broadcastQueue chan *UpdatePeerGlobal + broadcastQueue chan broadcastItem wg syncutil.WaitGroup conf BehaviorConfig log FieldLogger @@ -40,11 +40,16 @@ type globalManager struct { metricGlobalQueueLength prometheus.Gauge } +type broadcastItem struct { + Request *RateLimitReq + RequestTime time.Time +} + func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager { gm := globalManager{ log: instance.log, hitsQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), - broadcastQueue: make(chan *UpdatePeerGlobal, conf.GlobalBatchLimit), + broadcastQueue: make(chan broadcastItem, conf.GlobalBatchLimit), instance: instance, conf: conf, metricGlobalSendDuration: prometheus.NewSummary(prometheus.SummaryOpts{ @@ -72,16 +77,17 @@ func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager } func (gm *globalManager) QueueHit(r *RateLimitReq) { - gm.hitsQueue <- r + if r.Hits != 0 { + gm.hitsQueue <- r + } } -func (gm *globalManager) QueueUpdate(req *RateLimitReq, resp *RateLimitResp, requestTime time.Time) { - gm.broadcastQueue <- &UpdatePeerGlobal{ - Key: req.HashKey(), - Algorithm: req.Algorithm, - Duration: req.Duration, - Status: resp, - RequestTime: EpochMillis(requestTime), +func (gm *globalManager) QueueUpdate(req *RateLimitReq, requestTime time.Time) { + if req.Hits != 0 { + gm.broadcastQueue <- broadcastItem{ + Request: req, + RequestTime: requestTime, + } } } @@ -191,18 +197,18 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { // and in a periodic frequency determined by GlobalSyncWait. func (gm *globalManager) runBroadcasts() { var interval = NewInterval(gm.conf.GlobalSyncWait) - updates := make(map[string]*UpdatePeerGlobal) + updates := make(map[string]broadcastItem) gm.wg.Until(func(done chan struct{}) bool { select { - case updateReq := <-gm.broadcastQueue: - updates[updateReq.Key] = updateReq + case update := <-gm.broadcastQueue: + updates[update.Request.HashKey()] = update // Send the hits if we reached our batch limit if len(updates) >= gm.conf.GlobalBatchLimit { gm.metricBroadcastCounter.WithLabelValues("queue_full").Inc() gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]*UpdatePeerGlobal) + updates = make(map[string]broadcastItem) return true } @@ -216,7 +222,7 @@ func (gm *globalManager) runBroadcasts() { if len(updates) != 0 { gm.metricBroadcastCounter.WithLabelValues("timer").Inc() gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]*UpdatePeerGlobal) + updates = make(map[string]broadcastItem) } else { gm.metricGlobalQueueLength.Set(0) } @@ -229,14 +235,30 @@ func (gm *globalManager) runBroadcasts() { } // broadcastPeers broadcasts global rate limit statuses to all other peers -func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*UpdatePeerGlobal) { +func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]broadcastItem) { defer prometheus.NewTimer(gm.metricBroadcastDuration).ObserveDuration() var req UpdatePeerGlobalsReq gm.metricGlobalQueueLength.Set(float64(len(updates))) - for _, r := range updates { - req.Globals = append(req.Globals, r) + for _, update := range updates { + // Get current rate limit state. 
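+		// Query with zero hits so building the broadcast does not consume any of the limit.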
+ grlReq := new(RateLimitReq) + *grlReq = *update.Request + grlReq.Hits = 0 + status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq, update.RequestTime) + if err != nil { + gm.log.WithError(err).Error("while retrieving rate limit status") + continue + } + updateReq := &UpdatePeerGlobal{ + Key: update.Request.HashKey(), + Algorithm: update.Request.Algorithm, + Duration: update.Request.Duration, + Status: status, + RequestTime: EpochMillis(update.RequestTime), + } + req.Globals = append(req.Globals, updateReq) } fan := syncutil.NewFanOut(gm.conf.GlobalPeerRequestsConcurrency) diff --git a/go.mod b/go.mod index 93080b32..cb0f9886 100644 --- a/go.mod +++ b/go.mod @@ -23,8 +23,9 @@ require ( go.opentelemetry.io/otel/sdk v1.21.0 go.opentelemetry.io/otel/trace v1.21.0 go.uber.org/goleak v1.3.0 - golang.org/x/net v0.18.0 - golang.org/x/sync v0.3.0 + golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 + golang.org/x/net v0.22.0 + golang.org/x/sync v0.6.0 golang.org/x/time v0.3.0 google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b google.golang.org/grpc v1.59.0 @@ -81,12 +82,12 @@ require ( go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.8.0 // indirect go.uber.org/zap v1.21.0 // indirect - golang.org/x/mod v0.8.0 // indirect + golang.org/x/mod v0.15.0 // indirect golang.org/x/oauth2 v0.12.0 // indirect - golang.org/x/sys v0.14.0 // indirect - golang.org/x/term v0.14.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect - golang.org/x/tools v0.6.0 // indirect + golang.org/x/tools v0.18.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20231012201019-e917dd12ba7a // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b // indirect diff --git a/go.sum b/go.sum index fea9ef4c..7b2a2004 100644 --- a/go.sum +++ b/go.sum @@ -478,6 +478,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -503,8 +505,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0 h1:SernR4v+D55NyBH2QiEQrlBAnj1ECL6AGrA5+dPaMY8= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net 
v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -550,8 +552,8 @@ golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= -golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= +golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -579,8 +581,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -643,13 +645,13 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= 
-golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -721,8 +723,8 @@ golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.6-0.20210726203631-07bc1bf47fb2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.18.0 h1:k8NLag8AGHnn+PHbl7g43CtqZAwG60vZkLqgyZgIHgQ= +golang.org/x/tools v0.18.0/go.mod h1:GL7B4CwcLLeo59yx/9UWWuNOW1n3VZ4f5axWfML7Lcg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/gubernator.go b/gubernator.go index 5d7a5bd4..fda9f92a 100644 --- a/gubernator.go +++ b/gubernator.go @@ -413,6 +413,7 @@ func (s *V1Instance) getGlobalRateLimit(ctx context.Context, req *RateLimitReq) // UpdatePeerGlobals updates the local cache with a list of global rate limits. This method should only // be called by a peer who is the owner of a global rate limit. func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobalsReq) (*UpdatePeerGlobalsResp, error) { + defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.UpdatePeerGlobals")).ObserveDuration() now := MillisecondNow() for _, g := range r.Globals { item := &CacheItem{ @@ -449,6 +450,7 @@ func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobals // GetPeerRateLimits is called by other peers to get the rate limits owned by this peer. func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimitsReq) (resp *GetPeerRateLimitsResp, err error) { + defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.GetPeerRateLimits")).ObserveDuration() if len(r.Requests) > maxBatchSize { err := fmt.Errorf("'PeerRequest.rate_limits' list too large; max size is '%d'", maxBatchSize) metricCheckErrorCounter.WithLabelValues("Request too large").Inc() @@ -588,12 +590,12 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } - metricGetRateLimitCounter.WithLabelValues("local").Inc() // If global behavior, then broadcast update to all peers. 
if HasBehavior(r.Behavior, Behavior_GLOBAL) { - s.global.QueueUpdate(r, resp, requestTime) + s.global.QueueUpdate(r, requestTime) } + metricGetRateLimitCounter.WithLabelValues("local").Inc() return resp, nil } diff --git a/interval_test.go b/interval_test.go index 68c8b40d..d01d86f3 100644 --- a/interval_test.go +++ b/interval_test.go @@ -19,7 +19,7 @@ package gubernator_test import ( "testing" - "github.com/mailgun/gubernator/v2" + gubernator "github.com/mailgun/gubernator/v2" "github.com/mailgun/holster/v4/clock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" From 56a0b22454b29f8cfd291cd21891460d88ea6c34 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Thu, 7 Mar 2024 09:19:31 -0500 Subject: [PATCH 04/23] Fix compile error. --- global.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/global.go b/global.go index 2f40eac4..5af33301 100644 --- a/global.go +++ b/global.go @@ -23,6 +23,7 @@ import ( "github.com/mailgun/holster/v4/syncutil" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "google.golang.org/protobuf/proto" ) // globalManager manages async hit queue and updates peers in @@ -243,8 +244,7 @@ func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string] for _, update := range updates { // Get current rate limit state. - grlReq := new(RateLimitReq) - *grlReq = *update.Request + grlReq := proto.Clone(update.Request).(*RateLimitReq) grlReq.Hits = 0 status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq, update.RequestTime) if err != nil { From cb3816a55eeca1b2447683513a25dd06ecb339c1 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Thu, 7 Mar 2024 09:35:30 -0500 Subject: [PATCH 05/23] Fix intermittent test error caused by `TestHealthCheck`. 
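
TestHealthCheck restarts the cluster but did not wait for the peers to come
back online, so tests running afterwards could hit peers that were still
starting up. Wait for every peer to report healthy with the expected peer
count after the restart, and shadow the response/error inside the retry
closures so a failed health check skips the remaining assertions.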
--- functional_test.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/functional_test.go b/functional_test.go index c1aa2d16..526f7209 100644 --- a/functional_test.go +++ b/functional_test.go @@ -1622,11 +1622,10 @@ func TestHealthCheck(t *testing.T) { testutil.UntilPass(t, 20, clock.Millisecond*300, func(t testutil.TestingT) { // Check the health again to get back the connection error - healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - if assert.Nil(t, err) { + healthResp, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) + if !assert.NoError(t, err) { return } - assert.Equal(t, "unhealthy", healthResp.GetStatus()) assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") }) @@ -1637,12 +1636,17 @@ func TestHealthCheck(t *testing.T) { require.NoError(t, cluster.Restart(ctx)) // wait for every peer instance to come back online + numPeers := int32(len(cluster.GetPeers())) for _, peer := range cluster.GetPeers() { peerClient, err := guber.DialV1Server(peer.GRPCAddress, nil) require.NoError(t, err) - testutil.UntilPass(t, 10, clock.Millisecond*300, func(t testutil.TestingT) { - healthResp, err = peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - assert.Equal(t, "healthy", healthResp.GetStatus()) + testutil.UntilPass(t, 10, 300*clock.Millisecond, func(t testutil.TestingT) { + healthResp, err := peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) + if !assert.NoError(t, err) { + return + } + assert.Equal(t, "healthy", healthResp.Status) + assert.Equal(t, numPeers, healthResp.PeerCount) }) } } From e2b8853c048c861c6fe5a3568cfe83f2b368680a Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 11:17:31 -0400 Subject: [PATCH 06/23] Refactor global behavior and functional tests for stability. - Simplify passing of request time across layers. - Better handling of metrics in tests. - Better detection of global broadcasts, global updates, and idle. - Drop redundant metric `guberator_global_broadcast_counter`. - Fix metric `gubernator_global_queue_length` for global broadcast. - Add metric `gubernator_global_send_queue_length` for global send. --- algorithms.go | 54 ++-- functional_test.go | 769 ++++++++++++++++++--------------------------- global.go | 81 +++-- gubernator.go | 25 +- peer_client.go | 2 - workers.go | 12 +- 6 files changed, 380 insertions(+), 563 deletions(-) diff --git a/algorithms.go b/algorithms.go index a9937c59..8d49bb35 100644 --- a/algorithms.go +++ b/algorithms.go @@ -35,7 +35,7 @@ import ( // with 100 emails and the request will succeed. You can override this default behavior with `DRAIN_OVER_LIMIT` // Implements token bucket algorithm for rate limiting. https://en.wikipedia.org/wiki/Token_bucket -func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { +func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { tokenBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("tokenBucket")) defer tokenBucketTimer.ObserveDuration() @@ -100,7 +100,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request s.Remove(ctx, hashKey) } - return tokenBucketNewItem(ctx, s, c, r, requestTime) + return tokenBucketNewItem(ctx, s, c, r) } // Update the limit if it changed. 
@@ -133,12 +133,12 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request } // If our new duration means we are currently expired. - now := EpochMillis(requestTime) - if expire <= now { + requestTime := *r.RequestTime + if expire <= requestTime { // Renew item. span.AddEvent("Limit has expired") - expire = now + r.Duration - t.CreatedAt = now + expire = requestTime + r.Duration + t.CreatedAt = requestTime t.Remaining = t.Limit } @@ -196,19 +196,19 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request } // Item is not found in cache or store, create new. - return tokenBucketNewItem(ctx, s, c, r, requestTime) + return tokenBucketNewItem(ctx, s, c, r) } // Called by tokenBucket() when adding a new item in the store. -func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { - now := EpochMillis(requestTime) - expire := now + r.Duration +func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { + requestTime := *r.RequestTime + expire := requestTime + r.Duration t := &TokenBucketItem{ Limit: r.Limit, Duration: r.Duration, Remaining: r.Limit - r.Hits, - CreatedAt: now, + CreatedAt: requestTime, } // Add a new rate limit to the cache. @@ -252,7 +252,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, } // Implements leaky bucket algorithm for rate limiting https://en.wikipedia.org/wiki/Leaky_bucket -func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { +func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { leakyBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getRateLimit_leakyBucket")) defer leakyBucketTimer.ObserveDuration() @@ -260,7 +260,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request r.Burst = r.Limit } - now := EpochMillis(requestTime) + requestTime := *r.RequestTime // Get rate limit from cache. hashKey := r.HashKey() @@ -309,7 +309,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request s.Remove(ctx, hashKey) } - return leakyBucketNewItem(ctx, s, c, r, requestTime) + return leakyBucketNewItem(ctx, s, c, r) } if HasBehavior(r.Behavior, Behavior_RESET_REMAINING) { @@ -349,16 +349,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request } if r.Hits != 0 { - c.UpdateExpiration(r.HashKey(), now+duration) + c.UpdateExpiration(r.HashKey(), requestTime+duration) } // Calculate how much leaked out of the bucket since the last time we leaked a hit - elapsed := now - b.UpdatedAt + elapsed := requestTime - b.UpdatedAt leak := float64(elapsed) / rate if int64(leak) > 0 { b.Remaining += leak - b.UpdatedAt = now + b.UpdatedAt = requestTime } if int64(b.Remaining) > b.Burst { @@ -369,7 +369,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request Limit: b.Limit, Remaining: int64(b.Remaining), Status: Status_UNDER_LIMIT, - ResetTime: now + (b.Limit-int64(b.Remaining))*int64(rate), + ResetTime: requestTime + (b.Limit-int64(b.Remaining))*int64(rate), } // TODO: Feature missing: check for Duration change between item/request. 
@@ -391,7 +391,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request if int64(b.Remaining) == r.Hits { b.Remaining = 0 rl.Remaining = int64(b.Remaining) - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } @@ -417,16 +417,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, request b.Remaining -= float64(r.Hits) rl.Remaining = int64(b.Remaining) - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } - return leakyBucketNewItem(ctx, s, c, r, requestTime) + return leakyBucketNewItem(ctx, s, c, r) } // Called by leakyBucket() when adding a new item in the store. -func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, requestTime time.Time) (resp *RateLimitResp, err error) { - now := EpochMillis(requestTime) +func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { + requestTime := *r.RequestTime duration := r.Duration rate := float64(duration) / float64(r.Limit) if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { @@ -445,7 +445,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, Remaining: float64(r.Burst - r.Hits), Limit: r.Limit, Duration: duration, - UpdatedAt: now, + UpdatedAt: requestTime, Burst: r.Burst, } @@ -453,7 +453,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, Status: Status_UNDER_LIMIT, Limit: b.Limit, Remaining: r.Burst - r.Hits, - ResetTime: now + (b.Limit-(r.Burst-r.Hits))*int64(rate), + ResetTime: requestTime + (b.Limit-(r.Burst-r.Hits))*int64(rate), } // Client could be requesting that we start with the bucket OVER_LIMIT @@ -461,12 +461,12 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, metricOverLimitCounter.Add(1) rl.Status = Status_OVER_LIMIT rl.Remaining = 0 - rl.ResetTime = now + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) b.Remaining = 0 } item := &CacheItem{ - ExpireAt: now + duration, + ExpireAt: requestTime + duration, Algorithm: r.Algorithm, Key: r.HashKey(), Value: &b, diff --git a/functional_test.go b/functional_test.go index 526f7209..e7b66ac1 100644 --- a/functional_test.go +++ b/functional_test.go @@ -34,6 +34,7 @@ import ( guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" "github.com/mailgun/holster/v4/clock" + "github.com/mailgun/holster/v4/syncutil" "github.com/mailgun/holster/v4/testutil" "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" @@ -973,22 +974,22 @@ func TestMissingFields(t *testing.T) { } func TestGlobalRateLimits(t *testing.T) { - const ( - name = "test_global" - key = "account:12345" - ) - + name := t.Name() + key := randomKey() + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) + var resetTime int64 - sendHit := func(client guber.V1Client, status guber.Status, hits, expectRemaining, expectResetTime int64) int64 { + sendHit := func(client guber.V1Client, status guber.Status, hits, remain int64) { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: 
"test_global", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, Duration: guber.Minute * 3, @@ -1000,19 +1001,27 @@ func TestGlobalRateLimits(t *testing.T) { require.NoError(t, err) item := resp.Responses[0] assert.Equal(t, "", item.Error) - assert.Equal(t, expectRemaining, item.Remaining) + assert.Equal(t, remain, item.Remaining) assert.Equal(t, status, item.Status) assert.Equal(t, int64(5), item.Limit) - if expectResetTime != 0 { - assert.Equal(t, expectResetTime, item.ResetTime) + + // ResetTime should not change during test. + if resetTime == 0 { + resetTime = item.ResetTime } - return item.ResetTime + assert.Equal(t, resetTime, item.ResetTime) + + // ensure that we have a canonical host + assert.NotEmpty(t, item.Metadata["owner"]) } + + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + // Our first hit should create the request on the peer and queue for async forward - _ = sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1, 4, 0) + sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1, 4) // Our second should be processed as if we own it since the async forward hasn't occurred yet - _ = sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 2, 2, 0) + sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 2, 2) testutil.UntilPass(t, 20, clock.Millisecond*200, func(t testutil.TestingT) { // Inspect peers metrics, ensure the peer sent the global rate limit to the owner @@ -1021,44 +1030,36 @@ func TestGlobalRateLimits(t *testing.T) { assert.NoError(t, err) assert.Equal(t, 1, int(m.Value)) }) - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - // Get the ResetTime from owner. - expectResetTime := sendHit(owner.MustClient(), guber.Status_UNDER_LIMIT, 0, 2, 0) require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) // Check different peers, they should have gotten the broadcast from the owner - sendHit(peers[1].MustClient(), guber.Status_UNDER_LIMIT, 0, 2, expectResetTime) - sendHit(peers[2].MustClient(), guber.Status_UNDER_LIMIT, 0, 2, expectResetTime) + sendHit(peers[1].MustClient(), guber.Status_UNDER_LIMIT, 0, 2) + sendHit(peers[2].MustClient(), guber.Status_UNDER_LIMIT, 0, 2) // Non owning peer should calculate the rate limit remaining before forwarding // to the owner. - sendHit(peers[3].MustClient(), guber.Status_UNDER_LIMIT, 2, 0, expectResetTime) + sendHit(peers[3].MustClient(), guber.Status_UNDER_LIMIT, 2, 0) require.NoError(t, waitForBroadcast(clock.Second*3, owner, 2)) - sendHit(peers[4].MustClient(), guber.Status_OVER_LIMIT, 1, 0, expectResetTime) + sendHit(peers[4].MustClient(), guber.Status_OVER_LIMIT, 1, 0) } // Ensure global broadcast updates all peers when GetRateLimits is called on // either owner or non-owner peer. func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { ctx := context.Background() - const name = "test_global" - key := fmt.Sprintf("key:%016x", rand.Int()) + name := t.Name() + key := randomKey() // Determine owner and non-owner peers. 
- ownerPeerInfo, err := cluster.FindOwningPeer(name, key) + owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) - ownerDaemon, err := cluster.FindOwningDaemon(name, key) + // ownerAddr := owner.ownerPeerInfo.GRPCAddress + peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) - owner := ownerPeerInfo.GRPCAddress - nonOwner := cluster.PeerAt(0).GRPCAddress - if nonOwner == owner { - nonOwner = cluster.PeerAt(1).GRPCAddress - } - require.NotEqual(t, owner, nonOwner) + nonOwner := peers[0] // Connect to owner and non-owner peers in round robin. dialOpts := []grpc.DialOption{ @@ -1066,22 +1067,22 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`), } - address := fmt.Sprintf("static:///%s,%s", owner, nonOwner) + address := fmt.Sprintf("static:///%s,%s", owner.PeerInfo.GRPCAddress, nonOwner.PeerInfo.GRPCAddress) conn, err := grpc.DialContext(ctx, address, dialOpts...) require.NoError(t, err) client := guber.NewV1Client(conn) - sendHit := func(status guber.Status, i int) { - ctx, cancel := context.WithTimeout(ctx, 10*clock.Second) + sendHit := func(client guber.V1Client, status guber.Status, i int) { + ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second) defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { Name: name, UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, + Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, + Duration: 5 * guber.Minute, Hits: 1, Limit: 2, }, @@ -1089,319 +1090,73 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { }) require.NoError(t, err, i) item := resp.Responses[0] - assert.Equal(t, "", item.GetError(), fmt.Sprintf("mismatch error, iteration %d", i)) - assert.Equal(t, status, item.GetStatus(), fmt.Sprintf("mismatch status, iteration %d", i)) + assert.Equal(t, "", item.Error, fmt.Sprintf("unexpected error, iteration %d", i)) + assert.Equal(t, status, item.Status, fmt.Sprintf("mismatch status, iteration %d", i)) } + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + // Send two hits that should be processed by the owner and non-owner and // deplete the limit consistently. - sendHit(guber.Status_UNDER_LIMIT, 1) - sendHit(guber.Status_UNDER_LIMIT, 2) - require.NoError(t, waitForBroadcast(clock.Second*3, ownerDaemon, 1)) + sendHit(client, guber.Status_UNDER_LIMIT, 1) + sendHit(client, guber.Status_UNDER_LIMIT, 2) + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 1)) // All successive hits should return OVER_LIMIT. 
for i := 2; i <= 10; i++ { - sendHit(guber.Status_OVER_LIMIT, i) + sendHit(client, guber.Status_OVER_LIMIT, i) } } func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { - const ( - name = "test_global_token_limit" - key = "account:12345" - ) - - peers, err := cluster.ListNonOwningDaemons(name, key) - require.NoError(t, err) - - sendHit := func(expectedStatus guber.Status, hits int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := peers[0].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, - Hits: hits, - Limit: 2, - }, - }, - }) - assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) - } + name := t.Name() + key := randomKey() owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) - - // Send two hits that should be processed by the owner and the broadcast to peer, depleting the remaining - sendHit(guber.Status_UNDER_LIMIT, 1) - sendHit(guber.Status_UNDER_LIMIT, 1) - // Wait for the broadcast from the owner to the peer - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) - // Since the remainder is 0, the peer should set OVER_LIMIT instead of waiting for the owner - // to respond with OVER_LIMIT. - sendHit(guber.Status_OVER_LIMIT, 1) - // Wait for the broadcast from the owner to the peer - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 2)) - // The status should still be OVER_LIMIT - sendHit(guber.Status_OVER_LIMIT, 0) -} - -func TestGlobalRateLimitsPeerOverLimitLeaky(t *testing.T) { - const ( - name = "test_global_token_limit_leaky" - key = "account:12345" - ) - peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) - sendHit := func(client guber.V1Client, expectedStatus guber.Status, hits int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + sendHit := func(expectedStatus guber.Status, hits, expectedRemaining int64) { + ctx, cancel := context.WithTimeout(context.Background(), 10*clock.Second) defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 5, - Hits: hits, - Limit: 2, - }, - }, - }) - assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) - } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - - // Send two hits that should be processed by the owner and the broadcast to peer, depleting the remaining - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1) - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, 1) - // Wait for the broadcast from the owner to the peers - require.NoError(t, waitForBroadcast(clock.Second*3, owner, 1)) - // Ask a different peer if the status is over the limit - sendHit(peers[1].MustClient(), guber.Status_OVER_LIMIT, 1) -} - -func TestGlobalRequestMoreThanAvailable(t *testing.T) { - const ( - name = "test_global_more_than_available" - key = "account:123456" - ) - - peers, err := cluster.ListNonOwningDaemons(name, key) - require.NoError(t, err) - - sendHit := func(client guber.V1Client, expectedStatus 
guber.Status, hits int64, remaining int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 1_000, - Hits: hits, - Limit: 100, - }, - }, - }) - assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) - } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - - prev, err := getBroadcastCount(owner) - require.NoError(t, err) - - // Ensure GRPC has connections to each peer before we start, as we want - // the actual test requests to happen quite fast. - for _, p := range peers { - sendHit(p.MustClient(), guber.Status_UNDER_LIMIT, 0, 100) - } - - // Send a request for 50 hits from each non owning peer in the cluster. These requests - // will be queued and sent to the owner as accumulated hits. As a result of the async nature - // of `Behavior_GLOBAL` rate limit requests spread across peers like this will be allowed to - // over-consume their resource within the rate limit window until the owner is updated and - // a broadcast to all peers is received. - // - // The maximum number of resources that can be over-consumed can be calculated by multiplying - // the remainder by the number of peers in the cluster. For example: If you have a remainder of 100 - // and a cluster of 10 instances, then the maximum over-consumed resource is 1,000. If you need - // a more accurate remaining calculation, and wish to avoid over consuming a resource, then do - // not use `Behavior_GLOBAL`. 
- for _, p := range peers { - sendHit(p.MustClient(), guber.Status_UNDER_LIMIT, 50, 50) - } - - // Wait for the broadcast from the owner to the peers - require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+1)) - - // We should be over the limit - sendHit(peers[0].MustClient(), guber.Status_OVER_LIMIT, 1, 0) -} - -func TestGlobalNegativeHits(t *testing.T) { - const ( - name = "test_global_negative_hits" - key = "account:12345" - ) - - peers, err := cluster.ListNonOwningDaemons(name, key) - require.NoError(t, err) - - sendHit := func(client guber.V1Client, status guber.Status, hits int64, remaining int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + resp, err := peers[0].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { Name: name, UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 100, + Duration: 5 * guber.Minute, Hits: hits, Limit: 2, }, }, }) assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, status, resp.Responses[0].GetStatus()) - assert.Equal(t, remaining, resp.Responses[0].Remaining) - } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) - require.NoError(t, err) - - // Send a negative hit on a rate limit with no hits - sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, -1, 3) - - // Wait for the negative remaining to propagate - require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+1)) - - // Send another negative hit to a different peer - sendHit(peers[1].MustClient(), guber.Status_UNDER_LIMIT, -1, 4) - - require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+2)) - - // Should have 4 in the remainder - sendHit(peers[2].MustClient(), guber.Status_UNDER_LIMIT, 4, 0) - - require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+3)) - - sendHit(peers[3].MustClient(), guber.Status_UNDER_LIMIT, 0, 0) -} - -func TestGlobalResetRemaining(t *testing.T) { - const ( - name = "test_global_reset" - key = "account:123456" - ) - - peers, err := cluster.ListNonOwningDaemons(name, key) - require.NoError(t, err) - - sendHit := func(client guber.V1Client, expectedStatus guber.Status, hits int64, remaining int64) { - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 1_000, - Hits: hits, - Limit: 100, - }, - }, - }) - assert.NoError(t, err) - assert.Equal(t, "", resp.Responses[0].GetError()) - assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) - assert.Equal(t, remaining, resp.Responses[0].Remaining) - } - owner, err := cluster.FindOwningDaemon(name, key) - require.NoError(t, err) - prev, err := getBroadcastCount(owner) - require.NoError(t, err) - - for _, p := range peers { - sendHit(p.MustClient(), guber.Status_UNDER_LIMIT, 50, 50) + item := resp.Responses[0] + assert.Equal(t, "", item.Error, "unexpected error") + assert.Equal(t, expectedStatus, item.Status, "mismatch status") + assert.Equal(t, expectedRemaining, item.Remaining, "mismatch remaining") } - // Wait for the broadcast from the owner to the peers - require.NoError(t, 
waitForBroadcast(clock.Second*10, owner, prev+1)) - - // We should be over the limit and remaining should be zero - sendHit(peers[0].MustClient(), guber.Status_OVER_LIMIT, 1, 0) + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) - // Now reset the remaining - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) - defer cancel() - resp, err := peers[0].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL | guber.Behavior_RESET_REMAINING, - Duration: guber.Minute * 1_000, - Hits: 0, - Limit: 100, - }, - }, - }) - require.NoError(t, err) - assert.NotEqual(t, 100, resp.Responses[0].Remaining) + // Send two hits that should be processed by the owner and the broadcast to + // peer, depleting the remaining. + sendHit(guber.Status_UNDER_LIMIT, 1, 1) + sendHit(guber.Status_UNDER_LIMIT, 1, 0) - // Wait for the reset to propagate. - require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+2)) + // Wait for the broadcast from the owner to the peer + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 1)) - // Check a different peer to ensure remaining has been reset - resp, err = peers[1].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_LEAKY_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 1_000, - Hits: 0, - Limit: 100, - }, - }, - }) - require.NoError(t, err) - assert.NotEqual(t, 100, resp.Responses[0].Remaining) + // Since the remainder is 0, the peer should return OVER_LIMIT on next hit. + sendHit(guber.Status_OVER_LIMIT, 1, 0) -} + // Wait for the broadcast from the owner to the peer. + require.NoError(t, waitForBroadcast(3*clock.Second, owner, 2)) -func getMetricRequest(url string, name string) (*model.Sample, error) { - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - return getMetric(resp.Body, name) + // The status should still be OVER_LIMIT. 
+ sendHit(guber.Status_OVER_LIMIT, 0, 0) } func TestChangeLimit(t *testing.T) { @@ -1622,10 +1377,11 @@ func TestHealthCheck(t *testing.T) { testutil.UntilPass(t, 20, clock.Millisecond*300, func(t testutil.TestingT) { // Check the health again to get back the connection error - healthResp, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - if !assert.NoError(t, err) { + healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) + if assert.Nil(t, err) { return } + assert.Equal(t, "unhealthy", healthResp.GetStatus()) assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") }) @@ -1634,25 +1390,9 @@ func TestHealthCheck(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*15) defer cancel() require.NoError(t, cluster.Restart(ctx)) - - // wait for every peer instance to come back online - numPeers := int32(len(cluster.GetPeers())) - for _, peer := range cluster.GetPeers() { - peerClient, err := guber.DialV1Server(peer.GRPCAddress, nil) - require.NoError(t, err) - testutil.UntilPass(t, 10, 300*clock.Millisecond, func(t testutil.TestingT) { - healthResp, err := peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - if !assert.NoError(t, err) { - return - } - assert.Equal(t, "healthy", healthResp.Status) - assert.Equal(t, numPeers, healthResp.PeerCount) - }) - } } func TestLeakyBucketDivBug(t *testing.T) { - // Freeze time so we don't leak during the test defer clock.Freeze(clock.Now()).Unfreeze() client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) @@ -1801,142 +1541,6 @@ func TestGetPeerRateLimits(t *testing.T) { // TODO: Add a test for sending no rate limits RateLimitReqList.RateLimits = nil -func getMetric(in io.Reader, name string) (*model.Sample, error) { - dec := expfmt.SampleDecoder{ - Dec: expfmt.NewDecoder(in, expfmt.FmtText), - Opts: &expfmt.DecodeOptions{ - Timestamp: model.Now(), - }, - } - - var all model.Vector - for { - var smpls model.Vector - err := dec.Decode(&smpls) - if err == io.EOF { - break - } - if err != nil { - return nil, err - } - all = append(all, smpls...) - } - - for _, s := range all { - if strings.Contains(s.Metric.String(), name) { - return s, nil - } - } - return nil, nil -} - -// getBroadcastCount returns the current broadcast count for use with waitForBroadcast() -// TODO: Replace this with something else, we can call and reset via HTTP/GRPC calls in gubernator v3 -func getBroadcastCount(d *guber.Daemon) (int, error) { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_broadcast_duration_count") - if err != nil { - return 0, err - } - - return int(m.Value), nil -} - -// waitForBroadcast waits until the broadcast count for the daemon changes to -// the expected value. Returns an error if the expected value is not found -// before the context is cancelled. -func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - for { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_broadcast_duration_count") - if err != nil { - return err - } - - // It's possible a broadcast occurred twice if waiting for multiple peer to - // forward updates to the owner. 
- if int(m.Value) >= expect { - return nil - } - - select { - case <-clock.After(time.Millisecond * 100): - case <-ctx.Done(): - return ctx.Err() - } - } -} - -// waitForUpdate waits until the global update count for the daemon changes to -// the expected value. Returns an error if the expected value is not found -// before the context is cancelled. -func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect int) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - for { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - "gubernator_global_send_duration_count") - if err != nil { - return err - } - - // It's possible a broadcast occurred twice if waiting for multiple peer to - // forward updates to the owner. - if int(m.Value) >= expect { - return nil - } - - select { - case <-clock.After(time.Millisecond * 100): - case <-ctx.Done(): - return ctx.Err() - } - } -} - -func getMetricValue(t *testing.T, d *guber.Daemon, name string) float64 { - m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), - name) - require.NoError(t, err) - if m == nil { - return 0 - } - return float64(m.Value) -} - -// Get metric counter values on each peer. -func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]int { - counters := make(map[string]int) - for _, peer := range peers { - counters[peer.InstanceID] = int(getMetricValue(t, peer, name)) - } - return counters -} - -func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatus guber.Status, expectRemaining int64) { - if req.Hits != 0 { - t.Logf("Sending %d hits to peer %s", req.Hits, d.InstanceID) - } - client := d.MustClient() - ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) - defer cancel() - resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{req}, - }) - require.NoError(t, err) - item := resp.Responses[0] - assert.Equal(t, "", item.Error) - if expectRemaining >= 0 { - assert.Equal(t, expectRemaining, item.Remaining) - } - assert.Equal(t, expectStatus, item.Status) - assert.Equal(t, req.Limit, item.Limit) -} - func TestGlobalBehavior(t *testing.T) { const limit = 1000 broadcastTimeout := 400 * time.Millisecond @@ -1972,6 +1576,8 @@ func TestGlobalBehavior(t *testing.T) { require.NoError(t, err) t.Logf("Owner peer: %s", owner.InstanceID) + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") @@ -2088,6 +1694,8 @@ func TestGlobalBehavior(t *testing.T) { require.NoError(t, err) t.Logf("Owner peer: %s", owner.InstanceID) + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") @@ -2189,7 +1797,6 @@ func TestGlobalBehavior(t *testing.T) { } }) - // Distribute hits across all 
non-owner peers. t.Run("Distributed hits", func(t *testing.T) { testCases := []struct { Name string @@ -2216,6 +1823,8 @@ func TestGlobalBehavior(t *testing.T) { } t.Logf("Owner peer: %s", owner.InstanceID) + require.NoError(t, waitForIdle(1*clock.Minute, cluster.GetDaemons()...)) + broadcastCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_broadcast_duration_count") updateCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_global_send_duration_count") upgCounters := getPeerCounters(t, cluster.GetDaemons(), "gubernator_grpc_request_duration_count{method=\"/pb.gubernator.PeersV1/UpdatePeerGlobals\"}") @@ -2338,3 +1947,225 @@ func TestGlobalBehavior(t *testing.T) { } }) } + +// Request metrics and parse into map. +// Optionally pass names to filter metrics by name. +func getMetrics(HTTPAddr string, names ...string) (map[string]*model.Sample, error) { + url := fmt.Sprintf("http://%s/metrics", HTTPAddr) + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error requesting metrics: %s", resp.Status) + } + decoder := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(resp.Body, expfmt.FmtText), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.Now(), + }, + } + nameSet := make(map[string]struct{}) + for _, name := range names { + nameSet[name] = struct{}{} + } + metrics := make(map[string]*model.Sample) + + for { + var smpls model.Vector + err := decoder.Decode(&smpls) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + for _, smpl := range smpls { + name := smpl.Metric.String() + if _, ok := nameSet[name]; ok || len(nameSet) == 0 { + metrics[name] = smpl + } + } + } + + return metrics, nil +} + +func getMetricRequest(url string, name string) (*model.Sample, error) { + resp, err := http.Get(url) + if err != nil { + return nil, err + } + defer resp.Body.Close() + return getMetric(resp.Body, name) +} + +func getMetric(in io.Reader, name string) (*model.Sample, error) { + dec := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(in, expfmt.FmtText), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.Now(), + }, + } + + var all model.Vector + for { + var smpls model.Vector + err := dec.Decode(&smpls) + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + all = append(all, smpls...) + } + + for _, s := range all { + if strings.Contains(s.Metric.String(), name) { + return s, nil + } + } + return nil, nil +} + +// waitForBroadcast waits until the broadcast count for the daemon changes to +// at least the expected value and the broadcast queue is empty. +// Returns an error if timeout waiting for conditions to be met. +func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_broadcast_duration_count", "gubernator_global_queue_length") + if err != nil { + return err + } + gbdc := metrics["gubernator_broadcast_duration_count"] + ggql := metrics["gubernator_global_queue_length"] + + // It's possible a broadcast occurred twice if waiting for multiple + // peers to forward updates to non-owners. 
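+		// Also require the global broadcast queue to be empty so no broadcast is still pending.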
+ if int(gbdc.Value) >= expect && ggql.Value == 0 { + return nil + } + + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } +} + +// waitForUpdate waits until the global hits update count for the daemon +// changes to at least the expected value and the global update queue is empty. +// Returns an error if timeout waiting for conditions to be met. +func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect int) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_global_send_duration_count", "gubernator_global_send_queue_length") + if err != nil { + return err + } + gsdc := metrics["gubernator_global_send_duration_count"] + gsql := metrics["gubernator_global_send_queue_length"] + + // It's possible a hit occurred twice if waiting for multiple peers to + // forward updates to the owner. + if int(gsdc.Value) >= expect && gsql.Value == 0 { + return nil + } + + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } +} + +// waitForIdle waits until both global broadcast and global hits queues are +// empty. +func waitForIdle(timeout clock.Duration, daemons ...*guber.Daemon) error { + var wg syncutil.WaitGroup + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + for _, d := range daemons { + wg.Run(func(raw any) error { + d := raw.(*guber.Daemon) + for { + metrics, err := getMetrics(d.Config().HTTPListenAddress, + "gubernator_global_queue_length", "gubernator_global_send_queue_length") + if err != nil { + return err + } + ggql := metrics["gubernator_global_queue_length"] + gsql := metrics["gubernator_global_send_queue_length"] + + if ggql.Value == 0 && gsql.Value == 0 { + return nil + } + + select { + case <-clock.After(100 * clock.Millisecond): + case <-ctx.Done(): + return ctx.Err() + } + } + }, d) + } + errs := wg.Wait() + if len(errs) > 0 { + return errs[0] + } + return nil +} + +func getMetricValue(t *testing.T, d *guber.Daemon, name string) float64 { + m, err := getMetricRequest(fmt.Sprintf("http://%s/metrics", d.Config().HTTPListenAddress), + name) + require.NoError(t, err) + if m == nil { + return 0 + } + return float64(m.Value) +} + +// Get metric counter values on each peer. 
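+// The returned map is keyed by daemon InstanceID so counters can be compared before and after a test.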
+func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]int { + counters := make(map[string]int) + for _, peer := range peers { + counters[peer.InstanceID] = int(getMetricValue(t, peer, name)) + } + return counters +} + +func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatus guber.Status, expectRemaining int64) { + if req.Hits != 0 { + t.Logf("Sending %d hits to peer %s", req.Hits, d.InstanceID) + } + client := d.MustClient() + ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{req}, + }) + require.NoError(t, err) + item := resp.Responses[0] + assert.Equal(t, "", item.Error) + if expectRemaining >= 0 { + assert.Equal(t, expectRemaining, item.Remaining) + } + assert.Equal(t, expectStatus, item.Status) + assert.Equal(t, req.Limit, item.Limit) +} + +func randomKey() string { + return fmt.Sprintf("%016x", rand.Int()) +} diff --git a/global.go b/global.go index 5af33301..47703f6e 100644 --- a/global.go +++ b/global.go @@ -18,7 +18,6 @@ package gubernator import ( "context" - "time" "github.com/mailgun/holster/v4/syncutil" "github.com/pkg/errors" @@ -29,28 +28,23 @@ import ( // globalManager manages async hit queue and updates peers in // the cluster periodically when a global rate limit we own updates. type globalManager struct { - hitsQueue chan *RateLimitReq - broadcastQueue chan broadcastItem - wg syncutil.WaitGroup - conf BehaviorConfig - log FieldLogger - instance *V1Instance // TODO circular import? V1Instance also holds a reference to globalManager - metricGlobalSendDuration prometheus.Summary - metricBroadcastDuration prometheus.Summary - metricBroadcastCounter *prometheus.CounterVec - metricGlobalQueueLength prometheus.Gauge -} - -type broadcastItem struct { - Request *RateLimitReq - RequestTime time.Time + hitsQueue chan *RateLimitReq + broadcastQueue chan *RateLimitReq + wg syncutil.WaitGroup + conf BehaviorConfig + log FieldLogger + instance *V1Instance // TODO circular import? V1Instance also holds a reference to globalManager + metricGlobalSendDuration prometheus.Summary + metricGlobalSendQueueLength prometheus.Gauge + metricBroadcastDuration prometheus.Summary + metricGlobalQueueLength prometheus.Gauge } func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager { gm := globalManager{ log: instance.log, hitsQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), - broadcastQueue: make(chan broadcastItem, conf.GlobalBatchLimit), + broadcastQueue: make(chan *RateLimitReq, conf.GlobalBatchLimit), instance: instance, conf: conf, metricGlobalSendDuration: prometheus.NewSummary(prometheus.SummaryOpts{ @@ -58,15 +52,15 @@ func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager Help: "The duration of GLOBAL async sends in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.99: 0.001}, }), + metricGlobalSendQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "gubernator_global_send_queue_length", + Help: "The count of requests queued up for global broadcast. 
This is only used for GetRateLimit requests using global behavior.", + }), metricBroadcastDuration: prometheus.NewSummary(prometheus.SummaryOpts{ Name: "gubernator_broadcast_duration", Help: "The duration of GLOBAL broadcasts to peers in seconds.", Objectives: map[float64]float64{0.5: 0.05, 0.99: 0.001}, }), - metricBroadcastCounter: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "gubernator_broadcast_counter", - Help: "The count of broadcasts.", - }, []string{"condition"}), metricGlobalQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "gubernator_global_queue_length", Help: "The count of requests queued up for global broadcast. This is only used for GetRateLimit requests using global behavior.", @@ -83,12 +77,9 @@ func (gm *globalManager) QueueHit(r *RateLimitReq) { } } -func (gm *globalManager) QueueUpdate(req *RateLimitReq, requestTime time.Time) { +func (gm *globalManager) QueueUpdate(req *RateLimitReq) { if req.Hits != 0 { - gm.broadcastQueue <- broadcastItem{ - Request: req, - RequestTime: requestTime, - } + gm.broadcastQueue <- req } } @@ -118,11 +109,13 @@ func (gm *globalManager) runAsyncHits() { } else { hits[key] = r } + gm.metricGlobalSendQueueLength.Set(float64(len(hits))) // Send the hits if we reached our batch limit if len(hits) == gm.conf.GlobalBatchLimit { gm.sendHits(hits) hits = make(map[string]*RateLimitReq) + gm.metricGlobalSendQueueLength.Set(0) return true } @@ -136,6 +129,7 @@ func (gm *globalManager) runAsyncHits() { if len(hits) != 0 { gm.sendHits(hits) hits = make(map[string]*RateLimitReq) + gm.metricGlobalSendQueueLength.Set(0) } case <-done: interval.Stop() @@ -198,18 +192,19 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { // and in a periodic frequency determined by GlobalSyncWait. 
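// Updates are de-duplicated by hash key, so only the most recent state for each rate limit is broadcast.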
func (gm *globalManager) runBroadcasts() { var interval = NewInterval(gm.conf.GlobalSyncWait) - updates := make(map[string]broadcastItem) + updates := make(map[string]*RateLimitReq) gm.wg.Until(func(done chan struct{}) bool { select { case update := <-gm.broadcastQueue: - updates[update.Request.HashKey()] = update + updates[update.HashKey()] = update + gm.metricGlobalQueueLength.Set(float64(len(updates))) // Send the hits if we reached our batch limit if len(updates) >= gm.conf.GlobalBatchLimit { - gm.metricBroadcastCounter.WithLabelValues("queue_full").Inc() gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]broadcastItem) + updates = make(map[string]*RateLimitReq) + gm.metricGlobalQueueLength.Set(0) return true } @@ -220,13 +215,13 @@ func (gm *globalManager) runBroadcasts() { } case <-interval.C: - if len(updates) != 0 { - gm.metricBroadcastCounter.WithLabelValues("timer").Inc() - gm.broadcastPeers(context.Background(), updates) - updates = make(map[string]broadcastItem) - } else { - gm.metricGlobalQueueLength.Set(0) + if len(updates) == 0 { + break } + gm.broadcastPeers(context.Background(), updates) + updates = make(map[string]*RateLimitReq) + gm.metricGlobalQueueLength.Set(0) + case <-done: interval.Stop() return false @@ -236,7 +231,7 @@ func (gm *globalManager) runBroadcasts() { } // broadcastPeers broadcasts global rate limit statuses to all other peers -func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]broadcastItem) { +func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*RateLimitReq) { defer prometheus.NewTimer(gm.metricBroadcastDuration).ObserveDuration() var req UpdatePeerGlobalsReq @@ -244,19 +239,19 @@ func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string] for _, update := range updates { // Get current rate limit state. 
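		// The request is cloned with Hits set to zero so this read does not consume any of the limit.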
- grlReq := proto.Clone(update.Request).(*RateLimitReq) + grlReq := proto.Clone(update).(*RateLimitReq) grlReq.Hits = 0 - status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq, update.RequestTime) + status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq) if err != nil { gm.log.WithError(err).Error("while retrieving rate limit status") continue } updateReq := &UpdatePeerGlobal{ - Key: update.Request.HashKey(), - Algorithm: update.Request.Algorithm, - Duration: update.Request.Duration, + Key: update.HashKey(), + Algorithm: update.Algorithm, + Duration: update.Duration, Status: status, - RequestTime: EpochMillis(update.RequestTime), + RequestTime: *update.RequestTime, } req.Globals = append(req.Globals, updateReq) } diff --git a/gubernator.go b/gubernator.go index fda9f92a..87a0a04d 100644 --- a/gubernator.go +++ b/gubernator.go @@ -21,7 +21,6 @@ import ( "fmt" "strings" "sync" - "time" "github.com/mailgun/errors" "github.com/mailgun/holster/v4/clock" @@ -188,6 +187,7 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G "Requests.RateLimits list too large; max size is '%d'", maxBatchSize) } + requestTime := EpochMillis(clock.Now()) resp := GetRateLimitsResp{ Responses: make([]*RateLimitResp, len(r.Requests)), } @@ -200,17 +200,19 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G var peer *PeerClient var err error - if len(req.UniqueKey) == 0 { + if req.UniqueKey == "" { metricCheckErrorCounter.WithLabelValues("Invalid request").Inc() resp.Responses[i] = &RateLimitResp{Error: "field 'unique_key' cannot be empty"} continue } - - if len(req.Name) == 0 { + if req.Name == "" { metricCheckErrorCounter.WithLabelValues("Invalid request").Inc() resp.Responses[i] = &RateLimitResp{Error: "field 'namespace' cannot be empty"} continue } + if req.RequestTime == nil || *req.RequestTime == 0 { + req.RequestTime = &requestTime + } if ctx.Err() != nil { err = errors.Wrap(ctx.Err(), "Error while iterating request items") @@ -578,21 +580,14 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - var requestTime time.Time - if r.RequestTime != nil { - requestTime = time.UnixMilli(*r.RequestTime) - } - if requestTime.IsZero() { - requestTime = clock.Now() - } - resp, err := s.workerPool.GetRateLimit(ctx, r, requestTime) + resp, err := s.workerPool.GetRateLimit(ctx, r) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } // If global behavior, then broadcast update to all peers. 
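	// The update is queued and broadcast asynchronously by the global manager, batched up to GlobalBatchLimit or flushed every GlobalSyncWait.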
if HasBehavior(r.Behavior, Behavior_GLOBAL) { - s.global.QueueUpdate(r, requestTime) + s.global.QueueUpdate(r) } metricGetRateLimitCounter.WithLabelValues("local").Inc() @@ -736,10 +731,10 @@ func (s *V1Instance) Describe(ch chan<- *prometheus.Desc) { metricGetRateLimitCounter.Describe(ch) metricOverLimitCounter.Describe(ch) metricWorkerQueue.Describe(ch) - s.global.metricBroadcastCounter.Describe(ch) s.global.metricBroadcastDuration.Describe(ch) s.global.metricGlobalQueueLength.Describe(ch) s.global.metricGlobalSendDuration.Describe(ch) + s.global.metricGlobalSendQueueLength.Describe(ch) } // Collect fetches metrics from the server for use by prometheus @@ -754,10 +749,10 @@ func (s *V1Instance) Collect(ch chan<- prometheus.Metric) { metricGetRateLimitCounter.Collect(ch) metricOverLimitCounter.Collect(ch) metricWorkerQueue.Collect(ch) - s.global.metricBroadcastCounter.Collect(ch) s.global.metricBroadcastDuration.Collect(ch) s.global.metricGlobalQueueLength.Collect(ch) s.global.metricGlobalSendDuration.Collect(ch) + s.global.metricGlobalSendQueueLength.Collect(ch) } // HasBehavior returns true if the provided behavior is set diff --git a/peer_client.go b/peer_client.go index 2f3c0905..794ebea7 100644 --- a/peer_client.go +++ b/peer_client.go @@ -22,7 +22,6 @@ import ( "fmt" "sync" "sync/atomic" - "time" "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/collections" @@ -70,7 +69,6 @@ type request struct { request *RateLimitReq resp chan *response ctx context.Context - requestTime time.Time } type PeerConfig struct { diff --git a/workers.go b/workers.go index 04557f76..76fa1e31 100644 --- a/workers.go +++ b/workers.go @@ -42,7 +42,6 @@ import ( "strconv" "sync" "sync/atomic" - "time" "github.com/OneOfOne/xxhash" "github.com/mailgun/holster/v4/errors" @@ -200,7 +199,7 @@ func (p *WorkerPool) dispatch(worker *Worker) { } resp := new(response) - resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, req.requestTime, worker.cache) + resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, worker.cache) select { case req.resp <- resp: // Success. @@ -259,7 +258,7 @@ func (p *WorkerPool) dispatch(worker *Worker) { } // GetRateLimit sends a GetRateLimit request to worker pool. -func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, requestTime time.Time) (*RateLimitResp, error) { +func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) (*RateLimitResp, error) { // Delegate request to assigned channel based on request key. worker := p.getWorker(rlRequest.HashKey()) queueGauge := metricWorkerQueue.WithLabelValues("GetRateLimit", worker.name) @@ -269,7 +268,6 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, ctx: ctx, resp: make(chan *response, 1), request: rlRequest, - requestTime: requestTime, } // Send request. @@ -291,14 +289,14 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, } // Handle request received by worker. 
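// The rate limit algorithm is selected by req.Algorithm: token bucket or leaky bucket.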
-func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, requestTime time.Time, cache Cache) (*RateLimitResp, error) { +func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, cache Cache) (*RateLimitResp, error) { defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("Worker.handleGetRateLimit")).ObserveDuration() var rlResponse *RateLimitResp var err error switch req.Algorithm { case Algorithm_TOKEN_BUCKET: - rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, requestTime) + rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req) if err != nil { msg := "Error in tokenBucket" countError(err, msg) @@ -307,7 +305,7 @@ func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, } case Algorithm_LEAKY_BUCKET: - rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, requestTime) + rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req) if err != nil { msg := "Error in leakyBucket" countError(err, msg) From 7c67a321658eb7f7d8d4d7d1f62de308d589c55c Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 11:22:41 -0400 Subject: [PATCH 07/23] Fix lint errors. --- functional_test.go | 2 +- peer_client.go | 6 +++--- workers.go | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/functional_test.go b/functional_test.go index e7b66ac1..516c3f8d 100644 --- a/functional_test.go +++ b/functional_test.go @@ -2108,7 +2108,7 @@ func waitForIdle(timeout clock.Duration, daemons ...*guber.Daemon) error { ggql := metrics["gubernator_global_queue_length"] gsql := metrics["gubernator_global_send_queue_length"] - if ggql.Value == 0 && gsql.Value == 0 { + if ggql.Value == 0 && gsql.Value == 0 { return nil } diff --git a/peer_client.go b/peer_client.go index 794ebea7..39c13c14 100644 --- a/peer_client.go +++ b/peer_client.go @@ -66,9 +66,9 @@ type response struct { } type request struct { - request *RateLimitReq - resp chan *response - ctx context.Context + request *RateLimitReq + resp chan *response + ctx context.Context } type PeerConfig struct { diff --git a/workers.go b/workers.go index 76fa1e31..f6ed60a9 100644 --- a/workers.go +++ b/workers.go @@ -265,9 +265,9 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) queueGauge.Inc() defer queueGauge.Dec() handlerRequest := request{ - ctx: ctx, - resp: make(chan *response, 1), - request: rlRequest, + ctx: ctx, + resp: make(chan *response, 1), + request: rlRequest, } // Send request. From 24bee89a222c3ab7690e50a294830e9315b09c57 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 11:24:35 -0400 Subject: [PATCH 08/23] Tidy code. 
--- algorithms.go | 5 ----- gubernator.go | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/algorithms.go b/algorithms.go index 8d49bb35..7d452fc3 100644 --- a/algorithms.go +++ b/algorithms.go @@ -18,7 +18,6 @@ package gubernator import ( "context" - "time" "github.com/mailgun/holster/v4/clock" "github.com/prometheus/client_golang/prometheus" @@ -480,7 +479,3 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) return &rl, nil } - -func EpochMillis(t time.Time) int64 { - return t.UnixNano() / 1_000_000 -} diff --git a/gubernator.go b/gubernator.go index 87a0a04d..9542e7da 100644 --- a/gubernator.go +++ b/gubernator.go @@ -21,6 +21,7 @@ import ( "fmt" "strings" "sync" + "time" "github.com/mailgun/errors" "github.com/mailgun/holster/v4/clock" @@ -793,3 +794,7 @@ func isDeadlineExceeded(err error) bool { } return errors.Is(err, context.DeadlineExceeded) } + +func EpochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} From ea424420dd4447d9a3772076da6acb1bc7a9ec2b Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 11:38:40 -0400 Subject: [PATCH 09/23] Add back tests that were erroneously removed. --- functional_test.go | 215 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 207 insertions(+), 8 deletions(-) diff --git a/functional_test.go b/functional_test.go index 516c3f8d..dfa12ee7 100644 --- a/functional_test.go +++ b/functional_test.go @@ -1159,6 +1159,205 @@ func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { sendHit(guber.Status_OVER_LIMIT, 0, 0) } +func TestGlobalRequestMoreThanAvailable(t *testing.T) { + name := t.Name() + key := randomKey() + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + + sendHit := func(client guber.V1Client, expectedStatus guber.Status, hits int64, remaining int64) { + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{ + { + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_LEAKY_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 1_000, + Hits: hits, + Limit: 100, + }, + }, + }) + assert.NoError(t, err) + assert.Equal(t, "", resp.Responses[0].GetError()) + assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) + } + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") + + // Ensure GRPC has connections to each peer before we start, as we want + // the actual test requests to happen quite fast. + for _, p := range peers { + sendHit(p.MustClient(), guber.Status_UNDER_LIMIT, 0, 100) + } + + // Send a request for 50 hits from each non owning peer in the cluster. These requests + // will be queued and sent to the owner as accumulated hits. As a result of the async nature + // of `Behavior_GLOBAL` rate limit requests spread across peers like this will be allowed to + // over-consume their resource within the rate limit window until the owner is updated and + // a broadcast to all peers is received. + // + // The maximum number of resources that can be over-consumed can be calculated by multiplying + // the remainder by the number of peers in the cluster. For example: If you have a remainder of 100 + // and a cluster of 10 instances, then the maximum over-consumed resource is 1,000. 
If you need + // a more accurate remaining calculation, and wish to avoid over consuming a resource, then do + // not use `Behavior_GLOBAL`. + for _, p := range peers { + sendHit(p.MustClient(), guber.Status_UNDER_LIMIT, 50, 50) + } + + // Wait for the broadcast from the owner to the peers + require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+1)) + + // We should be over the limit + sendHit(peers[0].MustClient(), guber.Status_OVER_LIMIT, 1, 0) +} + +func TestGlobalNegativeHits(t *testing.T) { + name := t.Name() + key := randomKey() + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + + sendHit := func(client guber.V1Client, status guber.Status, hits int64, remaining int64) { + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{ + { + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 100, + Hits: hits, + Limit: 2, + }, + }, + }) + assert.NoError(t, err) + assert.Equal(t, "", resp.Responses[0].GetError()) + assert.Equal(t, status, resp.Responses[0].GetStatus()) + assert.Equal(t, remaining, resp.Responses[0].Remaining) + } + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") + require.NoError(t, err) + + // Send a negative hit on a rate limit with no hits + sendHit(peers[0].MustClient(), guber.Status_UNDER_LIMIT, -1, 3) + + // Wait for the negative remaining to propagate + require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+1)) + + // Send another negative hit to a different peer + sendHit(peers[1].MustClient(), guber.Status_UNDER_LIMIT, -1, 4) + + require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+2)) + + // Should have 4 in the remainder + sendHit(peers[2].MustClient(), guber.Status_UNDER_LIMIT, 4, 0) + + require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+3)) + + sendHit(peers[3].MustClient(), guber.Status_UNDER_LIMIT, 0, 0) +} + +func TestGlobalResetRemaining(t *testing.T) { + name := t.Name() + key := randomKey() + owner, err := cluster.FindOwningDaemon(name, key) + require.NoError(t, err) + peers, err := cluster.ListNonOwningDaemons(name, key) + require.NoError(t, err) + + sendHit := func(client guber.V1Client, expectedStatus guber.Status, hits int64, remaining int64) { + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + defer cancel() + resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{ + { + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_LEAKY_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 1_000, + Hits: hits, + Limit: 100, + }, + }, + }) + assert.NoError(t, err) + assert.Equal(t, "", resp.Responses[0].GetError()) + assert.Equal(t, expectedStatus, resp.Responses[0].GetStatus()) + assert.Equal(t, remaining, resp.Responses[0].Remaining) + } + + require.NoError(t, waitForIdle(1*time.Minute, cluster.GetDaemons()...)) + + prev := getMetricValue(t, owner, "gubernator_broadcast_duration_count") + require.NoError(t, err) + + for _, p := range peers { + sendHit(p.MustClient(), guber.Status_UNDER_LIMIT, 50, 50) + } + + // Wait for the broadcast from the owner to the peers + require.NoError(t, 
waitForBroadcast(clock.Second*10, owner, prev+1)) + + // We should be over the limit and remaining should be zero + sendHit(peers[0].MustClient(), guber.Status_OVER_LIMIT, 1, 0) + + // Now reset the remaining + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + defer cancel() + resp, err := peers[0].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{ + { + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_LEAKY_BUCKET, + Behavior: guber.Behavior_GLOBAL | guber.Behavior_RESET_REMAINING, + Duration: guber.Minute * 1_000, + Hits: 0, + Limit: 100, + }, + }, + }) + require.NoError(t, err) + assert.NotEqual(t, 100, resp.Responses[0].Remaining) + + // Wait for the reset to propagate. + require.NoError(t, waitForBroadcast(clock.Second*10, owner, prev+2)) + + // Check a different peer to ensure remaining has been reset + resp, err = peers[1].MustClient().GetRateLimits(ctx, &guber.GetRateLimitsReq{ + Requests: []*guber.RateLimitReq{ + { + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_LEAKY_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 1_000, + Hits: 0, + Limit: 100, + }, + }, + }) + require.NoError(t, err) + assert.NotEqual(t, 100, resp.Responses[0].Remaining) +} + func TestChangeLimit(t *testing.T) { client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.Nil(t, errs) @@ -1931,7 +2130,7 @@ func TestGlobalBehavior(t *testing.T) { for _, peer := range cluster.GetDaemons() { expected := gprlCounters[peer.InstanceID] if peer.InstanceID == owner.InstanceID { - expected += len(expectUpdate) + expected += float64(len(expectUpdate)) } assert.Equal(t, expected, gprlCounters2[peer.InstanceID], "gprlCounter %s", peer.InstanceID) } @@ -2033,7 +2232,7 @@ func getMetric(in io.Reader, name string) (*model.Sample, error) { // waitForBroadcast waits until the broadcast count for the daemon changes to // at least the expected value and the broadcast queue is empty. // Returns an error if timeout waiting for conditions to be met. -func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error { +func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect float64) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() @@ -2048,7 +2247,7 @@ func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error // It's possible a broadcast occurred twice if waiting for multiple // peers to forward updates to non-owners. - if int(gbdc.Value) >= expect && ggql.Value == 0 { + if float64(gbdc.Value) >= expect && ggql.Value == 0 { return nil } @@ -2063,7 +2262,7 @@ func waitForBroadcast(timeout clock.Duration, d *guber.Daemon, expect int) error // waitForUpdate waits until the global hits update count for the daemon // changes to at least the expected value and the global update queue is empty. // Returns an error if timeout waiting for conditions to be met. -func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect int) error { +func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect float64) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() @@ -2078,7 +2277,7 @@ func waitForUpdate(timeout clock.Duration, d *guber.Daemon, expect int) error { // It's possible a hit occurred twice if waiting for multiple peers to // forward updates to the owner. 
- if int(gsdc.Value) >= expect && gsql.Value == 0 { + if float64(gsdc.Value) >= expect && gsql.Value == 0 { return nil } @@ -2138,10 +2337,10 @@ func getMetricValue(t *testing.T, d *guber.Daemon, name string) float64 { } // Get metric counter values on each peer. -func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]int { - counters := make(map[string]int) +func getPeerCounters(t *testing.T, peers []*guber.Daemon, name string) map[string]float64 { + counters := make(map[string]float64) for _, peer := range peers { - counters[peer.InstanceID] = int(getMetricValue(t, peer, name)) + counters[peer.InstanceID] = getMetricValue(t, peer, name) } return counters } From bd38ee691ca5687a1666c5aba9bf270953a11204 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 12:09:47 -0400 Subject: [PATCH 10/23] Fix tests. --- functional_test.go | 72 ++++++++++++++++++++++++++++++--------------- gubernator.go | 4 +-- peer_client_test.go | 8 +++-- 3 files changed, 56 insertions(+), 28 deletions(-) diff --git a/functional_test.go b/functional_test.go index dfa12ee7..348c3199 100644 --- a/functional_test.go +++ b/functional_test.go @@ -1526,6 +1526,8 @@ func TestResetRemaining(t *testing.T) { } func TestHealthCheck(t *testing.T) { + name := t.Name() + key := randomKey() client, err := guber.DialV1Server(cluster.DaemonAt(0).GRPCListeners[0].Addr().String(), nil) require.NoError(t, err) @@ -1539,8 +1541,8 @@ func TestHealthCheck(t *testing.T) { _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_health_check", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_BATCHING, Duration: guber.Second * 3, @@ -1589,19 +1591,32 @@ func TestHealthCheck(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*15) defer cancel() require.NoError(t, cluster.Restart(ctx)) + + // wait for every peer instance to come back online + numPeers := int32(len(cluster.GetPeers())) + for _, peer := range cluster.GetPeers() { + peerClient, err := guber.DialV1Server(peer.GRPCAddress, nil) + require.NoError(t, err) + testutil.UntilPass(t, 10, 300*clock.Millisecond, func(t testutil.TestingT) { + healthResp, err = peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) + assert.Equal(t, "healthy", healthResp.GetStatus()) + assert.Equal(t, numPeers, healthResp.PeerCount) + }) + } } func TestLeakyBucketDivBug(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - + name := t.Name() + key := randomKey() client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(t, err) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_div", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 1, @@ -1619,8 +1634,8 @@ func TestLeakyBucketDivBug(t *testing.T) { resp, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_div", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 100, @@ -1644,6 +1659,8 @@ func TestMultiRegion(t *testing.T) { } func TestGRPCGateway(t *testing.T) { + name := t.Name() + key := randomKey() 
address := cluster.GetRandomPeer(cluster.DataCenterNone).HTTPAddress resp, err := http.DefaultClient.Get("http://" + address + "/v1/HealthCheck") require.NoError(t, err) @@ -1663,8 +1680,8 @@ func TestGRPCGateway(t *testing.T) { payload, err := json.Marshal(&guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "requests_per_sec", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Duration: guber.Millisecond * 1000, Hits: 1, Limit: 10, @@ -1692,6 +1709,7 @@ func TestGRPCGateway(t *testing.T) { } func TestGetPeerRateLimits(t *testing.T) { + name := t.Name() ctx := context.Background() peerClient, err := guber.NewPeerClient(guber.PeerConfig{ Info: cluster.GetRandomPeer(cluster.DataCenterNone), @@ -1701,6 +1719,7 @@ func TestGetPeerRateLimits(t *testing.T) { t.Run("Stable rate check request order", func(t *testing.T) { // Ensure response order matches rate check request order. // Try various batch sizes. + requestTime := epochMillis(clock.Now()) testCases := []int{1, 2, 5, 10, 100, 1000} for _, n := range testCases { @@ -1711,13 +1730,14 @@ func TestGetPeerRateLimits(t *testing.T) { } for i := 0; i < n; i++ { req.Requests[i] = &guber.RateLimitReq{ - Name: "Foobar", - UniqueKey: fmt.Sprintf("%08x", i), - Hits: 0, - Limit: 1000 + int64(i), - Duration: 1000, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_BATCHING, + Name: name, + UniqueKey: randomKey(), + Hits: 0, + Limit: 1000 + int64(i), + Duration: 1000, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_BATCHING, + RequestTime: &requestTime, } } @@ -1743,16 +1763,18 @@ func TestGetPeerRateLimits(t *testing.T) { func TestGlobalBehavior(t *testing.T) { const limit = 1000 broadcastTimeout := 400 * time.Millisecond + requestTime := epochMillis(clock.Now()) makeReq := func(name, key string, hits int64) *guber.RateLimitReq { return &guber.RateLimitReq{ - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 3, - Hits: hits, - Limit: limit, + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 3, + Hits: hits, + Limit: limit, + RequestTime: &requestTime, } } @@ -2368,3 +2390,7 @@ func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatu func randomKey() string { return fmt.Sprintf("%016x", rand.Int()) } + +func epochMillis(t time.Time) int64 { + return t.UnixNano() / 1_000_000 +} diff --git a/gubernator.go b/gubernator.go index 9542e7da..e931c41c 100644 --- a/gubernator.go +++ b/gubernator.go @@ -188,7 +188,7 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G "Requests.RateLimits list too large; max size is '%d'", maxBatchSize) } - requestTime := EpochMillis(clock.Now()) + requestTime := epochMillis(clock.Now()) resp := GetRateLimitsResp{ Responses: make([]*RateLimitResp, len(r.Requests)), } @@ -795,6 +795,6 @@ func isDeadlineExceeded(err error) bool { return errors.Is(err, context.DeadlineExceeded) } -func EpochMillis(t time.Time) int64 { +func epochMillis(t time.Time) int64 { return t.UnixNano() / 1_000_000 } diff --git a/peer_client_test.go b/peer_client_test.go index d739f40a..926eb2da 100644 --- a/peer_client_test.go +++ b/peer_client_test.go @@ -37,6 +37,7 @@ func TestPeerClientShutdown(t *testing.T) { } const threads = 10 + requestTime := epochMillis(clock.Now()) cases := []test{ {"No batching", gubernator.Behavior_NO_BATCHING}, @@ -71,9 +72,10 @@ func 
TestPeerClientShutdown(t *testing.T) { wg.Go(func() error { ctx := context.Background() _, err := client.GetPeerRateLimit(ctx, &gubernator.RateLimitReq{ - Hits: 1, - Limit: 100, - Behavior: c.Behavior, + Hits: 1, + Limit: 100, + Behavior: c.Behavior, + RequestTime: &requestTime, }) if err != nil { From 65ee4fa143772ab24d2c4be007d002c615731048 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 12:45:30 -0400 Subject: [PATCH 11/23] Fix benchmark test errors. --- Makefile | 2 +- benchmark_test.go | 35 ++++++++++++++++------------------- functional_test.go | 33 ++++++++++++++------------------- 3 files changed, 31 insertions(+), 39 deletions(-) diff --git a/Makefile b/Makefile index 3bdd5c12..5baa4d74 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ test: ## Run unit tests and measure code coverage .PHONY: bench bench: ## Run Go benchmarks - go test ./... -bench . -benchtime 5s -timeout 0 -run=XXX -benchmem + go test ./... -bench . -benchtime 5s -timeout 0 -run='^$$' -benchmem .PHONY: docker docker: ## Build Docker image diff --git a/benchmark_test.go b/benchmark_test.go index 5a383761..20323dcd 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -22,6 +22,7 @@ import ( guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" + "github.com/mailgun/holster/v4/clock" "github.com/mailgun/holster/v4/syncutil" "github.com/stretchr/testify/require" ) @@ -31,6 +32,7 @@ func BenchmarkServer(b *testing.B) { conf := guber.Config{} err := conf.SetDefaults() require.NoError(b, err, "Error in conf.SetDefaults") + requestTime := epochMillis(clock.Now()) b.Run("GetPeerRateLimit() with no batching", func(b *testing.B) { client, err := guber.NewPeerClient(guber.PeerConfig{ @@ -40,17 +42,17 @@ func BenchmarkServer(b *testing.B) { if err != nil { b.Errorf("Error building client: %s", err) } - b.ResetTimer() for n := 0; n < b.N; n++ { - _, err := client.GetPeerRateLimit(context.Background(), &guber.RateLimitReq{ - Name: "get_peer_rate_limits_benchmark", - UniqueKey: guber.RandomString(10), - Behavior: guber.Behavior_NO_BATCHING, - Limit: 10, - Duration: 5, - Hits: 1, + _, err := client.GetPeerRateLimit(ctx, &guber.RateLimitReq{ + Name: b.Name(), + UniqueKey: guber.RandomString(10), + Behavior: guber.Behavior_NO_BATCHING, + Limit: 10, + Duration: 5, + Hits: 1, + RequestTime: &requestTime, }) if err != nil { b.Errorf("Error in client.GetPeerRateLimit: %s", err) @@ -61,14 +63,13 @@ func BenchmarkServer(b *testing.B) { b.Run("GetRateLimit()", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Limit: 10, Duration: guber.Second * 5, @@ -85,14 +86,13 @@ func BenchmarkServer(b *testing.B) { b.Run("GetRateLimitGlobal()", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { - _, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ + _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Behavior: 
guber.Behavior_GLOBAL, Limit: 10, @@ -110,11 +110,10 @@ func BenchmarkServer(b *testing.B) { b.Run("HealthCheck", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() for n := 0; n < b.N; n++ { - if _, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}); err != nil { + if _, err := client.HealthCheck(ctx, &guber.HealthCheckReq{}); err != nil { b.Errorf("Error in client.HealthCheck: %s", err) } } @@ -123,17 +122,15 @@ func BenchmarkServer(b *testing.B) { b.Run("Thundering herd", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") - b.ResetTimer() - fan := syncutil.NewFanOut(100) for n := 0; n < b.N; n++ { fan.Run(func(o interface{}) error { - _, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ + _, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "get_rate_limit_benchmark", + Name: b.Name(), UniqueKey: guber.RandomString(10), Limit: 10, Duration: guber.Second * 5, diff --git a/functional_test.go b/functional_test.go index 348c3199..d4988cc4 100644 --- a/functional_test.go +++ b/functional_test.go @@ -975,12 +975,12 @@ func TestMissingFields(t *testing.T) { func TestGlobalRateLimits(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) - var resetTime int64 + var firstResetTime int64 sendHit := func(client guber.V1Client, status guber.Status, hits, remain int64) { ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) @@ -1006,10 +1006,10 @@ func TestGlobalRateLimits(t *testing.T) { assert.Equal(t, int64(5), item.Limit) // ResetTime should not change during test. - if resetTime == 0 { - resetTime = item.ResetTime + if firstResetTime == 0 { + firstResetTime = item.ResetTime } - assert.Equal(t, resetTime, item.ResetTime) + assert.Equal(t, firstResetTime, item.ResetTime) // ensure that we have a canonical host assert.NotEmpty(t, item.Metadata["owner"]) @@ -1051,12 +1051,11 @@ func TestGlobalRateLimits(t *testing.T) { func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { ctx := context.Background() name := t.Name() - key := randomKey() + key := guber.RandomString(10) // Determine owner and non-owner peers. 
owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) - // ownerAddr := owner.ownerPeerInfo.GRPCAddress peers, err := cluster.ListNonOwningDaemons(name, key) require.NoError(t, err) nonOwner := peers[0] @@ -1110,7 +1109,7 @@ func TestGlobalRateLimitsWithLoadBalancing(t *testing.T) { func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) @@ -1161,7 +1160,7 @@ func TestGlobalRateLimitsPeerOverLimit(t *testing.T) { func TestGlobalRequestMoreThanAvailable(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) @@ -1221,7 +1220,7 @@ func TestGlobalRequestMoreThanAvailable(t *testing.T) { func TestGlobalNegativeHits(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) @@ -1275,7 +1274,7 @@ func TestGlobalNegativeHits(t *testing.T) { func TestGlobalResetRemaining(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) owner, err := cluster.FindOwningDaemon(name, key) require.NoError(t, err) peers, err := cluster.ListNonOwningDaemons(name, key) @@ -1527,7 +1526,7 @@ func TestResetRemaining(t *testing.T) { func TestHealthCheck(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) client, err := guber.DialV1Server(cluster.DaemonAt(0).GRPCListeners[0].Addr().String(), nil) require.NoError(t, err) @@ -1608,7 +1607,7 @@ func TestHealthCheck(t *testing.T) { func TestLeakyBucketDivBug(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() name := t.Name() - key := randomKey() + key := guber.RandomString(10) client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(t, err) @@ -1660,7 +1659,7 @@ func TestMultiRegion(t *testing.T) { func TestGRPCGateway(t *testing.T) { name := t.Name() - key := randomKey() + key := guber.RandomString(10) address := cluster.GetRandomPeer(cluster.DataCenterNone).HTTPAddress resp, err := http.DefaultClient.Get("http://" + address + "/v1/HealthCheck") require.NoError(t, err) @@ -1731,7 +1730,7 @@ func TestGetPeerRateLimits(t *testing.T) { for i := 0; i < n; i++ { req.Requests[i] = &guber.RateLimitReq{ Name: name, - UniqueKey: randomKey(), + UniqueKey: guber.RandomString(10), Hits: 0, Limit: 1000 + int64(i), Duration: 1000, @@ -2387,10 +2386,6 @@ func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatu assert.Equal(t, req.Limit, item.Limit) } -func randomKey() string { - return fmt.Sprintf("%016x", rand.Int()) -} - func epochMillis(t time.Time) int64 { return t.UnixNano() / 1_000_000 } From d5c74d25ba611ca78c08e9a7b77ef019d20e9a7f Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 12:56:29 -0400 Subject: [PATCH 12/23] Fix TestHealthCheck. 
--- functional_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/functional_test.go b/functional_test.go index d4988cc4..c3afc5b2 100644 --- a/functional_test.go +++ b/functional_test.go @@ -1563,8 +1563,8 @@ func TestHealthCheck(t *testing.T) { _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_health_check", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Algorithm: guber.Algorithm_TOKEN_BUCKET, Behavior: guber.Behavior_GLOBAL, Duration: guber.Second * 3, @@ -1575,7 +1575,7 @@ func TestHealthCheck(t *testing.T) { }) require.Nil(t, err) - testutil.UntilPass(t, 20, clock.Millisecond*300, func(t testutil.TestingT) { + testutil.UntilPass(t, 20, 300*clock.Millisecond, func(t testutil.TestingT) { // Check the health again to get back the connection error healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) if assert.Nil(t, err) { From cd7bbab89e71d58ad6341940a1b0a060954f376f Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 14:09:36 -0400 Subject: [PATCH 13/23] Fix flaky test `TestHealthCheck`. --- functional_test.go | 204 ++++++++++++++++++--------------------------- 1 file changed, 81 insertions(+), 123 deletions(-) diff --git a/functional_test.go b/functional_test.go index c3afc5b2..388cdf96 100644 --- a/functional_test.go +++ b/functional_test.go @@ -31,6 +31,7 @@ import ( "testing" "time" + "github.com/mailgun/errors" guber "github.com/mailgun/gubernator/v2" "github.com/mailgun/gubernator/v2/cluster" "github.com/mailgun/holster/v4/clock" @@ -48,30 +49,12 @@ import ( // Setup and shutdown the mock gubernator cluster for the entire test suite func TestMain(m *testing.M) { - if err := cluster.StartWith([]guber.PeerInfo{ - {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984", DataCenter: cluster.DataCenterNone}, - {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985", DataCenter: cluster.DataCenterNone}, - - // DataCenterOne - {GRPCAddress: "127.0.0.1:9890", HTTPAddress: "127.0.0.1:9880", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9891", HTTPAddress: "127.0.0.1:9881", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9892", HTTPAddress: "127.0.0.1:9882", DataCenter: cluster.DataCenterOne}, - {GRPCAddress: "127.0.0.1:9893", HTTPAddress: "127.0.0.1:9883", DataCenter: cluster.DataCenterOne}, - }); err != nil { + err := startGubernator() + if err != nil { fmt.Println(err) os.Exit(1) } - // Populate peer clients. Avoids data races when goroutines conflict trying - // to instantiate client singletons. 
- for _, peer := range cluster.GetDaemons() { - _ = peer.MustClient() - } - code := m.Run() cluster.Stop() @@ -80,8 +63,8 @@ func TestMain(m *testing.M) { } func TestOverTheLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -115,7 +98,7 @@ func TestOverTheLimit(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -135,7 +118,7 @@ func TestMultipleAsync(t *testing.T) { t.Logf("Asking Peer: %s", cluster.GetPeers()[0].GRPCAddress) client, errs := guber.DialV1Server(cluster.GetPeers()[0].GRPCAddress, nil) - require.Nil(t, errs) + require.NoError(t, errs) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ @@ -159,7 +142,7 @@ func TestMultipleAsync(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) require.Len(t, resp.Responses, 2) @@ -178,8 +161,8 @@ func TestTokenBucket(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() addr := cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress - client, errs := guber.DialV1Server(addr, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(addr, nil) + require.NoError(t, err) tests := []struct { name string @@ -221,7 +204,7 @@ func TestTokenBucket(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -238,8 +221,8 @@ func TestTokenBucket(t *testing.T) { func TestTokenBucketGregorian(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -296,7 +279,7 @@ func TestTokenBucketGregorian(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -314,8 +297,8 @@ func TestTokenBucketNegativeHits(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() addr := cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress - client, errs := guber.DialV1Server(addr, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(addr, nil) + require.NoError(t, err) tests := []struct { name string @@ -368,7 +351,7 @@ func TestTokenBucketNegativeHits(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -384,8 +367,8 @@ func TestTokenBucketNegativeHits(t *testing.T) { func TestDrainOverLimit(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -494,8 +477,8 @@ func TestTokenBucketRequestMoreThanAvailable(t *testing.T) { func TestLeakyBucket(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -621,8 +604,8 @@ func TestLeakyBucket(t *testing.T) { func TestLeakyBucketWithBurst(t *testing.T) { 
defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -728,8 +711,8 @@ func TestLeakyBucketWithBurst(t *testing.T) { func TestLeakyBucketGregorian(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -798,8 +781,8 @@ func TestLeakyBucketGregorian(t *testing.T) { func TestLeakyBucketNegativeHits(t *testing.T) { defer clock.Freeze(clock.Now()).Unfreeze() - client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Name string @@ -911,8 +894,8 @@ func TestLeakyBucketRequestMoreThanAvailable(t *testing.T) { } func TestMissingFields(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Req *guber.RateLimitReq @@ -967,7 +950,7 @@ func TestMissingFields(t *testing.T) { resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{test.Req}, }) - require.Nil(t, err) + require.NoError(t, err) assert.Equal(t, test.Error, resp.Responses[0].Error, i) assert.Equal(t, test.Status, resp.Responses[0].Status, i) } @@ -1358,8 +1341,8 @@ func TestGlobalResetRemaining(t *testing.T) { } func TestChangeLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -1440,7 +1423,7 @@ func TestChangeLimit(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -1453,8 +1436,8 @@ func TestChangeLimit(t *testing.T) { } func TestResetRemaining(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) + require.NoError(t, err) tests := []struct { Remaining int64 @@ -1513,7 +1496,7 @@ func TestResetRemaining(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) rl := resp.Responses[0] @@ -1525,83 +1508,28 @@ func TestResetRemaining(t *testing.T) { } func TestHealthCheck(t *testing.T) { - name := t.Name() - key := guber.RandomString(10) - client, err := guber.DialV1Server(cluster.DaemonAt(0).GRPCListeners[0].Addr().String(), nil) - require.NoError(t, err) - - // Check that the cluster is healthy to start with - healthResp, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - require.NoError(t, err) - - require.Equal(t, "healthy", healthResp.GetStatus()) - - // Create a global rate limit that will need to be sent to all peers in the cluster - _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ - Requests: 
[]*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_BATCHING, - Duration: guber.Second * 3, - Hits: 1, - Limit: 5, - }, - }, - }) - require.Nil(t, err) - - // Stop the rest of the cluster to ensure errors occur on our instance - for i := 1; i < cluster.NumOfDaemons(); i++ { - d := cluster.DaemonAt(i) - require.NotNil(t, d) - d.Close() + // Check that the cluster is healthy to start with. + for _, peer := range cluster.GetDaemons() { + healthResp, err := peer.MustClient().HealthCheck(context.Background(), &guber.HealthCheckReq{}) + require.NoError(t, err) + assert.Equal(t, "healthy", healthResp.Status) } - // Hit the global rate limit again this time causing a connection error - _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ - Requests: []*guber.RateLimitReq{ - { - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Second * 3, - Hits: 1, - Limit: 5, - }, - }, - }) - require.Nil(t, err) + // Stop the cluster to ensure errors occur on our instance. + cluster.Stop() + // Check the health again to get back the connection error. testutil.UntilPass(t, 20, 300*clock.Millisecond, func(t testutil.TestingT) { - // Check the health again to get back the connection error - healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - if assert.Nil(t, err) { - return + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + for _, peer := range cluster.GetDaemons() { + _, err := peer.MustClient().HealthCheck(ctx, &guber.HealthCheckReq{}) + assert.Error(t, err, "connect: connection refused") } - - assert.Equal(t, "unhealthy", healthResp.GetStatus()) - assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") }) - // Restart stopped instances - ctx, cancel := context.WithTimeout(context.Background(), clock.Second*15) - defer cancel() - require.NoError(t, cluster.Restart(ctx)) - - // wait for every peer instance to come back online - numPeers := int32(len(cluster.GetPeers())) - for _, peer := range cluster.GetPeers() { - peerClient, err := guber.DialV1Server(peer.GRPCAddress, nil) - require.NoError(t, err) - testutil.UntilPass(t, 10, 300*clock.Millisecond, func(t testutil.TestingT) { - healthResp, err = peerClient.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - assert.Equal(t, "healthy", healthResp.GetStatus()) - assert.Equal(t, numPeers, healthResp.PeerCount) - }) - } + // Restart so cluster is ready for next test. 
+ require.NoError(t, startGubernator()) } func TestLeakyBucketDivBug(t *testing.T) { @@ -2389,3 +2317,33 @@ func sendHit(t *testing.T, d *guber.Daemon, req *guber.RateLimitReq, expectStatu func epochMillis(t time.Time) int64 { return t.UnixNano() / 1_000_000 } + +func startGubernator() error { + err := cluster.StartWith([]guber.PeerInfo{ + {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984", DataCenter: cluster.DataCenterNone}, + {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985", DataCenter: cluster.DataCenterNone}, + + // DataCenterOne + {GRPCAddress: "127.0.0.1:9890", HTTPAddress: "127.0.0.1:9880", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9891", HTTPAddress: "127.0.0.1:9881", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9892", HTTPAddress: "127.0.0.1:9882", DataCenter: cluster.DataCenterOne}, + {GRPCAddress: "127.0.0.1:9893", HTTPAddress: "127.0.0.1:9883", DataCenter: cluster.DataCenterOne}, + }) + if err != nil { + return errors.Wrap(err, "while starting cluster") + } + + // Populate peer clients. Avoids data races when goroutines conflict trying + // to instantiate client singletons. + for _, peer := range cluster.GetDaemons() { + _, err = peer.Client() + if err != nil { + return errors.Wrap(err, "while connecting client") + } + } + return nil +} From 0fb2a335531f6b5a1f3e6b7d7f6c146bf52997af Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 14:45:00 -0400 Subject: [PATCH 14/23] Backwards compatibility needed for upgrading. --- gubernator.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gubernator.go b/gubernator.go index e931c41c..32642501 100644 --- a/gubernator.go +++ b/gubernator.go @@ -503,6 +503,12 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits SetBehavior(&rin.req.Behavior, Behavior_DRAIN_OVER_LIMIT, true) } + // Assign default to RequestTime for backwards compatibility. + if r.RequestTime == nil || *r.RequestTime == 0 { + requestTime := epochMillis(clock.Now()) + r.RequestTime = &requestTime + } + rl, err := s.getLocalRateLimit(ctx, rin.req) if err != nil { // Return the error for this request From 47eede560ccf6f754921a2e78845f14381e3f5e9 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 14:56:54 -0400 Subject: [PATCH 15/23] Fix compile error. --- gubernator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gubernator.go b/gubernator.go index 32642501..d0869cd3 100644 --- a/gubernator.go +++ b/gubernator.go @@ -504,9 +504,9 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits } // Assign default to RequestTime for backwards compatibility. - if r.RequestTime == nil || *r.RequestTime == 0 { + if rin.req.RequestTime == nil || *rin.req.RequestTime == 0 { requestTime := epochMillis(clock.Now()) - r.RequestTime = &requestTime + rin.req.RequestTime = &requestTime } rl, err := s.getLocalRateLimit(ctx, rin.req) From 2229596b52cda7c38077d0bbc80c8a07e0dde229 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 15:27:45 -0400 Subject: [PATCH 16/23] Fix test. 
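
TestLeakyBucketGregorian reused the hard-coded name "test_leaky_bucket_greg" and key "account:12345", so runs within the bucket's duration shared one bucket and could inherit stale state; it also re-froze the clock inside the request loop even though the test already freezes it up top. The fix derives the name from t.Name(), randomizes the key, drops the extra clock.Freeze(clock.Now()) call, and switches to assert.Greater for a clearer failure message. A condensed, self-contained sketch of the isolation pattern; the package name, import paths, and the test function itself are illustrative, not part of this patch:

package functional_test // hypothetical package name for this sketch

import (
    "context"
    "testing"

    guber "github.com/mailgun/gubernator/v2"
    "github.com/mailgun/gubernator/v2/cluster"
    "github.com/mailgun/holster/v4/clock"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
)

func TestLeakyBucketGregorianIsolated(t *testing.T) {
    // A per-run name and a random key give every execution its own bucket,
    // so leftover state from an earlier run cannot skew the assertions.
    name := t.Name()
    key := guber.RandomString(10)

    client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil)
    require.NoError(t, err)

    resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{
        Requests: []*guber.RateLimitReq{{
            Name:      name,
            UniqueKey: key,
            Behavior:  guber.Behavior_DURATION_IS_GREGORIAN,
            Algorithm: guber.Algorithm_LEAKY_BUCKET,
            Duration:  guber.GregorianMinutes,
            Hits:      1,
            Limit:     60,
        }},
    })
    require.NoError(t, err)
    assert.Greater(t, resp.Responses[0].ResetTime, clock.Now().Unix())
}
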
--- functional_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/functional_test.go b/functional_test.go index 388cdf96..7bd770ce 100644 --- a/functional_test.go +++ b/functional_test.go @@ -748,14 +748,16 @@ func TestLeakyBucketGregorian(t *testing.T) { now = now.Truncate(1 * time.Minute) // So we don't start on the minute boundary now = now.Add(time.Millisecond * 100) + name := t.Name() + key := guber.RandomString(10) for _, test := range tests { t.Run(test.Name, func(t *testing.T) { resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket_greg", - UniqueKey: "account:12345", + Name: name, + UniqueKey: key, Behavior: guber.Behavior_DURATION_IS_GREGORIAN, Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.GregorianMinutes, @@ -764,15 +766,13 @@ func TestLeakyBucketGregorian(t *testing.T) { }, }, }) - clock.Freeze(clock.Now()) require.NoError(t, err) rl := resp.Responses[0] - assert.Equal(t, test.Status, rl.Status) assert.Equal(t, test.Remaining, rl.Remaining) assert.Equal(t, int64(60), rl.Limit) - assert.True(t, rl.ResetTime > now.Unix()) + assert.Greater(t, rl.ResetTime, now.Unix()) clock.Advance(test.Sleep) }) } From c0608d5a90e825d82fa8780edbc6d53b01edb205 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 19:16:25 -0400 Subject: [PATCH 17/23] Fix for overlimit metric doublecounting on non-owner and owner. --- algorithms.go | 40 ++++++++++++++++++++++++++-------------- benchmark_test.go | 12 ++++++------ global.go | 3 ++- gubernator.go | 28 ++++++++++++++++++---------- peer_client.go | 7 ++++--- workers.go | 17 +++++++++-------- 6 files changed, 65 insertions(+), 42 deletions(-) diff --git a/algorithms.go b/algorithms.go index 7d452fc3..4032fa4f 100644 --- a/algorithms.go +++ b/algorithms.go @@ -34,7 +34,7 @@ import ( // with 100 emails and the request will succeed. You can override this default behavior with `DRAIN_OVER_LIMIT` // Implements token bucket algorithm for rate limiting. https://en.wikipedia.org/wiki/Token_bucket -func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { tokenBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("tokenBucket")) defer tokenBucketTimer.ObserveDuration() @@ -99,7 +99,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, rs) } // Update the limit if it changed. @@ -161,7 +161,9 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If we are already at the limit. if rl.Remaining == 0 && r.Hits > 0 { trace.SpanFromContext(ctx).AddEvent("Already over the limit") - metricOverLimitCounter.Add(1) + if rs.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT t.Status = rl.Status return rl, nil @@ -179,7 +181,9 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // without updating the cache. 
if r.Hits > t.Remaining { trace.SpanFromContext(ctx).AddEvent("Over the limit") - metricOverLimitCounter.Add(1) + if rs.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT if HasBehavior(r.Behavior, Behavior_DRAIN_OVER_LIMIT) { // DRAIN_OVER_LIMIT behavior drains the remaining counter. @@ -195,11 +199,11 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * } // Item is not found in cache or store, create new. - return tokenBucketNewItem(ctx, s, c, r) + return tokenBucketNewItem(ctx, s, c, r, rs) } // Called by tokenBucket() when adding a new item in the store. -func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { requestTime := *r.RequestTime expire := requestTime + r.Duration @@ -235,7 +239,9 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) // Client could be requesting that we always return OVER_LIMIT. if r.Hits > r.Limit { trace.SpanFromContext(ctx).AddEvent("Over the limit") - metricOverLimitCounter.Add(1) + if rs.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT rl.Remaining = r.Limit t.Remaining = r.Limit @@ -251,7 +257,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) } // Implements leaky bucket algorithm for rate limiting https://en.wikipedia.org/wiki/Leaky_bucket -func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { leakyBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getRateLimit_leakyBucket")) defer leakyBucketTimer.ObserveDuration() @@ -308,7 +314,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * s.Remove(ctx, hashKey) } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, rs) } if HasBehavior(r.Behavior, Behavior_RESET_REMAINING) { @@ -381,7 +387,9 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If we are already at the limit if int64(b.Remaining) == 0 && r.Hits > 0 { - metricOverLimitCounter.Add(1) + if rs.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT return rl, nil } @@ -397,7 +405,9 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * // If requested is more than available, then return over the limit // without updating the bucket, unless `DRAIN_OVER_LIMIT` is set. if r.Hits > int64(b.Remaining) { - metricOverLimitCounter.Add(1) + if rs.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT // DRAIN_OVER_LIMIT behavior drains the remaining counter. @@ -420,11 +430,11 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp * return rl, nil } - return leakyBucketNewItem(ctx, s, c, r) + return leakyBucketNewItem(ctx, s, c, r, rs) } // Called by leakyBucket() when adding a new item in the store. 
-func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err error) { +func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { requestTime := *r.RequestTime duration := r.Duration rate := float64(duration) / float64(r.Limit) @@ -457,7 +467,9 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq) // Client could be requesting that we start with the bucket OVER_LIMIT if r.Hits > r.Burst { - metricOverLimitCounter.Add(1) + if rs.IsOwner { + metricOverLimitCounter.Add(1) + } rl.Status = Status_OVER_LIMIT rl.Remaining = 0 rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) diff --git a/benchmark_test.go b/benchmark_test.go index 20323dcd..5ceacf42 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -34,7 +34,7 @@ func BenchmarkServer(b *testing.B) { require.NoError(b, err, "Error in conf.SetDefaults") requestTime := epochMillis(clock.Now()) - b.Run("GetPeerRateLimit() with no batching", func(b *testing.B) { + b.Run("GetPeerRateLimit", func(b *testing.B) { client, err := guber.NewPeerClient(guber.PeerConfig{ Info: cluster.GetRandomPeer(cluster.DataCenterNone), Behavior: conf.Behaviors, @@ -46,9 +46,9 @@ func BenchmarkServer(b *testing.B) { for n := 0; n < b.N; n++ { _, err := client.GetPeerRateLimit(ctx, &guber.RateLimitReq{ - Name: b.Name(), - UniqueKey: guber.RandomString(10), - Behavior: guber.Behavior_NO_BATCHING, + Name: b.Name(), + UniqueKey: guber.RandomString(10), + // Behavior: guber.Behavior_NO_BATCHING, Limit: 10, Duration: 5, Hits: 1, @@ -60,7 +60,7 @@ func BenchmarkServer(b *testing.B) { } }) - b.Run("GetRateLimit()", func(b *testing.B) { + b.Run("GetRateLimits batching", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") b.ResetTimer() @@ -83,7 +83,7 @@ func BenchmarkServer(b *testing.B) { } }) - b.Run("GetRateLimitGlobal()", func(b *testing.B) { + b.Run("GetRateLimits global", func(b *testing.B) { client, err := guber.DialV1Server(cluster.GetRandomPeer(cluster.DataCenterNone).GRPCAddress, nil) require.NoError(b, err, "Error in guber.DialV1Server") b.ResetTimer() diff --git a/global.go b/global.go index 47703f6e..2300b971 100644 --- a/global.go +++ b/global.go @@ -234,6 +234,7 @@ func (gm *globalManager) runBroadcasts() { func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string]*RateLimitReq) { defer prometheus.NewTimer(gm.metricBroadcastDuration).ObserveDuration() var req UpdatePeerGlobalsReq + reqState := RateLimitReqState{IsOwner: false} gm.metricGlobalQueueLength.Set(float64(len(updates))) @@ -241,7 +242,7 @@ func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string] // Get current rate limit state. 
grlReq := proto.Clone(update).(*RateLimitReq) grlReq.Hits = 0 - status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq) + status, err := gm.instance.workerPool.GetRateLimit(ctx, grlReq, reqState) if err != nil { gm.log.WithError(err).Error("while retrieving rate limit status") continue diff --git a/gubernator.go b/gubernator.go index d0869cd3..280821f3 100644 --- a/gubernator.go +++ b/gubernator.go @@ -53,6 +53,10 @@ type V1Instance struct { workerPool *WorkerPool } +type RateLimitReqState struct { + IsOwner bool +} + var ( metricGetRateLimitCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "gubernator_getratelimit_counter", @@ -240,9 +244,10 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G } // If our server instance is the owner of this rate limit - if peer.Info().IsOwner { + reqState := RateLimitReqState{IsOwner: peer.Info().IsOwner} + if reqState.IsOwner { // Apply our rate limit algorithm to the request - resp.Responses[i], err = s.getLocalRateLimit(ctx, req) + resp.Responses[i], err = s.getLocalRateLimit(ctx, req, reqState) if err != nil { err = errors.Wrapf(err, "Error while apply rate limit for '%s'", key) span := trace.SpanFromContext(ctx) @@ -313,6 +318,7 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { funcTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.asyncRequest")) defer funcTimer.ObserveDuration() + reqState := RateLimitReqState{IsOwner: false} resp := AsyncResp{ Idx: req.Idx, } @@ -332,7 +338,7 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { // If we are attempting again, the owner of this rate limit might have changed to us! if attempts != 0 { if req.Peer.Info().IsOwner { - resp.Resp, err = s.getLocalRateLimit(ctx, req.Req) + resp.Resp, err = s.getLocalRateLimit(ctx, req.Req, reqState) if err != nil { s.log.WithContext(ctx). WithError(err). 
@@ -399,12 +405,13 @@ func (s *V1Instance) getGlobalRateLimit(ctx context.Context, req *RateLimitReq) tracing.EndScope(ctx, err) }() - cpy := proto.Clone(req).(*RateLimitReq) - SetBehavior(&cpy.Behavior, Behavior_NO_BATCHING, true) - SetBehavior(&cpy.Behavior, Behavior_GLOBAL, false) + req2 := proto.Clone(req).(*RateLimitReq) + SetBehavior(&req2.Behavior, Behavior_NO_BATCHING, true) + SetBehavior(&req2.Behavior, Behavior_GLOBAL, false) + reqState := RateLimitReqState{IsOwner: false} // Process the rate limit like we own it - resp, err = s.getLocalRateLimit(ctx, cpy) + resp, err = s.getLocalRateLimit(ctx, req2, reqState) if err != nil { return nil, errors.Wrap(err, "during in getLocalRateLimit") } @@ -476,6 +483,7 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits respChan := make(chan respOut) var respWg sync.WaitGroup respWg.Add(1) + reqState := RateLimitReqState{IsOwner: true} go func() { // Capture each response and return in the same order @@ -509,7 +517,7 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits rin.req.RequestTime = &requestTime } - rl, err := s.getLocalRateLimit(ctx, rin.req) + rl, err := s.getLocalRateLimit(ctx, rin.req, reqState) if err != nil { // Return the error for this request err = errors.Wrap(err, "Error in getLocalRateLimit") @@ -577,7 +585,7 @@ func (s *V1Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (health return health, nil } -func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ *RateLimitResp, err error) { +func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, rs RateLimitReqState) (_ *RateLimitResp, err error) { ctx = tracing.StartNamedScope(ctx, "V1Instance.getLocalRateLimit", trace.WithAttributes( attribute.String("ratelimit.key", r.UniqueKey), attribute.String("ratelimit.name", r.Name), @@ -587,7 +595,7 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq) (_ defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - resp, err := s.workerPool.GetRateLimit(ctx, r) + resp, err := s.workerPool.GetRateLimit(ctx, r, rs) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } diff --git a/peer_client.go b/peer_client.go index 39c13c14..5e2fef15 100644 --- a/peer_client.go +++ b/peer_client.go @@ -66,9 +66,10 @@ type response struct { } type request struct { - request *RateLimitReq - resp chan *response - ctx context.Context + request *RateLimitReq + reqState RateLimitReqState + resp chan *response + ctx context.Context } type PeerConfig struct { diff --git a/workers.go b/workers.go index f6ed60a9..d62071be 100644 --- a/workers.go +++ b/workers.go @@ -199,7 +199,7 @@ func (p *WorkerPool) dispatch(worker *Worker) { } resp := new(response) - resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, worker.cache) + resp.rl, resp.err = worker.handleGetRateLimit(req.ctx, req.request, req.reqState, worker.cache) select { case req.resp <- resp: // Success. @@ -258,16 +258,17 @@ func (p *WorkerPool) dispatch(worker *Worker) { } // GetRateLimit sends a GetRateLimit request to worker pool. 
-func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) (*RateLimitResp, error) { +func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, rs RateLimitReqState) (*RateLimitResp, error) { // Delegate request to assigned channel based on request key. worker := p.getWorker(rlRequest.HashKey()) queueGauge := metricWorkerQueue.WithLabelValues("GetRateLimit", worker.name) queueGauge.Inc() defer queueGauge.Dec() handlerRequest := request{ - ctx: ctx, - resp: make(chan *response, 1), - request: rlRequest, + ctx: ctx, + resp: make(chan *response, 1), + request: rlRequest, + reqState: rs, } // Send request. @@ -289,14 +290,14 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq) } // Handle request received by worker. -func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, cache Cache) (*RateLimitResp, error) { +func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, rs RateLimitReqState, cache Cache) (*RateLimitResp, error) { defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("Worker.handleGetRateLimit")).ObserveDuration() var rlResponse *RateLimitResp var err error switch req.Algorithm { case Algorithm_TOKEN_BUCKET: - rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req) + rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, rs) if err != nil { msg := "Error in tokenBucket" countError(err, msg) @@ -305,7 +306,7 @@ func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, } case Algorithm_LEAKY_BUCKET: - rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req) + rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, rs) if err != nil { msg := "Error in leakyBucket" countError(err, msg) From 517bf634e5af35ae57495e2c80fad332440731c7 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 19:31:28 -0400 Subject: [PATCH 18/23] Metric `gubernator_getratelimit_counter` adds calltype value "local non-owner" Better tracks the code path. Can exclude non-owner activity. Can get accurate total rate limit checks with query like `rate(gubernator_getratelimit_counter{calltype="local"}[1m])` --- gubernator.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gubernator.go b/gubernator.go index 280821f3..19d5db50 100644 --- a/gubernator.go +++ b/gubernator.go @@ -605,7 +605,11 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, rs s.global.QueueUpdate(r) } - metricGetRateLimitCounter.WithLabelValues("local").Inc() + if rs.IsOwner { + metricGetRateLimitCounter.WithLabelValues("local").Inc() + } else { + metricGetRateLimitCounter.WithLabelValues("local non-owner").Inc() + } return resp, nil } From 5f137ad64c21e321fe5c9915d3263a1901b60ac2 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 19:38:26 -0400 Subject: [PATCH 19/23] Changed mind. Instead of `calltype="local non-owner"`, just don't increment a counter. 
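
Net effect of this commit and the previous two: metric accounting is keyed off the RateLimitReqState that now travels with each request. Non-owner peers still evaluate the token and leaky bucket algorithms, but only the owning peer increments the "local" getratelimit counter and the over-limit counter, so a request forwarded from a non-owner is no longer counted twice. A minimal, runnable sketch of that guard; RateLimitReqState mirrors the struct added in gubernator.go, while the counter name, the recordOverLimit helper, and the main() wrapper are illustrative only:

package main

import (
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
)

// RateLimitReqState carries ownership alongside a rate limit request.
type RateLimitReqState struct {
    IsOwner bool
}

var overLimitCounter = prometheus.NewCounter(prometheus.CounterOpts{
    Name: "example_over_limit_counter", // hypothetical name, not the real metric
    Help: "Over-limit responses, counted on the owning peer only.",
})

// recordOverLimit mirrors the guard used in algorithms.go: a non-owner peer
// that merely forwards a request must not count the same event the owner
// already counts.
func recordOverLimit(state RateLimitReqState) {
    if state.IsOwner {
        overLimitCounter.Add(1)
    }
}

func main() {
    recordOverLimit(RateLimitReqState{IsOwner: false}) // no-op: non-owner
    recordOverLimit(RateLimitReqState{IsOwner: true})  // counted: owner
    fmt.Println("over-limit event recorded once, on the owner")
}
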
--- gubernator.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/gubernator.go b/gubernator.go index 19d5db50..9fd6aee1 100644 --- a/gubernator.go +++ b/gubernator.go @@ -607,8 +607,6 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, rs if rs.IsOwner { metricGetRateLimitCounter.WithLabelValues("local").Inc() - } else { - metricGetRateLimitCounter.WithLabelValues("local non-owner").Inc() } return resp, nil } From f51861d4d4cc14de3e33d48f0fa8f23396e11360 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Mon, 11 Mar 2024 20:09:06 -0400 Subject: [PATCH 20/23] Don't call `OnChange()` event from non-owner. Non-owners shouldn't be persisting rate limit state. --- algorithms.go | 36 ++++++++++++++++++------------------ gubernator.go | 6 +++--- workers.go | 10 +++++----- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/algorithms.go b/algorithms.go index 4032fa4f..61fa1544 100644 --- a/algorithms.go +++ b/algorithms.go @@ -34,7 +34,7 @@ import ( // with 100 emails and the request will succeed. You can override this default behavior with `DRAIN_OVER_LIMIT` // Implements token bucket algorithm for rate limiting. https://en.wikipedia.org/wiki/Token_bucket -func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { +func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { tokenBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("tokenBucket")) defer tokenBucketTimer.ObserveDuration() @@ -99,7 +99,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate s.Remove(ctx, hashKey) } - return tokenBucketNewItem(ctx, s, c, r, rs) + return tokenBucketNewItem(ctx, s, c, r, reqState) } // Update the limit if it changed. @@ -146,7 +146,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate rl.ResetTime = expire } - if s != nil { + if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() @@ -161,7 +161,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate // If we are already at the limit. if rl.Remaining == 0 && r.Hits > 0 { trace.SpanFromContext(ctx).AddEvent("Already over the limit") - if rs.IsOwner { + if reqState.IsOwner { metricOverLimitCounter.Add(1) } rl.Status = Status_OVER_LIMIT @@ -181,7 +181,7 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate // without updating the cache. if r.Hits > t.Remaining { trace.SpanFromContext(ctx).AddEvent("Over the limit") - if rs.IsOwner { + if reqState.IsOwner { metricOverLimitCounter.Add(1) } rl.Status = Status_OVER_LIMIT @@ -199,11 +199,11 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate } // Item is not found in cache or store, create new. - return tokenBucketNewItem(ctx, s, c, r, rs) + return tokenBucketNewItem(ctx, s, c, r, reqState) } // Called by tokenBucket() when adding a new item in the store. 
-func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { +func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { requestTime := *r.RequestTime expire := requestTime + r.Duration @@ -239,7 +239,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, // Client could be requesting that we always return OVER_LIMIT. if r.Hits > r.Limit { trace.SpanFromContext(ctx).AddEvent("Over the limit") - if rs.IsOwner { + if reqState.IsOwner { metricOverLimitCounter.Add(1) } rl.Status = Status_OVER_LIMIT @@ -249,7 +249,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, c.Add(item) - if s != nil { + if s != nil && reqState.IsOwner { s.OnChange(ctx, r, item) } @@ -257,7 +257,7 @@ func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, } // Implements leaky bucket algorithm for rate limiting https://en.wikipedia.org/wiki/Leaky_bucket -func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { +func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { leakyBucketTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getRateLimit_leakyBucket")) defer leakyBucketTimer.ObserveDuration() @@ -314,7 +314,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate s.Remove(ctx, hashKey) } - return leakyBucketNewItem(ctx, s, c, r, rs) + return leakyBucketNewItem(ctx, s, c, r, reqState) } if HasBehavior(r.Behavior, Behavior_RESET_REMAINING) { @@ -379,7 +379,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate // TODO: Feature missing: check for Duration change between item/request. - if s != nil { + if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() @@ -387,7 +387,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate // If we are already at the limit if int64(b.Remaining) == 0 && r.Hits > 0 { - if rs.IsOwner { + if reqState.IsOwner { metricOverLimitCounter.Add(1) } rl.Status = Status_OVER_LIMIT @@ -405,7 +405,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate // If requested is more than available, then return over the limit // without updating the bucket, unless `DRAIN_OVER_LIMIT` is set. if r.Hits > int64(b.Remaining) { - if rs.IsOwner { + if reqState.IsOwner { metricOverLimitCounter.Add(1) } rl.Status = Status_OVER_LIMIT @@ -430,11 +430,11 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs Rate return rl, nil } - return leakyBucketNewItem(ctx, s, c, r, rs) + return leakyBucketNewItem(ctx, s, c, r, reqState) } // Called by leakyBucket() when adding a new item in the store. 
-func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, rs RateLimitReqState) (resp *RateLimitResp, err error) { +func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { requestTime := *r.RequestTime duration := r.Duration rate := float64(duration) / float64(r.Limit) @@ -467,7 +467,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, // Client could be requesting that we start with the bucket OVER_LIMIT if r.Hits > r.Burst { - if rs.IsOwner { + if reqState.IsOwner { metricOverLimitCounter.Add(1) } rl.Status = Status_OVER_LIMIT @@ -485,7 +485,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, c.Add(item) - if s != nil { + if s != nil && reqState.IsOwner { s.OnChange(ctx, r, item) } diff --git a/gubernator.go b/gubernator.go index 9fd6aee1..55f3f9f2 100644 --- a/gubernator.go +++ b/gubernator.go @@ -585,7 +585,7 @@ func (s *V1Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (health return health, nil } -func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, rs RateLimitReqState) (_ *RateLimitResp, err error) { +func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, reqState RateLimitReqState) (_ *RateLimitResp, err error) { ctx = tracing.StartNamedScope(ctx, "V1Instance.getLocalRateLimit", trace.WithAttributes( attribute.String("ratelimit.key", r.UniqueKey), attribute.String("ratelimit.name", r.Name), @@ -595,7 +595,7 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, rs defer func() { tracing.EndScope(ctx, err) }() defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.getLocalRateLimit")).ObserveDuration() - resp, err := s.workerPool.GetRateLimit(ctx, r, rs) + resp, err := s.workerPool.GetRateLimit(ctx, r, reqState) if err != nil { return nil, errors.Wrap(err, "during workerPool.GetRateLimit") } @@ -605,7 +605,7 @@ func (s *V1Instance) getLocalRateLimit(ctx context.Context, r *RateLimitReq, rs s.global.QueueUpdate(r) } - if rs.IsOwner { + if reqState.IsOwner { metricGetRateLimitCounter.WithLabelValues("local").Inc() } return resp, nil diff --git a/workers.go b/workers.go index d62071be..34d99d1d 100644 --- a/workers.go +++ b/workers.go @@ -258,7 +258,7 @@ func (p *WorkerPool) dispatch(worker *Worker) { } // GetRateLimit sends a GetRateLimit request to worker pool. -func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, rs RateLimitReqState) (*RateLimitResp, error) { +func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, reqState RateLimitReqState) (*RateLimitResp, error) { // Delegate request to assigned channel based on request key. worker := p.getWorker(rlRequest.HashKey()) queueGauge := metricWorkerQueue.WithLabelValues("GetRateLimit", worker.name) @@ -268,7 +268,7 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, ctx: ctx, resp: make(chan *response, 1), request: rlRequest, - reqState: rs, + reqState: reqState, } // Send request. @@ -290,14 +290,14 @@ func (p *WorkerPool) GetRateLimit(ctx context.Context, rlRequest *RateLimitReq, } // Handle request received by worker. 
-func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, rs RateLimitReqState, cache Cache) (*RateLimitResp, error) { +func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, reqState RateLimitReqState, cache Cache) (*RateLimitResp, error) { defer prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("Worker.handleGetRateLimit")).ObserveDuration() var rlResponse *RateLimitResp var err error switch req.Algorithm { case Algorithm_TOKEN_BUCKET: - rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, rs) + rlResponse, err = tokenBucket(ctx, worker.conf.Store, cache, req, reqState) if err != nil { msg := "Error in tokenBucket" countError(err, msg) @@ -306,7 +306,7 @@ func (worker *Worker) handleGetRateLimit(ctx context.Context, req *RateLimitReq, } case Algorithm_LEAKY_BUCKET: - rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, rs) + rlResponse, err = leakyBucket(ctx, worker.conf.Store, cache, req, reqState) if err != nil { msg := "Error in leakyBucket" countError(err, msg) From d55016d2447eaac133c0a5f381fd988abcd5a97d Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Wed, 13 Mar 2024 10:23:52 -0400 Subject: [PATCH 21/23] Simplify cache item expiration check. --- algorithms.go | 12 ++++++------ cache.go | 16 ++++++++++++++++ gubernator.go | 4 ++-- lrucache.go | 11 +---------- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/algorithms.go b/algorithms.go index 61fa1544..17af8f03 100644 --- a/algorithms.go +++ b/algorithms.go @@ -146,18 +146,18 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat rl.ResetTime = expire } - if s != nil && reqState.IsOwner { - defer func() { - s.OnChange(ctx, r, item) - }() - } - // Client is only interested in retrieving the current status or // updating the rate limit config. if r.Hits == 0 { return rl, nil } + if s != nil && reqState.IsOwner { + defer func() { + s.OnChange(ctx, r, item) + }() + } + // If we are already at the limit. if rl.Remaining == 0 && r.Hits > 0 { trace.SpanFromContext(ctx).AddEvent("Already over the limit") diff --git a/cache.go b/cache.go index 163627d2..0fd431a5 100644 --- a/cache.go +++ b/cache.go @@ -39,3 +39,19 @@ type CacheItem struct { // for the latest rate limit data. InvalidAt int64 } + +func (item *CacheItem) IsExpired() bool { + now := MillisecondNow() + + // If the entry is invalidated + if item.InvalidAt != 0 && item.InvalidAt < now { + return true + } + + // If the entry has expired, remove it from the cache + if item.ExpireAt < now { + return true + } + + return false +} diff --git a/gubernator.go b/gubernator.go index 55f3f9f2..f81742a2 100644 --- a/gubernator.go +++ b/gubernator.go @@ -318,7 +318,7 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { funcTimer := prometheus.NewTimer(metricFuncTimeDuration.WithLabelValues("V1Instance.asyncRequest")) defer funcTimer.ObserveDuration() - reqState := RateLimitReqState{IsOwner: false} + reqState := RateLimitReqState{IsOwner: req.Peer.Info().IsOwner} resp := AsyncResp{ Idx: req.Idx, } @@ -337,7 +337,7 @@ func (s *V1Instance) asyncRequest(ctx context.Context, req *AsyncReq) { // If we are attempting again, the owner of this rate limit might have changed to us! if attempts != 0 { - if req.Peer.Info().IsOwner { + if reqState.IsOwner { resp.Resp, err = s.getLocalRateLimit(ctx, req.Req, reqState) if err != nil { s.log.WithContext(ctx). 
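
// The new CacheItem.IsExpired() (cache.go above) folds the two checks that
// were previously inlined at each call site: explicit invalidation via
// InvalidAt and normal TTL expiry via ExpireAt, both compared against
// MillisecondNow(). A minimal sketch of the behavior; the example function
// and the v2 import path are assumptions, not part of this patch.
package gubernator_test

import (
    "fmt"

    guber "github.com/mailgun/gubernator/v2"
)

func ExampleCacheItem_IsExpired() {
    past := guber.MillisecondNow() - 1

    // TTL already elapsed.
    expired := &guber.CacheItem{ExpireAt: past}
    // TTL still in the future, but the entry was explicitly invalidated.
    invalidated := &guber.CacheItem{ExpireAt: past + 60_000, InvalidAt: past}

    fmt.Println(expired.IsExpired(), invalidated.IsExpired())
    // Output: true true
}
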
diff --git a/lrucache.go b/lrucache.go index 09bc36ba..03867209 100644 --- a/lrucache.go +++ b/lrucache.go @@ -112,16 +112,7 @@ func (c *LRUCache) GetItem(key string) (item *CacheItem, ok bool) { if ele, hit := c.cache[key]; hit { entry := ele.Value.(*CacheItem) - now := MillisecondNow() - // If the entry is invalidated - if entry.InvalidAt != 0 && entry.InvalidAt < now { - c.removeElement(ele) - metricCacheAccess.WithLabelValues("miss").Add(1) - return - } - - // If the entry has expired, remove it from the cache - if entry.ExpireAt < now { + if entry.IsExpired() { c.removeElement(ele) metricCacheAccess.WithLabelValues("miss").Add(1) return From 5ce3bc1941d50f427942da98fe906087905d6553 Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Wed, 13 Mar 2024 10:46:41 -0400 Subject: [PATCH 22/23] Rename `RequestTime` to `CreatedAt` in protos. --- algorithms.go | 38 +++---- benchmark_test.go | 10 +- functional_test.go | 36 +++---- global.go | 10 +- gubernator.go | 14 +-- gubernator.pb.go | 154 +++++++++++++++------------- gubernator.proto | 15 ++- peer_client_test.go | 10 +- peers.pb.go | 61 ++++++----- peers.proto | 14 ++- python/gubernator/gubernator_pb2.py | 40 ++++---- python/gubernator/peers_pb2.py | 12 +-- 12 files changed, 221 insertions(+), 193 deletions(-) diff --git a/algorithms.go b/algorithms.go index 17af8f03..8f4bea6a 100644 --- a/algorithms.go +++ b/algorithms.go @@ -132,12 +132,12 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat } // If our new duration means we are currently expired. - requestTime := *r.RequestTime - if expire <= requestTime { + createdAt := *r.CreatedAt + if expire <= createdAt { // Renew item. span.AddEvent("Limit has expired") - expire = requestTime + r.Duration - t.CreatedAt = requestTime + expire = createdAt + r.Duration + t.CreatedAt = createdAt t.Remaining = t.Limit } @@ -204,14 +204,14 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat // Called by tokenBucket() when adding a new item in the store. func tokenBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { - requestTime := *r.RequestTime - expire := requestTime + r.Duration + createdAt := *r.CreatedAt + expire := createdAt + r.Duration t := &TokenBucketItem{ Limit: r.Limit, Duration: r.Duration, Remaining: r.Limit - r.Hits, - CreatedAt: requestTime, + CreatedAt: createdAt, } // Add a new rate limit to the cache. @@ -265,7 +265,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat r.Burst = r.Limit } - requestTime := *r.RequestTime + createdAt := *r.CreatedAt // Get rate limit from cache. 
hashKey := r.HashKey() @@ -354,16 +354,16 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat } if r.Hits != 0 { - c.UpdateExpiration(r.HashKey(), requestTime+duration) + c.UpdateExpiration(r.HashKey(), createdAt+duration) } // Calculate how much leaked out of the bucket since the last time we leaked a hit - elapsed := requestTime - b.UpdatedAt + elapsed := createdAt - b.UpdatedAt leak := float64(elapsed) / rate if int64(leak) > 0 { b.Remaining += leak - b.UpdatedAt = requestTime + b.UpdatedAt = createdAt } if int64(b.Remaining) > b.Burst { @@ -374,7 +374,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat Limit: b.Limit, Remaining: int64(b.Remaining), Status: Status_UNDER_LIMIT, - ResetTime: requestTime + (b.Limit-int64(b.Remaining))*int64(rate), + ResetTime: createdAt + (b.Limit-int64(b.Remaining))*int64(rate), } // TODO: Feature missing: check for Duration change between item/request. @@ -398,7 +398,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat if int64(b.Remaining) == r.Hits { b.Remaining = 0 rl.Remaining = int64(b.Remaining) - rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } @@ -426,7 +426,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat b.Remaining -= float64(r.Hits) rl.Remaining = int64(b.Remaining) - rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) return rl, nil } @@ -435,7 +435,7 @@ func leakyBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat // Called by leakyBucket() when adding a new item in the store. 
func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqState RateLimitReqState) (resp *RateLimitResp, err error) { - requestTime := *r.RequestTime + createdAt := *r.CreatedAt duration := r.Duration rate := float64(duration) / float64(r.Limit) if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { @@ -454,7 +454,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, Remaining: float64(r.Burst - r.Hits), Limit: r.Limit, Duration: duration, - UpdatedAt: requestTime, + UpdatedAt: createdAt, Burst: r.Burst, } @@ -462,7 +462,7 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, Status: Status_UNDER_LIMIT, Limit: b.Limit, Remaining: r.Burst - r.Hits, - ResetTime: requestTime + (b.Limit-(r.Burst-r.Hits))*int64(rate), + ResetTime: createdAt + (b.Limit-(r.Burst-r.Hits))*int64(rate), } // Client could be requesting that we start with the bucket OVER_LIMIT @@ -472,12 +472,12 @@ func leakyBucketNewItem(ctx context.Context, s Store, c Cache, r *RateLimitReq, } rl.Status = Status_OVER_LIMIT rl.Remaining = 0 - rl.ResetTime = requestTime + (rl.Limit-rl.Remaining)*int64(rate) + rl.ResetTime = createdAt + (rl.Limit-rl.Remaining)*int64(rate) b.Remaining = 0 } item := &CacheItem{ - ExpireAt: requestTime + duration, + ExpireAt: createdAt + duration, Algorithm: r.Algorithm, Key: r.HashKey(), Value: &b, diff --git a/benchmark_test.go b/benchmark_test.go index 5ceacf42..9673cf2b 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -32,7 +32,7 @@ func BenchmarkServer(b *testing.B) { conf := guber.Config{} err := conf.SetDefaults() require.NoError(b, err, "Error in conf.SetDefaults") - requestTime := epochMillis(clock.Now()) + createdAt := epochMillis(clock.Now()) b.Run("GetPeerRateLimit", func(b *testing.B) { client, err := guber.NewPeerClient(guber.PeerConfig{ @@ -49,10 +49,10 @@ func BenchmarkServer(b *testing.B) { Name: b.Name(), UniqueKey: guber.RandomString(10), // Behavior: guber.Behavior_NO_BATCHING, - Limit: 10, - Duration: 5, - Hits: 1, - RequestTime: &requestTime, + Limit: 10, + Duration: 5, + Hits: 1, + CreatedAt: &createdAt, }) if err != nil { b.Errorf("Error in client.GetPeerRateLimit: %s", err) diff --git a/functional_test.go b/functional_test.go index 7bd770ce..400137b2 100644 --- a/functional_test.go +++ b/functional_test.go @@ -1646,7 +1646,7 @@ func TestGetPeerRateLimits(t *testing.T) { t.Run("Stable rate check request order", func(t *testing.T) { // Ensure response order matches rate check request order. // Try various batch sizes. 
- requestTime := epochMillis(clock.Now()) + createdAt := epochMillis(clock.Now()) testCases := []int{1, 2, 5, 10, 100, 1000} for _, n := range testCases { @@ -1657,14 +1657,14 @@ func TestGetPeerRateLimits(t *testing.T) { } for i := 0; i < n; i++ { req.Requests[i] = &guber.RateLimitReq{ - Name: name, - UniqueKey: guber.RandomString(10), - Hits: 0, - Limit: 1000 + int64(i), - Duration: 1000, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_BATCHING, - RequestTime: &requestTime, + Name: name, + UniqueKey: guber.RandomString(10), + Hits: 0, + Limit: 1000 + int64(i), + Duration: 1000, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_BATCHING, + CreatedAt: &createdAt, } } @@ -1690,18 +1690,18 @@ func TestGetPeerRateLimits(t *testing.T) { func TestGlobalBehavior(t *testing.T) { const limit = 1000 broadcastTimeout := 400 * time.Millisecond - requestTime := epochMillis(clock.Now()) + createdAt := epochMillis(clock.Now()) makeReq := func(name, key string, hits int64) *guber.RateLimitReq { return &guber.RateLimitReq{ - Name: name, - UniqueKey: key, - Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, - Duration: guber.Minute * 3, - Hits: hits, - Limit: limit, - RequestTime: &requestTime, + Name: name, + UniqueKey: key, + Algorithm: guber.Algorithm_TOKEN_BUCKET, + Behavior: guber.Behavior_GLOBAL, + Duration: guber.Minute * 3, + Hits: hits, + Limit: limit, + CreatedAt: &createdAt, } } diff --git a/global.go b/global.go index 2300b971..c5fe1676 100644 --- a/global.go +++ b/global.go @@ -248,11 +248,11 @@ func (gm *globalManager) broadcastPeers(ctx context.Context, updates map[string] continue } updateReq := &UpdatePeerGlobal{ - Key: update.HashKey(), - Algorithm: update.Algorithm, - Duration: update.Duration, - Status: status, - RequestTime: *update.RequestTime, + Key: update.HashKey(), + Algorithm: update.Algorithm, + Duration: update.Duration, + Status: status, + CreatedAt: *update.CreatedAt, } req.Globals = append(req.Globals, updateReq) } diff --git a/gubernator.go b/gubernator.go index f81742a2..ff6812ae 100644 --- a/gubernator.go +++ b/gubernator.go @@ -192,7 +192,7 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G "Requests.RateLimits list too large; max size is '%d'", maxBatchSize) } - requestTime := epochMillis(clock.Now()) + createdAt := epochMillis(clock.Now()) resp := GetRateLimitsResp{ Responses: make([]*RateLimitResp, len(r.Requests)), } @@ -215,8 +215,8 @@ func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*G resp.Responses[i] = &RateLimitResp{Error: "field 'namespace' cannot be empty"} continue } - if req.RequestTime == nil || *req.RequestTime == 0 { - req.RequestTime = &requestTime + if req.CreatedAt == nil || *req.CreatedAt == 0 { + req.CreatedAt = &createdAt } if ctx.Err() != nil { @@ -511,10 +511,10 @@ func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimits SetBehavior(&rin.req.Behavior, Behavior_DRAIN_OVER_LIMIT, true) } - // Assign default to RequestTime for backwards compatibility. - if rin.req.RequestTime == nil || *rin.req.RequestTime == 0 { - requestTime := epochMillis(clock.Now()) - rin.req.RequestTime = &requestTime + // Assign default to CreatedAt for backwards compatibility. 
+ if rin.req.CreatedAt == nil || *rin.req.CreatedAt == 0 { + createdAt := epochMillis(clock.Now()) + rin.req.CreatedAt = &createdAt } rl, err := s.getLocalRateLimit(ctx, rin.req, reqState) diff --git a/gubernator.pb.go b/gubernator.pb.go index 5c669457..3b54288d 100644 --- a/gubernator.pb.go +++ b/gubernator.pb.go @@ -374,10 +374,17 @@ type RateLimitReq struct { // this to pass trace context to other peers. Might be useful for future clients to pass along // trace information to gubernator. Metadata map[string]string `protobuf:"bytes,9,rep,name=metadata,proto3" json:"metadata,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - // The exact time of request in Epoch milliseconds. - // The is intended to be used for peer-to-peer requests to preserve - // timestamps. - RequestTime *int64 `protobuf:"varint,10,opt,name=request_time,json=requestTime,proto3,oneof" json:"request_time,omitempty"` + // The exact time this request was created in Epoch milliseconds. Due to + // time drift between systems, it may be advantageous for a client to set the + // exact time the request was created. It possible the system clock for the + // client has drifted from the system clock where gubernator daemon is + // running. + // + // The created time is used by gubernator to calculate the reset time for + // both token and leaky algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request. + CreatedAt *int64 `protobuf:"varint,10,opt,name=created_at,json=createdAt,proto3,oneof" json:"created_at,omitempty"` } func (x *RateLimitReq) Reset() { @@ -475,9 +482,9 @@ func (x *RateLimitReq) GetMetadata() map[string]string { return nil } -func (x *RateLimitReq) GetRequestTime() int64 { - if x != nil && x.RequestTime != nil { - return *x.RequestTime +func (x *RateLimitReq) GetCreatedAt() int64 { + if x != nil && x.CreatedAt != nil { + return *x.CreatedAt } return 0 } @@ -695,7 +702,7 @@ var file_gubernator_proto_rawDesc = []byte{ 0x70, 0x12, 0x3a, 0x0a, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, - 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0xc7, 0x03, + 0x73, 0x70, 0x52, 0x09, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x73, 0x22, 0xc1, 0x03, 0x0a, 0x0c, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x71, 0x75, 0x65, 0x5f, 0x6b, 0x65, 0x79, @@ -717,72 +724,71 @@ var file_gubernator_proto_rawDesc = []byte{ 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x71, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, - 0x61, 0x12, 0x26, 0x0a, 0x0c, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x74, 0x69, 0x6d, - 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x88, 0x01, 0x01, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 
0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x0f, 0x0a, 0x0d, 0x5f, 0x72, 0x65, 0x71, 0x75, 0x65, - 0x73, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x22, 0xac, 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, - 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, - 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, - 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, - 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, - 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, - 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, - 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, - 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, - 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, - 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, - 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, - 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, - 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x10, 0x0a, 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, - 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x22, 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, - 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, - 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, - 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, - 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, - 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, - 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, 0x76, 0x69, 0x6f, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, - 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, - 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, - 0x42, 0x41, 
0x4c, 0x10, 0x02, 0x12, 0x19, 0x0a, 0x15, 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, - 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, 0x52, 0x45, 0x47, 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, - 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, 0x53, 0x45, 0x54, 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, - 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, 0x10, 0x0a, 0x0c, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, - 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, 0x10, 0x12, 0x14, 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, - 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, - 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, - 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x4f, 0x56, 0x45, 0x52, - 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x01, 0x32, 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, - 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, - 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, - 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, - 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, - 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, - 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, - 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, - 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, - 0x1e, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, - 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, - 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, 0x11, 0x12, 0x0f, 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, - 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, - 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, - 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x33, + 0x61, 0x12, 0x22, 0x0a, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, + 0x0a, 0x20, 0x01, 0x28, 0x03, 0x48, 0x00, 0x52, 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, + 0x41, 0x74, 0x88, 0x01, 0x01, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, + 0x38, 0x01, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, + 0x74, 0x22, 0xac, 0x02, 0x0a, 0x0d, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, + 0x65, 0x73, 0x70, 0x12, 0x2d, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x15, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 
0x12, 0x14, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x72, 0x65, 0x6d, 0x61, + 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x6d, + 0x61, 0x69, 0x6e, 0x69, 0x6e, 0x67, 0x12, 0x1d, 0x0a, 0x0a, 0x72, 0x65, 0x73, 0x65, 0x74, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x72, 0x65, 0x73, 0x65, + 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x12, 0x46, 0x0a, 0x08, 0x6d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2a, 0x2e, + 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x52, 0x65, 0x73, 0x70, 0x2e, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x1a, 0x3b, 0x0a, 0x0d, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, + 0x22, 0x10, 0x0a, 0x0e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, + 0x65, 0x71, 0x22, 0x62, 0x0a, 0x0f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, + 0x6b, 0x52, 0x65, 0x73, 0x70, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, + 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x65, 0x65, 0x72, 0x5f, + 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x70, 0x65, 0x65, + 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x2a, 0x2f, 0x0a, 0x09, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, + 0x74, 0x68, 0x6d, 0x12, 0x10, 0x0a, 0x0c, 0x54, 0x4f, 0x4b, 0x45, 0x4e, 0x5f, 0x42, 0x55, 0x43, + 0x4b, 0x45, 0x54, 0x10, 0x00, 0x12, 0x10, 0x0a, 0x0c, 0x4c, 0x45, 0x41, 0x4b, 0x59, 0x5f, 0x42, + 0x55, 0x43, 0x4b, 0x45, 0x54, 0x10, 0x01, 0x2a, 0x8d, 0x01, 0x0a, 0x08, 0x42, 0x65, 0x68, 0x61, + 0x76, 0x69, 0x6f, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, 0x47, + 0x10, 0x00, 0x12, 0x0f, 0x0a, 0x0b, 0x4e, 0x4f, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x49, 0x4e, + 0x47, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x47, 0x4c, 0x4f, 0x42, 0x41, 0x4c, 0x10, 0x02, 0x12, + 0x19, 0x0a, 0x15, 0x44, 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x49, 0x53, 0x5f, 0x47, + 0x52, 0x45, 0x47, 0x4f, 0x52, 0x49, 0x41, 0x4e, 0x10, 0x04, 0x12, 0x13, 0x0a, 0x0f, 0x52, 0x45, + 0x53, 0x45, 0x54, 0x5f, 0x52, 0x45, 0x4d, 0x41, 0x49, 0x4e, 0x49, 0x4e, 0x47, 0x10, 0x08, 0x12, + 0x10, 0x0a, 0x0c, 0x4d, 0x55, 0x4c, 0x54, 0x49, 0x5f, 0x52, 0x45, 0x47, 0x49, 0x4f, 0x4e, 0x10, + 0x10, 0x12, 0x14, 0x0a, 0x10, 0x44, 0x52, 0x41, 0x49, 0x4e, 0x5f, 0x4f, 0x56, 0x45, 0x52, 0x5f, + 0x4c, 0x49, 0x4d, 0x49, 0x54, 0x10, 0x20, 0x2a, 0x29, 0x0a, 0x06, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x12, 0x0f, 0x0a, 0x0b, 0x55, 0x4e, 0x44, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 
0x4f, 0x56, 0x45, 0x52, 0x5f, 0x4c, 0x49, 0x4d, 0x49, 0x54, + 0x10, 0x01, 0x32, 0xdd, 0x01, 0x0a, 0x02, 0x56, 0x31, 0x12, 0x70, 0x0a, 0x0d, 0x47, 0x65, 0x74, + 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x1f, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, 0x61, + 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x20, 0x2e, 0x70, 0x62, + 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x52, + 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x1c, 0x82, + 0xd3, 0xe4, 0x93, 0x02, 0x16, 0x3a, 0x01, 0x2a, 0x22, 0x11, 0x2f, 0x76, 0x31, 0x2f, 0x47, 0x65, + 0x74, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x65, 0x0a, 0x0b, 0x48, + 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x12, 0x1d, 0x2e, 0x70, 0x62, 0x2e, + 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, + 0x68, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x71, 0x1a, 0x1e, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, + 0x43, 0x68, 0x65, 0x63, 0x6b, 0x52, 0x65, 0x73, 0x70, 0x22, 0x17, 0x82, 0xd3, 0xe4, 0x93, 0x02, + 0x11, 0x12, 0x0f, 0x2f, 0x76, 0x31, 0x2f, 0x48, 0x65, 0x61, 0x6c, 0x74, 0x68, 0x43, 0x68, 0x65, + 0x63, 0x6b, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, + 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/gubernator.proto b/gubernator.proto index 8b6611a7..52d5e65f 100644 --- a/gubernator.proto +++ b/gubernator.proto @@ -169,10 +169,17 @@ message RateLimitReq { // trace information to gubernator. map<string, string> metadata = 9; - // The exact time of request in Epoch milliseconds. - // The is intended to be used for peer-to-peer requests to preserve - // timestamps. + // The exact time this request was created in Epoch milliseconds. Due to + // time drift between systems, it may be advantageous for a client to set the + // exact time the request was created. It is possible the system clock for the + // client has drifted from the system clock where the gubernator daemon is + // running. + // + // The created time is used by gubernator to calculate the reset time for + // both the token and leaky bucket algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request.
+ optional int64 created_at = 10; } enum Status { diff --git a/peer_client_test.go b/peer_client_test.go index 926eb2da..5f0bc016 100644 --- a/peer_client_test.go +++ b/peer_client_test.go @@ -37,7 +37,7 @@ func TestPeerClientShutdown(t *testing.T) { } const threads = 10 - requestTime := epochMillis(clock.Now()) + createdAt := epochMillis(clock.Now()) cases := []test{ {"No batching", gubernator.Behavior_NO_BATCHING}, @@ -72,10 +72,10 @@ func TestPeerClientShutdown(t *testing.T) { wg.Go(func() error { ctx := context.Background() _, err := client.GetPeerRateLimit(ctx, &gubernator.RateLimitReq{ - Hits: 1, - Limit: 100, - Behavior: c.Behavior, - RequestTime: &requestTime, + Hits: 1, + Limit: 100, + Behavior: c.Behavior, + CreatedAt: &createdAt, }) if err != nil { diff --git a/peers.pb.go b/peers.pb.go index d4100832..e69e6fe2 100644 --- a/peers.pb.go +++ b/peers.pb.go @@ -193,9 +193,17 @@ type UpdatePeerGlobal struct { Algorithm Algorithm `protobuf:"varint,3,opt,name=algorithm,proto3,enum=pb.gubernator.Algorithm" json:"algorithm,omitempty"` // The duration of the rate limit in milliseconds Duration int64 `protobuf:"varint,4,opt,name=duration,proto3" json:"duration,omitempty"` - // Time of original GetRateLimits request so that ExpiresAt timestamps can be - // synchronized. - RequestTime int64 `protobuf:"varint,5,opt,name=request_time,json=requestTime,proto3" json:"request_time,omitempty"` + // The exact time the original request was created in Epoch milliseconds. + // Due to time drift between systems, it may be advantageous for a client to + // set the exact time the request was created. It is possible the system clock + // for the client has drifted from the system clock where the gubernator daemon + // is running. + // + // The created time is used by gubernator to calculate the reset time for + // both the token and leaky bucket algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request.
+ CreatedAt int64 `protobuf:"varint,5,opt,name=created_at,json=createdAt,proto3" json:"created_at,omitempty"` } func (x *UpdatePeerGlobal) Reset() { @@ -258,9 +266,9 @@ func (x *UpdatePeerGlobal) GetDuration() int64 { return 0 } -func (x *UpdatePeerGlobal) GetRequestTime() int64 { +func (x *UpdatePeerGlobal) GetCreatedAt() int64 { if x != nil { - return x.RequestTime + return x.CreatedAt } return 0 } @@ -324,7 +332,7 @@ var file_peers_proto_rawDesc = []byte{ 0x39, 0x0a, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1f, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, - 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0xd1, 0x01, 0x0a, 0x10, 0x55, + 0x6c, 0x52, 0x07, 0x67, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x22, 0xcd, 0x01, 0x0a, 0x10, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x34, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, @@ -335,27 +343,26 @@ var file_peers_proto_rawDesc = []byte{ 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x41, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x21, 0x0a, 0x0c, 0x72, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, - 0x03, 0x52, 0x0b, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x22, 0x17, - 0x0a, 0x15, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, - 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, 0x72, - 0x73, 0x56, 0x31, 0x12, 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, - 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, - 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, - 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, - 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, - 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, - 0x65, 0x73, 0x70, 0x22, 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, - 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, - 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, - 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, - 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, - 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, - 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, - 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, - 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 
0x6f, 0x33, + 0x03, 0x52, 0x08, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x1d, 0x0a, 0x0a, 0x63, + 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x09, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x22, 0x17, 0x0a, 0x15, 0x55, 0x70, + 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, + 0x65, 0x73, 0x70, 0x32, 0xcd, 0x01, 0x0a, 0x07, 0x50, 0x65, 0x65, 0x72, 0x73, 0x56, 0x31, 0x12, + 0x60, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, + 0x6d, 0x69, 0x74, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, + 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, 0x72, 0x52, 0x61, 0x74, 0x65, + 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, 0x70, 0x62, 0x2e, 0x67, + 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x47, 0x65, 0x74, 0x50, 0x65, 0x65, + 0x72, 0x52, 0x61, 0x74, 0x65, 0x4c, 0x69, 0x6d, 0x69, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x22, + 0x00, 0x12, 0x60, 0x0a, 0x11, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, + 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x12, 0x23, 0x2e, 0x70, 0x62, 0x2e, 0x67, 0x75, 0x62, 0x65, + 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x50, 0x65, 0x65, + 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x71, 0x1a, 0x24, 0x2e, 0x70, 0x62, + 0x2e, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2e, 0x55, 0x70, 0x64, 0x61, + 0x74, 0x65, 0x50, 0x65, 0x65, 0x72, 0x47, 0x6c, 0x6f, 0x62, 0x61, 0x6c, 0x73, 0x52, 0x65, 0x73, + 0x70, 0x22, 0x00, 0x42, 0x22, 0x5a, 0x1d, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x67, 0x75, 0x6e, 0x2f, 0x67, 0x75, 0x62, 0x65, 0x72, 0x6e, + 0x61, 0x74, 0x6f, 0x72, 0x80, 0x01, 0x01, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/peers.proto b/peers.proto index f97f4ead..0dad87d4 100644 --- a/peers.proto +++ b/peers.proto @@ -58,8 +58,16 @@ message UpdatePeerGlobal { Algorithm algorithm = 3; // The duration of the rate limit in milliseconds int64 duration = 4; - // Time of original GetRateLimits request so that ExpiresAt timestamps can be - // synchronized. - int64 request_time = 5; + // The exact time the original request was created in Epoch milliseconds. + // Due to time drift between systems, it may be advantageous for a client to + // set the exact time the request was created. It is possible the system clock + // for the client has drifted from the system clock where the gubernator daemon + // is running. + // + // The created time is used by gubernator to calculate the reset time for + // both the token and leaky bucket algorithms. If it is not set by the client, + // gubernator will set the created time when it receives the rate limit + // request.
+ int64 created_at = 5; } message UpdatePeerGlobalsResp {} diff --git a/python/gubernator/gubernator_pb2.py b/python/gubernator/gubernator_pb2.py index 0bd92d08..f1369bd5 100644 --- a/python/gubernator/gubernator_pb2.py +++ b/python/gubernator/gubernator_pb2.py @@ -15,7 +15,7 @@ from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\xc7\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x12&\n\x0crequest_time\x18\n \x01(\x03H\x00R\x0brequestTime\x88\x01\x01\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0f\n\r_request_time\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x10gubernator.proto\x12\rpb.gubernator\x1a\x1cgoogle/api/annotations.proto\"K\n\x10GetRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"O\n\x11GetRateLimitsResp\x12:\n\tresponses\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\tresponses\"\xc1\x03\n\x0cRateLimitReq\x12\x12\n\x04name\x18\x01 
\x01(\tR\x04name\x12\x1d\n\nunique_key\x18\x02 \x01(\tR\tuniqueKey\x12\x12\n\x04hits\x18\x03 \x01(\x03R\x04hits\x12\x14\n\x05limit\x18\x04 \x01(\x03R\x05limit\x12\x1a\n\x08\x64uration\x18\x05 \x01(\x03R\x08\x64uration\x12\x36\n\talgorithm\x18\x06 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x33\n\x08\x62\x65havior\x18\x07 \x01(\x0e\x32\x17.pb.gubernator.BehaviorR\x08\x62\x65havior\x12\x14\n\x05\x62urst\x18\x08 \x01(\x03R\x05\x62urst\x12\x45\n\x08metadata\x18\t \x03(\x0b\x32).pb.gubernator.RateLimitReq.MetadataEntryR\x08metadata\x12\"\n\ncreated_at\x18\n \x01(\x03H\x00R\tcreatedAt\x88\x01\x01\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\r\n\x0b_created_at\"\xac\x02\n\rRateLimitResp\x12-\n\x06status\x18\x01 \x01(\x0e\x32\x15.pb.gubernator.StatusR\x06status\x12\x14\n\x05limit\x18\x02 \x01(\x03R\x05limit\x12\x1c\n\tremaining\x18\x03 \x01(\x03R\tremaining\x12\x1d\n\nreset_time\x18\x04 \x01(\x03R\tresetTime\x12\x14\n\x05\x65rror\x18\x05 \x01(\tR\x05\x65rror\x12\x46\n\x08metadata\x18\x06 \x03(\x0b\x32*.pb.gubernator.RateLimitResp.MetadataEntryR\x08metadata\x1a;\n\rMetadataEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\"\x10\n\x0eHealthCheckReq\"b\n\x0fHealthCheckResp\x12\x16\n\x06status\x18\x01 \x01(\tR\x06status\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12\x1d\n\npeer_count\x18\x03 \x01(\x05R\tpeerCount*/\n\tAlgorithm\x12\x10\n\x0cTOKEN_BUCKET\x10\x00\x12\x10\n\x0cLEAKY_BUCKET\x10\x01*\x8d\x01\n\x08\x42\x65havior\x12\x0c\n\x08\x42\x41TCHING\x10\x00\x12\x0f\n\x0bNO_BATCHING\x10\x01\x12\n\n\x06GLOBAL\x10\x02\x12\x19\n\x15\x44URATION_IS_GREGORIAN\x10\x04\x12\x13\n\x0fRESET_REMAINING\x10\x08\x12\x10\n\x0cMULTI_REGION\x10\x10\x12\x14\n\x10\x44RAIN_OVER_LIMIT\x10 *)\n\x06Status\x12\x0f\n\x0bUNDER_LIMIT\x10\x00\x12\x0e\n\nOVER_LIMIT\x10\x01\x32\xdd\x01\n\x02V1\x12p\n\rGetRateLimits\x12\x1f.pb.gubernator.GetRateLimitsReq\x1a .pb.gubernator.GetRateLimitsResp\"\x1c\x82\xd3\xe4\x93\x02\x16\"\x11/v1/GetRateLimits:\x01*\x12\x65\n\x0bHealthCheck\x12\x1d.pb.gubernator.HealthCheckReq\x1a\x1e.pb.gubernator.HealthCheckResp\"\x17\x82\xd3\xe4\x93\x02\x11\x12\x0f/v1/HealthCheckB\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -31,28 +31,28 @@ _globals['_V1'].methods_by_name['GetRateLimits']._serialized_options = b'\202\323\344\223\002\026\"\021/v1/GetRateLimits:\001*' _globals['_V1'].methods_by_name['HealthCheck']._options = None _globals['_V1'].methods_by_name['HealthCheck']._serialized_options = b'\202\323\344\223\002\021\022\017/v1/HealthCheck' - _globals['_ALGORITHM']._serialized_start=1102 - _globals['_ALGORITHM']._serialized_end=1149 - _globals['_BEHAVIOR']._serialized_start=1152 - _globals['_BEHAVIOR']._serialized_end=1293 - _globals['_STATUS']._serialized_start=1295 - _globals['_STATUS']._serialized_end=1336 + _globals['_ALGORITHM']._serialized_start=1096 + _globals['_ALGORITHM']._serialized_end=1143 + _globals['_BEHAVIOR']._serialized_start=1146 + _globals['_BEHAVIOR']._serialized_end=1287 + _globals['_STATUS']._serialized_start=1289 + _globals['_STATUS']._serialized_end=1330 _globals['_GETRATELIMITSREQ']._serialized_start=65 _globals['_GETRATELIMITSREQ']._serialized_end=140 _globals['_GETRATELIMITSRESP']._serialized_start=142 _globals['_GETRATELIMITSRESP']._serialized_end=221 _globals['_RATELIMITREQ']._serialized_start=224 - 
_globals['_RATELIMITREQ']._serialized_end=679 - _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=603 - _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=662 - _globals['_RATELIMITRESP']._serialized_start=682 - _globals['_RATELIMITRESP']._serialized_end=982 - _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=603 - _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=662 - _globals['_HEALTHCHECKREQ']._serialized_start=984 - _globals['_HEALTHCHECKREQ']._serialized_end=1000 - _globals['_HEALTHCHECKRESP']._serialized_start=1002 - _globals['_HEALTHCHECKRESP']._serialized_end=1100 - _globals['_V1']._serialized_start=1339 - _globals['_V1']._serialized_end=1560 + _globals['_RATELIMITREQ']._serialized_end=673 + _globals['_RATELIMITREQ_METADATAENTRY']._serialized_start=599 + _globals['_RATELIMITREQ_METADATAENTRY']._serialized_end=658 + _globals['_RATELIMITRESP']._serialized_start=676 + _globals['_RATELIMITRESP']._serialized_end=976 + _globals['_RATELIMITRESP_METADATAENTRY']._serialized_start=599 + _globals['_RATELIMITRESP_METADATAENTRY']._serialized_end=658 + _globals['_HEALTHCHECKREQ']._serialized_start=978 + _globals['_HEALTHCHECKREQ']._serialized_end=994 + _globals['_HEALTHCHECKRESP']._serialized_start=996 + _globals['_HEALTHCHECKRESP']._serialized_end=1094 + _globals['_V1']._serialized_start=1333 + _globals['_V1']._serialized_end=1554 # @@protoc_insertion_point(module_scope) diff --git a/python/gubernator/peers_pb2.py b/python/gubernator/peers_pb2.py index 9619dda8..97a519d4 100644 --- a/python/gubernator/peers_pb2.py +++ b/python/gubernator/peers_pb2.py @@ -15,7 +15,7 @@ import gubernator_pb2 as gubernator__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\xd1\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 \x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x1a\n\x08\x64uration\x18\x04 \x01(\x03R\x08\x64uration\x12!\n\x0crequest_time\x18\x05 \x01(\x03R\x0brequestTime\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0bpeers.proto\x12\rpb.gubernator\x1a\x10gubernator.proto\"O\n\x14GetPeerRateLimitsReq\x12\x37\n\x08requests\x18\x01 \x03(\x0b\x32\x1b.pb.gubernator.RateLimitReqR\x08requests\"V\n\x15GetPeerRateLimitsResp\x12=\n\x0brate_limits\x18\x01 \x03(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\nrateLimits\"Q\n\x14UpdatePeerGlobalsReq\x12\x39\n\x07globals\x18\x01 \x03(\x0b\x32\x1f.pb.gubernator.UpdatePeerGlobalR\x07globals\"\xcd\x01\n\x10UpdatePeerGlobal\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x34\n\x06status\x18\x02 \x01(\x0b\x32\x1c.pb.gubernator.RateLimitRespR\x06status\x12\x36\n\talgorithm\x18\x03 
\x01(\x0e\x32\x18.pb.gubernator.AlgorithmR\talgorithm\x12\x1a\n\x08\x64uration\x18\x04 \x01(\x03R\x08\x64uration\x12\x1d\n\ncreated_at\x18\x05 \x01(\x03R\tcreatedAt\"\x17\n\x15UpdatePeerGlobalsResp2\xcd\x01\n\x07PeersV1\x12`\n\x11GetPeerRateLimits\x12#.pb.gubernator.GetPeerRateLimitsReq\x1a$.pb.gubernator.GetPeerRateLimitsResp\"\x00\x12`\n\x11UpdatePeerGlobals\x12#.pb.gubernator.UpdatePeerGlobalsReq\x1a$.pb.gubernator.UpdatePeerGlobalsResp\"\x00\x42\"Z\x1dgithub.com/mailgun/gubernator\x80\x01\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -30,9 +30,9 @@ _globals['_UPDATEPEERGLOBALSREQ']._serialized_start=217 _globals['_UPDATEPEERGLOBALSREQ']._serialized_end=298 _globals['_UPDATEPEERGLOBAL']._serialized_start=301 - _globals['_UPDATEPEERGLOBAL']._serialized_end=510 - _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=512 - _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=535 - _globals['_PEERSV1']._serialized_start=538 - _globals['_PEERSV1']._serialized_end=743 + _globals['_UPDATEPEERGLOBAL']._serialized_end=506 + _globals['_UPDATEPEERGLOBALSRESP']._serialized_start=508 + _globals['_UPDATEPEERGLOBALSRESP']._serialized_end=531 + _globals['_PEERSV1']._serialized_start=534 + _globals['_PEERSV1']._serialized_end=739 # @@protoc_insertion_point(module_scope) From 0b2adf6622b918648b243d4bd16ec936067bd66d Mon Sep 17 00:00:00 2001 From: Shawn Poulson Date: Wed, 13 Mar 2024 10:57:36 -0400 Subject: [PATCH 23/23] Revert optimization that won't work. --- algorithms.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/algorithms.go b/algorithms.go index 8f4bea6a..c9231610 100644 --- a/algorithms.go +++ b/algorithms.go @@ -146,18 +146,18 @@ func tokenBucket(ctx context.Context, s Store, c Cache, r *RateLimitReq, reqStat rl.ResetTime = expire } - // Client is only interested in retrieving the current status or - // updating the rate limit config. - if r.Hits == 0 { - return rl, nil - } - if s != nil && reqState.IsOwner { defer func() { s.OnChange(ctx, r, item) }() } + // Client is only interested in retrieving the current status or + // updating the rate limit config. + if r.Hits == 0 { + return rl, nil + } + // If we are already at the limit. if rl.Remaining == 0 && r.Hits > 0 { trace.SpanFromContext(ctx).AddEvent("Already over the limit")