Skip to content

Commit

Permalink
🧹 Retry login command when concurrent IAM updates are detected
Browse files Browse the repository at this point in the history
If the server returns aborted for RegisterAgent, that means it aborted
the change because of concurrent writes to the IAM policy, likely
meaning concurrent registrations. Clients can safely back off and
retry in this case.
  • Loading branch information
jaym committed Jan 29, 2024
1 parent a62e09b commit d3345c5
Showing 1 changed file with 31 additions and 2 deletions.
33 changes: 31 additions & 2 deletions apps/cnquery/cmd/login.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package cmd

import (
"context"
"math/rand"
"strings"
"time"

Expand All @@ -19,7 +20,9 @@ import (
"go.mondoo.com/cnquery/v10/providers-sdk/v1/sysinfo"
"go.mondoo.com/cnquery/v10/providers-sdk/v1/upstream"
"go.mondoo.com/ranger-rpc"
"go.mondoo.com/ranger-rpc/codes"
"go.mondoo.com/ranger-rpc/plugins/authentication/statictoken"
"go.mondoo.com/ranger-rpc/status"
)

func init() {
Expand Down Expand Up @@ -116,7 +119,7 @@ func register(token string, annotations map[string]string) error {
name = sysInfo.Hostname
}

confirmation, err := client.RegisterAgent(context.Background(), &upstream.AgentRegistrationRequest{
confirmation, err := registerAgent(context.Background(), client, &upstream.AgentRegistrationRequest{
Token: token,
Name: name,
AgentInfo: &upstream.AgentInfo{
Expand Down Expand Up @@ -193,7 +196,7 @@ func register(token string, annotations map[string]string) error {
name = sysInfo.Hostname
}

confirmation, err := client.RegisterAgent(context.Background(), &upstream.AgentRegistrationRequest{
confirmation, err := registerAgent(context.Background(), client, &upstream.AgentRegistrationRequest{
Name: name,
AgentInfo: &upstream.AgentInfo{
Mrn: opts.AgentMrn,
Expand Down Expand Up @@ -247,3 +250,29 @@ func register(token string, annotations map[string]string) error {
log.Info().Msgf("client %s has logged in successfully", viper.Get("agent_mrn"))
return nil
}

// registerAgent calls client.RegisterAgent and retries the call when the
// server answers with codes.Aborted, which this command treats as a sign of
// concurrent IAM changes that are safe to retry.
//
// At most maxRetries retries are made (maxRetries+1 attempts in total). The
// wait before each retry doubles, starting at 5s, plus up to 5s of random
// jitter so that clients registering at the same time do not retry in
// lockstep. The wait is abandoned as soon as ctx is done.
//
// It returns the confirmation of the first successful attempt. Any error other
// than codes.Aborted is wrapped and returned immediately without retrying.
func registerAgent(ctx context.Context, client *upstream.AgentManagerClient, req *upstream.AgentRegistrationRequest) (*upstream.AgentRegistrationConfirmation, error) {
	const maxRetries = 3

	try := 0
	for {
		confirmation, err := client.RegisterAgent(ctx, req)
		if err == nil {
			return confirmation, nil
		}
		if status.Code(err) != codes.Aborted {
			return nil, errors.Wrap(err, "failed to log in client")
		}

		// The delay is derived from the number of retries already made, so it
		// has to be computed before try is incremented: 5s, 10s, 20s (+ jitter).
		jitter := time.Duration(rand.Intn(5000)) * time.Millisecond
		sleepTime := 5*(1<<try)*time.Second + jitter

		try++
		if try > maxRetries {
			return nil, errors.Wrap(err, "failed to log in client due to concurrent IAM changes")
		}

		log.Warn().Err(err).Msgf("failed to log in client due to concurrent IAM changes, retrying (%d/%d) in %dms", try, maxRetries, sleepTime.Milliseconds())

		// Wait with a timer instead of time.Sleep so that a cancelled or
		// expired context interrupts the backoff rather than blocking until
		// the full delay has elapsed.
		timer := time.NewTimer(sleepTime)
		select {
		case <-ctx.Done():
			timer.Stop()
			return nil, errors.Wrap(ctx.Err(), "failed to log in client")
		case <-timer.C:
		}
	}
}

0 comments on commit d3345c5

Please sign in to comment.