Skip to content

Commit

Permalink
Assume instance exists within eventual-consistency grace period
Browse files Browse the repository at this point in the history
  • Loading branch information
cartermckinnon authored and cmckn committed Nov 15, 2024
1 parent a59953a commit d936e99
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 14 deletions.
25 changes: 13 additions & 12 deletions pkg/providers/v1/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ type Cloud struct {

nodeInformer informercorev1.NodeInformer
// Extract the function out to make it easier to test
nodeInformerHasSynced cache.InformerSynced
nodeInformerHasSynced cache.InformerSynced
nodeEventualConsistencyGracePeriod time.Duration

eventBroadcaster record.EventBroadcaster
eventRecorder record.EventRecorder
Expand Down Expand Up @@ -600,13 +601,14 @@ func newAWSCloud2(cfg config.CloudConfig, awsServices Services, provider config.
}

awsCloud := &Cloud{
ec2: ec2,
elb: elb,
elbv2: elbv2,
metadata: metadata,
kms: kms,
cfg: &cfg,
region: regionName,
ec2: ec2,
elb: elb,
elbv2: elbv2,
metadata: metadata,
kms: kms,
cfg: &cfg,
region: regionName,
nodeEventualConsistencyGracePeriod: cfg.Global.NodeEventualConsistencyGracePeriod,
}
awsCloud.instanceCache.cloud = awsCloud
awsCloud.zoneCache.cloud = awsCloud
Expand Down Expand Up @@ -887,10 +889,9 @@ func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID strin

instances, err := c.ec2.DescribeInstances(request)
if err != nil {
// if err is InstanceNotFound, return false with no error
if IsAWSErrorInstanceNotFound(err) {
return false, nil
}
// we may receive InvalidInstanceId.NotFound for some time after launch
// due to eventual-consistency. return this error so the caller can decide
// how to handle this
return false, err
}
if len(instances) == 0 {
Expand Down
9 changes: 8 additions & 1 deletion pkg/providers/v1/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package config

import (
"fmt"
"github.com/aws/aws-sdk-go/aws/request"
"strings"
"time"

"github.com/aws/aws-sdk-go/aws/request"

"github.com/aws/aws-sdk-go/aws/endpoints"

Expand Down Expand Up @@ -62,6 +64,11 @@ type CloudConfig struct {

// NodeIPFamilies determines which IP addresses are added to node objects and their ordering.
NodeIPFamilies []string

// NodeEventualConsistencyGracePeriod is used to account for propogation delays in the EC2 API.
// An instance may not appear in `ec2:DescribeInstances` output for a period of time after launch.
// The cloud-node-lifecycle-controller must not delete the Node prematurely in this case.
NodeEventualConsistencyGracePeriod time.Duration
}
// [ServiceOverride "1"]
// Service = s3
Expand Down
20 changes: 19 additions & 1 deletion pkg/providers/v1/instances_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ package aws

import (
"context"
"fmt"
"strconv"
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -50,7 +52,23 @@ func (c *Cloud) InstanceExists(ctx context.Context, node *v1.Node) (bool, error)
return false, err
}

return c.InstanceExistsByProviderID(ctx, providerID)
exists, err := c.InstanceExistsByProviderID(ctx, providerID)
if err != nil {
if IsAWSErrorInstanceNotFound(err) {
if time.Since(node.CreationTimestamp.Time) < c.nodeEventualConsistencyGracePeriod {
// recently-launched EC2 instances may not appear in `ec2:DescribeInstances`
// we return an error if we're within the eventual-consistency grace period
// e.g. to cause the cloud-node-lifecycle-controller to ignore this node
return false, fmt.Errorf("node is within eventual-consistency grace period (%v): %v", c.nodeEventualConsistencyGracePeriod, err)
}
// if the grace period has elapsed, assume the instance was not found because it was terminated a while ago.
// instances remain in the API with "terminated" status for "a short while":
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/TroubleshootingInstancesShuttingDown.html#terminated-instance-still-displaying
return false, nil
}

}
return exists, nil
}

// InstanceShutdown returns true if the instance is shutdown according to the cloud provider.
Expand Down

0 comments on commit d936e99

Please sign in to comment.