Skip to content

Commit

Permalink
Assume instance exists within eventual-consistency grace period
Browse files Browse the repository at this point in the history
  • Loading branch information
cartermckinnon authored and cmckn committed Nov 15, 2024
1 parent a59953a commit 8282b31
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 15 deletions.
41 changes: 28 additions & 13 deletions pkg/providers/v1/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ type Cloud struct {

nodeInformer informercorev1.NodeInformer
// Extract the function out to make it easier to test
nodeInformerHasSynced cache.InformerSynced
nodeInformerHasSynced cache.InformerSynced
nodeEventualConsistencyGracePeriod time.Duration

eventBroadcaster record.EventBroadcaster
eventRecorder record.EventRecorder
Expand Down Expand Up @@ -600,13 +601,14 @@ func newAWSCloud2(cfg config.CloudConfig, awsServices Services, provider config.
}

awsCloud := &Cloud{
ec2: ec2,
elb: elb,
elbv2: elbv2,
metadata: metadata,
kms: kms,
cfg: &cfg,
region: regionName,
ec2: ec2,
elb: elb,
elbv2: elbv2,
metadata: metadata,
kms: kms,
cfg: &cfg,
region: regionName,
nodeEventualConsistencyGracePeriod: cfg.Global.NodeEventualConsistencyGracePeriod,
}
awsCloud.instanceCache.cloud = awsCloud
awsCloud.zoneCache.cloud = awsCloud
Expand Down Expand Up @@ -869,9 +871,26 @@ func (c *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID string
return addresses, nil
}

// InstanceExistsByProviderID returns true if the instance with the given provider id still exists.
// InstanceExistsByProviderID implements Instances.InstanceExistsByProviderID (v1)
// returns true if the instance with the given provider id still exists.
// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
exists, err := c.instanceExistsByProviderID(ctx, providerID)
if err != nil {
if IsAWSErrorInstanceNotFound(err) {
// we have to assume this means the instance was terminated a while ago
return false, nil
}
return false, err
}
return exists, nil
}

// instanceExistsByProviderID returns true if the instance with the given provider id still exists.
// If error InvalidInstanceId.NotFound is returned, the caller should decide how to handle this. It could mean:
// a. the instance was launched recently and isn't found in the API due to eventual-consistency
// b. the instance was terminated a while ago, and no longer appears in the API with "terminated" status
func (c *Cloud) instanceExistsByProviderID(_ context.Context, providerID string) (bool, error) {
instanceID, err := KubernetesInstanceID(providerID).MapToAWSInstanceID()
if err != nil {
return false, err
Expand All @@ -887,10 +906,6 @@ func (c *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID strin

instances, err := c.ec2.DescribeInstances(request)
if err != nil {
// if err is InstanceNotFound, return false with no error
if IsAWSErrorInstanceNotFound(err) {
return false, nil
}
return false, err
}
if len(instances) == 0 {
Expand Down
9 changes: 8 additions & 1 deletion pkg/providers/v1/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package config

import (
"fmt"
"github.com/aws/aws-sdk-go/aws/request"
"strings"
"time"

"github.com/aws/aws-sdk-go/aws/request"

"github.com/aws/aws-sdk-go/aws/endpoints"

Expand Down Expand Up @@ -62,6 +64,11 @@ type CloudConfig struct {

// NodeIPFamilies determines which IP addresses are added to node objects and their ordering.
NodeIPFamilies []string

// NodeEventualConsistencyGracePeriod is used to account for propogation delays in the EC2 API.
// An instance may not appear in `ec2:DescribeInstances` output for a period of time after launch.
// The cloud-node-lifecycle-controller must not delete the Node prematurely in this case.
NodeEventualConsistencyGracePeriod time.Duration
}
// [ServiceOverride "1"]
// Service = s3
Expand Down
17 changes: 16 additions & 1 deletion pkg/providers/v1/instances_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ package aws

import (
"context"
"fmt"
"strconv"
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -50,7 +52,20 @@ func (c *Cloud) InstanceExists(ctx context.Context, node *v1.Node) (bool, error)
return false, err
}

return c.InstanceExistsByProviderID(ctx, providerID)
exists, err := c.instanceExistsByProviderID(ctx, providerID)
if err != nil {
if IsAWSErrorInstanceNotFound(err) {
if time.Since(node.CreationTimestamp.Time) < c.nodeEventualConsistencyGracePeriod {
// recently-launched EC2 instances may not appear in `ec2:DescribeInstances`
// we return an error if we're within the eventual-consistency grace period
// e.g. to cause the cloud-node-lifecycle-controller to ignore this node
return false, fmt.Errorf("node is within eventual-consistency grace period (%v): %v", c.nodeEventualConsistencyGracePeriod, err)
}
return false, nil
}

}
return exists, nil
}

// InstanceShutdown returns true if the instance is shutdown according to the cloud provider.
Expand Down

0 comments on commit 8282b31

Please sign in to comment.