Skip to content

Commit

Permalink
Enhance subnet selection
Browse files Browse the repository at this point in the history
  • Loading branch information
Joseph Chen committed Mar 15, 2024
1 parent 53e7290 commit cd263e8
Show file tree
Hide file tree
Showing 18 changed files with 878 additions and 43 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,15 @@ You can use the below command to enable `DISABLE_TCP_EARLY_DEMUX` to `true` -
kubectl patch daemonset aws-node -n kube-system -p '{"spec": {"template": {"spec": {"initContainers": [{"env":[{"name":"DISABLE_TCP_EARLY_DEMUX","value":"true"}],"name":"aws-vpc-cni-init"}]}}}}'
```

#### `ENABLE_SUBNET_DISCOVERY` (v1.18.0+)

Type: Boolean as a String

Default: `true`

Subnet discovery is enabled by default. VPC-CNI will pick the subnet with the most number of free IPs from the nodes' VPC/AZ to create the secondary ENIs. The subnets considered are the subnet the node is created in and subnets tagged with `kubernetes.io/role/cni`.
If `ENABLE_SUBNET_DISCOVERY` is set to `false` or if DescribeSubnets fails due to IAM permissions, all secondary ENIs will be created in the subnet the node is created in.

#### `ENABLE_PREFIX_DELEGATION` (v1.9.0+)

Type: Boolean as a String
Expand Down Expand Up @@ -733,6 +742,7 @@ Note that if you set this while using Multus, you must ensure that any chained p
This plugin interacts with the following tags on ENIs:

* `cluster.k8s.amazonaws.com/name`
* `kubernetes.io/role/cni`
* `node.k8s.amazonaws.com/instance_id`
* `node.k8s.amazonaws.com/no_manage`

Expand All @@ -741,6 +751,17 @@ This plugin interacts with the following tags on ENIs:
The tag `cluster.k8s.amazonaws.com/name` will be set to the cluster name of the
aws-node daemonset which created the ENI.

#### CNI role tag

The tag `kubernetes.io/role/cni` is read by the aws-node daemonset to determine
if a secondary subnet can be used for creating secondary ENIs.

This tag is not set by the cni plugin itself, but rather must be set by a user
to indicate that a subnet can be used for secondary ENIs. Secondary subnets
to be used must have this tag. The primary subnet (node's subnet) is not
required to be tagged.


#### Instance ID tag

The tag `node.k8s.amazonaws.com/instance_id` will be set to the instance ID of
Expand Down
1 change: 1 addition & 0 deletions charts/aws-vpc-cni/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ env:
ENABLE_IPv4: "true"
ENABLE_IPv6: "false"
VPC_CNI_VERSION: "v1.16.4"
ENABLE_SUBNET_DISCOVERY: "true"

# this flag enables you to use the match label that was present in the original daemonset deployed by EKS
# You can then annotate and label the original aws-node resources and 'adopt' them into a helm release
Expand Down
2 changes: 2 additions & 0 deletions config/master/aws-k8s-cni-cn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ spec:
value: "false"
- name: VPC_CNI_VERSION
value: "v1.16.4"
- name: ENABLE_SUBNET_DISCOVERY
value: "true"
- name: WARM_ENI_TARGET
value: "1"
- name: WARM_PREFIX_TARGET
Expand Down
2 changes: 2 additions & 0 deletions config/master/aws-k8s-cni-us-gov-east-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ spec:
value: "false"
- name: VPC_CNI_VERSION
value: "v1.16.4"
- name: ENABLE_SUBNET_DISCOVERY
value: "true"
- name: WARM_ENI_TARGET
value: "1"
- name: WARM_PREFIX_TARGET
Expand Down
2 changes: 2 additions & 0 deletions config/master/aws-k8s-cni-us-gov-west-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ spec:
value: "false"
- name: VPC_CNI_VERSION
value: "v1.16.4"
- name: ENABLE_SUBNET_DISCOVERY
value: "true"
- name: WARM_ENI_TARGET
value: "1"
- name: WARM_PREFIX_TARGET
Expand Down
2 changes: 2 additions & 0 deletions config/master/aws-k8s-cni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ spec:
value: "false"
- name: VPC_CNI_VERSION
value: "v1.16.4"
- name: ENABLE_SUBNET_DISCOVERY
value: "true"
- name: WARM_ENI_TARGET
value: "1"
- name: WARM_PREFIX_TARGET
Expand Down
151 changes: 128 additions & 23 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"net"
"os"
"regexp"
"sort"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -58,6 +59,7 @@ const (
eniClusterTagKey = "cluster.k8s.amazonaws.com/name"
additionalEniTagsEnvVar = "ADDITIONAL_ENI_TAGS"
reservedTagKeyPrefix = "k8s.amazonaws.com"
subnetDiscoveryTagKey = "kubernetes.io/role/cni"
// UnknownInstanceType indicates that the instance type is not yet supported
UnknownInstanceType = "vpc ip resource(eni ip limit): unknown instance type"

Expand Down Expand Up @@ -85,7 +87,7 @@ var log = logger.Get()
// APIs defines interfaces calls for adding/getting/deleting ENIs/secondary IPs. The APIs are not thread-safe.
type APIs interface {
// AllocENI creates an ENI and attaches it to the instance
AllocENI(useCustomCfg bool, sg []*string, subnet string, numIPs int) (eni string, err error)
AllocENI(useCustomCfg bool, sg []*string, eniCfgSubnet string, numIPs int) (eni string, err error)

// FreeENI detaches ENI interface and deletes it
FreeENI(eniName string) error
Expand Down Expand Up @@ -200,10 +202,12 @@ type EC2InstanceMetadataCache struct {
primaryENImac string
availabilityZone string
region string
vpcID string

unmanagedENIs StringSet
useCustomNetworking bool
multiCardENIs StringSet
useSubnetDiscovery bool
enablePrefixDelegation bool

clusterName string
Expand Down Expand Up @@ -347,7 +351,7 @@ func (i instrumentedIMDS) GetMetadataWithContext(ctx context.Context, p string)
}

// New creates an EC2InstanceMetadataCache
func New(useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) {
func New(useSubnetDiscovery, useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) {
// ctx is passed to initWithEC2Metadata func to cancel spawned go-routines when tests are run
ctx := context.Background()

Expand All @@ -367,6 +371,8 @@ func New(useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool
log.Debugf("Discovered region: %s", cache.region)
cache.useCustomNetworking = useCustomNetworking
log.Infof("Custom networking enabled %v", cache.useCustomNetworking)
cache.useSubnetDiscovery = useSubnetDiscovery
log.Infof("Subnet discovery enabled %v", cache.useSubnetDiscovery)
cache.v4Enabled = v4Enabled
cache.v6Enabled = v6Enabled

Expand Down Expand Up @@ -442,14 +448,22 @@ func (cache *EC2InstanceMetadataCache) initWithEC2Metadata(ctx context.Context)
}
log.Debugf("%s is the primary ENI of this instance", cache.primaryENI)

// retrieve sub-id
// retrieve subnet-id
cache.subnetID, err = cache.imds.GetSubnetID(ctx, mac)
if err != nil {
awsAPIErrInc("GetSubnetID", err)
return err
}
log.Debugf("Found subnet-id: %s ", cache.subnetID)

// retrieve vpc-id
cache.vpcID, err = cache.imds.GetVpcID(ctx, mac)
if err != nil {
awsAPIErrInc("GetVpcID", err)
return err
}
log.Debugf("Found vpc-id: %s ", cache.vpcID)

// We use the ctx here for testing, since we spawn go-routines above which will run forever.
select {
case <-ctx.Done():
Expand Down Expand Up @@ -730,8 +744,8 @@ func (cache *EC2InstanceMetadataCache) awsGetFreeDeviceNumber() (int, error) {

// AllocENI creates an ENI and attaches it to the instance
// returns: newly created ENI ID
func (cache *EC2InstanceMetadataCache) AllocENI(useCustomCfg bool, sg []*string, subnet string, numIPs int) (string, error) {
eniID, err := cache.createENI(useCustomCfg, sg, subnet, numIPs)
func (cache *EC2InstanceMetadataCache) AllocENI(useCustomCfg bool, sg []*string, eniCfgSubnet string, numIPs int) (string, error) {
eniID, err := cache.createENI(useCustomCfg, sg, eniCfgSubnet, numIPs)
if err != nil {
return "", errors.Wrap(err, "AllocENI: failed to create ENI")
}
Expand Down Expand Up @@ -803,7 +817,7 @@ func (cache *EC2InstanceMetadataCache) attachENI(eniID string) (string, error) {
}

// return ENI id, error
func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string, subnet string, numIPs int) (string, error) {
func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string, eniCfgSubnet string, numIPs int) (string, error) {
eniDescription := eniDescriptionPrefix + cache.instanceID
tags := map[string]string{
eniCreatedAtTagKey: time.Now().Format(time.RFC3339),
Expand All @@ -826,6 +840,7 @@ func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string

log.Infof("Trying to allocate %d IP addresses on new ENI", needIPs)
log.Debugf("PD enabled - %t", cache.enablePrefixDelegation)

input := &ec2.CreateNetworkInterfaceInput{}

if cache.enablePrefixDelegation {
Expand All @@ -845,33 +860,123 @@ func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string
SecondaryPrivateIpAddressCount: aws.Int64(int64(needIPs)),
}
}
if useCustomCfg {
log.Info("Using a custom network config for the new ENI")
if len(sg) != 0 {
input.Groups = sg

var err error
var networkInterfaceID string
if cache.useCustomNetworking {
input = createENIUsingCustomCfg(sg, eniCfgSubnet, input)
log.Infof("Creating ENI with security groups: %v in subnet: %s", aws.StringValueSlice(input.Groups), aws.StringValue(input.SubnetId))

networkInterfaceID, err = cache.tryCreateNetworkInterface(input)
if err == nil {
return networkInterfaceID, nil
}
} else {
if cache.useSubnetDiscovery {
subnetResult, vpcErr := cache.getVpcSubnets()
if vpcErr != nil {
log.Warnf("Failed to call ec2:DescribeSubnets: %v", vpcErr)
log.Info("Defaulting to same subnet as the primary interface for the new ENI")
networkInterfaceID, err = cache.tryCreateNetworkInterface(input)
if err == nil {
return networkInterfaceID, nil
}
} else {
for _, subnet := range subnetResult {
if *subnet.SubnetId != cache.subnetID {
if !validTag(subnet) {
continue
}
}
log.Infof("Creating ENI with security groups: %v in subnet: %s", aws.StringValueSlice(input.Groups), aws.StringValue(input.SubnetId))

input.SubnetId = subnet.SubnetId
networkInterfaceID, err = cache.tryCreateNetworkInterface(input)
if err == nil {
return networkInterfaceID, nil
}
}
}
} else {
log.Warnf("No custom networking security group found, will use the node's primary ENI's SG: %v", aws.StringValueSlice(input.Groups))
log.Info("Using same security group config as the primary interface for the new ENI")
networkInterfaceID, err = cache.tryCreateNetworkInterface(input)
if err == nil {
return networkInterfaceID, nil
}
}
}
return "", errors.Wrap(err, "failed to create network interface")
}

func (cache *EC2InstanceMetadataCache) getVpcSubnets() ([]*ec2.Subnet, error) {
describeSubnetInput := &ec2.DescribeSubnetsInput{
Filters: []*ec2.Filter{
{
Name: aws.String("vpc-id"),
Values: []*string{aws.String(cache.vpcID)},
},
{
Name: aws.String("availability-zone"),
Values: []*string{aws.String(cache.availabilityZone)},
},
},
}

start := time.Now()
subnetResult, err := cache.ec2SVC.DescribeSubnetsWithContext(context.Background(), describeSubnetInput)
prometheusmetrics.Ec2ApiReq.WithLabelValues("DescribeSubnets").Inc()
prometheusmetrics.AwsAPILatency.WithLabelValues("DescribeSubnets", fmt.Sprint(err != nil), awsReqStatus(err)).Observe(msSince(start))
if err != nil {
checkAPIErrorAndBroadcastEvent(err, "ec2:DescribeSubnets")
awsAPIErrInc("DescribeSubnets", err)
prometheusmetrics.Ec2ApiErr.WithLabelValues("DescribeSubnets").Inc()
return nil, errors.Wrap(err, "AllocENI: unable to describe subnets")
}

// Sort the subnet by available IP address counter (desc order) before determining subnet to use
sort.SliceStable(subnetResult.Subnets, func(i, j int) bool {
return *subnetResult.Subnets[j].AvailableIpAddressCount < *subnetResult.Subnets[i].AvailableIpAddressCount
})

return subnetResult.Subnets, nil
}

func validTag(subnet *ec2.Subnet) bool {
for _, tag := range subnet.Tags {
if *tag.Key == subnetDiscoveryTagKey {
return true
}
input.SubnetId = aws.String(subnet)
}
return false
}

func createENIUsingCustomCfg(sg []*string, eniCfgSubnet string, input *ec2.CreateNetworkInterfaceInput) *ec2.CreateNetworkInterfaceInput {
log.Info("Using a custom network config for the new ENI")

if len(sg) != 0 {
input.Groups = sg
} else {
log.Info("Using same config as the primary interface for the new ENI")
log.Warnf("No custom networking security group found, will use the node's primary ENI's SG: %v", aws.StringValueSlice(input.Groups))
}
input.SubnetId = aws.String(eniCfgSubnet)

log.Infof("Creating ENI with security groups: %v in subnet: %s", aws.StringValueSlice(input.Groups), aws.StringValue(input.SubnetId))
return input
}

func (cache *EC2InstanceMetadataCache) tryCreateNetworkInterface(input *ec2.CreateNetworkInterfaceInput) (string, error) {
start := time.Now()
result, err := cache.ec2SVC.CreateNetworkInterfaceWithContext(context.Background(), input)
prometheusmetrics.Ec2ApiReq.WithLabelValues("CreateNetworkInterface").Inc()
prometheusmetrics.AwsAPILatency.WithLabelValues("CreateNetworkInterface", fmt.Sprint(err != nil), awsReqStatus(err)).Observe(msSince(start))
if err != nil {
checkAPIErrorAndBroadcastEvent(err, "ec2:CreateNetworkInterface")
awsAPIErrInc("CreateNetworkInterface", err)
prometheusmetrics.Ec2ApiErr.WithLabelValues("CreateNetworkInterface").Inc()
log.Errorf("Failed to CreateNetworkInterface %v", err)
return "", errors.Wrap(err, "failed to create network interface")
}
log.Infof("Created a new ENI: %s", aws.StringValue(result.NetworkInterface.NetworkInterfaceId))
return aws.StringValue(result.NetworkInterface.NetworkInterfaceId), nil
if err == nil {
log.Infof("Created a new ENI: %s", aws.StringValue(result.NetworkInterface.NetworkInterfaceId))
return aws.StringValue(result.NetworkInterface.NetworkInterfaceId), nil
}
checkAPIErrorAndBroadcastEvent(err, "ec2:CreateNetworkInterface")
awsAPIErrInc("CreateNetworkInterface", err)
prometheusmetrics.Ec2ApiErr.WithLabelValues("CreateNetworkInterface").Inc()
log.Errorf("Failed to CreateNetworkInterface %v for subnet %s", err, *input.SubnetId)
return "", err
}

// buildENITags computes the desired AWS Tags for eni
Expand Down
Loading

0 comments on commit cd263e8

Please sign in to comment.