diff --git a/README.md b/README.md index 78c279426a..eafe86da21 100644 --- a/README.md +++ b/README.md @@ -545,6 +545,15 @@ You can use the below command to enable `DISABLE_TCP_EARLY_DEMUX` to `true` - kubectl patch daemonset aws-node -n kube-system -p '{"spec": {"template": {"spec": {"initContainers": [{"env":[{"name":"DISABLE_TCP_EARLY_DEMUX","value":"true"}],"name":"aws-vpc-cni-init"}]}}}}' ``` +#### `ENABLE_SUBNET_DISCOVERY` (v1.18.0+) + +Type: Boolean as a String + +Default: `true` + +Subnet discovery is enabled by default. VPC-CNI will pick the subnet with the most number of free IPs from the nodes' VPC/AZ to create the secondary ENIs. The subnets considered are the subnet the node is created in and subnets tagged with `kubernetes.io/role/cni`. +If `ENABLE_SUBNET_DISCOVERY` is set to `false` or if DescribeSubnets fails due to IAM permissions, all secondary ENIs will be created in the subnet the node is created in. + #### `ENABLE_PREFIX_DELEGATION` (v1.9.0+) Type: Boolean as a String @@ -733,6 +742,7 @@ Note that if you set this while using Multus, you must ensure that any chained p This plugin interacts with the following tags on ENIs: * `cluster.k8s.amazonaws.com/name` +* `kubernetes.io/role/cni` * `node.k8s.amazonaws.com/instance_id` * `node.k8s.amazonaws.com/no_manage` @@ -741,6 +751,17 @@ This plugin interacts with the following tags on ENIs: The tag `cluster.k8s.amazonaws.com/name` will be set to the cluster name of the aws-node daemonset which created the ENI. +#### CNI role tag + +The tag `kubernetes.io/role/cni` is read by the aws-node daemonset to determine +if a secondary subnet can be used for creating secondary ENIs. + +This tag is not set by the cni plugin itself, but rather must be set by a user +to indicate that a subnet can be used for secondary ENIs. Secondary subnets +to be used must have this tag. The primary subnet (node's subnet) is not +required to be tagged. + + #### Instance ID tag The tag `node.k8s.amazonaws.com/instance_id` will be set to the instance ID of diff --git a/charts/aws-vpc-cni/values.yaml b/charts/aws-vpc-cni/values.yaml index 253c25f936..b490887dde 100644 --- a/charts/aws-vpc-cni/values.yaml +++ b/charts/aws-vpc-cni/values.yaml @@ -84,6 +84,7 @@ env: ENABLE_IPv4: "true" ENABLE_IPv6: "false" VPC_CNI_VERSION: "v1.16.4" + ENABLE_SUBNET_DISCOVERY: "true" # this flag enables you to use the match label that was present in the original daemonset deployed by EKS # You can then annotate and label the original aws-node resources and 'adopt' them into a helm release diff --git a/config/master/aws-k8s-cni-cn.yaml b/config/master/aws-k8s-cni-cn.yaml index fd42723edf..809fb96b36 100644 --- a/config/master/aws-k8s-cni-cn.yaml +++ b/config/master/aws-k8s-cni-cn.yaml @@ -466,6 +466,8 @@ spec: value: "false" - name: VPC_CNI_VERSION value: "v1.16.4" + - name: ENABLE_SUBNET_DISCOVERY + value: "true" - name: WARM_ENI_TARGET value: "1" - name: WARM_PREFIX_TARGET diff --git a/config/master/aws-k8s-cni-us-gov-east-1.yaml b/config/master/aws-k8s-cni-us-gov-east-1.yaml index 68e89b02ea..d5038be9a0 100644 --- a/config/master/aws-k8s-cni-us-gov-east-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-east-1.yaml @@ -466,6 +466,8 @@ spec: value: "false" - name: VPC_CNI_VERSION value: "v1.16.4" + - name: ENABLE_SUBNET_DISCOVERY + value: "true" - name: WARM_ENI_TARGET value: "1" - name: WARM_PREFIX_TARGET diff --git a/config/master/aws-k8s-cni-us-gov-west-1.yaml b/config/master/aws-k8s-cni-us-gov-west-1.yaml index 42b95afc0c..b3114c780e 100644 --- a/config/master/aws-k8s-cni-us-gov-west-1.yaml +++ b/config/master/aws-k8s-cni-us-gov-west-1.yaml @@ -466,6 +466,8 @@ spec: value: "false" - name: VPC_CNI_VERSION value: "v1.16.4" + - name: ENABLE_SUBNET_DISCOVERY + value: "true" - name: WARM_ENI_TARGET value: "1" - name: WARM_PREFIX_TARGET diff --git a/config/master/aws-k8s-cni.yaml b/config/master/aws-k8s-cni.yaml index bec54f2d02..0d23f99952 100644 --- a/config/master/aws-k8s-cni.yaml +++ b/config/master/aws-k8s-cni.yaml @@ -466,6 +466,8 @@ spec: value: "false" - name: VPC_CNI_VERSION value: "v1.16.4" + - name: ENABLE_SUBNET_DISCOVERY + value: "true" - name: WARM_ENI_TARGET value: "1" - name: WARM_PREFIX_TARGET diff --git a/pkg/awsutils/awsutils.go b/pkg/awsutils/awsutils.go index 060fe17659..bccbf4b869 100644 --- a/pkg/awsutils/awsutils.go +++ b/pkg/awsutils/awsutils.go @@ -22,6 +22,7 @@ import ( "net" "os" "regexp" + "sort" "strings" "sync" "time" @@ -58,6 +59,7 @@ const ( eniClusterTagKey = "cluster.k8s.amazonaws.com/name" additionalEniTagsEnvVar = "ADDITIONAL_ENI_TAGS" reservedTagKeyPrefix = "k8s.amazonaws.com" + subnetDiscoveryTagKey = "kubernetes.io/role/cni" // UnknownInstanceType indicates that the instance type is not yet supported UnknownInstanceType = "vpc ip resource(eni ip limit): unknown instance type" @@ -85,7 +87,7 @@ var log = logger.Get() // APIs defines interfaces calls for adding/getting/deleting ENIs/secondary IPs. The APIs are not thread-safe. type APIs interface { // AllocENI creates an ENI and attaches it to the instance - AllocENI(useCustomCfg bool, sg []*string, subnet string, numIPs int) (eni string, err error) + AllocENI(useCustomCfg bool, sg []*string, eniCfgSubnet string, numIPs int) (eni string, err error) // FreeENI detaches ENI interface and deletes it FreeENI(eniName string) error @@ -200,10 +202,12 @@ type EC2InstanceMetadataCache struct { primaryENImac string availabilityZone string region string + vpcID string unmanagedENIs StringSet useCustomNetworking bool multiCardENIs StringSet + useSubnetDiscovery bool enablePrefixDelegation bool clusterName string @@ -347,7 +351,7 @@ func (i instrumentedIMDS) GetMetadataWithContext(ctx context.Context, p string) } // New creates an EC2InstanceMetadataCache -func New(useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) { +func New(useSubnetDiscovery, useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool) (*EC2InstanceMetadataCache, error) { // ctx is passed to initWithEC2Metadata func to cancel spawned go-routines when tests are run ctx := context.Background() @@ -367,6 +371,8 @@ func New(useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool log.Debugf("Discovered region: %s", cache.region) cache.useCustomNetworking = useCustomNetworking log.Infof("Custom networking enabled %v", cache.useCustomNetworking) + cache.useSubnetDiscovery = useSubnetDiscovery + log.Infof("Subnet discovery enabled %v", cache.useSubnetDiscovery) cache.v4Enabled = v4Enabled cache.v6Enabled = v6Enabled @@ -442,7 +448,7 @@ func (cache *EC2InstanceMetadataCache) initWithEC2Metadata(ctx context.Context) } log.Debugf("%s is the primary ENI of this instance", cache.primaryENI) - // retrieve sub-id + // retrieve subnet-id cache.subnetID, err = cache.imds.GetSubnetID(ctx, mac) if err != nil { awsAPIErrInc("GetSubnetID", err) @@ -450,6 +456,14 @@ func (cache *EC2InstanceMetadataCache) initWithEC2Metadata(ctx context.Context) } log.Debugf("Found subnet-id: %s ", cache.subnetID) + // retrieve vpc-id + cache.vpcID, err = cache.imds.GetVpcID(ctx, mac) + if err != nil { + awsAPIErrInc("GetVpcID", err) + return err + } + log.Debugf("Found vpc-id: %s ", cache.vpcID) + // We use the ctx here for testing, since we spawn go-routines above which will run forever. select { case <-ctx.Done(): @@ -730,8 +744,8 @@ func (cache *EC2InstanceMetadataCache) awsGetFreeDeviceNumber() (int, error) { // AllocENI creates an ENI and attaches it to the instance // returns: newly created ENI ID -func (cache *EC2InstanceMetadataCache) AllocENI(useCustomCfg bool, sg []*string, subnet string, numIPs int) (string, error) { - eniID, err := cache.createENI(useCustomCfg, sg, subnet, numIPs) +func (cache *EC2InstanceMetadataCache) AllocENI(useCustomCfg bool, sg []*string, eniCfgSubnet string, numIPs int) (string, error) { + eniID, err := cache.createENI(useCustomCfg, sg, eniCfgSubnet, numIPs) if err != nil { return "", errors.Wrap(err, "AllocENI: failed to create ENI") } @@ -803,7 +817,7 @@ func (cache *EC2InstanceMetadataCache) attachENI(eniID string) (string, error) { } // return ENI id, error -func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string, subnet string, numIPs int) (string, error) { +func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string, eniCfgSubnet string, numIPs int) (string, error) { eniDescription := eniDescriptionPrefix + cache.instanceID tags := map[string]string{ eniCreatedAtTagKey: time.Now().Format(time.RFC3339), @@ -826,6 +840,7 @@ func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string log.Infof("Trying to allocate %d IP addresses on new ENI", needIPs) log.Debugf("PD enabled - %t", cache.enablePrefixDelegation) + input := &ec2.CreateNetworkInterfaceInput{} if cache.enablePrefixDelegation { @@ -845,33 +860,123 @@ func (cache *EC2InstanceMetadataCache) createENI(useCustomCfg bool, sg []*string SecondaryPrivateIpAddressCount: aws.Int64(int64(needIPs)), } } - if useCustomCfg { - log.Info("Using a custom network config for the new ENI") - if len(sg) != 0 { - input.Groups = sg + + var err error + var networkInterfaceID string + if cache.useCustomNetworking { + input = createENIUsingCustomCfg(sg, eniCfgSubnet, input) + log.Infof("Creating ENI with security groups: %v in subnet: %s", aws.StringValueSlice(input.Groups), aws.StringValue(input.SubnetId)) + + networkInterfaceID, err = cache.tryCreateNetworkInterface(input) + if err == nil { + return networkInterfaceID, nil + } + } else { + if cache.useSubnetDiscovery { + subnetResult, vpcErr := cache.getVpcSubnets() + if vpcErr != nil { + log.Warnf("Failed to call ec2:DescribeSubnets: %v", vpcErr) + log.Info("Defaulting to same subnet as the primary interface for the new ENI") + networkInterfaceID, err = cache.tryCreateNetworkInterface(input) + if err == nil { + return networkInterfaceID, nil + } + } else { + for _, subnet := range subnetResult { + if *subnet.SubnetId != cache.subnetID { + if !validTag(subnet) { + continue + } + } + log.Infof("Creating ENI with security groups: %v in subnet: %s", aws.StringValueSlice(input.Groups), aws.StringValue(input.SubnetId)) + + input.SubnetId = subnet.SubnetId + networkInterfaceID, err = cache.tryCreateNetworkInterface(input) + if err == nil { + return networkInterfaceID, nil + } + } + } } else { - log.Warnf("No custom networking security group found, will use the node's primary ENI's SG: %v", aws.StringValueSlice(input.Groups)) + log.Info("Using same security group config as the primary interface for the new ENI") + networkInterfaceID, err = cache.tryCreateNetworkInterface(input) + if err == nil { + return networkInterfaceID, nil + } + } + } + return "", errors.Wrap(err, "failed to create network interface") +} + +func (cache *EC2InstanceMetadataCache) getVpcSubnets() ([]*ec2.Subnet, error) { + describeSubnetInput := &ec2.DescribeSubnetsInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(cache.vpcID)}, + }, + { + Name: aws.String("availability-zone"), + Values: []*string{aws.String(cache.availabilityZone)}, + }, + }, + } + + start := time.Now() + subnetResult, err := cache.ec2SVC.DescribeSubnetsWithContext(context.Background(), describeSubnetInput) + prometheusmetrics.Ec2ApiReq.WithLabelValues("DescribeSubnets").Inc() + prometheusmetrics.AwsAPILatency.WithLabelValues("DescribeSubnets", fmt.Sprint(err != nil), awsReqStatus(err)).Observe(msSince(start)) + if err != nil { + checkAPIErrorAndBroadcastEvent(err, "ec2:DescribeSubnets") + awsAPIErrInc("DescribeSubnets", err) + prometheusmetrics.Ec2ApiErr.WithLabelValues("DescribeSubnets").Inc() + return nil, errors.Wrap(err, "AllocENI: unable to describe subnets") + } + + // Sort the subnet by available IP address counter (desc order) before determining subnet to use + sort.SliceStable(subnetResult.Subnets, func(i, j int) bool { + return *subnetResult.Subnets[j].AvailableIpAddressCount < *subnetResult.Subnets[i].AvailableIpAddressCount + }) + + return subnetResult.Subnets, nil +} + +func validTag(subnet *ec2.Subnet) bool { + for _, tag := range subnet.Tags { + if *tag.Key == subnetDiscoveryTagKey { + return true } - input.SubnetId = aws.String(subnet) + } + return false +} + +func createENIUsingCustomCfg(sg []*string, eniCfgSubnet string, input *ec2.CreateNetworkInterfaceInput) *ec2.CreateNetworkInterfaceInput { + log.Info("Using a custom network config for the new ENI") + + if len(sg) != 0 { + input.Groups = sg } else { - log.Info("Using same config as the primary interface for the new ENI") + log.Warnf("No custom networking security group found, will use the node's primary ENI's SG: %v", aws.StringValueSlice(input.Groups)) } + input.SubnetId = aws.String(eniCfgSubnet) - log.Infof("Creating ENI with security groups: %v in subnet: %s", aws.StringValueSlice(input.Groups), aws.StringValue(input.SubnetId)) + return input +} +func (cache *EC2InstanceMetadataCache) tryCreateNetworkInterface(input *ec2.CreateNetworkInterfaceInput) (string, error) { start := time.Now() result, err := cache.ec2SVC.CreateNetworkInterfaceWithContext(context.Background(), input) prometheusmetrics.Ec2ApiReq.WithLabelValues("CreateNetworkInterface").Inc() prometheusmetrics.AwsAPILatency.WithLabelValues("CreateNetworkInterface", fmt.Sprint(err != nil), awsReqStatus(err)).Observe(msSince(start)) - if err != nil { - checkAPIErrorAndBroadcastEvent(err, "ec2:CreateNetworkInterface") - awsAPIErrInc("CreateNetworkInterface", err) - prometheusmetrics.Ec2ApiErr.WithLabelValues("CreateNetworkInterface").Inc() - log.Errorf("Failed to CreateNetworkInterface %v", err) - return "", errors.Wrap(err, "failed to create network interface") - } - log.Infof("Created a new ENI: %s", aws.StringValue(result.NetworkInterface.NetworkInterfaceId)) - return aws.StringValue(result.NetworkInterface.NetworkInterfaceId), nil + if err == nil { + log.Infof("Created a new ENI: %s", aws.StringValue(result.NetworkInterface.NetworkInterfaceId)) + return aws.StringValue(result.NetworkInterface.NetworkInterfaceId), nil + } + checkAPIErrorAndBroadcastEvent(err, "ec2:CreateNetworkInterface") + awsAPIErrInc("CreateNetworkInterface", err) + prometheusmetrics.Ec2ApiErr.WithLabelValues("CreateNetworkInterface").Inc() + log.Errorf("Failed to CreateNetworkInterface %v for subnet %s", err, *input.SubnetId) + return "", err } // buildENITags computes the desired AWS Tags for eni diff --git a/pkg/awsutils/awsutils_test.go b/pkg/awsutils/awsutils_test.go index 3120c7f628..30cbe5669a 100644 --- a/pkg/awsutils/awsutils_test.go +++ b/pkg/awsutils/awsutils_test.go @@ -46,6 +46,7 @@ const ( metadataInstanceType = "instance-type" metadataSGs = "/security-group-ids" metadataSubnetID = "/subnet-id" + metadataVpcID = "/vpc-id" metadataVPCcidrs = "/vpc-ipv4-cidr-blocks" metadataDeviceNum = "/device-number" metadataInterface = "/interface-id" @@ -65,6 +66,7 @@ const ( sgs = sg1 + " " + sg2 subnetID = "subnet-6b245523" subnetCIDR = "10.0.1.0/24" + vpcID = "vpc-3c133421" primaryeniID = "eni-00000000" eniID = primaryeniID eniAttachID = "eni-attach-beb21856" @@ -93,6 +95,7 @@ func testMetadata(overrides map[string]interface{}) FakeIMDS { metadataMACPath + primaryMAC + metadataSGs: sgs, metadataMACPath + primaryMAC + metadataIPv4s: eni1PrivateIP, metadataMACPath + primaryMAC + metadataSubnetID: subnetID, + metadataMACPath + primaryMAC + metadataVpcID: vpcID, metadataMACPath + primaryMAC + metadataSubnetCIDR: subnetCIDR, metadataMACPath + primaryMAC + metadataVPCcidrs: metadataVPCIPv4CIDRs, } @@ -118,6 +121,7 @@ func testMetadataWithPrefixes(overrides map[string]interface{}) FakeIMDS { metadataMACPath + primaryMAC + metadataIPv4s: eni1PrivateIP, metadataMACPath + primaryMAC + metadataIPv4Prefixes: eni1Prefix, metadataMACPath + primaryMAC + metadataSubnetID: subnetID, + metadataMACPath + primaryMAC + metadataVpcID: vpcID, metadataMACPath + primaryMAC + metadataSubnetCIDR: subnetCIDR, metadataMACPath + primaryMAC + metadataVPCcidrs: metadataVPCIPv4CIDRs, } @@ -167,7 +171,7 @@ func TestInitWithEC2metadata(t *testing.T) { assert.Equal(t, cache.instanceID, instanceID) assert.Equal(t, cache.primaryENImac, primaryMAC) assert.Equal(t, cache.primaryENI, primaryeniID) - assert.Equal(t, subnetID, cache.subnetID) + assert.Equal(t, cache.vpcID, vpcID) } } @@ -376,6 +380,21 @@ func TestAllocENI(t *testing.T) { mockMetadata := testMetadata(nil) + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: aws.Int64(ipAddressCount), + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + cureniID := eniID eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: &cureniID}} mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(&eni, nil) @@ -401,9 +420,10 @@ func TestAllocENI(t *testing.T) { mockEC2.EXPECT().ModifyNetworkInterfaceAttributeWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) cache := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, - instanceType: "c5n.18xlarge", + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + instanceType: "c5n.18xlarge", + useSubnetDiscovery: true, } _, err := cache.AllocENI(false, nil, "", 5) @@ -416,6 +436,21 @@ func TestAllocENINoFreeDevice(t *testing.T) { mockMetadata := testMetadata(nil) + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: &ipAddressCount, + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + cureniID := eniID eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: &cureniID}} mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(&eni, nil) @@ -436,9 +471,10 @@ func TestAllocENINoFreeDevice(t *testing.T) { mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) cache := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, - instanceType: "c5n.18xlarge", + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + instanceType: "c5n.18xlarge", + useSubnetDiscovery: true, } _, err := cache.AllocENI(false, nil, "", 5) @@ -451,6 +487,21 @@ func TestAllocENIMaxReached(t *testing.T) { mockMetadata := testMetadata(nil) + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: &ipAddressCount, + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + cureniID := eniID eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: &cureniID}} mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(&eni, nil) @@ -473,9 +524,10 @@ func TestAllocENIMaxReached(t *testing.T) { mockEC2.EXPECT().DeleteNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) cache := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, - instanceType: "c5n.18xlarge", + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + instanceType: "c5n.18xlarge", + useSubnetDiscovery: true, } _, err := cache.AllocENI(false, nil, "", 5) @@ -486,6 +538,21 @@ func TestAllocENIWithIPAddresses(t *testing.T) { ctrl, mockEC2 := setup(t) defer ctrl.Finish() + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: &ipAddressCount, + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + // when required IP numbers(5) is below ENI's limit(30) currentEniID := eniID eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: ¤tEniID}} @@ -509,16 +576,17 @@ func TestAllocENIWithIPAddresses(t *testing.T) { mockEC2.EXPECT().AttachNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(attachResult, nil) mockEC2.EXPECT().ModifyNetworkInterfaceAttributeWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} + cache := &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge", useSubnetDiscovery: true} _, err := cache.AllocENI(false, nil, subnetID, 5) assert.NoError(t, err) // when required IP numbers(50) is higher than ENI's limit(49) + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(&eni, nil) mockEC2.EXPECT().DescribeInstancesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(result, nil) mockEC2.EXPECT().AttachNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(attachResult, nil) mockEC2.EXPECT().ModifyNetworkInterfaceAttributeWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) - cache = &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge"} + cache = &EC2InstanceMetadataCache{ec2SVC: mockEC2, instanceType: "c5n.18xlarge", useSubnetDiscovery: true} _, err = cache.AllocENI(false, nil, subnetID, 49) assert.NoError(t, err) } @@ -529,13 +597,29 @@ func TestAllocENIWithIPAddressesAlreadyFull(t *testing.T) { mockMetadata := testMetadata(nil) + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: &ipAddressCount, + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + retErr := awserr.New("PrivateIpAddressLimitExceeded", "Too many IPs already allocated", nil) mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, retErr) cache := &EC2InstanceMetadataCache{ - ec2SVC: mockEC2, - imds: TypedIMDS{mockMetadata}, - instanceType: "t3.xlarge", + ec2SVC: mockEC2, + imds: TypedIMDS{mockMetadata}, + instanceType: "t3.xlarge", + useSubnetDiscovery: true, } _, err := cache.AllocENI(true, nil, "", 14) assert.Error(t, err) @@ -547,6 +631,21 @@ func TestAllocENIWithPrefixAddresses(t *testing.T) { mockMetadata := testMetadata(nil) + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: &ipAddressCount, + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + currentEniID := eniID eni := ec2.CreateNetworkInterfaceOutput{NetworkInterface: &ec2.NetworkInterface{NetworkInterfaceId: ¤tEniID}} mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(&eni, nil) @@ -574,6 +673,7 @@ func TestAllocENIWithPrefixAddresses(t *testing.T) { imds: TypedIMDS{mockMetadata}, instanceType: "c5n.18xlarge", enablePrefixDelegation: true, + useSubnetDiscovery: true, } _, err := cache.AllocENI(false, nil, subnetID, 1) assert.NoError(t, err) @@ -585,6 +685,21 @@ func TestAllocENIWithPrefixesAlreadyFull(t *testing.T) { mockMetadata := testMetadata(nil) + ipAddressCount := int64(100) + subnetResult := &ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{{ + AvailableIpAddressCount: &ipAddressCount, + SubnetId: aws.String(subnetID), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + }}, + } + mockEC2.EXPECT().DescribeSubnetsWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(subnetResult, nil) + retErr := awserr.New("PrivateIpAddressLimitExceeded", "Too many IPs already allocated", nil) mockEC2.EXPECT().CreateNetworkInterfaceWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, retErr) @@ -593,6 +708,7 @@ func TestAllocENIWithPrefixesAlreadyFull(t *testing.T) { imds: TypedIMDS{mockMetadata}, instanceType: "c5n.18xlarge", enablePrefixDelegation: true, + useSubnetDiscovery: true, } _, err := cache.AllocENI(true, nil, "", 1) assert.Error(t, err) diff --git a/pkg/awsutils/imds.go b/pkg/awsutils/imds.go index 901654f2de..69c9343501 100644 --- a/pkg/awsutils/imds.go +++ b/pkg/awsutils/imds.go @@ -186,6 +186,19 @@ func (imds TypedIMDS) GetSubnetID(ctx context.Context, mac string) (string, erro return subnetID, err } +func (imds TypedIMDS) GetVpcID(ctx context.Context, mac string) (string, error) { + key := fmt.Sprintf("network/interfaces/macs/%s/vpc-id", mac) + vpcID, err := imds.GetMetadataWithContext(ctx, key) + if err != nil { + if imdsErr, ok := err.(*imdsRequestError); ok { + log.Warnf("%v", err) + return vpcID, imdsErr.err + } + return "", err + } + return vpcID, err +} + // GetSecurityGroupIDs returns the IDs of the security groups to which the network interface belongs. func (imds TypedIMDS) GetSecurityGroupIDs(ctx context.Context, mac string) ([]string, error) { key := fmt.Sprintf("network/interfaces/macs/%s/security-group-ids", mac) diff --git a/pkg/awsutils/imds_test.go b/pkg/awsutils/imds_test.go index 07ce476893..5663b64e5a 100644 --- a/pkg/awsutils/imds_test.go +++ b/pkg/awsutils/imds_test.go @@ -126,6 +126,17 @@ func TestGetSubnetID(t *testing.T) { } } +func TestGetVpcID(t *testing.T) { + f := TypedIMDS{FakeIMDS(map[string]interface{}{ + "network/interfaces/macs/02:c5:f8:3e:6b:27/vpc-id": "vpc-0afaed81bf542db37", + })} + + id, err := f.GetVpcID(context.TODO(), "02:c5:f8:3e:6b:27") + if assert.NoError(t, err) { + assert.Equal(t, id, "vpc-0afaed81bf542db37") + } +} + func TestGetSecurityGroupIDs(t *testing.T) { f := TypedIMDS{FakeIMDS(map[string]interface{}{ "network/interfaces/macs/02:c5:f8:3e:6b:27/security-group-ids": "sg-00581e028df71bda8", diff --git a/pkg/ec2wrapper/client.go b/pkg/ec2wrapper/client.go index f2b687c664..09242ab08f 100644 --- a/pkg/ec2wrapper/client.go +++ b/pkg/ec2wrapper/client.go @@ -36,6 +36,7 @@ type EC2 interface { ModifyNetworkInterfaceAttributeWithContext(ctx aws.Context, input *ec2svc.ModifyNetworkInterfaceAttributeInput, opts ...request.Option) (*ec2svc.ModifyNetworkInterfaceAttributeOutput, error) CreateTagsWithContext(ctx aws.Context, input *ec2svc.CreateTagsInput, opts ...request.Option) (*ec2svc.CreateTagsOutput, error) DescribeNetworkInterfacesPagesWithContext(ctx aws.Context, input *ec2svc.DescribeNetworkInterfacesInput, fn func(*ec2svc.DescribeNetworkInterfacesOutput, bool) bool, opts ...request.Option) error + DescribeSubnetsWithContext(ctx aws.Context, input *ec2svc.DescribeSubnetsInput, opts ...request.Option) (*ec2svc.DescribeSubnetsOutput, error) } // New creates a new EC2 wrapper diff --git a/pkg/ec2wrapper/mocks/ec2wrapper_mocks.go b/pkg/ec2wrapper/mocks/ec2wrapper_mocks.go index 53066d8314..53446727f5 100644 --- a/pkg/ec2wrapper/mocks/ec2wrapper_mocks.go +++ b/pkg/ec2wrapper/mocks/ec2wrapper_mocks.go @@ -249,6 +249,26 @@ func (mr *MockEC2MockRecorder) DescribeNetworkInterfacesWithContext(arg0, arg1 i return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfacesWithContext", reflect.TypeOf((*MockEC2)(nil).DescribeNetworkInterfacesWithContext), varargs...) } +// DescribeSubnetsWithContext mocks base method. +func (m *MockEC2) DescribeSubnetsWithContext(arg0 context.Context, arg1 *ec2.DescribeSubnetsInput, arg2 ...request.Option) (*ec2.DescribeSubnetsOutput, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "DescribeSubnetsWithContext", varargs...) + ret0, _ := ret[0].(*ec2.DescribeSubnetsOutput) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DescribeSubnetsWithContext indicates an expected call of DescribeSubnetsWithContext. +func (mr *MockEC2MockRecorder) DescribeSubnetsWithContext(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeSubnetsWithContext", reflect.TypeOf((*MockEC2)(nil).DescribeSubnetsWithContext), varargs...) +} + // DetachNetworkInterfaceWithContext mocks base method. func (m *MockEC2) DetachNetworkInterfaceWithContext(arg0 context.Context, arg1 *ec2.DetachNetworkInterfaceInput, arg2 ...request.Option) (*ec2.DetachNetworkInterfaceOutput, error) { m.ctrl.T.Helper() diff --git a/pkg/ipamd/ipamd.go b/pkg/ipamd/ipamd.go index 0a5913c7bf..648a00b104 100644 --- a/pkg/ipamd/ipamd.go +++ b/pkg/ipamd/ipamd.go @@ -118,6 +118,9 @@ const ( // This environment variable specifies whether IPAMD should allocate or deallocate ENIs on a non-schedulable node (default false). envManageENIsNonSchedulable = "AWS_MANAGE_ENIS_NON_SCHEDULABLE" + // This environment is used to specify whether we should use enhanced subnet selection or not when creating ENIs (default true). + envSubnetDiscovery = "ENABLE_SUBNET_DISCOVERY" + // eniNoManageTagKey is the tag that may be set on an ENI to indicate ipamd // should not manage it in any form. eniNoManageTagKey = "node.k8s.amazonaws.com/no_manage" @@ -197,6 +200,7 @@ type IPAMContext struct { enableIPv6 bool useCustomNetworking bool manageENIsNonScheduleable bool + useSubnetDiscovery bool networkClient networkutils.NetworkAPIs maxIPsPerENI int maxENI int @@ -333,11 +337,12 @@ func New(k8sClient client.Client) (*IPAMContext, error) { c.networkClient = networkutils.New() c.useCustomNetworking = UseCustomNetworkCfg() c.manageENIsNonScheduleable = ManageENIsOnNonSchedulableNode() + c.useSubnetDiscovery = UseSubnetDiscovery() c.enablePrefixDelegation = usePrefixDelegation() c.enableIPv4 = isIPv4Enabled() c.enableIPv6 = isIPv6Enabled() c.disableENIProvisioning = disableENIProvisioning() - client, err := awsutils.New(c.useCustomNetworking, disableLeakedENICleanup(), c.enableIPv4, c.enableIPv6) + client, err := awsutils.New(c.useSubnetDiscovery, c.useCustomNetworking, disableLeakedENICleanup(), c.enableIPv4, c.enableIPv6) if err != nil { return nil, errors.Wrap(err, "ipamd: can not initialize with AWS SDK interface") } @@ -837,7 +842,7 @@ func (c *IPAMContext) updateLastNodeIPPoolAction() { func (c *IPAMContext) tryAllocateENI(ctx context.Context) error { var securityGroups []*string - var subnet string + var eniCfgSubnet string if c.useCustomNetworking { eniCfg, err := eniconfig.MyENIConfig(ctx, c.k8sClient) @@ -851,12 +856,12 @@ func (c *IPAMContext) tryAllocateENI(ctx context.Context) error { log.Debugf("Found security-group id: %s", sgID) securityGroups = append(securityGroups, aws.String(sgID)) } - subnet = eniCfg.Subnet + eniCfgSubnet = eniCfg.Subnet } resourcesToAllocate := c.GetENIResourcesToAllocate() if resourcesToAllocate > 0 { - eni, err := c.awsClient.AllocENI(c.useCustomNetworking, securityGroups, subnet, resourcesToAllocate) + eni, err := c.awsClient.AllocENI(c.useCustomNetworking, securityGroups, eniCfgSubnet, resourcesToAllocate) if err != nil { log.Errorf("Failed to increase pool size due to not able to allocate ENI %v", err) ipamdErrInc("increaseIPPoolAllocENI") @@ -1681,6 +1686,11 @@ func ManageENIsOnNonSchedulableNode() bool { return parseBoolEnvVar(envManageENIsNonSchedulable, false) } +// UseSubnetDiscovery returns whether we should use enhanced subnet selection or not when creating ENIs. +func UseSubnetDiscovery() bool { + return parseBoolEnvVar(envSubnetDiscovery, true) +} + func parseBoolEnvVar(envVariableName string, defaultVal bool) bool { if strValue := os.Getenv(envVariableName); strValue != "" { parsedValue, err := strconv.ParseBool(strValue) @@ -1910,6 +1920,7 @@ func GetConfigForDebug() map[string]interface{} { envWarmENITarget: getWarmENITarget(), envCustomNetworkCfg: UseCustomNetworkCfg(), envManageENIsNonSchedulable: ManageENIsOnNonSchedulableNode(), + envSubnetDiscovery: UseSubnetDiscovery(), } } diff --git a/test/framework/resources/aws/services/ec2.go b/test/framework/resources/aws/services/ec2.go index 92f490c6a1..af5305ba04 100644 --- a/test/framework/resources/aws/services/ec2.go +++ b/test/framework/resources/aws/services/ec2.go @@ -47,8 +47,9 @@ type EC2 interface { DeleteKey(keyName string) error DescribeKey(keyName string) (*ec2.DescribeKeyPairsOutput, error) ModifyNetworkInterfaceSecurityGroups(securityGroupIds []*string, networkInterfaceId *string) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) - DescribeAvailabilityZones() (*ec2.DescribeAvailabilityZonesOutput, error) + CreateTags(resourceIds []string, tags []*ec2.Tag) (*ec2.CreateTagsOutput, error) + DeleteTags(resourceIds []string, tags []*ec2.Tag) (*ec2.DeleteTagsOutput, error) } type defaultEC2 struct { @@ -394,6 +395,22 @@ func (d *defaultEC2) DescribeVPC(vpcID string) (*ec2.DescribeVpcsOutput, error) return d.EC2API.DescribeVpcs(describeVPCInput) } +func (d *defaultEC2) CreateTags(resourceIds []string, tags []*ec2.Tag) (*ec2.CreateTagsOutput, error) { + input := &ec2.CreateTagsInput{ + Resources: aws.StringSlice(resourceIds), + Tags: tags, + } + return d.EC2API.CreateTags(input) +} + +func (d *defaultEC2) DeleteTags(resourceIds []string, tags []*ec2.Tag) (*ec2.DeleteTagsOutput, error) { + input := &ec2.DeleteTagsInput{ + Resources: aws.StringSlice(resourceIds), + Tags: tags, + } + return d.EC2API.DeleteTags(input) +} + func NewEC2(session *session.Session) EC2 { return &defaultEC2{ EC2API: ec2.New(session), diff --git a/test/integration/README.md b/test/integration/README.md index 72d20d343c..29c927e87a 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -81,6 +81,13 @@ Custom networking tests validate use of the `AWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG` Test info: - Pass `custom-networking-cidr-range` flag with *allowed* VPC CIDR that does not conflict with an existing one. So if existing VPC CIDR is `192.168.0.0/16`, you can use `custom-networking-cidr-range=100.64.0.0/16`. You can go to your cluster VPC to check existing/allowed CIDRs. +### ENI Subnet Discovery (eni_subnet_discovery) + +The ENI Subnet Discovery test suite validates ENI allocation by making sure the tagged subnet with the largest number of free IPs is selected. + +Test info: + - Pass `secondary-cidr-range` flag with *allowed* VPC CIDR that does not conflict with an existing one. So if existing VPC CIDR is `192.168.0.0/16`, you can use `secondary-cidr-range=100.64.0.0/16`. You can go to your cluster VPC to check existing/allowed CIDRs. + ### SNAT tests (snat) SNAT tests cover pod source NAT behavior with various deployment scenarios. diff --git a/test/integration/eni-subnet-discovery/eni_subnet_discovery_suite_test.go b/test/integration/eni-subnet-discovery/eni_subnet_discovery_suite_test.go new file mode 100644 index 0000000000..ed83c71ead --- /dev/null +++ b/test/integration/eni-subnet-discovery/eni_subnet_discovery_suite_test.go @@ -0,0 +1,142 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package eni_subnet_discovery + +import ( + "flag" + "fmt" + "net" + "testing" + "time" + + "github.com/apparentlymart/go-cidr/cidr" + "github.com/aws/amazon-vpc-cni-k8s/test/framework" + awsUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/aws/utils" + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/prometheus/client_golang/prometheus" + corev1 "k8s.io/api/core/v1" +) + +func TestCustomNetworking(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "CNI ENI Subnet Selection Test Suite") +} + +var ( + f *framework.Framework + clusterVPCConfig *awsUtils.ClusterVPCConfig + cidrRangeString string + cidrRange *net.IPNet + cidrBlockAssociationID string + createdSubnet string + primaryInstance *ec2.Instance +) + +// Parse test specific variable from flag +func init() { + flag.StringVar(&cidrRangeString, "secondary-cidr-range", "100.64.0.0/16", "second cidr range to be associated with the VPC") +} + +var _ = BeforeSuite(func() { + f = framework.New(framework.GlobalOptions) + + nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, + f.Options.NgNameLabelVal) + Expect(err).ToNot(HaveOccurred()) + + numOfNodes := len(nodeList.Items) + Expect(numOfNodes).Should(BeNumerically(">", 1)) + + // Nominate the first untainted node as the one to run deployment against + By("finding the first untainted node for the deployment") + var primaryNode *corev1.Node + for _, n := range nodeList.Items { + if len(n.Spec.Taints) == 0 { + primaryNode = &n + break + } + } + Expect(primaryNode).To(Not(BeNil()), "expected to find a non-tainted node") + + instanceID := k8sUtils.GetInstanceIDFromNode(*primaryNode) + primaryInstance, err = f.CloudServices.EC2().DescribeInstance(instanceID) + Expect(err).ToNot(HaveOccurred()) + + _, cidrRange, err = net.ParseCIDR(cidrRangeString) + Expect(err).ToNot(HaveOccurred()) + + By("creating test namespace") + f.K8sResourceManagers.NamespaceManager().CreateNamespace(utils.DefaultTestNamespace) + + By("getting the cluster VPC Config") + clusterVPCConfig, err = awsUtils.GetClusterVPCConfig(f) + Expect(err).ToNot(HaveOccurred()) + + By("associating cidr range to the VPC") + association, err := f.CloudServices.EC2().AssociateVPCCIDRBlock(f.Options.AWSVPCID, cidrRange.String()) + Expect(err).ToNot(HaveOccurred()) + cidrBlockAssociationID = *association.CidrBlockAssociation.AssociationId + + By(fmt.Sprintf("creating the subnet in %s", *primaryInstance.Placement.AvailabilityZone)) + + // Subnet must be greater than /19 + subnetCidr, err := cidr.Subnet(cidrRange, 2, 0) + Expect(err).ToNot(HaveOccurred()) + + createSubnetOutput, err := f.CloudServices.EC2(). + CreateSubnet(subnetCidr.String(), f.Options.AWSVPCID, *primaryInstance.Placement.AvailabilityZone) + Expect(err).ToNot(HaveOccurred()) + + subnetID := *createSubnetOutput.Subnet.SubnetId + + By("associating the route table with the newly created subnet") + err = f.CloudServices.EC2().AssociateRouteTableToSubnet(clusterVPCConfig.PublicRouteTableID, subnetID) + Expect(err).ToNot(HaveOccurred()) + + By("try detaching all ENIs by setting WARM_ENI_TARGET to 0") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, + utils.AwsNodeName, map[string]string{"WARM_ENI_TARGET": "0"}) + + By("sleeping to allow CNI Plugin to delete unused ENIs") + time.Sleep(time.Second * 90) + + createdSubnet = subnetID +}) + +var _ = AfterSuite(func() { + By("deleting test namespace") + f.K8sResourceManagers.NamespaceManager(). + DeleteAndWaitTillNamespaceDeleted(utils.DefaultTestNamespace) + + var errs prometheus.MultiError + + By("sleeping to allow CNI Plugin to delete unused ENIs") + time.Sleep(time.Second * 90) + + By(fmt.Sprintf("deleting the subnet %s", createdSubnet)) + errs.Append(f.CloudServices.EC2().DeleteSubnet(createdSubnet)) + + By("disassociating the CIDR range to the VPC") + errs.Append(f.CloudServices.EC2().DisAssociateVPCCIDRBlock(cidrBlockAssociationID)) + + Expect(errs.MaybeUnwrap()).ToNot(HaveOccurred()) + + By("by setting WARM_ENI_TARGET to 1") + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, + utils.AwsNodeName, map[string]string{"WARM_ENI_TARGET": "1"}) +}) diff --git a/test/integration/eni-subnet-discovery/eni_subnet_discovery_test.go b/test/integration/eni-subnet-discovery/eni_subnet_discovery_test.go new file mode 100644 index 0000000000..c2fe33c6ad --- /dev/null +++ b/test/integration/eni-subnet-discovery/eni_subnet_discovery_test.go @@ -0,0 +1,331 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package eni_subnet_discovery + +import ( + "fmt" + "net" + "os" + "strconv" + "strings" + "time" + + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" + k8sUtils "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" + "github.com/aws/amazon-vpc-cni-k8s/test/integration/common" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/apps/v1" +) + +const ( + podLabelKey = "role" + podLabelVal = "eni-subnet-discovery-test" + AwsNodeLabelKey = "k8s-app" + EKSCNIPolicyARN = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + EKSCNIPolicyV4 = "/testdata/amazon-eks-cni-policy-v4.json" +) + +var err error +var newEniSubnetIds []string + +var _ = Describe("ENI Subnet Selection Test", func() { + var ( + deployment *v1.Deployment + ) + + Context("when creating deployment", func() { + JustBeforeEach(func() { + By("creating deployment") + container := manifest.NewNetCatAlpineContainer(f.Options.TestImageRegistry). + Command([]string{"sleep"}). + Args([]string{"3600"}). + Build() + + deploymentBuilder := manifest.NewBusyBoxDeploymentBuilder(f.Options.TestImageRegistry). + Container(container). + Replicas(50). + PodLabel(podLabelKey, podLabelVal). + NodeName(*primaryInstance.PrivateDnsName). + Build() + + deployment, err = f.K8sResourceManagers.DeploymentManager(). + CreateAndWaitTillDeploymentIsReady(deploymentBuilder, utils.DefaultDeploymentReadyTimeout) + Expect(err).ToNot(HaveOccurred()) + + // Wait for deployment to settle, as if any pods restart, their pod IP will change between + // the GET and the validation. + time.Sleep(5 * time.Second) + }) + + JustAfterEach(func() { + By("deleting deployment") + err := f.K8sResourceManagers.DeploymentManager().DeleteAndWaitTillDeploymentIsDeleted(deployment) + Expect(err).ToNot(HaveOccurred()) + + By("sleeping to allow CNI Plugin to delete unused ENIs") + time.Sleep(time.Second * 90) + + newEniSubnetIds = nil + }) + + Context("when subnet discovery is enabled", func() { + BeforeEach(func() { + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + "ENABLE_SUBNET_DISCOVERY": "true", + }) + // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU. + // Otherwise, the stale value can cause failures in future test cases. + time.Sleep(utils.PollIntervalMedium) + }) + Context("using a subnet tagged with kubernetes.io/role/cni", func() { + BeforeEach(func() { + By("Tagging kubernetes.io/role/cni to subnet") + _, err = f.CloudServices.EC2(). + CreateTags( + []string{createdSubnet}, + []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + ) + Expect(err).ToNot(HaveOccurred()) + }) + AfterEach(func() { + By("Untagging kubernetes.io/role/cni from subnet") + _, err = f.CloudServices.EC2(). + DeleteTags( + []string{createdSubnet}, + []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + ) + Expect(err).ToNot(HaveOccurred()) + }) + It(fmt.Sprintf("should have subnet in CIDR range %s", cidrRangeString), func() { + checkSecondaryENISubnets(true) + }) + + Context("missing ec2:DescribeSubnets permission,", func() { + var role string + var EKSCNIPolicyV4ARN string + BeforeEach(func() { + By("getting the iam role") + podList, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelector(AwsNodeLabelKey, utils.AwsNodeName) + Expect(err).ToNot(HaveOccurred()) + for _, env := range podList.Items[0].Spec.Containers[0].Env { + if env.Name == "AWS_ROLE_ARN" { + role = strings.Split(env.Value, "/")[1] + } + } + if role == "" { // get the node instance role + By("getting the node instance role") + instanceProfileRoleName := strings.Split(*primaryInstance.IamInstanceProfile.Arn, "instance-profile/")[1] + instanceProfileOutput, err := f.CloudServices.IAM().GetInstanceProfile(instanceProfileRoleName) + Expect(err).ToNot(HaveOccurred()) + role = *instanceProfileOutput.InstanceProfile.Roles[0].RoleName + } + err = f.CloudServices.IAM().DetachRolePolicy(EKSCNIPolicyARN, role) + Expect(err).ToNot(HaveOccurred()) + + eksCNIPolicyV4Path := utils.GetProjectRoot() + EKSCNIPolicyV4 + eksCNIPolicyV4Bytes, err := os.ReadFile(eksCNIPolicyV4Path) + Expect(err).ToNot(HaveOccurred()) + + eksCNIPolicyV4Data := string(eksCNIPolicyV4Bytes) + + By("Creating and attaching policy AmazonEKS_CNI_Policy_V4") + output, err := f.CloudServices.IAM().CreatePolicy("AmazonEKS_CNI_Policy_V4", eksCNIPolicyV4Data) + Expect(err).ToNot(HaveOccurred()) + EKSCNIPolicyV4ARN = *output.Policy.Arn + err = f.CloudServices.IAM().AttachRolePolicy(EKSCNIPolicyV4ARN, role) + Expect(err).ToNot(HaveOccurred()) + + // Sleep to allow time for CNI policy reattachment + time.Sleep(10 * time.Second) + + RestartAwsNodePods() + }) + + AfterEach(func() { + By("attaching VPC_CNI policy") + err = f.CloudServices.IAM().AttachRolePolicy(EKSCNIPolicyARN, role) + Expect(err).ToNot(HaveOccurred()) + + By("Detaching and deleting policy AmazonEKS_CNI_Policy_V4") + err = f.CloudServices.IAM().DetachRolePolicy(EKSCNIPolicyV4ARN, role) + Expect(err).ToNot(HaveOccurred()) + + err = f.CloudServices.IAM().DeletePolicy(EKSCNIPolicyV4ARN) + Expect(err).ToNot(HaveOccurred()) + + // Sleep to allow time for CNI policy detachment + time.Sleep(10 * time.Second) + + RestartAwsNodePods() + }) + It("should have same subnet as primary ENI", func() { + checkSecondaryENISubnets(false) + }) + }) + }) + Context("using a subnet tagged with kubernetes.io/role/cn", func() { + BeforeEach(func() { + By("Tagging kubernetes.io/role/cn to subnet") + _, err = f.CloudServices.EC2(). + CreateTags( + []string{createdSubnet}, + []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cn"), + Value: aws.String("1"), + }, + }, + ) + Expect(err).ToNot(HaveOccurred()) + }) + AfterEach(func() { + By("Untagging kubernetes.io/role/cn from subnet") + _, err = f.CloudServices.EC2(). + DeleteTags( + []string{createdSubnet}, + []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cn"), + Value: aws.String("1"), + }, + }, + ) + Expect(err).ToNot(HaveOccurred()) + }) + + It("should have same subnet as primary ENI", func() { + checkSecondaryENISubnets(false) + }) + }) + }) + + Context("when subnet discovery is disabled", func() { + BeforeEach(func() { + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + "ENABLE_SUBNET_DISCOVERY": "false", + }) + // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU. + // Otherwise, the stale value can cause failures in future test cases. + time.Sleep(utils.PollIntervalMedium) + }) + AfterEach(func() { + // Set ENABLE_SUBNET_DISCOVERY back to true as this is the default behavior going forward. + k8sUtils.AddEnvVarToDaemonSetAndWaitTillUpdated(f, utils.AwsNodeName, utils.AwsNodeNamespace, utils.AwsNodeName, map[string]string{ + "ENABLE_SUBNET_DISCOVERY": "true", + }) + // After updating daemonset pod, we must wait until conflist is updated so that container-runtime calls CNI ADD with the latest VETH prefix and MTU. + // Otherwise, the stale value can cause failures in future test cases. + time.Sleep(utils.PollIntervalMedium) + }) + Context("using a subnet tagged with kubernetes.io/role/cni", func() { + BeforeEach(func() { + By("Tagging kubernetes.io/role/cni to subnet") + _, err = f.CloudServices.EC2(). + CreateTags( + []string{createdSubnet}, + []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + ) + Expect(err).ToNot(HaveOccurred()) + }) + AfterEach(func() { + By("Untagging kubernetes.io/role/cni from subnet") + _, err = f.CloudServices.EC2(). + DeleteTags( + []string{createdSubnet}, + []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/role/cni"), + Value: aws.String("1"), + }, + }, + ) + Expect(err).ToNot(HaveOccurred()) + }) + It("should have the same subnets as the primary ENI", func() { + checkSecondaryENISubnets(false) + }) + }) + }) + }) +}) + +func checkSecondaryENISubnets(expectNewCidr bool) { + instance, err := f.CloudServices.EC2().DescribeInstance(*primaryInstance.InstanceId) + Expect(err).ToNot(HaveOccurred()) + + By("retrieving secondary ENIs") + for _, nwInterface := range instance.NetworkInterfaces { + primaryENI := common.IsPrimaryENI(nwInterface, instance.PrivateIpAddress) + if !primaryENI { + newEniSubnetIds = append(newEniSubnetIds, *nwInterface.SubnetId) + } + } + + By("verifying at least one new Secondary ENI is created") + Expect(len(newEniSubnetIds)).Should(BeNumerically(">", 0)) + + vpcOutput, err := f.CloudServices.EC2().DescribeVPC(*primaryInstance.VpcId) + Expect(err).ToNot(HaveOccurred()) + + expectedCidrRangeString := *vpcOutput.Vpcs[0].CidrBlock + expectedCidrSplit := strings.Split(*vpcOutput.Vpcs[0].CidrBlock, "/") + expectedSuffix, _ := strconv.Atoi(expectedCidrSplit[1]) + _, expectedCIDR, _ := net.ParseCIDR(*vpcOutput.Vpcs[0].CidrBlock) + + if expectNewCidr { + expectedCidrRangeString = cidrRangeString + expectedCidrSplit = strings.Split(cidrRangeString, "/") + expectedSuffix, _ = strconv.Atoi(expectedCidrSplit[1]) + _, expectedCIDR, _ = net.ParseCIDR(cidrRangeString) + } + + By(fmt.Sprintf("checking the secondary ENI subnets are in the CIDR %s", expectedCidrRangeString)) + for _, subnetID := range newEniSubnetIds { + subnetOutput, err := f.CloudServices.EC2().DescribeSubnet(subnetID) + Expect(err).ToNot(HaveOccurred()) + cidrSplit := strings.Split(*subnetOutput.Subnets[0].CidrBlock, "/") + actualSubnetIp, _, _ := net.ParseCIDR(*subnetOutput.Subnets[0].CidrBlock) + Expect(expectedCIDR.Contains(actualSubnetIp)) + suffix, _ := strconv.Atoi(cidrSplit[1]) + Expect(suffix).Should(BeNumerically(">=", expectedSuffix)) + } +} + +func RestartAwsNodePods() { + By("Restarting aws-node pods") + podList, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelector(AwsNodeLabelKey, utils.AwsNodeName) + Expect(err).ToNot(HaveOccurred()) + for _, pod := range podList.Items { + f.K8sResourceManagers.PodManager().DeleteAndWaitTillPodDeleted(&pod) + } +} diff --git a/testdata/amazon-eks-cni-policy-v4.json b/testdata/amazon-eks-cni-policy-v4.json new file mode 100644 index 0000000000..42389648ad --- /dev/null +++ b/testdata/amazon-eks-cni-policy-v4.json @@ -0,0 +1,31 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:AssignPrivateIpAddresses", + "ec2:AttachNetworkInterface", + "ec2:CreateNetworkInterface", + "ec2:DeleteNetworkInterface", + "ec2:DescribeInstances", + "ec2:DescribeTags", + "ec2:DescribeNetworkInterfaces", + "ec2:DescribeInstanceTypes", + "ec2:DetachNetworkInterface", + "ec2:ModifyNetworkInterfaceAttribute", + "ec2:UnassignPrivateIpAddresses" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags" + ], + "Resource": [ + "arn:aws:ec2:*:*:network-interface/*" + ] + } + ] +} \ No newline at end of file