Merge pull request #16099 from spotinst/feat/add_multi_arch_base127_r…

…ebase Spotinst: Feature/add multi arch in VNG and resource tag specification
kubernetes · Nov 23, 2023 · 7c17b16 · 7c17b16
2 parents 634423f + 30813fe
commit 7c17b16
Show file tree

Hide file tree

Showing 23 changed files with 2,437 additions and 141 deletions.
diff --git a/docs/getting_started/spot-ocean.md b/docs/getting_started/spot-ocean.md
@@ -161,30 +161,31 @@ metadata:
 
 | Label | Description | Default |
 |---|---|---|
-| `spotinst.io/spot-percentage` | Specify the percentage of Spot instances that should spin up from the target capacity. | `100` |
-| `spotinst.io/utilize-reserved-instances` | Specify whether reserved instances should be utilized. | `true` |
-| `spotinst.io/utilize-commitments` | Specify whether reserved instance commitments should be utilized. | none |
-| `spotinst.io/fallback-to-ondemand` | Specify whether fallback to on-demand instances should be enabled. | `true` |
+| `spotinst.io/spot-percentage` | Specify the percentage of Spot instances that should spin up from the target capacity.                                    | `100` |
+| `spotinst.io/utilize-reserved-instances` | Specify whether reserved instances should be utilized.                                                                    | `true` |
+| `spotinst.io/utilize-commitments` | Specify whether reserved instance commitments should be utilized.                                                         | none |
+| `spotinst.io/fallback-to-ondemand` | Specify whether fallback to on-demand instances should be enabled.                                                        | `true` |
 | `spotinst.io/draining-timeout` | Specify a period of time, in seconds, after a node is marked for termination during which on running pods remains active. | none |
-| `spotinst.io/grace-period` | Specify a period of time, in seconds, that Ocean should wait before applying instance health checks. | none |
-| `spotinst.io/ocean-default-launchspec` | Specify whether to use the InstanceGroup's spec as the default Launch Spec for the Ocean cluster. | none |
-| `spotinst.io/ocean-instance-types-whitelist` | Specify whether to whitelist specific instance types. | none |
-| `spotinst.io/ocean-instance-types-blacklist` | Specify whether to blacklist specific instance types. | none |
-| `spotinst.io/ocean-instance-types` | Specify a list of instance types that should be used by the Ocean Launch Spec. | none |
-| `spotinst.io/autoscaler-disabled` | Specify whether the auto scaler should be disabled. | `false` |
-| `spotinst.io/autoscaler-default-node-labels` | Specify whether default node labels should be set for the auto scaler. | `false` |
-| `spotinst.io/autoscaler-auto-config` | Specify whether headroom resources should be automatically configured and optimized. | `true` |
-| `spotinst.io/autoscaler-auto-headroom-percentage` | Specify the auto headroom percentage (a number in the range [0, 200]) which controls the percentage of headroom. | none |
-| `spotinst.io/autoscaler-headroom-cpu-per-unit` | Specify the number of CPUs to allocate for headroom. CPUs are denoted in millicores, where 1000 millicores = 1 vCPU. | none |
-| `spotinst.io/autoscaler-headroom-gpu-per-unit` | Specify the number of GPUs to allocate for headroom. | none |
-| `spotinst.io/autoscaler-headroom-mem-per-unit` | Specify the amount of memory (MB) to allocate for headroom. | none |
-| `spotinst.io/autoscaler-headroom-num-of-units` | Specify the number of units to retain as headroom, where each unit has the defined CPU and memory. | none |
-| `spotinst.io/autoscaler-cooldown` | Specify a period of time, in seconds, that Ocean should wait between scaling actions. | `300` |
-| `spotinst.io/autoscaler-scale-down-max-percentage` | Specify the maximum scale down percentage. | none |
-| `spotinst.io/autoscaler-scale-down-evaluation-periods` | Specify the number of evaluation periods that should accumulate before a scale down action takes place. | `5` |
-| `spotinst.io/autoscaler-resource-limits-max-vcpu` | Specify the maximum number of virtual CPUs that can be allocated to the cluster. | none |
-| `spotinst.io/autoscaler-resource-limits-max-memory` | Specify the maximum amount of total physical memory (in GiB units) that can be allocated to the cluster. | none |
-| `spotinst.io/restrict-scale-down` | Specify whether the scale-down activities should be restricted. | none |
+| `spotinst.io/grace-period` | Specify a period of time, in seconds, that Ocean should wait before applying instance health checks.                      | none |
+| `spotinst.io/ocean-default-launchspec` | Specify whether to use the InstanceGroup's spec as the default Launch Spec for the Ocean cluster.                         | none |
+| `spotinst.io/ocean-instance-types-whitelist` | Specify whether to whitelist specific instance types.                                                                     | none |
+| `spotinst.io/ocean-instance-types-blacklist` | Specify whether to blacklist specific instance types.                                                                     | none |
+| `spotinst.io/ocean-instance-types` | Specify a list of instance types that should be used by the Ocean Launch Spec.                                            | none |
+| `spotinst.io/autoscaler-disabled` | Specify whether the auto scaler should be disabled.                                                                       | `false` |
+| `spotinst.io/autoscaler-default-node-labels` | Specify whether default node labels should be set for the auto scaler.                                                    | `false` |
+| `spotinst.io/autoscaler-auto-config` | Specify whether headroom resources should be automatically configured and optimized.                                      | `true` |
+| `spotinst.io/autoscaler-auto-headroom-percentage` | Specify the auto headroom percentage (a number in the range [0, 200]) which controls the percentage of headroom.          | none |
+| `spotinst.io/autoscaler-headroom-cpu-per-unit` | Specify the number of CPUs to allocate for headroom. CPUs are denoted in millicores, where 1000 millicores = 1 vCPU.      | none |
+| `spotinst.io/autoscaler-headroom-gpu-per-unit` | Specify the number of GPUs to allocate for headroom.                                                                      | none |
+| `spotinst.io/autoscaler-headroom-mem-per-unit` | Specify the amount of memory (MB) to allocate for headroom.                                                               | none |
+| `spotinst.io/autoscaler-headroom-num-of-units` | Specify the number of units to retain as headroom, where each unit has the defined CPU and memory.                        | none |
+| `spotinst.io/autoscaler-cooldown` | Specify a period of time, in seconds, that Ocean should wait between scaling actions.                                     | `300` |
+| `spotinst.io/autoscaler-scale-down-max-percentage` | Specify the maximum scale down percentage.                                                                                | none |
+| `spotinst.io/autoscaler-scale-down-evaluation-periods` | Specify the number of evaluation periods that should accumulate before a scale down action takes place.                   | `5` |
+| `spotinst.io/autoscaler-resource-limits-max-vcpu` | Specify the maximum number of virtual CPUs that can be allocated to the cluster.                                          | none |
+| `spotinst.io/autoscaler-resource-limits-max-memory` | Specify the maximum amount of total physical memory (in GiB units) that can be allocated to the cluster.                  | none |
+| `spotinst.io/restrict-scale-down` | Specify whether the scale-down activities should be restricted.                                                           | none |
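+| `spotinst.io/other-architecture-images` | Specify the image to use for the other CPU architecture (amd64 or arm64), in addition to the InstanceGroup's `spec.image`; only one image per architecture is allowed. | none |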
 
 ## Cluster Metadata Labels
 ```yaml
@@ -198,15 +199,16 @@ metadata:
   labels:
     spotinst.io/strategy-cluster-spread-nodes-by: "count"
     spotinst.io/strategy-cluster-orientation-availability-vs-cost: "balanced"    
+    spotinst.io/resource-tag-specification-volumes: "true"
   ...
 ```
 
 
-| Label | Description                                                                        | Default |
-|---|---|---|
-| `spotinst.io/strategy-cluster-spread-nodes-by` | Specify how Ocean will spread the nodes across markets by this value [vcpu,count]. | `count` |
-| `spotinst.io/strategy-cluster-orientation-availability-vs-cost` | Specify approach [cost,balanced,cheapest] that Ocean takes while launching nodes.  | `balanced` |
-
+| Label | Description                                                                            | Default |
+|---|----------------------------------------------------------------------------------------|---|
+| `spotinst.io/strategy-cluster-spread-nodes-by` | Specify how Ocean will spread the nodes across markets by this value [vcpu,count].     | `count` |
+| `spotinst.io/strategy-cluster-orientation-availability-vs-cost` | Specify approach [cost,balanced,cheapest] that Ocean takes while launching nodes.      | `balanced` |
+| `spotinst.io/resource-tag-specification-volumes` | Specify if Volume resources will be tagged with Virtual Node Group tags or Ocean tags. | `false` |
 ## Documentation
 
 If you're new to [Spot](https://spot.io/) and want to get started, please checkout our [Getting Started](https://docs.spot.io/connect-your-cloud-provider/) guide, available on the [Spot Documentation](https://docs.spot.io/) website.

diff --git a/go.mod b/go.mod
@@ -38,7 +38,7 @@ require (
 	github.com/spf13/cobra v1.8.0
 	github.com/spf13/pflag v1.0.5
 	github.com/spf13/viper v1.17.0
-	github.com/spotinst/spotinst-sdk-go v1.145.0
+	github.com/spotinst/spotinst-sdk-go v1.171.0
 	github.com/stretchr/testify v1.8.4
 	github.com/weaveworks/mesh v0.0.0-20191105120815-58dbcc3e8e63
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0

diff --git a/go.sum b/go.sum
@@ -685,8 +685,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/spf13/viper v1.17.0 h1:I5txKw7MJasPL/BrfkbA0Jyo/oELqVmux4pR/UxOMfI=
 github.com/spf13/viper v1.17.0/go.mod h1:BmMMMLQXSbcHK6KAOiFLz0l5JHrU89OdIRHvsk0+yVI=
-github.com/spotinst/spotinst-sdk-go v1.145.0 h1:c/PufzKMbjmqSYcVHr+TuNpcZ6x5+jZALVGTesbJ7q4=
-github.com/spotinst/spotinst-sdk-go v1.145.0/go.mod h1:Ku9c4p+kRWnQqmXkzGcTMHLcQKgLHrQZISxeKY7mPqE=
+github.com/spotinst/spotinst-sdk-go v1.171.0 h1:ZihMPEjkpIkSpawWLJt9RtCRY4mOQMGlfrkVmA03000=
+github.com/spotinst/spotinst-sdk-go v1.171.0/go.mod h1:Ku9c4p+kRWnQqmXkzGcTMHLcQKgLHrQZISxeKY7mPqE=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=

diff --git a/pkg/model/awsmodel/spotinst.go b/pkg/model/awsmodel/spotinst.go
@@ -114,6 +114,10 @@ const (
 	// instance group to specify the cooldown period (in seconds) for scaling actions.
 	SpotInstanceGroupLabelAutoScalerCooldown = "spotinst.io/autoscaler-cooldown"
 
+	// SpotInstanceGroupLabelOtherArchitectureImages is the metadata label used on the instance group to specify the AWS image of the other architecture.
+	// Only one AMI is allowed per architecture type (amd64, arm64); the first image is taken from the instance group's spec.image.
+	SpotInstanceGroupLabelOtherArchitectureImages = "spotinst.io/other-architecture-images"
+
 	// SpotInstanceGroupLabelAutoScalerScaleDown* are the metadata labels used on the
 	// instance group to specify the scale down configuration used by the auto scaler.
 	SpotInstanceGroupLabelAutoScalerScaleDownMaxPercentage     = "spotinst.io/autoscaler-scale-down-max-percentage"
@@ -135,6 +139,10 @@ const (
 	// SpotClusterLabelStrategyClusterOrientationAvailabilityVsCost is the metadata label used on the
 	// instance group to specify how to optimize towards  continuity and/or cost-effective infrastructure
 	SpotClusterLabelStrategyClusterOrientationAvailabilityVsCost = "spotinst.io/strategy-cluster-orientation-availability-vs-cost"
+
+	// SpotClusterLabelResourceTagSpecificationVolumes is the cluster metadata label used to specify
+	// whether Volume resources will be tagged with Virtual Node Group tags or Ocean tags.
+	SpotClusterLabelResourceTagSpecificationVolumes = "spotinst.io/resource-tag-specification-volumes"
 )
 
 // SpotInstanceGroupModelBuilder configures SpotInstanceGroup objects
@@ -387,6 +395,11 @@ func (b *SpotInstanceGroupModelBuilder) buildOcean(c *fi.CloudupModelBuilderCont
 			ocean.SpreadNodesBy = fi.PtrTo(v)
 		case SpotClusterLabelStrategyClusterOrientationAvailabilityVsCost:
 			ocean.AvailabilityVsCost = fi.PtrTo(string(spotinsttasks.NormalizeClusterOrientation(&v)))
+		case SpotClusterLabelResourceTagSpecificationVolumes:
+			ocean.ResourceTagSpecificationVolumes, err = parseBool(v)
+			if err != nil {
+				return err
+			}
 		}
 	}
 
@@ -524,7 +537,7 @@ func (b *SpotInstanceGroupModelBuilder) buildOcean(c *fi.CloudupModelBuilderCont
 
 	klog.V(4).Infof("Adding task: Ocean/%s", fi.ValueOf(ocean.Name))
 	c.AddTask(ocean)
-
+	klog.V(4).Infof("Finish task: Ocean/%s", fi.ValueOf(ocean.Name))
 	return nil
 }
 
@@ -558,6 +571,12 @@ func (b *SpotInstanceGroupModelBuilder) buildLaunchSpec(c *fi.CloudupModelBuilde
 			if err != nil {
 				return err
 			}
+
+		case SpotInstanceGroupLabelOtherArchitectureImages:
+			launchSpec.OtherArchitectureImages, err = parseStringSlice(v)
+			if err != nil {
+				return err
+			}
 		}
 	}
 

diff --git a/upup/pkg/fi/cloudup/spotinsttasks/launch_spec.go b/upup/pkg/fi/cloudup/spotinsttasks/launch_spec.go
@@ -55,6 +55,7 @@ type LaunchSpec struct {
 	MinSize                  *int64
 	MaxSize                  *int64
 	InstanceMetadataOptions  *InstanceMetadataOptions
+	OtherArchitectureImages  []string
 
 	Ocean *Ocean
 }
@@ -157,8 +158,12 @@ func (o *LaunchSpec) Find(c *fi.CloudupContext) (*LaunchSpec, error) {
 
 	// Image.
 	{
+		// The API returns multi-arch image data only in spec.Images, so map it back onto the single-image fields.
+		if spec.Images != nil && len(spec.Images) > 1 {
+			spec.SetImageId(fi.PtrTo(fi.ValueOf(spec.Images[0].ImageId)))
+			actual.OtherArchitectureImages = append(actual.OtherArchitectureImages, fi.ValueOf(spec.Images[1].ImageId))
+		}
 		actual.ImageID = spec.ImageID
-
 		if o.ImageID != nil && actual.ImageID != nil &&
 			fi.ValueOf(actual.ImageID) != fi.ValueOf(o.ImageID) {
 			image, err := resolveImage(cloud, fi.ValueOf(o.ImageID))
@@ -169,6 +174,16 @@ func (o *LaunchSpec) Find(c *fi.CloudupContext) (*LaunchSpec, error) {
 				actual.ImageID = o.ImageID
 			}
 		}
+		if o.OtherArchitectureImages != nil && actual.OtherArchitectureImages != nil &&
+			(actual.OtherArchitectureImages[0] != o.OtherArchitectureImages[0]) {
+			image, err := resolveImage(cloud, o.OtherArchitectureImages[0])
+			if err != nil {
+				return nil, err
+			}
+			if fi.ValueOf(image.ImageId) == actual.OtherArchitectureImages[0] {
+				actual.OtherArchitectureImages[0] = o.OtherArchitectureImages[0]
+			}
+		}
 	}
 
 	// User data.
@@ -395,12 +410,21 @@ func (_ *LaunchSpec) create(cloud awsup.AWSCloud, a, e, changes *LaunchSpec) err
 
 	// Image.
 	{
-		if e.ImageID != nil {
+		if e.ImageID != nil && len(e.OtherArchitectureImages) == 0 { //old api
 			image, err := resolveImage(cloud, fi.ValueOf(e.ImageID))
 			if err != nil {
 				return err
 			}
 			spec.SetImageId(image.ImageId)
+		} else {
+			if e.ImageID != nil && len(e.OtherArchitectureImages) == 1 {
+				images, err := buildImages(cloud, e.ImageID, e.OtherArchitectureImages)
+				if err != nil {
+					return err
+				}
+				spec.SetImageId(nil)
+				spec.SetImages(images)
+			}
 		}
 	}
 
@@ -610,7 +634,7 @@ func (_ *LaunchSpec) update(cloud awsup.AWSCloud, a, e, changes *LaunchSpec) err
 
 	// Image.
 	{
-		if changes.ImageID != nil {
+		if changes.ImageID != nil { //old api
 			image, err := resolveImage(cloud, fi.ValueOf(e.ImageID))
 			if err != nil {
 				return err
@@ -619,10 +643,21 @@ func (_ *LaunchSpec) update(cloud awsup.AWSCloud, a, e, changes *LaunchSpec) err
 			if fi.ValueOf(actual.ImageID) != fi.ValueOf(image.ImageId) {
 				spec.SetImageId(image.ImageId)
 			}
-
 			changes.ImageID = nil
 			changed = true
 		}
+		if changes.OtherArchitectureImages != nil {
+			images, err := buildImages(cloud, spec.ImageID, e.OtherArchitectureImages)
+			if err != nil {
+				return err
+			}
+			spec.SetImageId(nil)
+			spec.SetImages(images)
+			changes.OtherArchitectureImages = nil
+			changed = true
+
+		}
+
 	}
 
 	// User data.
@@ -1139,3 +1174,26 @@ func (o *LaunchSpec) convertBlockDeviceMapping(in *awstasks.BlockDeviceMapping)
 
 	return out
 }
+// buildImages resolves the primary image and, when exactly one is given, the
+// other-architecture image, returning them in that order as Spot image entries.
+func buildImages(cloud awsup.AWSCloud, ImageID *string, OtherArchitectureImages []string) ([]*aws.Images, error) {
+	var names []string
+	if ImageID != nil {
+		names = append(names, fi.ValueOf(ImageID))
+	}
+	if len(OtherArchitectureImages) == 1 {
+		names = append(names, OtherArchitectureImages[0])
+	}
+
+	var resolved []*aws.Images
+	for _, name := range names {
+		image, err := resolveImage(cloud, name)
+		if err != nil {
+			return nil, err
+		}
+		entry := new(aws.Images)
+		entry.SetImageId(image.ImageId)
+		resolved = append(resolved, entry)
+	}
+	return resolved, nil
+}