From 3513105ae441279231d7a6c6decb28855c9c2170 Mon Sep 17 00:00:00 2001 From: rho song <13165182+rhoboat@users.noreply.github.com> Date: Fri, 23 Sep 2022 17:32:20 -0700 Subject: [PATCH] Add utilities for ecs. --- awscommons/v2/ecs.go | 201 ++++++++++++++++++++++++++++++++++++++++ awscommons/v2/errors.go | 34 +++++++ go.mod | 9 +- go.sum | 10 ++ logging/logging.go | 8 +- 5 files changed, 257 insertions(+), 5 deletions(-) create mode 100644 awscommons/v2/ecs.go create mode 100644 awscommons/v2/errors.go diff --git a/awscommons/v2/ecs.go b/awscommons/v2/ecs.go new file mode 100644 index 0000000..a45837d --- /dev/null +++ b/awscommons/v2/ecs.go @@ -0,0 +1,201 @@ +package awscommons + +import ( + "fmt" + "math" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/ecs" + ecsTypes "github.com/aws/aws-sdk-go-v2/service/ecs/types" + "github.com/gruntwork-io/go-commons/collections" + "github.com/gruntwork-io/go-commons/errors" + "github.com/gruntwork-io/go-commons/logging" + "github.com/gruntwork-io/go-commons/retry" +) + +// GetContainerInstanceArns gets the container instance ARNs of all the EC2 instances in an ECS Cluster. +// ECS container instance ARNs are different from EC2 instance IDs! +// An ECS container instance is an EC2 instance that runs the ECS container agent and has been registered into +// an ECS cluster. +// Example identifiers: +// - EC2 instance ID: i-08e8cfc073db135a9 +// - container instance ID: 2db66342-5f69-4782-89a3-f9b707f979ab +// - container instance ARN: arn:aws:ecs:us-east-1:012345678910:container-instance/2db66342-5f69-4782-89a3-f9b707f979ab +func GetContainerInstanceArns(opts *Options, clusterName string) ([]string, error) { + client, err := NewECSClient(opts) + if err != nil { + return nil, err + } + + logger := logging.GetProjectLogger() + logger.Infof("Looking up Container Instance ARNs for ECS cluster %s", clusterName) + + input := &ecs.ListContainerInstancesInput{Cluster: aws.String(clusterName)} + arns := []string{} + // Handle pagination by repeatedly making the API call while there is a next token set. + for { + result, err := client.ListContainerInstances(opts.Context, input) + if err != nil { + return nil, errors.WithStackTrace(err) + } + arns = append(arns, result.ContainerInstanceArns...) + if result.NextToken == nil { + break + } + input.NextToken = result.NextToken + } + + return arns, nil +} + +// StartDrainingContainerInstances puts ECS container instances in DRAINING state so that all ECS Tasks running on +// them are migrated to other container instances. Batches into chunks of 10 because of AWS API limitations. +// (An error occurred InvalidParameterException when calling the UpdateContainerInstancesState +// operation: instanceIds can have at most 10 items.) +func StartDrainingContainerInstances(opts *Options, clusterName string, containerInstanceArns []string) error { + client, err := NewECSClient(opts) + if err != nil { + return err + } + + logger := logging.GetProjectLogger() + batchSize := 10 + numBatches := int(math.Ceil(float64(len(containerInstanceArns) / batchSize))) + + errList := NewMultipleDrainContainerInstanceErrors() + for batchIdx, batchedArnList := range collections.BatchListIntoGroupsOf(containerInstanceArns, batchSize) { + batchedArns := aws.StringSlice(batchedArnList) + + logger.Infof("Putting batch %d/%d of container instances in cluster %s into DRAINING state", batchIdx, numBatches, clusterName) + input := &ecs.UpdateContainerInstancesStateInput{ + Cluster: aws.String(clusterName), + ContainerInstances: aws.ToStringSlice(batchedArns), + Status: "DRAINING", + } + _, err := client.UpdateContainerInstancesState(opts.Context, input) + if err != nil { + errList.AddError(err) + logger.Errorf("Encountered error starting to drain container instances in batch %d: %s", batchIdx, err) + logger.Errorf("Container Instance ARNs: %s", strings.Join(batchedArnList, ",")) + continue + } + + logger.Infof("Started draining %d container instances from batch %d", len(batchedArnList), batchIdx) + } + + if !errList.IsEmpty() { + return errors.WithStackTrace(errList) + } + logger.Infof("Successfully started draining all %d container instances", len(containerInstanceArns)) + return nil +} + +// WaitForContainerInstancesToDrain waits until there are no more ECS Tasks running on any of the ECS container +// instances. Batches container instances in groups of 100 because of AWS API limitations. +func WaitForContainerInstancesToDrain(opts *Options, clusterName string, containerInstanceArns []string, start time.Time, timeout time.Duration, maxRetries int, sleepBetweenRetries time.Duration) error { + client, err := NewECSClient(opts) + if err != nil { + return err + } + + logger := logging.GetProjectLogger() + logger.Infof("Checking if all ECS Tasks have been drained from the ECS Container Instances in Cluster %s.", clusterName) + + batchSize := 100 + numBatches := int(math.Ceil(float64(len(containerInstanceArns) / batchSize))) + + err = retry.DoWithRetry( + logger.Logger, + "Wait for Container Instances to be Drained", + maxRetries, sleepBetweenRetries, + func() error { + responses := []*ecs.DescribeContainerInstancesOutput{} + for batchIdx, batchedArnList := range collections.BatchListIntoGroupsOf(containerInstanceArns, batchSize) { + batchedArns := aws.StringSlice(batchedArnList) + + logger.Infof("Fetching description of batch %d/%d of ECS Instances in Cluster %s.", batchIdx, numBatches, clusterName) + input := &ecs.DescribeContainerInstancesInput{ + Cluster: aws.String(clusterName), + ContainerInstances: aws.ToStringSlice(batchedArns), + } + result, err := client.DescribeContainerInstances(opts.Context, input) + if err != nil { + return errors.WithStackTrace(err) + } + responses = append(responses, result) + } + + // If we exceeded the timeout, halt with error. + if timeoutExceeded(start, timeout) { + return retry.FatalError{Underlying: fmt.Errorf("maximum drain timeout of %s seconds has elapsed and instances are still draining", timeout)} + } + + // Yay, all done. + if drained, _ := allInstancesFullyDrained(responses); drained == true { + logger.Infof("All container instances have been drained in Cluster %s!", clusterName) + return nil + } + + // If there's no error, retry. + if err == nil { + return errors.WithStackTrace(fmt.Errorf("container instances still draining")) + } + + // Else, there's an error, halt and fail. + return retry.FatalError{Underlying: err} + }) + return errors.WithStackTrace(err) +} + +// timeoutExceeded returns true if the amount of time since start has exceeded the timeout. +func timeoutExceeded(start time.Time, timeout time.Duration) bool { + timeElapsed := time.Now().Sub(start) + return timeElapsed > timeout +} + +// NewECSClient returns a new AWS SDK client for interacting with AWS ECS. +func NewECSClient(opts *Options) (*ecs.Client, error) { + cfg, err := NewDefaultConfig(opts) + if err != nil { + return nil, errors.WithStackTrace(err) + } + return ecs.NewFromConfig(cfg), nil +} + +func allInstancesFullyDrained(responses []*ecs.DescribeContainerInstancesOutput) (bool, error) { + for _, response := range responses { + instances := response.ContainerInstances + if len(instances) == 0 { + return false, errors.WithStackTrace(fmt.Errorf("querying DescribeContainerInstances returned no instances")) + } + + for _, instance := range instances { + if !instanceFullyDrained(instance) { + return false, nil + } + } + } + return true, nil +} + +func instanceFullyDrained(instance ecsTypes.ContainerInstance) bool { + logger := logging.GetProjectLogger() + instanceArn := instance.ContainerInstanceArn + + if *instance.Status == "ACTIVE" { + logger.Infof("The ECS Container Instance %s is still in ACTIVE status", *instanceArn) + return false + } + if instance.PendingTasksCount > 0 { + logger.Infof("The ECS Container Instance %s still has pending tasks", *instanceArn) + return false + } + if instance.RunningTasksCount > 0 { + logger.Infof("The ECS Container Instance %s still has running tasks", *instanceArn) + return false + } + + return true +} diff --git a/awscommons/v2/errors.go b/awscommons/v2/errors.go new file mode 100644 index 0000000..24984b6 --- /dev/null +++ b/awscommons/v2/errors.go @@ -0,0 +1,34 @@ +package awscommons + +import ( + "fmt" + "strings" +) + +// MultipleDrainContainerInstanceErrors represents multiple errors found while terminating instances +type MultipleDrainContainerInstanceErrors struct { + errors []error +} + +func (err MultipleDrainContainerInstanceErrors) Error() string { + messages := []string{ + fmt.Sprintf("%d errors found while draining container instances:", len(err.errors)), + } + + for _, individualErr := range err.errors { + messages = append(messages, individualErr.Error()) + } + return strings.Join(messages, "\n") +} + +func (err MultipleDrainContainerInstanceErrors) AddError(newErr error) { + err.errors = append(err.errors, newErr) +} + +func (err MultipleDrainContainerInstanceErrors) IsEmpty() bool { + return len(err.errors) == 0 +} + +func NewMultipleDrainContainerInstanceErrors() MultipleDrainContainerInstanceErrors { + return MultipleDrainContainerInstanceErrors{[]error{}} +} diff --git a/go.mod b/go.mod index 68f8237..b1fbc09 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.17 require ( github.com/aws/aws-sdk-go v1.44.48 - github.com/aws/aws-sdk-go-v2 v1.16.7 + github.com/aws/aws-sdk-go-v2 v1.16.16 github.com/aws/aws-sdk-go-v2/config v1.15.13 github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2 github.com/aws/aws-sdk-go-v2/service/s3 v1.27.1 @@ -32,17 +32,18 @@ require ( github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.12.8 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.9 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.8 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.8 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.11.11 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 // indirect - github.com/aws/smithy-go v1.12.0 // indirect + github.com/aws/smithy-go v1.13.3 // indirect github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect diff --git a/go.sum b/go.sum index fd36c12..0c1fa16 100644 --- a/go.sum +++ b/go.sum @@ -66,6 +66,8 @@ github.com/aws/aws-sdk-go v1.44.48 h1:jLDC9RsNoYMLFlKpB8LdqUnoDdC2yvkS4QbuyPQJ8+ github.com/aws/aws-sdk-go v1.44.48/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= github.com/aws/aws-sdk-go-v2 v1.16.7 h1:zfBwXus3u14OszRxGcqCDS4MfMCv10e8SMJ2r8Xm0Ns= github.com/aws/aws-sdk-go-v2 v1.16.7/go.mod h1:6CpKuLXg2w7If3ABZCl/qZ6rEgwtjZTn4eAf4RcEyuw= +github.com/aws/aws-sdk-go-v2 v1.16.16 h1:M1fj4FE2lB4NzRb9Y0xdWsn2P0+2UHVxwKyOa4YJNjk= +github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3 h1:S/ZBwevQkr7gv5YxONYpGQxlMFFYSRfz3RMcjsC9Qhk= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3/go.mod h1:gNsR5CaXKmQSSzrmGxmwmct/r+ZBfbxorAuXYsj/M5Y= github.com/aws/aws-sdk-go-v2/config v1.15.13 h1:CJH9zn/Enst7lDiGpoguVt0lZr5HcpNVlRJWbJ6qreo= @@ -76,14 +78,20 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 h1:VfBdn2AxwMbFyJN/lF/xuT3 github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8/go.mod h1:oL1Q3KuCq1D4NykQnIvtRiBGLUXhcpY5pl6QZB2XEPU= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 h1:2C0pYHcUBmdzPj+EKNC4qj97oK6yjrUhc1KoSodglvk= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14/go.mod h1:kdjrMwHwrC3+FsKhNcCMJ7tUVj/8uSD5CZXeQ4wV6fM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 h1:s4g/wnzMf+qepSNgTvaQQHNxyMLKSawNhKCPNy++2xY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 h1:2J+jdlBJWEmTyAwC82Ym68xCykIvnSnIN18b8xHGlcc= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8/go.mod h1:ZIV8GYoC6WLBW5KGs+o4rsc65/ozd+eQ0L31XF5VDwk= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 h1:/K482T5A3623WJgWT8w1yRAFK4RzGzEl7y39yhtn9eA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 h1:QquxR7NH3ULBsKC+NoTpilzbKKS+5AELfNREInbhvas= github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15/go.mod h1:Tkrthp/0sNBShQQsamR7j/zY4p19tVTAs+nnqhH6R3c= github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5 h1:tEEHn+PGAxRVqMPEhtU8oCSW/1Ge3zP5nUgPrGQNUPs= github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5/go.mod h1:aIwFF3dUk95ocCcA3zfk3nhz0oLkpzHFWuMp8l/4nNs= github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2 h1:81hrDgbXHL44WdY6M/fHGXLlv17qTpOFzutXRVDEk3Y= github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2/go.mod h1:VoBcwURHnJVCWuXHdqVuG03i2lUlHJ5DTTqDSyCdEcc= +github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22 h1:jBx029Z9GQIIq5fC5bW1ZMDsjihvmQQIe/QqdFl+7zY= +github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22/go.mod h1:6bV2xEub6Vch19ZZASMbrNMNIpBPTwy64r9WIQ+wsSE= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3 h1:4n4KCtv5SUoT5Er5XV41huuzrCqepxlW3SDI9qHQebc= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3/go.mod h1:gkb2qADY+OHaGLKNTYxMaQNacfeyQpZ4csDTQMeFmcw= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.9 h1:gVv2vXOMqJeR4ZHHV32K7LElIJIIzyw/RU1b0lSfWTQ= @@ -102,6 +110,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 h1:yOfILxyjmtr2ubRkRJldlHDFBhf5 github.com/aws/aws-sdk-go-v2/service/sts v1.16.9/go.mod h1:O1IvkYxr+39hRf960Us6j0x1P8pDqhTX+oXM5kQNl/Y= github.com/aws/smithy-go v1.12.0 h1:gXpeZel/jPoWQ7OEmLIgCUnhkFftqNfwWUwAHSlp1v0= github.com/aws/smithy-go v1.12.0/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= +github.com/aws/smithy-go v1.13.3 h1:l7LYxGuzK6/K+NzJ2mC+VvLUbae0sL3bXU//04MkmnA= +github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d h1:xDfNPAt8lFiC1UJrqV3uuy861HCTo708pDMbjHHdCas= github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d/go.mod h1:6QX/PXZ00z/TKoufEY6K/a0k6AhaJrQKdFe6OfVXsa4= github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY= diff --git a/logging/logging.go b/logging/logging.go index 8c1d807..8ccb90e 100644 --- a/logging/logging.go +++ b/logging/logging.go @@ -9,7 +9,13 @@ import ( var globalLogLevel = logrus.InfoLevel var globalLogLevelLock = sync.Mutex{} -// Create a new logger with the given name +// GetProjectLogger creates a new project logger +func GetProjectLogger() *logrus.Entry { + logger := GetLogger("") + return logger.WithField("name", "go-commons") +} + +// GetLogger create a new logger with the given name func GetLogger(name string) *logrus.Logger { logger := logrus.New()