Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utilities for ecs. #73

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions awscommons/v2/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/gruntwork-io/go-commons/errors"
"github.com/sirupsen/logrus"
)

const (
Expand All @@ -17,6 +18,8 @@ type Options struct {
Region string

Context context.Context

Logger *logrus.Entry
}

// NewOptions will create a new aws.Options struct that provides reasonable defaults for unspecified values.
Expand Down
219 changes: 219 additions & 0 deletions awscommons/v2/ecs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
package awscommons

import (
"fmt"
"math"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/ecs"
ecs_types "github.com/aws/aws-sdk-go-v2/service/ecs/types"
"github.com/gruntwork-io/go-commons/collections"
"github.com/gruntwork-io/go-commons/errors"
"github.com/gruntwork-io/go-commons/retry"
)

// GetContainerInstanceArns gets the container instance ARNs of all the EC2 instances in an ECS Cluster.
// ECS container instance ARNs are different from EC2 instance IDs!
// An ECS container instance is an EC2 instance that runs the ECS container agent and has been registered into
// an ECS cluster.
// Example identifiers:
// - EC2 instance ID: i-08e8cfc073db135a9
// - container instance ID: 2db66342-5f69-4782-89a3-f9b707f979ab
// - container instance ARN: arn:aws:ecs:us-east-1:012345678910:container-instance/2db66342-5f69-4782-89a3-f9b707f979ab
func GetContainerInstanceArns(opts *Options, clusterName string) ([]string, error) {
client, err := NewECSClient(opts)
if err != nil {
return nil, err
}

if opts.Logger != nil {
opts.Logger.Debugf("Looking up Container Instance ARNs for ECS cluster %s", clusterName)
}

input := &ecs.ListContainerInstancesInput{Cluster: aws.String(clusterName)}
arns := []string{}
// Handle pagination by repeatedly making the API call while there is a next token set.
for {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NIT: consider adding a maximum to the loop iteration to avoid an infinite loop. Logically, the AWS API should prevent such an infinite loop, but it's always good to practice defensive coding to avoid unknown unknowns causing unexpected behaviors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that's true. I'll take care of this in the next review cycle, putting a todo here for now.

result, err := client.ListContainerInstances(opts.Context, input)
if err != nil {
return nil, errors.WithStackTrace(err)
}
arns = append(arns, result.ContainerInstanceArns...)
if result.NextToken == nil {
break
}
input.NextToken = result.NextToken
}

return arns, nil
}

// StartDrainingContainerInstances puts ECS container instances in DRAINING state so that all ECS Tasks running on
// them are migrated to other container instances. Batches into chunks of 10 because of AWS API limitations.
// (An error occurred InvalidParameterException when calling the UpdateContainerInstancesState
// operation: instanceIds can have at most 10 items.)
func StartDrainingContainerInstances(opts *Options, clusterName string, containerInstanceArns []string) error {
client, err := NewECSClient(opts)
if err != nil {
return err
}

batchSize := 10
numBatches := int(math.Ceil(float64(len(containerInstanceArns) / batchSize)))

errList := NewMultipleDrainContainerInstanceErrors()
for batchIdx, batchedArnList := range collections.BatchListIntoGroupsOf(containerInstanceArns, batchSize) {
batchedArns := aws.StringSlice(batchedArnList)

if opts.Logger != nil {
opts.Logger.Debugf("Putting batch %d/%d of container instances in cluster %s into DRAINING state", batchIdx, numBatches, clusterName)
}
input := &ecs.UpdateContainerInstancesStateInput{
Cluster: aws.String(clusterName),
ContainerInstances: aws.ToStringSlice(batchedArns),
Status: "DRAINING",
}
_, err := client.UpdateContainerInstancesState(opts.Context, input)
if err != nil {
errList.AddError(err)
if opts.Logger != nil {
opts.Logger.Errorf("Encountered error starting to drain container instances in batch %d: %s", batchIdx, err)
opts.Logger.Errorf("Container Instance ARNs: %s", strings.Join(batchedArnList, ","))
}
continue
}

if opts.Logger != nil {
opts.Logger.Debugf("Started draining %d container instances from batch %d", len(batchedArnList), batchIdx)
}
}

if !errList.IsEmpty() {
return errors.WithStackTrace(errList)
}

if opts.Logger != nil {
opts.Logger.Debugf("Successfully started draining all %d container instances", len(containerInstanceArns))
}
return nil
}

// WaitForContainerInstancesToDrain waits until there are no more ECS Tasks running on any of the ECS container
// instances. Batches container instances in groups of 100 because of AWS API limitations.
func WaitForContainerInstancesToDrain(opts *Options, clusterName string, containerInstanceArns []string, start time.Time, timeout time.Duration, maxRetries int, sleepBetweenRetries time.Duration) error {
client, err := NewECSClient(opts)
if err != nil {
return err
}

if opts.Logger != nil {
opts.Logger.Debugf("Checking if all ECS Tasks have been drained from the ECS Container Instances in Cluster %s.", clusterName)
}

batchSize := 100
numBatches := int(math.Ceil(float64(len(containerInstanceArns) / batchSize)))

err = retry.DoWithRetry(
opts.Logger,
"Wait for Container Instances to be Drained",
maxRetries, sleepBetweenRetries,
func() error {
responses := []*ecs.DescribeContainerInstancesOutput{}
for batchIdx, batchedArnList := range collections.BatchListIntoGroupsOf(containerInstanceArns, batchSize) {
batchedArns := aws.StringSlice(batchedArnList)

if opts.Logger != nil {
opts.Logger.Debugf("Fetching description of batch %d/%d of ECS Instances in Cluster %s.", batchIdx, numBatches, clusterName)
}
input := &ecs.DescribeContainerInstancesInput{
Cluster: aws.String(clusterName),
ContainerInstances: aws.ToStringSlice(batchedArns),
}
result, err := client.DescribeContainerInstances(opts.Context, input)
if err != nil {
return errors.WithStackTrace(err)
}
responses = append(responses, result)
}

// If we exceeded the timeout, halt with error.
if timeoutExceeded(start, timeout) {
return retry.FatalError{Underlying: fmt.Errorf("maximum drain timeout of %s seconds has elapsed and instances are still draining", timeout)}
}

// Yay, all done.
if drained, _ := allInstancesFullyDrained(responses); drained == true {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should handle the error or add comment as to why it's ok not to handle the error.

if opts.Logger != nil {
opts.Logger.Debugf("All container instances have been drained in Cluster %s!", clusterName)
}
return nil
}

// If there's no error, retry.
if err == nil {
return errors.WithStackTrace(fmt.Errorf("container instances still draining"))
}

// Else, there's an error, halt and fail.
return retry.FatalError{Underlying: err}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This err check is a tautology, since err is only set within the for loop above, and you immediately return err if there is an error. So it will always be nil at this point.

You can reduce this to return the container instances still draining error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. I think the best thing is to retry in all other cases. If timeout exceeded, exit with error. If drained, return nil. All other cases, retry.

})
return errors.WithStackTrace(err)
}

// timeoutExceeded returns true if the amount of time since start has exceeded the timeout.
func timeoutExceeded(start time.Time, timeout time.Duration) bool {
timeElapsed := time.Now().Sub(start)
return timeElapsed > timeout
}

// NewECSClient returns a new AWS SDK client for interacting with AWS ECS.
func NewECSClient(opts *Options) (*ecs.Client, error) {
cfg, err := NewDefaultConfig(opts)
if err != nil {
return nil, errors.WithStackTrace(err)
}
return ecs.NewFromConfig(cfg), nil
}

func allInstancesFullyDrained(responses []*ecs.DescribeContainerInstancesOutput) (bool, error) {
for _, response := range responses {
instances := response.ContainerInstances
if len(instances) == 0 {
return false, errors.WithStackTrace(fmt.Errorf("querying DescribeContainerInstances returned no instances"))
}

for _, instance := range instances {
if !instanceFullyDrained(instance) {
return false, nil
}
}
}
return true, nil
}

func instanceFullyDrained(instance ecs_types.ContainerInstance) bool {
instanceArn := instance.ContainerInstanceArn

if *instance.Status == "ACTIVE" {
if opts.Logger != nil {
opts.Logger.Debugf("The ECS Container Instance %s is still in ACTIVE status", *instanceArn)
}
return false
}
if instance.PendingTasksCount > 0 {
if opts.Logger != nil {
opts.Logger.Debugf("The ECS Container Instance %s still has pending tasks", *instanceArn)
}
return false
}
if instance.RunningTasksCount > 0 {
if opts.Logger != nil {
opts.Logger.Debugf("The ECS Container Instance %s still has running tasks", *instanceArn)
}
return false
}

return true
}
34 changes: 34 additions & 0 deletions awscommons/v2/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package awscommons

import (
"fmt"
"strings"
)

// MultipleDrainContainerInstanceErrors represents multiple errors found while terminating instances
type MultipleDrainContainerInstanceErrors struct {
errors []error
}

func (err MultipleDrainContainerInstanceErrors) Error() string {
messages := []string{
fmt.Sprintf("%d errors found while draining container instances:", len(err.errors)),
}

for _, individualErr := range err.errors {
messages = append(messages, individualErr.Error())
}
return strings.Join(messages, "\n")
}

func (err MultipleDrainContainerInstanceErrors) AddError(newErr error) {
err.errors = append(err.errors, newErr)
}

func (err MultipleDrainContainerInstanceErrors) IsEmpty() bool {
return len(err.errors) == 0
}

func NewMultipleDrainContainerInstanceErrors() MultipleDrainContainerInstanceErrors {
return MultipleDrainContainerInstanceErrors{[]error{}}
}
9 changes: 5 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.17

require (
github.com/aws/aws-sdk-go v1.44.48
github.com/aws/aws-sdk-go-v2 v1.16.7
github.com/aws/aws-sdk-go-v2 v1.16.16
github.com/aws/aws-sdk-go-v2/config v1.15.13
github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2
github.com/aws/aws-sdk-go-v2/service/s3 v1.27.1
Expand Down Expand Up @@ -32,17 +32,18 @@ require (
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.12.8 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5 // indirect
github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.9 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.8 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.8 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.11.11 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 // indirect
github.com/aws/smithy-go v1.12.0 // indirect
github.com/aws/smithy-go v1.13.3 // indirect
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ github.com/aws/aws-sdk-go v1.44.48 h1:jLDC9RsNoYMLFlKpB8LdqUnoDdC2yvkS4QbuyPQJ8+
github.com/aws/aws-sdk-go v1.44.48/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/aws/aws-sdk-go-v2 v1.16.7 h1:zfBwXus3u14OszRxGcqCDS4MfMCv10e8SMJ2r8Xm0Ns=
github.com/aws/aws-sdk-go-v2 v1.16.7/go.mod h1:6CpKuLXg2w7If3ABZCl/qZ6rEgwtjZTn4eAf4RcEyuw=
github.com/aws/aws-sdk-go-v2 v1.16.16 h1:M1fj4FE2lB4NzRb9Y0xdWsn2P0+2UHVxwKyOa4YJNjk=
github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3 h1:S/ZBwevQkr7gv5YxONYpGQxlMFFYSRfz3RMcjsC9Qhk=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3/go.mod h1:gNsR5CaXKmQSSzrmGxmwmct/r+ZBfbxorAuXYsj/M5Y=
github.com/aws/aws-sdk-go-v2/config v1.15.13 h1:CJH9zn/Enst7lDiGpoguVt0lZr5HcpNVlRJWbJ6qreo=
Expand All @@ -76,14 +78,20 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 h1:VfBdn2AxwMbFyJN/lF/xuT3
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8/go.mod h1:oL1Q3KuCq1D4NykQnIvtRiBGLUXhcpY5pl6QZB2XEPU=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 h1:2C0pYHcUBmdzPj+EKNC4qj97oK6yjrUhc1KoSodglvk=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14/go.mod h1:kdjrMwHwrC3+FsKhNcCMJ7tUVj/8uSD5CZXeQ4wV6fM=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 h1:s4g/wnzMf+qepSNgTvaQQHNxyMLKSawNhKCPNy++2xY=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 h1:2J+jdlBJWEmTyAwC82Ym68xCykIvnSnIN18b8xHGlcc=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8/go.mod h1:ZIV8GYoC6WLBW5KGs+o4rsc65/ozd+eQ0L31XF5VDwk=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 h1:/K482T5A3623WJgWT8w1yRAFK4RzGzEl7y39yhtn9eA=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 h1:QquxR7NH3ULBsKC+NoTpilzbKKS+5AELfNREInbhvas=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15/go.mod h1:Tkrthp/0sNBShQQsamR7j/zY4p19tVTAs+nnqhH6R3c=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5 h1:tEEHn+PGAxRVqMPEhtU8oCSW/1Ge3zP5nUgPrGQNUPs=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5/go.mod h1:aIwFF3dUk95ocCcA3zfk3nhz0oLkpzHFWuMp8l/4nNs=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2 h1:81hrDgbXHL44WdY6M/fHGXLlv17qTpOFzutXRVDEk3Y=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2/go.mod h1:VoBcwURHnJVCWuXHdqVuG03i2lUlHJ5DTTqDSyCdEcc=
github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22 h1:jBx029Z9GQIIq5fC5bW1ZMDsjihvmQQIe/QqdFl+7zY=
github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22/go.mod h1:6bV2xEub6Vch19ZZASMbrNMNIpBPTwy64r9WIQ+wsSE=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3 h1:4n4KCtv5SUoT5Er5XV41huuzrCqepxlW3SDI9qHQebc=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3/go.mod h1:gkb2qADY+OHaGLKNTYxMaQNacfeyQpZ4csDTQMeFmcw=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.9 h1:gVv2vXOMqJeR4ZHHV32K7LElIJIIzyw/RU1b0lSfWTQ=
Expand All @@ -102,6 +110,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 h1:yOfILxyjmtr2ubRkRJldlHDFBhf5
github.com/aws/aws-sdk-go-v2/service/sts v1.16.9/go.mod h1:O1IvkYxr+39hRf960Us6j0x1P8pDqhTX+oXM5kQNl/Y=
github.com/aws/smithy-go v1.12.0 h1:gXpeZel/jPoWQ7OEmLIgCUnhkFftqNfwWUwAHSlp1v0=
github.com/aws/smithy-go v1.12.0/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.13.3 h1:l7LYxGuzK6/K+NzJ2mC+VvLUbae0sL3bXU//04MkmnA=
github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d h1:xDfNPAt8lFiC1UJrqV3uuy861HCTo708pDMbjHHdCas=
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d/go.mod h1:6QX/PXZ00z/TKoufEY6K/a0k6AhaJrQKdFe6OfVXsa4=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
Expand Down
2 changes: 1 addition & 1 deletion logging/logging.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
var globalLogLevel = logrus.InfoLevel
var globalLogLevelLock = sync.Mutex{}

// Create a new logger with the given name
// GetLogger create a new logger with the given name
func GetLogger(name string) *logrus.Logger {
logger := logrus.New()

Expand Down