Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Status Code Metrics for API Calls in the Agent #1442

Merged
merged 8 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ linters:
- goimports
- gosec
- gosimple
- govet
- ineffassign
- misspell
- revive
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ WIN_BUILD = GOOS=windows GOARCH=amd64 go build -trimpath -buildmode=${CWAGENT_BU
DARWIN_BUILD_AMD64 = CGO_ENABLED=1 GO111MODULE=on GOOS=darwin GOARCH=amd64 go build -trimpath -ldflags="${LDFLAGS}" -o $(BUILD_SPACE)/bin/darwin_amd64
DARWIN_BUILD_ARM64 = CGO_ENABLED=1 GO111MODULE=on GOOS=darwin GOARCH=arm64 go build -trimpath -ldflags="${LDFLAGS}" -o $(BUILD_SPACE)/bin/darwin_arm64

IMAGE_REGISTRY = amazon
IMAGE_REPO = cloudwatch-agent
IMAGE_TAG = $(VERSION)
IMAGE_REGISTRY = 730335384949.dkr.ecr.us-west-2.amazonaws.com
IMAGE_REPO = cwagent
IMAGE_TAG = latest
IMAGE = $(IMAGE_REGISTRY)/$(IMAGE_REPO):$(IMAGE_TAG)
DOCKER_BUILD_FROM_SOURCE = docker build -t $(IMAGE) -f ./amazon-cloudwatch-container-insights/cloudwatch-agent-dockerfile/source/Dockerfile
DOCKER_WINDOWS_BUILD_FROM_SOURCE = docker build -t $(IMAGE) -f ./amazon-cloudwatch-container-insights/cloudwatch-agent-dockerfile/source/Dockerfile.Windows
Expand Down
17 changes: 16 additions & 1 deletion internal/ecsservicediscovery/servicediscovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"sync"
"time"

"github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ecs"
Expand All @@ -23,6 +24,7 @@ type ServiceDiscovery struct {

stats ProcessorStats
clusterProcessors []Processor
Configurer *awsmiddleware.Configurer
}

func (sd *ServiceDiscovery) init() {
Expand All @@ -31,8 +33,21 @@ func (sd *ServiceDiscovery) init() {
}
configProvider := credentialConfig.Credentials()
sd.svcEcs = ecs.New(configProvider, aws.NewConfig().WithRegion(sd.Config.TargetClusterRegion).WithMaxRetries(AwsSdkLevelRetryCount))

if sd.Configurer != nil {
if err := sd.Configurer.Configure(awsmiddleware.SDKv1(&sd.svcEcs.Handlers)); err != nil {
log.Printf("ERROR: Failed to configure ECS client: %v", err)
}
}

sd.svcEc2 = ec2.New(configProvider, aws.NewConfig().WithRegion(sd.Config.TargetClusterRegion).WithMaxRetries(AwsSdkLevelRetryCount))

if sd.Configurer != nil {
if err := sd.Configurer.Configure(awsmiddleware.SDKv1(&sd.svcEc2.Handlers)); err != nil {
log.Printf("ERROR: Failed to configure EC2 client: %v", err)
}
}

sd.initClusterProcessorPipeline()
}

Expand Down Expand Up @@ -70,8 +85,8 @@ func StartECSServiceDiscovery(sd *ServiceDiscovery, shutDownChan chan interface{

func (sd *ServiceDiscovery) work() {
sd.stats.ResetStats()
var err error
var clusterTasks []*DecoratedTask
var err error
for _, p := range sd.clusterProcessors {
clusterTasks, err = p.Process(sd.Config.TargetCluster, clusterTasks)
// Ignore partial result to avoid overwriting existing targets
Expand Down
42 changes: 42 additions & 0 deletions internal/ecsservicediscovery/servicediscovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import (
"sync"
"testing"

"github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)

func Test_ServiceDiscovery_InitPipelines(t *testing.T) {
Expand Down Expand Up @@ -70,3 +72,43 @@ func Test_StartECSServiceDiscovery_BadClusterConfig(t *testing.T) {
StartECSServiceDiscovery(p, nil, &wg)
assert.Equal(t, 0, len(p.clusterProcessors))
}

func Test_StartECSServiceDiscovery_WithConfigurer(t *testing.T) {
var wg sync.WaitGroup
var requestHandlers []awsmiddleware.RequestHandler

handler := new(awsmiddleware.MockHandler)
handler.On("ID").Return("mock")
handler.On("Position").Return(awsmiddleware.After)
handler.On("HandleRequest", mock.Anything, mock.Anything)
handler.On("HandleResponse", mock.Anything, mock.Anything)
requestHandlers = append(requestHandlers, handler)
middleware := new(awsmiddleware.MockMiddlewareExtension)
middleware.On("Handlers").Return(
requestHandlers,
[]awsmiddleware.ResponseHandler{handler},
)
c := awsmiddleware.NewConfigurer(middleware.Handlers())

config := ServiceDiscoveryConfig{
TargetCluster: "test",
TargetClusterRegion: "us-east-1",
}
p := &ServiceDiscovery{Config: &config, Configurer: c}
wg.Add(1)
StartECSServiceDiscovery(p, nil, &wg)
assert.Equal(t, 0, len(p.clusterProcessors))
}

func Test_StartECSServiceDiscovery_WithoutConfigurer(t *testing.T) {
var wg sync.WaitGroup

config := ServiceDiscoveryConfig{
TargetCluster: "test",
TargetClusterRegion: "us-east-1",
}
p := &ServiceDiscovery{Config: &config, Configurer: nil}
wg.Add(1)
StartECSServiceDiscovery(p, nil, &wg)
assert.Equal(t, 0, len(p.clusterProcessors))
}
35 changes: 31 additions & 4 deletions plugins/inputs/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ import (
_ "embed"
"sync"

"github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"go.uber.org/zap"

"github.com/aws/amazon-cloudwatch-agent/cfg/envconfig"
"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth"
"github.com/aws/amazon-cloudwatch-agent/internal/ecsservicediscovery"
)

Expand All @@ -23,6 +27,7 @@ type Prometheus struct {
mbCh chan PrometheusMetricBatch
shutDownChan chan interface{}
wg sync.WaitGroup
middleware awsmiddleware.Middleware
}

func (p *Prometheus) SampleConfig() string {
Expand All @@ -39,19 +44,33 @@ func (p *Prometheus) Gather(_ telegraf.Accumulator) error {

func (p *Prometheus) Start(accIn telegraf.Accumulator) error {
mth := NewMetricsTypeHandler()

receiver := &metricsReceiver{pmbCh: p.mbCh}
handler := &metricsHandler{mbCh: p.mbCh,
handler := &metricsHandler{
mbCh: p.mbCh,
acc: accIn,
calculator: NewCalculator(),
filter: NewMetricsFilter(),
clusterName: p.ClusterName,
mtHandler: mth,
}

ecssd := &ecsservicediscovery.ServiceDiscovery{Config: p.ECSSDConfig}
var configurer *awsmiddleware.Configurer
var ecssd *ecsservicediscovery.ServiceDiscovery
needEcssd := true

if p.middleware != nil {
configurer = awsmiddleware.NewConfigurer(p.middleware.Handlers())
if configurer != nil {
ecssd = &ecsservicediscovery.ServiceDiscovery{Config: p.ECSSDConfig, Configurer: configurer}
needEcssd = false
}
}
if needEcssd {
ecssd = &ecsservicediscovery.ServiceDiscovery{Config: p.ECSSDConfig}
Paramadon marked this conversation as resolved.
Show resolved Hide resolved
}

// Start ECS Service Discovery when in ECS
// https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus-Setup-autodiscovery-ecs.html
// Launch ECS Service Discovery as a goroutine
p.wg.Add(1)
go ecsservicediscovery.StartECSServiceDiscovery(ecssd, p.shutDownChan, &p.wg)

Expand All @@ -77,6 +96,14 @@ func init() {
return &Prometheus{
mbCh: make(chan PrometheusMetricBatch, 10000),
shutDownChan: make(chan interface{}),
middleware: agenthealth.NewAgentHealth(
zap.NewNop(),
okankoAMZ marked this conversation as resolved.
Show resolved Hide resolved
&agenthealth.Config{
IsUsageDataEnabled: envconfig.IsUsageDataEnabled(),
IsStatusCodeEnabled: true,
},
),
}

})
}
5 changes: 3 additions & 2 deletions plugins/outputs/cloudwatchlogs/cloudwatchlogs.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,9 @@ func init() {
middleware: agenthealth.NewAgentHealth(
zap.NewNop(),
&agenthealth.Config{
IsUsageDataEnabled: envconfig.IsUsageDataEnabled(),
Stats: &agent.StatsConfig{Operations: []string{"PutLogEvents"}},
IsUsageDataEnabled: envconfig.IsUsageDataEnabled(),
Stats: &agent.StatsConfig{Operations: []string{"PutLogEvents"}},
IsStatusCodeEnabled: true,
},
),
}
Expand Down
13 changes: 0 additions & 13 deletions plugins/processors/awsentity/processor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,6 @@ func newAddToMockEntityStore(rs *mockEntityStore) func(entitystore.LogGroupName,
}
}

func newMockGetMetricAttributesFromEntityStore() func() map[string]*string {
mockPlatform := "AWS::EC2"
mockInstanceID := "i-123456789"
mockAutoScalingGroup := "auto-scaling"
return func() map[string]*string {
return map[string]*string{
entitystore.PlatformType: &mockPlatform,
entitystore.InstanceIDKey: &mockInstanceID,
entitystore.ASGKey: &mockAutoScalingGroup,
}
}
}

func newMockGetServiceNameAndSource(service, source string) func() (string, string) {
return func() (string, string) {
return service, source
Expand Down
2 changes: 2 additions & 0 deletions plugins/processors/ec2tagger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ type Config struct {
Filename string `mapstructure:"shared_credential_file,omitempty"`
Token string `mapstructure:"token,omitempty"`
IMDSRetries int `mapstructure:"imds_retries,omitempty"`

MiddlewareID *component.ID `mapstructure:"middleware,omitempty"`
}

// Verify Config implements Processor interface.
Expand Down
21 changes: 12 additions & 9 deletions plugins/processors/ec2tagger/ec2tagger.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"
"time"

"github.com/amazon-contributing/opentelemetry-collector-contrib/extension/awsmiddleware"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
Expand Down Expand Up @@ -56,14 +57,14 @@ type Tagger struct {
ec2API ec2iface.EC2API
volumeSerialCache volume.Cache

Configurer *awsmiddleware.Configurer
sync.RWMutex //to protect ec2TagCache
}

// newTagger returns a new EC2 Tagger processor.
func newTagger(config *Config, logger *zap.Logger) *Tagger {
_, cancel := context.WithCancel(context.Background())
mdCredentialConfig := &configaws.CredentialConfig{}

p := &Tagger{
Config: config,
logger: logger,
Expand Down Expand Up @@ -280,14 +281,12 @@ func (t *Tagger) ebsVolumesRetrieved() bool {

// Start acts as input validation and serves the purpose of updating ec2 tags and ebs volumes if necessary.
// It will be called when OTel is enabling each processor
func (t *Tagger) Start(ctx context.Context, _ component.Host) error {
func (t *Tagger) Start(ctx context.Context, host component.Host) error {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding host so we can grab the handler and attach them to the client.

t.shutdownC = make(chan bool)
t.ec2TagCache = map[string]string{}

if err := t.deriveEC2MetadataFromIMDS(ctx); err != nil {
return err
}

t.tagFilters = []*ec2.Filter{
{
Name: aws.String("resource-type"),
Expand All @@ -298,12 +297,11 @@ func (t *Tagger) Start(ctx context.Context, _ component.Host) error {
Values: aws.StringSlice([]string{t.ec2MetadataRespond.instanceId}),
},
}

// if the customer said 'AutoScalingGroupName' (the CW dimension), do what they mean not what they said
// and filter for the EC2 tag name called 'aws:autoscaling:groupName'
useAllTags := len(t.EC2InstanceTagKeys) == 1 && t.EC2InstanceTagKeys[0] == "*"

if !useAllTags && len(t.EC2InstanceTagKeys) > 0 {
// if the customer said 'AutoScalingGroupName' (the CW dimension), do what they mean not what they said
Paramadon marked this conversation as resolved.
Show resolved Hide resolved
// and filter for the EC2 tag name called 'aws:autoscaling:groupName'
for i, key := range t.EC2InstanceTagKeys {
if cwDimensionASG == key {
t.EC2InstanceTagKeys[i] = Ec2InstanceTagKeyASG
Expand All @@ -315,7 +313,6 @@ func (t *Tagger) Start(ctx context.Context, _ component.Host) error {
Values: aws.StringSlice(t.EC2InstanceTagKeys),
})
}

if len(t.EC2InstanceTagKeys) > 0 || len(t.EBSDeviceKeys) > 0 {
ec2CredentialConfig := &configaws.CredentialConfig{
AccessKey: t.AccessKey,
Expand All @@ -327,6 +324,13 @@ func (t *Tagger) Start(ctx context.Context, _ component.Host) error {
Region: t.ec2MetadataRespond.region,
}
t.ec2API = t.ec2Provider(ec2CredentialConfig)

Paramadon marked this conversation as resolved.
Show resolved Hide resolved
if client, ok := t.ec2API.(*ec2.EC2); ok {
if t.Config.MiddlewareID != nil {
awsmiddleware.TryConfigure(t.logger, host, *t.Config.MiddlewareID, awsmiddleware.SDKv1(&client.Handlers))
}
}

go func() { //Async start of initial retrieval to prevent block of agent start
t.initialRetrievalOfTagsAndVolumes()
t.refreshLoopToUpdateTagsAndVolumes()
Expand All @@ -336,7 +340,6 @@ func (t *Tagger) Start(ctx context.Context, _ component.Host) error {
} else {
t.setStarted()
}

return nil
}

Expand Down
15 changes: 12 additions & 3 deletions translator/tocwconfig/sampleConfig/advanced_config_darwin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ extensions:
usage_flags:
mode: EC2
region_type: ACJ
agenthealth/statuscode:
is_status_code_enabled: true
is_usage_data_enabled: true
stats:
usage_flags:
mode: EC2
region_type: ACJ
entitystore:
mode: ec2
region: us-west-2
Expand All @@ -38,10 +45,11 @@ processors:
ec2_instance_tag_keys:
- AutoScalingGroupName
ec2_metadata_tags:
- ImageId
- InstanceId
- InstanceType
- ImageId
imds_retries: 1
middleware: agenthealth/statuscode
refresh_interval_seconds: 0s
receivers:
telegraf_cpu:
Expand Down Expand Up @@ -71,6 +79,7 @@ receivers:
service:
extensions:
- agenthealth/metrics
- agenthealth/statuscode
- entitystore
pipelines:
metrics/host:
Expand All @@ -80,11 +89,11 @@ service:
- awsentity/resource
- ec2tagger
receivers:
- telegraf_disk
- telegraf_netstat
- telegraf_swap
- telegraf_cpu
- telegraf_disk
- telegraf_mem
- telegraf_netstat
metrics/hostDeltaMetrics:
exporters:
- awscloudwatch
Expand Down
Loading
Loading