Skip to content

Commit

Permalink
Adding Statuscode Handler to Agent Health Extension (#1423)
Browse files Browse the repository at this point in the history
  • Loading branch information
Paramadon authored Dec 12, 2024
1 parent 2c8e72f commit f3d2e33
Show file tree
Hide file tree
Showing 18 changed files with 621 additions and 72 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ linters:
- nonamedreturns

issues:
new-from-rev: 3221f76
new-from-rev: 9af4477
5 changes: 3 additions & 2 deletions extension/agenthealth/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ import (
)

type Config struct {
IsUsageDataEnabled bool `mapstructure:"is_usage_data_enabled"`
Stats agent.StatsConfig `mapstructure:"stats"`
IsUsageDataEnabled bool `mapstructure:"is_usage_data_enabled"`
Stats *agent.StatsConfig `mapstructure:"stats,omitempty"`
IsStatusCodeEnabled bool `mapstructure:"is_status_code_enabled,omitempty"`
}

var _ component.Config = (*Config)(nil)
4 changes: 2 additions & 2 deletions extension/agenthealth/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ func TestLoadConfig(t *testing.T) {
},
{
id: component.NewIDWithName(TypeStr, "1"),
want: &Config{IsUsageDataEnabled: false, Stats: agent.StatsConfig{Operations: []string{agent.AllowAllOperations}}},
want: &Config{IsUsageDataEnabled: false, Stats: nil},
},
{
id: component.NewIDWithName(TypeStr, "2"),
want: &Config{IsUsageDataEnabled: true, Stats: agent.StatsConfig{Operations: []string{"ListBuckets"}}},
want: &Config{IsUsageDataEnabled: true, Stats: &agent.StatsConfig{Operations: []string{"ListBuckets"}}},
},
}
for _, testCase := range testCases {
Expand Down
29 changes: 25 additions & 4 deletions extension/agenthealth/extension.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"go.uber.org/zap"

"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats"
"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent"
"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/useragent"
)

Expand All @@ -24,11 +25,31 @@ var _ awsmiddleware.Extension = (*agentHealth)(nil)
func (ah *agentHealth) Handlers() ([]awsmiddleware.RequestHandler, []awsmiddleware.ResponseHandler) {
var responseHandlers []awsmiddleware.ResponseHandler
requestHandlers := []awsmiddleware.RequestHandler{useragent.NewHandler(ah.cfg.IsUsageDataEnabled)}
if ah.cfg.IsUsageDataEnabled {
req, res := stats.NewHandlers(ah.logger, ah.cfg.Stats)
requestHandlers = append(requestHandlers, req...)
responseHandlers = append(responseHandlers, res...)

if !ah.cfg.IsUsageDataEnabled {
ah.logger.Debug("Usage data is disabled, skipping stats handlers")
return requestHandlers, responseHandlers
}

statusCodeEnabled := ah.cfg.IsStatusCodeEnabled

var statsResponseHandlers []awsmiddleware.ResponseHandler
var statsRequestHandlers []awsmiddleware.RequestHandler
var statsConfig agent.StatsConfig
var agentStatsEnabled bool

if ah.cfg.Stats != nil {
statsConfig = *ah.cfg.Stats
agentStatsEnabled = true
} else {
agentStatsEnabled = false
}

statsRequestHandlers, statsResponseHandlers = stats.NewHandlers(ah.logger, statsConfig, statusCodeEnabled, agentStatsEnabled)

requestHandlers = append(requestHandlers, statsRequestHandlers...)
responseHandlers = append(responseHandlers, statsResponseHandlers...)

return requestHandlers, responseHandlers
}

Expand Down
23 changes: 22 additions & 1 deletion extension/agenthealth/extension_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,39 @@ import (
"github.com/stretchr/testify/assert"
"go.opentelemetry.io/collector/component/componenttest"
"go.uber.org/zap"

"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent"
)

func TestExtension(t *testing.T) {
ctx := context.Background()
cfg := &Config{IsUsageDataEnabled: true}
cfg := &Config{IsUsageDataEnabled: true, IsStatusCodeEnabled: true, Stats: &agent.StatsConfig{Operations: []string{"ListBuckets"}}}
extension := NewAgentHealth(zap.NewNop(), cfg)
assert.NotNil(t, extension)
assert.NoError(t, extension.Start(ctx, componenttest.NewNopHost()))
requestHandlers, responseHandlers := extension.Handlers()
// user agent, client stats, stats
assert.Len(t, requestHandlers, 3)
// client stats
assert.Len(t, responseHandlers, 2)
cfg.IsUsageDataEnabled = false
requestHandlers, responseHandlers = extension.Handlers()
// user agent
assert.Len(t, requestHandlers, 1)
assert.Len(t, responseHandlers, 0)
assert.NoError(t, extension.Shutdown(ctx))
}

func TestExtensionStatusCodeOnly(t *testing.T) {
ctx := context.Background()
cfg := &Config{IsUsageDataEnabled: true, IsStatusCodeEnabled: true}
extension := NewAgentHealth(zap.NewNop(), cfg)
assert.NotNil(t, extension)
assert.NoError(t, extension.Start(ctx, componenttest.NewNopHost()))
requestHandlers, responseHandlers := extension.Handlers()
// user agent only
assert.Len(t, requestHandlers, 1)
// client stats
assert.Len(t, responseHandlers, 1)
cfg.IsUsageDataEnabled = false
requestHandlers, responseHandlers = extension.Handlers()
Expand Down
6 changes: 1 addition & 5 deletions extension/agenthealth/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ import (

"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/extension"

"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent"
)

var (
Expand All @@ -28,9 +26,7 @@ func NewFactory() extension.Factory {
func createDefaultConfig() component.Config {
return &Config{
IsUsageDataEnabled: true,
Stats: agent.StatsConfig{
Operations: []string{agent.AllowAllOperations},
},
Stats: nil,
}
}

Expand Down
4 changes: 1 addition & 3 deletions extension/agenthealth/factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ import (
"github.com/stretchr/testify/assert"
"go.opentelemetry.io/collector/component/componenttest"
"go.opentelemetry.io/collector/extension/extensiontest"

"github.com/aws/amazon-cloudwatch-agent/extension/agenthealth/handler/stats/agent"
)

func TestCreateDefaultConfig(t *testing.T) {
cfg := NewFactory().CreateDefaultConfig()
assert.Equal(t, &Config{IsUsageDataEnabled: true, Stats: agent.StatsConfig{Operations: []string{agent.AllowAllOperations}}}, cfg)
assert.Equal(t, &Config{IsUsageDataEnabled: true, Stats: nil}, cfg)
assert.NoError(t, componenttest.CheckConfigStruct(cfg))
}

Expand Down
120 changes: 98 additions & 22 deletions extension/agenthealth/handler/stats/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,29 @@ const (
)

type Stats struct {
CpuPercent *float64 `json:"cpu,omitempty"`
MemoryBytes *uint64 `json:"mem,omitempty"`
FileDescriptorCount *int32 `json:"fd,omitempty"`
ThreadCount *int32 `json:"th,omitempty"`
LatencyMillis *int64 `json:"lat,omitempty"`
PayloadBytes *int `json:"load,omitempty"`
StatusCode *int `json:"code,omitempty"`
SharedConfigFallback *int `json:"scfb,omitempty"`
ImdsFallbackSucceed *int `json:"ifs,omitempty"`
AppSignals *int `json:"as,omitempty"`
EnhancedContainerInsights *int `json:"eci,omitempty"`
RunningInContainer *int `json:"ric,omitempty"`
RegionType *string `json:"rt,omitempty"`
Mode *string `json:"m,omitempty"`
EntityRejected *int `json:"ent,omitempty"`
CPUPercent *float64 `json:"cpu,omitempty"`
MemoryBytes *uint64 `json:"mem,omitempty"`
FileDescriptorCount *int32 `json:"fd,omitempty"`
ThreadCount *int32 `json:"th,omitempty"`
LatencyMillis *int64 `json:"lat,omitempty"`
PayloadBytes *int `json:"load,omitempty"`
StatusCode *int `json:"code,omitempty"`
SharedConfigFallback *int `json:"scfb,omitempty"`
ImdsFallbackSucceed *int `json:"ifs,omitempty"`
AppSignals *int `json:"as,omitempty"`
EnhancedContainerInsights *int `json:"eci,omitempty"`
RunningInContainer *int `json:"ric,omitempty"`
RegionType *string `json:"rt,omitempty"`
Mode *string `json:"m,omitempty"`
EntityRejected *int `json:"ent,omitempty"`
StatusCodes map[string][5]int `json:"codes,omitempty"` //represents status codes 200,400,408,413,429,
}

// Merge the other Stats into the current. If the field is not nil,
// then it'll overwrite the existing one.
func (s *Stats) Merge(other Stats) {
if other.CpuPercent != nil {
s.CpuPercent = other.CpuPercent
if other.CPUPercent != nil {
s.CPUPercent = other.CPUPercent
}
if other.MemoryBytes != nil {
s.MemoryBytes = other.MemoryBytes
Expand Down Expand Up @@ -80,6 +81,26 @@ func (s *Stats) Merge(other Stats) {
if other.EntityRejected != nil {
s.EntityRejected = other.EntityRejected
}
if other.StatusCodes != nil {
if s.StatusCodes == nil {
s.StatusCodes = make(map[string][5]int)
}

for key, value := range other.StatusCodes {
if existing, ok := s.StatusCodes[key]; ok {
s.StatusCodes[key] = [5]int{
existing[0] + value[0], // 200
existing[1] + value[1], // 400
existing[2] + value[2], // 408
existing[3] + value[3], // 413
existing[4] + value[4], // 429
}
} else {
s.StatusCodes[key] = value
}
}
}

}

func (s *Stats) Marshal() (string, error) {
Expand All @@ -104,6 +125,29 @@ func (of OperationsFilter) IsAllowed(operationName string) bool {
return of.allowAll || of.operations.Contains(operationName)
}

// StatsConfig holds the stats settings decoded from configuration: the
// operation names stats are gathered for and the usage flags to set on
// start up. Both fields are optional in the decoded configuration.
type StatsConfig struct {
// Operations are the allowed operation names to gather stats for.
Operations []string `mapstructure:"operations,omitempty"`
// UsageFlags are the usage flags to set on start up.
UsageFlags map[Flag]any `mapstructure:"usage_flags,omitempty"`
}

// StatusCodeOperations lists the operation names that
// NewStatusCodeOperationsFilter passes to NewOperationsFilter.
// Every name listed here also has an abbreviation in GetShortOperationName.
var StatusCodeOperations = []string{ // all the operations that are allowed
"PutRetentionPolicy",
"DescribeInstances",
"DescribeTags",
"DescribeVolumes",
"DescribeContainerInstances",
"DescribeServices",
"DescribeTaskDefinition",
"ListServices",
"ListTasks",
"DescribeTasks",
"CreateLogGroup",
"CreateLogStream",
"AssumeRole",
}

func NewOperationsFilter(operations ...string) OperationsFilter {
allowed := collections.NewSet[string](operations...)
return OperationsFilter{
Expand All @@ -112,9 +156,41 @@ func NewOperationsFilter(operations ...string) OperationsFilter {
}
}

type StatsConfig struct {
// Operations are the allowed operation names to gather stats for.
Operations []string `mapstructure:"operations,omitempty"`
// UsageFlags are the usage flags to set on start up.
UsageFlags map[Flag]any `mapstructure:"usage_flags,omitempty"`
// NewStatusCodeOperationsFilter returns an OperationsFilter built from the
// operation names in StatusCodeOperations.
func NewStatusCodeOperationsFilter() OperationsFilter {
	allowedOperations := StatusCodeOperations
	return NewOperationsFilter(allowedOperations...)
}

// shortOperationNames is the lookup table behind GetShortOperationName: it
// maps each supported long operation name to its abbreviated form.
var shortOperationNames = map[string]string{
	"PutRetentionPolicy":         "prp",
	"DescribeInstances":          "di",
	"DescribeTags":               "dt",
	"DescribeTasks":              "dts",
	"DescribeVolumes":            "dv",
	"DescribeContainerInstances": "dci",
	"DescribeServices":           "ds",
	"DescribeTaskDefinition":     "dtd",
	"ListServices":               "ls",
	"ListTasks":                  "lt",
	"CreateLogGroup":             "clg",
	"CreateLogStream":            "cls",
	"AssumeRole":                 "ar",
}

// GetShortOperationName returns the abbreviated form of the given operation
// name, or the empty string when the operation has no abbreviation.
// Matching is exact and case-sensitive.
func GetShortOperationName(operation string) string {
	// Indexing a map with a missing key yields the zero value "", which is
	// exactly the result required for unknown operations.
	return shortOperationNames[operation]
}
Loading

0 comments on commit f3d2e33

Please sign in to comment.