Skip to content

Commit

Permalink
Add New Metrics For Where Customer Are Using The Agent (#913)
Browse files Browse the repository at this point in the history
  • Loading branch information
sethAmazon authored Oct 23, 2023
1 parent b6c6bfa commit 64d9a5c
Show file tree
Hide file tree
Showing 41 changed files with 183 additions and 22 deletions.
2 changes: 1 addition & 1 deletion cmd/config-downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func main() {

mode = sdkutil.DetectAgentMode(mode)

region = util.DetectRegion(mode, cc.CredentialsMap())
region, _ = util.DetectRegion(mode, cc.CredentialsMap())

if region == "" && downloadLocation != locationDefault {
fmt.Println("Unable to determine aws-region.")
Expand Down
41 changes: 36 additions & 5 deletions handlers/agentinfo/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,22 @@ import (
"github.com/aws/amazon-cloudwatch-agent/cfg/envconfig"
"github.com/aws/amazon-cloudwatch-agent/internal/util/collections"
"github.com/aws/amazon-cloudwatch-agent/receiver/adapter"
translatorConfig "github.com/aws/amazon-cloudwatch-agent/translator/config"
)

const (
// versionFilename and unknownVersion are used for agent version lookup;
// NOTE(review): presumably the version file name and the fallback value when
// it cannot be read — confirm against the version-loading code.
versionFilename = "CWAGENT_VERSION"
unknownVersion = "Unknown"
// updateInterval is added to the current time to schedule the next refresh
// of the agent stats (see nextUpdate).
updateInterval = time.Minute
// Region types: short codes reported in the agent stats under the "rt" key.
// NOTE(review): each code presumably names the source the region was
// resolved from (agent config JSON, credentials map, EC2/ECS metadata, or
// not found) — confirm against the region detection code.
AgentConfigJson = "ACJ"
CredsMap = "CM"
EC2Metadata = "EC2M"
ECSMetadata = "ECSM"
RegionNotFound = "RNF"
// Modes: short codes reported in the agent stats under the "m" key. The
// translator Context.SetMode maps the configured mode onto these values;
// both on-premise config spellings map to ModeOnPrem.
ModeEC2 = "EC2"
ModeOnPrem = "OP"
ModeWithIRSA = "WI"
)

var (
Expand All @@ -50,10 +60,12 @@ var (
id = uuid.NewString()
sharedConfigFallback atomic.Bool
imdsFallbackSucceed atomic.Bool
isRunningAsRoot = defaultIsRunningAsRoot
runInContainer *int
regionType *string
mode *string
)

var isRunningAsRoot = defaultIsRunningAsRoot

type AgentInfo interface {
RecordOpData(time.Duration, int, error)
StatsHeader() string
Expand All @@ -77,15 +89,21 @@ type agentStats struct {
StatusCode *int `json:"code,omitempty"`
SharedConfigFallback *int `json:"scfb,omitempty"`
ImdsFallbackSucceed *int `json:"ifs,omitempty"`
RunInContainer *int `json:"ric,omitempty"`
RegionType *string `json:"rt,omitempty"`
Mode *string `json:"m,omitempty"`
}

func New(groupName string) AgentInfo {
return newAgentInfo(groupName)
func New(groupName string, regionType string, mode string) AgentInfo {
return newAgentInfo(groupName, regionType, mode)
}

func newAgentInfo(groupName string) *agentInfo {
func newAgentInfo(groupName string, regionTypeInput string, modeInput string) *agentInfo {
ai := new(agentInfo)
ai.userAgent = getUserAgent(groupName, fullVersion, receivers, processors, exporters, isUsageDataEnabled())
runInContainer = runInContainerFunc()
regionType = aws.String(regionTypeInput)
mode = aws.String(modeInput)
if isUsageDataEnabled() {
ai.proc, _ = process.NewProcess(int32(os.Getpid()))
if ai.proc == nil {
Expand All @@ -96,6 +114,9 @@ func newAgentInfo(groupName string) *agentInfo {
MemoryBytes: ai.memoryBytes(),
FileDescriptorCount: ai.fileDescriptorCount(),
ThreadCount: ai.threadCount(),
RunInContainer: runInContainer,
RegionType: regionType,
Mode: mode,
}
ai.statsHeader = getAgentStats(stats)
ai.nextUpdate = time.Now().Add(updateInterval)
Expand Down Expand Up @@ -126,6 +147,9 @@ func (ai *agentInfo) RecordOpData(latency time.Duration, payloadBytes int, err e
stats.ThreadCount = ai.threadCount()
stats.SharedConfigFallback = getSharedConfigFallback()
stats.ImdsFallbackSucceed = succeedImdsFallback()
stats.RunInContainer = runInContainer
stats.RegionType = regionType
stats.Mode = mode
ai.nextUpdate = now.Add(updateInterval)
}

Expand Down Expand Up @@ -330,3 +354,10 @@ func getSharedConfigFallback() *int {
// SetImdsFallbackSucceed sets the package-level imdsFallbackSucceed flag to
// true. The flag is an atomic.Bool, so the store itself is atomic.
// NOTE(review): presumably read back via succeedImdsFallback when the stats
// are refreshed — confirm.
func SetImdsFallbackSucceed() {
imdsFallbackSucceed.Store(true)
}

// runInContainerFunc returns a pointer to 1 when the environment variable
// named by translatorConfig.RUN_IN_CONTAINER equals
// translatorConfig.RUN_IN_CONTAINER_TRUE, and a pointer to 0 otherwise.
func runInContainerFunc() *int {
	inContainer := 0
	envValue := os.Getenv(translatorConfig.RUN_IN_CONTAINER)
	if envValue == translatorConfig.RUN_IN_CONTAINER_TRUE {
		inContainer = 1
	}
	return aws.Int(inContainer)
}
16 changes: 14 additions & 2 deletions handlers/agentinfo/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ import (
)

func TestNew(t *testing.T) {
ai := New("")
ai := New("", "", "")
expectedUserAgentRegex := `^CWAgent/Unknown \(.*\) ` +
`ID/[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12}$`

assert.Regexp(t, regexp.MustCompile(expectedUserAgentRegex), ai.UserAgent())
}

func TestRecordOpData(t *testing.T) {
ai := newAgentInfo("")
ai := newAgentInfo("", "", "")

stats := ai.StatsHeader()
actual := agentStats{}
Expand Down Expand Up @@ -124,8 +124,20 @@ func TestGetAgentStats(t *testing.T) {
PayloadBytes: aws.Int(5678),
StatusCode: aws.Int(200),
ImdsFallbackSucceed: aws.Int(1),
RunInContainer: aws.Int(0),
RegionType: aws.String(EC2Metadata),
Mode: aws.String(ModeWithIRSA),
}

assert.Equal(t, "\"cpu\":1.2,\"mem\":123,\"fd\":456,\"th\":789,\"lat\":1234,\"load\":5678,\"code\":200,\"ifs\":1,\"ric\":0,\"rt\":\"EC2M\",\"m\":\"WI\"", getAgentStats(stats))

stats.Mode = nil
assert.Equal(t, "\"cpu\":1.2,\"mem\":123,\"fd\":456,\"th\":789,\"lat\":1234,\"load\":5678,\"code\":200,\"ifs\":1,\"ric\":0,\"rt\":\"EC2M\"", getAgentStats(stats))

stats.RegionType = nil
assert.Equal(t, "\"cpu\":1.2,\"mem\":123,\"fd\":456,\"th\":789,\"lat\":1234,\"load\":5678,\"code\":200,\"ifs\":1,\"ric\":0", getAgentStats(stats))

stats.RunInContainer = nil
assert.Equal(t, "\"cpu\":1.2,\"mem\":123,\"fd\":456,\"th\":789,\"lat\":1234,\"load\":5678,\"code\":200,\"ifs\":1", getAgentStats(stats))

stats.ImdsFallbackSucceed = nil
Expand Down
2 changes: 1 addition & 1 deletion plugins/outputs/cloudwatch/cloudwatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func (c *CloudWatch) Capabilities() consumer.Capabilities {
}

func (c *CloudWatch) Start(_ context.Context, host component.Host) error {
c.agentInfo = agentinfo.New("")
c.agentInfo = agentinfo.New("", c.config.RegionType, c.config.Mode)
c.publisher, _ = publisher.NewPublisher(
publisher.NewNonBlockingFifoQueue(metricChanBufferSize),
maxConcurrentPublisher,
Expand Down
2 changes: 1 addition & 1 deletion plugins/outputs/cloudwatch/cloudwatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ func newCloudWatchClient(
MaxDatumsPerCall: defaultMaxDatumsPerCall,
MaxValuesPerDatum: defaultMaxValuesPerDatum,
},
agentInfo: agentinfo.New(""),
agentInfo: agentinfo.New("", "", ""),
}
cloudwatch.startRoutines()
return cloudwatch
Expand Down
4 changes: 3 additions & 1 deletion plugins/outputs/cloudwatch/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ import (
"go.opentelemetry.io/collector/component"
)

// Config represent a configuration for the CloudWatch logs exporter.
// Config represents the configuration for the CloudWatch metrics exporter.
type Config struct {
Region string `mapstructure:"region"`
EndpointOverride string `mapstructure:"endpoint_override,omitempty"`
AccessKey string `mapstructure:"access_key,omitempty"`
SecretKey string `mapstructure:"secret_key,omitempty"`
RoleARN string `mapstructure:"role_arn,omitempty"`
RegionType string `mapstructure:"region_type,omitempty"`
Mode string `mapstructure:"mode,omitempty"`
Profile string `mapstructure:"profile,omitempty"`
SharedCredentialFilename string `mapstructure:"shared_credential_file,omitempty"`
Token string `mapstructure:"token,omitempty"`
Expand Down
6 changes: 4 additions & 2 deletions plugins/outputs/cloudwatchlogs/cloudwatchlogs.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const (
LogEntryField = "value"

defaultFlushTimeout = 5 * time.Second
eventHeaderSize = 26
eventHeaderSize = 200
truncatedSuffix = "[Truncated...]"
msgSizeLimit = 256*1024 - eventHeaderSize

Expand All @@ -43,6 +43,8 @@ const (

type CloudWatchLogs struct {
Region string `toml:"region"`
RegionType string `toml:"region_type"`
Mode string `toml:"mode"`
EndpointOverride string `toml:"endpoint_override"`
AccessKey string `toml:"access_key"`
SecretKey string `toml:"secret_key"`
Expand Down Expand Up @@ -133,7 +135,7 @@ func (c *CloudWatchLogs) getDest(t Target) *cwDest {
Logger: configaws.SDKLogger{},
},
)
agentInfo := agentinfo.New(t.Group)
agentInfo := agentinfo.New(t.Group, c.RegionType, c.Mode)
client.Handlers.Build.PushBackNamed(handlers.NewRequestCompressionHandler([]string{"PutLogEvents"}))
client.Handlers.Build.PushBackNamed(handlers.NewCustomHeaderHandler("User-Agent", agentInfo.UserAgent()))
client.Handlers.Build.PushBackNamed(handlers.NewDynamicCustomHeaderHandler("X-Amz-Agent-Stats", agentInfo.StatsHeader))
Expand Down
2 changes: 1 addition & 1 deletion plugins/outputs/cloudwatchlogs/pusher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,6 @@ func TestResendWouldStopAfterExhaustedRetries(t *testing.T) {

func testPreparation(retention int, s *svcMock, flushTimeout time.Duration, retryDuration time.Duration) (chan struct{}, *pusher) {
stop := make(chan struct{})
p := NewPusher(Target{"G", "S", retention}, s, flushTimeout, retryDuration, models.NewLogger("cloudwatchlogs", "test", ""), stop, &wg, agentinfo.New(""))
p := NewPusher(Target{"G", "S", retention}, s, flushTimeout, retryDuration, models.NewLogger("cloudwatchlogs", "test", ""), stop, &wg, agentinfo.New("", "", ""))
return stop, p
}
10 changes: 10 additions & 0 deletions translator/context/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"log"
"os"

"github.com/aws/amazon-cloudwatch-agent/handlers/agentinfo"
"github.com/aws/amazon-cloudwatch-agent/translator/config"
)

Expand Down Expand Up @@ -41,6 +42,7 @@ type Context struct {
multiConfig string
outputTomlFilePath string
mode string
shortMode string
credentials map[string]string
proxy map[string]string
ssl map[string]string
Expand Down Expand Up @@ -96,6 +98,10 @@ func (ctx *Context) Mode() string {
return ctx.mode
}

// ShortMode returns the abbreviated mode code stored in ctx.shortMode.
// SetMode assigns it one of the agentinfo Mode* constants alongside ctx.mode.
func (ctx *Context) ShortMode() string {
return ctx.shortMode
}

func (ctx *Context) Credentials() map[string]string {
return ctx.credentials
}
Expand All @@ -112,12 +118,16 @@ func (ctx *Context) SetMode(mode string) {
switch mode {
case config.ModeEC2:
ctx.mode = config.ModeEC2
ctx.shortMode = agentinfo.ModeEC2
case config.ModeOnPrem:
ctx.mode = config.ModeOnPrem
ctx.shortMode = agentinfo.ModeOnPrem
case config.ModeOnPremise:
ctx.mode = config.ModeOnPremise
ctx.shortMode = agentinfo.ModeOnPrem
case config.ModeWithIRSA:
ctx.mode = config.ModeWithIRSA
ctx.shortMode = agentinfo.ModeWithIRSA
default:
log.Panicf("Invalid mode %s. Valid mode values are %s, %s, %s and %s.", mode, config.ModeEC2, config.ModeOnPrem, config.ModeOnPremise, config.ModeWithIRSA)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/advanced_config_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/basic_config_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-east-1
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/basic_config_windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/collectd_config_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,6 @@
force_flush_interval = "60s"
log_stream_name = "LOG_STREAM_NAME"
region = "us-west-2"
region_type = "ACJ"
mode = "EC2"
role_arn = "log_role_arn_value_test"
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ exporters:
max_values_per_datum: 5000
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
role_arn: metrics_role_arn_value_test
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/complete_linux_config.conf
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,6 @@
force_flush_interval = "60s"
log_stream_name = "LOG_STREAM_NAME"
region = "us-west-2"
region_type = "ACJ"
mode = "EC2"
role_arn = "log_role_arn_value_test"
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/complete_linux_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ exporters:
max_values_per_datum: 5000
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
role_arn: metrics_role_arn_value_test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,6 @@
force_flush_interval = "60s"
log_stream_name = "LOG_STREAM_NAME"
region = "us-west-2"
region_type = "ACJ"
mode = "EC2"
role_arn = "log_role_arn_value_test"
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ exporters:
max_values_per_datum: 5000
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
role_arn: metrics_role_arn_value_test
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/delta_config_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-east-1
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-east-1
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
2 changes: 2 additions & 0 deletions translator/tocwconfig/sampleConfig/drop_origin_linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-west-2
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ exporters:
max_values_per_datum: 150
namespace: CWAgent
region: us-east-1
region_type: "ACJ"
mode: "EC2"
resource_to_telemetry_conversion:
enabled: true
extensions: {}
Expand Down
Loading

0 comments on commit 64d9a5c

Please sign in to comment.