Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jmx metrics #385

Merged
merged 1 commit into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions msi/tools/amazon-cloudwatch-agent.wxs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
<ComponentRef Id='SchemaJSON' />
<ComponentRef Id='DownloaderEXE' />
<ComponentRef Id='TranslatorEXE' />
<ComponentRef Id='OtelJmxJar' />
<ComponentRef Id='CWAGENT_VERSION' />
<ComponentRef Id='LICENSE' />
<ComponentRef Id='NOTICE' />
Expand Down Expand Up @@ -119,6 +120,9 @@
<Component Id='TranslatorEXE' Guid='f4527006-edcb-4271-a971-039848bc8bb7' Win64='yes'>
<File Source='config-translator.exe' KeyPath='yes' Checksum='yes'/>
</Component>
<Component Id='OtelJmxJar' Guid='5af08c39-c483-4b08-8315-c99345684c4d' Win64='yes'>
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Required to add the jar to the MSI.

<File Source='opentelemetry-jmx-metrics.jar' KeyPath='yes'/>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be changed to packaging/opentelemetry-jmx-metrics.jar?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comes from the MSI zip, not the repo. The repo's pre-package make command copies the jar into the MSI zip.

</Component>
<Component Id='CWAGENT_VERSION' Guid='f4ddf7bf-48fc-41f6-a914-4153a7cf0afc' Win64='yes'>
<File Source='CWAGENT_VERSION' KeyPath='yes'/>
</Component>
Expand Down
2 changes: 1 addition & 1 deletion test/metric/metric_validation_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func IsAllValuesGreaterThanOrEqualToExpectedValue(metricName string, values []fl

totalSum := 0.0
for _, value := range values {
if value < 0 {
if value < 0 && expectedValue >= 0 {
log.Printf("Values are not all greater than or equal to zero for %s", metricName)
return false
}
Expand Down
2 changes: 1 addition & 1 deletion test/metric/metric_value_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func (n *MetricValueFetcher) Fetch(namespace, metricName string, metricSpecificD
Period: &metricQueryPeriod,
Stat: aws.String(string(stat)),
},
Id: aws.String(strings.ToLower(metricName)),
Id: aws.String(strings.ToLower(strings.ReplaceAll(strings.ReplaceAll(metricName, "-", "_"), ".", "_"))),
},
}

Expand Down
75 changes: 75 additions & 0 deletions test/metric_value_benchmark/agent_configs/jmx_kafka_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"agent": {
"debug": true
},
"metrics": {
"namespace": "MetricValueBenchmarkJMXTest",
"force_flush_interval": 5,
"aggregation_dimensions": [
[
"InstanceId"
]
],
"append_dimensions": {
"InstanceId": "${aws:InstanceId}"
},
"metrics_collected": {
"jmx": [
{
"endpoint": "localhost:2000",
"kafka": {
"measurement": [
"kafka.unclean.election.rate",
"kafka.request.time.total",
"kafka.request.time.avg",
"kafka.request.time.99p",
"kafka.request.time.50p",
"kafka.request.queue",
"kafka.request.failed",
"kafka.request.count",
"kafka.purgatory.size",
"kafka.partition.under_replicated",
"kafka.partition.offline",
"kafka.partition.count",
"kafka.network.io",
"kafka.message.count",
"kafka.max.lag",
"kafka.leader.election.rate",
"kafka.isr.operation.count",
"kafka.controller.active.count"
]
}
},
{
"endpoint": "localhost:2010",
"kafka-consumer": {
"measurement": [
"kafka.consumer.total.records-consumed-rate",
"kafka.consumer.total.bytes-consumed-rate",
"kafka.consumer.records-consumed-rate",
"kafka.consumer.fetch-rate",
"kafka.consumer.bytes-consumed-rate"
]
}
},
{
"endpoint": "localhost:2020",
"kafka-producer": {
"measurement": [
"kafka.producer.io-wait-time-ns-avg",
"kafka.producer.record-retry-rate",
"kafka.producer.compression-rate",
"kafka.producer.outgoing-byte-rate",
"kafka.producer.request-rate",
"kafka.producer.byte-rate",
"kafka.producer.request-latency-avg",
"kafka.producer.response-rate",
"kafka.producer.record-error-rate",
"kafka.producer.record-send-rate"
]
}
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"agent": {
"debug": true
},
"metrics": {
"namespace": "MetricValueBenchmarkJMXTest",
"force_flush_interval": 5,
"aggregation_dimensions": [
[
"InstanceId"
]
],
"append_dimensions": {
"InstanceId": "${aws:InstanceId}"
},
"metrics_collected": {
"jmx": [
{
"endpoint": "localhost:2030",
"tomcat": {
"measurement": [
"tomcat.traffic",
"tomcat.threads",
"tomcat.sessions",
"tomcat.request_count",
"tomcat.processing_time",
"tomcat.max_time",
"tomcat.errors"
]
},
"jvm": {
"measurement": [
"jvm.threads.count",
"jvm.memory.pool.used",
"jvm.memory.pool.max",
"jvm.memory.pool.init",
"jvm.memory.pool.committed",
"jvm.memory.nonheap.used",
"jvm.memory.nonheap.max",
"jvm.memory.nonheap.init",
"jvm.memory.nonheap.committed",
"jvm.memory.heap.used",
"jvm.memory.heap.max",
"jvm.memory.heap.init",
"jvm.memory.heap.committed",
"jvm.gc.collections.elapsed",
"jvm.gc.collections.count",
"jvm.classes.loaded"
]
}
}
]
}
}
}
Binary file not shown.
154 changes: 154 additions & 0 deletions test/metric_value_benchmark/jmx_kafka_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

//go:build !windows

package metric_value_benchmark

import (
"log"
"time"

"github.com/aws/amazon-cloudwatch-agent-test/test/metric"
"github.com/aws/amazon-cloudwatch-agent-test/test/metric/dimension"
"github.com/aws/amazon-cloudwatch-agent-test/test/status"
"github.com/aws/amazon-cloudwatch-agent-test/test/test_runner"
"github.com/aws/amazon-cloudwatch-agent-test/util/common"
)

// JMXKafkaTestRunner is the metric-value-benchmark runner for Kafka metrics
// collected over JMX. It embeds test_runner.BaseTestRunner and adds the
// Kafka-specific setup, metric list, and validation in its own methods.
type JMXKafkaTestRunner struct {
// Embedded base runner; its SetupBeforeAgentRun is invoked explicitly by
// this type's SetupBeforeAgentRun, and its DimensionFactory is used during
// validation.
test_runner.BaseTestRunner
}

// Compile-time assertion that *JMXKafkaTestRunner satisfies test_runner.ITestRunner.
var _ test_runner.ITestRunner = (*JMXKafkaTestRunner)(nil)

// Validate runs validateJMXMetric once for every name returned by
// GetMeasuredMetrics, preserving that order, and returns the per-metric
// results grouped under the name reported by GetTestName.
func (t *JMXKafkaTestRunner) Validate() status.TestGroupResult {
	metricNames := t.GetMeasuredMetrics()

	// Capacity is known up front, so append never reallocates.
	results := make([]status.TestResult, 0, len(metricNames))
	for _, name := range metricNames {
		results = append(results, t.validateJMXMetric(name))
	}

	return status.TestGroupResult{
		Name:        t.GetTestName(),
		TestResults: results,
	}
}

// GetTestName returns the fixed name of this test group, "JMXKafka".
func (t *JMXKafkaTestRunner) GetTestName() string {
	const testName = "JMXKafka"
	return testName
}

// GetAgentConfigFileName returns the file name of the agent configuration
// used by this test, "jmx_kafka_config.json".
func (t *JMXKafkaTestRunner) GetAgentConfigFileName() string {
	const configFileName = "jmx_kafka_config.json"
	return configFileName
}

// GetAgentRunDuration returns the agent run duration for this test: two minutes.
func (t *JMXKafkaTestRunner) GetAgentRunDuration() time.Duration {
	const agentRunDuration = 2 * time.Minute
	return agentRunDuration
}

// SetupBeforeAgentRun performs the base runner's setup and then hands a list
// of shell commands to common.RunCommands. Read in order, the commands:
//
//   - download and unpack Kafka 2.13-3.6.2;
//   - prepend an "export JMX_PORT=..." line to the broker (2000), console
//     consumer (2010) and console producer (2020) start scripts;
//   - mark the Kafka scripts executable;
//   - launch a console producer fed by `yes` and a console consumer on the
//     "quickstart-events" topic, both in the background with output in /tmp;
//   - download and unpack ZooKeeper 3.8.4, write a zoo.cfg, and start it;
//   - launch the Kafka broker in the background with output in /tmp.
//
// It returns the first error from the base setup or from common.RunCommands.
//
// NOTE(review): the producer and consumer are launched before ZooKeeper and
// the broker; presumably the clients retry until the broker is reachable —
// confirm against common.RunCommands semantics and CI behaviour.
func (t *JMXKafkaTestRunner) SetupBeforeAgentRun() error {
	if err := t.BaseTestRunner.SetupBeforeAgentRun(); err != nil {
		return err
	}

	log.Println("set up zookeeper and kafka")

	setupCommands := []string{
		// Fetch and unpack Kafka.
		"curl https://dlcdn.apache.org/kafka/3.6.2/kafka_2.13-3.6.2.tgz -o kafka_2.13-3.6.2.tgz",
		"tar -xzf kafka_2.13-3.6.2.tgz",

		// Prepend a JMX_PORT export to each start script.
		"echo 'export JMX_PORT=2000'|cat - kafka_2.13-3.6.2/bin/kafka-server-start.sh > /tmp/kafka-server-start.sh && mv /tmp/kafka-server-start.sh kafka_2.13-3.6.2/bin/kafka-server-start.sh",
		"echo 'export JMX_PORT=2010'|cat - kafka_2.13-3.6.2/bin/kafka-console-consumer.sh > /tmp/kafka-console-consumer.sh && mv /tmp/kafka-console-consumer.sh kafka_2.13-3.6.2/bin/kafka-console-consumer.sh",
		"echo 'export JMX_PORT=2020'|cat - kafka_2.13-3.6.2/bin/kafka-console-producer.sh > /tmp/kafka-console-producer.sh && mv /tmp/kafka-console-producer.sh kafka_2.13-3.6.2/bin/kafka-console-producer.sh",

		// The rewritten scripts were recreated via redirect + mv, so set the
		// execute bit on them (and on kafka-run-class.sh) explicitly.
		"sudo chmod +x kafka_2.13-3.6.2/bin/kafka-run-class.sh",
		"sudo chmod +x kafka_2.13-3.6.2/bin/kafka-server-start.sh",
		"sudo chmod +x kafka_2.13-3.6.2/bin/kafka-console-consumer.sh",
		"sudo chmod +x kafka_2.13-3.6.2/bin/kafka-console-producer.sh",

		// Background producer and consumer on the quickstart-events topic.
		"(yes | nohup kafka_2.13-3.6.2/bin/kafka-console-producer.sh --topic quickstart-events --bootstrap-server localhost:9092) > /tmp/kafka-console-producer-logs.txt 2>&1 &",
		"kafka_2.13-3.6.2/bin/kafka-console-consumer.sh --topic quickstart-events --from-beginning --bootstrap-server localhost:9092 > /tmp/kafka-console-consumer-logs.txt 2>&1 &",

		// Fetch, configure and start ZooKeeper.
		"curl https://dlcdn.apache.org/zookeeper/zookeeper-3.8.4/apache-zookeeper-3.8.4-bin.tar.gz -o apache-zookeeper-3.8.4-bin.tar.gz",
		"tar -xzf apache-zookeeper-3.8.4-bin.tar.gz",
		"mkdir apache-zookeeper-3.8.4-bin/data",
		"touch apache-zookeeper-3.8.4-bin/conf/zoo.cfg",
		"echo -e 'tickTime = 2000\ndataDir = ../data\nclientPort = 2181\ninitLimit = 5\nsyncLimit = 2\n' >> apache-zookeeper-3.8.4-bin/conf/zoo.cfg",
		"sudo apache-zookeeper-3.8.4-bin/bin/zkServer.sh start",

		// Background Kafka broker.
		"sudo kafka_2.13-3.6.2/bin/kafka-server-start.sh kafka_2.13-3.6.2/config/server.properties > /tmp/kafka-server-start-logs.txt 2>&1 &",
	}

	return common.RunCommands(setupCommands)
}

// GetMeasuredMetrics returns the names of all metrics this test validates:
// the kafka.* broker metrics first, then the kafka.consumer.* metrics, then
// the kafka.producer.* metrics. A new slice is built on every call.
func (t *JMXKafkaTestRunner) GetMeasuredMetrics() []string {
	brokerMetrics := []string{
		"kafka.unclean.election.rate",
		"kafka.request.time.total",
		"kafka.request.time.avg",
		"kafka.request.time.99p",
		"kafka.request.time.50p",
		"kafka.request.queue",
		"kafka.request.failed",
		"kafka.request.count",
		"kafka.purgatory.size",
		"kafka.partition.under_replicated",
		"kafka.partition.offline",
		"kafka.partition.count",
		"kafka.network.io",
		"kafka.message.count",
		"kafka.max.lag",
		"kafka.leader.election.rate",
		"kafka.isr.operation.count",
		"kafka.controller.active.count",
	}
	consumerMetrics := []string{
		"kafka.consumer.total.records-consumed-rate",
		"kafka.consumer.total.bytes-consumed-rate",
		"kafka.consumer.records-consumed-rate",
		"kafka.consumer.fetch-rate",
		"kafka.consumer.bytes-consumed-rate",
	}
	producerMetrics := []string{
		"kafka.producer.io-wait-time-ns-avg",
		"kafka.producer.record-retry-rate",
		"kafka.producer.compression-rate",
		"kafka.producer.outgoing-byte-rate",
		"kafka.producer.request-rate",
		"kafka.producer.byte-rate",
		"kafka.producer.request-latency-avg",
		"kafka.producer.response-rate",
		"kafka.producer.record-error-rate",
		"kafka.producer.record-send-rate",
	}

	// Sized exactly so the result has the same length and capacity as a
	// single flat literal would.
	all := make([]string, 0, len(brokerMetrics)+len(consumerMetrics)+len(producerMetrics))
	all = append(all, brokerMetrics...)
	all = append(all, consumerMetrics...)
	all = append(all, producerMetrics...)
	return all
}

// validateJMXMetric checks one metric and returns a TestResult named after
// it. The result is FAILED unless all of the following hold:
//
//   - the InstanceId dimension is resolved by the runner's DimensionFactory;
//   - fetching the metric's AVERAGE statistic from jmxNamespace at
//     metric.HighResolutionStatPeriod returns no error;
//   - metric.IsAllValuesGreaterThanOrEqualToExpectedValue accepts the fetched
//     values with an expected value of 0.
func (t *JMXKafkaTestRunner) validateJMXMetric(metricName string) status.TestResult {
	// Start pessimistic; only the final check flips the status.
	result := status.TestResult{
		Name:   metricName,
		Status: status.FAILED,
	}

	instructions := []dimension.Instruction{
		{
			Key:   "InstanceId",
			Value: dimension.UnknownDimensionValue(),
		},
	}
	dims, unresolved := t.DimensionFactory.GetDimensions(instructions)
	if len(unresolved) > 0 {
		return result
	}

	fetcher := metric.MetricValueFetcher{}
	values, err := fetcher.Fetch(jmxNamespace, metricName, dims, metric.AVERAGE, metric.HighResolutionStatPeriod)
	// Logged before the error check so the values appear even when Fetch fails.
	log.Printf("metric values are %v", values)
	if err != nil {
		log.Printf("err: %v\n", err)
		return result
	}

	if metric.IsAllValuesGreaterThanOrEqualToExpectedValue(metricName, values, 0) {
		result.Status = status.SUCCESSFUL
	}
	return result
}
Loading
Loading