Skip to content

Commit

Permalink
Merge pull request #34 from wyTrivail/terraform-fix
Browse files Browse the repository at this point in the history
fix skip logic in validator
  • Loading branch information
wyTrivail authored Oct 20, 2020
2 parents da5b1c1 + 425d182 commit 6e7afc9
Show file tree
Hide file tree
Showing 21 changed files with 370 additions and 33 deletions.
6 changes: 6 additions & 0 deletions terraform/ec2/amis.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ variable "ami_family" {
start_command = "sudo /opt/aws/aws-otel-collector/bin/aws-otel-collector-ctl -c /tmp/ot-default.yml -a start"
connection_type = "ssh"
user_data = ""
soaking_cwagent_config = "../template/cwagent-config/soaking-linux.json.tpl"
soaking_cwagent_config_destination = "/tmp/cwagent-config.json"
cwagent_download_command = "sudo rpm -Uvh https://s3.amazonaws.com/amazoncloudwatch-agent/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm"
cwagent_start_command = "sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -c file:/tmp/cwagent-config.json -s"

soaking_cpu_metric_name = "procstat_cpu_usage"
}
windows = {
login_user = "Administrator"
Expand Down
3 changes: 3 additions & 0 deletions terraform/ec2/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,10 @@ data "template_file" "docker_compose" {
otel_endpoint = "${aws_instance.aoc.private_ip}:55680"
}
}

resource "null_resource" "sample-app-validator" {
# skip this validation if it's a soaking test
count = var.soaking ? 0 : 1
provisioner "file" {
content = data.template_file.docker_compose.rendered
destination = "/tmp/docker-compose.yml"
Expand Down
101 changes: 101 additions & 0 deletions terraform/ec2/soaking.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# ------------------------------------------------------------------------
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
# -------------------------------------------------------------------------

## Render the CloudWatch agent (cwagent) config that gets installed on the
## instance to collect metrics from otel-collector.
# Rendered only when var.soaking is true; otherwise count is 0 and nothing is produced.
data "template_file" "cwagent_config" {
count = var.soaking ? 1 : 0
# the template path is looked up in local.ami_family
template = file(local.ami_family["soaking_cwagent_config"])

vars = {
# value made available to the template as soaking_metric_namespace
soaking_metric_namespace = var.soaking_metric_namespace
}
}

# Uploads the rendered cwagent config to the test instance, then downloads and
# starts the CloudWatch agent there. Created only when var.soaking is true.
resource "null_resource" "install_cwagent" {
  count = var.soaking ? 1 : 0

  # A connection block placed at resource level applies to every provisioner
  # in this resource, so it is declared once instead of once per provisioner.
  connection {
    type        = local.connection_type
    user        = local.login_user
    host        = aws_instance.aoc.public_ip
    private_key = local.connection_type == "ssh" ? data.aws_s3_bucket_object.ssh_private_key.body : null
    password    = local.connection_type == "winrm" ? rsadecrypt(aws_instance.aoc.password_data, data.aws_s3_bucket_object.ssh_private_key.body) : null
  }

  # Step 1: copy the rendered cwagent config onto the instance.
  provisioner "file" {
    destination = local.ami_family["soaking_cwagent_config_destination"]
    content     = data.template_file.cwagent_config[0].rendered
  }

  # Step 2: run the AMI family's download command, then its start command.
  # Provisioners run in declaration order, so the config is in place first.
  provisioner "remote-exec" {
    inline = [
      local.ami_family["cwagent_download_command"],
      local.ami_family["cwagent_start_command"]
    ]
  }
}

## Create CloudWatch alarms based on the metrics emitted by cwagent.
## Every resource below is gated on var.soaking so that non-soaking runs do not
## pay the 2-minute wait or create an alarm on a metric that is never published.

# Wait 2 minutes after cwagent is installed so its metrics are available in CloudWatch.
resource "time_sleep" "wait_2_minutes" {
  count = var.soaking ? 1 : 0

  # depend on the whole resource rather than on instance [0], which does not
  # exist when var.soaking is false
  depends_on = [null_resource.install_cwagent]

  create_duration = "120s"
}

# CPU alarm: compares the 60-second average of the collector's CPU usage metric
# against a threshold of 50, over 2 evaluation periods.
resource "aws_cloudwatch_metric_alarm" "cpu_alarm" {
  count      = var.soaking ? 1 : 0
  depends_on = [time_sleep.wait_2_minutes]

  alarm_name          = "otel-soaking-cpu-alarm-${module.common.testing_id}"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  evaluation_periods  = 2
  threshold           = "50"

  metric_query {
    id          = "cpu"
    return_data = true

    metric {
      metric_name = local.ami_family["soaking_cpu_metric_name"]
      namespace   = var.soaking_metric_namespace
      period      = 60
      stat        = "Average"
      unit        = "Percent"

      # use this dimension to identify each test
      dimensions = {
        InstanceId = aws_instance.aoc.id
      }
    }
  }
}

# Soaking alarm polling: run the validator locally and hand it the alarm name
# through --alarm-names.
resource "null_resource" "bake_alarms" {
  count      = var.soaking ? 1 : 0
  depends_on = [aws_cloudwatch_metric_alarm.cpu_alarm]

  provisioner "local-exec" {
    # cpu_alarm now uses count, so its single instance is addressed as [0];
    # this expression is only evaluated when var.soaking is true
    command     = "${module.common.validator_path} --args='-c ${var.validation_config} -t ${module.common.testing_id} --region ${var.region} --alarm-names ${aws_cloudwatch_metric_alarm.cpu_alarm[0].alarm_name}'"
    working_dir = "../../"
  }
}
3 changes: 3 additions & 0 deletions terraform/ec2/validation.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@



12 changes: 11 additions & 1 deletion terraform/ec2/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ variable "otconfig_path" {
}

variable "docker_compose_path" {
default = "../template/ec2-docker-compose-config/default_ec2_docker_compose.yml"
default = "../template/ec2-docker-compose-config/default_ec2_docker_compose.yml.tpl"
}

variable "package_s3_bucket" {
Expand Down Expand Up @@ -65,3 +65,13 @@ variable "sshkey_s3_private_key" {
variable "sample_app_callable" {
default = true
}

# Soaking-test switch (off by default). When true, cwagent is installed on the
# instance and the alarm-baking validator step runs (soaking.tf), and the
# sample-app validator in main.tf is skipped.
variable "soaking" {
default = false
}

# CloudWatch namespace for soaking metrics: it is rendered into the cwagent
# config as the metrics "namespace" and is also the namespace the soaking CPU
# alarm reads from.
variable "soaking_metric_namespace" {
default = "AWSOtelCollector/SoakTest"
}

54 changes: 54 additions & 0 deletions terraform/template/cwagent-config/soaking-linux.json.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"agent": {
"metrics_collection_interval": 10
},
"metrics": {
"append_dimensions": {
"InstanceId": "$${aws:InstanceId}"
},
"metrics_collected": {
"cpu": {
"measurement": [
"cpu_usage_idle",
"cpu_usage_iowait",
"cpu_usage_user",
"cpu_usage_system"
],
"totalcpu": false
},
"disk": {
"measurement": [
"used_percent",
"inodes_free"
]
},
"diskio": {
"measurement": [
"io_time"
]
},
"mem": {
"measurement": [
"mem_used_percent"
]
},
"statsd": {
},
"swap": {
"measurement": [
"swap_used_percent"
]
},
"procstat": [
{
"measurement": [
"cpu_usage",
"memory_rss"
],
"exe": "aws-otel-collector"
}
]
},
"namespace": "${soaking_metric_namespace}"
}
}
13 changes: 13 additions & 0 deletions terraform/testing-suites/soaking-ec2.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# enable soaking
soaking = true

# assuming the sample app will generate high volume data by default
sample_app_callable = false

# ask validator to pull the alarms
validation_config="alarm-pulling-validation.yml"

# use amazonlinux2 by default to soak
testing_ami = "amazonlinux2"

aoc_version = "v0.1.13-311011856"
9 changes: 8 additions & 1 deletion validator/src/main/java/com/amazon/aoc/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ public class App implements Callable<Integer> {
description = "eg, --ecs-context ecsCluster=xxx --ecs-context ecsTaskArn=xxxx")
private Map<String, String> ecsContexts;

@CommandLine.Option(
names = {"--alarm-names"},
description = "the cloudwatch alarm names")
private List<String> alarmNameList;

public static void main(String[] args) throws Exception {
int exitCode = new CommandLine(new App()).execute(args);
System.exit(exitCode);
Expand All @@ -66,9 +71,11 @@ public static void main(String[] args) throws Exception {
@Override
public Integer call() throws Exception {
// build context
Context context = new Context(this.testingId, this.metricNamespace, this.region);
Context context = new Context(this.testingId, this.region);
context.setMetricNamespace(this.metricNamespace);
context.setEndpoint(this.endpoint);
context.setEcsContext(buildECSContext(ecsContexts));
context.setAlarmNameList(alarmNameList);

log.info(context);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ public enum ExceptionCode {
// build validator
VALIDATION_TYPE_NOT_EXISTED(60001, "validation type not existed"),
CALLER_TYPE_NOT_EXISTED(60002, "caller type not existed"),

// alarm validation
ALARM_BAKING(70001, "alarms still need to be baked"),
;
private int code;
private String message;
Expand Down
14 changes: 11 additions & 3 deletions validator/src/main/java/com/amazon/aoc/helpers/RetryHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ public class RetryHelper {
* @param retryable the lambda
* @throws Exception when the retry count is reached
*/
public static void retry(int retryCount, int sleepInMilliSeconds, Retryable retryable)
public static void retry(
int retryCount, int sleepInMilliSeconds, boolean throwExceptionInTheEnd, Retryable retryable)
throws Exception {
while (retryCount-- > 0) {
try {
Expand All @@ -45,7 +46,9 @@ public static void retry(int retryCount, int sleepInMilliSeconds, Retryable retr
}
}

throw new BaseException(ExceptionCode.FAILED_AFTER_RETRY);
if (throwExceptionInTheEnd) {
throw new BaseException(ExceptionCode.FAILED_AFTER_RETRY);
}
}

/**
Expand All @@ -58,6 +61,7 @@ public static void retry(Retryable retryable) throws Exception {
retry(
Integer.valueOf(GenericConstants.MAX_RETRIES.getVal()),
Integer.valueOf(GenericConstants.SLEEP_IN_MILLISECONDS.getVal()),
true,
retryable);
}

Expand All @@ -69,6 +73,10 @@ public static void retry(Retryable retryable) throws Exception {
* @throws Exception when the retry count is reached
*/
public static void retry(int retryCount, Retryable retryable) throws Exception {
retry(retryCount, Integer.valueOf(GenericConstants.SLEEP_IN_MILLISECONDS.getVal()), retryable);
retry(
retryCount,
Integer.valueOf(GenericConstants.SLEEP_IN_MILLISECONDS.getVal()),
true,
retryable);
}
}
13 changes: 11 additions & 2 deletions validator/src/main/java/com/amazon/aoc/models/Context.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,24 @@
import lombok.Data;
import lombok.NonNull;

import java.util.List;

/**
 * Settings for one validator run: testing id, region, metric namespace, endpoint,
 * ECS context and alarm options. Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class Context {
@NonNull private String testingId;

@NonNull private String metricNamespace;

@NonNull private String region;

private String metricNamespace;

private String endpoint;

private ECSContext ecsContext;

/*
alarm related parameters
*/
// names of the CloudWatch alarms to look at
private List<String> alarmNameList;
// NOTE(review): presumably how long / how many times the alarms are polled;
// units and the code that sets these are not visible here -- confirm
private Integer alarmPullingDuration;
private Integer alarmPullingTimes;
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,22 @@
import com.amazon.aoc.fileconfigs.ExpectedMetric;
import com.amazon.aoc.fileconfigs.ExpectedTrace;
import lombok.Data;
import org.apache.logging.log4j.core.appender.rolling.action.IfNot;

/**
 * One validation entry: the validation type, the calling type with its HTTP path/method,
 * the expected metric/trace templates, and alarm polling options.
 * NOTE(review): presumably deserialized from the validation config file -- confirm.
 */
@Data
public class ValidationConfig {
String validationType;
String callingType;
String callingType = "none";

String httpPath;
String httpMethod;

ExpectedMetric expectedMetricTemplate;
ExpectedTrace expectedTraceTemplate;

/**
 * alarm related.
 */
// NOTE(review): units of pullingDuration are not visible in this file -- confirm
Integer pullingDuration;
Integer pullingTimes;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.amazon.aoc.services;

import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder;
import com.amazonaws.services.cloudwatch.model.DescribeAlarmsRequest;
import com.amazonaws.services.cloudwatch.model.DescribeAlarmsResult;
import com.amazonaws.services.cloudwatch.model.MetricAlarm;

import java.util.ArrayList;
import java.util.List;

/**
 * Thin wrapper around the CloudWatch client for looking up metric alarms by name.
 */
public class CloudWatchAlarmService {
  AmazonCloudWatch amazonCloudWatch;

  /**
   * Build a service whose CloudWatch client targets the given region.
   *
   * @param region AWS region name handed to the client builder
   */
  public CloudWatchAlarmService(String region) {
    amazonCloudWatch = AmazonCloudWatchClientBuilder.standard().withRegion(region).build();
  }

  /**
   * Get the alarms with the given names.
   *
   * <p>All result pages are read, so the returned list is not cut off at the first page.
   *
   * @param alarmNameList alarm name list; may be null or empty
   * @return the list of MetricAlarm objects, empty when no names are given
   */
  public List<MetricAlarm> listAlarms(List<String> alarmNameList) {
    List<MetricAlarm> alarms = new ArrayList<>();

    // Without names the request carries no name filter, and DescribeAlarms would then
    // return alarms that have nothing to do with this test. Return nothing instead.
    if (alarmNameList == null || alarmNameList.isEmpty()) {
      return alarms;
    }

    DescribeAlarmsRequest request = new DescribeAlarmsRequest().withAlarmNames(alarmNameList);
    String nextToken;
    do {
      DescribeAlarmsResult describeAlarmsResult = amazonCloudWatch.describeAlarms(request);
      alarms.addAll(describeAlarmsResult.getMetricAlarms());

      // a non-empty token means more pages are available
      nextToken = describeAlarmsResult.getNextToken();
      request.setNextToken(nextToken);
    } while (nextToken != null && !nextToken.isEmpty());

    return alarms;
  }
}
Loading

0 comments on commit 6e7afc9

Please sign in to comment.