From a9115c359c8a16bc9a77043f386157d46868ed25 Mon Sep 17 00:00:00 2001 From: Sam Ainsworth Date: Wed, 13 Mar 2024 10:08:38 +0000 Subject: [PATCH] UML-1959 Create Cloudwatch dashboard in Terraform (#2542) * UML-1959 Create Cloudwatch dashboard in Terraform --- terraform/environment/region.tf | 2 + .../region/cloudwatch_dashboard.tf | 17 + terraform/environment/region/data_sources.tf | 14 + .../templates/cw_dashboard_watching.tftpl | 424 ++++++++++++++++++ terraform/environment/region/terraform.tf | 12 +- .../region/{actor_ecs.tf => use_ecs.tf} | 0 ..._load_balancer.tf => use_load_balancer.tf} | 0 terraform/environment/region/variables.tf | 6 + terraform/environment/terraform.tfvars.json | 4 + terraform/environment/variables.tf | 1 + 10 files changed, 469 insertions(+), 11 deletions(-) create mode 100644 terraform/environment/region/cloudwatch_dashboard.tf create mode 100644 terraform/environment/region/templates/cw_dashboard_watching.tftpl rename terraform/environment/region/{actor_ecs.tf => use_ecs.tf} (100%) rename terraform/environment/region/{actor_load_balancer.tf => use_load_balancer.tf} (100%) diff --git a/terraform/environment/region.tf b/terraform/environment/region.tf index c10d6b5761..39d0d4f1b9 100644 --- a/terraform/environment/region.tf +++ b/terraform/environment/region.tf @@ -12,6 +12,7 @@ module "eu_west_1" { container_version = var.container_version cookie_expires_use = local.environment.cookie_expires_use cookie_expires_view = local.environment.cookie_expires_view + create_dashboard = local.environment.create_dashboard dns_namespace_env = local.dns_namespace_env ecs_execution_role = module.iam.ecs_execution_role ecs_task_roles = module.iam.ecs_task_roles @@ -89,6 +90,7 @@ module "eu_west_2" { container_version = var.container_version cookie_expires_use = local.environment.cookie_expires_use cookie_expires_view = local.environment.cookie_expires_view + create_dashboard = local.environment.create_dashboard dns_namespace_env = local.dns_namespace_env 
ecs_execution_role = module.iam.ecs_execution_role ecs_task_roles = module.iam.ecs_task_roles diff --git a/terraform/environment/region/cloudwatch_dashboard.tf b/terraform/environment/region/cloudwatch_dashboard.tf new file mode 100644 index 0000000000..ede9149a79 --- /dev/null +++ b/terraform/environment/region/cloudwatch_dashboard.tf @@ -0,0 +1,17 @@ +resource "aws_cloudwatch_dashboard" "main" { + count = var.create_dashboard ? 1 : 0 + dashboard_name = "${var.environment_name}-${data.aws_region.current.name}-dashboard" + dashboard_body = templatefile("${path.module}/templates/cw_dashboard_watching.tftpl", { + region = data.aws_region.current.name, + environment = var.environment_name, + viewer_alb_arn = local.viewer_alb_arn, + use_alb_arn = local.use_alb_arn + }) + + provider = aws.region +} + +locals { /* arn_suffix is the provider-exported "app/<name>/<id>" value used as the CloudWatch LoadBalancer dimension */ + viewer_alb_arn = aws_lb.viewer.arn_suffix + use_alb_arn = aws_lb.use.arn_suffix +} diff --git a/terraform/environment/region/data_sources.tf b/terraform/environment/region/data_sources.tf index d756a3b55a..67ad7b7956 100644 --- a/terraform/environment/region/data_sources.tf +++ b/terraform/environment/region/data_sources.tf @@ -1,3 +1,17 @@ +data "aws_vpc" "default" { + default = "true" + + provider = aws.region +} + +data "aws_region" "current" { + provider = aws.region +} + +data "aws_caller_identity" "current" { + provider = aws.region +} + data "aws_subnets" "private" { filter { name = "vpc-id" diff --git a/terraform/environment/region/templates/cw_dashboard_watching.tftpl b/terraform/environment/region/templates/cw_dashboard_watching.tftpl new file mode 100644 index 0000000000..05e401ce5d --- /dev/null +++ b/terraform/environment/region/templates/cw_dashboard_watching.tftpl @@ -0,0 +1,424 @@ 
+${jsonencode({ + "widgets": [ + { + "height": 5, + "width": 24, + "y": 29, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ECS", "CPUUtilization", "ClusterName", "${environment}-use-an-lpa", "ServiceName", "pdf", { "stat": "Average", "id": "m0r0" } ], + [ "...", "actor", { "stat": "Average", "id": "m0r1" } ], + [ "...", "viewer", { "stat": "Average", "id": "m0r2" } ], + [ "...", "api", { "stat": "Average", "id": "m0r3" } ], + [ "...", "ServiceName", "viewer", "ClusterName", "${environment}-use-an-lpa", { "stat": "Average" } ], + [ "...", "actor", ".", ".", { "stat": "Average" } ], + [ "...", "pdf-service", ".", ".", { "stat": "Average" } ], + [ "...", "viewer-service", ".", ".", { "stat": "Average" } ], + [ "...", "actor-service", ".", ".", { "stat": "Average" } ], + [ "...", "api-service", ".", ".", { "stat": "Average" } ], + [ "...", "api", ".", ".", { "stat": "Average" } ], + [ "...", "pdf", ".", ".", { "stat": "Average" } ] + ], + "title": "CPU Utilization Average", + "copilot": true, + "legend": { + "position": "right" + }, + "period": 300, + "view": "timeSeries", + "stacked": false + } + }, + { + "height": 4, + "width": 24, + "y": 34, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ECS", "MemoryUtilization", "ClusterName", "${environment}-use-an-lpa", "ServiceName", "pdf", { "stat": "Average", "id": "m0r0" } ], + [ "...", "actor", { "stat": "Average", "id": "m0r1" } ], + [ "...", "viewer", { "stat": "Average", "id": "m0r2" } ], + [ "...", "api", { "stat": "Average", "id": "m0r3" } ], + [ "...", "ServiceName", "api", "ClusterName", "${environment}-use-an-lpa", { "stat": "Average" } ], + [ "...", "pdf", ".", ".", { "stat": "Average" } ], + [ "...", "pdf-service", ".", ".", { "stat": "Average" } ], + [ "...", "viewer-service", ".", ".", { "stat": "Average" } ], + [ "...", "api-service", ".", ".", { "stat": "Average" } ], + [ "...", "actor-service", ".", ".", { 
"stat": "Average" } ], + [ "...", "actor", ".", ".", { "stat": "Average" } ], + [ "...", "viewer", ".", ".", { "stat": "Average" } ] + ], + "title": "Memory Utilization Average", + "copilot": true, + "legend": { + "position": "right" + }, + "period": 300, + "view": "timeSeries", + "stacked": false + } + }, + { + "height": 6, + "width": 6, + "y": 18, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ApplicationELB", "RequestCount", "LoadBalancer", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${use_alb_arn}", { "stat": "Sum", "id": "m0r1" } ] + ], + "title": "Request Count Sum", + "copilot": true, + "legend": { + "position": "bottom" + } + } + }, + { + "height": 6, + "width": 9, + "y": 18, + "x": 15, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ApplicationELB", "HTTPCode_ELB_5XX_Count", "LoadBalancer", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${use_alb_arn}", { "stat": "Sum", "id": "m0r1" } ] + ], + "title": "HTTP 5XX Count", + "copilot": true, + "legend": { + "position": "bottom" + } + } + }, + { + "height": 5, + "width": 8, + "y": 24, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ApplicationELB", "ActiveConnectionCount", "LoadBalancer", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r1" } ] + ], + "title": "Active Connection Count Sum", + "copilot": true, + "legend": { + "position": "bottom" + } + } + }, + { + "height": 5, + "width": 8, + "y": 24, + "x": 16, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ApplicationELB", "HTTP_Fixed_Response_Count", "LoadBalancer", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${use_alb_arn}", { "stat": "Sum", "id": "m0r1" } ] + ], + "title": "HTTP Fixed Response Count Sum", + "copilot": true, + "legend": { + 
"position": "bottom" + } + } + }, + { + "height": 5, + "width": 8, + "y": 24, + "x": 8, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ApplicationELB", "HTTPCode_ELB_3XX_Count", "LoadBalancer", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${use_alb_arn}", { "stat": "Sum", "id": "m0r1" } ] + ], + "title": "HTTP Code ELB 3 XX Count Sum", + "copilot": true, + "legend": { + "position": "bottom" + } + } + }, + { + "height": 6, + "width": 9, + "y": 18, + "x": 6, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/ApplicationELB", "HTTPCode_ELB_4XX_Count", "LoadBalancer", "${viewer_alb_arn}", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${use_alb_arn}", { "stat": "Sum", "id": "m0r1" } ] + ], + "title": "HTTP 4XX Count", + "copilot": true, + "legend": { + "position": "bottom" + } + } + }, + { + "height": 4, + "width": 24, + "y": 42, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/DynamoDB", "SuccessfulRequestLatency", "TableName", "${environment}-ActorCodes", "Operation", "GetItem", { "stat": "Average", "id": "m0r0" } ], + [ "...", "${environment}-ActorUsers", ".", ".", { "stat": "Average", "id": "m0r1" } ], + [ "...", "${environment}-UserLpaActorMap", ".", ".", { "stat": "Average", "id": "m0r2" } ], + [ "...", "${environment}-ViewerActivity", ".", ".", { "stat": "Average", "id": "m0r3" } ], + [ "...", "${environment}-ViewerCodes", ".", ".", { "stat": "Average", "id": "m0r4" } ] + ], + "title": "Get Item Average Latency", + "legend": { + "position": "right" + }, + "copilot": true + } + }, + { + "height": 4, + "width": 24, + "y": 38, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/DynamoDB", "SuccessfulRequestLatency", "TableName", "${environment}-ActorCodes", "Operation", "PutItem", { "stat": "Average", "id": "m0r0" } ], + [ "...", "${environment}-ActorUsers", ".", ".", { 
"stat": "Average", "id": "m0r1" } ], + [ "...", "${environment}-UserLpaActorMap", ".", ".", { "stat": "Average", "id": "m0r2" } ], + [ "...", "${environment}-ViewerActivity", ".", ".", { "stat": "Average", "id": "m0r3" } ], + [ "...", "${environment}-ViewerCodes", ".", ".", { "stat": "Average", "id": "m0r4" } ] + ], + "title": "Put Item Average Latency", + "legend": { + "position": "right" + }, + "copilot": true + } + }, + { + "height": 4, + "width": 24, + "y": 46, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/DynamoDB", "ConsumedReadCapacityUnits", "TableName", "${environment}-ActorCodes", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${environment}-ActorUsers", { "stat": "Sum", "id": "m0r1" } ], + [ "...", "${environment}-UserLpaActorMap", { "stat": "Sum", "id": "m0r2" } ], + [ "...", "${environment}-ViewerActivity", { "stat": "Sum", "id": "m0r3" } ], + [ "...", "${environment}-ViewerCodes", { "stat": "Sum", "id": "m0r4" } ] + ], + "title": "Consumed Read Capacity Units Sum", + "legend": { + "position": "right" + }, + "copilot": true + } + }, + { + "height": 3, + "width": 24, + "y": 50, + "x": 0, + "type": "metric", + "properties": { + "region": "${region}", + "metrics": [ + [ "AWS/DynamoDB", "ConsumedWriteCapacityUnits", "TableName", "${environment}-ActorCodes", { "stat": "Sum", "id": "m0r0" } ], + [ "...", "${environment}-ActorUsers", { "stat": "Sum", "id": "m0r1" } ], + [ "...", "${environment}-UserLpaActorMap", { "stat": "Sum", "id": "m0r2" } ], + [ "...", "${environment}-ViewerActivity", { "stat": "Sum", "id": "m0r3" } ], + [ "...", "${environment}-ViewerCodes", { "stat": "Sum", "id": "m0r4" } ] + ], + "title": "Consumed Write Capacity Units Sum", + "legend": { + "position": "right" + }, + "copilot": true + } + }, + { + "height": 3, + "width": 24, + "y": 0, + "x": 0, + "type": "metric", + "properties": { + "metrics": [ + [ "${environment}_events", "account_created_event", { "label": "account created" } ], + 
[ ".", "account_activated_event", { "label": "account activated" } ], + [ ".", "account_deleted_event", { "label": "account deleted" } ] + ], + "view": "singleValue", + "region": "${region}", + "stat": "Sum", + "period": 2592000, + "title": "Account Events" + } + }, + { + "height": 3, + "width": 24, + "y": 9, + "x": 0, + "type": "metric", + "properties": { + "metrics": [ + [ "${environment}_events", "older_lpa_success_event", { "label": "older LPA journey success" } ], + [ ".", "older_lpa_found_event", { "label": "LPA found" } ], + [ ".", "older_lpa_already_added_event", { "label": "LPA already added" } ], + [ ".", "older_lpa_does_not_match_event", { "label": "LPA does not match" } ], + [ ".", "older_lpa_has_activation_key_event", { "label": "LPA has activation key" } ], + [ ".", "older_lpa_key_already_requested_event", { "label": "Activation Key Already Requested" } ], + [ ".", "older_lpa_not_eligible_event", { "label": "LPA not eligible" } ], + [ ".", "older_lpa_not_found_event", { "label": "LPA not found" } ], + [ ".", "older_lpa_too_old_event", { "label": "LPA too old" } ], + [ ".", "older_lpa_invalid_status_event", { "label": "LPA status not valid" } ], + [ ".", "older_lpa_force_activation_key_event", { "label": "user forced request for key" } ] + ], + "view": "singleValue", + "stacked": false, + "region": "${region}", + "stat": "Sum", + "period": 2592000, + "title": "Older Journey Outcomes" + } + }, + { + "height": 3, + "width": 24, + "y": 3, + "x": 0, + "type": "metric", + "properties": { + "metrics": [ + [ "${environment}_events", "view_lpa_share_code_success_event", { "label": "lpa viewed (share code found)" } ], + [ ".", "download_summary_event", { "label": "LPA summary downloaded" } ], + [ ".", "view_lpa_share_code_cancelled_event", { "label": "share code was cancelled" } ], + [ ".", "view_lpa_share_code_expired_event", { "label": "share code was expired" } ], + [ ".", "view_lpa_share_code_not_found_event", { "label": "share code not found" } ], + [ ".", 
"share_code_not_found_event", { "label": "share code not found (legacy)" } ] + ], + "view": "singleValue", + "region": "${region}", + "title": "View Events", + "period": 2592000, + "stat": "Sum" + } + }, + { + "height": 3, + "width": 24, + "y": 6, + "x": 0, + "type": "metric", + "properties": { + "metrics": [ + [ "${environment}_events", "add_lpa_found_event", { "label": "LPA found" } ], + [ ".", "add_lpa_success_event", { "label": "add LPA success" } ], + [ ".", "add_lpa_already_added_event", { "label": "LPA already added" } ], + [ ".", "add_lpa_not_eligible_event", { "label": "LPA not eligible" } ], + [ ".", "add_lpa_not_found_event", { "label": "LPA not found" } ], + [ ".", "add_lpa_failure_event", { "label": "add LPA failure (legacy event)" } ] + ], + "view": "singleValue", + "title": "Add LPA Events", + "region": "${region}", + "stat": "Sum", + "period": 2592000 + } + }, + { + "height": 3, + "width": 24, + "y": 12, + "x": 0, + "type": "metric", + "properties": { + "metrics": [ + [ "AWS/WAFV2", "AllowedRequests", "WebACL", "${environment}-web-acl", "Region", "${region}", "Rule", "ALL", { "label": "Allowed" } ], + [ ".", "BlockedRequests", ".", ".", ".", ".", ".", ".", { "label": "Blocked" } ], + [ "...", "AWS-AWSManagedRulesKnownBadInputsRuleSet", { "label": "(Blocked) Bad Inputs" } ], + [ ".", "CountedRequests", ".", ".", ".", ".", ".", "ALL", { "visible": false } ], + [ ".", "BlockedRequests", ".", ".", ".", ".", ".", "AWS-AWSManagedRulesPHPRuleSet", { "label": "(Blocked) PHP Vulns" } ] + ], + "view": "singleValue", + "region": "${region}", + "stat": "Sum", + "period": 86400, + "title": "AWS WAF Actions (${environment} ALBs)" + } + }, + { + "height": 3, + "width": 24, + "y": 15, + "x": 0, + "type": "metric", + "properties": { + "stat": "Sum", + "view": "timeSeries", + "stacked": false, + "metrics": [ + [ "AWS/WAFV2", "AllowedRequests", "Region", "${region}", "Rule", "AWS-AWSManagedRulesPHPRuleSet", "WebACL", "${environment}-web-acl" ], + [ ".", 
"BlockedRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "CountedRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "CaptchaRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "AllowedRequests", ".", ".", ".", "AWS-AWSManagedRulesKnownBadInputsRuleSet", ".", "." ], + [ ".", "BlockedRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "CountedRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "CaptchaRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "AllowedRequests", ".", ".", ".", "ALL", ".", "." ], + [ ".", "BlockedRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "CountedRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "CaptchaRequests", ".", ".", ".", ".", ".", "." ], + [ ".", "AllowedRequests", ".", ".", ".", "${environment}-web-acl", ".", "." ] + ], + "region": "${region}", + "title": "WAF Actions Graphed", + "yAxis": { + "left": { + "showUnits": false + }, + "right": { + "showUnits": false + } + }, + "period": 300, + "legend": { + "position": "hidden" + } + } + } + ] +} +)} diff --git a/terraform/environment/region/terraform.tf b/terraform/environment/region/terraform.tf index af6d3094c3..78c1bb44e8 100644 --- a/terraform/environment/region/terraform.tf +++ b/terraform/environment/region/terraform.tf @@ -16,14 +16,4 @@ terraform { version = ">= 3.0.0" } } -} - -data "aws_vpc" "default" { - default = "true" - - provider = aws.region -} - -data "aws_region" "current" { - provider = aws.region -} +} \ No newline at end of file diff --git a/terraform/environment/region/actor_ecs.tf b/terraform/environment/region/use_ecs.tf similarity index 100% rename from terraform/environment/region/actor_ecs.tf rename to terraform/environment/region/use_ecs.tf diff --git a/terraform/environment/region/actor_load_balancer.tf b/terraform/environment/region/use_load_balancer.tf similarity index 100% rename from terraform/environment/region/actor_load_balancer.tf rename to terraform/environment/region/use_load_balancer.tf diff --git 
a/terraform/environment/region/variables.tf b/terraform/environment/region/variables.tf index 4e0ba1bf5a..71349f112d 100644 --- a/terraform/environment/region/variables.tf +++ b/terraform/environment/region/variables.tf @@ -53,6 +53,12 @@ variable "cookie_expires_view" { type = number } +variable "create_dashboard" { + description = "Whether or not to create the CloudWatch dashboard." + type = bool + default = false +} + variable "dns_namespace_env" { description = "The environment to use for the DNS namespace." type = string diff --git a/terraform/environment/terraform.tfvars.json b/terraform/environment/terraform.tfvars.json index 8f6caa230f..926a17ab18 100644 --- a/terraform/environment/terraform.tfvars.json +++ b/terraform/environment/terraform.tfvars.json @@ -23,6 +23,7 @@ }, "cookie_expires_use": 1440, "cookie_expires_view": 1440, + "create_dashboard": false, "google_analytics_id_use": "G-JQHJE49CBB", "google_analytics_id_view": "G-C790FLJLL7", "have_a_backup_plan": false, @@ -112,6 +113,7 @@ }, "cookie_expires_use": 1440, "cookie_expires_view": 1440, + "create_dashboard": false, "google_analytics_id_use": "G-JQHJE49CBB", "google_analytics_id_view": "G-C790FLJLL7", "have_a_backup_plan": false, @@ -201,6 +203,7 @@ }, "cookie_expires_use": 1440, "cookie_expires_view": 1440, + "create_dashboard": true, "google_analytics_id_use": "", "google_analytics_id_view": "", "have_a_backup_plan": false, @@ -290,6 +293,7 @@ }, "cookie_expires_use": 1440, "cookie_expires_view": 1440, + "create_dashboard": true, "google_analytics_id_use": "G-TX93T4G7SZ", "google_analytics_id_view": "G-J4S91NRVMJ", "have_a_backup_plan": true, diff --git a/terraform/environment/variables.tf b/terraform/environment/variables.tf index cf545ccfe0..cc38fba520 100644 --- a/terraform/environment/variables.tf +++ b/terraform/environment/variables.tf @@ -46,6 +46,7 @@ variable "environments" { }) cookie_expires_use = number cookie_expires_view = number + create_dashboard = bool google_analytics_id_use 
= string google_analytics_id_view = string have_a_backup_plan = bool