diff --git a/aws/common.tf b/aws/common.tf index 9b7561a..38ba48f 100644 --- a/aws/common.tf +++ b/aws/common.tf @@ -15,10 +15,6 @@ locals { create_kms = var.custom_kms_key && !try(length(var.kms_key) > 0, false) images = { - grok_compute = { - image = var.docker_grok_compute_image - tag = var.docker_grok_compute_tag == "latest" ? "${var.docker_grok_compute_tag}-${formatdate("YYYYMMDDhhmmss", timestamp())}" : var.docker_grok_compute_tag - }, jupyter_kernel_gateway = { image = var.docker_jkg_image tag = var.docker_jkg_tag == "latest" ? "${var.docker_jkg_tag}-${formatdate("YYYYMMDDhhmmss", timestamp())}" : var.docker_jkg_tag @@ -27,61 +23,18 @@ locals { image = var.docker_jn_image tag = var.docker_jn_tag == "latest" ? "${var.docker_jn_tag}-${formatdate("YYYYMMDDhhmmss", timestamp())}" : var.docker_jn_tag }, - h2o = { - image = var.docker_h2o_image - tag = var.docker_h2o_tag == "latest" ? "${var.docker_h2o_tag}-${formatdate("YYYYMMDDhhmmss", timestamp())}" : var.docker_h2o_tag - }, "ecs-searchdomain-sidecar-${var.name}-${var.environment}" = { image = "docker/ecs-searchdomain-sidecar" tag = "1.0" } } - targets = [ - { - name = "gc" - backend_protocol = "HTTP" - backend_port = 5005 - target_type = aws_ecs_task_definition.grok_compute.network_mode == "awsvpc" ? "ip" : "instance" - health_check = { - enabled = true - interval = 60 - unhealthy_threshold = 5 - path = "/grok_compute/info" - matcher = "200" - } - }, - { - name = "jkg" - backend_protocol = "HTTP" - backend_port = 8888 - target_type = aws_ecs_task_definition.jkg.network_mode == "awsvpc" ? "ip" : "instance" - health_check = { - enabled = true - interval = 60 - unhealthy_threshold = 5 - path = "/jupyter/api/swagger.yaml" - matcher = "200" - } - }, - { - name = "jkgH" - backend_protocol = "HTTP" - backend_port = 5005 - target_type = aws_ecs_task_definition.jkg.network_mode == "awsvpc" ? 
"ip" : "instance" - health_check = { - enabled = true - interval = 60 - unhealthy_threshold = 5 - path = "/jupyter/helper/info" - matcher = "200" - } - }, - { - name = "jn" - backend_protocol = "HTTP" - backend_port = 8889 - target_type = aws_ecs_task_definition.jn.network_mode == "awsvpc" ? "ip" : "instance" + targets = { + jn = { + create_attachment = false + backend_protocol = "HTTP" + backend_port = 8889 + target_type = aws_ecs_task_definition.jn.network_mode == "awsvpc" ? "ip" : "instance" health_check = { enabled = true interval = 60 @@ -89,12 +42,14 @@ locals { path = "/notebook/api" matcher = "200" } + priority = 5 /* catch-all notebook rule for the 8889 target; must be evaluated after the helper rule, as in the removed listener rules */ + conditions = [{ path_pattern = { values = ["/notebook/*"] } }] }, - { - name = "jnH" - backend_protocol = "HTTP" - backend_port = 5005 - target_type = aws_ecs_task_definition.jn.network_mode == "awsvpc" ? "ip" : "instance" + jnH = { + create_attachment = false + backend_protocol = "HTTP" + backend_port = 5005 + target_type = aws_ecs_task_definition.jn.network_mode == "awsvpc" ? "ip" : "instance" health_check = { enabled = true interval = 60 @@ -102,34 +57,10 @@ locals { path = "/notebook/helper/info" matcher = "200" } - }, - { - name = "h2oH" - backend_protocol = "HTTP" - backend_port = 5005 - target_type = aws_ecs_task_definition.h2o.network_mode == "awsvpc" ? "ip" : "instance" - health_check = { - enabled = true - interval = 60 - unhealthy_threshold = 5 - path = "/helper/info" - matcher = "200" - } - }, - { - name = "h2o" - backend_protocol = "HTTP" - backend_port = 54321 - target_type = aws_ecs_task_definition.h2o.network_mode == "awsvpc" ? 
"ip" : "instance" - health_check = { - enabled = true - interval = 60 - unhealthy_threshold = 5 - path = "/3/About" - matcher = "200" - } + priority = 4 /* helper rule for the 5005 target; lower number so it wins over the catch-all notebook rule */ + conditions = [{ path_pattern = { values = ["/notebook/helper/*"] } }] } - ] + } } data "aws_availability_zones" "available" { diff --git a/aws/ecs.tf b/aws/ecs.tf index a2ec2ce..b380c7d 100644 --- a/aws/ecs.tf +++ b/aws/ecs.tf @@ -48,28 +48,6 @@ module "ecs" { tags = local.tags } -# TODO: check AWS principal (autoscaling group) for ecs -#resource "aws_secretsmanager_secret_policy" "docker_hub" { -# count = try(length(var.docker_hub_secret_arn) > 0, false) ? 0 : 1 -# secret_arn = aws_secretsmanager_secret.docker_hub[0].arn -# -# policy = < 0, false) ? 0 : 1 -# aws_service_name = "ecs.amazonaws.com" -#} - -resource "aws_ecs_service" "grok_compute" { - name = "${local.ecs_name}_grok_compute" - cluster = module.ecs.cluster_arn - task_definition = aws_ecs_task_definition.grok_compute.arn - launch_type = var.ecs_launch_type - - desired_count = 1 - deployment_maximum_percent = 200 - deployment_minimum_healthy_percent = 100 - scheduling_strategy = "REPLICA" - deployment_controller { - type = "ECS" - } - enable_execute_command = true - force_new_deployment = true - - # iam_role = aws_ecs_task_definition.grok_compute.network_mode == "awsvpc" ? null : try(length(var.iam_service_linked_role) > 0, false) ? var.iam_service_linked_role : aws_iam_service_linked_role.service[0].arn - - dynamic "service_registries" { - for_each = var.ecs_launch_type == "FARGATE" ? 
[ - { - registry_arn : aws_service_discovery_service.grok_compute[0].arn - } - ] : [] - content { - registry_arn = service_registries.value["registry_arn"] - } - } - - load_balancer { - target_group_arn = module.lb_ext.target_group_arns[0] - container_name = "grok_compute" - container_port = 5005 - } - load_balancer { - target_group_arn = module.lb_int.target_group_arns[0] - container_name = "grok_compute" - container_port = 5005 - } - dynamic "network_configuration" { - for_each = var.ecs_launch_type == "FARGATE" ? [ - { - subnets : try(module.vpc[0].private_subnets, var.private_subnet_ids) - security_groups : [module.sg.security_group_id] - } - ] : [] - content { - subnets = network_configuration.value["subnets"] - security_groups = network_configuration.value["security_groups"] - assign_public_ip = false - } - } -} resource "aws_ecs_service" "jkg" { name = "${local.ecs_name}_jupyter_kernel_gateway" cluster = module.ecs.cluster_arn @@ -825,8 +484,6 @@ resource "aws_ecs_service" "jkg" { enable_execute_command = true force_new_deployment = true - # iam_role = aws_ecs_task_definition.jkg.network_mode == "awsvpc" ? null : try(length(var.iam_service_linked_role) > 0, false) ? var.iam_service_linked_role : aws_iam_service_linked_role.service[0].arn - dynamic "service_registries" { for_each = var.ecs_launch_type == "FARGATE" ? 
[ { @@ -838,27 +495,6 @@ resource "aws_ecs_service" "jkg" { } } - load_balancer { - target_group_arn = module.lb_ext.target_group_arns[1] - container_name = "jupyter_kernel_gateway" - container_port = 8888 - } - load_balancer { - target_group_arn = module.lb_ext.target_group_arns[2] - container_name = "jupyter_kernel_gateway" - container_port = 5005 - } - load_balancer { - target_group_arn = module.lb_int.target_group_arns[1] - container_name = "jupyter_kernel_gateway" - container_port = 8888 - } - load_balancer { - target_group_arn = module.lb_int.target_group_arns[2] - container_name = "jupyter_kernel_gateway" - container_port = 5005 - } - dynamic "network_configuration" { for_each = var.ecs_launch_type == "FARGATE" ? [ { @@ -889,8 +525,6 @@ resource "aws_ecs_service" "jn" { enable_execute_command = true force_new_deployment = true - # iam_role = aws_ecs_task_definition.jn.network_mode == "awsvpc" ? null : try(length(var.iam_service_linked_role) > 0, false) ? var.iam_service_linked_role : aws_iam_service_linked_role.service[0].arn - dynamic "service_registries" { for_each = var.ecs_launch_type == "FARGATE" ? 
[ { @@ -903,22 +537,22 @@ resource "aws_ecs_service" "jn" { } load_balancer { - target_group_arn = module.lb_ext.target_group_arns[3] + target_group_arn = module.lb_ext.target_groups["jn"].arn container_name = "jupyter_notebook" container_port = 8889 } load_balancer { - target_group_arn = module.lb_ext.target_group_arns[4] + target_group_arn = module.lb_ext.target_groups["jnH"].arn container_name = "jupyter_notebook" container_port = 5005 } load_balancer { - target_group_arn = module.lb_int.target_group_arns[3] + target_group_arn = module.lb_int.target_groups["jn"].arn container_name = "jupyter_notebook" container_port = 8889 } load_balancer { - target_group_arn = module.lb_int.target_group_arns[4] + target_group_arn = module.lb_int.target_groups["jnH"].arn container_name = "jupyter_notebook" container_port = 5005 } @@ -937,70 +571,6 @@ resource "aws_ecs_service" "jn" { } } } -resource "aws_ecs_service" "h2o" { - name = "${local.ecs_name}_h2o" - cluster = module.ecs.cluster_arn - task_definition = aws_ecs_task_definition.h2o.arn - launch_type = var.ecs_launch_type - - desired_count = 1 - deployment_maximum_percent = var.ecs_launch_type == "FARGATE" ? 200 : 100 - deployment_minimum_healthy_percent = var.ecs_launch_type == "FARGATE" ? 100 : 0 - scheduling_strategy = "REPLICA" - deployment_controller { - type = "ECS" - } - enable_execute_command = true - force_new_deployment = true - - # iam_role = aws_ecs_task_definition.h2o.network_mode == "awsvpc" ? null : try(length(var.iam_service_linked_role) > 0, false) ? var.iam_service_linked_role : aws_iam_service_linked_role.service[0].arn - - dynamic "service_registries" { - for_each = var.ecs_launch_type == "FARGATE" ? 
[ - { - registry_arn : aws_service_discovery_service.h2o[0].arn - } - ] : [] - content { - registry_arn = service_registries.value["registry_arn"] - } - } - - load_balancer { - target_group_arn = module.lb_ext.target_group_arns[5] - container_name = "h2o" - container_port = 5005 - } - load_balancer { - target_group_arn = module.lb_ext.target_group_arns[6] - container_name = "h2o" - container_port = 54321 - } - load_balancer { - target_group_arn = module.lb_int.target_group_arns[5] - container_name = "h2o" - container_port = 5005 - } - load_balancer { - target_group_arn = module.lb_int.target_group_arns[6] - container_name = "h2o" - container_port = 54321 - } - - dynamic "network_configuration" { - for_each = var.ecs_launch_type == "FARGATE" ? [ - { - subnets : try(module.vpc[0].private_subnets, var.private_subnet_ids) - security_groups : [module.sg.security_group_id] - } - ] : [] - content { - subnets = network_configuration.value["subnets"] - security_groups = network_configuration.value["security_groups"] - assign_public_ip = false - } - } -} data "aws_ami" "aws_optimized_ecs" { count = !try(length(var.ami_id) > 0, false) && var.ecs_launch_type == "EC2" ? 
1 : 0 most_recent = true diff --git a/aws/lb.tf b/aws/lb.tf index 26fc12b..5cea574 100644 --- a/aws/lb.tf +++ b/aws/lb.tf @@ -1,111 +1,13 @@ resource "random_string" "lb_id" { - for_each = { - for target in local.targets : - target.name => target - } - length = 2 - special = false - keepers = { target_type = each.value["target_type"] } -} - -module "lb_ext_sg" { - source = "registry.terraform.io/terraform-aws-modules/security-group/aws" - version = "~> 4.12.0" - - name = "${local.lb_name}-lb-ext" - description = "${local.lb_name}-lb-ext Datagrok LB Security Group" - vpc_id = try(module.vpc[0].vpc_id, var.vpc_id) + for_each = local.targets + length = 2 + special = false + keepers = { + target_type = each.value["target_type"] - egress_with_source_security_group_id = [ - { - from_port = 0 - to_port = 65535 - protocol = "tcp" - description = "Datagrok egress rules from LB to ECS" - source_security_group_id = module.sg.security_group_id - }, - ] - - ingress_with_cidr_blocks = [ - { - from_port = 80 - to_port = 80 - protocol = "tcp" - description = "Access to HTTP" - cidr_blocks = var.lb_access_cidr_blocks - }, - { - from_port = 443 - to_port = 443 - protocol = "tcp" - description = "Access to HTTPS" - cidr_blocks = var.lb_access_cidr_blocks - }, - { - from_port = 54321 - to_port = 54321 - protocol = "tcp" - description = "Access to h20" - cidr_blocks = var.lb_access_cidr_blocks - }, - { - from_port = 5005 - to_port = 5005 - protocol = "tcp" - description = "Access to h20h" - cidr_blocks = var.lb_access_cidr_blocks - }, - ] + } } -module "lb_int_sg" { - source = "registry.terraform.io/terraform-aws-modules/security-group/aws" - version = "~> 4.12.0" - - name = "${local.lb_name}-lb-int" - description = "${local.lb_name}-lb-int Datagrok LB Security Group" - vpc_id = try(module.vpc[0].vpc_id, var.vpc_id) - - egress_with_source_security_group_id = [ - { - from_port = 0 - to_port = 65535 - protocol = "tcp" - description = "Datagrok egress rules from LB to ECS" - 
source_security_group_id = module.sg.security_group_id - }, - ] - ingress_with_cidr_blocks = [ - { - from_port = 80 - to_port = 80 - protocol = "tcp" - description = "Access to HTTP" - cidr_blocks = try(module.vpc[0].vpc_cidr_block, var.cidr) - }, - { - from_port = 54321 - to_port = 54321 - protocol = "tcp" - description = "Access to h20" - cidr_blocks = try(module.vpc[0].vpc_cidr_block, var.cidr) - }, - { - from_port = 5005 - to_port = 5005 - protocol = "tcp" - description = "Access to h20h" - cidr_blocks = try(module.vpc[0].vpc_cidr_block, var.cidr) - }, - { - from_port = 8090 - to_port = 8090 - protocol = "tcp" - description = "Access Datagrok to CVM" - cidr_blocks = try(module.vpc[0].vpc_cidr_block, var.cidr) - }, - ] -} data "aws_route53_zone" "external" { count = !var.create_route53_external_zone && var.route53_enabled ? 1 : 0 name = var.domain_name @@ -144,267 +46,170 @@ module "acm" { } module "lb_ext" { source = "registry.terraform.io/terraform-aws-modules/alb/aws" - version = "~> 6.10.0" + version = "~> 9.10.0" name = "${local.lb_name}-ext" load_balancer_type = "application" vpc_id = try(module.vpc[0].vpc_id, var.vpc_id) subnets = try(module.vpc[0].public_subnets, var.public_subnet_ids) - security_groups = [module.lb_ext_sg.security_group_id] drop_invalid_header_fields = true idle_timeout = 1200 + security_group_ingress_rules = { + all_http = { /* NOTE(review): cidr_ipv4 takes a single CIDR; confirm var.lb_access_cidr_blocks is never a comma-separated list */ + from_port = 80 + to_port = 80 + protocol = "tcp" + description = "Access to HTTP" + cidr_ipv4 = var.lb_access_cidr_blocks + }, + all_https = { + from_port = 443 + to_port = 443 + protocol = "tcp" + description = "Access to HTTPS" + cidr_ipv4 = var.lb_access_cidr_blocks + } + } + security_group_egress_rules = { + cvm = { + from_port = 0 + to_port = 65535 + protocol = "tcp" + description = "CVM egress rules from LB to ECS" + referenced_security_group_id = module.sg.security_group_id + } + } + access_logs = var.bucket_logging.enabled ? { bucket = var.bucket_logging.create_log_bucket ? 
module.log_bucket.s3_bucket_id : var.bucket_logging.log_bucket prefix = "lb" enabled = true } : { bucket = "", enabled = false } - target_groups = [for target in local.targets : merge(target, { name = "${local.lb_name}-ext-${target["name"]}${random_string.lb_id[target["name"]].result}" })] + target_groups = { + for key, value in local.targets : + key => + merge(value, { name = "${local.lb_name}-ext-${key}-${random_string.lb_id[key].result}" }) + } - https_listeners = [ - { + listeners = { + http-https-redirect = { + action_type = "redirect" + port = 80 + protocol = "HTTP" + redirect = { + port = 443 + protocol = "HTTPS" + status_code = "HTTP_301" + } + } + cvm = { port = 443 protocol = "HTTPS" certificate_arn = try(module.acm[0].acm_certificate_arn, var.acm_cert_arn) action_type = "fixed-response" fixed_response = { status_code = 204 - message_body = "No content. Try other endpoints for the URL: /jupyter, /notebook, /grok_compute." + message_body = "No content. Try other endpoints for the URL: /jupyter, /notebook" content_type = "text/plain" } - }, - { - port = 5005 - protocol = "HTTPS" - certificate_arn = try(module.acm[0].acm_certificate_arn, var.acm_cert_arn) - target_group_index = 5 - }, - { - port = 54321 - protocol = "HTTPS" - certificate_arn = try(module.acm[0].acm_certificate_arn, var.acm_cert_arn) - target_group_index = 6 - } - ] - - http_tcp_listeners = [ - { - action_type = "redirect" - port = 80 - protocol = "HTTP" - redirect = { - port = 443 - protocol = "HTTPS" - status_code = "HTTP_301" + rules = { + for key, value in local.targets : + key => { priority = value.priority, actions = [{ type = "forward", target_group_key = key }], conditions = value.conditions } } } - ] - - https_listener_rules = [ - { - https_listener_index = 0 - priority = 1 - actions = [ - { - type = "forward" - target_group_index = 0 - } - ] - conditions = [ - { - path_patterns = ["/grok_compute/*"] - } - ] - }, - { - https_listener_index = 0 - priority = 2 - actions = [ - { - type 
= "forward" - target_group_index = 2 - } - ] - conditions = [ - { - path_patterns = ["/jupyter/helper/*"] - } - ] - }, - { - https_listener_index = 0 - priority = 3 - actions = [ - { - type = "forward" - target_group_index = 1 - } - ] - conditions = [ - { - path_patterns = ["/jupyter/*"] - } - ] - }, - { - https_listener_index = 0 - priority = 4 - actions = [ - { - type = "forward" - target_group_index = 4 - } - ] - conditions = [ - { - path_patterns = ["/notebook/helper/*"] - } - ] - }, - { - https_listener_index = 0 - priority = 5 - actions = [ - { - type = "forward" - target_group_index = 3 - } - ] - conditions = [ - { - path_patterns = ["/notebook/*"] - } - ] - }, - ] + } tags = local.tags } + module "lb_int" { source = "registry.terraform.io/terraform-aws-modules/alb/aws" - version = "~> 6.10.0" + version = "~> 9.10.0" name = "${local.lb_name}-int" load_balancer_type = "application" internal = true vpc_id = try(module.vpc[0].vpc_id, var.vpc_id) subnets = try(module.vpc[0].private_subnets, var.private_subnet_ids) - security_groups = [module.lb_int_sg.security_group_id] drop_invalid_header_fields = true idle_timeout = 1200 + security_group_ingress_rules = { + vpc_http = { + from_port = 80 + to_port = 80 + protocol = "tcp" + description = "Access to HTTP" + cidr_ipv4 = try(module.vpc[0].vpc_cidr_block, var.cidr) + }, + vpc_https = { /* required: port 80 now redirects to the 443 listener, which is unreachable from the VPC without this rule */ + from_port = 443 + to_port = 443 + protocol = "tcp" + description = "Access to HTTPS" + cidr_ipv4 = try(module.vpc[0].vpc_cidr_block, var.cidr) + }, + vpc_cvm = { + from_port = 8090 + to_port = 8090 + protocol = "tcp" + description = "Access Datagrok to CVM" + cidr_ipv4 = try(module.vpc[0].vpc_cidr_block, var.cidr) + } + } + security_group_egress_rules = { + cvm = { + from_port = 0 + to_port = 65535 + protocol = "tcp" + description = "CVM egress rules from LB to ECS" + referenced_security_group_id = module.sg.security_group_id + }, + } + access_logs = var.bucket_logging.enabled ? { bucket = var.bucket_logging.create_log_bucket ? 
module.log_bucket.s3_bucket_id : var.bucket_logging.log_bucket prefix = "lb" enabled = true } : { bucket = "", enabled = false } - target_groups = [for target in local.targets : merge(target, { name = "${local.lb_name}-int-${target["name"]}${random_string.lb_id[target["name"]].result}" })] + target_groups = { + for key, value in local.targets : + key => + merge(value, { name = "${local.lb_name}-int-${key}-${random_string.lb_id[key].result}" }) + } - http_tcp_listeners = [ - { + listeners = { + http-https-redirect = { action_type = "redirect" port = 80 protocol = "HTTP" - action_type = "fixed-response" + redirect = { + port = 443 + protocol = "HTTPS" + status_code = "HTTP_301" + } + } + cvm = { + port = 443 + protocol = "HTTPS" + certificate_arn = try(module.acm[0].acm_certificate_arn, var.acm_cert_arn) + action_type = "fixed-response" fixed_response = { status_code = 204 - message_body = "No content. Try other endpoints for the URL: /jupyter, /notebook, /grok_compute." + message_body = "No content. 
Try other endpoints for the URL: /jupyter, /notebook" content_type = "text/plain" } - }, - { - port = 5005 - protocol = "HTTP" - target_group_index = 5 - }, - { - port = 54321 - protocol = "HTTP" - target_group_index = 6 + rules = { + for key, value in local.targets : + key => { priority = value.priority, actions = [{ type = "forward", target_group_key = key }], conditions = value.conditions } + } } - ] - - http_tcp_listener_rules = [ - { - http_tcp_listener_index = 0 - priority = 1 - actions = [ - { - type = "forward" - target_group_index = 0 - } - ] - conditions = [ - { - path_patterns = ["/grok_compute/*"] - } - ] - }, - { - http_tcp_listener_index = 0 - priority = 2 - actions = [ - { - type = "forward" - target_group_index = 2 - } - ] - conditions = [ - { - path_patterns = ["/jupyter/helper/*"] - } - ] - }, - { - http_tcp_listener_index = 0 - priority = 3 - actions = [ - { - type = "forward" - target_group_index = 1 - } - ] - conditions = [ - { - path_patterns = ["/jupyter/*"] - } - ] - }, - { - http_tcp_listener_index = 0 - priority = 4 - actions = [ - { - type = "forward" - target_group_index = 4 - } - ] - conditions = [ - { - path_patterns = ["/notebook/helper/*"] - } - ] - }, - { - http_tcp_listener_index = 0 - priority = 5 - actions = [ - { - type = "forward" - target_group_index = 3 - } - ] - conditions = [ - { - path_patterns = ["/notebook/*"] - } - ] - }, - ] + } tags = local.tags } @@ -459,8 +257,8 @@ resource "aws_route53_record" "external" { name = local.r53_record type = "A" alias { - name = module.lb_ext.lb_dns_name - zone_id = module.lb_ext.lb_zone_id + name = module.lb_ext.dns_name + zone_id = module.lb_ext.zone_id evaluate_target_health = true } } @@ -477,18 +275,8 @@ resource "aws_route53_record" "internal" { name = "cvm.datagrok.${var.name}.${var.environment}.internal" type = "A" alias { - name = module.lb_int.lb_dns_name - zone_id = module.lb_int.lb_zone_id - evaluate_target_health = true - } -} -resource "aws_route53_record" "h2o" { - 
zone_id = var.create_route53_internal_zone ? aws_route53_zone.internal[0].id : var.route53_internal_zone - name = "h2o.datagrok.${var.name}.${var.environment}.internal" - type = "A" - alias { - name = module.lb_int.lb_dns_name - zone_id = module.lb_int.lb_zone_id + name = module.lb_int.dns_name + zone_id = module.lb_int.zone_id evaluate_target_health = true } } diff --git a/aws/monitoring.tf b/aws/monitoring.tf index 941534b..72b7fbf 100644 --- a/aws/monitoring.tf +++ b/aws/monitoring.tf @@ -47,64 +47,6 @@ module "notify_slack" { slack_emoji = var.monitoring.slack_emoji } -resource "aws_cloudwatch_metric_alarm" "grok_compute_task_count" { - count = var.monitoring.alarms_enabled && var.ecs_cluster_insights ? 1 : 0 - alarm_name = "${local.ecs_name}-grok_compute-task-count" - comparison_operator = "LessThanThreshold" - threshold = "1" - evaluation_periods = "2" - treat_missing_data = "ignore" - alarm_description = "This metric monitors ${local.ecs_name} grok_compute ECS tasks count" - alarm_actions = compact([ - var.monitoring.slack_alerts ? - module.notify_slack.slack_topic_arn : - "", - var.monitoring.email_alerts ? - module.sns_topic.sns_topic_arn : - "", - !var.monitoring.create_sns_topic ? 
- var.monitoring.sns_topic_arn : - "" - ]) - tags = local.tags - - metric_query { - id = "expression" - expression = "IF(desired > running, 0, 1)" - label = "Task Failures" - return_data = "true" - } - - metric_query { - id = "desired" - - metric { - metric_name = "DesiredTaskCount" - namespace = "ECS/ContainerInsights" - period = "60" - stat = "Average" - dimensions = { - ClusterName = module.ecs.cluster_name - ServiceName = aws_ecs_service.grok_compute.name - } - } - } - - metric_query { - id = "running" - - metric { - metric_name = "RunningTaskCount" - namespace = "ECS/ContainerInsights" - period = "60" - stat = "Average" - dimensions = { - ClusterName = module.ecs.cluster_name - ServiceName = aws_ecs_service.grok_compute.name - } - } - } -} resource "aws_cloudwatch_metric_alarm" "jkg_task_count" { count = var.monitoring.alarms_enabled && var.ecs_cluster_insights ? 1 : 0 alarm_name = "${local.ecs_name}-jkg-task-count" @@ -221,64 +163,6 @@ resource "aws_cloudwatch_metric_alarm" "jn_task_count" { } } } -resource "aws_cloudwatch_metric_alarm" "h2o_task_count" { - count = var.monitoring.alarms_enabled && var.ecs_cluster_insights ? 1 : 0 - alarm_name = "${local.ecs_name}-h2o-task-count" - comparison_operator = "LessThanThreshold" - threshold = "1" - evaluation_periods = "2" - treat_missing_data = "ignore" - alarm_description = "This metric monitors ${local.ecs_name} H2O ECS tasks count" - alarm_actions = compact([ - var.monitoring.slack_alerts ? - module.notify_slack.slack_topic_arn : - "", - var.monitoring.email_alerts ? - module.sns_topic.sns_topic_arn : - "", - !var.monitoring.create_sns_topic ? 
- var.monitoring.sns_topic_arn : - "" - ]) - tags = local.tags - - metric_query { - id = "expression" - expression = "IF(desired > running, 0, 1)" - label = "Task Failures" - return_data = "true" - } - - metric_query { - id = "desired" - - metric { - metric_name = "DesiredTaskCount" - namespace = "ECS/ContainerInsights" - period = "60" - stat = "Average" - dimensions = { - ClusterName = module.ecs.cluster_name - ServiceName = aws_ecs_service.h2o.name - } - } - } - - metric_query { - id = "running" - - metric { - metric_name = "RunningTaskCount" - namespace = "ECS/ContainerInsights" - period = "60" - stat = "Average" - dimensions = { - ClusterName = module.ecs.cluster_name - ServiceName = aws_ecs_service.h2o.name - } - } - } -} resource "aws_cloudwatch_metric_alarm" "instance_count" { count = var.monitoring.alarms_enabled && var.ecs_cluster_insights && var.ecs_launch_type == "EC2" ? 1 : 0 @@ -370,11 +254,11 @@ resource "aws_cloudwatch_metric_alarm" "high_ram" { } resource "aws_cloudwatch_metric_alarm" "lb_target" { - count = var.monitoring.alarms_enabled ? length(local.targets) : 0 - alarm_name = "datagrok-lb-target-${module.lb_ext.target_group_names[count.index]}" + for_each = var.monitoring.alarms_enabled ? 
local.targets : {} + alarm_name = "datagrok-lb-target-${module.lb_ext.target_groups[each.key].name}" comparison_operator = "LessThanThreshold" threshold = "1" - alarm_description = "${local.ecs_name} external ALB target group ${module.lb_ext.target_group_names[count.index]} registered targets alarms" + alarm_description = "${local.ecs_name} external ALB target group ${module.lb_ext.target_groups[each.key].name} registered targets alarms" treat_missing_data = "ignore" period = "60" evaluation_periods = "1" @@ -383,8 +267,8 @@ resource "aws_cloudwatch_metric_alarm" "lb_target" { metric_name = "HealthyHostCount" namespace = "AWS/ApplicationELB" dimensions = { - TargetGroup = module.lb_ext.target_group_arn_suffixes[count.index] - LoadBalancer = module.lb_ext.lb_arn_suffix + TargetGroup = module.lb_ext.target_groups[each.key].arn_suffix + LoadBalancer = module.lb_ext.arn_suffix } alarm_actions = compact([ var.monitoring.slack_alerts ? @@ -414,7 +298,7 @@ resource "aws_cloudwatch_metric_alarm" "datagrok_lb_5xx_count" { datapoints_to_alarm = 1 treat_missing_data = "notBreaching" dimensions = { - "LoadBalancer" = module.lb_ext.lb_arn_suffix + "LoadBalancer" = module.lb_ext.arn_suffix } alarm_actions = compact([ var.monitoring.slack_alerts ? @@ -431,11 +315,11 @@ resource "aws_cloudwatch_metric_alarm" "datagrok_lb_5xx_count" { } resource "aws_cloudwatch_metric_alarm" "lb_target_5xx_count" { - count = var.monitoring.alarms_enabled ? length(local.targets) : 0 - alarm_name = "datagrok-lb-target-${module.lb_ext.target_group_names[count.index]}-5xx" + for_each = var.monitoring.alarms_enabled ? 
local.targets : {} + alarm_name = "datagrok-lb-target-${module.lb_ext.target_groups[each.key].name}-5xx" comparison_operator = "GreaterThanThreshold" threshold = "0" - alarm_description = "${local.ecs_name} external ALB target group ${module.lb_ext.target_group_names[count.index]} registered 5XX errors" + alarm_description = "${local.ecs_name} external ALB target group ${module.lb_ext.target_groups[each.key].name} registered 5XX errors" treat_missing_data = "notBreaching" period = "120" evaluation_periods = "1" @@ -444,8 +328,8 @@ resource "aws_cloudwatch_metric_alarm" "lb_target_5xx_count" { metric_name = "HTTPCode_Target_5XX_Count" namespace = "AWS/ApplicationELB" dimensions = { - TargetGroup = module.lb_ext.target_group_arn_suffixes[count.index] - LoadBalancer = module.lb_ext.lb_arn_suffix + TargetGroup = module.lb_ext.target_groups[each.key].arn_suffix + LoadBalancer = module.lb_ext.arn_suffix } alarm_actions = compact([ var.monitoring.slack_alerts ? diff --git a/aws/outputs.tf b/aws/outputs.tf index b018149..9ba0fd3 100644 --- a/aws/outputs.tf +++ b/aws/outputs.tf @@ -130,10 +130,10 @@ output "route53_external_cloudwatch_log_group_arn" { output "alb_external_arn" { description = "The ARN of the external Application Load balancer" - value = module.lb_ext.lb_arn + value = module.lb_ext.arn } output "alb_internal_arn" { description = "The ARN of the external Application Load balancer" - value = module.lb_int.lb_arn + value = module.lb_int.arn } diff --git a/aws/variables.tf b/aws/variables.tf index f0afa5d..623a853 100644 --- a/aws/variables.tf +++ b/aws/variables.tf @@ -376,20 +376,6 @@ variable "cloudwatch_log_group_arn" { description = "The ARM of existing CloudWatch Log Group to use with Datagrok." } -variable "docker_grok_compute_image" { - type = string - default = "docker.io/datagrok/grok_compute" - nullable = false - description = "Grok Compute Docker Image registry location. By default the official image from Docker Hub will be used." 
-} - -variable "docker_grok_compute_tag" { - type = string - default = "latest" - nullable = false - description = "Tag from Docker registry for Grok Compute Docker Image" -} - variable "docker_jkg_image" { type = string default = "docker.io/datagrok/jupyter_kernel_gateway" @@ -418,20 +404,6 @@ variable "docker_jn_tag" { description = "Tag from Docker registry for Jupyter Notebook Docker Image" } -variable "docker_h2o_image" { - type = string - default = "docker.io/datagrok/h2o" - nullable = false - description = "H2O Docker Image registry location. By default the official image from Docker Hub will be used." -} - -variable "docker_h2o_tag" { - type = string - default = "latest" - nullable = false - description = "Tag from Docker registry for H2O Docker Image" -} - variable "create_cloudwatch_log_group" { type = bool default = true @@ -486,34 +458,6 @@ variable "enable_route53_logging" { description = "Specifies whether Logging requests using server access logging for Datagrok Route53 zone are enabled. We recommend to set it to true for production stand." } -variable "grok_compute_container_memory_reservation" { - type = number - default = 512 - nullable = false - description = "The soft limit (in MiB) of memory to reserve for the Grok Compute container." -} - -variable "grok_compute_container_cpu" { - type = number - default = 256 - nullable = false - description = "The number of cpu units the Amazon ECS container agent reserves for the Grok Compute container." -} - -variable "grok_compute_memory" { - type = number - default = 2048 - nullable = false - description = "Amount (in MiB) of memory used by the Grok Compute FARGATE task. The hard limit of memory (in MiB) to present to the task." -} - -variable "grok_compute_cpu" { - type = number - default = 1024 - nullable = false - description = "Number of cpu units used by the Grok Compute FARGATE task. The hard limit of CPU units to present for the task." 
-} - variable "jkg_container_memory_reservation" { type = number default = 512 @@ -570,34 +514,6 @@ variable "jn_cpu" { description = "Number of cpu units used by the Jupyter Notebook FARGATE task. The hard limit of CPU units to present for the task." } -variable "h2o_container_memory_reservation" { - type = number - default = 512 - nullable = false - description = "The soft limit (in MiB) of memory to reserve for the H2O container." -} - -variable "h2o_container_cpu" { - type = number - default = 256 - nullable = false - description = "The number of cpu units the Amazon ECS container agent reserves for the H2O container." -} - -variable "h2o_memory" { - type = number - default = 2048 - nullable = false - description = "Amount (in MiB) of memory used by the H2O FARGATE task. The hard limit of memory (in MiB) to present to the task." -} - -variable "h2o_cpu" { - type = number - default = 512 - nullable = false - description = "Number of cpu units used by the H2O FARGATE task. The hard limit of CPU units to present for the task." -} - variable "bucket_logging" { type = object({ log_bucket = optional(string) @@ -670,18 +586,6 @@ variable "db_dg_password" { sensitive = true } -variable "s3_bucket_region" { - description = "The S3 Bucket region for a stand." - type = string - nullable = false -} - -variable "s3_bucket_name" { - description = "The S3 Bucket name of a stand." - type = string - nullable = false -} - variable "jkg_secret" { description = "The jupyter kernel gateway secret name" type = string