From 21cb6feed70ddc65567055ea7202e91d1b73269e Mon Sep 17 00:00:00 2001 From: Matt Moore Date: Wed, 20 Dec 2023 14:13:11 -0500 Subject: [PATCH] Start to add eventing dashboards. This creates sections for "topics" and "subscriptions" that filter on their respective prefixes. The topic section shows both topic metrics and subscription metrics for topics matching the prefix. The subscription section shows the subscription metrics for subscriptions matching the prefix. Building on this, the broker ingress now has a customized dashboard that takes advantage of the topic metrics to put them front-and-center. Also building on this, the event recorder now includes a section with subscription metrics for each event type. Signed-off-by: Matt Moore --- .github/workflows/documentation.yaml | 1 + cloudevent-broker/README.md | 8 +- cloudevent-broker/ingress.tf | 54 +++++++- cloudevent-recorder/README.md | 2 +- cloudevent-recorder/recorder.tf | 10 +- dashboard/README.md | 1 + dashboard/cloudevent-receiver/README.md | 99 ++++++++++++++ dashboard/cloudevent-receiver/dashboard.tf | 57 ++++++++ dashboard/cloudevent-receiver/variables.tf | 9 ++ dashboard/sections/subscription/main.tf | 88 +++++++++++++ dashboard/sections/topic/main.tf | 146 +++++++++++++++++++++ 11 files changed, 467 insertions(+), 8 deletions(-) create mode 100644 dashboard/cloudevent-receiver/README.md create mode 100644 dashboard/cloudevent-receiver/dashboard.tf create mode 100644 dashboard/cloudevent-receiver/variables.tf create mode 100644 dashboard/sections/subscription/main.tf create mode 100644 dashboard/sections/topic/main.tf diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml index 844c13f4..f68fcf30 100644 --- a/.github/workflows/documentation.yaml +++ b/.github/workflows/documentation.yaml @@ -17,6 +17,7 @@ jobs: - networking - dashboard/service - dashboard/job + - dashboard/cloudevent-receiver steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # 
v4.1.1 diff --git a/cloudevent-broker/README.md b/cloudevent-broker/README.md index 2284a5f6..d2c370a0 100644 --- a/cloudevent-broker/README.md +++ b/cloudevent-broker/README.md @@ -90,7 +90,12 @@ No requirements. | Name | Source | Version | |------|--------|---------| -| [ingress-dashboard](#module\_ingress-dashboard) | ../dashboard/service | n/a | +| [http](#module\_http) | ../dashboard/sections/http | n/a | +| [layout](#module\_layout) | ../dashboard/sections/layout | n/a | +| [logs](#module\_logs) | ../dashboard/sections/logs | n/a | +| [resources](#module\_resources) | ../dashboard/sections/resources | n/a | +| [topic](#module\_topic) | ../dashboard/sections/topic | n/a | +| [width](#module\_width) | ../dashboard/sections/width | n/a | ## Resources @@ -98,6 +103,7 @@ No requirements. |------|------| | [cosign_sign.this](https://registry.terraform.io/providers/chainguard-dev/cosign/latest/docs/resources/sign) | resource | | [google_cloud_run_v2_service.this](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/cloud_run_v2_service) | resource | +| [google_monitoring_dashboard.dashboard](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_dashboard) | resource | | [google_pubsub_topic.this](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/pubsub_topic) | resource | | [google_pubsub_topic_iam_binding.ingress-publishes-events](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/pubsub_topic_iam_binding) | resource | | [google_service_account.this](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_account) | resource | diff --git a/cloudevent-broker/ingress.tf b/cloudevent-broker/ingress.tf index 9d4978d4..9546ded2 100644 --- a/cloudevent-broker/ingress.tf +++ b/cloudevent-broker/ingress.tf @@ -81,7 +81,55 @@ resource "google_cloud_run_v2_service" "this" { } } -module "ingress-dashboard" { - source = 
"../dashboard/service" - service_name = var.name +module "topic" { /* Pub/Sub section; the topic section matches topic ids against "<var.name>-.*". */ + source = "../dashboard/sections/topic" + title = "Broker Events" + topic_prefix = var.name +} + +module "logs" { + source = "../dashboard/sections/logs" + title = "Service Logs" + filter = ["resource.type=\"cloud_run_revision\""] +} + +module "http" { + source = "../dashboard/sections/http" + title = "HTTP" + filter = ["resource.type=\"cloud_run_revision\""] +} + +module "resources" { + source = "../dashboard/sections/resources" + title = "Resources" + filter = ["resource.type=\"cloud_run_revision\""] +} + +module "width" { source = "../dashboard/sections/width" } + +module "layout" { /* Sections are passed in this order: topic, logs, HTTP, resources. */ + source = "../dashboard/sections/layout" + sections = [ + module.topic.section, + module.logs.section, + module.http.section, + module.resources.section, + ] +} + +resource "google_monitoring_dashboard" "dashboard" { /* The dashboard-wide filter pins the service_name resource label to var.name. */ + dashboard_json = jsonencode({ + displayName = "Cloud Events Broker Ingress: ${var.name}" + dashboardFilters = [{ + filterType = "RESOURCE_LABEL" + stringValue = var.name + labelKey = "service_name" + }] + + // https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#mosaiclayout + mosaicLayout = { + columns = module.width.size + tiles = module.layout.tiles, + } + }) } diff --git a/cloudevent-recorder/README.md b/cloudevent-recorder/README.md index 2c0a2d1f..bce32b78 100644 --- a/cloudevent-recorder/README.md +++ b/cloudevent-recorder/README.md @@ -62,7 +62,7 @@ No requirements. 
| Name | Source | Version | |------|--------|---------| -| [recorder-dashboard](#module\_recorder-dashboard) | ../dashboard/service | n/a | +| [recorder-dashboard](#module\_recorder-dashboard) | ../dashboard/cloudevent-receiver | n/a | | [triggers](#module\_triggers) | ../cloudevent-trigger | n/a | ## Resources diff --git a/cloudevent-recorder/recorder.tf b/cloudevent-recorder/recorder.tf index 6e35b0e2..2118da49 100644 --- a/cloudevent-recorder/recorder.tf +++ b/cloudevent-recorder/recorder.tf @@ -99,7 +99,7 @@ resource "google_cloud_run_v2_service" "recorder-service" { } resource "random_id" "trigger-suffix" { - for_each = local.regional-types + for_each = var.types byte_length = 2 } @@ -109,7 +109,7 @@ module "triggers" { source = "../cloudevent-trigger" - name = "${var.name}-${random_id.trigger-suffix[each.key].hex}" + name = "${var.name}-${random_id.trigger-suffix[each.value.type].hex}" project_id = var.project_id broker = var.broker[each.value.region] filter = { "type" : each.value.type } @@ -122,6 +122,10 @@ module "triggers" { } module "recorder-dashboard" { - source = "../dashboard/service" + source = "../dashboard/cloudevent-receiver" service_name = var.name + + triggers = { /* label => trigger name, one entry per key of var.types; `each` is not defined here because this module has no for_each. */ + for type, schema in var.types : "type: ${type}" => "${var.name}-${random_id.trigger-suffix[type].hex}" + } } diff --git a/dashboard/README.md b/dashboard/README.md index 6bd32cde..8ce35d60 100644 --- a/dashboard/README.md +++ b/dashboard/README.md @@ -3,4 +3,5 @@ The modules in this directory define [`google_monitoring_dashboard`](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_dashboard) resources in a repeatable structured way. - The [Service](service/README.md) and [Job](job/README.md) modules define pre-configured dashboards for Cloud Run services and Cloud Run jobs, respectively. 
+- The [`cloudevent-receiver`](cloudevent-receiver/README.md) module defines a pre-configured dashboard for a Cloud Run-based event handler receiving events from a `cloudevent-trigger`. - The modules in [`./widgets`](widgets/) define the widgets used by the dashboards, in a way that can be reused to create custom dashboards. diff --git a/dashboard/cloudevent-receiver/README.md b/dashboard/cloudevent-receiver/README.md new file mode 100644 index 00000000..e9d965af --- /dev/null +++ b/dashboard/cloudevent-receiver/README.md @@ -0,0 +1,99 @@ +# `dashboard/cloudevent-receiver` + +This module provisions a Google Cloud Monitoring dashboard for a regionalized Cloud Run service that receives Cloud Events from one or more `cloudevent-trigger` modules. + +It assumes the service has the same name in all regions. + +```hcl +// Create a network with several regional subnets +module "networking" { + source = "chainguard-dev/glue/cloudrun//networking" + + name = "my-networking" + project_id = var.project_id + regions = [...] +} + +// Run a regionalized Cloud Run service "receiver" to handle events. +resource "google_cloud_run_v2_service" "receiver" { + for_each = module.networking.regional-networks + name = "receiver" + + //... + template { + //... + containers { + image = "..." + } + } +} + +module "cloudevent-trigger" { + for_each = module.networking.regional-networks + + source = "chainguard-dev/glue/cloudrun//cloudevent-trigger" + + name = "my-trigger" + project_id = var.project_id + broker = module.cloudevent-broker.broker[each.key] + filter = { "type" : "dev.chainguard.foo" } + + depends_on = [google_cloud_run_v2_service.receiver] + private-service = { + region = each.key + name = google_cloud_run_v2_service.receiver[each.key].name + } +} + +// Set up a dashboard for a regionalized event handler named "receiver". 
+module "receiver-dashboard" { + source = "chainguard-dev/glue/cloudrun//dashboard/cloudevent-receiver" + service_name = "receiver" + + triggers = { + "type dev.chainguard.foo": "my-trigger" + } +} +``` + +The dashboard it creates includes a section of subscription metrics for each trigger (events pushed, push latency, and oldest unacked message age), followed by widgets for service logs, request count, latency (p50,p95,p99), instance count grouped by revision, CPU and memory utilization, startup latency, and sent/received bytes. + + +## Requirements + +No requirements. + +## Providers + +| Name | Version | +|------|---------| +| [google](#provider\_google) | n/a | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [http](#module\_http) | ../sections/http | n/a | +| [layout](#module\_layout) | ../sections/layout | n/a | +| [logs](#module\_logs) | ../sections/logs | n/a | +| [resources](#module\_resources) | ../sections/resources | n/a | +| [subscription](#module\_subscription) | ../sections/subscription | n/a | +| [width](#module\_width) | ../sections/width | n/a | + +## Resources + +| Name | Type | +|------|------| +| [google_monitoring_dashboard.dashboard](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/monitoring_dashboard) | resource | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [service\_name](#input\_service\_name) | Name of the service(s) to monitor | `string` | n/a | yes | +| [triggers](#input\_triggers) | A mapping from a descriptive name to a subscription name prefix. | `map(string)` | n/a | yes | + +## Outputs + +No outputs. 
+ diff --git a/dashboard/cloudevent-receiver/dashboard.tf b/dashboard/cloudevent-receiver/dashboard.tf new file mode 100644 index 00000000..00361c69 --- /dev/null +++ b/dashboard/cloudevent-receiver/dashboard.tf @@ -0,0 +1,57 @@ +module "subscription" { /* One section per var.triggers entry: the key goes into the title, the value is the subscription name prefix. */ + for_each = var.triggers + + source = "../sections/subscription" + title = "Events ${each.key}" + + subscription_prefix = each.value +} + +module "logs" { + source = "../sections/logs" + title = "Service Logs" + filter = ["resource.type=\"cloud_run_revision\""] +} + +module "http" { + source = "../sections/http" + title = "HTTP" + filter = ["resource.type=\"cloud_run_revision\""] +} + +module "resources" { + source = "../sections/resources" + title = "Resources" + filter = ["resource.type=\"cloud_run_revision\""] +} + +module "width" { source = "../sections/width" } + +module "layout" { /* Subscription sections are listed first, in sorted key order, followed by logs, HTTP and resources. */ + source = "../sections/layout" + sections = concat([ + for key in sort(keys(var.triggers)) : module.subscription[key].section + ], + [ + module.logs.section, + module.http.section, + module.resources.section, + ]) +} + +resource "google_monitoring_dashboard" "dashboard" { /* The dashboard-wide filter pins the service_name resource label to var.service_name. */ + dashboard_json = jsonencode({ + displayName = "Cloud Event Receiver: ${var.service_name}" + dashboardFilters = [{ + filterType = "RESOURCE_LABEL" + stringValue = var.service_name + labelKey = "service_name" + }] + + // https://cloud.google.com/monitoring/api/ref_v3/rest/v1/projects.dashboards#mosaiclayout + mosaicLayout = { + columns = module.width.size + tiles = module.layout.tiles, + } + }) +} diff --git a/dashboard/cloudevent-receiver/variables.tf b/dashboard/cloudevent-receiver/variables.tf new file mode 100644 index 00000000..fb221bfb --- /dev/null +++ b/dashboard/cloudevent-receiver/variables.tf @@ -0,0 +1,9 @@ +variable "service_name" { + description = "Name of the service(s) to monitor" + type = string +} + +variable "triggers" { + description = "A mapping from a descriptive name to a subscription name prefix." 
+ type = map(string) +} diff --git a/dashboard/sections/subscription/main.tf b/dashboard/sections/subscription/main.tf new file mode 100644 index 00000000..73ad8e86 --- /dev/null +++ b/dashboard/sections/subscription/main.tf @@ -0,0 +1,88 @@ +variable "title" { type = string } +variable "subscription_prefix" { type = string } /* Matched as "<prefix>-.*" against subscription_id in every widget filter below. */ +variable "collapsed" { default = false } + +module "width" { source = "../width" } + +module "received-events" { + source = "../../widgets/xy" + title = "Events Pushed" + filter = [ + "resource.type=\"pubsub_subscription\"", + "metric.type=\"pubsub.googleapis.com/subscription/push_request_count\"", + "resource.label.\"subscription_id\"=monitoring.regex.full_match(\"${var.subscription_prefix}-.*\")", + ] + group_by_fields = [ + "resource.label.\"subscription_id\"", + "metric.label.\"response_class\"" + ] + primary_align = "ALIGN_MEAN" + primary_reduce = "REDUCE_NONE" +} + +module "push-latency" { + source = "../../widgets/latency" + title = "Push latency" + filter = [ + "resource.type=\"pubsub_subscription\"", + "metric.type=\"pubsub.googleapis.com/subscription/push_request_latencies\"", + "resource.label.\"subscription_id\"=monitoring.regex.full_match(\"${var.subscription_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"subscription_id\""] +} + +module "oldest-unacked" { + source = "../../widgets/xy" + title = "Oldest unacked message age" + filter = [ + "resource.type=\"pubsub_subscription\"", + "metric.type=\"pubsub.googleapis.com/subscription/oldest_unacked_message_age\"", + "resource.label.\"subscription_id\"=monitoring.regex.full_match(\"${var.subscription_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"subscription_id\""] + primary_align = "ALIGN_MAX" + primary_reduce = "REDUCE_NONE" +} + +locals { /* One row of three tiles, each local.unit wide and local.unit high, all at yPos 0. */ + columns = 3 + unit = module.width.size / local.columns + + // https://www.terraform.io/language/functions/range + // N columns, unit width each ([0, unit, 2 * unit, ...]) + col = range(0, local.columns * local.unit, 
local.unit) + + tiles = [{ + yPos = 0, + xPos = local.col[0], + height = local.unit, + width = local.unit, + widget = module.received-events.widget, + }, + { + yPos = 0, + xPos = local.col[1], + height = local.unit, + width = local.unit, + widget = module.push-latency.widget, + }, + { + yPos = 0, + xPos = local.col[2], + height = local.unit, + width = local.unit, + widget = module.oldest-unacked.widget, + }] +} + +module "collapsible" { + source = "../collapsible" + + title = var.title + tiles = local.tiles + collapsed = var.collapsed +} + +output "section" { + value = module.collapsible.section +} diff --git a/dashboard/sections/topic/main.tf b/dashboard/sections/topic/main.tf new file mode 100644 index 00000000..1efc7049 --- /dev/null +++ b/dashboard/sections/topic/main.tf @@ -0,0 +1,146 @@ +variable "title" { type = string } +variable "topic_prefix" { type = string } /* Matched as "<prefix>-.*" against topic_id in every widget filter below. */ +variable "collapsed" { default = false } + +module "width" { source = "../width" } + +module "sent-events" { + source = "../../widgets/xy" + title = "Events Published" + filter = [ + "resource.type=\"pubsub_topic\"", + "metric.type=\"pubsub.googleapis.com/topic/send_request_count\"", + "resource.label.\"topic_id\"=monitoring.regex.full_match(\"${var.topic_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"topic_id\""] + primary_align = "ALIGN_MEAN" + primary_reduce = "REDUCE_NONE" +} + +module "send-latency" { + source = "../../widgets/latency" + title = "Publish latency" + filter = [ + "resource.type=\"pubsub_topic\"", + "metric.type=\"pubsub.googleapis.com/topic/send_request_latencies\"", + "resource.label.\"topic_id\"=monitoring.regex.full_match(\"${var.topic_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"topic_id\""] +} + +module "topic-oldest-unacked" { + source = "../../widgets/xy" + title = "Oldest unacked message age (topic)" + filter = [ + "resource.type=\"pubsub_topic\"", + "metric.type=\"pubsub.googleapis.com/topic/oldest_unacked_message_age_by_region\"", + 
"resource.label.\"topic_id\"=monitoring.regex.full_match(\"${var.topic_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"topic_id\""] + primary_align = "ALIGN_MAX" + primary_reduce = "REDUCE_NONE" +} + +module "received-events" { /* Subscription metric, selected through the topic_id system label rather than by subscription_id. */ + source = "../../widgets/xy" + title = "Events Pushed" + filter = [ + "resource.type=\"pubsub_subscription\"", + "metric.type=\"pubsub.googleapis.com/subscription/push_request_count\"", + "metadata.system_labels.\"topic_id\"=monitoring.regex.full_match(\"${var.topic_prefix}-.*\")", + ] + group_by_fields = [ + "resource.label.\"subscription_id\"", + "metric.label.\"response_class\"" + ] + primary_align = "ALIGN_MEAN" + primary_reduce = "REDUCE_NONE" +} + +module "push-latency" { + source = "../../widgets/latency" + title = "Push latency" + filter = [ + "resource.type=\"pubsub_subscription\"", + "metric.type=\"pubsub.googleapis.com/subscription/push_request_latencies\"", + "metadata.system_labels.\"topic_id\"=monitoring.regex.full_match(\"${var.topic_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"subscription_id\""] +} + +module "oldest-unacked" { + source = "../../widgets/xy" + title = "Oldest unacked message age" + filter = [ + "resource.type=\"pubsub_subscription\"", + "metric.type=\"pubsub.googleapis.com/subscription/oldest_unacked_message_age\"", + "metadata.system_labels.\"topic_id\"=monitoring.regex.full_match(\"${var.topic_prefix}-.*\")", + ] + group_by_fields = ["resource.label.\"subscription_id\""] + primary_align = "ALIGN_MAX" + primary_reduce = "REDUCE_NONE" +} + +locals { /* Two rows of three tiles: topic widgets at yPos 0, subscription widgets at yPos local.unit. */ + columns = 3 + unit = module.width.size / local.columns + + // https://www.terraform.io/language/functions/range + // N columns, unit width each ([0, unit, 2 * unit, ...]) + col = range(0, local.columns * local.unit, local.unit) + + tiles = [{ + yPos = 0, + xPos = local.col[0], + height = local.unit, + width = local.unit, + widget = module.sent-events.widget, + }, + { + yPos = 0, + xPos = local.col[1], + height = local.unit, + width = 
local.unit, + widget = module.send-latency.widget, + }, + { + yPos = 0, + xPos = local.col[2], + height = local.unit, + width = local.unit, + widget = module.topic-oldest-unacked.widget, + }, + { + yPos = local.unit, + xPos = local.col[0], + height = local.unit, + width = local.unit, + widget = module.received-events.widget, + }, + { + yPos = local.unit, + xPos = local.col[1], + height = local.unit, + width = local.unit, + widget = module.push-latency.widget, + }, + { + yPos = local.unit, + xPos = local.col[2], + height = local.unit, + width = local.unit, + widget = module.oldest-unacked.widget, + }] +} + +module "collapsible" { + source = "../collapsible" + + title = var.title + tiles = local.tiles + collapsed = var.collapsed +} + +output "section" { + value = module.collapsible.section +}