Skip to content

Commit

Permalink
Merge branch 'main' into add-custom-http-metrics-dash
Browse files Browse the repository at this point in the history
Signed-off-by: Nghia Tran <[email protected]>
  • Loading branch information
tcnghia authored Dec 22, 2023
2 parents b2640b8 + 3c89783 commit 7712b9c
Show file tree
Hide file tree
Showing 17 changed files with 303 additions and 16 deletions.
1 change: 1 addition & 0 deletions .github/workflows/documentation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
- cloudevent-broker
- cloudevent-trigger
- cloudevent-recorder
- otel-collector
- networking
- dashboard/service
- dashboard/job
Expand Down
1 change: 1 addition & 0 deletions cloudevent-broker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ No requirements.
| <a name="module_http"></a> [http](#module\_http) | ../dashboard/sections/http | n/a |
| <a name="module_layout"></a> [layout](#module\_layout) | ../dashboard/sections/layout | n/a |
| <a name="module_logs"></a> [logs](#module\_logs) | ../dashboard/sections/logs | n/a |
| <a name="module_otel-collector"></a> [otel-collector](#module\_otel-collector) | ../otel-collector | n/a |
| <a name="module_resources"></a> [resources](#module\_resources) | ../dashboard/sections/resources | n/a |
| <a name="module_topic"></a> [topic](#module\_topic) | ../dashboard/sections/topic | n/a |
| <a name="module_width"></a> [width](#module\_width) | ../dashboard/sections/width | n/a |
Expand Down
12 changes: 8 additions & 4 deletions cloudevent-broker/cmd/ingress/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"log"
"time"

"cloud.google.com/go/compute/metadata"
"cloud.google.com/go/pubsub"
cloudevents "github.com/cloudevents/sdk-go/v2"
"github.com/kelseyhightower/envconfig"
Expand All @@ -26,9 +27,8 @@ const (
)

type envConfig struct {
Port int `envconfig:"PORT" default:"8080" required:"true"`
Project string `envconfig:"PROJECT_ID" required:"true"`
Topic string `envconfig:"PUBSUB_TOPIC" required:"true"`
Port int `envconfig:"PORT" default:"8080" required:"true"`
Topic string `envconfig:"PUBSUB_TOPIC" required:"true"`
}

func main() {
Expand All @@ -44,7 +44,11 @@ func main() {
log.Fatalf("failed to create CE client, %v", err)
}

psc, err := pubsub.NewClient(ctx, env.Project, option.WithTokenSource(google.ComputeTokenSource("")))
projectID, err := metadata.ProjectID()
if err != nil {
log.Fatalf("failed to get project ID, %v", err)
}
psc, err := pubsub.NewClient(ctx, projectID, option.WithTokenSource(google.ComputeTokenSource("")))
if err != nil {
log.Fatalf("failed to create pubsub client, %v", err)
}
Expand Down
16 changes: 11 additions & 5 deletions cloudevent-broker/ingress.tf
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ resource "cosign_sign" "this" {
conflict = "REPLACE"
}

# Instantiates the shared otel-collector module. Its `image` output is added
# to the Cloud Run service below as an additional (sidecar) container.
# Per the otel-collector README, the module also grants the given service
# account permission to write the collected metrics.
module "otel-collector" {
source = "../otel-collector"

project_id = var.project_id
# The service account as which the collector will run.
service_account = google_service_account.this.email
}

resource "google_cloud_run_v2_service" "this" {
for_each = var.regions

Expand Down Expand Up @@ -69,15 +76,14 @@ resource "google_cloud_run_v2_service" "this" {
containers {
image = cosign_sign.this.signed_ref

env {
name = "PROJECT_ID"
value = var.project_id
}
ports { container_port = 8080 }

env {
name = "PUBSUB_TOPIC"
value = google_pubsub_topic.this[each.key].name
}
}
containers { image = module.otel-collector.image }
}
}

Expand Down Expand Up @@ -108,7 +114,7 @@ module "resources" {
module "width" { source = "../dashboard/sections/width" }

module "layout" {
source = "../dashboard/sections/layout"
source = "../dashboard/sections/layout"
sections = [
module.topic.section,
module.logs.section,
Expand Down
1 change: 1 addition & 0 deletions cloudevent-recorder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ No requirements.

| Name | Source | Version |
|------|--------|---------|
| <a name="module_otel-collector"></a> [otel-collector](#module\_otel-collector) | ../otel-collector | n/a |
| <a name="module_recorder-dashboard"></a> [recorder-dashboard](#module\_recorder-dashboard) | ../dashboard/cloudevent-receiver | n/a |
| <a name="module_triggers"></a> [triggers](#module\_triggers) | ../cloudevent-trigger | n/a |

Expand Down
14 changes: 14 additions & 0 deletions cloudevent-recorder/recorder.tf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ resource "ko_build" "recorder-image" {

resource "cosign_sign" "recorder-image" {
image = ko_build.recorder-image.image_ref

# Only keep the latest signature.
conflict = "REPLACE"
}

resource "ko_build" "logrotate-image" {
Expand All @@ -34,6 +37,16 @@ resource "ko_build" "logrotate-image" {

resource "cosign_sign" "logrotate-image" {
image = ko_build.logrotate-image.image_ref

# Only keep the latest signature.
conflict = "REPLACE"
}

# Instantiates the shared otel-collector module. Its `image` output is added
# to the recorder service below as an additional (sidecar) container.
# Per the otel-collector README, the module also grants the given service
# account permission to write the collected metrics.
module "otel-collector" {
source = "../otel-collector"

project_id = var.project_id
# The service account as which the collector will run.
service_account = google_service_account.recorder.email
}

resource "google_cloud_run_v2_service" "recorder-service" {
Expand Down Expand Up @@ -91,6 +104,7 @@ resource "google_cloud_run_v2_service" "recorder-service" {
mount_path = "/logs"
}
}
containers { image = module.otel-collector.image }
volumes {
name = "logs"
empty_dir {}
Expand Down
34 changes: 34 additions & 0 deletions dashboard/sections/alerts/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Dashboard section that wraps a single alert widget in a collapsible group.

// Title used for both the alert widget and the collapsible section.
variable "title" { type = string }
// Forwarded unchanged to the collapsible module; defaults to false.
variable "collapsed" { default = false }
// Forwarded to the alert widget as its alert_name.
variable "alert" { type = string }

// Provides module.width.size, used as the tile width below.
module "width" { source = "../width" }

// The alert widget rendered inside the section's only tile.
module "alert" {
source = "../../widgets/alert"
title = var.title
alert_name = var.alert
}

locals {
// Exactly one tile, placed at (0, 0) with height 3, holding the alert widget.
tiles = [{
yPos = 0
xPos = 0
height = 3
width = module.width.size
widget = module.alert.widget
}]
}

module "collapsible" {
source = "../collapsible"

// If no alert is defined, this is an empty collapsed section.
// NOTE(review): local.tiles above always contains one tile and var.alert has
// no default, so the "no alert" case does not look reachable from this
// module as written -- confirm whether this comment is still accurate.
title = var.title
tiles = local.tiles
collapsed = var.collapsed
}

// Exposes the section built by the collapsible module.
output "section" {
value = module.collapsible.section
}
4 changes: 2 additions & 2 deletions dashboard/sections/collapsible/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ variable "tiles" {}
variable "collapsed" { default = false }

locals {
start_row = min([for s in var.tiles : s.yPos]...)
start_row = length(var.tiles) == 0 ? 0 : min([for s in var.tiles : s.yPos]...)
}

module "width" { source = "../width" }
Expand All @@ -12,7 +12,7 @@ output "section" {
value = concat([{
yPos = local.start_row
xPos = 0,
height = max([for s in var.tiles : s.yPos + s.height - local.start_row]...),
height = length(var.tiles) == 0 ? 0 : max([for s in var.tiles : s.yPos + s.height - local.start_row]...),
width = module.width.size,
widget = {
title = var.title
Expand Down
2 changes: 2 additions & 0 deletions dashboard/service/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ No requirements.

| Name | Source | Version |
|------|--------|---------|
| <a name="module_alerts"></a> [alerts](#module\_alerts) | ../sections/alerts | n/a |
| <a name="module_http"></a> [http](#module\_http) | ../sections/http | n/a |
| <a name="module_layout"></a> [layout](#module\_layout) | ../sections/layout | n/a |
| <a name="module_logs"></a> [logs](#module\_logs) | ../sections/logs | n/a |
Expand All @@ -68,6 +69,7 @@ No requirements.

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_alerts"></a> [alerts](#input\_alerts) | Alerting policies to add to the dashboard. | `list(string)` | `[]` | no |
| <a name="input_labels"></a> [labels](#input\_labels) | Additional labels to apply to the dashboard. | `map` | `{}` | no |
| <a name="input_service_name"></a> [service\_name](#input\_service\_name) | Name of the service(s) to monitor | `string` | n/a | yes |

Expand Down
21 changes: 16 additions & 5 deletions dashboard/service/dashboard.tf
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,26 @@ module "resources" {
filter = ["resource.type=\"cloud_run_revision\""]
}

// One alerts section per distinct entry in var.alerts (toset drops
// duplicates). Each entry is passed to the section as `alert`; every section
// gets the same fixed title, "Alert".
module "alerts" {
for_each = toset(var.alerts)

source = "../sections/alerts"
alert = each.key
title = "Alert"
}

module "width" { source = "../sections/width" }

module "layout" {
source = "../sections/layout"
sections = [
module.logs.section,
module.http.section,
module.resources.section,
]
sections = concat(
[for x in var.alerts : module.alerts[x].section],
[
module.logs.section,
module.http.section,
module.resources.section,
]
)
}

resource "google_monitoring_dashboard" "dashboard" {
Expand Down
7 changes: 7 additions & 0 deletions dashboard/service/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,10 @@ variable "labels" {
description = "Additional labels to apply to the dashboard."
default = {}
}

// Each entry is rendered as its own alerts section by the "alerts" module in
// dashboard.tf; the default empty list adds no alert sections.
variable "alerts" {
description = "Alerting policies to add to the dashboard."
type = list(string)
default = []
}

68 changes: 68 additions & 0 deletions otel-collector/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# `otel-collector`

This module builds a sidecar image that publishes metrics collected by the
OpenTelemetry (otel) collector, and grants the service account the sidecar runs
as permission to write those metrics (so it's impossible to forget):

```
module "otel-collector" {
source = "chainguard-dev/cloudrun/glue//otel-collector"
project_id = var.project_id
service_account = google_service_account.this.email
}
resource "google_cloud_run_v2_service" "this" {
template {
service_account = google_service_account.this.email
containers {
image = "..."
// Specifying port is necessary when there are multiple containers.
ports { container_port = 8080 }
}
// Install the sidecar!
containers { image = module.otel-collector.image }
}
}
```

<!-- BEGIN_TF_DOCS -->
## Requirements

No requirements.

## Providers

| Name | Version |
|------|---------|
| <a name="provider_cosign"></a> [cosign](#provider\_cosign) | n/a |
| <a name="provider_google"></a> [google](#provider\_google) | n/a |
| <a name="provider_ko"></a> [ko](#provider\_ko) | n/a |

## Modules

No modules.

## Resources

| Name | Type |
|------|------|
| [cosign_sign.otel-image](https://registry.terraform.io/providers/chainguard-dev/cosign/latest/docs/resources/sign) | resource |
| [google_project_iam_member.metrics-writer](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/project_iam_member) | resource |
| [ko_build.otel-image](https://registry.terraform.io/providers/ko-build/ko/latest/docs/resources/build) | resource |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| <a name="input_otel_collector_image"></a> [otel\_collector\_image](#input\_otel\_collector\_image) | The otel collector image to use as a base. | `string` | `"cgr.dev/chainguard/opentelemetry-collector-contrib:latest"` | no |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | n/a | `string` | n/a | yes |
| <a name="input_service_account"></a> [service\_account](#input\_service\_account) | The service account as which the collector will run. | `string` | n/a | yes |

## Outputs

| Name | Description |
|------|-------------|
| <a name="output_image"></a> [image](#output\_image) | n/a |
<!-- END_TF_DOCS -->
68 changes: 68 additions & 0 deletions otel-collector/cmd/otel-collector/kodata/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Receivers: a single prometheus receiver that scrapes localhost:2112
# every 10 seconds.
receivers:
prometheus:
config:
scrape_configs:
- job_name: "localhost"
scrape_interval: 10s
static_configs:
# TODO: make this configurable
- targets: ["localhost:2112"]
# drop any metric whose name starts with prometheus_, process_ or go_
metric_relabel_configs:
- source_labels: [ __name__ ]
regex: '^prometheus_.*'
action: drop
- source_labels: [ __name__ ]
regex: '^process_.*'
action: drop
- source_labels: [ __name__ ]
regex: '^go_.*'
action: drop

# Processors: all four entries defined here are referenced by the metrics
# pipeline in the `service` section of this file.
processors:
batch:
# batch metrics before sending to reduce API usage
send_batch_max_size: 200
send_batch_size: 200
timeout: 5s

memory_limiter:
# drop metrics if memory usage gets too high
check_interval: 1s
limit_percentage: 65
spike_limit_percentage: 20

# automatically detect Cloud Run resource metadata
resourcedetection:
detectors: [env, gcp]
timeout: 2s
override: false

resource:
attributes:
# add instance_id as a resource attribute
- key: service.instance.id
from_attribute: faas.id
action: upsert
# parse service name from K_SERVICE Cloud Run variable
- key: service.name
value: ${env:K_SERVICE}
action: insert

# exporter used by the metrics pipeline; no settings are overridden here
exporters:
googlemanagedprometheus:

# extension enabled under service.extensions; no settings are overridden here
extensions:
health_check:

# Wires the components defined above into a single metrics pipeline.
service:
  telemetry:
    logs:
      # We don't want to see scraper startup logging every
      # cold start.
      level: "error"
  extensions: [health_check]
  pipelines:
    metrics:
      receivers: [prometheus]
      # Order matters: processors run in the order listed. memory_limiter
      # goes first so memory pressure is handled before any other processor
      # does work on (or buffers) the data, and batch follows it, per the
      # OpenTelemetry Collector processor ordering guidance.
      processors: [memory_limiter, batch, resourcedetection, resource]
      exporters: [googlemanagedprometheus]
Loading

0 comments on commit 7712b9c

Please sign in to comment.