diff --git a/config/observability/README.md b/config/observability/README.md new file mode 100644 index 000000000..747d2db2e --- /dev/null +++ b/config/observability/README.md @@ -0,0 +1,48 @@ +# Observability stack guide + +## Deploying the observability stack + +```bash +./bin/kustomize build ./config/observability/| docker run --rm -i ryane/kfilt -i kind=CustomResourceDefinition | kubectl apply --server-side -f - +./bin/kustomize build ./config/observability/| docker run --rm -i ryane/kfilt -x kind=CustomResourceDefinition | kubectl apply -f - +``` + +This will deploy Prometheus, Alertmanager and Grafana into the `monitoring` namespace, +along with metrics scrape configuration for Istio and Envoy. + +## Accessing Grafana & Prometheus + +Use port forwarding to access Grafana & Prometheus: + +```bash +kubectl -n monitoring port-forward service/grafana 3000:3000 +``` + +The Grafana UI can be found at [http://127.0.0.1:3000/](http://127.0.0.1:3000/). +It is pre-loaded with some Kubernetes and [gateway-api-state](https://github.com/Kuadrant/gateway-api-state-metrics) dashboards. + +```bash +kubectl -n monitoring port-forward service/prometheus-k8s 9090:9090 +``` + +The Prometheus UI can be found at [http://127.0.0.1:9090](http://127.0.0.1:9090). + +## Editing dashboards + +Dashboards can be imported in the Grafana UI using either raw JSON, a JSON file, or the URL/ID of one of the [dashboards on grafana.com](https://grafana.com/grafana/dashboards/). +Some example dashboards are available in the ./examples folder. + +To import a dashboard, click on the plus icon on the left sidebar and navigate to **Import**. After entering a dashboard URL/ID or JSON, click **Load**. + +After loading the dashboard, the next screen allows you to select a name and folder for the dashboard and specify the data source before finally importing the dashboard. + +Grafana dashboards can be exported as JSON in order to add them to the project's git repo. 
+When viewing the dashboard you wish to export, click on the **share** button at the top of the screen. + +In the modal popup click **Export** and then **Save to file**. + +## Editing alerting rules + +Alerting rules can be defined in [PrometheusRules](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/alerting.md#configuring-alertmanager-in-prometheus) resources. +They can be viewed in the Prometheus UI Alerts tab. +Some example alerting rules are available in the ./examples folder. diff --git a/config/observability/additional-scrape-configs.yaml b/config/observability/additional-scrape-configs.yaml new file mode 100644 index 000000000..eea5c5d75 --- /dev/null +++ b/config/observability/additional-scrape-configs.yaml @@ -0,0 +1,81 @@ +apiVersion: v1 +kind: Secret +metadata: + name: additional-scrape-configs + namespace: monitoring +stringData: + prometheus-additional.yaml: | + - job_name: kubernetes-pods + honor_labels: true + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + follow_redirects: true + enable_http2: true + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + separator: ; + regex: "true" + replacement: $1 + action: keep + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow] + separator: ; + regex: "true" + replacement: $1 + action: drop + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme] + separator: ; + regex: (https?) 
+ target_label: __scheme__ + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + separator: ; + regex: (.+) + target_label: __metrics_path__ + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] + separator: ; + regex: (\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}) + target_label: __address__ + replacement: '[$2]:$1' + action: replace + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_port, __meta_kubernetes_pod_ip] + separator: ; + regex: (\d+);((([0-9]+?)(\.|$)){4}) + target_label: __address__ + replacement: $2:$1 + action: replace + - separator: ; + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$1 + action: labelmap + - separator: ; + regex: __meta_kubernetes_pod_label_(.+) + replacement: $1 + action: labelmap + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: namespace + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_name] + separator: ; + regex: (.*) + target_label: pod + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_phase] + separator: ; + regex: Pending|Succeeded|Failed|Completed + replacement: $1 + action: drop + kubernetes_sd_configs: + - role: pod + kubeconfig_file: "" + follow_redirects: true + enable_http2: true diff --git a/config/observability/cluster_role.yaml b/config/observability/cluster_role.yaml new file mode 100644 index 000000000..69079131f --- /dev/null +++ b/config/observability/cluster_role.yaml @@ -0,0 +1,23 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-k8s +rules: + - apiGroups: + - "" + resources: + - nodes/metrics + verbs: + - get + - nonResourceURLs: + - /metrics + verbs: + - get + - apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch diff --git 
a/config/observability/grafana_datasources.yaml b/config/observability/grafana_datasources.yaml new file mode 100644 index 000000000..06d2cdf59 --- /dev/null +++ b/config/observability/grafana_datasources.yaml @@ -0,0 +1,14 @@ +{ + "apiVersion": 1, + "datasources": [ + { + "access": "proxy", + "editable": false, + "name": "prometheus", + "orgId": 1, + "type": "prometheus", + "url": "http://prometheus-k8s.monitoring.svc:9090", + "version": 1 + } + ] +} diff --git a/config/observability/grafana_deployment_patch.yaml b/config/observability/grafana_deployment_patch.yaml new file mode 100644 index 000000000..5d3a5ea82 --- /dev/null +++ b/config/observability/grafana_deployment_patch.yaml @@ -0,0 +1,84 @@ +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-gatewayclasses + configMap: + defaultMode: 420 + name: grafana-gatewayclasses +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-gateways + configMap: + defaultMode: 420 + name: grafana-gateways +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-httproutes + configMap: + defaultMode: 420 + name: grafana-httproutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-grpcroutes + configMap: + defaultMode: 420 + name: grafana-grpcroutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-tlsroutes + configMap: + defaultMode: 420 + name: grafana-tlsroutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-tcproutes + configMap: + defaultMode: 420 + name: grafana-tcproutes +- op: add + path: /spec/template/spec/volumes/- + value: + name: grafana-udproutes + configMap: + defaultMode: 420 + name: grafana-udproutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-gatewayclasses + mountPath: /grafana-dashboard-definitions/0/grafana-gatewayclasses +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-gateways + 
+ mountPath: /grafana-dashboard-definitions/0/grafana-gateways +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-httproutes + mountPath: /grafana-dashboard-definitions/0/grafana-httproutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-grpcroutes + mountPath: /grafana-dashboard-definitions/0/grafana-grpcroutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-tlsroutes + mountPath: /grafana-dashboard-definitions/0/grafana-tlsroutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-tcproutes + mountPath: /grafana-dashboard-definitions/0/grafana-tcproutes +- op: add + path: /spec/template/spec/containers/0/volumeMounts/- + value: + name: grafana-udproutes + mountPath: /grafana-dashboard-definitions/0/grafana-udproutes diff --git a/config/observability/ksm_clusterrole_patch.yaml b/config/observability/ksm_clusterrole_patch.yaml new file mode 100644 index 000000000..8766bb16e --- /dev/null +++ b/config/observability/ksm_clusterrole_patch.yaml @@ -0,0 +1,39 @@ +- op: add + path: /rules/- + value: + apiGroups: + - "apiextensions.k8s.io" + resources: + - customresourcedefinitions + verbs: + - list + - watch +- op: add + path: /rules/- + value: + apiGroups: + - "gateway.networking.k8s.io" + resources: + - gateways + - gatewayclasses + - httproutes + - grpcroutes + - tcproutes + - tlsroutes + - udproutes + verbs: + - list + - watch +- op: add + path: /rules/- + value: + apiGroups: + - "kuadrant.io" + resources: + - tlspolicies + - dnspolicies + - ratelimitpolicies + - authpolicies + verbs: + - list + - watch diff --git a/config/observability/ksm_deployment_patch.yaml b/config/observability/ksm_deployment_patch.yaml new file mode 100644 index 000000000..160b2eaf1 --- /dev/null +++ b/config/observability/ksm_deployment_patch.yaml @@ -0,0 +1,21 @@ +- op: replace + path: /spec/template/spec/containers/0/image + 
+ value: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.10.1 +- op: add + path: /spec/template/spec/volumes + value: + - name: custom-resource-state + configMap: + defaultMode: 420 + name: custom-resource-state +- op: add + path: /spec/template/spec/containers/0/volumeMounts + value: + - name: custom-resource-state + mountPath: /custom-resource-state +- op: add + path: /spec/template/spec/containers/0/args/- + value: --custom-resource-state-config-file +- op: add + path: /spec/template/spec/containers/0/args/- + value: /custom-resource-state/custom-resource-state.yaml diff --git a/config/observability/kustomization.yaml b/config/observability/kustomization.yaml new file mode 100644 index 000000000..e119ddcd8 --- /dev/null +++ b/config/observability/kustomization.yaml @@ -0,0 +1,95 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - github.com/prometheus-operator/kube-prometheus?ref=release-0.11 + - github.com/Kuadrant/gateway-api-state-metrics?ref=main + - github.com/Kuadrant/gateway-api-state-metrics/config/examples/dashboards?ref=main +# To scrape istio metrics, 3 configurations are required: +# 1. Envoy metrics directly from the istio ingress gateway pod + - podmonitor-envoy.yaml +# 2. Istiod metrics via the istiod service + - servicemonitor-istiod.yaml +# 3. Istio metrics exposed via envoy on 15020 in each application. +# We're using the additionalScrapeConfigs field of the Prometheus CR +# here to read existing prometheus scrape annotations on pods. +# Ideally this would be done via another PodMonitor or ServiceMonitor, +# however that isn't possible as the container port 15020 is not +# exposed or named, so we need to drop to raw custom prometheus +# scrape config. 
+# See https://github.com/prometheus-operator/prometheus-operator/issues/3071#issuecomment-763746836 + - additional-scrape-configs.yaml + +patchesStrategicMerge: + - cluster_role.yaml + +patches: + - target: + kind: Prometheus + name: k8s + patch: |- + kind: Prometheus + metadata: + name: k8s + spec: + replicas: 1 + additionalScrapeConfigs: + name: additional-scrape-configs + key: prometheus-additional.yaml + - target: + kind: Alertmanager + name: main + patch: |- + kind: Alertmanager + metadata: + name: main + spec: + replicas: 1 + - target: + kind: ClusterRole + name: prometheus-k8s + patch: |- + - op: add + path: /rules/- + value: + apiGroups: + - "" + resources: + - services + - endpoints + verbs: + - get + - list + - watch + +# Patch grafana deployment to include dashboard configmaps +patchesJson6902: + - target: + group: apps + version: v1 + kind: Deployment + name: kube-state-metrics + path: ksm_deployment_patch.yaml + - target: + group: rbac.authorization.k8s.io + version: v1 + kind: ClusterRole + name: kube-state-metrics + path: ksm_clusterrole_patch.yaml + - target: + group: apps + version: v1 + kind: Deployment + name: grafana + path: grafana_deployment_patch.yaml + +generatorOptions: + disableNameSuffixHash: true + +secretGenerator: +- name: grafana-datasources + namespace: monitoring + behavior: replace + files: + - grafana_datasources.yaml + diff --git a/config/observability/podmonitor-envoy.yaml b/config/observability/podmonitor-envoy.yaml new file mode 100644 index 000000000..1d98a8f8a --- /dev/null +++ b/config/observability/podmonitor-envoy.yaml @@ -0,0 +1,14 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: envoy-stats +spec: + namespaceSelector: + matchNames: + - istio-system + selector: + matchLabels: + app: istio-ingressgateway + podMetricsEndpoints: + - port: http-envoy-prom + path: /stats/prometheus diff --git a/config/observability/servicemonitor-istiod.yaml b/config/observability/servicemonitor-istiod.yaml new 
file mode 100644 index 000000000..656cd440b --- /dev/null +++ b/config/observability/servicemonitor-istiod.yaml @@ -0,0 +1,13 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: istiod +spec: + namespaceSelector: + matchNames: + - istio-system + selector: + matchLabels: + app: istiod + endpoints: + - port: http-monitoring