From 9771c626079d1a5b62e9e3abc5120b2f09c980e2 Mon Sep 17 00:00:00 2001 From: bdrennz <146774453+bdrennz@users.noreply.github.com> Date: Wed, 13 Nov 2024 06:57:47 -0800 Subject: [PATCH 1/2] CP-23051: Change default kube-state-metrics behavior to use Cloudzero subchart (#91) * override KSM name * enable ksm by default * make cloudzero ksm undiscoverable * improve documentation * option 2 is not the default behavior * fix indentation * add line * add documentation for changing the service port for cloudzero ksm * disable cloudzero KSM as scrape target * set default port * fix endpoint * use default port * add release notes * remove metric exporter documentation * change beta version --- charts/cloudzero-agent/README.md | 30 ------------------- .../templates/validatorcm.yaml | 2 +- charts/cloudzero-agent/values.yaml | 10 ++++++- docs/releases/0.0.30-beta.md | 21 +++++++++++++ 4 files changed, 31 insertions(+), 32 deletions(-) create mode 100644 docs/releases/0.0.30-beta.md diff --git a/charts/cloudzero-agent/README.md b/charts/cloudzero-agent/README.md index ccb295cc..62781e3f 100644 --- a/charts/cloudzero-agent/README.md +++ b/charts/cloudzero-agent/README.md @@ -39,8 +39,6 @@ helm install cloudzero/cloudzero-agent \ --set clusterName= \ --set-string cloudAccountId= \ --set region= \ - # optionally deploy kube-state-metrics if it doesn't exist in the cluster already - --set kube-state-metrics.enabled= ``` ### Update Helm Chart @@ -58,7 +56,6 @@ helm upgrade cloudzero/cloudzero-agent \ --set clusterName= \ --set-string cloudAccountId= \ --set region= \ - --set kube-state-metrics.enabled= ``` ### Mandatory Values @@ -109,33 +106,6 @@ helm install cloudzero/cloudzero-agent \ -f values-override.yaml ``` -### Metric Exporters - -This chart depends on metrics from [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics). There are two installation options for providing the `kube-state-metrics` metrics to the cloudzero-agent. 
If you don't know which option is right for you, use the second option. - -#### Option 1 (default): Use existing kube-state-metrics - -Using an existing `kube-state-metrics` exporter may be desirable for minimizing cost. By default, the `cloudzero-agent` will attempt to find an existing `kube-state-metrics` K8s Service by searching for a K8s Service with the annotation `prometheus.io/scrape: "true"`. If an existing `kube-state-metrics` Service exists but does not have that annotation and you do not wish to add it, see the **Custom Scrape Configs** section below. - -In addition to the above, the existing `kube-state-metrics` Service address should be added in `values-override.yaml` as shown below so that the `cloudzero-agent` can validate the connection: - -```yaml -validator: - serviceEndpoints: - kubeStateMetrics: ..svc.cluster.local:8080 -``` - - -#### Option 2: Use kube-state-metrics subchart - -Alternatively, deploy the `kube-state-metrics` subchart that comes packaged with this chart. This is done by enabling settings in `values-override.yaml` as shown: - -```yaml -kube-state-metrics: - enabled: true -``` -In this option, no additional configuration is required in the `validator` field. - ### Secret Management The chart requires a CloudZero API key to send metric data. Admins can retrieve API keys [here](https://app.cloudzero.com/organization/api-keys). 
diff --git a/charts/cloudzero-agent/templates/validatorcm.yaml b/charts/cloudzero-agent/templates/validatorcm.yaml index 213f5df1..80b33a93 100644 --- a/charts/cloudzero-agent/templates/validatorcm.yaml +++ b/charts/cloudzero-agent/templates/validatorcm.yaml @@ -33,7 +33,7 @@ data: {{- if .Values.validator.serviceEndpoints.kubeStateMetrics }} kube_state_metrics_service_endpoint: http://{{ .Values.validator.serviceEndpoints.kubeStateMetrics }}/ {{- else }} - kube_state_metrics_service_endpoint: http://{{- if .Release.Name }}{{.Release.Name}}-{{- end }}kube-state-metrics:8080/ + kube_state_metrics_service_endpoint: http://{{- if .Release.Name }}{{.Release.Name}}-{{- end }}state-metrics:8080/ {{- end }} {{- if .Values.validator.serviceEndpoints.prometheusNodeExporter }} prometheus_node_exporter_service_endpoint: http://{{ .Values.validator.serviceEndpoints.prometheusNodeExporter }}/ diff --git a/charts/cloudzero-agent/values.yaml b/charts/cloudzero-agent/values.yaml index 4b92db4a..7dafe61c 100644 --- a/charts/cloudzero-agent/values.yaml +++ b/charts/cloudzero-agent/values.yaml @@ -48,9 +48,17 @@ prometheusConfig: additionalScrapeJobs: [] kube-state-metrics: - enabled: false + enabled: true + fullnameOverride: "cloudzero-state-metrics" extraArgs: - --metric-labels-allowlist=pods=[app.kubernetes.io/component] + # Disable CloudZero KSM as a Scrape Target since the service endpoint is explicitly defined + # by the Validators config file. + prometheusScrape: false + # Service port for the CloudZero KSM (default 8080); set a different port to avoid collisions with any existing KSM services. 
+ service: + port: 8080 + prometheus-node-exporter: enabled: false diff --git a/docs/releases/0.0.30-beta.md b/docs/releases/0.0.30-beta.md new file mode 100644 index 00000000..28c32368 --- /dev/null +++ b/docs/releases/0.0.30-beta.md @@ -0,0 +1,21 @@ +## [0.0.30-beta](https://github.com/cloudzero/cloudzero-charts/compare/v0.0.28...v0.0.30-beta) (2024-11-12) + +Improve Kube State Metrics Install + +### Upgrade Steps +To install, specify the version of the beta chart: + +``` bash +helm upgrade --install -n cz-prom-agent cz-prom-agent charts/cloudzero-agent \ + --set apiKey=$api_key \ + --set clusterName='cluster' \ + --set-string cloudAccountId="account_id" \ + --set region='region' \ + --version 0.0.30-beta + +``` + +### Improvements +* **CloudZero Metrics:** CloudZero State Metrics is enabled/installed by default. + +``` From 878b28869996aff97ab4ec7526ba48e7acafca6a Mon Sep 17 00:00:00 2001 From: bdrennz <146774453+bdrennz@users.noreply.github.com> Date: Thu, 14 Nov 2024 08:04:47 -0800 Subject: [PATCH 2/2] CP-23388: Define Static KubeStateMetrics Target Endpoint (#99) * change kube-state-metrics value name to avoid template errors * define static target * fix kube-state-metrics dependency * remove unused documentation * cast port to int * fix endpoint * update scrape config * dynamically populate metrics * use camel case --- charts/cloudzero-agent/BETA-INSTALLATION.md | 4 +- charts/cloudzero-agent/Chart.lock | 4 +- charts/cloudzero-agent/Chart.yaml | 2 +- charts/cloudzero-agent/README.md | 44 ------------------ charts/cloudzero-agent/templates/cm.yaml | 51 ++++----------------- charts/cloudzero-agent/values.yaml | 2 +- 6 files changed, 15 insertions(+), 92 deletions(-) diff --git a/charts/cloudzero-agent/BETA-INSTALLATION.md b/charts/cloudzero-agent/BETA-INSTALLATION.md index cf4f222a..be8c2303 100644 --- a/charts/cloudzero-agent/BETA-INSTALLATION.md +++ b/charts/cloudzero-agent/BETA-INSTALLATION.md @@ -45,7 +45,7 @@ helm install cloudzero-beta/cloudzero-agent \ 
--set clusterName= \ --set-string cloudAccountId= \ --set region= \ - --set kube-state-metrics.enabled= \ + --set kubeStateMetrics.enabled= \ --create-namespace ``` @@ -63,7 +63,7 @@ helm install cloudzero-beta/cloudzero-agent \ --set clusterName= \ --set-string cloudAccountId= \ --set region= \ - --set kube-state-metrics.enabled= \ + --set kubeStateMetrics.enabled= \ --create-namespace ``` diff --git a/charts/cloudzero-agent/Chart.lock b/charts/cloudzero-agent/Chart.lock index a2094f70..8841411b 100644 --- a/charts/cloudzero-agent/Chart.lock +++ b/charts/cloudzero-agent/Chart.lock @@ -5,5 +5,5 @@ dependencies: - name: prometheus-node-exporter repository: https://prometheus-community.github.io/helm-charts version: 4.24.0 -digest: sha256:827a33fa07fde17be0bf808e0beba3ca7b23c9fc1960580b2ba6d0ecc0b57a3f -generated: "2024-03-20T11:42:44.034766-04:00" +digest: sha256:254bcb4b6b7f42a53ad1ec5885e079958efa2a09f30ffafe03c6ad0eccd06f7d +generated: "2024-11-14T04:46:38.987981-08:00" diff --git a/charts/cloudzero-agent/Chart.yaml b/charts/cloudzero-agent/Chart.yaml index d110bab5..c34446fa 100644 --- a/charts/cloudzero-agent/Chart.yaml +++ b/charts/cloudzero-agent/Chart.yaml @@ -11,7 +11,7 @@ dependencies: - name: kube-state-metrics version: "5.15.*" repository: https://prometheus-community.github.io/helm-charts - condition: kube-state-metrics.enabled + condition: kubeStateMetrics.enabled - name: prometheus-node-exporter version: "4.24.*" repository: https://prometheus-community.github.io/helm-charts diff --git a/charts/cloudzero-agent/README.md b/charts/cloudzero-agent/README.md index 62781e3f..ce85625f 100644 --- a/charts/cloudzero-agent/README.md +++ b/charts/cloudzero-agent/README.md @@ -144,50 +144,6 @@ kube-state-metrics: repository: my-custom-kube-state-metrics/kube-state-metrics ``` -### Custom Scrape Configs - -If running without the default `kube-state-metrics` exporter subchart and your existing `kube-state-metrics` deployment does not have the required 
`prometheus.io/scrape: "true"`, adjust the Prometheus scrape configs as shown: - -`values-override.yaml` -```yaml -prometheusConfig: - scrapeJobs: - kubeStateMetrics: - enabled: false # this disables the default kube-state-metrics scrape job, which will be replaced by an entry in additionalScrapeJobs - additionalScrapeJobs: - - job_name: custom-kube-state-metrics - honor_timestamps: true - scrape_interval: 1m - scrape_timeout: 10s - metrics_path: /metrics - static_configs: - - targets: - - 'my-kube-state-metrics-service.default.svc.cluster.local:8080' - relabel_configs: - - separator: ; - regex: __meta_kubernetes_service_label_(.+) - replacement: $1 - action: labelmap - - source_labels: [__meta_kubernetes_namespace] - separator: ; - regex: (.*) - target_label: namespace - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_service_name] - separator: ; - regex: (.*) - target_label: service - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_pod_node_name] - separator: ; - regex: (.*) - target_label: node - replacement: $1 - action: replace -``` - ### Exporting Pod Labels Pod labels can be exported as metrics using kube-state-metrics. 
To customize the labels for export, modify the values-override.yaml file as shown below: diff --git a/charts/cloudzero-agent/templates/cm.yaml b/charts/cloudzero-agent/templates/cm.yaml index e73e816d..9e501c72 100644 --- a/charts/cloudzero-agent/templates/cm.yaml +++ b/charts/cloudzero-agent/templates/cm.yaml @@ -18,11 +18,10 @@ data: scrape_interval: {{ .Values.prometheusConfig.globalScrapeInterval }} scrape_configs: {{- if .Values.prometheusConfig.scrapeJobs.kubeStateMetrics.enabled }} - - job_name: cloudzero-service-endpoints # kube_*, node_* metrics - honor_labels: true + - job_name: static-kube-state-metrics honor_timestamps: true track_timestamps_staleness: false - scrape_interval: {{ .Values.prometheusConfig.scrapeJobs.kubeStateMetrics.scrapeInterval }} + scrape_interval: 1m scrape_timeout: 10s scrape_protocols: - OpenMetricsText1.0.0 @@ -34,38 +33,6 @@ data: follow_redirects: true enable_http2: true relabel_configs: - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] - separator: ; - regex: "true" - replacement: $1 - action: keep - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape_slow] - separator: ; - regex: "true" - replacement: $1 - action: drop - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] - separator: ; - regex: (https?) 
- target_label: __scheme__ - replacement: $1 - action: replace - - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] - separator: ; - regex: (.+) - target_label: __metrics_path__ - replacement: $1 - action: replace - - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] - separator: ; - regex: (.+?)(?::\d+)?;(\d+) - target_label: __address__ - replacement: $1:$2 - action: replace - - separator: ; - regex: __meta_kubernetes_service_annotation_prometheus_io_param_(.+) - replacement: __param_$1 - action: labelmap - separator: ; regex: __meta_kubernetes_service_label_(.+) replacement: $1 @@ -92,13 +59,13 @@ data: - source_labels: [__name__] regex: "^({{ join "|" .Values.kubeMetrics }})$" action: keep - - action: labelkeep - regex: "^({{ include "cloudzero-agent.requiredMetricLabels" . }})$" - kubernetes_sd_configs: - - role: endpoints - kubeconfig_file: "" - follow_redirects: true - enable_http2: true + - separator: ; + regex: ^(board_asset_tag|container|created_by_kind|created_by_name|image|instance|name|namespace|node|node_kubernetes_io_instance_type|pod|product_name|provider_id|resource|unit|uid|_.*|label_.*|app.kubernetes.io/*|k8s.*)$ + replacement: $1 + action: labelkeep + static_configs: + - targets: + - {{ printf "%s-kube-state-metrics.%s.svc.cluster.local:%d" .Release.Name .Release.Namespace (int .Values.kubeStateMetrics.service.port) }} {{- end }} {{- if .Values.prometheusConfig.scrapeJobs.cadvisor.enabled }} - job_name: cloudzero-nodes-cadvisor # container_* metrics diff --git a/charts/cloudzero-agent/values.yaml b/charts/cloudzero-agent/values.yaml index 7dafe61c..7036e306 100644 --- a/charts/cloudzero-agent/values.yaml +++ b/charts/cloudzero-agent/values.yaml @@ -47,7 +47,7 @@ prometheusConfig: # -- Any items added to this list will be added to the Prometheus scrape configuration. 
additionalScrapeJobs: [] -kube-state-metrics: +kubeStateMetrics: enabled: true fullnameOverride: "cloudzero-state-metrics" extraArgs: