Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Signed-off-by: Thibault Mange <[email protected]>
  • Loading branch information
thibaultmg committed Dec 18, 2024
1 parent c16ba00 commit 99a0442
Show file tree
Hide file tree
Showing 22 changed files with 19,887 additions and 0 deletions.
6 changes: 6 additions & 0 deletions cicd-scripts/dashcheck/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# The recipe below is a single logical shell command (lines joined with "\"),
# so it behaves the same whether or not the running make honours .ONESHELL.
# bash is required for `set -o pipefail`.
SHELL := /bin/bash
.ONESHELL:

# Directory holding the dashboards and the scrape config that must match them.
DASHBOARDS_DIR ?= ../../operators/multiclusterobservability/manifests/base/grafana/nexus/acm

# check-metrics fails when the scrape config does not collect exactly the
# metrics used by the dashboards.
#
# `set -e -o pipefail` is needed because, when the whole recipe runs in one
# shell, only the last command's exit status is seen by make: without it a
# failing extraction or a failing `go run` would be masked by the commands
# that follow. The temporary file is unique per run and removed on every exit
# path by the trap.
.PHONY: check-metrics
check-metrics:
	set -e -o pipefail; \
	metrics_file="$$(mktemp)"; \
	trap 'rm -f "$$metrics_file"' EXIT; \
	./extract-dashboards-metrics.sh "$(DASHBOARDS_DIR)" | tr '\n' ',' > "$$metrics_file"; \
	go run main.go --scrape-configs="$(DASHBOARDS_DIR)/scrape-config.yaml" --dashboard-metrics="$$(cat "$$metrics_file")"
19 changes: 19 additions & 0 deletions cicd-scripts/dashcheck/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

Generate per-metric series counts (this queries the Prometheus server configured in `count-metrics.sh`):

```bash
./extract-dashboards-metrics.sh <dashboards-directory> | tr '\n' ' ' | xargs ./count-metrics.sh > metrics-stats.txt
```

Sort extracted metrics to identify highest cardinality ones:

```bash
sort -k2,2nr metrics-stats.txt | grep -v " 0"
```


Check the Prometheus rules:
```bash
cat grafana/nexus/acm/prometheus-rule.yaml | yq '.spec' | promtool check rules
```

30 changes: 30 additions & 0 deletions cicd-scripts/dashcheck/count-metrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Print how many series Prometheus currently returns for each given metric.
#
# Usage:  count-metrics.sh <metric1> [<metric2> ...]
# Output: one "<metric> <count>" line per argument; a metric for which the
#         query returns no result is reported with a count of 0.
#
# Environment:
#   PROMETHEUS_URL  Base URL of the Prometheus server
#                   (default: http://localhost:9090).
#
# The script exits non-zero when Prometheus cannot be reached or rejects a
# query, instead of reporting such failures as a count of 0.

set -e -o pipefail

# Ensure the script is executed with at least one metric
if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <metric1> [<metric2> ...]" >&2
  exit 1
fi

# Prometheus server URL, overridable from the environment
PROMETHEUS_URL="${PROMETHEUS_URL:-http://localhost:9090}"

# Loop through each metric passed as an argument
for metric in "$@"; do
  # Query Prometheus for the count of the metric. -S keeps curl's own error
  # message visible while -s still hides the progress meter.
  if ! response=$(curl -sS -G \
    --data-urlencode "query=count(${metric})" \
    "$PROMETHEUS_URL/api/v1/query"); then
    echo "Failed to query $PROMETHEUS_URL for metric $metric" >&2
    exit 1
  fi

  # An error response has no .data.result, which would otherwise be
  # indistinguishable from "metric has no series".
  status=$(jq -r '.status' <<<"$response")
  if [[ "$status" != "success" ]]; then
    echo "Query for metric $metric failed: $(jq -r '.error // "unknown error"' <<<"$response")" >&2
    exit 1
  fi

  # Extract the count from the JSON response; an empty result means 0 series
  count=$(jq -r '.data.result[0].value[1] // "0"' <<<"$response")
  echo "$metric $count"
done
30 changes: 30 additions & 0 deletions cicd-scripts/dashcheck/extract-dashboards-metrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Print the metrics used by the Grafana dashboards found under a directory,
# one metric name per line.
#
# Usage: extract-dashboards-metrics.sh <directory>
#
# Every file matching 'dash*.yaml' (except '*ocp311.yaml') is searched for
# recursively. The value of the first entry of its `.data` map is extracted
# with yq (presumably the dashboard JSON embedded in a ConfigMap - confirm
# against the manifests) and the extracted files are analysed with mimirtool.
#
# Requires: yq, mimirtool, jq.

set -e -o pipefail

if [ $# -ne 1 ]; then
  echo "Usage: $0 <directory>" >&2
  exit 1
fi

SEARCH_DIR="$1"

# find runs in a process substitution below, where its exit status is not
# observed, so reject a bad directory up front.
if [[ ! -d "$SEARCH_DIR" ]]; then
  echo "Not a directory: $SEARCH_DIR" >&2
  exit 1
fi

if [[ "$(uname)" == "Darwin" ]]; then
  TMP_DIR="${TMPDIR:-/tmp}"
else
  TMP_DIR="/tmp"
fi

# A fresh directory per run: nothing left over from an earlier (possibly
# failed) run can leak into the analysis, and concurrent runs do not collide.
OUTPUT_DIR="$(mktemp -d "${TMP_DIR%/}/grafana-dashboards.XXXXXX")"
trap 'rm -rf "$OUTPUT_DIR"' EXIT

# Collect the extracted files in an array so that paths containing whitespace
# are passed to mimirtool intact.
dashboards=()
while IFS= read -r -d '' file; do
  extracted="$OUTPUT_DIR/$(basename "$file")"
  yq '.data | to_entries | .[0].value' "$file" > "$extracted"
  dashboards+=("$extracted")
done < <(find "$SEARCH_DIR" -name 'dash*.yaml' ! -name '*ocp311.yaml' -print0)

if [[ ${#dashboards[@]} -eq 0 ]]; then
  echo "No dashboards found in $SEARCH_DIR" >&2
  exit 1
fi

mimirtool analyze dashboard "${dashboards[@]}" --output "$OUTPUT_DIR/dashboards-metrics.json"

# `.metricsUsed[]` rather than `.metricsUsed.[]`: the dotted form is rejected
# by older jq releases (e.g. 1.6).
jq -r '.metricsUsed[]' "$OUTPUT_DIR/dashboards-metrics.json"
180 changes: 180 additions & 0 deletions cicd-scripts/dashcheck/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package main

import (
"flag"
"fmt"
"os"
"slices"
"strings"

prometheusalpha1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
"github.com/prometheus/prometheus/promql/parser"
"sigs.k8s.io/yaml"
)

// main checks that the metrics collected by the ScrapeConfig files given with
// --scrape-configs are exactly the metrics listed in --dashboard-metrics.
//
// The process exits with status 1 when a flag is missing, a ScrapeConfig
// cannot be read or parsed, a metric is collected more than once, or the two
// sets of metrics differ. Callers such as make or CI rely on that exit status,
// so every failure path must be non-zero. Diagnostics go to stderr.
func main() {
	scrapeConfigsArg := flag.String("scrape-configs", "", "Path to the comma separated scrape_configs")
	dashboardMetricsArg := flag.String("dashboard-metrics", "", "Comma separated dashboard metrics")
	flag.Parse()

	if *scrapeConfigsArg == "" {
		fatalf("Please provide the scrape_configs paths")
	}

	if *dashboardMetricsArg == "" {
		fatalf("Please provide the dashboard metrics")
	}

	// The list is usually produced by joining lines with commas, which leaves
	// a trailing separator and may leave stray whitespace: normalise both.
	dashboardMetrics := strings.Split(*dashboardMetricsArg, ",")
	for i, metric := range dashboardMetrics {
		dashboardMetrics[i] = strings.TrimSpace(metric)
	}
	dashboardMetrics = slices.DeleteFunc(dashboardMetrics, func(s string) bool { return s == "" })
	if len(dashboardMetrics) == 0 {
		fatalf("No dashboard metrics found")
	}

	scrapeConfigsList, err := readScrapeConfigs(*scrapeConfigsArg)
	if err != nil {
		fatalf("Error reading scrape configs: %v", err)
	}

	if len(scrapeConfigsList) == 0 {
		fatalf("No scrape configs found")
	}

	collectedMetrics := []string{}
	for _, scrapeConfig := range scrapeConfigsList {
		if scrapeConfig == nil {
			fatalf("Scrape config is nil")
		}

		metrics, err := extractCollectedMetrics(scrapeConfig)
		if err != nil {
			fatalf("Error extracting metrics: %v", err)
		}

		// A metric listed twice within one ScrapeConfig.
		if dups := getDuplicates(metrics); len(dups) > 0 {
			fatalf("Duplicate metrics found in %s: %v", scrapeConfig.Name, dups)
		}

		collectedMetrics = append(collectedMetrics, metrics...)
	}

	// A metric collected by more than one ScrapeConfig.
	if dups := getDuplicates(collectedMetrics); len(dups) > 0 {
		fatalf("Duplicate metrics found in scrape configs: %v", dups)
	}

	// Report both directions of the mismatch before failing so a single run
	// shows everything that has to be fixed.
	added, removed := diff(dashboardMetrics, collectedMetrics)
	if len(added) > 0 {
		fmt.Fprintln(os.Stderr, "Metrics found in scrape configs but not in dashboards: ", added)
	}

	if len(removed) > 0 {
		fmt.Fprintln(os.Stderr, "Metrics found in dashboards but not in scrape configs: ", removed)
	}

	if len(added) > 0 || len(removed) > 0 {
		os.Exit(1)
	}

	greenCheckMark := "\033[32m" + "✓" + "\033[0m"
	fmt.Println(greenCheckMark, "Scrape configs are collecting all dashboards metrics, not more. Good job!")
}

// fatalf writes the formatted message, followed by a newline, to stderr and
// terminates the process with exit status 1.
func fatalf(format string, args ...any) {
	fmt.Fprintf(os.Stderr, format+"\n", args...)
	os.Exit(1)
}

// readScrapeConfigs loads every ScrapeConfig file listed in scrapeConfigsPath,
// a comma separated list of file paths.
//
// Whitespace around each path is trimmed and empty entries (for example the
// one produced by a trailing comma) are skipped, so the returned slice can be
// empty. The first file that cannot be read or parsed aborts the load and its
// error is returned.
func readScrapeConfigs(scrapeConfigsPath string) ([]*prometheusalpha1.ScrapeConfig, error) {
	paths := strings.Split(scrapeConfigsPath, ",")
	ret := make([]*prometheusalpha1.ScrapeConfig, 0, len(paths))
	for _, path := range paths {
		path = strings.TrimSpace(path)
		if path == "" {
			continue
		}

		fmt.Println("Reading scrape config: ", path)
		res, err := readScrapeConfig(path)
		if err != nil {
			return nil, err
		}
		ret = append(ret, res)
	}

	return ret, nil
}

// readScrapeConfig reads the file at scrapeConfigsPath and decodes its YAML
// content into a ScrapeConfig.
//
// It returns a wrapped error naming the file when the file cannot be read or
// its content cannot be unmarshalled.
func readScrapeConfig(scrapeConfigsPath string) (*prometheusalpha1.ScrapeConfig, error) {
	raw, readErr := os.ReadFile(scrapeConfigsPath)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read file %s: %w", scrapeConfigsPath, readErr)
	}

	var decoded prometheusalpha1.ScrapeConfig
	decodeErr := yaml.Unmarshal(raw, &decoded)
	if decodeErr != nil {
		return nil, fmt.Errorf("failed to unmarshal file %s: %w", scrapeConfigsPath, decodeErr)
	}

	return &decoded, nil
}

// extractCollectedMetrics returns the metric names selected by the "match[]"
// params of scrapeConfig, in the order they appear.
//
// Every param must parse as a plain PromQL vector selector; any other
// expression type is an error. A name is taken from each `__name__` matcher
// of the selector. Only equality matchers are accepted: the value of a regex
// or negative matcher is a pattern, not a metric name, and recording it as a
// name would make the later comparison with the dashboard metrics misleading.
// Selectors without a `__name__` matcher contribute nothing.
func extractCollectedMetrics(scrapeConfig *prometheusalpha1.ScrapeConfig) ([]string, error) {
	ret := []string{}
	for _, query := range scrapeConfig.Spec.Params["match[]"] {
		expr, err := parser.ParseExpr(query)
		if err != nil {
			return nil, fmt.Errorf("failed to parse query %s: %w", query, err)
		}

		selector, ok := expr.(*parser.VectorSelector)
		if !ok {
			return nil, fmt.Errorf("unsupported expression type: %T", expr)
		}

		for _, matcher := range selector.LabelMatchers {
			if matcher.Name != "__name__" {
				continue
			}

			// The match type is compared through its string form so the
			// labels package does not have to be imported.
			if op := matcher.Type.String(); op != "=" {
				return nil, fmt.Errorf("unsupported matcher %s in query %s: only equality matchers on __name__ are supported", matcher, query)
			}

			ret = append(ret, matcher.Value)
		}
	}

	return ret, nil
}

// getDuplicates returns the values that occur more than once in elements.
//
// Each duplicated value is reported exactly once, however many times it is
// repeated, in the order in which its second occurrence is encountered. The
// result is an empty, non-nil slice when there are no duplicates.
func getDuplicates(elements []string) []string {
	seen := make(map[string]int, len(elements))
	ret := []string{}
	for _, element := range elements {
		seen[element]++
		// Record the value only when it becomes a duplicate, not on every
		// further repetition.
		if seen[element] == 2 {
			ret = append(ret, element)
		}
	}
	return ret
}

// diff compares a and b as sets, ignoring repeated values.
//
// added holds the values present in b but not in a; removed holds the values
// present in a but not in b. Both are sorted so the result does not depend on
// map iteration order and reports are reproducible between runs. Either
// result is nil when there is nothing to report.
func diff(a, b []string) (added, removed []string) {
	inA := make(map[string]struct{}, len(a))
	for _, x := range a {
		inA[x] = struct{}{}
	}

	inB := make(map[string]struct{}, len(b))
	for _, x := range b {
		inB[x] = struct{}{}
	}

	// Identify elements in b that are not in a
	for x := range inB {
		if _, ok := inA[x]; !ok {
			added = append(added, x)
		}
	}

	// Identify elements in a that are not in b
	for x := range inA {
		if _, ok := inB[x]; !ok {
			removed = append(removed, x)
		}
	}

	slices.Sort(added)
	slices.Sort(removed)

	return added, removed
}
Loading

0 comments on commit 99a0442

Please sign in to comment.