diff --git a/observability/README.md b/observability/README.md
new file mode 100644
index 0000000..4f1e7c6
--- /dev/null
+++ b/observability/README.md
@@ -0,0 +1,28 @@
+
+# Observability Stack
+
+This directory contains an observability implementation based on Grafana tooling.
+
+## Caveats
+1) Reliance on the ref-implementation for SSO
+   - This can be worked around by removing the `envFromSecret: grafana-oidc` setting and the `auth.generic_oauth` section from `prometheus.yaml`, and removing the `grafana-config.yaml` and `grafana-external-secret.yaml` files.
+2) Use of `tls_skip_verify_insecure` for OAuth
+   - This is due to using the ingress certificate. Once this is addressed, the setting can be removed.
+3) Larger memory requirement for the kind cluster
+   - Due to using a more robust Loki deployment, the memory limits have been increased. 16 GB seems to work while leaving ample room in the cluster.
+
+## Components
+The observability stack is built upon:
+- Prometheus - metrics
+- Loki - logging
+  - Promtail - log delivery
+- OpenCost - cost accounting
+- Grafana - visualization
+- Alertmanager - alerting
+
+## Installation
+Note: The stack is configured to use Keycloak for SSO; therefore, the ref-implementation is required for this to work.
+
+`idpbuilder create --use-path-routing --package-dir ./ref-implementation --package-dir ./observability`
+
+A `grafana-config` job will be deployed into the keycloak namespace to create/patch some of the Keycloak components. If deployed at the same time as the `ref-implementation`, this job will fail until the `config` job succeeds.
This is normal.
diff --git a/observability/loki.yaml b/observability/loki.yaml
new file mode 100644
index 0000000..95727fb
--- /dev/null
+++ b/observability/loki.yaml
@@ -0,0 +1,51 @@
+# Argo CD Application: installs the Loki chart in SingleBinary mode with filesystem storage.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: loki
+  namespace: argocd
+  labels:
+    env: dev
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  sources:
+    - repoURL: 'https://grafana.github.io/helm-charts'
+      targetRevision: 6.6.3
+      helm:
+        releaseName: loki
+        values: |
+          deploymentMode: SingleBinary
+          loki:
+            commonConfig:
+              replication_factor: 1
+            storage:
+              type: 'filesystem'
+            schemaConfig:
+              configs:
+                - from: "2024-01-01"
+                  store: tsdb
+                  index:
+                    prefix: loki_index_
+                    period: 24h
+                  object_store: filesystem # we're storing on filesystem so there's no real persistence here.
+                  schema: v13
+          singleBinary:
+            replicas: 1
+          read:
+            replicas: 0
+          backend:
+            replicas: 0
+          write:
+            replicas: 0
+      chart: loki
+  destination:
+    server: "https://kubernetes.default.svc"
+    namespace: monitoring
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
+    automated:
+      selfHeal: true
diff --git a/observability/opencost.yaml b/observability/opencost.yaml
new file mode 100644
index 0000000..9ec6433
--- /dev/null
+++ b/observability/opencost.yaml
@@ -0,0 +1,34 @@
+# Argo CD Application: installs OpenCost, reading metrics from the Prometheus service in the monitoring namespace.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: opencost
+  namespace: argocd
+  labels:
+    env: dev
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  sources:
+    - repoURL: 'https://opencost.github.io/opencost-helm-chart'
+      targetRevision: 1.38.1
+      helm:
+        releaseName: opencost
+        values: |
+          opencost:
+            prometheus:
+              internal:
+                serviceName: prometheus-kube-prometheus-prometheus
+                namespaceName: monitoring
+                port: 9090
+      chart: opencost
+  destination:
+    server: "https://kubernetes.default.svc"
+    namespace: monitoring
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
+    automated:
+      selfHeal: true
diff --git a/observability/prometheus.yaml b/observability/prometheus.yaml
new file mode 100644
index 0000000..841c0c9
--- /dev/null
+++ b/observability/prometheus.yaml
@@ -0,0 +1,61 @@
+# Argo CD Application: installs kube-prometheus-stack; Grafana gets a Loki data source and Keycloak OAuth login.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: prometheus
+  namespace: argocd
+  labels:
+    env: dev
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  sources:
+    - repoURL: 'https://prometheus-community.github.io/helm-charts'
+      targetRevision: 57.2.0
+      helm:
+        releaseName: prometheus
+        values: |
+          grafana:
+            envFromSecret: grafana-oidc
+            additionalDataSources:
+              - name: loki
+                access: proxy
+                orgId: 1
+                type: loki
+                url: http://loki-gateway
+                jsonData:
+                  httpHeaderName1: X-Scope-OrgID
+                secureJsonData:
+                  httpHeaderValue1: '1'
+            grafana.ini:
+              server:
+                root_url: https://cnoe.localtest.me:8443/grafana
+                serve_from_sub_path: true
+              auth.generic_oauth:
+                enabled: true
+                name: grafana
+                allow_sign_up: true
+                auth_url: https://cnoe.localtest.me:8443/keycloak/realms/cnoe/protocol/openid-connect/auth
+                token_url: https://cnoe.localtest.me:8443/keycloak/realms/cnoe/protocol/openid-connect/token
+                api_url: https://cnoe.localtest.me:8443/keycloak/realms/cnoe/protocol/openid-connect/userinfo
+                scopes: openid email profile offline_access roles
+                # Keycloak client role 'admin' -> GrafanaAdmin, 'editor' -> Editor, otherwise Viewer.
+                role_attribute_path: contains(resource_access.grafana.roles[*], 'admin') && 'GrafanaAdmin' || contains(resource_access.grafana.roles[*], 'editor') && 'Editor' || 'Viewer'
+                allow_assign_grafana_admin: true
+                role_attribute_strict: true
+                auto_login: true
+                tls_skip_verify_insecure: true
+      chart: kube-prometheus-stack
+    - repoURL: cnoe://prometheus
+      targetRevision: HEAD
+      path: "manifests"
+  destination:
+    server: "https://kubernetes.default.svc"
+    namespace: monitoring
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
+    automated:
+      selfHeal: true
diff --git
a/observability/prometheus/manifests/grafana-config.yaml b/observability/prometheus/manifests/grafana-config.yaml
new file mode 100644
index 0000000..71ae61e
--- /dev/null
+++ b/observability/prometheus/manifests/grafana-config.yaml
@@ -0,0 +1,210 @@
+---
+# Keycloak admin REST payloads for the grafana-config Job, which mounts them at /var/config/.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-config-job
+  namespace: keycloak
+data:
+  client-role-admin-payload.json: |
+    {"name": "admin"}
+  client-role-editor-payload.json: |
+    {"name": "editor"}
+  client-role-viewer-payload.json: |
+    {"name": "viewer"}
+  admin-role-assignment-payload.json: |
+    [
+      {
+        "id": "$ADMIN_ROLE_ID",
+        "name": "admin"
+      }
+    ]
+  roles-mapper-payload.json: |
+    {
+      "id":"$CLIENT_ROLES_MAPPER_ID",
+      "name": "client roles",
+      "protocol":"openid-connect",
+      "protocolMapper":"oidc-usermodel-client-role-mapper",
+      "config": {
+        "access.token.claim":"true",
+        "claim.name":"resource_access.${client_id}.roles",
+        "jsonType.label":"String",
+        "multivalued":"true",
+        "id.token.claim": "true",
+        "userinfo.token.claim": "true"
+      }
+    }
+  grafana-client-payload.json: |
+    {
+      "protocol": "openid-connect",
+      "clientId": "grafana",
+      "name": "Grafana Client",
+      "description": "Used for Grafana SSO",
+      "publicClient": false,
+      "authorizationServicesEnabled": false,
+      "serviceAccountsEnabled": false,
+      "implicitFlowEnabled": false,
+      "directAccessGrantsEnabled": true,
+      "standardFlowEnabled": true,
+      "frontchannelLogout": true,
+      "attributes": {
+        "saml_idp_initiated_sso_url_name": "",
+        "oauth2.device.authorization.grant.enabled": false,
+        "oidc.ciba.grant.enabled": false
+      },
+      "alwaysDisplayInConsole": false,
+      "rootUrl": "",
+      "baseUrl": "",
+      "redirectUris": [
+        "https://cnoe.localtest.me:8443/grafana/login/generic_oauth"
+      ],
+      "webOrigins": [
+        "/*"
+      ]
+    }
+
+---
+# Job that registers the `grafana` OIDC client and its roles in the cnoe realm, then
+# writes the client id and secret into the keycloak-clients Secret.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: grafana-config
+  namespace: keycloak
+spec:
+  template:
+    metadata:
+      generateName: grafana-config
+    spec:
+      serviceAccountName: keycloak-config
+      restartPolicy: Never
+      volumes:
+        - name: keycloak-config
+          secret:
+            secretName: keycloak-config
+        - name: config-payloads
+          configMap:
+            name: grafana-config-job
+      containers:
+        - name: kubectl
+          image: docker.io/library/ubuntu:22.04
+          volumeMounts:
+            - name: keycloak-config
+              readOnly: true
+              mountPath: "/var/secrets/"
+            - name: config-payloads
+              readOnly: true
+              mountPath: "/var/config/"
+          command: ["/bin/bash", "-c"]
+          args:
+            - |
+              #! /bin/bash
+              # NOTE(review): -x traces every expanded command, so the admin password and bearer
+              # token end up in the pod log; consider dropping -x outside local development.
+              set -ex -o pipefail
+              apt -qq update && apt -qq install curl jq gettext-base -y
+
+              curl -sS -LO "https://dl.k8s.io/release/v1.28.3/bin/linux/amd64/kubectl"
+              chmod +x kubectl
+
+              echo "checking if we're ready to start"
+              # The keycloak-clients Secret must already exist; exit non-zero until it does.
+              set +e
+              ./kubectl get secret -n keycloak keycloak-clients &> /dev/null
+              if [ $? -ne 0 ]; then
+                exit 1
+              fi
+              set -e
+
+              ADMIN_PASSWORD=$(cat /var/secrets/KEYCLOAK_ADMIN_PASSWORD)
+
+              KEYCLOAK_URL=http://keycloak.keycloak.svc.cluster.local:8080/keycloak
+
+              KEYCLOAK_TOKEN=$(curl -sS --fail-with-body -X POST -H "Content-Type: application/x-www-form-urlencoded" \
+                --data-urlencode "username=cnoe-admin" \
+                --data-urlencode "password=${ADMIN_PASSWORD}" \
+                --data-urlencode "grant_type=password" \
+                --data-urlencode "client_id=admin-cli" \
+                ${KEYCLOAK_URL}/realms/master/protocol/openid-connect/token | jq -e -r '.access_token')
+
+              set +e
+
+              curl --fail-with-body -H "Authorization: bearer ${KEYCLOAK_TOKEN}" "${KEYCLOAK_URL}/admin/realms/cnoe" &> /dev/null
+              if [ $? -ne 0 ]; then
+                echo "cnoe realm is not available yet; failing so the Job is retried" >&2
+                exit 1
+              fi
+              set -e
+
+              echo "creating Grafana client"
+              curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X POST --data @/var/config/grafana-client-payload.json \
+                ${KEYCLOAK_URL}/admin/realms/cnoe/clients
+
+              CLIENT_ID=$(curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X GET ${KEYCLOAK_URL}/admin/realms/cnoe/clients | jq -e -r '.[] | select(.clientId == "grafana") | .id')
+
+              CLIENT_SCOPE_GROUPS_ID=$(curl -sS -H "Content-Type: application/json" -H "Authorization: bearer ${KEYCLOAK_TOKEN}" -X GET ${KEYCLOAK_URL}/admin/realms/cnoe/client-scopes | jq -e -r '.[] | select(.name == "groups") | .id')
+              curl -sS -H "Content-Type: application/json" -H "Authorization: bearer ${KEYCLOAK_TOKEN}" -X PUT ${KEYCLOAK_URL}/admin/realms/cnoe/clients/${CLIENT_ID}/default-client-scopes/${CLIENT_SCOPE_GROUPS_ID}
+
+              GRAFANA_CLIENT_SECRET=$(curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X GET ${KEYCLOAK_URL}/admin/realms/cnoe/clients/${CLIENT_ID} | jq -e -r '.secret')
+
+              # Add Grafana roles to client
+              curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X POST --data @/var/config/client-role-admin-payload.json \
+                ${KEYCLOAK_URL}/admin/realms/cnoe/clients/${CLIENT_ID}/roles
+
+              curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X POST --data @/var/config/client-role-editor-payload.json \
+                ${KEYCLOAK_URL}/admin/realms/cnoe/clients/${CLIENT_ID}/roles
+
+              curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X POST --data @/var/config/client-role-viewer-payload.json \
+                ${KEYCLOAK_URL}/admin/realms/cnoe/clients/${CLIENT_ID}/roles
+
+              # Assign first, export afterwards: `export VAR=$(...)` would hide a failed lookup from `set -e`.
+              ADMIN_ROLE_ID=$(curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" "${KEYCLOAK_URL}/admin/realms/cnoe/clients/${CLIENT_ID}/roles/admin" | jq -e -r '.id')
+              export ADMIN_ROLE_ID
+
+              # Assign admin role to user1
+              USER1_USERID=$(curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" "${KEYCLOAK_URL}/admin/realms/cnoe/users?lastName=one" | jq -e -r '.[0].id')
+
+              envsubst < /var/config/admin-role-assignment-payload.json | curl -sS -H 'Content-Type: application/json' \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X POST --data @- \
+                ${KEYCLOAK_URL}/admin/realms/cnoe/users/${USER1_USERID}/role-mappings/clients/${CLIENT_ID}
+
+              # Add role to token
+              CLIENT_SCOPE_ROLES_ID=$(curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X GET ${KEYCLOAK_URL}/admin/realms/cnoe/client-scopes | jq -e -r '.[] | select(.name == "roles") | .id')
+
+              CLIENT_ROLES_MAPPER_ID=$(curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X GET ${KEYCLOAK_URL}/admin/realms/cnoe/client-scopes/${CLIENT_SCOPE_ROLES_ID}/protocol-mappers/models | jq -e -r '.[] | select(.name == "client roles") | .id')
+              export CLIENT_ROLES_MAPPER_ID
+
+              cat /var/config/roles-mapper-payload.json | envsubst '$CLIENT_ROLES_MAPPER_ID' | curl -sS -H "Content-Type: application/json" \
+                -H "Authorization: bearer ${KEYCLOAK_TOKEN}" \
+                -X PUT --data @- \
+                ${KEYCLOAK_URL}/admin/realms/cnoe/client-scopes/${CLIENT_SCOPE_ROLES_ID}/protocol-mappers/models/${CLIENT_ROLES_MAPPER_ID}
+
+              ./kubectl patch secret -n keycloak keycloak-clients --type=json \
+                -p='[{
+                  "op" : "add" ,
+                  "path" : "/data/GRAFANA_CLIENT_SECRET" ,
+                  "value" : "'$(echo -n "$GRAFANA_CLIENT_SECRET" | base64 -w 0)'"
+                },{
+                  "op" : "add" ,
+                  "path" : "/data/GRAFANA_CLIENT_ID" ,
+                  "value" : "'$(echo -n "grafana" | base64 -w 0)'"
+                }]'
diff --git a/observability/prometheus/manifests/grafana-external-secret.yaml b/observability/prometheus/manifests/grafana-external-secret.yaml
new file mode 100644
index 0000000..12975da
--- /dev/null
+++
b/observability/prometheus/manifests/grafana-external-secret.yaml
@@ -0,0 +1,22 @@
+# Copies GRAFANA_CLIENT_ID / GRAFANA_CLIENT_SECRET from keycloak-clients (via the keycloak
+# ClusterSecretStore) into the grafana-oidc Secret under GF_AUTH_GENERIC_OAUTH_* keys.
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+  name: keycloak-oidc
+  namespace: monitoring
+spec:
+  secretStoreRef:
+    name: keycloak
+    kind: ClusterSecretStore
+  target:
+    name: grafana-oidc
+  data:
+    - secretKey: GF_AUTH_GENERIC_OAUTH_CLIENT_ID
+      remoteRef:
+        key: keycloak-clients
+        property: GRAFANA_CLIENT_ID
+    - secretKey: GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET
+      remoteRef:
+        key: keycloak-clients
+        property: GRAFANA_CLIENT_SECRET
diff --git a/observability/prometheus/manifests/grafana-ingress.yaml b/observability/prometheus/manifests/grafana-ingress.yaml
new file mode 100644
index 0000000..8f65a6c
--- /dev/null
+++ b/observability/prometheus/manifests/grafana-ingress.yaml
@@ -0,0 +1,33 @@
+# Routes /grafana on cnoe.localtest.me and localhost to the prometheus-grafana Service (port 80).
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: grafana-ingress
+  namespace: monitoring
+  annotations:
+    nginx.ingress.kubernetes.io/backend-protocol: HTTP
+    nginx.ingress.kubernetes.io/rewrite-target: /grafana/$2
+    nginx.ingress.kubernetes.io/use-regex: 'true'
+spec:
+  ingressClassName: nginx
+  rules:
+    - host: cnoe.localtest.me
+      http:
+        paths:
+          - path: /grafana(/|$)(.*)
+            pathType: ImplementationSpecific
+            backend:
+              service:
+                name: prometheus-grafana
+                port:
+                  number: 80
+    - host: localhost
+      http:
+        paths:
+          - path: /grafana(/|$)(.*)
+            pathType: ImplementationSpecific
+            backend:
+              service:
+                name: prometheus-grafana
+                port:
+                  number: 80
diff --git a/observability/promtail.yaml b/observability/promtail.yaml
new file mode 100644
index 0000000..d02ae2b
--- /dev/null
+++ b/observability/promtail.yaml
@@ -0,0 +1,32 @@
+# Argo CD Application: installs Promtail and pushes logs to loki-gateway as tenant "1".
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: promtail
+  namespace: argocd
+  labels:
+    env: dev
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+  sources:
+    - repoURL: 'https://grafana.github.io/helm-charts'
+      targetRevision: 6.16.0
+      helm:
+        releaseName: promtail
+        values: |
+          config:
+            clients:
+              - url: http://loki-gateway/loki/api/v1/push
+                tenant_id: "1"
+      chart: promtail
+  destination:
+    server: "https://kubernetes.default.svc"
+    namespace: monitoring
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true
+    automated:
+      selfHeal: true
diff --git a/ref-implementation/keycloak/manifests/keycloak-config.yaml b/ref-implementation/keycloak/manifests/keycloak-config.yaml
index 4bb098e..46950fc 100644
--- a/ref-implementation/keycloak/manifests/keycloak-config.yaml
+++ b/ref-implementation/keycloak/manifests/keycloak-config.yaml
@@ -100,7 +100,7 @@ data:
   user-user1.json: |
     {
       "username": "user1",
-      "email": "",
+      "email": "user1@noreply.com",
       "firstName": "user",
       "lastName": "one",
       "requiredActions": [],
@@ -113,7 +113,7 @@ data:
   user-user2.json: |
     {
       "username": "user2",
-      "email": "",
+      "email": "user2@noreply.com",
       "firstName": "user",
       "lastName": "two",
       "requiredActions": [],