From 878a3dd8fee7b0978179560fc64adb330418c2f8 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Tue, 8 Nov 2022 12:36:20 +0200 Subject: [PATCH 01/18] Initial logging config --- .../roles/docker/files/docker-daemon.json | 3 +- monitoring/docker-compose.dev.yml | 12 + monitoring/docker-compose.yml | 70 ++ .../logging-universal-dashboard_rev1.json | 926 ++++++++++++++++++ monitoring/loki/loki-config-minio.yml | 20 + monitoring/loki/loki-config.yml | 65 ++ monitoring/package-metadata.json | 2 +- monitoring/promtail/promtail-config.yml | 47 + monitoring/swarm.sh | 5 +- 9 files changed, 1145 insertions(+), 5 deletions(-) create mode 100644 monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json create mode 100644 monitoring/loki/loki-config-minio.yml create mode 100644 monitoring/loki/loki-config.yml create mode 100644 monitoring/promtail/promtail-config.yml diff --git a/infrastructure/ansible/roles/docker/files/docker-daemon.json b/infrastructure/ansible/roles/docker/files/docker-daemon.json index b8ab8c9d..54d65077 100644 --- a/infrastructure/ansible/roles/docker/files/docker-daemon.json +++ b/infrastructure/ansible/roles/docker/files/docker-daemon.json @@ -2,6 +2,7 @@ "log-driver": "json-file", "log-opts": { "max-size": "10m", - "max-file": "3" + "max-file": "3", + "labels-regex": "^.+" } } diff --git a/monitoring/docker-compose.dev.yml b/monitoring/docker-compose.dev.yml index aaaf782a..751b8338 100644 --- a/monitoring/docker-compose.dev.yml +++ b/monitoring/docker-compose.dev.yml @@ -12,3 +12,15 @@ services: - target: 9090 published: 9090 mode: host + + loki: + ports: + - target: 3100 + published: 3100 + mode: host + + minio1: + ports: + - target: 9001 + published: 9001 + mode: host diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 5cb5ded9..e77d94a6 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -26,6 +26,8 @@ services: source: kminion-groups_rev1.json - target: /etc/grafana/provisioning/dashboards/applications/kminion-topic_rev1.json source: kminion-topic_rev1.json + - target: /etc/grafana/provisioning/dashboards/containers/logging-universal-dashboard_rev1.json + source: logging-universal-dashboard_rev1.json prometheus: image: prom/prometheus:v2.38.0 @@ -71,6 +73,56 @@ services: - KAFKA_BROKER_LIST=kafka:9092 - KAFKA_COMPRESSION=gzip + loki: + image: grafana/loki:2.6.1 + volumes: + - loki_data:/tmp/loki + configs: + - target: /etc/loki/loki-config.yml + source: loki-config.yml + command: -config.file=/etc/loki/loki-config.yml + deploy: + labels: + - prometheus-job-service=loki + - prometheus-address=loki:3100 + + promtail: + image: grafana/promtail:2.6.1 + volumes: + - /var/lib/docker/containers:/host/containers + - /var/log:/var/log:ro + configs: + - target: /etc/promtail/promtail-config.yml + source: promtail-config.yml + command: -config.file=/etc/promtail/promtail-config.yml + deploy: + mode: global + + minio1: + image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z + entrypoint: sh + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...1}/data{1...2}' + environment: + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + healthcheck: + test: + [ + "CMD", + "curl", + "-f", + "http://localhost:9000/minio/health/live" + ] + interval: 30s + timeout: 20s + retries: 3 + hostname: minio1 + volumes: + - minio_data1:/data1 + - minio_data2:/data2 + deploy: + replicas: 1 + configs: grafana.ini: file: ./grafana/grafana.ini @@ -112,12 +164,30 @@ 
configs: name: kminion-topic_rev1.json-${kminion_topic_rev1_json_DIGEST:?err} labels: name: grafana + logging-universal-dashboard_rev1.json: + file: ./grafana/dashboards/containers/logging-universal-dashboard_rev1.json + name: logging-universal-dashboard_rev1.json-${logging_universal_dashboard_rev1_json_DIGEST:?err} + labels: + name: grafana prometheus.yml: file: ./prometheus/prometheus.yml name: prometheus.yml-${prometheus_yml_DIGEST:?err} labels: name: prometheus + loki-config.yml: + file: ./loki/loki-config.yml + name: loki-config.yml-${loki_config_yml_DIGEST:?err} + labels: + name: loki + promtail-config.yml: + file: ./promtail/promtail-config.yml + name: promtail-config.yml-${promtail_config_yml_DIGEST:?err} + labels: + name: promtail volumes: prometheus_data: grafana_data: + loki_data: + minio_data1: + minio_data2: diff --git a/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json b/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json new file mode 100644 index 00000000..8d3bf322 --- /dev/null +++ b/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json @@ -0,0 +1,926 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Universal and flexible dashboard for logging", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 12611, + "graphTooltip": 0, + "id": 11, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "description": "Total Count of log lines in the specified time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "rgb(31, 255, 7)", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(31, 255, 7)", + "value": null + }, + { + "color": "rgb(31, 255, 7)", + "value": 10 + }, + { + "color": "rgb(31, 255, 7)", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 11, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval]))", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Total Count of logs", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "description": "Total Count: of $searchable_pattern in the specified time range", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "rgb(222, 15, 43)", + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "rgb(222, 15, 43)", + "value": null + }, + { + "color": "rgb(222, 15, 43)", + "value": 10 + }, + { + "color": "rgb(222, 15, 43)", + "value": 50 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 6, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval]))", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Total Count: of \"$searchable_pattern\"", + "type": "stat" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "description": "Live logs is a like 'tail -f | grep' in a real time", + "gridPos": { + "h": 22, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 2, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": true, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "{swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\"", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Live logs (filtered by \"$searchable_pattern\")", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 25 + }, + "id": 19, + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "7.0.4", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval])) by (stream)", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Total count of stderr / stdout pie", + "type": "piechart" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 7, + "y": 25 + }, + "id": 20, + "interval": "1m", + "links": [], + "maxDataPoints": "", + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "donut", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "6.4.3", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval])) by (swarm_task_name)", + "queryType": "range", + "refId": "A" + } + ], + "title": "Matched word: \"$searchable_pattern\" donut", + "type": "piechart" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "color": "#299c46", + "text": "0" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "#C4162A", + "value": 50 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 25 + }, + "id": 9, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": false + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval])) * 100 / sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval]))", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "\"$searchable_pattern\" Percentage for specified time", + "type": "gauge" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 18, + "interval": "1m", + "links": [], + "maxDataPoints": "", + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": 
"sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[$__interval])) by (swarm_task_name)", + "queryType": "range", + "refId": "A" + } + ], + "title": "Matched word: \"$searchable_pattern\" historical", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 10, + "type": "log" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 10, + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(rate(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"} |~ \"(?i)$searchable_pattern\")[30s])) by (swarm_task_name)", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "\"$searchable_pattern\" Rate per Task", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 6, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "{stream=\"stderr\"} stderr" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "{stream=\"stdout\"} stdout" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#56A64B", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 7, + "links": [], + 
"maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "9.1.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "editorMode": "code", + "expr": "sum(count_over_time(({swarm_service_name=\"$service_name\", stream=~\"$stream\", swarm_task_name=~\"$task_name\"})[$__interval])) by (stream)", + "hide": false, + "queryType": "range", + "refId": "A" + } + ], + "title": "Count of stderr / stdout historical", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 37, + "style": "dark", + "tags": [ + "Loki", + "logging" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "instant_prometheus-kafka-adapter", + "value": "instant_prometheus-kafka-adapter" + }, + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "definition": "label_values({swarm_service_name=~\".+\"}, swarm_service_name)", + "hide": 0, + "includeAll": false, + "label": "Service", + "multi": false, + "name": "service_name", + "options": [], + "query": "label_values({swarm_service_name=~\".+\"}, swarm_service_name)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "definition": "label_values({swarm_service_name=\"$service_name\"}, swarm_task_name)", + "hide": 0, + "includeAll": true, + "label": "Task", + "multi": true, + "name": "task_name", + "options": [], + "query": "label_values({swarm_service_name=\"$service_name\"}, swarm_task_name)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "loki", + "uid": "P00201832B18B88C3" + }, + "definition": "label_values({swarm_service_name=\"$service_name\"}, stream)", + "hide": 0, + "includeAll": true, + "label": "Stream", + "multi": false, + "name": "stream", + "options": [], + "query": "label_values({swarm_service_name=\"$service_name\"}, stream)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "error", + "value": "error" + }, + "hide": 0, + "label": "Search (case insensitive)", + "name": "searchable_pattern", + "options": [ + { + "selected": true, + "text": "error", + "value": "error" + } + ], + "query": "error", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Container logs", + "uid": "fRIvzUZMf", + "version": 4, + "weekStart": "" +} diff --git a/monitoring/loki/loki-config-minio.yml b/monitoring/loki/loki-config-minio.yml new file mode 100644 index 00000000..46020dac --- /dev/null +++ b/monitoring/loki/loki-config-minio.yml @@ -0,0 +1,20 @@ +storage_config: + aws: + # Note: use a fully qualified domain name, like localhost. 
+ # full example: http://loki:supersecret@localhost.:9000 + s3: http://:@: + s3forcepathstyle: true + boltdb_shipper: + active_index_directory: /loki/boltdb-shipper-active + cache_location: /loki/boltdb-shipper-cache + shared_store: s3 + +schema_config: + configs: + - from: 2020-07-01 + store: boltdb-shipper + object_store: aws + schema: v11 + index: + prefix: index_ + period: 24h diff --git a/monitoring/loki/loki-config.yml b/monitoring/loki/loki-config.yml new file mode 100644 index 00000000..83abdf9a --- /dev/null +++ b/monitoring/loki/loki-config.yml @@ -0,0 +1,65 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + grpc_server_max_concurrent_streams: 0 + +ingester: + wal: + enabled: true + dir: /tmp/wal + lifecycler: + address: 127.0.0.1 + ring: + kvstore: + store: inmemory + replication_factor: 1 + final_sleep: 0s + chunk_idle_period: 1h + max_chunk_age: 1h + chunk_target_size: 1048576 + chunk_retain_period: 30s + max_transfer_retries: 0 + +schema_config: + configs: + - from: 2020-07-01 + store: boltdb-shipper + object_store: aws + schema: v11 + index: + prefix: index_ + period: 24h + +storage_config: + boltdb_shipper: + active_index_directory: /loki/boltdb-shipper-active + cache_location: /loki/boltdb-shipper-cache + resync_interval: 30s + shared_store: s3 + aws: + # Note: use a fully qualified domain name, like localhost. + # full example: http://loki:supersecret@localhost.:9000 + s3: http://minioadmin:minioadmin@minio1.:9000/loki + s3forcepathstyle: true + +compactor: + working_directory: /loki/boltdb-shipper-compactor + shared_store: s3 + +limits_config: + reject_old_samples: true + reject_old_samples_max_age: 168h + ingestion_rate_mb: 100 + ingestion_burst_size_mb: 150 + max_concurrent_tail_requests: 200 + max_cache_freshness_per_query: 10m + max_streams_per_user: 500 + +chunk_store_config: + max_look_back_period: 0s + +table_manager: + retention_deletes_enabled: false + retention_period: 0s diff --git a/monitoring/package-metadata.json b/monitoring/package-metadata.json index 6532c538..7c155e7f 100644 --- a/monitoring/package-metadata.json +++ b/monitoring/package-metadata.json @@ -4,7 +4,7 @@ "description": "A package for monitoring the platform services", "type": "infrastructure", "version": "0.0.1", - "dependencies": ["message-bus-kafka"], + "dependencies": [], "environmentVariables": { "STATEFUL_NODES": "single", "GF_SECURITY_ADMIN_USER": "admin", diff --git a/monitoring/promtail/promtail-config.yml b/monitoring/promtail/promtail-config.yml new file mode 100644 index 00000000..1cce8fe2 --- /dev/null +++ b/monitoring/promtail/promtail-config.yml @@ -0,0 +1,47 @@ +server: + http_listen_address: 0.0.0.0 + http_listen_port: 9080 + +positions: + filename: /tmp/positions.yaml + +clients: +- url: http://loki:3100/loki/api/v1/push + +scrape_configs: + +- job_name: containers + static_configs: + - targets: + - localhost + labels: + job: containerlogs + __path__: /host/containers/*/*log + + pipeline_stages: + - json: + expressions: + log: log + stream: stream + time: time + tag: attrs.tag + stack_name: attrs."com.docker.stack.namespace" + swarm_service_name: attrs."com.docker.swarm.service.name" + swarm_task_name: attrs."com.docker.swarm.task.name" + swarm_node_id: attrs."com.docker.swarm.node.id" + - regex: + expression: "^/host/containers/(?P.{12}).+/.+-json.log$" + source: filename + - timestamp: + format: RFC3339Nano + source: time + - labels: + stream: + container_id: + tag: + stack_name: + swarm_service_name: + swarm_task_name: + swarm_node_id: 
+ - output: + source: log diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index 800707e7..c4174ff2 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -14,12 +14,11 @@ ROOT_PATH="${COMPOSE_FILE_PATH}/.." . "${ROOT_PATH}/utils/docker-utils.sh" . "${ROOT_PATH}/utils/log.sh" - if [[ "${MODE}" == "dev" ]]; then - log info "Running Message Bus Kafka package in DEV mode" + log info "Running Monitoring package in DEV mode" monitoring_dev_compose_param="-c ${COMPOSE_FILE_PATH}/docker-compose.dev.yml" else - log info "Running Message Bus Kafka package in PROD mode" + log info "Running Monitoring package in PROD mode" monitoring_dev_compose_param="" fi From a647f81d388b2a70e41eceaab22e25c6257eb582 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 20 Jan 2023 11:13:59 +0200 Subject: [PATCH 02/18] Add minio cluster config --- monitoring/docker-compose.cluster.yml | 97 +++++++++++++++++++++++++++ monitoring/docker-compose.yml | 12 ++-- monitoring/loki/loki-config-minio.yml | 20 ------ monitoring/loki/loki-config.yml | 4 +- monitoring/package-metadata.json | 6 +- 5 files changed, 108 insertions(+), 31 deletions(-) delete mode 100644 monitoring/loki/loki-config-minio.yml diff --git a/monitoring/docker-compose.cluster.yml b/monitoring/docker-compose.cluster.yml index 93a25cc2..f428404d 100644 --- a/monitoring/docker-compose.cluster.yml +++ b/monitoring/docker-compose.cluster.yml @@ -29,6 +29,97 @@ services: - '--web.console.templates=/etc/prometheus/consoles' - '--web.enable-lifecycle' + minio1: + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + deploy: + placement: + constraints: + - "node.labels.name==node-1" + + minio2: + image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z + entrypoint: sh + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + environment: + MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} + MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} + healthcheck: + test: + [ + "CMD", + "curl", + "-f", + "http://localhost:9000/minio/health/live" + ] + interval: 30s + timeout: 20s + retries: 3 + hostname: minio2 + volumes: + - minio2_data1:/data1 + - minio2_data2:/data2 + deploy: + placement: + constraints: + - "node.labels.name==node-1" + replicas: 1 + + minio3: + image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z + entrypoint: sh + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + environment: + MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} + MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} + healthcheck: + test: + [ + "CMD", + "curl", + "-f", + "http://localhost:9000/minio/health/live" + ] + interval: 30s + timeout: 20s + retries: 3 + hostname: minio3 + volumes: + - minio3_data1:/data1 + - minio3_data2:/data2 + deploy: + placement: + constraints: + - "node.labels.name==node-2" + replicas: 1 + + minio4: + image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z + entrypoint: sh + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + environment: + MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} + MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} + healthcheck: + test: + [ + "CMD", + "curl", + "-f", + "http://localhost:9000/minio/health/live" + ] + interval: 30s + timeout: 20s + retries: 3 + hostname: minio4 + volumes: + - minio4_data1:/data1 + - minio4_data2:/data2 + deploy: + placement: + 
constraints: + - "node.labels.name==node-3" + replicas: 1 + configs: prometheus.yml: file: ./prometheus/prometheus.yml @@ -38,3 +129,9 @@ configs: volumes: prometheus_data_backup: + minio2_data1: + minio2_data2: + minio3_data1: + minio3_data2: + minio4_data1: + minio4_data2: diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 153162d1..516a33ae 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -126,8 +126,8 @@ services: entrypoint: sh command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...1}/data{1...2}' environment: - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin + MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} + MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} healthcheck: test: [ @@ -141,8 +141,8 @@ services: retries: 3 hostname: minio1 volumes: - - minio_data1:/data1 - - minio_data2:/data2 + - minio1_data1:/data1 + - minio1_data2:/data2 deploy: replicas: 1 @@ -212,5 +212,5 @@ volumes: prometheus_data: grafana_data: loki_data: - minio_data1: - minio_data2: + minio1_data1: + minio1_data2: diff --git a/monitoring/loki/loki-config-minio.yml b/monitoring/loki/loki-config-minio.yml deleted file mode 100644 index 46020dac..00000000 --- a/monitoring/loki/loki-config-minio.yml +++ /dev/null @@ -1,20 +0,0 @@ -storage_config: - aws: - # Note: use a fully qualified domain name, like localhost. - # full example: http://loki:supersecret@localhost.:9000 - s3: http://:@: - s3forcepathstyle: true - boltdb_shipper: - active_index_directory: /loki/boltdb-shipper-active - cache_location: /loki/boltdb-shipper-cache - shared_store: s3 - -schema_config: - configs: - - from: 2020-07-01 - store: boltdb-shipper - object_store: aws - schema: v11 - index: - prefix: index_ - period: 24h diff --git a/monitoring/loki/loki-config.yml b/monitoring/loki/loki-config.yml index 83abdf9a..070f8267 100644 --- a/monitoring/loki/loki-config.yml +++ b/monitoring/loki/loki-config.yml @@ -39,9 +39,7 @@ storage_config: resync_interval: 30s shared_store: s3 aws: - # Note: use a fully qualified domain name, like localhost. 
- # full example: http://loki:supersecret@localhost.:9000 - s3: http://minioadmin:minioadmin@minio1.:9000/loki + s3: http://${MO_SECURITY_ADMIN_USER}:${MO_SECURITY_ADMIN_PASSWORD}@minio1.:9000/loki s3forcepathstyle: true compactor: diff --git a/monitoring/package-metadata.json b/monitoring/package-metadata.json index 48798832..c8671042 100644 --- a/monitoring/package-metadata.json +++ b/monitoring/package-metadata.json @@ -6,7 +6,7 @@ "version": "0.0.1", "dependencies": [], "environmentVariables": { - "STATEFUL_NODES": "single", + "STATEFUL_NODES": "cluster", "GF_SECURITY_ADMIN_USER": "admin", "GF_SECURITY_ADMIN_PASSWORD": "dev_password_only", "GF_SMTP_ENABLED": "false", @@ -20,6 +20,8 @@ "KC_GRAFANA_CLIENT_ID": "grafana-oauth", "KC_GRAFANA_CLIENT_SECRET": "CV14QfwnpYFj1IH5dK5lScPNCYAIYP1c", "GF_SERVER_DOMAIN": "localhost", - "GF_SERVER_ROOT_URL": "http://localhost:3000" + "GF_SERVER_ROOT_URL": "http://localhost:3000", + "MO_SECURITY_ADMIN_USER": "admin", + "MO_SECURITY_ADMIN_PASSWORD": "dev_password_only" } } From d145b84de9f4b67371c4b37ee8d167cff5e693c8 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 20 Jan 2023 11:31:53 +0200 Subject: [PATCH 03/18] Fix loki config env vars --- monitoring/docker-compose.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 516a33ae..4a4c7ed8 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -100,10 +100,13 @@ services: image: grafana/loki:2.6.1 volumes: - loki_data:/tmp/loki + environment: + MO_SECURITY_ADMIN_USER: ${MO_SECURITY_ADMIN_USER} + MO_SECURITY_ADMIN_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} configs: - target: /etc/loki/loki-config.yml source: loki-config.yml - command: -config.file=/etc/loki/loki-config.yml + command: -config.file=/etc/loki/loki-config.yml -config.expand-env=true deploy: labels: - prometheus-job-service=loki From ce3b9a68a864755380d9983f353b1f6a8c6a9610 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 20 Jan 2023 12:18:03 +0200 Subject: [PATCH 04/18] Fix minio cluster config --- monitoring/docker-compose.yml | 2 +- monitoring/swarm.sh | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 4a4c7ed8..5c0a07ee 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -127,7 +127,7 @@ services: minio1: image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z entrypoint: sh - command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...1}/data{1...2}' + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...${NUM_MINIO_SERVERS}}/data{1...2}' environment: MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index ce4ba5dc..66d4c178 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -56,6 +56,9 @@ function initialize_package() { if [[ "${CLUSTERED_MODE}" == "true" ]]; then monitoring_cluster_compose_filename="docker-compose.cluster.yml" + export NUM_MINIO_SERVERS=4 + else + export NUM_MINIO_SERVERS=1 fi if [[ "${MODE}" == "dev" ]]; then From 8ff0a5de6af3f4a5b4cf79098be03adcbe156391 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 20 Jan 2023 12:50:10 +0200 Subject: [PATCH 05/18] Remove unnecessary command from minio config --- monitoring/docker-compose.cluster.yml | 1 - 1 file changed, 1 
deletion(-) diff --git a/monitoring/docker-compose.cluster.yml b/monitoring/docker-compose.cluster.yml index f428404d..59bfe293 100644 --- a/monitoring/docker-compose.cluster.yml +++ b/monitoring/docker-compose.cluster.yml @@ -30,7 +30,6 @@ services: - '--web.enable-lifecycle' minio1: - command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' deploy: placement: constraints: From 411edc07d95857df2a9f779c608e460ae20839a2 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 20 Jan 2023 16:36:58 +0200 Subject: [PATCH 06/18] Add nginx config for minio --- .env.cluster | 2 +- .../http-minio-secure.conf | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf diff --git a/.env.cluster b/.env.cluster index 33aec3b6..03a0e74a 100644 --- a/.env.cluster +++ b/.env.cluster @@ -28,7 +28,7 @@ REPMGR_PARTNER_NODES=santempi-psql-1,santempi-psql-2,santempi-psql-3 # Reverse Proxy - Nginx REVERSE_PROXY_INSTANCES=3 DOMAIN_NAME=domain -SUBDOMAINS=openhimcomms.domain,openhimcore.domain,openhimconsole.domain,kibana.domain,reports.domain,santewww.domain,santempi.domain,superset.domain,keycloak.domain,grafana.domain +SUBDOMAINS=openhimcomms.domain,openhimcore.domain,openhimconsole.domain,kibana.domain,reports.domain,santewww.domain,santempi.domain,superset.domain,keycloak.domain,grafana.domain,minio.domain STAGING=true INSECURE=false diff --git a/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf b/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf new file mode 100644 index 00000000..d6f6efd9 --- /dev/null +++ b/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf @@ -0,0 +1,31 @@ +server { + listen 80; + server_name minio.*; + + location /.well-known/acme-challenge/ { + resolver 127.0.0.11 valid=30s; + set $upstream_certbot certbot; + proxy_pass http://$upstream_certbot$request_uri; + } + + location / { + return 301 https://$host$request_uri; + } +} +server { + listen 443 ssl; + listen [::]:443 ssl; + server_name minio.*; + + location /.well-known/acme-challenge/ { + resolver 127.0.0.11 valid=30s; + set $upstream_certbot certbot; + proxy_pass http://$upstream_certbot$request_uri; + } + + location / { + resolver 127.0.0.11 valid=30s; + set $upstream_minio minio1; + proxy_pass http://$upstream_minio:9001; + } +} From 0f871253826389251001dd06ae4864e2f7faafb2 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Thu, 26 Jan 2023 15:38:07 +0200 Subject: [PATCH 07/18] Apply suggestions from code review Co-authored-by: Michael Loosen <24909683+michaelloosen@users.noreply.github.com> --- monitoring/docker-compose.dev.yml | 2 +- .../containers/logging-universal-dashboard_rev1.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/monitoring/docker-compose.dev.yml b/monitoring/docker-compose.dev.yml index 3da46417..7ee03b07 100644 --- a/monitoring/docker-compose.dev.yml +++ b/monitoring/docker-compose.dev.yml @@ -22,5 +22,5 @@ services: minio1: ports: - target: 9001 - published: 9001 + published: 9005 mode: host diff --git a/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json b/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json index 8d3bf322..1229002d 100644 --- a/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json +++ b/monitoring/grafana/dashboards/containers/logging-universal-dashboard_rev1.json @@ -35,7 +35,7 @@ "type": 
"loki", "uid": "P00201832B18B88C3" }, - "description": "Total Count of log lines in the specified time range", + "description": "Total count of log lines in the specified time range", "fieldConfig": { "defaults": { "color": { @@ -111,7 +111,7 @@ "refId": "A" } ], - "title": "Total Count of logs", + "title": "Total count of logs", "type": "stat" }, { From 5b05bf49334e76d50a1b2ee588c54e089f52fbb2 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Thu, 26 Jan 2023 16:15:54 +0200 Subject: [PATCH 08/18] Address feedback: naming changes, improved service destruction --- monitoring/docker-compose.cluster.yml | 44 +++++++++++++-------------- monitoring/docker-compose.dev.yml | 2 +- monitoring/docker-compose.yml | 26 ++++++++-------- monitoring/loki/loki-config.yml | 2 +- monitoring/swarm.sh | 12 ++++++-- 5 files changed, 46 insertions(+), 40 deletions(-) diff --git a/monitoring/docker-compose.cluster.yml b/monitoring/docker-compose.cluster.yml index 59bfe293..a1d307d0 100644 --- a/monitoring/docker-compose.cluster.yml +++ b/monitoring/docker-compose.cluster.yml @@ -29,16 +29,16 @@ services: - '--web.console.templates=/etc/prometheus/consoles' - '--web.enable-lifecycle' - minio1: + minio-01: deploy: placement: constraints: - "node.labels.name==node-1" - minio2: + minio-02: image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z entrypoint: sh - command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio-0{1...4}/data{1...2}' environment: MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} @@ -53,20 +53,20 @@ services: interval: 30s timeout: 20s retries: 3 - hostname: minio2 + hostname: minio-02 volumes: - - minio2_data1:/data1 - - minio2_data2:/data2 + - minio-02-data1:/data1 + - minio-02-data2:/data2 deploy: placement: constraints: - "node.labels.name==node-1" replicas: 1 - minio3: + minio-03: image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z entrypoint: sh - command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio-0{1...4}/data{1...2}' environment: MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} @@ -81,20 +81,20 @@ services: interval: 30s timeout: 20s retries: 3 - hostname: minio3 + hostname: minio-03 volumes: - - minio3_data1:/data1 - - minio3_data2:/data2 + - minio-03-data1:/data1 + - minio-03-data2:/data2 deploy: placement: constraints: - "node.labels.name==node-2" replicas: 1 - minio4: + minio-04: image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z entrypoint: sh - command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...4}/data{1...2}' + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio-0{1...4}/data{1...2}' environment: MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} @@ -109,10 +109,10 @@ services: interval: 30s timeout: 20s retries: 3 - hostname: minio4 + hostname: minio-04 volumes: - - minio4_data1:/data1 - - minio4_data2:/data2 + - minio-04-data1:/data1 + - minio-04-data2:/data2 deploy: placement: constraints: @@ -128,9 +128,9 @@ configs: volumes: prometheus_data_backup: - minio2_data1: - minio2_data2: - minio3_data1: - 
minio3_data2: - minio4_data1: - minio4_data2: + minio-02-data1: + minio-02-data2: + minio-03-data1: + minio-03-data2: + minio-04-data1: + minio-04-data2: diff --git a/monitoring/docker-compose.dev.yml b/monitoring/docker-compose.dev.yml index 7ee03b07..38b4c708 100644 --- a/monitoring/docker-compose.dev.yml +++ b/monitoring/docker-compose.dev.yml @@ -19,7 +19,7 @@ services: published: 3100 mode: host - minio1: + minio-01: ports: - target: 9001 published: 9005 diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 5c0a07ee..6f1699bb 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -4,7 +4,7 @@ services: grafana: image: grafana/grafana-oss:9.2.3 volumes: - - grafana_data:/var/lib/grafana + - grafana-data:/var/lib/grafana environment: GF_SECURITY_ADMIN_USER: ${GF_SECURITY_ADMIN_USER} GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD} @@ -56,7 +56,7 @@ services: image: prom/prometheus:v2.38.0 user: root volumes: - - prometheus_data:/prometheus + - prometheus-data:/prometheus - /var/run/docker.sock:/var/run/docker.sock:ro configs: - target: /etc/prometheus/prometheus.yml @@ -99,7 +99,7 @@ services: loki: image: grafana/loki:2.6.1 volumes: - - loki_data:/tmp/loki + - loki-data:/tmp/loki environment: MO_SECURITY_ADMIN_USER: ${MO_SECURITY_ADMIN_USER} MO_SECURITY_ADMIN_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} @@ -124,10 +124,10 @@ services: deploy: mode: global - minio1: + minio-01: image: quay.io/minio/minio:RELEASE.2022-10-24T18-35-07Z entrypoint: sh - command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio{1...${NUM_MINIO_SERVERS}}/data{1...2}' + command: -c 'mkdir -p /data1/loki /data2/loki && minio server --console-address ":9001" http://minio-0{1...${NUM_MINIO_SERVERS}}/data{1...2}' environment: MINIO_ROOT_USER: ${MO_SECURITY_ADMIN_USER} MINIO_ROOT_PASSWORD: ${MO_SECURITY_ADMIN_PASSWORD} @@ -142,10 +142,10 @@ services: interval: 30s timeout: 20s retries: 3 - hostname: minio1 + hostname: minio-01 volumes: - - minio1_data1:/data1 - - minio1_data2:/data2 + - minio-01-data1:/data1 + - minio-01-data2:/data2 deploy: replicas: 1 @@ -212,8 +212,8 @@ configs: name: promtail volumes: - prometheus_data: - grafana_data: - loki_data: - minio1_data1: - minio1_data2: + prometheus-data: + grafana-data: + loki-data: + minio-01-data1: + minio-01-data2: diff --git a/monitoring/loki/loki-config.yml b/monitoring/loki/loki-config.yml index 070f8267..40fd5c11 100644 --- a/monitoring/loki/loki-config.yml +++ b/monitoring/loki/loki-config.yml @@ -39,7 +39,7 @@ storage_config: resync_interval: 30s shared_store: s3 aws: - s3: http://${MO_SECURITY_ADMIN_USER}:${MO_SECURITY_ADMIN_PASSWORD}@minio1.:9000/loki + s3: http://${MO_SECURITY_ADMIN_USER}:${MO_SECURITY_ADMIN_PASSWORD}@minio-01.:9000/loki s3forcepathstyle: true compactor: diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index 66d4c178..80aa19bf 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -22,17 +22,23 @@ function init_vars() { "grafana" "prometheus" "prometheus-kafka-adapter" + "loki" + "minio-01" ) if [[ "${CLUSTERED_MODE}" == "true" ]]; then SCALED_SERVICES=( "${SCALED_SERVICES[@]}" "prometheus_backup" + "minio-02" + "minio-03" + "minio-04" ) fi SERVICE_NAMES=( "${SCALED_SERVICES[@]}" "cadvisor" "node-exporter" + "promtail" ) readonly ACTION @@ -80,19 +86,19 @@ function initialize_package() { function scale_services_down() { docker::scale_services_down "${SCALED_SERVICES[@]}" - docker::service_destroy "cadvisor" "node-exporter" + 
docker::service_destroy "cadvisor" "node-exporter" "promtail" } function destroy_package() { docker::service_destroy "${SERVICE_NAMES[@]}" - docker::try_remove_volume prometheus_data grafana_data + docker::try_remove_volume prometheus-data grafana-data loki-data minio-01-data1 minio-01-data2 prometheus_data_backup if [[ $CLUSTERED_MODE == "true" ]]; then log warn "Volumes are only deleted on the host on which the command is run. Monitoring volumes on other nodes are not deleted" fi - docker::prune_configs "grafana" "prometheus" + docker::prune_configs "grafana" "prometheus" "promtail" "loki" } main() { From 50d181648a8c6c9dbcbd5091f95e361e989ff9d5 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Wed, 8 Feb 2023 16:42:23 +0200 Subject: [PATCH 09/18] Remove prometheus kafka adapter We a backup prometheus instance so we don't need this anymore. --- monitoring/docker-compose.yml | 6 ------ monitoring/prometheus/prometheus.yml | 3 --- monitoring/swarm.sh | 1 - 3 files changed, 10 deletions(-) diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml index 6f1699bb..67037abe 100644 --- a/monitoring/docker-compose.yml +++ b/monitoring/docker-compose.yml @@ -90,12 +90,6 @@ services: deploy: mode: global - prometheus-kafka-adapter: - image: telefonica/prometheus-kafka-adapter:1.8.0 - environment: - - KAFKA_BROKER_LIST=kafka:9092 - - KAFKA_COMPRESSION=gzip - loki: image: grafana/loki:2.6.1 volumes: diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index b327a3bf..e0e71c5e 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -74,6 +74,3 @@ scrape_configs: # Use the prometheus-job Swarm label as Prometheus job label. - source_labels: [__meta_dockerswarm_service_label_prometheus_job_task] target_label: job - -remote_write: - - url: "http://prometheus-kafka-adapter:8080/receive" diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index 80aa19bf..2fe61ec3 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -21,7 +21,6 @@ function init_vars() { SCALED_SERVICES=( "grafana" "prometheus" - "prometheus-kafka-adapter" "loki" "minio-01" ) From 84e8290dc0692d7caca0c5bc90d9ba0fa99b84e4 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Wed, 8 Feb 2023 16:43:29 +0200 Subject: [PATCH 10/18] Add insecure config for minio --- .../package-conf-insecure/http-minio-insecure.conf | 9 +++++++++ .../package-conf-secure/http-minio-secure.conf | 2 +- reverse-proxy-nginx/package-metadata.json | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 reverse-proxy-nginx/package-conf-insecure/http-minio-insecure.conf diff --git a/reverse-proxy-nginx/package-conf-insecure/http-minio-insecure.conf b/reverse-proxy-nginx/package-conf-insecure/http-minio-insecure.conf new file mode 100644 index 00000000..f0f6be12 --- /dev/null +++ b/reverse-proxy-nginx/package-conf-insecure/http-minio-insecure.conf @@ -0,0 +1,9 @@ +server { + listen 9001; + + location / { + resolver 127.0.0.11 valid=30s; + set $upstream_minio minio-01; + proxy_pass http://$upstream_minio:9001; + } +} diff --git a/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf b/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf index d6f6efd9..3defe27e 100644 --- a/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf +++ b/reverse-proxy-nginx/package-conf-secure/http-minio-secure.conf @@ -25,7 +25,7 @@ server { location / { resolver 127.0.0.11 valid=30s; - set $upstream_minio minio1; + set $upstream_minio minio-01; 
proxy_pass http://$upstream_minio:9001; } } diff --git a/reverse-proxy-nginx/package-metadata.json b/reverse-proxy-nginx/package-metadata.json index cbd0d183..a5904958 100644 --- a/reverse-proxy-nginx/package-metadata.json +++ b/reverse-proxy-nginx/package-metadata.json @@ -16,6 +16,6 @@ "RENEWAL_EMAIL": "dummy@jembi.org", "STAGING": "true", "INSECURE": "true", - "INSECURE_PORTS": "5001:5001-80:80-8080:8080-5601:5601-5488:5488-3000:3000-9200:9200-8089:8089" + "INSECURE_PORTS": "5001:5001-80:80-8080:8080-5601:5601-5488:5488-3000:3000-9200:9200-8089:8089-9001:9001" } } From 1406616f24842ca3ff2714cacd17c350d5a7b73d Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Wed, 8 Feb 2023 16:44:09 +0200 Subject: [PATCH 11/18] Add package deploy tests for monitoring package --- monitoring/swarm.sh | 3 +- .../kafka-packages.cluster.feature | 2 -- .../features/cluster-mode/monitoring.feature | 36 +++++++++++++++++++ .../single-mode/kafka-packages.feature | 18 +--------- .../features/single-mode/monitoring.feature | 29 +++++++++++++++ test/cucumber/package.json | 4 ++- 6 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 test/cucumber/features/cluster-mode/monitoring.feature create mode 100644 test/cucumber/features/single-mode/monitoring.feature diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index 2fe61ec3..4be59627 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -91,9 +91,10 @@ function scale_services_down() { function destroy_package() { docker::service_destroy "${SERVICE_NAMES[@]}" - docker::try_remove_volume prometheus-data grafana-data loki-data minio-01-data1 minio-01-data2 prometheus_data_backup + docker::try_remove_volume prometheus-data grafana-data minio-01-data1 minio-01-data2 prometheus_data_backup loki-data if [[ $CLUSTERED_MODE == "true" ]]; then + docker::try_remove_volume minio-02-data1 minio-02-data2 log warn "Volumes are only deleted on the host on which the command is run. Monitoring volumes on other nodes are not deleted" fi diff --git a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature index 4b7ab997..08481d54 100644 --- a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature +++ b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature @@ -18,7 +18,6 @@ Feature: Kafka and its dependent packages? When I launch the platform with params Then The service "grafana" should be started with 1 replica And The service "prometheus" should be started with 1 replica - And The service "prometheus-kafka-adapter" should be started with 1 replica And The service "prometheus_backup" should be started with 1 replica And The service "cadvisor" should be started with 3 replicas And The service "node-exporter" should be started with 3 replicas @@ -35,7 +34,6 @@ Feature: Kafka and its dependent packages? And The service "kafka-minion" should be removed And The service "grafana" should be removed And The service "prometheus" should be removed - And The service "prometheus-kafka-adapter" should be removed And The service "prometheus_backup" should be removed And The service "cadvisor" should be removed And The service "node-exporter" should be removed diff --git a/test/cucumber/features/cluster-mode/monitoring.feature b/test/cucumber/features/cluster-mode/monitoring.feature new file mode 100644 index 00000000..cf98465f --- /dev/null +++ b/test/cucumber/features/cluster-mode/monitoring.feature @@ -0,0 +1,36 @@ +Feature: Monitoring package? 
+ Does the Monitoring package work as expected + + Scenario: Init Monitoring + Given I use parameters "init monitoring --only --dev --env-file=.env.cluster" + When I launch the platform with params + Then The service "grafana" should be started with 1 replica + And The service "prometheus" should be started with 1 replica + And The service "cadvisor" should be started with 3 replica + And The service "node-exporter" should be started with 3 replica + And The service "loki" should be started with 1 replica + And The service "promtail" should be started with 3 replica + And The service "minio-01" should be started with 1 replica + And The service "minio-02" should be started with 1 replica + And The service "minio-03" should be started with 1 replica + And The service "minio-04" should be started with 1 replica + And The service "prometheus_backup" should be started with 1 replica + And There should be 8 volumes + + Scenario: Destroy Monitoring package + Given I use parameters "destroy monitoring --dev --env-file=.env.cluster" + When I launch the platform with params + Then The service "grafana" should be removed + And The service "prometheus" should be removed + And The service "cadvisor" should be removed + And The service "node-exporter" should be removed + And The service "loki" should be removed + And The service "promtail" should be removed + And The service "minio-01" should be removed + And The service "minio-02" should be removed + And The service "minio-03" should be removed + And The service "minio-04" should be removed + And The service "prometheus_backup" should be removed + And There should be 0 service + And There should be 0 volume + And There should be 0 config diff --git a/test/cucumber/features/single-mode/kafka-packages.feature b/test/cucumber/features/single-mode/kafka-packages.feature index 5c0853c3..1ccfebb1 100644 --- a/test/cucumber/features/single-mode/kafka-packages.feature +++ b/test/cucumber/features/single-mode/kafka-packages.feature @@ -22,19 +22,8 @@ Feature: Kafka and its dependent packages? When I launch the platform with params Then The service "kafka-unbundler-consumer" should be started with 1 replica - Scenario: Init Monitoring - Given I use parameters "init monitoring --only --dev --env-file=.env.local" - When I launch the platform with params - Then The service "grafana" should be started with 1 replica - And The service "prometheus" should be started with 1 replica - And The service "prometheus-kafka-adapter" should be started with 1 replica - And The service "cadvisor" should be started with 1 replica - And The service "node-exporter" should be started with 1 replica - And The service "cadvisor" should have healthy containers - And There should be 4 volumes - Scenario: Destroy Kafka and its dependent packages - Given I use parameters "destroy kafka-mapper-consumer kafka-unbundler-consumer monitoring --dev --env-file=.env.local" + Given I use parameters "destroy kafka-mapper-consumer kafka-unbundler-consumer --dev --env-file=.env.local" When I launch the platform with params Then The service "zookeeper-1" should be removed And The service "kafka" should be removed @@ -42,11 +31,6 @@ Feature: Kafka and its dependent packages? 
And The service "kafka-minion" should be removed And The service "kafka-mapper-consumer" should be removed And The service "kafka-unbundler-consumer" should be removed - And The service "grafana" should be removed - And The service "prometheus" should be removed - And The service "prometheus-kafka-adapter" should be removed - And The service "cadvisor" should be removed - And The service "node-exporter" should be removed And There should be 0 service And There should be 0 volume And There should be 0 config diff --git a/test/cucumber/features/single-mode/monitoring.feature b/test/cucumber/features/single-mode/monitoring.feature new file mode 100644 index 00000000..41171ad3 --- /dev/null +++ b/test/cucumber/features/single-mode/monitoring.feature @@ -0,0 +1,29 @@ +Feature: Monitoring package? + Does the Monitoring package work as expected + + Scenario: Init Monitoring + Given I use parameters "init monitoring --only --dev --env-file=.env.local" + When I launch the platform with params + Then The service "grafana" should be started with 1 replica + And The service "prometheus" should be started with 1 replica + And The service "cadvisor" should be started with 1 replica + And The service "node-exporter" should be started with 1 replica + And The service "cadvisor" should have healthy containers + And The service "loki" should be started with 1 replica + And The service "promtail" should be started with 1 replica + And The service "minio-01" should be started with 1 replica + And There should be 6 volumes + + Scenario: Destroy Monitoring package + Given I use parameters "destroy monitoring --dev --env-file=.env.cluster" + When I launch the platform with params + Then The service "grafana" should be removed + And The service "prometheus" should be removed + And The service "cadvisor" should be removed + And The service "node-exporter" should be removed + And The service "loki" should be removed + And The service "promtail" should be removed + And The service "minio-01" should be removed + And There should be 0 service + And There should be 0 volume + And There should be 0 config diff --git a/test/cucumber/package.json b/test/cucumber/package.json index e819aa85..0bbf4878 100644 --- a/test/cucumber/package.json +++ b/test/cucumber/package.json @@ -23,7 +23,9 @@ "test:cluster:keycloak": "cucumber-js 'features/cluster-mode/keycloak.cluster.feature'", "test:single:jsreport": "cucumber-js 'features/single-mode/jsreport.feature'", "test:single:superset": "cucumber-js 'features/single-mode/superset.feature'", - "test:single:mpi-mediator": "cucumber-js 'features/single-mode/mpi-mediator.feature'" + "test:single:mpi-mediator": "cucumber-js 'features/single-mode/mpi-mediator.feature'", + "test:single:monitoring": "cucumber-js 'features/single-mode/monitoring.feature'", + "test:cluster:monitoring": "cucumber-js 'features/cluster-mode/monitoring.feature'" }, "devDependencies": { "@cucumber/cucumber": "8.5.0", From dd271dfbfcf57af5df1cbd14ddbcacfdbb5205b3 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Wed, 8 Feb 2023 17:07:12 +0200 Subject: [PATCH 12/18] Fix tests --- monitoring/swarm.sh | 1 + test/cucumber/features/cluster-mode/monitoring.feature | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/monitoring/swarm.sh b/monitoring/swarm.sh index 4be59627..6f6c9f0f 100644 --- a/monitoring/swarm.sh +++ b/monitoring/swarm.sh @@ -94,6 +94,7 @@ function destroy_package() { docker::try_remove_volume prometheus-data grafana-data minio-01-data1 minio-01-data2 prometheus_data_backup loki-data if 
[[ $CLUSTERED_MODE == "true" ]]; then + sleep 5 docker::try_remove_volume minio-02-data1 minio-02-data2 log warn "Volumes are only deleted on the host on which the command is run. Monitoring volumes on other nodes are not deleted" fi diff --git a/test/cucumber/features/cluster-mode/monitoring.feature b/test/cucumber/features/cluster-mode/monitoring.feature index cf98465f..9fa9936e 100644 --- a/test/cucumber/features/cluster-mode/monitoring.feature +++ b/test/cucumber/features/cluster-mode/monitoring.feature @@ -15,7 +15,7 @@ Feature: Monitoring package? And The service "minio-03" should be started with 1 replica And The service "minio-04" should be started with 1 replica And The service "prometheus_backup" should be started with 1 replica - And There should be 8 volumes + And There should be 7 volumes Scenario: Destroy Monitoring package Given I use parameters "destroy monitoring --dev --env-file=.env.cluster" From 0f33ba7b7688ce74fc294fb4c9e9b79953406168 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Thu, 9 Feb 2023 10:10:38 +0200 Subject: [PATCH 13/18] Fix kafka cluster tests --- .../cluster-mode/kafka-packages.cluster.feature | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature index 08481d54..69642261 100644 --- a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature +++ b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature @@ -13,18 +13,8 @@ Feature: Kafka and its dependent packages? And The service "message-bus-kafka-config-importer" should be removed And There should be 6 services - Scenario: Init Monitoring - Given I use parameters "init monitoring --only --dev --env-file=.env.cluster" - When I launch the platform with params - Then The service "grafana" should be started with 1 replica - And The service "prometheus" should be started with 1 replica - And The service "prometheus_backup" should be started with 1 replica - And The service "cadvisor" should be started with 3 replicas - And The service "node-exporter" should be started with 3 replicas - And The service "cadvisor" should have healthy containers - Scenario: Destroy Kafka and its dependent packages - Given I use parameters "destroy monitoring --dev --env-file=.env.cluster" + Given I use parameters "destroy message-bus-kafka --dev --env-file=.env.cluster" When I launch the platform with params Then The service "zookeeper-1" should be removed And The service "zookeeper-2" should be removed @@ -32,11 +22,6 @@ Feature: Kafka and its dependent packages? 
And The service "kafka" should be removed And The service "kafdrop" should be removed And The service "kafka-minion" should be removed - And The service "grafana" should be removed - And The service "prometheus" should be removed - And The service "prometheus_backup" should be removed - And The service "cadvisor" should be removed - And The service "node-exporter" should be removed And There should be 0 service And There should be 0 volume And There should be 0 config From 126b7073c8ec9c5a492d8f47aec24caa81d20bd5 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Thu, 23 Feb 2023 14:53:43 +0200 Subject: [PATCH 14/18] Only run monitoring test when monitoring files have changed --- .github/workflows/run-tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/run-tests.sh b/.github/workflows/run-tests.sh index 193a2de5..c7226d97 100755 --- a/.github/workflows/run-tests.sh +++ b/.github/workflows/run-tests.sh @@ -32,6 +32,8 @@ else DOCKER_HOST=ssh://ubuntu@$GITHUB_RUN_ID.jembi.cloud yarn test:"$NODE_MODE":hapi elif [[ $folder_name == *"santempi"* ]]; then DOCKER_HOST=ssh://ubuntu@$GITHUB_RUN_ID.jembi.cloud yarn test:"$NODE_MODE":sante + elif [[ $folder_name == *"monitoring"* ]]; then + DOCKER_HOST=ssh://ubuntu@$GITHUB_RUN_ID.jembi.cloud yarn test:"$NODE_MODE":monitoring fi done fi From 61be63abc4980bf0b21ef533ef27b634f3bec87d Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Thu, 23 Feb 2023 15:04:24 +0200 Subject: [PATCH 15/18] Migrate monitoring tests to new CLI v2 --- test/cucumber/features/cluster-mode/monitoring.feature | 4 ++-- test/cucumber/features/single-mode/monitoring.feature | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/cucumber/features/cluster-mode/monitoring.feature b/test/cucumber/features/cluster-mode/monitoring.feature index 9fa9936e..7170a50a 100644 --- a/test/cucumber/features/cluster-mode/monitoring.feature +++ b/test/cucumber/features/cluster-mode/monitoring.feature @@ -2,7 +2,7 @@ Feature: Monitoring package? Does the Monitoring package work as expected Scenario: Init Monitoring - Given I use parameters "init monitoring --only --dev --env-file=.env.cluster" + Given I use parameters "package init -n=monitoring --only --dev --env-file=.env.cluster" When I launch the platform with params Then The service "grafana" should be started with 1 replica And The service "prometheus" should be started with 1 replica @@ -18,7 +18,7 @@ Feature: Monitoring package? And There should be 7 volumes Scenario: Destroy Monitoring package - Given I use parameters "destroy monitoring --dev --env-file=.env.cluster" + Given I use parameters "package destroy -n=monitoring --dev --env-file=.env.cluster" When I launch the platform with params Then The service "grafana" should be removed And The service "prometheus" should be removed diff --git a/test/cucumber/features/single-mode/monitoring.feature b/test/cucumber/features/single-mode/monitoring.feature index 41171ad3..80a12ddf 100644 --- a/test/cucumber/features/single-mode/monitoring.feature +++ b/test/cucumber/features/single-mode/monitoring.feature @@ -2,7 +2,7 @@ Feature: Monitoring package? 
Does the Monitoring package work as expected Scenario: Init Monitoring - Given I use parameters "init monitoring --only --dev --env-file=.env.local" + Given I use parameters "package init -n=monitoring --only --dev --env-file=.env.local" When I launch the platform with params Then The service "grafana" should be started with 1 replica And The service "prometheus" should be started with 1 replica @@ -15,7 +15,7 @@ Feature: Monitoring package? And There should be 6 volumes Scenario: Destroy Monitoring package - Given I use parameters "destroy monitoring --dev --env-file=.env.cluster" + Given I use parameters "package destroy -n=monitoring --dev --env-file=.env.cluster" When I launch the platform with params Then The service "grafana" should be removed And The service "prometheus" should be removed From c1f3c7dda1db47bc0ce3a4dc5ec2ab690798f3c8 Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Thu, 23 Feb 2023 16:20:38 +0200 Subject: [PATCH 16/18] Fix Kafka package test --- .../features/cluster-mode/kafka-packages.cluster.feature | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature index dee10359..12915c46 100644 --- a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature +++ b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature @@ -18,7 +18,6 @@ Feature: Kafka and its dependent packages? When I launch the platform with params Then The service "grafana" should be started with 1 replica And The service "prometheus" should be started with 1 replica - And The service "prometheus-kafka-adapter" should be started with 1 replica And The service "prometheus_backup" should be started with 1 replica And The service "cadvisor" should be started with 3 replicas And The service "node-exporter" should be started with 3 replicas @@ -35,7 +34,6 @@ Feature: Kafka and its dependent packages? 
And The service "kafka-minion" should be removed And The service "grafana" should be removed And The service "prometheus" should be removed - And The service "prometheus-kafka-adapter" should be removed And The service "prometheus_backup" should be removed And The service "cadvisor" should be removed And The service "node-exporter" should be removed From 9da07245d128d1f906e27eece2a912647e5d591b Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 24 Feb 2023 11:21:29 +0200 Subject: [PATCH 17/18] Pin keycloak image version --- identity-access-manager-keycloak/docker-compose.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/identity-access-manager-keycloak/docker-compose.yml b/identity-access-manager-keycloak/docker-compose.yml index b55d7497..50663bad 100644 --- a/identity-access-manager-keycloak/docker-compose.yml +++ b/identity-access-manager-keycloak/docker-compose.yml @@ -2,8 +2,14 @@ version: '3.9' services: identity-access-manager-keycloak: - image: keycloak/keycloak - command: ["start", "--proxy=edge", "--hostname-url=${KC_FRONTEND_URL}", "--import-realm"] + image: keycloak/keycloak:20.0 + command: + [ + "start", + "--proxy=edge", + "--hostname-url=${KC_FRONTEND_URL}", + "--import-realm" + ] hostname: identity-access-manager-keycloak healthcheck: test: curl --fail http://localhost:8080/health/ready || exit 1 From 42226fb76216c4a7d5c8e64fc3a38608dbb20d7e Mon Sep 17 00:00:00 2001 From: Ryan Crichton Date: Fri, 24 Feb 2023 11:21:45 +0200 Subject: [PATCH 18/18] Remove monitoring from Kafka cluster test --- .../cluster-mode/kafka-packages.cluster.feature | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature index 12915c46..9b09deb4 100644 --- a/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature +++ b/test/cucumber/features/cluster-mode/kafka-packages.cluster.feature @@ -13,18 +13,8 @@ Feature: Kafka and its dependent packages? And The service "message-bus-kafka-config-importer" should be removed And There should be 6 services - Scenario: Init Monitoring - Given I use parameters "package init -n=monitoring --only --dev --env-file=.env.cluster" - When I launch the platform with params - Then The service "grafana" should be started with 1 replica - And The service "prometheus" should be started with 1 replica - And The service "prometheus_backup" should be started with 1 replica - And The service "cadvisor" should be started with 3 replicas - And The service "node-exporter" should be started with 3 replicas - And The service "cadvisor" should have healthy containers - Scenario: Destroy Kafka and its dependent packages - Given I use parameters "package destroy -n=monitoring --dev --env-file=.env.cluster" + Given I use parameters "package destroy -n=message-bus-kafka --dev --env-file=.env.cluster" When I launch the platform with params Then The service "zookeeper-1" should be removed And The service "zookeeper-2" should be removed @@ -32,11 +22,6 @@ Feature: Kafka and its dependent packages? 
And The service "kafka" should be removed And The service "kafdrop" should be removed And The service "kafka-minion" should be removed - And The service "grafana" should be removed - And The service "prometheus" should be removed - And The service "prometheus_backup" should be removed - And The service "cadvisor" should be removed - And The service "node-exporter" should be removed And There should be 0 service And There should be 0 volume And There should be 0 config