From d730cd26036e19445ddccf0b7d5e1d63e356f29b Mon Sep 17 00:00:00 2001 From: Matthias Theuermann <73223147+mati007thm@users.noreply.github.com> Date: Thu, 8 Aug 2024 08:44:06 +0200 Subject: [PATCH] feat: cloud native observability (#15) * fix: update Traefik to v3 to use OpenTelemetry * fix: changed apiVersion for traefik * fix: added ingress for dapr dashboard and adjusted prometheus-values * fix: using new yaml for jaeger * fix: rename files * fix: removed unintended loop due to built-in retry in golang * fix: added prometheus rules and error generation script * fix: removed unnecessary lines of code * fix: added chaos engineering files --- .../chaos-engineering/burn-cpu.yaml | 17 ++ .../chaos-engineering/chaos-on-the-rise.yaml | 39 +++++ .../chaos-engineering/ingress.yaml | 14 ++ .../chaos-engineering/network-bandwith.yaml | 20 +++ .../chaos-engineering/network-corrupt.yaml | 17 ++ .../chaos-engineering/pod-failure.yaml | 14 ++ .../chaos-engineering/schedule-chaos.yaml | 21 +++ dapr-distributed-calendar/error.sh | 147 +++++++++++++++++ dapr-distributed-calendar/fluent/ingress.yaml | 2 +- dapr-distributed-calendar/go/go_events.go | 39 ++++- dapr-distributed-calendar/jaeger/ingress.yaml | 4 +- dapr-distributed-calendar/jaeger/jaeger.yaml | 27 ++++ .../jaeger/simplest.yaml | 5 - .../kubernetes-deploy.sh | 7 +- .../kubernetes/go-events.yaml | 3 + .../kubernetes/ingress.yaml | 17 +- .../kubernetes/node-controller.yaml | 5 + .../kubernetes/python-messages.yaml | 3 + dapr-distributed-calendar/locust/ingress.yaml | 2 +- .../node/node_controller.js | 152 +++++++++++++++--- .../otel/otel-collector-values.yaml | 2 +- .../prometheus/ingress.yaml | 4 +- .../kube-prometheus-stack-values.yaml | 13 +- .../prometheus/prom-example-rule.yaml | 16 ++ .../prometheus/prom-traefik-rule.yaml | 20 +++ dapr-distributed-calendar/test.sh | 2 +- .../traefik/ingress.yaml | 30 ++++ .../traefik/services.yaml | 36 +++++ .../traefik/traefik-dashboard-ingress.yaml | 20 --- 
.../traefik/traefik-dashboard-service.yaml | 18 --- .../traefik/traefik-values.yaml | 33 ++++ .../traefik/update-traefik.sh | 13 ++ 32 files changed, 667 insertions(+), 95 deletions(-) create mode 100644 dapr-distributed-calendar/chaos-engineering/burn-cpu.yaml create mode 100644 dapr-distributed-calendar/chaos-engineering/chaos-on-the-rise.yaml create mode 100644 dapr-distributed-calendar/chaos-engineering/ingress.yaml create mode 100644 dapr-distributed-calendar/chaos-engineering/network-bandwith.yaml create mode 100644 dapr-distributed-calendar/chaos-engineering/network-corrupt.yaml create mode 100644 dapr-distributed-calendar/chaos-engineering/pod-failure.yaml create mode 100644 dapr-distributed-calendar/chaos-engineering/schedule-chaos.yaml create mode 100755 dapr-distributed-calendar/error.sh create mode 100644 dapr-distributed-calendar/jaeger/jaeger.yaml delete mode 100644 dapr-distributed-calendar/jaeger/simplest.yaml create mode 100644 dapr-distributed-calendar/prometheus/prom-example-rule.yaml create mode 100644 dapr-distributed-calendar/prometheus/prom-traefik-rule.yaml create mode 100644 dapr-distributed-calendar/traefik/ingress.yaml create mode 100644 dapr-distributed-calendar/traefik/services.yaml delete mode 100644 dapr-distributed-calendar/traefik/traefik-dashboard-ingress.yaml delete mode 100644 dapr-distributed-calendar/traefik/traefik-dashboard-service.yaml create mode 100644 dapr-distributed-calendar/traefik/traefik-values.yaml create mode 100755 dapr-distributed-calendar/traefik/update-traefik.sh diff --git a/dapr-distributed-calendar/chaos-engineering/burn-cpu.yaml b/dapr-distributed-calendar/chaos-engineering/burn-cpu.yaml new file mode 100644 index 0000000..a317052 --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/burn-cpu.yaml @@ -0,0 +1,17 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: StressChaos +metadata: + namespace: 12-factor-app + name: burn-cpu-100 +spec: + selector: + namespaces: + - 12-factor-app + labelSelectors: + 
app.kubernetes.io/instance: 12-factor-app + mode: all + stressors: + cpu: + workers: 1 + load: 100 + duration: 30s diff --git a/dapr-distributed-calendar/chaos-engineering/chaos-on-the-rise.yaml b/dapr-distributed-calendar/chaos-engineering/chaos-on-the-rise.yaml new file mode 100644 index 0000000..90711fa --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/chaos-on-the-rise.yaml @@ -0,0 +1,39 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: Workflow +metadata: + name: chaos-on-the-rise + namespace: 12-factor-app +spec: + entry: entry + templates: + - name: entry + templateType: Serial + deadline: 2m + children: + - stress-test + - pod-killer + - name: stress-test + templateType: StressChaos + deadline: 10s + stressChaos: + selector: + namespaces: + - 12-factor-app + labelSelectors: + app: controller + mode: all + stressors: + cpu: + workers: 1 + load: 100 + - name: pod-killer + templateType: PodChaos + deadline: 60s + podChaos: + selector: + namespaces: + - 12-factor-app + labelSelectors: + app: controller + mode: all + action: pod-kill diff --git a/dapr-distributed-calendar/chaos-engineering/ingress.yaml b/dapr-distributed-calendar/chaos-engineering/ingress.yaml new file mode 100644 index 0000000..16a7e29 --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/ingress.yaml @@ -0,0 +1,14 @@ +apiVersion: traefik.io/v1alpha1 +kind: IngressRoute +metadata: + name: chaos-ingress + namespace: chaos-testing +spec: + entryPoints: + - web + routes: + - match: Host(`chaos.--01..`) && PathPrefix(`/`) + kind: Rule + services: + - name: chaos-dashboard + port: 2333 diff --git a/dapr-distributed-calendar/chaos-engineering/network-bandwith.yaml b/dapr-distributed-calendar/chaos-engineering/network-bandwith.yaml new file mode 100644 index 0000000..eee4630 --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/network-bandwith.yaml @@ -0,0 +1,20 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: network-bandwidth-100kbps + 
namespace: 12-factor-app +spec: + action: bandwidth + mode: all + selector: + namespaces: + - 12-factor-app + labelSelectors: + app.kubernetes.io/instance: 12-factor-app + bandwidth: + rate: 100kbps + limit: 100 + buffer: 10000 + peakrate: 1000000 + minburst: 1000000 + duration: "10s" \ No newline at end of file diff --git a/dapr-distributed-calendar/chaos-engineering/network-corrupt.yaml b/dapr-distributed-calendar/chaos-engineering/network-corrupt.yaml new file mode 100644 index 0000000..5ac07cd --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/network-corrupt.yaml @@ -0,0 +1,17 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: NetworkChaos +metadata: + name: network-corrupt-example + namespace: 12-factor-app +spec: + action: corrupt + mode: all + selector: + namespaces: + - 12-factor-app + labelSelectors: + app.kubernetes.io/instance: 12-factor-app + corrupt: + corrupt: "40" + correlation: "25" + duration: "10s" diff --git a/dapr-distributed-calendar/chaos-engineering/pod-failure.yaml b/dapr-distributed-calendar/chaos-engineering/pod-failure.yaml new file mode 100644 index 0000000..b64b298 --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/pod-failure.yaml @@ -0,0 +1,14 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos +metadata: + namespace: 12-factor-app + name: pod-failure-example +spec: + selector: + namespaces: + - 12-factor-app # the namespace of the system under test (SUT) you've deployed + labelSelectors: + app.kubernetes.io/instance: 12-factor-app # the label of the pod for chaos injection + mode: all # the mode to run chaos action; supported modes are one/all/fixed/fixed-percent/random-max-percent + action: pod-failure # the specific chaos action to inject; supported actions: pod-kill/pod-failure/container-kill + duration: 60s # duration for the injected chaos experiment diff --git a/dapr-distributed-calendar/chaos-engineering/schedule-chaos.yaml b/dapr-distributed-calendar/chaos-engineering/schedule-chaos.yaml new file 
mode 100644 index 0000000..5dda833 --- /dev/null +++ b/dapr-distributed-calendar/chaos-engineering/schedule-chaos.yaml @@ -0,0 +1,21 @@ +apiVersion: chaos-mesh.org/v1alpha1 +kind: Schedule +metadata: + namespace: 12-factor-app + name: scheduled-pod-failure +spec: + schedule: 5 * * * * + startingDeadlineSeconds: null + concurrencyPolicy: Forbid + historyLimit: 2 + type: PodChaos + podChaos: + selector: + namespaces: + - 12-factor-app + labelSelectors: + app: controller + mode: one + action: pod-failure + duration: 1m + gracePeriod: 0 diff --git a/dapr-distributed-calendar/error.sh b/dapr-distributed-calendar/error.sh new file mode 100755 index 0000000..0c5706c --- /dev/null +++ b/dapr-distributed-calendar/error.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +ENDPOINT="${ENDPOINT:-localhost}" +PORT="${PORT:-3000}" + +echo Endpoint: $ENDPOINT +echo Port: $PORT + +# Update event 1 +echo 'Update event 1' +curl --location --request PUT 'http://'$ENDPOINT':'$PORT'/updateevent/1' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Updated Event", + "date": "TBD" + } +}' | jq + +# Update event 1 +echo 'Update event 1' +curl --location --request PUT 'http://'$ENDPOINT':'$PORT'/updateevent/1' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Updated Event", + "date": "TBD" + } +}' | jq + +# Update event 1 +echo 'Update event 1' +curl --location --request PUT 'http://'$ENDPOINT':'$PORT'/updateevent/1' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Updated Event", + "date": "TBD" + } +}' | jq + +# Update event 1 +echo 'Update event 1' +curl --location --request PUT 'http://'$ENDPOINT':'$PORT'/updateevent/1' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Updated Event", + "date": "TBD" + } +}' | jq + +# Get event 1 (should fail) +echo 'Get event 1 (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Get event 1 (should fail) +echo 'Get 
event 1 (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Get event 1 (should fail) +echo 'Get event 1 (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Get event 1 (should fail) +echo 'Get event 1 (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Create event 1 +echo 'Create event 1' +curl --location 'http://'$ENDPOINT':'$PORT'/newevent' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Uninstall Event", + "date": "TBD", + "id": "1" + } +}' |jq + +# Create event 1 again (should fail) +echo 'Create event 1 again (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/newevent' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Uninstall Event", + "date": "TBD", + "id": "1" + } +}' | jq + +# Create event 1 again (should fail) +echo 'Create event 1 again (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/newevent' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Uninstall Event", + "date": "TBD", + "id": "1" + } +}' | jq + +# Create event 1 again (should fail) +echo 'Create event 1 again (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/newevent' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Uninstall Event", + "date": "TBD", + "id": "1" + } +}' | jq + +# Create event 1 again (should fail) +echo 'Create event 1 again (should fail)' +curl --location 'http://'$ENDPOINT':'$PORT'/newevent' \ +--header 'Content-Type: application/json' \ +--data '{ + "data": { + "name": "Uninstall Event", + "date": "TBD", + "id": "1" + } +}' | jq + +# Delete event 1 +echo 'Delete event 1' +curl --location --request DELETE 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Delete event 1 again (should fail) +echo 'Delete event 1 again (should fail)' +curl --location --request DELETE 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Delete event 1 again (should 
fail) +echo 'Delete event 1 again (should fail)' +curl --location --request DELETE 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Delete event 1 again (should fail) +echo 'Delete event 1 again (should fail)' +curl --location --request DELETE 'http://'$ENDPOINT':'$PORT'/event/1' | jq + +# Delete event 1 again (should fail) +echo 'Delete event 1 again (should fail)' +curl --location --request DELETE 'http://'$ENDPOINT':'$PORT'/event/1' | jq \ No newline at end of file diff --git a/dapr-distributed-calendar/fluent/ingress.yaml b/dapr-distributed-calendar/fluent/ingress.yaml index 3396c71..c849a76 100644 --- a/dapr-distributed-calendar/fluent/ingress.yaml +++ b/dapr-distributed-calendar/fluent/ingress.yaml @@ -1,4 +1,4 @@ -apiVersion: traefik.containo.us/v1alpha1 +apiVersion: traefik.io/v1alpha1 kind: IngressRoute metadata: name: kibana-ingress diff --git a/dapr-distributed-calendar/go/go_events.go b/dapr-distributed-calendar/go/go_events.go index a517ce4..fde23db 100644 --- a/dapr-distributed-calendar/go/go_events.go +++ b/dapr-distributed-calendar/go/go_events.go @@ -103,7 +103,7 @@ func addEvent(w http.ResponseWriter, r *http.Request) { if string(bodyBytes) != "" { log.Printf("Event with ID %s already exists", id) w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusMethodNotAllowed) + w.WriteHeader(http.StatusOK) return } @@ -115,6 +115,8 @@ func addEvent(w http.ResponseWriter, r *http.Request) { eventsCounter.Add(context.Background(), 1) log.Printf("Response after posting to state: %s", resp.Status) http.Error(w, "All Okay", http.StatusOK) + response := map[string]string{"message": "Event created"} + json.NewEncoder(w).Encode(response) } func deleteEvent(w http.ResponseWriter, r *http.Request) { @@ -141,7 +143,7 @@ func deleteEvent(w http.ResponseWriter, r *http.Request) { if string(bodyBytes) == "" { log.Printf("Event with ID %s does not exist exists", eventID.ID) w.Header().Set("Content-Type", "application/json") - 
w.WriteHeader(http.StatusNotFound) + w.WriteHeader(http.StatusOK) return } @@ -157,7 +159,9 @@ func deleteEvent(w http.ResponseWriter, r *http.Request) { return } log.Printf("Response after delete call: %s", resp.Status) - w.WriteHeader(http.StatusNoContent) + w.WriteHeader(http.StatusOK) + response := map[string]string{"message": "Event deleted"} + json.NewEncoder(w).Encode(response) defer resp.Body.Close() eventsCounter.Add(context.Background(), -1) } @@ -203,11 +207,14 @@ func getEvent(w http.ResponseWriter, r *http.Request) { func updateEvent(w http.ResponseWriter, r *http.Request) { var event Event + // Decode the request body into the 'event' struct err := json.NewDecoder(r.Body).Decode(&event) if err != nil { log.Printf("Error while decoding: %e", err) + http.Error(w, "Error decoding request body", http.StatusBadRequest) return } + log.Printf("Event Name: %s", event.Name) log.Printf("Event Date: %s", event.Date) log.Printf("Event ID: %s", event.ID) @@ -221,42 +228,58 @@ func updateEvent(w http.ResponseWriter, r *http.Request) { log.Print(string(state)) id := data[0]["key"] + log.Printf("Updating event with ID: %s", id) + // Check if event with given ID already exists bodyBytes, err := checkEvent(id) if err != nil { log.Printf("Error while checking event: %e", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) return } - if string(bodyBytes) == "" { - log.Printf("Event with ID %s does not exists", id) + + // If event does not exist, return a not found response + if len(bodyBytes) == 0 { + log.Printf("Event with ID %s does not exist", id) w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusNotFound) + w.WriteHeader(http.StatusOK) return } + // Update the event (your existing logic) req, err := http.NewRequest(http.MethodPost, stateURL, bytes.NewBuffer(state)) if err != nil { log.Fatalln("Error posting to state", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) return } client := 
&http.Client{} _, err = client.Do(req) if err != nil { log.Fatalln("Error updating event", err) + http.Error(w, "Internal server error", http.StatusInternalServerError) return } + + // Respond with a success message + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + + response := map[string]string{"message": "Event updated"} + json.NewEncoder(w).Encode(response) } func checkEvent(id string) ([]byte, error) { req, err := http.NewRequest(http.MethodGet, stateURL+"/"+id, nil) if err != nil { - log.Fatalln("Error creating get request", err) + log.Printf("Error creating get request: %v", err) return nil, err } + client := &http.Client{} resp, err := client.Do(req) if err != nil { - log.Fatalln("Error getting event", err) + log.Printf("Error getting event: %v", err) return nil, err } diff --git a/dapr-distributed-calendar/jaeger/ingress.yaml b/dapr-distributed-calendar/jaeger/ingress.yaml index b181e1a..12a0ffd 100644 --- a/dapr-distributed-calendar/jaeger/ingress.yaml +++ b/dapr-distributed-calendar/jaeger/ingress.yaml @@ -1,4 +1,4 @@ -apiVersion: traefik.containo.us/v1alpha1 +apiVersion: traefik.io/v1alpha1 kind: IngressRoute metadata: name: jaeger-ingress @@ -10,5 +10,5 @@ spec: - match: Host(`jaeger.--01..`) && PathPrefix(`/`) kind: Rule services: - - name: simplest-query + - name: jaeger-query port: 16686 \ No newline at end of file diff --git a/dapr-distributed-calendar/jaeger/jaeger.yaml b/dapr-distributed-calendar/jaeger/jaeger.yaml new file mode 100644 index 0000000..e95c9ad --- /dev/null +++ b/dapr-distributed-calendar/jaeger/jaeger.yaml @@ -0,0 +1,27 @@ +# apiVersion: jaegertracing.io/v1 +# kind: Jaeger +# metadata: +# name: simplest +# namespace: observability + +apiVersion: jaegertracing.io/v1 +kind: Jaeger +metadata: + name: jaeger + namespace: observability +spec: + strategy: allInOne + allInOne: + image: jaegertracing/all-in-one:latest + options: + log-level: debug + query: + base-path: /jaeger + prometheus: + 
server-url: "http://prom.--01.." + metricsStorage: + type: prometheus + storage: + options: + memory: + max-traces: 100000 diff --git a/dapr-distributed-calendar/jaeger/simplest.yaml b/dapr-distributed-calendar/jaeger/simplest.yaml deleted file mode 100644 index 8f1ca78..0000000 --- a/dapr-distributed-calendar/jaeger/simplest.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: jaegertracing.io/v1 -kind: Jaeger -metadata: - name: simplest - namespace: observability \ No newline at end of file diff --git a/dapr-distributed-calendar/kubernetes-deploy.sh b/dapr-distributed-calendar/kubernetes-deploy.sh index 68b613e..613f5ab 100755 --- a/dapr-distributed-calendar/kubernetes-deploy.sh +++ b/dapr-distributed-calendar/kubernetes-deploy.sh @@ -30,8 +30,9 @@ helm upgrade --install \ # install jaeger (requires cert-manager) OPTIONAL kubectl create namespace observability -kubectl create -f https://github.com/jaegertracing/jaeger-operator/releases/download/v1.38.0/jaeger-operator.yaml -n observability -kubectl wait --for=condition=ready pod --all --timeout=200s -n observability +helm repo add jaegertracing https://jaegertracing.github.io/helm-charts +helm repo update +helm install jaeger jaegertracing/jaeger-operator -n observability --wait kubectl apply -f jaeger/. # install prometheus OPTIONAL @@ -75,7 +76,7 @@ helm install redis bitnami/redis --namespace 12-factor-app --wait kubectl apply -f kubernetes/. 
kubectl wait --for=condition=ready pod --all --timeout=200s -n 12-factor-app -# setup locust for loadgeneration OPTIONAL +# setup locust for load generation OPTIONAL kubectl create configmap my-loadtest-locustfile --from-file locust/main.py -n 12-factor-app helm repo add deliveryhero https://charts.deliveryhero.io/ helm repo update diff --git a/dapr-distributed-calendar/kubernetes/go-events.yaml b/dapr-distributed-calendar/kubernetes/go-events.yaml index 9985dca..237fa81 100644 --- a/dapr-distributed-calendar/kubernetes/go-events.yaml +++ b/dapr-distributed-calendar/kubernetes/go-events.yaml @@ -4,15 +4,18 @@ metadata: name: go-events namespace: 12-factor-app labels: + app.kubernetes.io/instance: 12-factor-app app: go-events spec: replicas: 1 selector: matchLabels: + app.kubernetes.io/instance: 12-factor-app app: go-events template: metadata: labels: + app.kubernetes.io/instance: 12-factor-app app: go-events annotations: # instrumentation.opentelemetry.io/inject-go: "go-instrumentation" diff --git a/dapr-distributed-calendar/kubernetes/ingress.yaml b/dapr-distributed-calendar/kubernetes/ingress.yaml index 59781c7..401604d 100644 --- a/dapr-distributed-calendar/kubernetes/ingress.yaml +++ b/dapr-distributed-calendar/kubernetes/ingress.yaml @@ -1,4 +1,4 @@ -apiVersion: traefik.containo.us/v1alpha1 +apiVersion: traefik.io/v1alpha1 kind: IngressRoute metadata: name: controller-ingress @@ -12,3 +12,18 @@ spec: services: - name: controller port: 3000 +--- +apiVersion: traefik.io/v1alpha1 +kind: IngressRoute +metadata: + name: dapr-dashboard-ingress + namespace: dapr-system +spec: + entryPoints: + - web + routes: + - match: Host(`dapr.--01..`) && PathPrefix(`/`) + kind: Rule + services: + - name: dapr-dashboard + port: 8080 diff --git a/dapr-distributed-calendar/kubernetes/node-controller.yaml b/dapr-distributed-calendar/kubernetes/node-controller.yaml index 4dce9a3..473a215 100644 --- a/dapr-distributed-calendar/kubernetes/node-controller.yaml +++ 
b/dapr-distributed-calendar/kubernetes/node-controller.yaml @@ -4,9 +4,11 @@ metadata: name: controller namespace: 12-factor-app labels: + app.kubernetes.io/instance: 12-factor-app app: controller spec: selector: + app.kubernetes.io/instance: 12-factor-app app: controller ports: - protocol: TCP @@ -20,15 +22,18 @@ metadata: name: controller namespace: 12-factor-app labels: + app.kubernetes.io/instance: 12-factor-app app: controller spec: replicas: 1 selector: matchLabels: + app.kubernetes.io/instance: 12-factor-app app: controller template: metadata: labels: + app.kubernetes.io/instance: 12-factor-app app: controller annotations: # instrumentation.opentelemetry.io/inject-nodejs: "node-instrumentation" diff --git a/dapr-distributed-calendar/kubernetes/python-messages.yaml b/dapr-distributed-calendar/kubernetes/python-messages.yaml index 1fca0a3..bf2e483 100644 --- a/dapr-distributed-calendar/kubernetes/python-messages.yaml +++ b/dapr-distributed-calendar/kubernetes/python-messages.yaml @@ -4,15 +4,18 @@ metadata: name: messages namespace: 12-factor-app labels: + app.kubernetes.io/instance: 12-factor-app app: messages spec: replicas: 1 selector: matchLabels: + app.kubernetes.io/instance: 12-factor-app app: messages template: metadata: labels: + app.kubernetes.io/instance: 12-factor-app app: messages annotations: # instrumentation.opentelemetry.io/inject-python: "python-instrumentation" diff --git a/dapr-distributed-calendar/locust/ingress.yaml b/dapr-distributed-calendar/locust/ingress.yaml index 81c3fe9..d77192a 100644 --- a/dapr-distributed-calendar/locust/ingress.yaml +++ b/dapr-distributed-calendar/locust/ingress.yaml @@ -1,4 +1,4 @@ -apiVersion: traefik.containo.us/v1alpha1 +apiVersion: traefik.io/v1alpha1 kind: IngressRoute metadata: name: locust-ingress diff --git a/dapr-distributed-calendar/node/node_controller.js b/dapr-distributed-calendar/node/node_controller.js index aaffc8a..b0c94fd 100644 --- a/dapr-distributed-calendar/node/node_controller.js +++ 
b/dapr-distributed-calendar/node/node_controller.js @@ -66,15 +66,51 @@ app.post('/newevent', (req, res) => { headers: { "Content-Type": "application/json" } - }).then((response) => { + }).then(async(response) => { if (!response.ok) { throw "Failed to persist state."; } + try{ + const reader = response.body.getReader(); + let responseBody = ''; + + // The following function handles each data chunk + function push() { + return reader.read().then(({ done, value }) => { + // Is there no more data to read? + if (done) { + // Tell the browser that we have finished sending data + reader.releaseLock(); + return; + } - console.log("Successfully persisted state."); - res.status(200).send({ message: "Event created" }); - }).catch(() => { - res.status(405).send({message: "Event already exists"}); + // Get the data and append it to the responseBody + responseBody += new TextDecoder().decode(value); + push(); + }); + } + + await push(); + + // Check if response body is empty or not valid JSON + if (!responseBody || Object.keys(responseBody).length === 0) { + console.log("Event already exists"); + console.log("Status: 405") + res.status(405).send({message: "Event already exists"}); + return; + } + console.log("Successfully persisted state."); + console.log("Status: 200") + res.status(200).send({ message: "Event created" }); + } catch (error) { + console.log("Error parsing JSON:", error); + console.log("Status: 500") + res.status(500).send({ message: "Error parsing JSON" }); + } + }).catch((error) => { + console.log(error); + console.log("Status: 500") + res.status(500).send({message: error}); }); send_notif(data) }); @@ -92,16 +128,52 @@ app.delete('/event/:id', (req, res) => { headers: { "Content-Type": "application/json" } - }).then((response) => { - console.log("My status: "+ response.status) - if (response.status != 204) { - throw "Failed to delete state."; + }).then(async(response) => { + if (!response.ok) { + throw "Failed to delete state."; } - console.log("Successfully 
deleted event."); - res.status(204).send(); - }).catch(() => { - res.status(404).send({ message: "Event not found" }); - }); + try{ + const reader = response.body.getReader(); + let responseBody = ''; + + // The following function handles each data chunk + function push() { + return reader.read().then(({ done, value }) => { + // Is there no more data to read? + if (done) { + // Tell the browser that we have finished sending data + reader.releaseLock(); + return; + } + + // Get the data and append it to the responseBody + responseBody += new TextDecoder().decode(value); + push(); + }); + } + + await push(); + + // Check if response body is empty or not valid JSON + if (!responseBody || Object.keys(responseBody).length === 0) { + console.log("Event not found"); + console.log("Status: 404") + res.status(404).send({ message: "Event not found" }); + return; + } + console.log("Successfully deleted event."); + console.log("Status: 204") + res.status(204).send(); + } catch (error) { + console.log("Error parsing JSON:", error); + console.log("Status: 500") + res.status(500).send({ message: "Error parsing JSON" }); + } + }).catch((error) => { + console.log(error); + console.log("Status: 500") + res.status(500).send({message: error}); + }); }); app.get('/event/:id', (req, res) =>{ @@ -147,17 +219,21 @@ app.get('/event/:id', (req, res) =>{ // Check if response body is empty or not valid JSON if (!responseBody || Object.keys(responseBody).length === 0) { console.log("Event not found"); + console.log("Status: 404") res.status(404).send({ message: "Event not found" }); return; } const parsedResponseBody = JSON.parse(responseBody); + console.log("Status: 200") res.status(200).json(parsedResponseBody); } catch (error) { console.log("Error parsing JSON:", error); + console.log("Status: 500") res.status(500).send({ message: "Error parsing JSON" }); } }).catch((error) => { console.log(error); + console.log("Status: 500") res.status(500).send({message: error}); }); }) @@ -184,15 
+260,51 @@ app.put('/updateevent/:id', (req, res) => { headers: { "Content-Type": "application/json" } - }).then((response) => { + }).then(async(response) => { if (!response.ok) { throw "Failed to update event."; } - - console.log("Successfully updated event."); - res.status(200).send({ message: "Event updated" }); - }).catch(() => { - res.status(404).send({ message: "Event not found" }); + try{ + const reader = response.body.getReader(); + let responseBody = ''; + + // The following function handles each data chunk + function push() { + return reader.read().then(({ done, value }) => { + // Is there no more data to read? + if (done) { + // Tell the browser that we have finished sending data + reader.releaseLock(); + return; + } + + // Get the data and append it to the responseBody + responseBody += new TextDecoder().decode(value); + push(); + }); + } + + await push(); + + // Check if response body is empty or not valid JSON + if (!responseBody || Object.keys(responseBody).length === 0) { + console.log("Event not found"); + console.log("Status: 404") + res.status(404).send({ message: "Event not found" }); + return; + } + console.log("Successfully updated event."); + console.log("Status: 200") + res.status(200).send({ message: "Event updated" }); + } catch (error) { + console.log("Error parsing JSON:", error); + console.log("Status: 500") + res.status(500).send({ message: "Error parsing JSON" }); + } + }).catch((error) => { + console.log(error); + console.log("Status: 500") + res.status(500).send({message: error}); }); }); diff --git a/dapr-distributed-calendar/otel/otel-collector-values.yaml b/dapr-distributed-calendar/otel/otel-collector-values.yaml index 87d90e9..0b2ef62 100644 --- a/dapr-distributed-calendar/otel/otel-collector-values.yaml +++ b/dapr-distributed-calendar/otel/otel-collector-values.yaml @@ -34,7 +34,7 @@ spec: debug: verbosity: detailed otlp/jaeger: - endpoint: simplest-collector.observability:4317 + endpoint: jaeger-collector.observability:4317 
tls: insecure: true prometheus: diff --git a/dapr-distributed-calendar/prometheus/ingress.yaml b/dapr-distributed-calendar/prometheus/ingress.yaml index d563f36..a6a1840 100644 --- a/dapr-distributed-calendar/prometheus/ingress.yaml +++ b/dapr-distributed-calendar/prometheus/ingress.yaml @@ -1,4 +1,4 @@ -apiVersion: traefik.containo.us/v1alpha1 +apiVersion: traefik.io/v1alpha1 kind: IngressRoute metadata: name: prometheus-ingress @@ -13,7 +13,7 @@ spec: - name: prometheus-kube-prometheus-prometheus port: 9090 --- -apiVersion: traefik.containo.us/v1alpha1 +apiVersion: traefik.io/v1alpha1 kind: IngressRoute metadata: name: grafana-ingress diff --git a/dapr-distributed-calendar/prometheus/kube-prometheus-stack-values.yaml b/dapr-distributed-calendar/prometheus/kube-prometheus-stack-values.yaml index 0608ccf..9b811de 100644 --- a/dapr-distributed-calendar/prometheus/kube-prometheus-stack-values.yaml +++ b/dapr-distributed-calendar/prometheus/kube-prometheus-stack-values.yaml @@ -6,7 +6,7 @@ prometheus: enableFeatures: - exemplar-storage additionalScrapeConfigs: - - job_name: 'dapr' + - job_name: 'opentelemetry' honor_labels: true kubernetes_sd_configs: - role: pod @@ -17,14 +17,3 @@ prometheus: - source_labels: [__meta_kubernetes_pod_annotation_dapr_collector_demo] action: keep regex: true - - job_name: 'traefik' - honor_labels: true - kubernetes_sd_configs: - - role: pod - namespaces: - own_namespace: true - names: [kube-system] - relabel_configs: - - source_labels: [__meta_kubernetes_pod_container_name] - action: keep - regex: "traefik" diff --git a/dapr-distributed-calendar/prometheus/prom-example-rule.yaml b/dapr-distributed-calendar/prometheus/prom-example-rule.yaml new file mode 100644 index 0000000..dc0257f --- /dev/null +++ b/dapr-distributed-calendar/prometheus/prom-example-rule.yaml @@ -0,0 +1,16 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app: kube-prometheus-stack + release: prometheus + name: 
prometheus-example-rules +spec: + groups: + - name: example.rules + rules: + - alert: ExampleAlert + annotations: + message: | + I am an example alert!! + expr: vector(1) diff --git a/dapr-distributed-calendar/prometheus/prom-traefik-rule.yaml b/dapr-distributed-calendar/prometheus/prom-traefik-rule.yaml new file mode 100644 index 0000000..6b3a0f5 --- /dev/null +++ b/dapr-distributed-calendar/prometheus/prom-traefik-rule.yaml @@ -0,0 +1,20 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + app: kube-prometheus-stack + release: prometheus + role: alert-rules + name: prometheus-traefik-rules +spec: + groups: + - name: traefik.rules + rules: + - alert: TraefikHighHttp4xxErrorRateService + expr: sum(rate(traefik_service_requests_total{code=~"4.*"}[3m])) by (service) / sum(rate(traefik_service_requests_total[3m])) by (service) * 100 > 5 + for: 1m + labels: + severity: critical + annotations: + summary: Traefik high HTTP 4xx error rate service (instance {{ $labels.instance }}) + description: "Traefik service 4xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/dapr-distributed-calendar/test.sh b/dapr-distributed-calendar/test.sh index c1f3eb2..44ae4ad 100755 --- a/dapr-distributed-calendar/test.sh +++ b/dapr-distributed-calendar/test.sh @@ -55,7 +55,7 @@ curl --location 'http://'$ENDPOINT':'$PORT'/event/1' | jq # Delete event 1 echo 'Delete event 1' -curl --location --request DELETE 'http://'$ENDPOINT':3000/event/1' | jq +curl --location --request DELETE 'http://'$ENDPOINT':'$PORT'/event/1' | jq # Delete event 1 again (should fail) echo 'Delete event 1 again (should fail)' diff --git a/dapr-distributed-calendar/traefik/ingress.yaml b/dapr-distributed-calendar/traefik/ingress.yaml new file mode 100644 index 0000000..f02d81f --- /dev/null +++ b/dapr-distributed-calendar/traefik/ingress.yaml @@ -0,0 +1,30 @@ +--- +apiVersion: traefik.io/v1alpha1 +kind: IngressRoute +metadata: + name: traefik-ingress + 
namespace: kube-system +spec: + entryPoints: + - web + routes: + - match: Host(`traefik.--01..`) && PathPrefix(`/`) + kind: Rule + services: + - name: traefik-dashboard + port: 9000 +--- +apiVersion: traefik.io/v1alpha1 +kind: IngressRoute +metadata: + name: traefik-metrics + namespace: kube-system +spec: + entryPoints: + - web + routes: + - kind: Rule + match: Host(`traefik.--01..`) && PathPrefix(`/metrics`) + services: + - name: prometheus@internal + kind: TraefikService \ No newline at end of file diff --git a/dapr-distributed-calendar/traefik/services.yaml b/dapr-distributed-calendar/traefik/services.yaml new file mode 100644 index 0000000..cd8dc6b --- /dev/null +++ b/dapr-distributed-calendar/traefik/services.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: Service +metadata: + name: traefik-dashboard + namespace: kube-system + labels: + app.kubernetes.io/instance: traefik-dashboard-kube-system + app.kubernetes.io/name: traefik-dashboard +spec: + type: ClusterIP + ports: + - name: traefik + port: 9000 + targetPort: traefik + protocol: TCP + selector: + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik +--- +apiVersion: v1 +kind: Service +metadata: + name: traefik-metrics + namespace: kube-system + labels: + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik-metrics +spec: + type: ClusterIP + ports: + - name: metrics + port: 9100 + protocol: TCP + selector: + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik diff --git a/dapr-distributed-calendar/traefik/traefik-dashboard-ingress.yaml b/dapr-distributed-calendar/traefik/traefik-dashboard-ingress.yaml deleted file mode 100644 index a097e93..0000000 --- a/dapr-distributed-calendar/traefik/traefik-dashboard-ingress.yaml +++ /dev/null @@ -1,20 +0,0 @@ ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: traefik-ingress - namespace: kube-system - annotations: - kubernetes.io/ingress.class: traefik -spec: - 
rules: - - host: traefik.--01.. - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: traefik-dashboard - port: - number: 9000 diff --git a/dapr-distributed-calendar/traefik/traefik-dashboard-service.yaml b/dapr-distributed-calendar/traefik/traefik-dashboard-service.yaml deleted file mode 100644 index d71c2d0..0000000 --- a/dapr-distributed-calendar/traefik/traefik-dashboard-service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: traefik-dashboard - namespace: kube-system - labels: - app.kubernetes.io/instance: traefik-dashboard-kube-system - app.kubernetes.io/name: traefik-dashboard -spec: - type: ClusterIP - ports: - - name: traefik - port: 9000 - targetPort: traefik - protocol: TCP - selector: - app.kubernetes.io/instance: traefik-kube-system - app.kubernetes.io/name: traefik \ No newline at end of file diff --git a/dapr-distributed-calendar/traefik/traefik-values.yaml b/dapr-distributed-calendar/traefik/traefik-values.yaml new file mode 100644 index 0000000..5e72e3e --- /dev/null +++ b/dapr-distributed-calendar/traefik/traefik-values.yaml @@ -0,0 +1,33 @@ +image: + tag: "v3.0.0-beta3" +logs: + access: + enabled: true +metrics: + # prometheus: null + openTelemetry: + ## Address of the OpenTelemetry Collector to send metrics to. + address: "otel-dapr-collector.opentelemetry:4317" + ## Enable metrics on entry points. + addEntryPointsLabels: true + ## Enable metrics on routers. + addRoutersLabels: true + ## Enable metrics on services. + addServicesLabels: true + ## Explicit boundaries for Histogram data points. + explicitBoundaries: + - "0.1" + - "0.3" + - "1.2" + - "5.0" + ## Allows reporter to send metrics to the OpenTelemetry Collector without using a secured protocol. + insecure: true + ## Interval at which metrics are sent to the OpenTelemetry Collector. + pushInterval: 10s + ## This instructs the reporter to send metrics to the OpenTelemetry Collector using gRPC. 
+    grpc: true
+tracing:
+  openTelemetry:
+    grpc: true
+    insecure: true
+    address: otel-dapr-collector.opentelemetry:4317
\ No newline at end of file
diff --git a/dapr-distributed-calendar/traefik/update-traefik.sh b/dapr-distributed-calendar/traefik/update-traefik.sh
new file mode 100755
index 0000000..ac1cc0f
--- /dev/null
+++ b/dapr-distributed-calendar/traefik/update-traefik.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+#
+# Replace the Traefik release in the kube-system namespace with one configured
+# by the traefik-values.yaml file stored next to this script.
+# Requires helm and access to the target cluster.
+
+# Stop at the first failing command or unset variable, so that a failed
+# helm repo step aborts the run before Traefik gets uninstalled.
+set -eu
+
+# Locate the values file relative to this script, not the current directory,
+# so the script works no matter where it is invoked from.
+script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
+values_file="${script_dir}/traefik-values.yaml"
+
+# Refuse to touch the running release when the values file is missing.
+[ -f "$values_file" ] || { printf 'error: values file not found: %s\n' "$values_file" >&2; exit 1; }
+
+# Add the Traefik Helm repository
+helm repo add traefik https://traefik.github.io/charts
+
+# Update the Helm repositories
+helm repo update
+
+# Uninstall Traefik, but only when a release exists. The status probe sits on
+# the left of the AND list, so its failure on a fresh cluster does not trip set -e.
+helm status traefik --namespace kube-system >/dev/null 2>&1 && helm uninstall traefik --namespace kube-system
+
+# Reinstall Traefik
+helm install traefik traefik/traefik --namespace kube-system --values "$values_file" --wait