From dba64cf676650d17a271b0ac98f89a2acfaa2827 Mon Sep 17 00:00:00 2001 From: Andreas Thaler Date: Mon, 13 Nov 2023 16:33:15 +0100 Subject: [PATCH 1/3] remove kyma-specifics from the prometheus example --- prometheus/values.yaml | 48 ++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/prometheus/values.yaml b/prometheus/values.yaml index b18b20e0..00d7b252 100644 --- a/prometheus/values.yaml +++ b/prometheus/values.yaml @@ -1,12 +1,7 @@ -####### This block is required to run in parallel with a Kyma monitoring stack -prometheusOperator: - # exclude the kyma-system Namespace from watching to avoid conflict with the Kyma monitoring stack - denyNamespaces: - - kyma-system - # re-use the kublet service definition of the Kyma monitoring stack. - kubeletService: - enabled: false +####### This block configures resource limits for the tooling and disables scaping of them via annotation + +prometheusOperator: # Define resource limits resources: requests: @@ -16,16 +11,14 @@ prometheusOperator: cpu: "500m" memory: "1Gi" -# change the port of the node-exporter to be different from the one used by the Kyma monitoring stack prometheus-node-exporter: + # Disables prometheus annotations on monitoring components as they are scraped using an explicit config service: - port: 9101 - targetPort: 9101 annotations: -####### This block disables prometheus annotations on monitoring components in order to be able to enable scraping via annotation for all workloads prometheus.io/scrape: "false" kube-state-metrics: + # Disables prometheus annotations on monitoring components as they are scraped using an explicit config prometheusScrape: false # Define resource limits @@ -37,22 +30,22 @@ kube-state-metrics: cpu: "500m" memory: "2Gi" -####### This block disables control plane components which are not reachable from within the Gardener data plane +####### This block disabled not needed features -# Disable scraping of etcd kubeEtcd: + # Disable scraping of control plane component etcd as it is not reachable from the data plane enabled: false -# Disable scraping of kubeControllerManager kubeControllerManager: + # Disable scraping of control plane component kubeControllerManager as it is not reachable from the data plane enabled: false -# Disable scraping of kubeProxy kubeProxy: + # Disable scraping of control plane component kubeProxy as it is not reachable from the data plane enabled: false -# Disable scraping of kubeScheduler kubeScheduler: + # Disable scraping of control plane component kubeScheduler as it is not reachable from the data plane enabled: false ####### This block is required to enable scraping of endpoints with Istio strict mTLS, see also https://istio.io/latest/docs/ops/integrations/prometheus/#tls-settings @@ -60,22 +53,26 @@ prometheus: prometheusSpec: podMetadata: labels: + # Enables istio sidecar injection sidecar.istio.io/inject: "true" annotations: - traffic.sidecar.istio.io/includeOutboundIPRanges: "" # do not intercept any outbound traffic + # Configures istio to not intercept outbound traffic + traffic.sidecar.istio.io/includeOutboundIPRanges: "" + # Configures istio to write the client certs into a specific folder proxy.istio.io/config: | # configure an env variable `OUTPUT_CERTS` to write certificates to the given folder proxyMetadata: OUTPUT_CERTS: /etc/istio-output-certs + # Configures istio to mount the folder to the attached volume sidecar.istio.io/userVolumeMount: '[{"name": "istio-certs", "mountPath": "/etc/istio-output-certs"}]' # 
mount the shared volume at sidecar proxy - # Additional volumes on the output StatefulSet definition. + # Additional volume on the output StatefulSet definition for storing the client certs volumes: - emptyDir: medium: Memory name: istio-certs - # Additional VolumeMounts on the output StatefulSet definition. + # Additional VolumeMount on the output StatefulSet definition for storing the client certs volumeMounts: - mountPath: /etc/prometheus/secrets/istio.default/ name: istio-certs @@ -158,7 +155,7 @@ prometheus: replacement: $1:$2 - source_labels: [__meta_kubernetes_namespace] action: drop - regex: kyma-system|kube-system|compass-system|kyma-integration + regex: kyma-system|kube-system - source_labels: [__meta_kubernetes_namespace] action: replace target_label: namespace @@ -216,7 +213,7 @@ prometheus: target_label: __address__ - source_labels: [__meta_kubernetes_namespace] action: drop - regex: kyma-system|kube-system|compass-system|kyma-integration + regex: kyma-system|kube-system - source_labels: [__meta_kubernetes_namespace] action: replace target_label: namespace @@ -227,8 +224,10 @@ prometheus: action: replace target_label: node -# Configures grafana with istio sidecar and alertmanage as additional datasource +####### This block configures grafana with istio sidecar and alertmanager as additional datasource + grafana: + # Add alertmanager as datasource additionalDataSources: - name: Alertmanager type: alertmanager @@ -236,6 +235,7 @@ grafana: access: proxy jsonData: implementation: prometheus + # Configure all grafana sidecars (for loading of dashboards/datasources/rules) with proper security context sidecar: securityContext: privileged: false @@ -243,7 +243,9 @@ grafana: runAsNonRoot: true runAsUser: 1337 podLabels: + # Enable istio sidecar for Grafana sidecar.istio.io/inject: "true" + # Overwrite servicemonitor which scrapes grafana with TLS settings as it runs with mTLS now serviceMonitor: scheme: https tlsConfig: From 6d45a170095412b4e314e1cdc3e1b4b84066b89a Mon Sep 17 00:00:00 2001 From: Andreas Thaler Date: Thu, 16 Nov 2023 16:33:25 +0100 Subject: [PATCH 2/3] move kyma tutorial into the example --- prometheus/README.md | 139 ++------------------- prometheus/assets/monitoring-tutorials.svg | 4 + prometheus/prometheus.md | 134 ++++++++++++++++++++ 3 files changed, 149 insertions(+), 128 deletions(-) create mode 100644 prometheus/assets/monitoring-tutorials.svg create mode 100644 prometheus/prometheus.md diff --git a/prometheus/README.md b/prometheus/README.md index 4ec980b8..4e871c74 100644 --- a/prometheus/README.md +++ b/prometheus/README.md @@ -1,140 +1,23 @@ -# Install a custom kube-prometheus-stack in Kyma +# Monitoring in Kyma using a custom kube-prometheus-stack ## Overview -The Kyma monitoring stack brings limited configuration options in contrast to the upstream [`kube-prometheus-stack`](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack) chart. Modifications might be reset at the next upgrade cycle. +The Kyma telemetry module provides collection and integration into observability backends based on OpenTelemetry. OpenTelemetry is the new vendor-neutral player in the cloud-native observability domain and has a growing adoption. However, it still lacks many features and sometimes the typical `kube-prometheus-stack` based on Prometheus, Grafana and the surrounding helper tools is more appropriate. -As an alternative, you can install the upstream chart with all customization options in parallel. 
This tutorial outlines how to set up such installation in co-existence to the Kyma monitoring stack. +The following instructions describe the complete monitoring flow for your service running in Kyma. You get the gist of monitoring applications, such as Prometheus, Grafana, and Alertmanager. You learn how and where you can observe and visualize your service metrics to monitor them for any alerting values. -> **CAUTION:** -- This tutorial describes a basic setup that you should not use in production. Typically, a production setup needs further configuration, like optimizing the amount of data to scrape and the required resource footprint of the installation. To achieve qualities like [high availability](https://prometheus.io/docs/introduction/faq/#can-prometheus-be-made-highly-available), [scalability](https://prometheus.io/docs/introduction/faq/#i-was-told-prometheus-doesnt-scale), or [durable long-term storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage), you need a more advanced setup. -- This example uses the latest Grafana version, which is under AGPL-3.0 and might not be free of charge for commercial usage. +All the tutorials use the [`monitoring-custom-metrics`](./monitoring-custom-metrics/README.md) example and one of its services called `sample-metrics`. This service exposes the `cpu_temperature_celsius` custom metric on the `/metrics` endpoint. This custom metric is the central element of the whole tutorial set. The metric value simulates the current processor temperature and changes randomly from 60 to 90 degrees Celsius. The alerting threshold in these tutorials is 75 degrees Celsius. If the temperature exceeds this value, the Grafana dashboard, PrometheusRule, and Alertmanager notifications you create inform you about this. -## Prerequisites +## Sequence of tasks -- Kyma as the target deployment environment. -- Kubectl > 1.22.x -- Helm 3.x +The instructions cover the following tasks: -## Installation + ![Monitoring tutorials](./assets/monitoring-tutorials.svg) -### Preparation -1. If you cluster was installed manually using the Kyma CLI, you must assure that the Kyma monitoring stack running in your cluster is limited to detection of Kubernetes resources only in the `kyma-system` Namespace. To rule out that there are any side effects with the additional custom stack, run: - ```bash - kyma deploy --component monitoring --value monitoring.prometheusOperator.namespaces.releaseNamespace=true - ``` +1. [**Deploy a custom Prometheus stack**](./prometheus.md), in which you deploy the [kube-prometheus-stack](https://github.com/prometheus-operator/kube-prometheus) from the upstream Helm chart. -1. Export your Namespace as a variable. Replace the `{namespace}` placeholder in the following command and run it: +2. [**Observe application metrics**](./monitoring-custom-metrics/README.md), in which you redirect the `cpu_temperature_celsius` metric to the localhost and the Prometheus UI. You later observe how the metric value changes in the predefined 10 seconds interval in which Prometheus scrapes the metric values from the service's `/metrics` endpoint. - ```bash - export K8S_NAMESPACE="{namespace}" - ``` -1. If you haven't created the Namespace yet, now is the time to do so: - ```bash - kubectl create namespace $K8S_NAMESPACE - ``` - >**Note**: This Namespace must have **no** Istio sidecar injection enabled; that is, there must be no `istio-injection` label present on the Namespace. 
The Helm chart deploys jobs that will not succeed when Isto sidecar injection is enabled. +3. [**Create a Grafana dashboard**](./monitoring-grafana-dashboard/README.md), in which you create a Grafana dashboard of a Gauge type for the `cpu_temperature_celsius` metric. This dashboard shows explicitly when the CPU temperature is equal to or higher than the predefined threshold of 75 degrees Celsius, at which point the dashboard turns red. -1. Export the Helm release name that you want to use. It can be any name, but be aware that all resources in the cluster will be prefixed with that name. Run the following command: - ```bash - export HELM_PROM_RELEASE="prometheus" - ``` - -1. Update your Helm installation with the required Helm repository: - - ```bash - helm repo add prometheus-community https://prometheus-community.github.io/helm-charts - helm repo update - ``` - -### Install the kube-prometheus-stack - -1. Run the Helm upgrade command, which installs the chart if it's not present yet. At the end of the command, change the Grafana admin password to some value of your choice. - ```bash - helm upgrade --install -n ${K8S_NAMESPACE} ${HELM_PROM_RELEASE} prometheus-community/kube-prometheus-stack -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/values.yaml --set grafana.adminPassword=myPwd - ``` - -2. You can use the [values.yaml](./values.yaml) provided with this tutorial, which contains customized settings deviating from the default settings, or create your own one. -The provided `values.yaml` covers the following adjustments: -- Parallel operation to a Kyma monitoring stack -- Client certificate injection to support scraping of workload secured with Istio strict mTLS -- Active scraping of workload annotated with prometheus.io/scrape -- Basic configuration of data persistence with retention -- Basic resource limits for involved components - -### Activate scraping of Istio metrics & Grafana dashboards - -1. To configure Prometheus for scraping of the Istio-specific metrics from any istio-proxy running in the cluster, deploy a PodMonitor, which scrapes any Pod that has a port with name `.*-envoy-prom` exposed. - - ```bash - kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/podmonitor-istio-proxy.yaml - ``` - -2. Deploy a ServiceMonitor definition for the central metrics of the `istiod` deployment: - - ```bash - kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/servicemonitor-istiod.yaml - ``` - -3. Get the latest versions of the Istio-specific dashboards. - Grafana is configured to load dashboards dynamically from ConfigMaps in the cluster, so Istio-specific dashboards can be applied as well. - Either follow the [Istio quick start instructions](https://istio.io/latest/docs/ops/integrations/grafana/#option-1-quick-start), or take the prepared ones with the following command: - - ```bash - kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/configmap-istio-grafana-dashboards.yaml - kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/configmap-istio-services-grafana-dashboards.yaml - ``` - - > **NOTE:** This setup collects all Istio metrics on a Pod level, which can lead to cardinality issues. 
Because metrics are only needed on service level, for setups having a bigger amount of workloads deployed, it is recommended to use a setup based on federation as described in the [Istio documentation](https://istio.io/latest/docs/ops/best-practices/observability/#using-prometheus-for-production-scale-monitoring). - -### Verify the installation - -1. You should see several Pods coming up in the Namespace, especially Prometheus and Alertmanager. Assure that all Pods have the "Running" state. -2. Browse the Prometheus dashboard and verify that all "Status->Targets" are healthy. The following command exposes the dashboard on `http://localhost:9090`: - ```bash - kubectl -n ${K8S_NAMESPACE} port-forward $(kubectl -n ${K8S_NAMESPACE} get service -l app=kube-prometheus-stack-prometheus -oname) 9090 - ``` -3. Browse the Grafana dashboard and verify that the dashboards are showing data. The user `admin` is preconfigured in the Helm chart; the password was provided in your `helm install` command. The following command exposes the dashboard on `http://localhost:3000`: - ```bash - kubectl -n ${K8S_NAMESPACE} port-forward svc/${HELM_PROM_RELEASE}-grafana 3000:80 - ``` - -### Deploy a custom workload and scrape it - -Follow the tutorial [monitoring-custom-metrics](./monitoring-custom-metrics/), but use the steps above to verify that the metrics are collected. - -### Scrape workload via annotations - -Instead of defining a ServiceMonitor per workload for setting up custom metric scraping, you can use a simplified way based on annotations. The used [values.yaml](./values.yaml) defines an `additionalScrapeConfig`, which scrapes all Pods and services that have the following annotations: - -```yaml -prometheus.io/scrape: "true" # mandatory to enable automatic scraping -prometheus.io/scheme: https # optional, default is "http" if no Istio sidecar is used. When using a sidecar (Pod has label `security.istio.io/tlsMode=istio`), the default is "https". Use "https" to scrape workloads using Istio client certificates. -prometheus.io/port: "1234" # optional, configure the port under which the metrics are exposed -prometheus.io/path: /myMetrics # optional, configure the path under which the metrics are exposed -``` - -You can try it out by removing the ServiceMonitor from the previous example and instead providing the annotations to the Service manifest. - -### Set up alerting - -1. You can connect the Alertmanager to your notification channel (for instance, Slack or VictorOps) by providing an [Alertmanager configuration](https://prometheus.io/docs/alerting/latest/configuration/#configuration-file) to the `alertmanager.config` value. - - The [alertmanager-values.yaml](./alertmanager-values.yaml) example provides a configuration that sends notifications for alerts with high severity to a Slack channel. To deploy it, download the file, adapt ``, `` and `` to your environment, and run the Helm upgrade command to deploy the configuration: - ```bash - helm upgrade --install -n ${K8S_NAMESPACE} ${HELM_PROM_RELEASE} prometheus-community/kube-prometheus-stack -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/values.yaml -f ./alertmanager-values.yaml --set grafana.adminPassword=myPwd - ``` - -2. Follow the tutorial [monitoring-alert-rules](./monitoring-alert-rules/) to set up an alerting rule on Prometheus. - -### Set up Grafana dashboards - -Follow the tutorial [monitoring-grafana-dashboard](./monitoring-grafana-dashboard) to learn how to visualize your metrics in a Grafana dashboard. 
- -### Cleanup - -To remove the installation from the cluster, call Helm: - -```bash -helm delete -n ${K8S_NAMESPACE} ${HELM_PROM_RELEASE} -``` +4. [**Define alerting rules**](./monitoring-alert-rules/README.md), in which you define the `CPUTempHigh` alerting rule by creating a PrometheusRule resource. Prometheus accesses the `/metrics` endpoint every 10 seconds and validates the current value of the `cpu_temperature_celsius` metric. If the value is equal to or higher than 75 degrees Celsius, Prometheus waits for 10 seconds to recheck it. If the value still exceeds the threshold, Prometheus triggers the rule. You can observe both the rule and the alert it generates on the Prometheus dashboard. diff --git a/prometheus/assets/monitoring-tutorials.svg b/prometheus/assets/monitoring-tutorials.svg new file mode 100644 index 00000000..245b0737 --- /dev/null +++ b/prometheus/assets/monitoring-tutorials.svg @@ -0,0 +1,4 @@ + + + +
[draw.io SVG source elided; embedded text: "Deploy and configure the kube-prometheus stack" (Kubernetes and Helm), "Deploy a service, expose and observe the custom metric" (Localhost and Prometheus UI), "Create a Gauge dashboard type" (Grafana), "Define an alerting rule" (Prometheus)]
\ No newline at end of file
diff --git a/prometheus/prometheus.md b/prometheus/prometheus.md
new file mode 100644
index 00000000..26ce2eff
--- /dev/null
+++ b/prometheus/prometheus.md
@@ -0,0 +1,134 @@
+# Install a custom kube-prometheus-stack
+
+## Overview
+
+The Kyma monitoring stack brings limited configuration options in contrast to the upstream [`kube-prometheus-stack`](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack) chart. Modifications might be reset at the next upgrade cycle.
+
+This tutorial outlines how to set up a [`kube-prometheus-stack`](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack), including a basic setup of Prometheus, Grafana, and Alertmanager.
+
+> **CAUTION:**
+- This tutorial describes a basic setup that you should not use in production. Typically, a production setup needs further configuration, like optimizing the amount of data to scrape and the required resource footprint of the installation. To achieve qualities like [high availability](https://prometheus.io/docs/introduction/faq/#can-prometheus-be-made-highly-available), [scalability](https://prometheus.io/docs/introduction/faq/#i-was-told-prometheus-doesnt-scale), or [durable long-term storage](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage), you need a more advanced setup.
+- This example uses the latest Grafana version, which is under AGPL-3.0 and might not be free of charge for commercial usage.
+
+## Prerequisites
+
+- Kyma as the target deployment environment.
+- Kubectl > 1.22.x
+- Helm 3.x
+
+## Installation
+
+### Preparation
+1. Export your Namespace as a variable. Replace the `{namespace}` placeholder in the following command and run it:
+
+   ```bash
+   export K8S_NAMESPACE="{namespace}"
+   ```
+1. If you haven't created the Namespace yet, now is the time to do so:
+   ```bash
+   kubectl create namespace $K8S_NAMESPACE
+   ```
+   >**Note**: This Namespace must have **no** Istio sidecar injection enabled; that is, there must be no `istio-injection` label present on the Namespace. The Helm chart deploys jobs that will not succeed when Istio sidecar injection is enabled.
+
+1. Export the Helm release name that you want to use. It can be any name, but be aware that all resources in the cluster will be prefixed with that name. Run the following command:
+   ```bash
+   export HELM_PROM_RELEASE="prometheus"
+   ```
+
+1. Update your Helm installation with the required Helm repository:
+
+   ```bash
+   helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+   helm repo update
+   ```
+
+### Install the kube-prometheus-stack
+
+1. Run the Helm upgrade command, which installs the chart if it's not present yet. At the end of the command, change the Grafana admin password to some value of your choice.
+   ```bash
+   helm upgrade --install -n ${K8S_NAMESPACE} ${HELM_PROM_RELEASE} prometheus-community/kube-prometheus-stack -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/values.yaml --set grafana.adminPassword=myPwd
+   ```
+
+2. You can use the [values.yaml](./values.yaml) provided with this tutorial, which contains customized settings deviating from the default settings, or create your own.
+The provided `values.yaml` covers the following adjustments:
+- Client certificate injection to support scraping of workloads secured with Istio strict mTLS
+- Active scraping of workloads annotated with `prometheus.io/scrape`
+- Basic configuration of data persistence with retention
+- Basic resource limits for the involved components
+
+### Activate scraping of Istio metrics & Grafana dashboards
+
+1. To configure Prometheus for scraping the Istio-specific metrics from any istio-proxy running in the cluster, deploy a PodMonitor, which scrapes any Pod that exposes a port with the name `.*-envoy-prom`.
+
+   ```bash
+   kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/podmonitor-istio-proxy.yaml
+   ```
+
+2. Deploy a ServiceMonitor definition for the central metrics of the `istiod` deployment:
+
+   ```bash
+   kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/servicemonitor-istiod.yaml
+   ```
+
+3. Get the latest versions of the Istio-specific dashboards.
+   Grafana is configured to load dashboards dynamically from ConfigMaps in the cluster, so Istio-specific dashboards can be applied as well.
+   Either follow the [Istio quick start instructions](https://istio.io/latest/docs/ops/integrations/grafana/#option-1-quick-start), or take the prepared ones with the following command:
+
+   ```bash
+   kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/configmap-istio-grafana-dashboards.yaml
+   kubectl -n ${K8S_NAMESPACE} apply -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/istio/configmap-istio-services-grafana-dashboards.yaml
+   ```
+
+   > **NOTE:** This setup collects all Istio metrics on a Pod level, which can lead to cardinality issues. Because metrics are only needed on the service level, for setups with a larger number of workloads, it is recommended to use a federation-based setup as described in the [Istio documentation](https://istio.io/latest/docs/ops/best-practices/observability/#using-prometheus-for-production-scale-monitoring).
+
+### Verify the installation
+
+1. You should see several Pods coming up in the Namespace, especially Prometheus and Alertmanager. Ensure that all Pods are in the "Running" state.
+2. Browse the Prometheus dashboard and verify that all "Status->Targets" are healthy. The following command exposes the dashboard on `http://localhost:9090`:
+   ```bash
+   kubectl -n ${K8S_NAMESPACE} port-forward $(kubectl -n ${K8S_NAMESPACE} get service -l app=kube-prometheus-stack-prometheus -oname) 9090
+   ```
+3. Browse the Grafana dashboard and verify that the dashboards are showing data. The user `admin` is preconfigured in the Helm chart; the password was provided in your `helm install` command. The following command exposes the dashboard on `http://localhost:3000`:
+   ```bash
+   kubectl -n ${K8S_NAMESPACE} port-forward svc/${HELM_PROM_RELEASE}-grafana 3000:80
+   ```
+
+### Deploy a custom workload and scrape it
+
+Follow the tutorial [monitoring-custom-metrics](./monitoring-custom-metrics/), but use the steps above to verify that the metrics are collected.
+
+### Scrape workloads via annotations
+
+Instead of defining a ServiceMonitor per workload for setting up custom metric scraping, you can use a simplified way based on annotations. The [values.yaml](./values.yaml) used here defines an `additionalScrapeConfig`, which scrapes all Pods and Services that have the following annotations:
+
+```yaml
+prometheus.io/scrape: "true" # mandatory to enable automatic scraping
+prometheus.io/scheme: https # optional, default is "http" if no Istio sidecar is used. When using a sidecar (Pod has label `security.istio.io/tlsMode=istio`), the default is "https". Use "https" to scrape workloads using Istio client certificates.
+prometheus.io/port: "1234" # optional, configure the port under which the metrics are exposed
+prometheus.io/path: /myMetrics # optional, configure the path under which the metrics are exposed
+```
+
+You can try it out by removing the ServiceMonitor from the previous example and instead providing the annotations to the Service manifest, as in the sketch below.
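+For illustration, this is a minimal sketch of a Service manifest carrying these annotations; the name, port, and selector are placeholders that must match your workload:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: sample-metrics           # placeholder name
+  annotations:
+    prometheus.io/scrape: "true" # enable annotation-based scraping
+    prometheus.io/port: "8080"   # port exposing the metrics (assumed)
+    prometheus.io/path: /metrics # path exposing the metrics
+spec:
+  selector:
+    app: sample-metrics          # placeholder selector
+  ports:
+    - name: http
+      port: 8080
+      targetPort: 8080
+```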
+### Set up alerting
+
+1. You can connect the Alertmanager to your notification channel (for instance, Slack or VictorOps) by providing an [Alertmanager configuration](https://prometheus.io/docs/alerting/latest/configuration/#configuration-file) to the `alertmanager.config` value. A minimal sketch of such a configuration follows this list.
+
+   The [alertmanager-values.yaml](./alertmanager-values.yaml) example provides a configuration that sends notifications for alerts with high severity to a Slack channel. To deploy it, download the file, adapt ``, `` and `` to your environment, and run the Helm upgrade command to deploy the configuration:
+   ```bash
+   helm upgrade --install -n ${K8S_NAMESPACE} ${HELM_PROM_RELEASE} prometheus-community/kube-prometheus-stack -f https://raw.githubusercontent.com/kyma-project/examples/main/prometheus/values.yaml -f ./alertmanager-values.yaml --set grafana.adminPassword=myPwd
+   ```
+
+2. Follow the tutorial [monitoring-alert-rules](./monitoring-alert-rules/) to set up an alerting rule on Prometheus.
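+The following sketch shows the general shape of such an `alertmanager.config` value; the receiver name, webhook URL, channel, and severity matcher are assumptions, not the content of [alertmanager-values.yaml](./alertmanager-values.yaml):
+
+```yaml
+alertmanager:
+  config:
+    route:
+      receiver: slack-notifications # default receiver (placeholder name)
+      routes:
+        - receiver: slack-notifications
+          matchers:
+            - severity = "critical" # match your alert's severity label (assumed value)
+    receivers:
+      - name: slack-notifications
+        slack_configs:
+          - api_url: https://hooks.slack.com/services/... # placeholder webhook URL
+            channel: '#alerts'                            # placeholder channel
+            send_resolved: true
+```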
+### Set up Grafana dashboards
+
+Follow the tutorial [monitoring-grafana-dashboard](./monitoring-grafana-dashboard) to learn how to visualize your metrics in a Grafana dashboard.
+
+### Cleanup
+
+To remove the installation from the cluster, call Helm:
+
+```bash
+helm delete -n ${K8S_NAMESPACE} ${HELM_PROM_RELEASE}
+```

From 83698633cace10b4ba1dbd7ff525d314a8fa6a6c Mon Sep 17 00:00:00 2001
From: Andreas Thaler
Date: Fri, 17 Nov 2023 15:31:03 +0100
Subject: [PATCH 3/3] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Małgorzata Świeca
---
 prometheus/README.md   | 4 ++--
 prometheus/values.yaml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/prometheus/README.md b/prometheus/README.md
index 4e871c74..c4a746b0 100644
--- a/prometheus/README.md
+++ b/prometheus/README.md
@@ -2,7 +2,7 @@
 
 ## Overview
 
-The Kyma telemetry module provides collection and integration into observability backends based on OpenTelemetry. OpenTelemetry is the new vendor-neutral player in the cloud-native observability domain and has a growing adoption. However, it still lacks many features and sometimes the typical `kube-prometheus-stack` based on Prometheus, Grafana and the surrounding helper tools is more appropriate.
+The Kyma Telemetry module provides collection and integration into observability backends based on OpenTelemetry. OpenTelemetry is the new vendor-neutral player in the cloud-native observability domain and has a growing adoption. However, it still lacks many features and sometimes the typical `kube-prometheus-stack` based on Prometheus, Grafana, and the surrounding helper tools is more appropriate.
 
 The following instructions describe the complete monitoring flow for your service running in Kyma. You get the gist of monitoring applications, such as Prometheus, Grafana, and Alertmanager. You learn how and where you can observe and visualize your service metrics to monitor them for any alerting values.
 
@@ -16,7 +16,7 @@ The instructions cover the following tasks:
 
 1. [**Deploy a custom Prometheus stack**](./prometheus.md), in which you deploy the [kube-prometheus-stack](https://github.com/prometheus-operator/kube-prometheus) from the upstream Helm chart.
 
-2. [**Observe application metrics**](./monitoring-custom-metrics/README.md), in which you redirect the `cpu_temperature_celsius` metric to the localhost and the Prometheus UI. You later observe how the metric value changes in the predefined 10 seconds interval in which Prometheus scrapes the metric values from the service's `/metrics` endpoint.
+2. [**Observe application metrics**](./monitoring-custom-metrics/README.md), in which you redirect the `cpu_temperature_celsius` metric to the localhost and the Prometheus UI. You later observe how the metric value changes in the predefined 10-second interval in which Prometheus scrapes the metric values from the service's `/metrics` endpoint.
 
 3. [**Create a Grafana dashboard**](./monitoring-grafana-dashboard/README.md), in which you create a Grafana dashboard of a Gauge type for the `cpu_temperature_celsius` metric. This dashboard shows explicitly when the CPU temperature is equal to or higher than the predefined threshold of 75 degrees Celsius, at which point the dashboard turns red.
 
diff --git a/prometheus/values.yaml b/prometheus/values.yaml
index 00d7b252..b00aa283 100644
--- a/prometheus/values.yaml
+++ b/prometheus/values.yaml
@@ -1,5 +1,5 @@
-####### This block configures resource limits for the tooling and disables scaping of them via annotation
+####### This block configures resource limits for the tooling and disables scraping of them via annotations
 
 prometheusOperator:
   # Define resource limits
@@ -30,7 +30,7 @@ kube-state-metrics:
       cpu: "500m"
       memory: "2Gi"
 
-####### This block disabled not needed features
+####### This block disables features that are not needed
 
 kubeEtcd:
   # Disable scraping of control plane component etcd as it is not reachable from the data plane