From eb1bac5e150b0f070300f8a3be93dff8c2bdd5c2 Mon Sep 17 00:00:00 2001 From: jianrongzhang89 Date: Wed, 11 Dec 2024 15:42:04 -0500 Subject: [PATCH] [issue 465]Create a documentation section to use Grafana DataSource with SonataFlow Prometheus metrics: address review comments --- serverlessworkflow/modules/ROOT/nav.adoc | 4 +- .../_common-content/sonataflow-metrics.adoc | 134 ++++++++++++++++++ .../modules/ROOT/pages/cloud/index.adoc | 8 ++ .../cloud/operator/monitoring-workflows.adoc | 6 +- .../cloud/operator/sonataflow-metrics.adoc | 101 +------------ .../expose-metrics-to-prometheus.adoc | 45 +----- 6 files changed, 152 insertions(+), 146 deletions(-) create mode 100644 serverlessworkflow/modules/ROOT/pages/_common-content/sonataflow-metrics.adoc diff --git a/serverlessworkflow/modules/ROOT/nav.adoc b/serverlessworkflow/modules/ROOT/nav.adoc index 48d8510d0..26cba2125 100644 --- a/serverlessworkflow/modules/ROOT/nav.adoc +++ b/serverlessworkflow/modules/ROOT/nav.adoc @@ -83,7 +83,9 @@ *** xref:cloud/operator/service-discovery.adoc[Service Discovery] *** xref:cloud/operator/using-persistence.adoc[Workflow Persistence] *** xref:cloud/operator/configuring-workflow-eventing-system.adoc[Workflow Eventing System] -*** xref:cloud/operator/monitoring-workflows.adoc[Workflow Monitoring] +*** Monitoring +**** xref:cloud/operator/monitoring-workflows.adoc[Workflow Monitoring] +**** xref:cloud/operator/sonataflow-metrics.adoc[Prometheus Metrics for Workflows] // *** xref:cloud/operator/configuring-knative-eventing-resources.adoc[Knative Eventing] *** xref:cloud/operator/known-issues.adoc[Roadmap and Known Issues] *** xref:cloud/operator/add-custom-ca-to-a-workflow-pod.adoc[Add Custom CA to Workflow Pod] diff --git a/serverlessworkflow/modules/ROOT/pages/_common-content/sonataflow-metrics.adoc b/serverlessworkflow/modules/ROOT/pages/_common-content/sonataflow-metrics.adoc new file mode 100644 index 000000000..54f100068 --- /dev/null +++ 
b/serverlessworkflow/modules/ROOT/pages/_common-content/sonataflow-metrics.adoc @@ -0,0 +1,134 @@ +== Overview + +In {product_name}, you can check the following metrics: + +* `kogito_process_instance_started_total`: Number of started workflows (a workflow that has started might be running or completed) +* `kogito_process_instance_running_total`: Number of running workflows +* `kogito_process_instance_completed_total`: Number of completed workflows +* `kogito_process_instance_error`: Number of workflows that report an error (a workflow with an error might still be running or might have been completed) +* `kogito_process_instance_duration_seconds`: Duration of a process instance in seconds +* `kogito_node_instance_duration_milliseconds`: Duration of relevant nodes in milliseconds (a workflow is composed of nodes; users might be interested in the time consumed by a specific node type) +* `sonataflow_input_parameters_counter`: Records input parameters, the occurrences of <"param_name","param_value"> per `processId`. + +[NOTE] +==== +Internally, workflows are referred to as processes. Therefore, `processId` and `processName` are the workflow ID and name, respectively. +==== + +Each of the metrics mentioned previously contains a label for a specific workflow ID. For example, the `kogito_process_instance_completed_total` metric below contains the labels for the `callbackstatetimeouts` workflow: + +.Example `kogito_process_instance_completed_total` metric +[source,yaml] +---- +# HELP kogito_process_instance_completed_total Completed Process Instances +# TYPE kogito_process_instance_completed_total counter +kogito_process_instance_completed_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",process_state="Completed",version="1.0.0-SNAPSHOT",} 3.0 +---- + +[NOTE] +==== +Internally, {product_name} uses the Quarkus Micrometer extension, which also exposes built-in metrics. 
You can disable the Micrometer metrics in {product_name}. For more information, see link:{quarkus_micrometer_url}[Quarkus - Micrometer Metrics]. +==== + +== Metrics Description +=== kogito_process_instance_completed_total +Workflow instances that have reached a terminal status, “Aborted” or “Completed”, and thus are considered as completed. + +[NOTE] +==== +These are the only two terminal statuses. The “Error” state is not terminal. +Additionally, the metric carries the `process_state` label, set to "Completed" or "Aborted", to record exactly which of the two terminal statuses was reached. +==== + +[source, yaml] +---- +# HELP kogito_process_instance_completed_total Completed Process Instances +# TYPE kogito_process_instance_completed_total counter +kogito_process_instance_completed_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",process_state="Completed",version="1.0.0-SNAPSHOT",} 3.0 +---- + +=== kogito_process_instance_started_total +Counts the number of started workflow instances. + +[source, yaml] +---- +# HELP kogito_process_instance_started_total Started Process Instances +# TYPE kogito_process_instance_started_total counter +kogito_process_instance_started_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 7.0 +---- + +=== kogito_process_instance_running_total +Records the number of running workflow instances. + +[NOTE] +==== +This includes workflow instances that are in the "Error" state, since the error state is not a terminal state. +Process instances that have reached a terminal status, i.e. "Completed" or "Aborted", are not present in this metric. 
+==== + +[source, yaml] +---- +# HELP kogito_process_instance_running_total Running Process Instances +# TYPE kogito_process_instance_running_total gauge +kogito_process_instance_running_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 4.0 +---- + +=== kogito_process_instance_duration_seconds +Calculates duration of a workflow instance that has reached a terminal state, i.e. "Aborted" or "Completed". This metric is registered when the process reaches the terminal state. + +[source, yaml] +---- +# HELP kogito_process_instance_duration_seconds_max Process Instances Duration +# TYPE kogito_process_instance_duration_seconds_max gauge + kogito_process_instance_duration_seconds_max{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 30.0 + + +# HELP kogito_process_instance_duration_seconds Process Instances Duration +# TYPE kogito_process_instance_duration_seconds summary + kogito_process_instance_duration_seconds_count{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 3.0 + kogito_process_instance_duration_seconds_sum{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 90.0 +---- + +=== kogito_node_instance_duration_milliseconds +Records the duration of the execution for nodes “relevant” to the workflows. The metric is calculated when a given node has finished executing. 
+ +[source, yaml] +---- +# HELP kogito_node_instance_duration_milliseconds_max Relevant nodes duration in milliseconds +# TYPE kogito_node_instance_duration_milliseconds_max gauge +kogito_node_instance_duration_milliseconds_max{artifactId="serverless-workflow-project",node_name="CallbackState",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 30014.0 + + +# HELP kogito_node_instance_duration_milliseconds Relevant nodes duration in milliseconds +# TYPE kogito_node_instance_duration_milliseconds summary +kogito_node_instance_duration_milliseconds_count{artifactId="serverless-workflow-project",node_name="CallbackState",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 3.0 +kogito_node_instance_duration_milliseconds_sum{artifactId="serverless-workflow-project",node_name="CallbackState",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 90128.0 +---- + +=== kogito_process_instance_error +Records the number of errors that have occurred per processId and error, including the error message. + +[source, yaml] +---- +# HELP kogito_process_instance_error Number of errors that has occurred +# TYPE kogito_process_instance_error counter +---- + +=== sonataflow_input_parameters_counter_total + +Records the occurrences of <"param_name", "param_value"> per processId. + +[NOTE] +==== +Parameters that are json values, or arrays are flattened. 
+==== + +[source, yaml] +---- +# HELP sonataflow_input_parameters_counter_total Input parameters +# TYPE sonataflow_input_parameters_counter_total counter +sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="name",param_value="walter",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 1.0 +sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="surname.sur1",param_value="Medvedeo",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 1.0 +sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="name",param_value="bob",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 5.0 +sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="surname",param_value="esponja",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 5.0 +---- diff --git a/serverlessworkflow/modules/ROOT/pages/cloud/index.adoc b/serverlessworkflow/modules/ROOT/pages/cloud/index.adoc index f1752ecc9..590ea2e8b 100644 --- a/serverlessworkflow/modules/ROOT/pages/cloud/index.adoc +++ b/serverlessworkflow/modules/ROOT/pages/cloud/index.adoc @@ -136,6 +136,14 @@ xref:cloud/operator/monitoring-workflows.adoc[] Learn how to configure Prometheus, Grafana and Grafana Dashboard for monitoring of workflow instances -- +[.card] +-- +[.card-title] +xref:cloud/operator/sonataflow-metrics.adoc[] +[.card-description] +Learn Prometheus metrics for workflow monitoring +-- + [.card] -- [.card-title] diff --git a/serverlessworkflow/modules/ROOT/pages/cloud/operator/monitoring-workflows.adoc b/serverlessworkflow/modules/ROOT/pages/cloud/operator/monitoring-workflows.adoc index 142566de2..3d0a2ab81 100644 --- 
a/serverlessworkflow/modules/ROOT/pages/cloud/operator/monitoring-workflows.adoc +++ b/serverlessworkflow/modules/ROOT/pages/cloud/operator/monitoring-workflows.adoc @@ -322,11 +322,11 @@ Click `+` -> `Import dashboard`, copy the json model data for xref::cloud/operat image::cloud/operator/monitoring/grafana-dashboard-example.png[] === Customize or build your own dashboard -You can customize or build your own dashboard. For more information, see xref:https://grafana.com/docs/grafana/latest/dashboards[Grafana Dashboards] and xref:cloud/operator/sonataflow-metrics.adoc[SonataFlow Metrics]. +You can customize or build your own dashboard. For more information, see link:https://grafana.com/docs/grafana/latest/dashboards[Grafana Dashboards] and xref:cloud/operator/sonataflow-metrics.adoc[Prometheus Metrics for Workflows]. == Additional resources -* xref:cloud/operator/sonataflow-metrics.adoc[SonataFlow Metrics] -* xref:https://grafana.com/docs/grafana/latest/dashboards[Grafana Dashboards] +* xref:cloud/operator/sonataflow-metrics.adoc[Prometheus Metrics for Workflows] +* link:https://grafana.com/docs/grafana/latest/dashboards[Grafana Dashboards] include::../../../pages/_common-content/report-issue.adoc[] \ No newline at end of file diff --git a/serverlessworkflow/modules/ROOT/pages/cloud/operator/sonataflow-metrics.adoc b/serverlessworkflow/modules/ROOT/pages/cloud/operator/sonataflow-metrics.adoc index 25baa3f55..01c5eedd4 100644 --- a/serverlessworkflow/modules/ROOT/pages/cloud/operator/sonataflow-metrics.adoc +++ b/serverlessworkflow/modules/ROOT/pages/cloud/operator/sonataflow-metrics.adoc @@ -1,99 +1,4 @@ -= SonataFlow Metrics += Prometheus Metrics in {product_name} -== kogito_process_instance_completed_total -Workflow instances that have reached a terminal status, “Aborted” or “Completed”, and thus are -considered as completed. - -Note: These are the only two terminal status. The “Error” state is not terminal. 
- - -Additionally, the metric has the process_state="Completed", or could be "Aborted", to register exactly which of the two terminal status were reached. - -[source, yaml] ----- -# HELP kogito_process_instance_completed_total Completed Process Instances -# TYPE kogito_process_instance_completed_total counter -kogito_process_instance_completed_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",process_state="Completed",version="1.0.0-SNAPSHOT",} 3.0 ----- - -== kogito_process_instance_started_total -Count the number of started workflow instances. - -[source, yaml] ----- -# HELP kogito_process_instance_started_total Started Process Instances -# TYPE kogito_process_instance_started_total counter -kogito_process_instance_started_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 7.0 ----- - -== kogito_process_instance_running_total -Records the number of running workflow instances. - -Note: This includes workflow instances that are in the "Error" state, since the error state is not a terminal state. - - -Only the process instances that has reached a terminal status, i.e. "Completed" or "Aborted", are not present in this metric. - -[source, yaml] ----- -# HELP kogito_process_instance_running_total Running Process Instances -# TYPE kogito_process_instance_running_total gauge -kogito_process_instance_running_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 4.0 ----- - -== kogito_process_instance_duration_seconds -Calculates duration of a workflow instance that has reached a terminal state,, i.e. "Aborted" or "Completed". This metric is registered when the process reaches the terminal state. 
- -[source, yaml] ----- -# HELP kogito_process_instance_duration_seconds_max Process Instances Duration -# TYPE kogito_process_instance_duration_seconds_max gauge - kogito_process_instance_duration_seconds_max{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 30.0 - - -# HELP kogito_process_instance_duration_seconds Process Instances Duration -# TYPE kogito_process_instance_duration_seconds summary - kogito_process_instance_duration_seconds_count{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 3.0 - kogito_process_instance_duration_seconds_sum{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 90.0 ----- - -== kogito_node_instance_duration_milliseconds -Records the duration of the execution for nodes “relevant” to the workflows. The metric is calculated when a given node has finished executing. 
- -[source, yaml] ----- -# HELP kogito_node_instance_duration_milliseconds_max Relevant nodes duration in milliseconds -# TYPE kogito_node_instance_duration_milliseconds_max gauge -kogito_node_instance_duration_milliseconds_max{artifactId="serverless-workflow-project",node_name="CallbackState",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 30014.0 - - -# HELP kogito_node_instance_duration_milliseconds Relevant nodes duration in milliseconds -# TYPE kogito_node_instance_duration_milliseconds summary -kogito_node_instance_duration_milliseconds_count{artifactId="serverless-workflow-project",node_name="CallbackState",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 3.0 -kogito_node_instance_duration_milliseconds_sum{artifactId="serverless-workflow-project",node_name="CallbackState",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 90128.0 ----- - -== kogito_process_instance_error -Records the number of errors that have occurred per processId and error, including the error message. - -[source, yaml] ----- -# HELP kogito_process_instance_error Number of errors that has occurred -# TYPE kogito_process_instance_error counter ----- - -== sonataflow_input_parameters_counter_total - -Records the occurrences of <"param_name", "param_value"> per processId. - -Note: parameters that are json values, or arrays are flattened. 
- -[source, yaml] ----- -# HELP sonataflow_input_parameters_counter_total Input parameters -# TYPE sonataflow_input_parameters_counter_total counter -sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="name",param_value="walter",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 1.0 -sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="surname.sur1",param_value="Medvedeo",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 1.0 -sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="name",param_value="bob",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 5.0 -sonataflow_input_parameters_counter_total{app_id="sonataflow-process-monitoring-listener",artifactId="serverless-workflow-project",param_name="surname",param_value="esponja",process_id="callbackstatetimeouts",version="1.0.0-SNAPSHOT",} 5.0 ----- +include::../../../pages/_common-content/sonataflow-metrics.adoc[] +include::../../../pages/_common-content/report-issue.adoc[] \ No newline at end of file diff --git a/serverlessworkflow/modules/ROOT/pages/use-cases/advanced-developer-use-cases/integrations/expose-metrics-to-prometheus.adoc b/serverlessworkflow/modules/ROOT/pages/use-cases/advanced-developer-use-cases/integrations/expose-metrics-to-prometheus.adoc index ee6ce25d5..b715e0f2a 100644 --- a/serverlessworkflow/modules/ROOT/pages/use-cases/advanced-developer-use-cases/integrations/expose-metrics-to-prometheus.adoc +++ b/serverlessworkflow/modules/ROOT/pages/use-cases/advanced-developer-use-cases/integrations/expose-metrics-to-prometheus.adoc @@ -147,48 +147,5 @@ For more information about configuring Prometheus, see link:{prometheus_operator [[ref-sw-example-metrics]] == Metrics in {product_name} -In 
{product_name}, you can check the following metrics: - -* `kogito_process_instance_started_total`: Number of started workflows (a workflow that has started might be running or completed) -* `kogito_process_instance_running_total`: Number of running workflows -* `kogito_process_instance_completed_total`: Number of completed workflows -* `kogito_process_instance_error`: Number of workflows that report an error ( a workflow with an error might be still running or have been completed) -* `kogito_process_instance_duration_seconds`: Duration of a process instance in seconds -* `kogito_node_instance_duration_milliseconds`: Duration of relevant nodes in milliseconds (a workflow is composed by nodes, user might be interested on the time consumed by an specific node type) - -Optionally, if you include dependency - -[source,xml] ----- - - org.kie - kie-addons-quarkus-monitoring-sonataflow - ----- - -metric `sonataflow_input_parameters_counter`, which tracks input parameter, will be available. This metric is optional because it counts every workflow input parameter key value combination, which might be space consuming. - - -[NOTE] -==== -Internally, workflows are referred as processes. Therefore, the `processId` and `processName` is workflow ID and name respectively. -==== - -Each of the metrics mentioned previously contains a label for a specific workflow ID. 
For example, the `kogito_process_instance_completed_total` metric contains labels for `jsongreet`, `yamlgreet`, and `foreach` workflows: - -.Example `kogito_process_instance_completed_total` metric -[source,yaml] ----- -# HELP kogito_process_instance_completed_total Completed Process Instances -# TYPE kogito_process_instance_completed_total counter -kogito_process_instance_completed_total{app_id="default-process-monitoring-listener",artifactId="kogito-serverless-workflow-demo",node_name="2",process_id="jsongreet",version="1.0.0-SNAPSHOT",} 154.0 -kogito_process_instance_completed_total{app_id="default-process-monitoring-listener",artifactId="kogito-serverless-workflow-demo",node_name="2",process_id="yamlgreet",version="1.0.0-SNAPSHOT",} 218.0 -kogito_process_instance_completed_total{app_id="default-process-monitoring-listener",artifactId="kogito-serverless-workflow-demo",node_name="2",process_id="foreach",version="1.0.0-SNAPSHOT",} 162.0 ----- - -[NOTE] -==== -Internally, {product_name} uses Quarkus Micrometer extension, which also exposes built-in metrics. You can disable the Micrometer metrics in {product_name}. For more information, see link:{quarkus_micrometer_url}[Quarkus - Micrometer Metrics]. -==== - +include::../../../../pages/_common-content/sonataflow-metrics.adoc[] include::../../../../pages/_common-content/report-issue.adoc[] \ No newline at end of file