From c169f5e02a76ac74e25928f0f92929921d450b09 Mon Sep 17 00:00:00 2001 From: skudasov Date: Wed, 20 Mar 2024 17:07:04 +0100 Subject: [PATCH 1/3] change dashboard module path --- .../chainlink-cluster/dashboard/cmd/deploy.go | 6 +- charts/chainlink-cluster/go.mod | 6 +- dashboard/README.md | 24 - dashboard/go.mod | 22 - dashboard/go.sum | 37 - .../lib/ccip-load-test-view/component.go | 497 ----- dashboard/lib/config.go | 70 - dashboard/lib/core-don/component.go | 1806 ----------------- dashboard/lib/core-don/platform.go | 45 - dashboard/lib/core-ocrv2-ccip/component.go | 83 - dashboard/lib/dashboard.go | 90 - dashboard/lib/k8s-pods/component.go | 198 -- dashboard/lib/log.go | 28 - 13 files changed, 7 insertions(+), 2905 deletions(-) delete mode 100644 dashboard/README.md delete mode 100644 dashboard/go.mod delete mode 100644 dashboard/go.sum delete mode 100644 dashboard/lib/ccip-load-test-view/component.go delete mode 100644 dashboard/lib/config.go delete mode 100644 dashboard/lib/core-don/component.go delete mode 100644 dashboard/lib/core-don/platform.go delete mode 100644 dashboard/lib/core-ocrv2-ccip/component.go delete mode 100644 dashboard/lib/dashboard.go delete mode 100644 dashboard/lib/k8s-pods/component.go delete mode 100644 dashboard/lib/log.go diff --git a/charts/chainlink-cluster/dashboard/cmd/deploy.go b/charts/chainlink-cluster/dashboard/cmd/deploy.go index ed901ea878b..883c1939a6b 100644 --- a/charts/chainlink-cluster/dashboard/cmd/deploy.go +++ b/charts/chainlink-cluster/dashboard/cmd/deploy.go @@ -2,9 +2,9 @@ package main import ( "github.com/K-Phoen/grabana/dashboard" - lib "github.com/smartcontractkit/chainlink/dashboard-lib/lib" - core_don "github.com/smartcontractkit/chainlink/dashboard-lib/lib/core-don" - k8spods "github.com/smartcontractkit/chainlink/dashboard-lib/lib/k8s-pods" + lib "github.com/smartcontractkit/chainlink/dashboard-lib" + core_don "github.com/smartcontractkit/chainlink/dashboard-lib/core-don" + k8spods 
"github.com/smartcontractkit/chainlink/dashboard-lib/k8s-pods" waspdb "github.com/smartcontractkit/wasp/dashboard" ) diff --git a/charts/chainlink-cluster/go.mod b/charts/chainlink-cluster/go.mod index 9caa817122c..a09db2045e6 100644 --- a/charts/chainlink-cluster/go.mod +++ b/charts/chainlink-cluster/go.mod @@ -4,7 +4,7 @@ go 1.21.7 require ( github.com/K-Phoen/grabana v0.22.1 - github.com/smartcontractkit/chainlink/dashboard-lib v0.22.1 + github.com/smartcontractkit/chainlink/dashboard-lib v0.0.0-00010101000000-000000000000 github.com/smartcontractkit/wasp v0.4.6 ) @@ -20,6 +20,8 @@ require ( golang.org/x/sys v0.15.0 // indirect ) +replace github.com/grafana/grafana-foundation-sdk/go => github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c + replace ( github.com/go-kit/log => github.com/go-kit/log v0.2.1 @@ -33,5 +35,5 @@ replace ( github.com/mwitkow/grpc-proxy => github.com/smartcontractkit/grpc-proxy v0.0.0-20230731113816-f1be6620749f github.com/sercand/kuberesolver/v4 => github.com/sercand/kuberesolver/v5 v5.1.1 - github.com/smartcontractkit/chainlink/dashboard-lib => ../../dashboard + github.com/smartcontractkit/chainlink/dashboard-lib => ../../dashboard-lib ) diff --git a/dashboard/README.md b/dashboard/README.md deleted file mode 100644 index b77d68df73d..00000000000 --- a/dashboard/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Chainlink Grafana Dashboards Library - -This library offers dashboard components and tools for constructing and testing Grafana dashboards at Chainlink. - -Components structure is as follows: -``` -dashboard - |- lib - |- component_1 - |- component.go - |- component_2 - |- component.go -|- go.mod -|- go.sum -``` - -Each component should contain rows, logic and unique variables in `component.go` - -Components should be imported from this module, see [example](../charts/chainlink-cluster/dashboard/cmd/deploy.go) - -## How to convert from JSON using Grabana codegen utility -1. 
Download Grabana binary [here](https://github.com/K-Phoen/grabana/releases) -2. ./bin/grabana convert-go -i dashboard.json > lib/my_new_component/rows.go -3. Create a [component](lib/k8s-pods/component.go) \ No newline at end of file diff --git a/dashboard/go.mod b/dashboard/go.mod deleted file mode 100644 index eef60129771..00000000000 --- a/dashboard/go.mod +++ /dev/null @@ -1,22 +0,0 @@ -module github.com/smartcontractkit/chainlink/dashboard-lib - -go 1.21.7 - -require ( - github.com/K-Phoen/grabana v0.22.1 - github.com/grafana/grafana-foundation-sdk/go v0.0.0-00010101000000-000000000000 - github.com/pkg/errors v0.9.1 - github.com/rs/zerolog v1.32.0 -) - -replace github.com/grafana/grafana-foundation-sdk/go => github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c - -require ( - github.com/K-Phoen/sdk v0.12.4 // indirect - github.com/gosimple/slug v1.13.1 // indirect - github.com/gosimple/unidecode v1.0.1 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.19 // indirect - github.com/prometheus/common v0.45.0 // indirect - golang.org/x/sys v0.13.0 // indirect -) diff --git a/dashboard/go.sum b/dashboard/go.sum deleted file mode 100644 index 0af3f10f4fe..00000000000 --- a/dashboard/go.sum +++ /dev/null @@ -1,37 +0,0 @@ -github.com/K-Phoen/grabana v0.22.1 h1:b/O+C3H2H6VNYSeMCYUO4X4wYuwFXgBcRkvYa+fjpQA= -github.com/K-Phoen/grabana v0.22.1/go.mod h1:3LTXrTzQzTKTgvKSXdRjlsJbizSOW/V23Q3iX00R5bU= -github.com/K-Phoen/sdk v0.12.4 h1:j2EYuBJm3zDTD0fGKACVFWxAXtkR0q5QzfVqxmHSeGQ= -github.com/K-Phoen/sdk v0.12.4/go.mod h1:qmM0wO23CtoDux528MXPpYvS4XkRWkWX6rvX9Za8EVU= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/godbus/dbus/v5 v5.0.4/go.mod 
h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gosimple/slug v1.13.1 h1:bQ+kpX9Qa6tHRaK+fZR0A0M2Kd7Pa5eHPPsb1JpHD+Q= -github.com/gosimple/slug v1.13.1/go.mod h1:UiRaFH+GEilHstLUmcBgWcI42viBN7mAb818JrYOeFQ= -github.com/gosimple/unidecode v1.0.1 h1:hZzFTMMqSswvf0LBJZCZgThIZrpDHFXux9KeGmn6T/o= -github.com/gosimple/unidecode v1.0.1/go.mod h1:CP0Cr1Y1kogOtx0bJblKzsVWrqYaqfNOnHzpgWw4Awc= -github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c h1:0vdGmlvHPzjNHx9Tx8soQEKe1ci0WVtA82s00sZDYUs= -github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c/go.mod h1:WtWosval1KCZP9BGa42b8aVoJmVXSg0EvQXi9LDSVZQ= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= -github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= -github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= -github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= -github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= -github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0= -github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify 
v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/dashboard/lib/ccip-load-test-view/component.go b/dashboard/lib/ccip-load-test-view/component.go deleted file mode 100644 index 9f58438410e..00000000000 --- a/dashboard/lib/ccip-load-test-view/component.go +++ /dev/null @@ -1,497 +0,0 @@ -package ccip_load_test_view - -import ( - "encoding/json" - "fmt" - "github.com/K-Phoen/grabana/dashboard" - "github.com/K-Phoen/grabana/logs" - "github.com/K-Phoen/grabana/row" - "github.com/K-Phoen/grabana/stat" - "github.com/K-Phoen/grabana/target/loki" - "github.com/K-Phoen/grabana/target/prometheus" - "github.com/K-Phoen/grabana/timeseries" - "github.com/K-Phoen/grabana/timeseries/axis" - "github.com/K-Phoen/grabana/variable/query" - cLoki "github.com/grafana/grafana-foundation-sdk/go/loki" - cXYChart "github.com/grafana/grafana-foundation-sdk/go/xychart" -) - -type Props struct { - LokiDataSource string -} - -func vars(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.VariableAsQuery( - "Test Run Name", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(namespace)"), - ), - dashboard.VariableAsQuery( - "cluster", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(cluster)"), - ), - dashboard.VariableAsQuery( - "test_group", - 
query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(test_group)"), - ), - dashboard.VariableAsQuery( - "test_id", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(test_id)"), - ), - dashboard.VariableAsQuery( - "source_chain", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(source_chain)"), - ), - dashboard.VariableAsQuery( - "dest_chain", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(dest_chain)"), - ), - dashboard.VariableAsQuery( - "geth_node", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(geth_node)"), - ), - dashboard.VariableAsQuery( - "remote_runner", - query.DataSource(p.LokiDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("namespace"), - ), - } -} - -func XYChartSeqNum() map[string]interface{} { - // TODO: https://github.com/grafana/grafana-foundation-sdk/tree/v10.4.x%2Bcog-v0.0.x/go has a lot of useful components - // TODO: need to change upload API and use combined upload in lib/dashboard.go - xAxisName := "seq_num" - builder := cXYChart.NewPanelBuilder(). - Title("XYChart"). - Dims(cXYChart.XYDimensionConfig{ - X: &xAxisName, - }). - WithTarget( - cLoki.NewDataqueryBuilder(). - Expr(`{namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_duration!= "" | data_Commit_ReportAccepted_success="✅"`). 
- LegendFormat("Commit Report Accepted"), - ) - sampleDashboard, err := builder.Build() - if err != nil { - panic(err) - } - dashboardJson, err := json.MarshalIndent(sampleDashboard, "", " ") - if err != nil { - panic(err) - } - var data map[string]interface{} - if err := json.Unmarshal(dashboardJson, &data); err != nil { - panic(err) - } - fmt.Println(string(dashboardJson)) - return data -} - -func statsRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "CCIP Duration Stats", - row.Collapse(), - row.WithTimeSeries( - "Sequence numbers", - timeseries.Transparent(), - timeseries.Description("Sequence Numbers triggered by Test"), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", source_chain="${source_chain}", dest_chain="${dest_chain}"} | json | data_CCIPSendRequested_success="✅" | unwrap data_CCIPSendRequested_seq_num [$__range]) by (test_id)`, - loki.Legend("Starts"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} | json | data_CCIPSendRequested_success="✅" | unwrap data_CCIPSendRequested_seq_num [$__range]) by (test_id)`, - loki.Legend("Ends"), - ), - ), - row.WithTimeSeries( - "Source Router Fees ( /1e18)", - timeseries.Transparent(), - timeseries.Description("Router.GetFee"), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | 
data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_fee [$__range]) by (test_id) /1e18`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_fee [$__range]) by (test_id) /1e18`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_fee [$__range]) by (test_id) /1e18 `, - loki.Legend("Max"), - ), - ), - row.WithTimeSeries( - "Commit Duration Summary", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("seconds"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_duration [$__range]) by (data_Commit_ReportAccepted_seqNum)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap 
data_Commit_ReportAccepted_duration [$__range]) by (data_Commit_ReportAccepted_seqNum)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_duration [$__range]) by (data_Commit_ReportAccepted_seqNum)`, - loki.Legend("Max"), - ), - ), - row.WithTimeSeries( - "Report Blessing Summary", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("seconds"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_duration [$__range]) by (data_ReportBlessedByARM_seqNum)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({ namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_duration [$__range]) by (data_ReportBlessedByARM_seqNum)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_duration [$__range]) by 
(data_ReportBlessedByARM_seqNum)`, - loki.Legend("Max"), - ), - ), - row.WithTimeSeries( - "Execution Duration Summary", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("seconds"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_duration [$__range]) by (data_ExecutionStateChanged_seqNum)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_duration [$__range]) by (data_ExecutionStateChanged_seqNum)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_duration [$__range]) by (data_ExecutionStateChanged_seqNum)`, - loki.Legend("Max"), - ), - ), - row.WithTimeSeries( - "E2E (Commit, ARM, Execution)", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("seconds"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", 
test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="✅"| unwrap data_CommitAndExecute_duration [$__range]) by (data_CommitAndExecute_seqNum)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="✅"| unwrap data_CommitAndExecute_duration [$__range]) by (data_CommitAndExecute_seqNum)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="✅"| unwrap data_CommitAndExecute_duration [$__range]) by (data_CommitAndExecute_seqNum)`, - loki.Legend("Max"), - ), - ), - ), - } -} - -func failedMessagesRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "Failed Messages", - row.Collapse(), - row.WithTimeSeries( - "Failed Commit", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `count_over_time({ namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="❌" [$__range])`, - loki.Legend("{{error}}"), - ), - ), - row.WithTimeSeries( - "Failed Bless", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - 
timeseries.WithLokiTarget( - `count_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="❌" [$__range])`, - loki.Legend("{{error}}"), - ), - ), - row.WithTimeSeries( - "Failed Execution", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `count_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="❌" [$__range])`, - loki.Legend("{{error}}"), - ), - ), - row.WithTimeSeries( - "Failed Commit and Execution", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `count_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="❌" [$__range])`, - loki.Legend("{{error}}"), - ), - ), - ), - } -} - -func reqRespRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "Requests/Responses", - row.WithStat( - "Stats", - stat.DataSource(p.LokiDataSource), - stat.Transparent(), - stat.Text(stat.TextValueAndName), - stat.Height("100px"), - stat.TitleFontSize(20), - stat.ValueFontSize(20), - stat.Span(12), - stat.WithPrometheusTarget( - `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", 
source_chain="${source_chain}", dest_chain="${dest_chain}"} -| json -| unwrap current_time_unit [$__range]) by (test_id)`, - prometheus.Legend("Time Unit"), - ), - stat.WithPrometheusTarget( - `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} -| json -| unwrap load_duration [$__range]) by (test_id)/ 1e9 `, - prometheus.Legend("Total Duration"), - ), - stat.WithPrometheusTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_message_bytes_length [$__range]) by (test_id)`, - prometheus.Legend("Max Byte Len Sent"), - ), - stat.WithPrometheusTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_no_of_tokens_sent [$__range]) by (test_id)`, - prometheus.Legend("Max No Of Tokens Sent"), - ), - ), - row.WithTimeSeries( - "Request Rate", - timeseries.Transparent(), - timeseries.Description("Requests triggered over test duration"), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `last_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id="${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"}| json | unwrap current_rps [$__interval]) by (test_id,gen_name)`, - loki.Legend("Request 
Triggered/TimeUnit"), - ), - ), - row.WithTimeSeries( - "Trigger Summary", - timeseries.Transparent(), - timeseries.Points(), - timeseries.Description("Latest Stage Stats"), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} -| json -| unwrap success [$__range]) by (test_id)`, - loki.Legend("Successful Requests"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} -| json -| unwrap failed [$__range]) by (test_id)`, - loki.Legend("Failed Requests"), - ), - ), - row.WithLogs( - "All CCIP Phases Stats", - logs.DataSource(p.LokiDataSource), - logs.Span(12), - logs.Height("300px"), - logs.Transparent(), - logs.WithLokiTarget( - `{namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json `, - ), - ), - ), - } -} - -func gasStatsRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "CCIP Gas Stats", - row.Collapse(), - row.WithTimeSeries( - "Gas Used in CCIP-Send⛽️", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("wei"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | 
json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({ namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_gas_used [$__range]) by (test_id) `, - loki.Legend("Max"), - ), - ), - row.WithTimeSeries( - "Gas Used in Commit⛽️", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("wei"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap 
data_Commit_ReportAccepted_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_ccip_send_data_gas_used [$__range]) by (test_id) `, - loki.Legend("Max"), - ), - ), - row.WithTimeSeries( - "Gas Used in ARM Blessing⛽️", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("wei"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_ccip_send_data_gas_used [$__range]) by (test_id) `, - loki.Legend("Max"), - 
), - ), - row.WithTimeSeries( - "Gas Used in Execution⛽️", - timeseries.Transparent(), - timeseries.Description(""), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.LokiDataSource), - timeseries.Axis( - axis.Unit("wei"), - ), - timeseries.WithLokiTarget( - `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Avg"), - ), - timeseries.WithLokiTarget( - `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_ccip_send_data_gas_used [$__range]) by (test_id)`, - loki.Legend("Min"), - ), - timeseries.WithLokiTarget( - `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_ccip_send_data_gas_used [$__range]) by (test_id) `, - loki.Legend("Max"), - ), - ), - ), - } -} - -func New(p Props) []dashboard.Option { - opts := vars(p) - opts = append(opts, statsRow(p)...) - opts = append(opts, gasStatsRow(p)...) - opts = append(opts, failedMessagesRow(p)...) - opts = append(opts, reqRespRow(p)...) 
- return opts -} diff --git a/dashboard/lib/config.go b/dashboard/lib/config.go deleted file mode 100644 index eae6e956fc7..00000000000 --- a/dashboard/lib/config.go +++ /dev/null @@ -1,70 +0,0 @@ -package dashboardlib - -import "os" - -type EnvConfig struct { - Platform string - GrafanaURL string - GrafanaToken string - GrafanaFolder string - DataSources DataSources -} - -type DataSources struct { - Loki string - Prometheus string -} - -type DashboardOpts struct { - Tags []string - AutoRefresh string -} - -func ReadEnvDeployOpts() EnvConfig { - name := os.Getenv("DASHBOARD_NAME") - if name == "" { - L.Fatal().Msg("DASHBOARD_NAME must be provided") - } - lokiDataSourceName := os.Getenv("LOKI_DATA_SOURCE_NAME") - if lokiDataSourceName == "" { - L.Fatal().Msg("LOKI_DATA_SOURCE_NAME is empty, panels with logs will be disabled") - } - prometheusDataSourceName := os.Getenv("PROMETHEUS_DATA_SOURCE_NAME") - if prometheusDataSourceName == "" { - L.Fatal().Msg("PROMETHEUS_DATA_SOURCE_NAME must be provided") - } - grafanaURL := os.Getenv("GRAFANA_URL") - if grafanaURL == "" { - L.Fatal().Msg("GRAFANA_URL must be provided") - } - grafanaToken := os.Getenv("GRAFANA_TOKEN") - if grafanaToken == "" { - L.Fatal().Msg("GRAFANA_TOKEN must be provided") - } - grafanaFolder := os.Getenv("GRAFANA_FOLDER") - if grafanaFolder == "" { - L.Fatal().Msg("GRAFANA_FOLDER must be provided") - } - platform := os.Getenv("INFRA_PLATFORM") - if platform == "" { - L.Fatal().Msg("INFRA_PLATFORM must be provided, can be either docker|kubernetes") - } - loki := os.Getenv("LOKI_DATA_SOURCE_NAME") - if lokiDataSourceName == "" { - L.Fatal().Msg("LOKI_DATA_SOURCE_NAME is empty, panels with logs will be disabled") - } - prom := os.Getenv("PROMETHEUS_DATA_SOURCE_NAME") - if prometheusDataSourceName == "" { - L.Fatal().Msg("PROMETHEUS_DATA_SOURCE_NAME must be provided") - } - return EnvConfig{ - GrafanaURL: grafanaURL, - GrafanaToken: grafanaToken, - GrafanaFolder: grafanaFolder, - Platform: platform, - 
DataSources: DataSources{ - Loki: loki, - Prometheus: prom, - }, - } -} diff --git a/dashboard/lib/core-don/component.go b/dashboard/lib/core-don/component.go deleted file mode 100644 index 24173fb6cc9..00000000000 --- a/dashboard/lib/core-don/component.go +++ /dev/null @@ -1,1806 +0,0 @@ -package core_don - -import ( - "fmt" - "github.com/K-Phoen/grabana/dashboard" - "github.com/K-Phoen/grabana/gauge" - "github.com/K-Phoen/grabana/row" - "github.com/K-Phoen/grabana/stat" - "github.com/K-Phoen/grabana/table" - "github.com/K-Phoen/grabana/target/prometheus" - "github.com/K-Phoen/grabana/timeseries" - "github.com/K-Phoen/grabana/timeseries/axis" - "github.com/K-Phoen/grabana/variable/query" -) - -type Props struct { - PrometheusDataSource string - PlatformOpts PlatformOpts -} - -func vars(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.VariableAsQuery( - "instance", - query.DataSource(p.PrometheusDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request(fmt.Sprintf("label_values(%s)", p.PlatformOpts.LabelFilter)), - query.Sort(query.NumericalAsc), - ), - dashboard.VariableAsQuery( - "evmChainID", - query.DataSource(p.PrometheusDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request(fmt.Sprintf("label_values(%s)", "evmChainID")), - query.Sort(query.NumericalAsc), - ), - } -} - -func generalInfoRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "General CL Cluster Info", - row.Collapse(), - row.WithStat( - "App Version", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationAuto), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(2), - stat.Text("name"), - stat.WithPrometheusTarget( - `version{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{version}}"), - ), - ), - row.WithStat( - "Go Version", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - 
stat.Orientation(stat.OrientationAuto), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(2), - stat.Text("name"), - stat.WithPrometheusTarget( - `go_info{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{version}}"), - ), - ), - row.WithStat( - "Uptime in days", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(8), - stat.WithPrometheusTarget( - `uptime_seconds{`+p.PlatformOpts.LabelQuery+`} / 86400`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithStat( - "ETH Balance", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(6), - stat.Decimals(2), - stat.WithPrometheusTarget( - `eth_balance{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{account}}"), - ), - ), - row.WithStat( - "Solana Balance", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(6), - stat.Decimals(2), - stat.WithPrometheusTarget( - `solana_balance{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LabelFilter+"}} - {{account}}"), - ), - ), - row.WithTimeSeries( - "Service Components Health", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `health{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{service_id}}"), - ), - ), - row.WithTimeSeries( - "ETH Balance", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Decimals(2), - ), - 
timeseries.WithPrometheusTarget( - `eth_balance{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{account}}"), - ), - ), - row.WithTimeSeries( - "SOL Balance", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Decimals(2), - ), - timeseries.WithPrometheusTarget( - `solana_balance{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{account}}"), - ), - ), - ), - } -} - -func logPollerRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("LogPoller", - row.Collapse(), - row.WithStat( - "Goroutines", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationAuto), - stat.Height("200px"), - stat.TitleFontSize(30), - stat.ValueFontSize(30), - stat.Span(6), - stat.Text("Goroutines"), - stat.WithPrometheusTarget( - `count(count by (evmChainID) (log_poller_query_duration_sum{job=~"$instance"}))`, - prometheus.Legend("Goroutines"), - ), - ), - row.WithTimeSeries( - "RPS", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("requests"), - ), - timeseries.WithPrometheusTarget( - `avg by (query) (sum by (query, job) (rate(log_poller_query_duration_count{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])))`, - prometheus.Legend("{{query}} - {{job}}"), - ), - timeseries.WithPrometheusTarget( - `avg (sum by(job) (rate(log_poller_query_duration_count{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])))`, - prometheus.Legend("Total"), - ), - ), - row.WithTimeSeries( - "RPS by type", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("requests"), - ), - timeseries.WithPrometheusTarget( - `avg by (type) (sum by (type, job) 
(rate(log_poller_query_duration_count{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])))`, - ), - ), - row.WithTimeSeries( - "Avg number of logs returned", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("logs"), - ), - timeseries.WithPrometheusTarget( - `avg by (query) (log_poller_query_dataset_size{job=~"$instance", evmChainID=~"$evmChainID"})`, - prometheus.Legend("{{query}} - {{job}}"), - ), - ), - row.WithTimeSeries( - "Max number of logs returned", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("logs"), - ), - timeseries.WithPrometheusTarget( - `max by (query) (log_poller_query_dataset_size{job=~"$instance", evmChainID=~"$evmChainID"})`, - prometheus.Legend("{{query}} - {{job}}"), - ), - ), - row.WithTimeSeries( - "Logs returned by chain", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("logs"), - ), - timeseries.WithPrometheusTarget( - `max by (evmChainID) (log_poller_query_dataset_size{job=~"$instance", evmChainID=~"$evmChainID"})`, - prometheus.Legend("{{evmChainID}}"), - ), - ), - row.WithTimeSeries( - "Queries duration by type (0.5 perc)", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.5, sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, query)) / 1e6`, - prometheus.Legend("{{query}}"), - ), - ), - row.WithTimeSeries( - "queries duration by type (0.9 perc)", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.9, 
sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, query)) / 1e6`, - prometheus.Legend("{{query}}"), - ), - ), - row.WithTimeSeries( - "Queries duration by type (0.99 perc)", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.99, sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, query)) / 1e6`, - prometheus.Legend("{{query}}"), - ), - ), - row.WithTimeSeries( - "Queries duration by chain (0.99 perc)", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.99, sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, evmChainID)) / 1e6`, - prometheus.Legend("{{query}}"), - ), - ), - row.WithTimeSeries( - "Number of logs inserted", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("logs"), - ), - timeseries.WithPrometheusTarget( - `avg by (evmChainID) (log_poller_logs_inserted{job=~"$instance", evmChainID=~"$evmChainID"})`, - prometheus.Legend("{{evmChainID}}"), - ), - ), - row.WithTimeSeries( - "Logs insertion rate", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `avg by (evmChainID) (rate(log_poller_logs_inserted{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval]))`, - prometheus.Legend("{{evmChainID}}"), - ), - ), - row.WithTimeSeries( - "Number of blocks inserted", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("blocks"), 
- ), - timeseries.WithPrometheusTarget( - `avg by (evmChainID) (log_poller_blocks_inserted{job=~"$instance", evmChainID=~"$evmChainID"})`, - prometheus.Legend("{{evmChainID}}"), - ), - ), - row.WithTimeSeries( - "Blocks insertion rate", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `avg by (evmChainID) (rate(log_poller_blocks_inserted{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval]))`, - prometheus.Legend("{{evmChainID}}"), - ), - ), - ), - } -} - -func feedJobsRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("Feeds Jobs", - row.Collapse(), - row.WithTimeSeries( - "Feeds Job Proposal Requests", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `feeds_job_proposal_requests{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Feeds Job Proposal Count", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `feeds_job_proposal_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func mailBoxRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("Mailbox", - row.Collapse(), - row.WithTimeSeries( - "Mailbox Load Percent", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `mailbox_load_percent{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ name }}"), - ), - ), - ), - } -} - -func promReporterRow(p Props) []dashboard.Option { - return 
[]dashboard.Option{ - dashboard.Row("Prom Reporter", - row.Collapse(), - row.WithTimeSeries( - "Unconfirmed Transactions", - timeseries.Span(4), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Tx"), - ), - timeseries.WithPrometheusTarget( - `unconfirmed_transactions{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Unconfirmed TX Age", - timeseries.Span(4), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `max_unconfirmed_tx_age{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Unconfirmed TX Blocks", - timeseries.Span(4), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Blocks"), - ), - timeseries.WithPrometheusTarget( - `max_unconfirmed_blocks{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func txManagerRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("TX Manager", - row.Collapse(), - row.WithTimeSeries( - "TX Manager Time Until TX Broadcast", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_time_until_tx_broadcast{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Num Gas Bumps", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_num_gas_bumps{`+p.PlatformOpts.LabelQuery+`}`, - 
prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Num Gas Bumps Exceeds Limit", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_gas_bump_exceeds_limit{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Num Confirmed Transactions", - timeseries.Span(3), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_num_confirmed_transactions{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Num Successful Transactions", - timeseries.Span(3), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_num_successful_transactions{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Num Reverted Transactions", - timeseries.Span(3), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_num_tx_reverted{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Num Fwd Transactions", - timeseries.Span(3), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_fwd_tx_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX 
Manager Num Transactions Attempts", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_tx_attempt_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Time Until TX Confirmed", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_time_until_tx_confirmed{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "TX Manager Block Until TX Confirmed", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `tx_manager_blocks_until_tx_confirmed{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func headTrackerRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("Head tracker", - row.Collapse(), - row.WithTimeSeries( - "Head tracker current head", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `head_tracker_current_head{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Head tracker very old head", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `head_tracker_very_old_head{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - 
row.WithTimeSeries( - "Head tracker heads received", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `head_tracker_heads_received{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Head tracker connection errors", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `head_tracker_connection_errors{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func appDBConnectionsRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("DB Connection Metrics (App)", - row.Collapse(), - row.WithTimeSeries( - "DB Connections", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Conn"), - ), - timeseries.WithPrometheusTarget( - `db_conns_max{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Max"), - ), - timeseries.WithPrometheusTarget( - `db_conns_open{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Open"), - ), - timeseries.WithPrometheusTarget( - `db_conns_used{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Used"), - ), - timeseries.WithPrometheusTarget( - `db_conns_wait{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Wait"), - ), - ), - row.WithTimeSeries( - "DB Wait Count", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `db_wait_count{`+p.PlatformOpts.LabelQuery+`}`, - 
prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "DB Wait Time", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `db_wait_time_seconds{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func sqlQueriesRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "SQL Query", - row.Collapse(), - row.WithTimeSeries( - "SQL Query Timeout Percent", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("percent"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.9, sum(rate(sql_query_timeout_percent_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (le))`, - prometheus.Legend("p90"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.95, sum(rate(sql_query_timeout_percent_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (le))`, - prometheus.Legend("p95"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.99, sum(rate(sql_query_timeout_percent_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (le))`, - prometheus.Legend("p99"), - ), - ), - ), - } -} - -func logsCountersRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("Logs Metrics", - row.Collapse(), - row.WithTimeSeries( - "Logs Counters", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `log_panic_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - panic"), - ), - timeseries.WithPrometheusTarget( - `log_fatal_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - fatal"), - ), - 
timeseries.WithPrometheusTarget( - `log_critical_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - critical"), - ), - timeseries.WithPrometheusTarget( - `log_warn_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - warn"), - ), - timeseries.WithPrometheusTarget( - `log_error_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - error"), - ), - ), - row.WithTimeSeries( - "Logs Rate", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `sum(rate(log_panic_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - panic"), - ), - timeseries.WithPrometheusTarget( - `sum(rate(log_fatal_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - fatal"), - ), - timeseries.WithPrometheusTarget( - `sum(rate(log_critical_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - critical"), - ), - timeseries.WithPrometheusTarget( - `sum(rate(log_warn_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - warn"), - ), - timeseries.WithPrometheusTarget( - `sum(rate(log_error_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - error"), - ), - ), - ), - } -} - -// TODO: fix, no data points for OCRv1 -func evmPoolLifecycleRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "EVM Pool Lifecycle", - row.Collapse(), - row.WithTimeSeries( - "EVM Pool 
Highest Seen Block", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_highest_seen_block{`+p.PlatformOpts.LabelQuery+`evmChainID="${evmChainID}"}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool Num Seen Blocks", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_num_seen_blocks{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool Node Polls Total", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_polls_total{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool Node Polls Failed", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_polls_failed{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool Node Polls Success", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Block"), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_polls_success{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func nodesRPCRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "Node RPC State", - 
row.Collapse(), - row.WithStat( - "Node RPC Alive", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Alive"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC Closed", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Closed"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC Dialed", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Dialed"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC InvalidChainID", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="InvalidChainID"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC OutOfSync", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - 
stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="OutOfSync"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC UnDialed", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Undialed"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC Unreachable", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Unreachable"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - row.WithStat( - "Node RPC Unusable", - stat.DataSource(p.PrometheusDataSource), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.TitleFontSize(12), - stat.ValueFontSize(20), - stat.Span(3), - stat.WithPrometheusTarget( - `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Unusable"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, - prometheus.Legend("{{pod}} - {{chainId}}"), - ), - ), - ), - } -} - -func evmNodeRPCRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "EVM Pool RPC Node Metrics (App)", - row.Collapse(), - row.WithTimeSeries( - "EVM Pool RPC Node Calls Success Rate", - timeseries.Span(12), - 
timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Label("%"), - axis.SoftMin(0), - axis.SoftMax(100), - ), - timeseries.WithPrometheusTarget( - `sum(increase(evm_pool_rpc_node_calls_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), - ), - ), - row.WithGauge( - "EVM Pool RPC Node Calls Success Rate", - gauge.Span(12), - gauge.Orientation(gauge.OrientationVertical), - gauge.DataSource(p.PrometheusDataSource), - gauge.Unit("percentunit"), - gauge.WithPrometheusTarget( - `sum(increase(evm_pool_rpc_node_calls_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), - ), - gauge.AbsoluteThresholds([]gauge.ThresholdStep{ - {Color: "#ff0000"}, - {Color: "#ffa500", Value: float64Ptr(0.8)}, - {Color: "#00ff00", Value: float64Ptr(0.9)}, - }), - ), - // issue when value is 0 - row.WithTimeSeries( - "EVM Pool RPC Node Dials Success Rate", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Label("%"), - axis.SoftMin(0), - axis.SoftMax(100), - ), - timeseries.WithPrometheusTarget( - `sum(increase(evm_pool_rpc_node_dials_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / 
sum(increase(evm_pool_rpc_node_dials_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), - ), - ), - // issue when value is 0 - row.WithTimeSeries( - "EVM Pool RPC Node Dials Failure Rate", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Label("%"), - axis.SoftMin(0), - axis.SoftMax(100), - ), - timeseries.WithPrometheusTarget( - `sum(increase(evm_pool_rpc_node_dials_failed{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_dials_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool RPC Node Transitions", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_num_transitions_to_alive{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend(""), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_num_transitions_to_in_sync{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend(""), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_num_transitions_to_out_of_sync{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend(""), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_num_transitions_to_unreachable{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend(""), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_num_transitions_to_invalid_chain_id{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend(""), - ), - timeseries.WithPrometheusTarget( - 
`evm_pool_rpc_node_num_transitions_to_unusable{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend(""), - ), - ), - row.WithTimeSeries( - "EVM Pool RPC Node States", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `evm_pool_rpc_node_states{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{state}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool RPC Node Verifies Success Rate", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Label("%"), - axis.SoftMin(0), - axis.SoftMax(100), - ), - timeseries.WithPrometheusTarget( - `sum(increase(evm_pool_rpc_node_verifies_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool RPC Node Verifies Failure Rate", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - axis.Label("%"), - axis.SoftMin(0), - axis.SoftMax(100), - ), - timeseries.WithPrometheusTarget( - `sum(increase(evm_pool_rpc_node_verifies_failed{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), - ), - ), - ), - } -} - -func evmRPCNodeLatenciesRow(p 
Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "EVM Pool RPC Node Latencies (App)", - row.Collapse(), - row.WithTimeSeries( - "EVM Pool RPC Node Calls Latency 0.90 quantile", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.90, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, rpcCallName)) / 1e6`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{rpcCallName}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool RPC Node Calls Latency 0.95 quantile", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.95, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, rpcCallName)) / 1e6`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{rpcCallName}}"), - ), - ), - row.WithTimeSeries( - "EVM Pool RPC Node Calls Latency 0.99 quantile", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("ms"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.99, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, rpcCallName)) / 1e6`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{rpcCallName}}"), - ), - ), - ), - } -} - -func evmBlockHistoryEstimatorRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("Block History Estimator", - row.Collapse(), - row.WithTimeSeries( - "Gas Updater All Gas Price Percentiles", - timeseries.Span(6), - 
timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `gas_updater_all_gas_price_percentiles{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ percentile }}"), - ), - ), - row.WithTimeSeries( - "Gas Updater All Tip Cap Percentiles", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `gas_updater_all_tip_cap_percentiles{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ percentile }}"), - ), - ), - row.WithTimeSeries( - "Gas Updater Set Gas Price", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `gas_updater_set_gas_price{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Gas Updater Set Tip Cap", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `gas_updater_set_tip_cap{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Gas Updater Current Base Fee", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `gas_updater_current_base_fee{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Block History Estimator Connectivity Failure Count", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - 
timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `block_history_estimator_connectivity_failure_count{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func pipelinesRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row("Pipeline Metrics (Runner)", - row.Collapse(), - row.WithTimeSeries( - "Pipeline Task Execution Time", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `pipeline_task_execution_time{`+p.PlatformOpts.LabelQuery+`} / 1e6`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), - ), - ), - row.WithTimeSeries( - "Pipeline Run Errors", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `pipeline_run_errors{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), - ), - ), - row.WithTimeSeries( - "Pipeline Run Total Time to Completion", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `pipeline_run_total_time_to_completion{`+p.PlatformOpts.LabelQuery+`} / 1e6`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), - ), - ), - row.WithTimeSeries( - "Pipeline Tasks Total Finished", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `pipeline_tasks_total_finished{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), - ), - ), - ), - dashboard.Row( - "Pipeline 
Metrics (ETHCall)", - row.Collapse(), - row.WithTimeSeries( - "Pipeline Task ETH Call Execution Time", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `pipeline_task_eth_call_execution_time{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - dashboard.Row( - "Pipeline Metrics (HTTP)", - row.Collapse(), - row.WithTimeSeries( - "Pipeline Task HTTP Fetch Time", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `pipeline_task_http_fetch_time{`+p.PlatformOpts.LabelQuery+`} / 1e6`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Pipeline Task HTTP Response Body Size", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bytes"), - ), - timeseries.WithPrometheusTarget( - `pipeline_task_http_response_body_size{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - dashboard.Row( - "Pipeline Metrics (Bridge)", - row.Collapse(), - row.WithTimeSeries( - "Pipeline Bridge Latency", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `bridge_latency_seconds{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Pipeline Bridge Errors Total", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `bridge_errors_total{`+p.PlatformOpts.LabelQuery+`}`, - 
prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Pipeline Bridge Cache Hits Total", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `bridge_cache_hits_total{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Pipeline Bridge Cache Errors Total", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `bridge_cache_errors_total{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - dashboard.Row( - "Pipeline Metrics", - row.Collapse(), - row.WithTimeSeries( - "Pipeline Runs Queued", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `pipeline_runs_queued{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Pipeline Runs Tasks Queued", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `pipeline_task_runs_queued{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func httpAPIRow(p Props) []dashboard.Option { - return []dashboard.Option{ - - dashboard.Row( - "HTTP API Metrics", - row.Collapse(), - row.WithTimeSeries( - "Request Duration p95", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Sec"), - ), - timeseries.WithPrometheusTarget( - `histogram_quantile(0.95, 
sum(rate(service_gonic_request_duration_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, path, method))`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ method }} - {{ path }}"), - ), - ), - row.WithTimeSeries( - "Request Total Rate over interval", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `sum(rate(service_gonic_requests_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, path, method, code)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ method }} - {{ path }} - {{ code }}"), - ), - ), - row.WithTimeSeries( - "Average Request Size", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bytes"), - ), - timeseries.WithPrometheusTarget( - `avg(rate(service_gonic_request_size_bytes_sum{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)/avg(rate(service_gonic_request_size_bytes_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "Response Size", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bytes"), - ), - timeseries.WithPrometheusTarget( - `avg(rate(service_gonic_response_size_bytes_sum{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)/avg(rate(service_gonic_response_size_bytes_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func promHTTPRow(p Props) []dashboard.Option { - return []dashboard.Option{ - 
dashboard.Row( - "PromHTTP Metrics", - row.Collapse(), - row.WithGauge("HTTP Request in flight", - gauge.Span(12), - gauge.Orientation(gauge.OrientationVertical), - gauge.DataSource(p.PrometheusDataSource), - gauge.WithPrometheusTarget( - `promhttp_metric_handler_requests_in_flight{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithTimeSeries( - "HTTP rate", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `sum(rate(promhttp_metric_handler_requests_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, code)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - ), - } -} - -func goMetricsRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "Go Metrics", - row.Collapse(), - row.WithTable( - "Threads", - table.Span(3), - table.Height("200px"), - table.DataSource(p.PrometheusDataSource), - table.WithPrometheusTarget( - `sum(go_threads{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}")), - table.HideColumn("Time"), - table.AsTimeSeriesAggregations([]table.Aggregation{ - {Label: "AVG", Type: table.AVG}, - {Label: "Current", Type: table.Current}, - }), - ), - row.WithTimeSeries( - "Threads", - timeseries.Span(9), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit(""), - ), - timeseries.WithPrometheusTarget( - `sum(go_threads{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - ), - row.WithStat( - "Heap Allocations", - stat.Span(12), - stat.Orientation(stat.OrientationVertical), - stat.DataSource(p.PrometheusDataSource), - stat.Unit("bytes"), - stat.ColorValue(), - 
stat.WithPrometheusTarget( - `sum(go_memstats_heap_alloc_bytes{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, - ), - ), - row.WithTimeSeries( - "Heap allocations", - timeseries.Span(12), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `sum(go_memstats_heap_alloc_bytes{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - timeseries.Axis( - axis.Unit("bytes"), - axis.Label("Memory"), - axis.SoftMin(0), - ), - ), - row.WithTimeSeries( - "Memory in Heap", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("bytes"), - axis.Label("Memory"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - `go_memstats_heap_alloc_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Alloc"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_heap_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Sys"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_heap_idle_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Idle"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_heap_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - InUse"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_heap_released_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Released"), - ), - ), - row.WithTimeSeries( - "Memory in Off-Heap", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("bytes"), - axis.Label("Memory"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - 
`go_memstats_mspan_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Total InUse"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_mspan_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Total Sys"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_mcache_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Cache InUse"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_mcache_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Cache Sys"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_buck_hash_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Hash Sys"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_gc_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - GC Sys"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_other_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - bytes of memory are used for other runtime allocations"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_next_gc_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Next GC"), - ), - ), - row.WithTimeSeries( - "Memory in Stack", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `go_memstats_stack_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - InUse"), - ), - timeseries.WithPrometheusTarget( - `go_memstats_stack_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Sys"), - ), - timeseries.Axis( - axis.Unit("bytes"), - axis.Label("Memory"), - axis.SoftMin(0), - 
), - ), - row.WithTimeSeries( - "Total Used Memory", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `go_memstats_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - timeseries.Axis( - axis.Unit("bytes"), - axis.Label("Memory"), - axis.SoftMin(0), - ), - ), - row.WithTimeSeries( - "Number of Live Objects", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `go_memstats_mallocs_total{`+p.PlatformOpts.LabelQuery+`} - go_memstats_frees_total{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - timeseries.Axis( - axis.SoftMin(0), - ), - ), - row.WithTimeSeries( - "Rate of Objects Allocated", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `rate(go_memstats_mallocs_total{`+p.PlatformOpts.LabelQuery+`}[1m])`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - timeseries.Axis( - axis.SoftMin(0), - ), - ), - row.WithTimeSeries( - "Rate of a Pointer Dereferences", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `rate(go_memstats_lookups_total{`+p.PlatformOpts.LabelQuery+`}[1m])`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - timeseries.Axis( - axis.Unit("ops"), - axis.SoftMin(0), - ), - ), - row.WithTimeSeries( - "Goroutines", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `go_goroutines{`+p.PlatformOpts.LabelQuery+`}`, - prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), - ), - timeseries.Axis( - axis.SoftMin(0), - ), - ), - ), - } -} - -func float64Ptr(input float64) *float64 { - return 
&input -} - -func New(p Props) []dashboard.Option { - opts := vars(p) - opts = append(opts, generalInfoRow(p)...) - opts = append(opts, logPollerRow(p)...) - opts = append(opts, feedJobsRow(p)...) - opts = append(opts, mailBoxRow(p)...) - opts = append(opts, promReporterRow(p)...) - opts = append(opts, txManagerRow(p)...) - opts = append(opts, headTrackerRow(p)...) - opts = append(opts, appDBConnectionsRow(p)...) - opts = append(opts, sqlQueriesRow(p)...) - opts = append(opts, logsCountersRow(p)...) - opts = append(opts, evmPoolLifecycleRow(p)...) - opts = append(opts, nodesRPCRow(p)...) - opts = append(opts, evmNodeRPCRow(p)...) - opts = append(opts, evmRPCNodeLatenciesRow(p)...) - opts = append(opts, evmBlockHistoryEstimatorRow(p)...) - opts = append(opts, pipelinesRow(p)...) - opts = append(opts, httpAPIRow(p)...) - opts = append(opts, promHTTPRow(p)...) - opts = append(opts, goMetricsRow(p)...) - return opts -} diff --git a/dashboard/lib/core-don/platform.go b/dashboard/lib/core-don/platform.go deleted file mode 100644 index fbfed548146..00000000000 --- a/dashboard/lib/core-don/platform.go +++ /dev/null @@ -1,45 +0,0 @@ -package core_don - -import "fmt" - -type PlatformOpts struct { - // Platform is infrastructure deployment platform: docker or k8s - Platform string - LabelFilters map[string]string - LabelFilter string - LegendString string - LabelQuery string -} - -// PlatformPanelOpts generate different queries for "docker" and "k8s" deployment platforms -func PlatformPanelOpts(platform string) PlatformOpts { - po := PlatformOpts{ - LabelFilters: map[string]string{ - "instance": `=~"${instance}"`, - "commit": `=~"${commit:pipe}"`, - }, - } - switch platform { - case "kubernetes": - po.LabelFilters = map[string]string{ - "namespace": `=~"${namespace}"`, - "pod": `=~"${pod}"`, - } - po.LabelFilter = "job" - po.LegendString = "pod" - break - case "docker": - po.LabelFilters = map[string]string{ - "instance": `=~"${instance}"`, - } - po.LabelFilter = "instance" - 
po.LegendString = "instance" - break - default: - panic(fmt.Sprintf("failed to generate Platform dependent queries, unknown platform: %s", platform)) - } - for key, value := range po.LabelFilters { - po.LabelQuery += key + value + ", " - } - return po -} diff --git a/dashboard/lib/core-ocrv2-ccip/component.go b/dashboard/lib/core-ocrv2-ccip/component.go deleted file mode 100644 index 837f693fcc7..00000000000 --- a/dashboard/lib/core-ocrv2-ccip/component.go +++ /dev/null @@ -1,83 +0,0 @@ -package core_ocrv2_ccip - -import ( - "fmt" - "github.com/K-Phoen/grabana/dashboard" - "github.com/K-Phoen/grabana/row" - "github.com/K-Phoen/grabana/target/prometheus" - "github.com/K-Phoen/grabana/timeseries" -) - -type Props struct { - PrometheusDataSource string - PluginName string -} - -func quantileRowOpts(ds string, pluginName string, perc string) row.Option { - return row.WithTimeSeries( - fmt.Sprintf("(%s) OCR2 duration (%s)", pluginName, perc), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(ds), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`histogram_quantile(%s, sum(rate(ocr2_reporting_plugin_observation_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), - prometheus.Legend("Observation"), - ), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`histogram_quantile(%s, sum(rate(ocr2_reporting_plugin_report_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), - prometheus.Legend("Report"), - ), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`histogram_quantile(%s, sum(rate(ocr2_reporting_plugin_should_accept_finalized_report_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), - prometheus.Legend("ShouldAcceptFinalizedReport"), - ), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`histogram_quantile(%s, 
sum(rate(ocr2_reporting_plugin_should_transmit_accepted_report_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), - prometheus.Legend("ShouldTransmitAcceptedReport"), - ), - ) -} - -func ocrv2PluginObservationStageQuantiles(p Props) []dashboard.Option { - opts := make([]row.Option, 0) - opts = append(opts, - row.Collapse(), - row.WithTimeSeries( - fmt.Sprintf("(%s) OCR2 RPS by phase", p.PluginName), - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_observation_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), - prometheus.Legend("Observation"), - ), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_report_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), - prometheus.Legend("Report"), - ), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_should_accept_finalized_report_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), - prometheus.Legend("ShouldAcceptFinalizedReport"), - ), - timeseries.WithPrometheusTarget( - fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_should_transmit_accepted_report_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), - prometheus.Legend("ShouldTransmitAcceptedReport"), - ), - ), - quantileRowOpts(p.PrometheusDataSource, p.PluginName, "0.5"), - quantileRowOpts(p.PrometheusDataSource, p.PluginName, "0.9"), - quantileRowOpts(p.PrometheusDataSource, p.PluginName, "0.99"), - ) - return []dashboard.Option{ - dashboard.Row( - fmt.Sprintf("OCRv2 Metrics - Plugin: %s", p.PluginName), - opts..., - ), - } -} - -func New(p Props) []dashboard.Option { - opts := make([]dashboard.Option, 0) - opts = append(opts, 
ocrv2PluginObservationStageQuantiles(p)...) - return opts -} diff --git a/dashboard/lib/dashboard.go b/dashboard/lib/dashboard.go deleted file mode 100644 index 3343530a835..00000000000 --- a/dashboard/lib/dashboard.go +++ /dev/null @@ -1,90 +0,0 @@ -package dashboardlib - -import ( - "context" - "encoding/json" - "github.com/K-Phoen/grabana" - "github.com/K-Phoen/grabana/dashboard" - "github.com/pkg/errors" - "net/http" - "os" -) - -type Dashboard struct { - Name string - DeployOpts EnvConfig - /* SDK panels that are missing in Grabana */ - SDKPanels []map[string]interface{} - /* generated dashboard opts and builder */ - builder dashboard.Builder - Opts []dashboard.Option -} - -func NewDashboard( - name string, - deployOpts EnvConfig, - opts []dashboard.Option, -) *Dashboard { - return &Dashboard{ - Name: name, - DeployOpts: deployOpts, - Opts: opts, - } -} - -func (m *Dashboard) Deploy() error { - ctx := context.Background() - b, err := m.build() - if err != nil { - return err - } - client := grabana.NewClient(&http.Client{}, m.DeployOpts.GrafanaURL, grabana.WithAPIToken(m.DeployOpts.GrafanaToken)) - fo, folderErr := client.FindOrCreateFolder(ctx, m.DeployOpts.GrafanaFolder) - if folderErr != nil { - return errors.Wrap(err, "could not find or create Grafana folder") - } - if _, err := client.UpsertDashboard(ctx, fo, b); err != nil { - return errors.Wrap(err, "failed to upsert the dashboard") - } - return nil -} - -func (m *Dashboard) Add(opts []dashboard.Option) { - m.Opts = append(m.Opts, opts...) 
-} - -func (m *Dashboard) AddSDKPanel(panel map[string]interface{}) { - m.SDKPanels = append(m.SDKPanels, panel) -} - -func (m *Dashboard) build() (dashboard.Builder, error) { - b, err := dashboard.New( - m.Name, - m.Opts..., - ) - if err != nil { - return dashboard.Builder{}, errors.Wrap(err, "failed to build the dashboard") - } - return b, nil -} - -// TODO: re-write after forking Grabana, inject foundation SDK components from official schema -func (m *Dashboard) injectSDKPanels(b dashboard.Builder) (dashboard.Builder, error) { - data, err := b.MarshalIndentJSON() - if err != nil { - return dashboard.Builder{}, err - } - var asMap map[string]interface{} - if err := json.Unmarshal(data, &asMap); err != nil { - return dashboard.Builder{}, err - } - asMap["rows"].([]interface{})[0].(map[string]interface{})["panels"] = append(asMap["rows"].([]interface{})[0].(map[string]interface{})["panels"].([]interface{}), m.SDKPanels[0]) - d, err := json.Marshal(asMap) - if err != nil { - return dashboard.Builder{}, err - } - if err := os.WriteFile("generated_ccip_dashboard.json", d, os.ModePerm); err != nil { - return dashboard.Builder{}, err - } - return b, nil -} diff --git a/dashboard/lib/k8s-pods/component.go b/dashboard/lib/k8s-pods/component.go deleted file mode 100644 index 4ef90c3012f..00000000000 --- a/dashboard/lib/k8s-pods/component.go +++ /dev/null @@ -1,198 +0,0 @@ -package k8spods - -import ( - "github.com/K-Phoen/grabana/dashboard" - "github.com/K-Phoen/grabana/logs" - "github.com/K-Phoen/grabana/row" - "github.com/K-Phoen/grabana/stat" - "github.com/K-Phoen/grabana/target/prometheus" - "github.com/K-Phoen/grabana/timeseries" - "github.com/K-Phoen/grabana/timeseries/axis" - "github.com/K-Phoen/grabana/variable/query" -) - -type Props struct { - LokiDataSource string - PrometheusDataSource string -} - -func vars(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.VariableAsQuery( - "namespace", - query.DataSource(p.PrometheusDataSource), - 
query.Multiple(), - query.IncludeAll(), - query.Request("label_values(namespace)"), - query.Sort(query.NumericalAsc), - ), - dashboard.VariableAsQuery( - "pod", - query.DataSource(p.PrometheusDataSource), - query.Multiple(), - query.IncludeAll(), - query.Request("label_values(kube_pod_container_info{namespace=\"$namespace\"}, pod)"), - query.Sort(query.NumericalAsc), - ), - } -} - -func logsRow(p Props) []dashboard.Option { - return []dashboard.Option{ - dashboard.Row( - "K8s Logs", - row.Collapse(), - row.WithLogs( - "All Logs", - logs.DataSource(p.LokiDataSource), - logs.Span(12), - logs.Height("300px"), - logs.Transparent(), - logs.WithLokiTarget(`{namespace="$namespace", pod=~"${pod:pipe}"}`), - ), - row.WithLogs( - "All Errors", - logs.DataSource(p.LokiDataSource), - logs.Span(12), - logs.Height("300px"), - logs.Transparent(), - logs.WithLokiTarget(`{namespace="$namespace", pod=~"${pod:pipe}"} | json | level=~"error|warn|fatal|panic"`), - ), - ), - } - -} - -func New(p Props) []dashboard.Option { - opts := vars(p) - opts = append(opts, - []dashboard.Option{ - dashboard.Row( - "K8s Pods", - row.Collapse(), - row.WithStat( - "Pod Restarts", - stat.Span(4), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.DataSource(p.PrometheusDataSource), - stat.SparkLine(), - stat.SparkLineYMin(0), - stat.WithPrometheusTarget( - `sum(increase(kube_pod_container_status_restarts_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithStat( - "OOM Events", - stat.Span(4), - stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.DataSource(p.PrometheusDataSource), - stat.SparkLine(), - stat.SparkLineYMin(0), - stat.WithPrometheusTarget( - `sum(container_oom_events_total{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithStat( - "OOM Killed", - stat.Span(4), - 
stat.Text(stat.TextValueAndName), - stat.Orientation(stat.OrientationHorizontal), - stat.DataSource(p.PrometheusDataSource), - stat.SparkLine(), - stat.SparkLineYMin(0), - stat.WithPrometheusTarget( - `kube_pod_container_status_last_terminated_reason{reason="OOMKilled", pod=~"$pod", namespace=~"${namespace}"}`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithTimeSeries( - "CPU Usage", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.WithPrometheusTarget( - `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithTimeSeries( - "Memory Usage", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("bytes"), - axis.Label("Memory"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - `sum(container_memory_rss{pod=~"$pod", namespace=~"${namespace}", container!=""}) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithTimeSeries( - "Receive Bandwidth", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bps"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - `sum(irate(container_network_receive_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithTimeSeries( - "Transmit Bandwidth", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bps"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - `sum(irate(container_network_transmit_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithTimeSeries( - "Average Container Bandwidth by Namespace: Received", 
- timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bps"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - `avg(irate(container_network_receive_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - row.WithTimeSeries( - "Average Container Bandwidth by Namespace: Transmitted", - timeseries.Span(6), - timeseries.Height("200px"), - timeseries.DataSource(p.PrometheusDataSource), - timeseries.Axis( - axis.Unit("Bps"), - axis.SoftMin(0), - ), - timeseries.WithPrometheusTarget( - `avg(irate(container_network_transmit_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - prometheus.Legend("{{pod}}"), - ), - ), - ), - }..., - ) - opts = append(opts, logsRow(p)...) - return opts -} diff --git a/dashboard/lib/log.go b/dashboard/lib/log.go deleted file mode 100644 index fe30efc7f59..00000000000 --- a/dashboard/lib/log.go +++ /dev/null @@ -1,28 +0,0 @@ -package dashboardlib - -import ( - "os" - - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" -) - -const ( - LogLevelEnvVar = "DASHBOARD_LOG_LEVEL" -) - -var ( - L zerolog.Logger -) - -func init() { - lvlStr := os.Getenv(LogLevelEnvVar) - if lvlStr == "" { - lvlStr = "info" - } - lvl, err := zerolog.ParseLevel(lvlStr) - if err != nil { - panic(err) - } - L = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}).Level(lvl) -} From e3412e333bd91c2b5b8bd060640f9771fb820d27 Mon Sep 17 00:00:00 2001 From: skudasov Date: Wed, 20 Mar 2024 17:52:22 +0100 Subject: [PATCH 2/3] dashboard lib --- dashboard-lib/README.md | 24 + .../ccip-load-test-view/component.go | 497 +++++ dashboard-lib/config.go | 70 + dashboard-lib/core-don/component.go | 1806 +++++++++++++++++ dashboard-lib/core-don/platform.go | 45 + dashboard-lib/core-ocrv2-ccip/component.go | 83 + dashboard-lib/dashboard.go | 90 + dashboard-lib/go.mod | 22 + dashboard-lib/go.sum | 37 + 
dashboard-lib/k8s-pods/component.go | 198 ++ dashboard-lib/log.go | 28 + 11 files changed, 2900 insertions(+) create mode 100644 dashboard-lib/README.md create mode 100644 dashboard-lib/ccip-load-test-view/component.go create mode 100644 dashboard-lib/config.go create mode 100644 dashboard-lib/core-don/component.go create mode 100644 dashboard-lib/core-don/platform.go create mode 100644 dashboard-lib/core-ocrv2-ccip/component.go create mode 100644 dashboard-lib/dashboard.go create mode 100644 dashboard-lib/go.mod create mode 100644 dashboard-lib/go.sum create mode 100644 dashboard-lib/k8s-pods/component.go create mode 100644 dashboard-lib/log.go diff --git a/dashboard-lib/README.md b/dashboard-lib/README.md new file mode 100644 index 00000000000..44fd655c72a --- /dev/null +++ b/dashboard-lib/README.md @@ -0,0 +1,24 @@ +# Chainlink Grafana Dashboards Library + +This library offers dashboard components and tools for constructing and testing Grafana dashboards at Chainlink. + +The component structure is as follows: +``` +dashboard-lib +  |- component_1 +    |- component.go +  |- component_2 +    |- component.go +  |- dashboard.go +  |- go.mod +  |- go.sum +``` + +Each component should contain rows, logic and unique variables in `component.go` + +Components should be imported from this module, see [example](../charts/chainlink-cluster/dashboard/cmd/deploy.go) + +## How to convert from JSON using Grabana codegen utility +1. Download Grabana binary [here](https://github.com/K-Phoen/grabana/releases) +2. ./bin/grabana convert-go -i dashboard.json > my_new_component/rows.go +3. 
Create a [component](k8s-pods/component.go) \ No newline at end of file diff --git a/dashboard-lib/ccip-load-test-view/component.go b/dashboard-lib/ccip-load-test-view/component.go new file mode 100644 index 00000000000..9f58438410e --- /dev/null +++ b/dashboard-lib/ccip-load-test-view/component.go @@ -0,0 +1,497 @@ +package ccip_load_test_view + +import ( + "encoding/json" + "fmt" + "github.com/K-Phoen/grabana/dashboard" + "github.com/K-Phoen/grabana/logs" + "github.com/K-Phoen/grabana/row" + "github.com/K-Phoen/grabana/stat" + "github.com/K-Phoen/grabana/target/loki" + "github.com/K-Phoen/grabana/target/prometheus" + "github.com/K-Phoen/grabana/timeseries" + "github.com/K-Phoen/grabana/timeseries/axis" + "github.com/K-Phoen/grabana/variable/query" + cLoki "github.com/grafana/grafana-foundation-sdk/go/loki" + cXYChart "github.com/grafana/grafana-foundation-sdk/go/xychart" +) + +type Props struct { + LokiDataSource string +} + +func vars(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.VariableAsQuery( + "Test Run Name", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("label_values(namespace)"), + ), + dashboard.VariableAsQuery( + "cluster", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("label_values(cluster)"), + ), + dashboard.VariableAsQuery( + "test_group", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("label_values(test_group)"), + ), + dashboard.VariableAsQuery( + "test_id", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("label_values(test_id)"), + ), + dashboard.VariableAsQuery( + "source_chain", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("label_values(source_chain)"), + ), + dashboard.VariableAsQuery( + "dest_chain", + query.DataSource(p.LokiDataSource), + query.Multiple(), + 
query.IncludeAll(), + query.Request("label_values(dest_chain)"), + ), + dashboard.VariableAsQuery( + "geth_node", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("label_values(geth_node)"), + ), + dashboard.VariableAsQuery( + "remote_runner", + query.DataSource(p.LokiDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request("namespace"), + ), + } +} + +func XYChartSeqNum() map[string]interface{} { + // TODO: https://github.com/grafana/grafana-foundation-sdk/tree/v10.4.x%2Bcog-v0.0.x/go has a lot of useful components + // TODO: need to change upload API and use combined upload in lib/dashboard.go + xAxisName := "seq_num" + builder := cXYChart.NewPanelBuilder(). + Title("XYChart"). + Dims(cXYChart.XYDimensionConfig{ + X: &xAxisName, + }). + WithTarget( + cLoki.NewDataqueryBuilder(). + Expr(`{namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_duration!= "" | data_Commit_ReportAccepted_success="✅"`). 
+ LegendFormat("Commit Report Accepted"), + ) + sampleDashboard, err := builder.Build() + if err != nil { + panic(err) + } + dashboardJson, err := json.MarshalIndent(sampleDashboard, "", " ") + if err != nil { + panic(err) + } + var data map[string]interface{} + if err := json.Unmarshal(dashboardJson, &data); err != nil { + panic(err) + } + fmt.Println(string(dashboardJson)) + return data +} + +func statsRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "CCIP Duration Stats", + row.Collapse(), + row.WithTimeSeries( + "Sequence numbers", + timeseries.Transparent(), + timeseries.Description("Sequence Numbers triggered by Test"), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", source_chain="${source_chain}", dest_chain="${dest_chain}"} | json | data_CCIPSendRequested_success="✅" | unwrap data_CCIPSendRequested_seq_num [$__range]) by (test_id)`, + loki.Legend("Starts"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} | json | data_CCIPSendRequested_success="✅" | unwrap data_CCIPSendRequested_seq_num [$__range]) by (test_id)`, + loki.Legend("Ends"), + ), + ), + row.WithTimeSeries( + "Source Router Fees ( /1e18)", + timeseries.Transparent(), + timeseries.Description("Router.GetFee"), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | 
data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_fee [$__range]) by (test_id) /1e18`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_fee [$__range]) by (test_id) /1e18`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_fee [$__range]) by (test_id) /1e18 `, + loki.Legend("Max"), + ), + ), + row.WithTimeSeries( + "Commit Duration Summary", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("seconds"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_duration [$__range]) by (data_Commit_ReportAccepted_seqNum)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap 
data_Commit_ReportAccepted_duration [$__range]) by (data_Commit_ReportAccepted_seqNum)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_duration [$__range]) by (data_Commit_ReportAccepted_seqNum)`, + loki.Legend("Max"), + ), + ), + row.WithTimeSeries( + "Report Blessing Summary", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("seconds"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_duration [$__range]) by (data_ReportBlessedByARM_seqNum)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({ namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_duration [$__range]) by (data_ReportBlessedByARM_seqNum)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_duration [$__range]) by 
(data_ReportBlessedByARM_seqNum)`, + loki.Legend("Max"), + ), + ), + row.WithTimeSeries( + "Execution Duration Summary", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("seconds"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_duration [$__range]) by (data_ExecutionStateChanged_seqNum)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_duration [$__range]) by (data_ExecutionStateChanged_seqNum)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_duration [$__range]) by (data_ExecutionStateChanged_seqNum)`, + loki.Legend("Max"), + ), + ), + row.WithTimeSeries( + "E2E (Commit, ARM, Execution)", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("seconds"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", 
test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="✅"| unwrap data_CommitAndExecute_duration [$__range]) by (data_CommitAndExecute_seqNum)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="✅"| unwrap data_CommitAndExecute_duration [$__range]) by (data_CommitAndExecute_seqNum)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="✅"| unwrap data_CommitAndExecute_duration [$__range]) by (data_CommitAndExecute_seqNum)`, + loki.Legend("Max"), + ), + ), + ), + } +} + +func failedMessagesRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "Failed Messages", + row.Collapse(), + row.WithTimeSeries( + "Failed Commit", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `count_over_time({ namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="❌" [$__range])`, + loki.Legend("{{error}}"), + ), + ), + row.WithTimeSeries( + "Failed Bless", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + 
timeseries.WithLokiTarget( + `count_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="❌" [$__range])`, + loki.Legend("{{error}}"), + ), + ), + row.WithTimeSeries( + "Failed Execution", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `count_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="❌" [$__range])`, + loki.Legend("{{error}}"), + ), + ), + row.WithTimeSeries( + "Failed Commit and Execution", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `count_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CommitAndExecute_success="❌" [$__range])`, + loki.Legend("{{error}}"), + ), + ), + ), + } +} + +func reqRespRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "Requests/Responses", + row.WithStat( + "Stats", + stat.DataSource(p.LokiDataSource), + stat.Transparent(), + stat.Text(stat.TextValueAndName), + stat.Height("100px"), + stat.TitleFontSize(20), + stat.ValueFontSize(20), + stat.Span(12), + stat.WithPrometheusTarget( + `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", 
source_chain="${source_chain}", dest_chain="${dest_chain}"} +| json +| unwrap current_time_unit [$__range]) by (test_id)`, + prometheus.Legend("Time Unit"), + ), + stat.WithPrometheusTarget( + `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} +| json +| unwrap load_duration [$__range]) by (test_id)/ 1e9 `, + prometheus.Legend("Total Duration"), + ), + stat.WithPrometheusTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_message_bytes_length [$__range]) by (test_id)`, + prometheus.Legend("Max Byte Len Sent"), + ), + stat.WithPrometheusTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_no_of_tokens_sent [$__range]) by (test_id)`, + prometheus.Legend("Max No Of Tokens Sent"), + ), + ), + row.WithTimeSeries( + "Request Rate", + timeseries.Transparent(), + timeseries.Description("Requests triggered over test duration"), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `last_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id="${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"}| json | unwrap current_rps [$__interval]) by (test_id,gen_name)`, + loki.Legend("Request 
Triggered/TimeUnit"), + ), + ), + row.WithTimeSeries( + "Trigger Summary", + timeseries.Transparent(), + timeseries.Points(), + timeseries.Description("Latest Stage Stats"), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} +| json +| unwrap success [$__range]) by (test_id)`, + loki.Legend("Successful Requests"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="stats", test_group="$test_group", test_id=~"${test_id:pipe}", source_chain="${source_chain}", dest_chain="${dest_chain}"} +| json +| unwrap failed [$__range]) by (test_id)`, + loki.Legend("Failed Requests"), + ), + ), + row.WithLogs( + "All CCIP Phases Stats", + logs.DataSource(p.LokiDataSource), + logs.Span(12), + logs.Height("300px"), + logs.Transparent(), + logs.WithLokiTarget( + `{namespace="${namespace}", go_test_name="${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json `, + ), + ), + ), + } +} + +func gasStatsRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "CCIP Gas Stats", + row.Collapse(), + row.WithTimeSeries( + "Gas Used in CCIP-Send⛽️", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("wei"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | 
json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({ namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_CCIP_Send_Transaction_success="✅"| unwrap data_CCIP_Send_Transaction_ccip_send_data_gas_used [$__range]) by (test_id) `, + loki.Legend("Max"), + ), + ), + row.WithTimeSeries( + "Gas Used in Commit⛽️", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("wei"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap 
data_Commit_ReportAccepted_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_Commit_ReportAccepted_success="✅"| unwrap data_Commit_ReportAccepted_ccip_send_data_gas_used [$__range]) by (test_id) `, + loki.Legend("Max"), + ), + ), + row.WithTimeSeries( + "Gas Used in ARM Blessing⛽️", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("wei"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ReportBlessedByARM_success="✅"| unwrap data_ReportBlessedByARM_ccip_send_data_gas_used [$__range]) by (test_id) `, + loki.Legend("Max"), + 
), + ), + row.WithTimeSeries( + "Gas Used in Execution⛽️", + timeseries.Transparent(), + timeseries.Description(""), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.LokiDataSource), + timeseries.Axis( + axis.Unit("wei"), + ), + timeseries.WithLokiTarget( + `avg_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Avg"), + ), + timeseries.WithLokiTarget( + `min_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_ccip_send_data_gas_used [$__range]) by (test_id)`, + loki.Legend("Min"), + ), + timeseries.WithLokiTarget( + `max_over_time({namespace="${namespace}", go_test_name=~"${go_test_name:pipe}", test_data_type="responses", test_group="${test_group}", test_id=~"${test_id:pipe}",source_chain="${source_chain}",dest_chain="${dest_chain}"} | json | data_ExecutionStateChanged_success="✅"| unwrap data_ExecutionStateChanged_ccip_send_data_gas_used [$__range]) by (test_id) `, + loki.Legend("Max"), + ), + ), + ), + } +} + +func New(p Props) []dashboard.Option { + opts := vars(p) + opts = append(opts, statsRow(p)...) + opts = append(opts, gasStatsRow(p)...) + opts = append(opts, failedMessagesRow(p)...) + opts = append(opts, reqRespRow(p)...) 
+ return opts +} diff --git a/dashboard-lib/config.go b/dashboard-lib/config.go new file mode 100644 index 00000000000..7d2db0878d8 --- /dev/null +++ b/dashboard-lib/config.go @@ -0,0 +1,70 @@ +package dashboard_lib + +import "os" + +type EnvConfig struct { + Platform string + GrafanaURL string + GrafanaToken string + GrafanaFolder string + DataSources DataSources +} + +type DataSources struct { + Loki string + Prometheus string +} + +type DashboardOpts struct { + Tags []string + AutoRefresh string +} + +func ReadEnvDeployOpts() EnvConfig { + name := os.Getenv("DASHBOARD_NAME") + if name == "" { + L.Fatal().Msg("DASHBOARD_NAME must be provided") + } + lokiDataSourceName := os.Getenv("LOKI_DATA_SOURCE_NAME") + if lokiDataSourceName == "" { + L.Fatal().Msg("LOKI_DATA_SOURCE_NAME is empty, panels with logs will be disabled") + } + prometheusDataSourceName := os.Getenv("PROMETHEUS_DATA_SOURCE_NAME") + if prometheusDataSourceName == "" { + L.Fatal().Msg("PROMETHEUS_DATA_SOURCE_NAME must be provided") + } + grafanaURL := os.Getenv("GRAFANA_URL") + if grafanaURL == "" { + L.Fatal().Msg("GRAFANA_URL must be provided") + } + grafanaToken := os.Getenv("GRAFANA_TOKEN") + if grafanaToken == "" { + L.Fatal().Msg("GRAFANA_TOKEN must be provided") + } + grafanaFolder := os.Getenv("GRAFANA_FOLDER") + if grafanaFolder == "" { + L.Fatal().Msg("GRAFANA_FOLDER must be provided") + } + platform := os.Getenv("INFRA_PLATFORM") + if platform == "" { + L.Fatal().Msg("INFRA_PLATFORM must be provided, can be either docker|kubernetes") + } + loki := os.Getenv("LOKI_DATA_SOURCE_NAME") + if lokiDataSourceName == "" { + L.Fatal().Msg("LOKI_DATA_SOURCE_NAME is empty, panels with logs will be disabled") + } + prom := os.Getenv("PROMETHEUS_DATA_SOURCE_NAME") + if prometheusDataSourceName == "" { + L.Fatal().Msg("PROMETHEUS_DATA_SOURCE_NAME must be provided") + } + return EnvConfig{ + GrafanaURL: grafanaURL, + GrafanaToken: grafanaToken, + GrafanaFolder: grafanaFolder, + Platform: platform, + 
DataSources: DataSources{ + Loki: loki, + Prometheus: prom, + }, + } +} diff --git a/dashboard-lib/core-don/component.go b/dashboard-lib/core-don/component.go new file mode 100644 index 00000000000..24173fb6cc9 --- /dev/null +++ b/dashboard-lib/core-don/component.go @@ -0,0 +1,1806 @@ +package core_don + +import ( + "fmt" + "github.com/K-Phoen/grabana/dashboard" + "github.com/K-Phoen/grabana/gauge" + "github.com/K-Phoen/grabana/row" + "github.com/K-Phoen/grabana/stat" + "github.com/K-Phoen/grabana/table" + "github.com/K-Phoen/grabana/target/prometheus" + "github.com/K-Phoen/grabana/timeseries" + "github.com/K-Phoen/grabana/timeseries/axis" + "github.com/K-Phoen/grabana/variable/query" +) + +type Props struct { + PrometheusDataSource string + PlatformOpts PlatformOpts +} + +func vars(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.VariableAsQuery( + "instance", + query.DataSource(p.PrometheusDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request(fmt.Sprintf("label_values(%s)", p.PlatformOpts.LabelFilter)), + query.Sort(query.NumericalAsc), + ), + dashboard.VariableAsQuery( + "evmChainID", + query.DataSource(p.PrometheusDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request(fmt.Sprintf("label_values(%s)", "evmChainID")), + query.Sort(query.NumericalAsc), + ), + } +} + +func generalInfoRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "General CL Cluster Info", + row.Collapse(), + row.WithStat( + "App Version", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationAuto), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(2), + stat.Text("name"), + stat.WithPrometheusTarget( + `version{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{version}}"), + ), + ), + row.WithStat( + "Go Version", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + 
stat.Orientation(stat.OrientationAuto), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(2), + stat.Text("name"), + stat.WithPrometheusTarget( + `go_info{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{version}}"), + ), + ), + row.WithStat( + "Uptime in days", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(8), + stat.WithPrometheusTarget( + `uptime_seconds{`+p.PlatformOpts.LabelQuery+`} / 86400`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithStat( + "ETH Balance", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(6), + stat.Decimals(2), + stat.WithPrometheusTarget( + `eth_balance{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{account}}"), + ), + ), + row.WithStat( + "Solana Balance", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(6), + stat.Decimals(2), + stat.WithPrometheusTarget( + `solana_balance{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LabelFilter+"}} - {{account}}"), + ), + ), + row.WithTimeSeries( + "Service Components Health", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `health{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{service_id}}"), + ), + ), + row.WithTimeSeries( + "ETH Balance", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Decimals(2), + ), + 
timeseries.WithPrometheusTarget( + `eth_balance{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{account}}"), + ), + ), + row.WithTimeSeries( + "SOL Balance", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Decimals(2), + ), + timeseries.WithPrometheusTarget( + `solana_balance{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{account}}"), + ), + ), + ), + } +} + +func logPollerRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("LogPoller", + row.Collapse(), + row.WithStat( + "Goroutines", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationAuto), + stat.Height("200px"), + stat.TitleFontSize(30), + stat.ValueFontSize(30), + stat.Span(6), + stat.Text("Goroutines"), + stat.WithPrometheusTarget( + `count(count by (evmChainID) (log_poller_query_duration_sum{job=~"$instance"}))`, + prometheus.Legend("Goroutines"), + ), + ), + row.WithTimeSeries( + "RPS", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("requests"), + ), + timeseries.WithPrometheusTarget( + `avg by (query) (sum by (query, job) (rate(log_poller_query_duration_count{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])))`, + prometheus.Legend("{{query}} - {{job}}"), + ), + timeseries.WithPrometheusTarget( + `avg (sum by(job) (rate(log_poller_query_duration_count{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])))`, + prometheus.Legend("Total"), + ), + ), + row.WithTimeSeries( + "RPS by type", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("requests"), + ), + timeseries.WithPrometheusTarget( + `avg by (type) (sum by (type, job) 
(rate(log_poller_query_duration_count{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])))`, + ), + ), + row.WithTimeSeries( + "Avg number of logs returned", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("logs"), + ), + timeseries.WithPrometheusTarget( + `avg by (query) (log_poller_query_dataset_size{job=~"$instance", evmChainID=~"$evmChainID"})`, + prometheus.Legend("{{query}} - {{job}}"), + ), + ), + row.WithTimeSeries( + "Max number of logs returned", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("logs"), + ), + timeseries.WithPrometheusTarget( + `max by (query) (log_poller_query_dataset_size{job=~"$instance", evmChainID=~"$evmChainID"})`, + prometheus.Legend("{{query}} - {{job}}"), + ), + ), + row.WithTimeSeries( + "Logs returned by chain", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("logs"), + ), + timeseries.WithPrometheusTarget( + `max by (evmChainID) (log_poller_query_dataset_size{job=~"$instance", evmChainID=~"$evmChainID"})`, + prometheus.Legend("{{evmChainID}}"), + ), + ), + row.WithTimeSeries( + "Queries duration by type (0.5 perc)", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.5, sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, query)) / 1e6`, + prometheus.Legend("{{query}}"), + ), + ), + row.WithTimeSeries( + "queries duration by type (0.9 perc)", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.9, 
sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, query)) / 1e6`, + prometheus.Legend("{{query}}"), + ), + ), + row.WithTimeSeries( + "Queries duration by type (0.99 perc)", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.99, sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, query)) / 1e6`, + prometheus.Legend("{{query}}"), + ), + ), + row.WithTimeSeries( + "Queries duration by chain (0.99 perc)", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.99, sum(rate(log_poller_query_duration_bucket{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval])) by (le, evmChainID)) / 1e6`, + prometheus.Legend("{{query}}"), + ), + ), + row.WithTimeSeries( + "Number of logs inserted", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("logs"), + ), + timeseries.WithPrometheusTarget( + `avg by (evmChainID) (log_poller_logs_inserted{job=~"$instance", evmChainID=~"$evmChainID"})`, + prometheus.Legend("{{evmChainID}}"), + ), + ), + row.WithTimeSeries( + "Logs insertion rate", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `avg by (evmChainID) (rate(log_poller_logs_inserted{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval]))`, + prometheus.Legend("{{evmChainID}}"), + ), + ), + row.WithTimeSeries( + "Number of blocks inserted", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("blocks"), 
+ ), + timeseries.WithPrometheusTarget( + `avg by (evmChainID) (log_poller_blocks_inserted{job=~"$instance", evmChainID=~"$evmChainID"})`, + prometheus.Legend("{{evmChainID}}"), + ), + ), + row.WithTimeSeries( + "Blocks insertion rate", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `avg by (evmChainID) (rate(log_poller_blocks_inserted{job=~"$instance", evmChainID=~"$evmChainID"}[$__rate_interval]))`, + prometheus.Legend("{{evmChainID}}"), + ), + ), + ), + } +} + +func feedJobsRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Feeds Jobs", + row.Collapse(), + row.WithTimeSeries( + "Feeds Job Proposal Requests", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `feeds_job_proposal_requests{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Feeds Job Proposal Count", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `feeds_job_proposal_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func mailBoxRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Mailbox", + row.Collapse(), + row.WithTimeSeries( + "Mailbox Load Percent", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `mailbox_load_percent{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ name }}"), + ), + ), + ), + } +} + +func promReporterRow(p Props) []dashboard.Option { + return 
[]dashboard.Option{ + dashboard.Row("Prom Reporter", + row.Collapse(), + row.WithTimeSeries( + "Unconfirmed Transactions", + timeseries.Span(4), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Tx"), + ), + timeseries.WithPrometheusTarget( + `unconfirmed_transactions{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Unconfirmed TX Age", + timeseries.Span(4), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `max_unconfirmed_tx_age{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Unconfirmed TX Blocks", + timeseries.Span(4), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Blocks"), + ), + timeseries.WithPrometheusTarget( + `max_unconfirmed_blocks{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func txManagerRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("TX Manager", + row.Collapse(), + row.WithTimeSeries( + "TX Manager Time Until TX Broadcast", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_time_until_tx_broadcast{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Num Gas Bumps", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_num_gas_bumps{`+p.PlatformOpts.LabelQuery+`}`, + 
prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Num Gas Bumps Exceeds Limit", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_gas_bump_exceeds_limit{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Num Confirmed Transactions", + timeseries.Span(3), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_num_confirmed_transactions{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Num Successful Transactions", + timeseries.Span(3), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_num_successful_transactions{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Num Reverted Transactions", + timeseries.Span(3), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_num_tx_reverted{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Num Fwd Transactions", + timeseries.Span(3), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_fwd_tx_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX 
Manager Num Transactions Attempts", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_tx_attempt_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Time Until TX Confirmed", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_time_until_tx_confirmed{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "TX Manager Block Until TX Confirmed", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `tx_manager_blocks_until_tx_confirmed{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func headTrackerRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Head tracker", + row.Collapse(), + row.WithTimeSeries( + "Head tracker current head", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `head_tracker_current_head{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Head tracker very old head", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `head_tracker_very_old_head{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + 
row.WithTimeSeries( + "Head tracker heads received", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `head_tracker_heads_received{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Head tracker connection errors", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `head_tracker_connection_errors{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func appDBConnectionsRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("DB Connection Metrics (App)", + row.Collapse(), + row.WithTimeSeries( + "DB Connections", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Conn"), + ), + timeseries.WithPrometheusTarget( + `db_conns_max{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Max"), + ), + timeseries.WithPrometheusTarget( + `db_conns_open{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Open"), + ), + timeseries.WithPrometheusTarget( + `db_conns_used{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Used"), + ), + timeseries.WithPrometheusTarget( + `db_conns_wait{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Wait"), + ), + ), + row.WithTimeSeries( + "DB Wait Count", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `db_wait_count{`+p.PlatformOpts.LabelQuery+`}`, + 
prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "DB Wait Time", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `db_wait_time_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func sqlQueriesRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "SQL Query", + row.Collapse(), + row.WithTimeSeries( + "SQL Query Timeout Percent", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("percent"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.9, sum(rate(sql_query_timeout_percent_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (le))`, + prometheus.Legend("p90"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.95, sum(rate(sql_query_timeout_percent_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (le))`, + prometheus.Legend("p95"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.99, sum(rate(sql_query_timeout_percent_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (le))`, + prometheus.Legend("p99"), + ), + ), + ), + } +} + +func logsCountersRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Logs Metrics", + row.Collapse(), + row.WithTimeSeries( + "Logs Counters", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `log_panic_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - panic"), + ), + timeseries.WithPrometheusTarget( + `log_fatal_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - fatal"), + ), + 
timeseries.WithPrometheusTarget( + `log_critical_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - critical"), + ), + timeseries.WithPrometheusTarget( + `log_warn_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - warn"), + ), + timeseries.WithPrometheusTarget( + `log_error_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - error"), + ), + ), + row.WithTimeSeries( + "Logs Rate", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `sum(rate(log_panic_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - panic"), + ), + timeseries.WithPrometheusTarget( + `sum(rate(log_fatal_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - fatal"), + ), + timeseries.WithPrometheusTarget( + `sum(rate(log_critical_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - critical"), + ), + timeseries.WithPrometheusTarget( + `sum(rate(log_warn_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - warn"), + ), + timeseries.WithPrometheusTarget( + `sum(rate(log_error_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - error"), + ), + ), + ), + } +} + +// TODO: fix, no data points for OCRv1 +func evmPoolLifecycleRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "EVM Pool Lifecycle", + row.Collapse(), + row.WithTimeSeries( + "EVM Pool 
Highest Seen Block", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_highest_seen_block{`+p.PlatformOpts.LabelQuery+`evmChainID="${evmChainID}"}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool Num Seen Blocks", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_num_seen_blocks{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool Node Polls Total", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_polls_total{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool Node Polls Failed", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_polls_failed{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool Node Polls Success", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Block"), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_polls_success{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func nodesRPCRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "Node RPC State", + 
row.Collapse(), + row.WithStat( + "Node RPC Alive", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Alive"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC Closed", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Closed"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC Dialed", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Dialed"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC InvalidChainID", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="InvalidChainID"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC OutOfSync", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + 
stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="OutOfSync"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC UnDialed", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Undialed"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC Unreachable", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Unreachable"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + row.WithStat( + "Node RPC Unusable", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(20), + stat.Span(3), + stat.WithPrometheusTarget( + `sum(multi_node_states{`+p.PlatformOpts.LabelQuery+`chainId=~"$evmChainID", state="Unusable"}) by (`+p.PlatformOpts.LegendString+`, chainId)`, + prometheus.Legend("{{pod}} - {{chainId}}"), + ), + ), + ), + } +} + +func evmNodeRPCRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "EVM Pool RPC Node Metrics (App)", + row.Collapse(), + row.WithTimeSeries( + "EVM Pool RPC Node Calls Success Rate", + timeseries.Span(12), + 
timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Label("%"), + axis.SoftMin(0), + axis.SoftMax(100), + ), + timeseries.WithPrometheusTarget( + `sum(increase(evm_pool_rpc_node_calls_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), + ), + ), + row.WithGauge( + "EVM Pool RPC Node Calls Success Rate", + gauge.Span(12), + gauge.Orientation(gauge.OrientationVertical), + gauge.DataSource(p.PrometheusDataSource), + gauge.Unit("percentunit"), + gauge.WithPrometheusTarget( + `sum(increase(evm_pool_rpc_node_calls_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), + ), + gauge.AbsoluteThresholds([]gauge.ThresholdStep{ + {Color: "#ff0000"}, + {Color: "#ffa500", Value: float64Ptr(0.8)}, + {Color: "#00ff00", Value: float64Ptr(0.9)}, + }), + ), + // issue when value is 0 + row.WithTimeSeries( + "EVM Pool RPC Node Dials Success Rate", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Label("%"), + axis.SoftMin(0), + axis.SoftMax(100), + ), + timeseries.WithPrometheusTarget( + `sum(increase(evm_pool_rpc_node_dials_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / 
sum(increase(evm_pool_rpc_node_dials_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), + ), + ), + // issue when value is 0 + row.WithTimeSeries( + "EVM Pool RPC Node Dials Failure Rate", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Label("%"), + axis.SoftMin(0), + axis.SoftMax(100), + ), + timeseries.WithPrometheusTarget( + `sum(increase(evm_pool_rpc_node_dials_failed{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_dials_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool RPC Node Transitions", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_num_transitions_to_alive{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend(""), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_num_transitions_to_in_sync{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend(""), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_num_transitions_to_out_of_sync{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend(""), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_num_transitions_to_unreachable{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend(""), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_num_transitions_to_invalid_chain_id{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend(""), + ), + timeseries.WithPrometheusTarget( + 
`evm_pool_rpc_node_num_transitions_to_unusable{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend(""), + ), + ), + row.WithTimeSeries( + "EVM Pool RPC Node States", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `evm_pool_rpc_node_states{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{state}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool RPC Node Verifies Success Rate", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Label("%"), + axis.SoftMin(0), + axis.SoftMax(100), + ), + timeseries.WithPrometheusTarget( + `sum(increase(evm_pool_rpc_node_verifies_success{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool RPC Node Verifies Failure Rate", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + axis.Label("%"), + axis.SoftMin(0), + axis.SoftMax(100), + ), + timeseries.WithPrometheusTarget( + `sum(increase(evm_pool_rpc_node_verifies_failed{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, evmChainID, nodeName) * 100`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{evmChainID}} - {{nodeName}}"), + ), + ), + ), + } +} + +func evmRPCNodeLatenciesRow(p 
Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "EVM Pool RPC Node Latencies (App)", + row.Collapse(), + row.WithTimeSeries( + "EVM Pool RPC Node Calls Latency 0.90 quantile", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.90, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, rpcCallName)) / 1e6`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{rpcCallName}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool RPC Node Calls Latency 0.95 quantile", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.95, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, rpcCallName)) / 1e6`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{rpcCallName}}"), + ), + ), + row.WithTimeSeries( + "EVM Pool RPC Node Calls Latency 0.99 quantile", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("ms"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.99, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, rpcCallName)) / 1e6`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{rpcCallName}}"), + ), + ), + ), + } +} + +func evmBlockHistoryEstimatorRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Block History Estimator", + row.Collapse(), + row.WithTimeSeries( + "Gas Updater All Gas Price Percentiles", + timeseries.Span(6), + 
timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `gas_updater_all_gas_price_percentiles{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ percentile }}"), + ), + ), + row.WithTimeSeries( + "Gas Updater All Tip Cap Percentiles", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `gas_updater_all_tip_cap_percentiles{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ percentile }}"), + ), + ), + row.WithTimeSeries( + "Gas Updater Set Gas Price", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `gas_updater_set_gas_price{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Gas Updater Set Tip Cap", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `gas_updater_set_tip_cap{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Gas Updater Current Base Fee", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `gas_updater_current_base_fee{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Block History Estimator Connectivity Failure Count", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + 
timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `block_history_estimator_connectivity_failure_count{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func pipelinesRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Pipeline Metrics (Runner)", + row.Collapse(), + row.WithTimeSeries( + "Pipeline Task Execution Time", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `pipeline_task_execution_time{`+p.PlatformOpts.LabelQuery+`} / 1e6`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), + ), + ), + row.WithTimeSeries( + "Pipeline Run Errors", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `pipeline_run_errors{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), + ), + ), + row.WithTimeSeries( + "Pipeline Run Total Time to Completion", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `pipeline_run_total_time_to_completion{`+p.PlatformOpts.LabelQuery+`} / 1e6`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), + ), + ), + row.WithTimeSeries( + "Pipeline Tasks Total Finished", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `pipeline_tasks_total_finished{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} JobID: {{ job_id }}"), + ), + ), + ), + dashboard.Row( + "Pipeline 
Metrics (ETHCall)", + row.Collapse(), + row.WithTimeSeries( + "Pipeline Task ETH Call Execution Time", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `pipeline_task_eth_call_execution_time{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + dashboard.Row( + "Pipeline Metrics (HTTP)", + row.Collapse(), + row.WithTimeSeries( + "Pipeline Task HTTP Fetch Time", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `pipeline_task_http_fetch_time{`+p.PlatformOpts.LabelQuery+`} / 1e6`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Pipeline Task HTTP Response Body Size", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Bytes"), + ), + timeseries.WithPrometheusTarget( + `pipeline_task_http_response_body_size{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + dashboard.Row( + "Pipeline Metrics (Bridge)", + row.Collapse(), + row.WithTimeSeries( + "Pipeline Bridge Latency", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `bridge_latency_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Pipeline Bridge Errors Total", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `bridge_errors_total{`+p.PlatformOpts.LabelQuery+`}`, + 
prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Pipeline Bridge Cache Hits Total", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `bridge_cache_hits_total{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Pipeline Bridge Cache Errors Total", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `bridge_cache_errors_total{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + dashboard.Row( + "Pipeline Metrics", + row.Collapse(), + row.WithTimeSeries( + "Pipeline Runs Queued", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `pipeline_runs_queued{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Pipeline Runs Tasks Queued", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `pipeline_task_runs_queued{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func httpAPIRow(p Props) []dashboard.Option { + return []dashboard.Option{ + + dashboard.Row( + "HTTP API Metrics", + row.Collapse(), + row.WithTimeSeries( + "Request Duration p95", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Sec"), + ), + timeseries.WithPrometheusTarget( + `histogram_quantile(0.95, 
sum(rate(service_gonic_request_duration_bucket{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, le, path, method))`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ method }} - {{ path }}"), + ), + ), + row.WithTimeSeries( + "Request Total Rate over interval", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `sum(rate(service_gonic_requests_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, path, method, code)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - {{ method }} - {{ path }} - {{ code }}"), + ), + ), + row.WithTimeSeries( + "Average Request Size", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Bytes"), + ), + timeseries.WithPrometheusTarget( + `avg(rate(service_gonic_request_size_bytes_sum{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)/avg(rate(service_gonic_request_size_bytes_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "Response Size", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("Bytes"), + ), + timeseries.WithPrometheusTarget( + `avg(rate(service_gonic_response_size_bytes_sum{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)/avg(rate(service_gonic_response_size_bytes_count{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func promHTTPRow(p Props) []dashboard.Option { + return []dashboard.Option{ + 
dashboard.Row( + "PromHTTP Metrics", + row.Collapse(), + row.WithGauge("HTTP Request in flight", + gauge.Span(12), + gauge.Orientation(gauge.OrientationVertical), + gauge.DataSource(p.PrometheusDataSource), + gauge.WithPrometheusTarget( + `promhttp_metric_handler_requests_in_flight{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithTimeSeries( + "HTTP rate", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `sum(rate(promhttp_metric_handler_requests_total{`+p.PlatformOpts.LabelQuery+`}[$__rate_interval])) by (`+p.PlatformOpts.LegendString+`, code)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + ), + } +} + +func goMetricsRow(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row( + "Go Metrics", + row.Collapse(), + row.WithTable( + "Threads", + table.Span(3), + table.Height("200px"), + table.DataSource(p.PrometheusDataSource), + table.WithPrometheusTarget( + `sum(go_threads{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}")), + table.HideColumn("Time"), + table.AsTimeSeriesAggregations([]table.Aggregation{ + {Label: "AVG", Type: table.AVG}, + {Label: "Current", Type: table.Current}, + }), + ), + row.WithTimeSeries( + "Threads", + timeseries.Span(9), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit(""), + ), + timeseries.WithPrometheusTarget( + `sum(go_threads{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + ), + row.WithStat( + "Heap Allocations", + stat.Span(12), + stat.Orientation(stat.OrientationVertical), + stat.DataSource(p.PrometheusDataSource), + stat.Unit("bytes"), + stat.ColorValue(), + 
stat.WithPrometheusTarget( + `sum(go_memstats_heap_alloc_bytes{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, + ), + ), + row.WithTimeSeries( + "Heap allocations", + timeseries.Span(12), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `sum(go_memstats_heap_alloc_bytes{`+p.PlatformOpts.LabelQuery+`}) by (`+p.PlatformOpts.LegendString+`)`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + timeseries.Axis( + axis.Unit("bytes"), + axis.Label("Memory"), + axis.SoftMin(0), + ), + ), + row.WithTimeSeries( + "Memory in Heap", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("bytes"), + axis.Label("Memory"), + axis.SoftMin(0), + ), + timeseries.WithPrometheusTarget( + `go_memstats_heap_alloc_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Alloc"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_heap_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Sys"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_heap_idle_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Idle"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_heap_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - InUse"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_heap_released_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Released"), + ), + ), + row.WithTimeSeries( + "Memory in Off-Heap", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("bytes"), + axis.Label("Memory"), + axis.SoftMin(0), + ), + timeseries.WithPrometheusTarget( + 
`go_memstats_mspan_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Total InUse"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_mspan_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Total Sys"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_mcache_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Cache InUse"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_mcache_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Cache Sys"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_buck_hash_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Hash Sys"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_gc_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - GC Sys"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_other_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - bytes of memory are used for other runtime allocations"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_next_gc_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Next GC"), + ), + ), + row.WithTimeSeries( + "Memory in Stack", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `go_memstats_stack_inuse_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - InUse"), + ), + timeseries.WithPrometheusTarget( + `go_memstats_stack_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}} - Sys"), + ), + timeseries.Axis( + axis.Unit("bytes"), + axis.Label("Memory"), + axis.SoftMin(0), + 
), + ), + row.WithTimeSeries( + "Total Used Memory", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `go_memstats_sys_bytes{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + timeseries.Axis( + axis.Unit("bytes"), + axis.Label("Memory"), + axis.SoftMin(0), + ), + ), + row.WithTimeSeries( + "Number of Live Objects", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `go_memstats_mallocs_total{`+p.PlatformOpts.LabelQuery+`} - go_memstats_frees_total{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + timeseries.Axis( + axis.SoftMin(0), + ), + ), + row.WithTimeSeries( + "Rate of Objects Allocated", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `rate(go_memstats_mallocs_total{`+p.PlatformOpts.LabelQuery+`}[1m])`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + timeseries.Axis( + axis.SoftMin(0), + ), + ), + row.WithTimeSeries( + "Rate of a Pointer Dereferences", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `rate(go_memstats_lookups_total{`+p.PlatformOpts.LabelQuery+`}[1m])`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + timeseries.Axis( + axis.Unit("ops"), + axis.SoftMin(0), + ), + ), + row.WithTimeSeries( + "Goroutines", + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `go_goroutines{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+p.PlatformOpts.LegendString+"}}"), + ), + timeseries.Axis( + axis.SoftMin(0), + ), + ), + ), + } +} + +func float64Ptr(input float64) *float64 { + return 
&input +} + +func New(p Props) []dashboard.Option { + opts := vars(p) + opts = append(opts, generalInfoRow(p)...) + opts = append(opts, logPollerRow(p)...) + opts = append(opts, feedJobsRow(p)...) + opts = append(opts, mailBoxRow(p)...) + opts = append(opts, promReporterRow(p)...) + opts = append(opts, txManagerRow(p)...) + opts = append(opts, headTrackerRow(p)...) + opts = append(opts, appDBConnectionsRow(p)...) + opts = append(opts, sqlQueriesRow(p)...) + opts = append(opts, logsCountersRow(p)...) + opts = append(opts, evmPoolLifecycleRow(p)...) + opts = append(opts, nodesRPCRow(p)...) + opts = append(opts, evmNodeRPCRow(p)...) + opts = append(opts, evmRPCNodeLatenciesRow(p)...) + opts = append(opts, evmBlockHistoryEstimatorRow(p)...) + opts = append(opts, pipelinesRow(p)...) + opts = append(opts, httpAPIRow(p)...) + opts = append(opts, promHTTPRow(p)...) + opts = append(opts, goMetricsRow(p)...) + return opts +} diff --git a/dashboard-lib/core-don/platform.go b/dashboard-lib/core-don/platform.go new file mode 100644 index 00000000000..fbfed548146 --- /dev/null +++ b/dashboard-lib/core-don/platform.go @@ -0,0 +1,50 @@ +package core_don + +import ( + "fmt" + "sort" +) + +// PlatformOpts holds the platform dependent parts of the panel queries +type PlatformOpts struct { + // Platform is infrastructure deployment platform: docker or kubernetes + Platform string + LabelFilters map[string]string + LabelFilter string + LegendString string + // LabelQuery is LabelFilters rendered as comma-terminated label matchers, ordered by label name + LabelQuery string +} + +// PlatformPanelOpts generates different queries for "docker" and "kubernetes" deployment platforms. +// It panics when platform is any other value. +func PlatformPanelOpts(platform string) PlatformOpts { + po := PlatformOpts{Platform: platform} + switch platform { + case "kubernetes": + po.LabelFilters = map[string]string{ + "namespace": `=~"${namespace}"`, + "pod": `=~"${pod}"`, + } + po.LabelFilter = "job" + po.LegendString = "pod" + case "docker": + po.LabelFilters = map[string]string{ + "instance": `=~"${instance}"`, + } + po.LabelFilter = "instance" +
po.LegendString = "instance" + default: + panic(fmt.Sprintf("failed to generate Platform dependent queries, unknown platform: %s", platform)) + } + // map iteration order is random: sort the label names so the generated query is identical on every run + keys := make([]string, 0, len(po.LabelFilters)) + for key := range po.LabelFilters { + keys = append(keys, key) + } + sort.Strings(keys) + for _, key := range keys { + po.LabelQuery += key + po.LabelFilters[key] + ", " + } + return po +} diff --git a/dashboard-lib/core-ocrv2-ccip/component.go b/dashboard-lib/core-ocrv2-ccip/component.go new file mode 100644 index 00000000000..837f693fcc7 --- /dev/null +++ b/dashboard-lib/core-ocrv2-ccip/component.go @@ -0,0 +1,83 @@ +package core_ocrv2_ccip + +import ( + "fmt" + "github.com/K-Phoen/grabana/dashboard" + "github.com/K-Phoen/grabana/row" + "github.com/K-Phoen/grabana/target/prometheus" + "github.com/K-Phoen/grabana/timeseries" +) + +type Props struct { + PrometheusDataSource string + PluginName string +} + +func quantileRowOpts(ds string, pluginName string, perc string) row.Option { + return row.WithTimeSeries( + fmt.Sprintf("(%s) OCR2 duration (%s)", pluginName, perc), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(ds), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`histogram_quantile(%s, sum(rate(ocr2_reporting_plugin_observation_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), + prometheus.Legend("Observation"), + ), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`histogram_quantile(%s, sum(rate(ocr2_reporting_plugin_report_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), + prometheus.Legend("Report"), + ), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`histogram_quantile(%s, sum(rate(ocr2_reporting_plugin_should_accept_finalized_report_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), + prometheus.Legend("ShouldAcceptFinalizedReport"), + ), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`histogram_quantile(%s,
sum(rate(ocr2_reporting_plugin_should_transmit_accepted_report_time_bucket{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__rate_interval])) by (le)) / 1e9`, perc, pluginName), + prometheus.Legend("ShouldTransmitAcceptedReport"), + ), + ) +} + +func ocrv2PluginObservationStageQuantiles(p Props) []dashboard.Option { + opts := make([]row.Option, 0) + opts = append(opts, + row.Collapse(), + row.WithTimeSeries( + fmt.Sprintf("(%s) OCR2 RPS by phase", p.PluginName), + timeseries.Span(6), + timeseries.Height("200px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_observation_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), + prometheus.Legend("Observation"), + ), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_report_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), + prometheus.Legend("Report"), + ), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_should_accept_finalized_report_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), + prometheus.Legend("ShouldAcceptFinalizedReport"), + ), + timeseries.WithPrometheusTarget( + fmt.Sprintf(`sum(rate(ocr2_reporting_plugin_should_transmit_accepted_report_time_count{plugin="%s", job=~"$instance", chainID=~"$evmChainID"}[$__range]))`, p.PluginName), + prometheus.Legend("ShouldTransmitAcceptedReport"), + ), + ), + quantileRowOpts(p.PrometheusDataSource, p.PluginName, "0.5"), + quantileRowOpts(p.PrometheusDataSource, p.PluginName, "0.9"), + quantileRowOpts(p.PrometheusDataSource, p.PluginName, "0.99"), + ) + return []dashboard.Option{ + dashboard.Row( + fmt.Sprintf("OCRv2 Metrics - Plugin: %s", p.PluginName), + opts..., + ), + } +} + +func New(p Props) []dashboard.Option { + opts := make([]dashboard.Option, 0) + opts = append(opts, 
ocrv2PluginObservationStageQuantiles(p)...) + return opts +} diff --git a/dashboard-lib/dashboard.go b/dashboard-lib/dashboard.go new file mode 100644 index 00000000000..73dde5704f0 --- /dev/null +++ b/dashboard-lib/dashboard.go @@ -0,0 +1,95 @@ +package dashboard_lib + +import ( + "context" + "encoding/json" + "github.com/K-Phoen/grabana" + "github.com/K-Phoen/grabana/dashboard" + "github.com/pkg/errors" + "net/http" + "os" +) + +// Dashboard bundles a dashboard name, its deployment settings and the Grabana options it is built from. +type Dashboard struct { + Name string + DeployOpts EnvConfig + /* SDK panels that are missing in Grabana */ + SDKPanels []map[string]interface{} + /* generated dashboard opts and builder */ + builder dashboard.Builder + Opts []dashboard.Option +} + +// NewDashboard returns a Dashboard holding the given name, deployment settings and Grabana options. +func NewDashboard( + name string, + deployOpts EnvConfig, + opts []dashboard.Option, +) *Dashboard { + return &Dashboard{ + Name: name, + DeployOpts: deployOpts, + Opts: opts, + } +} + +// Deploy builds the dashboard from Opts and upserts it into the Grafana folder named in DeployOpts. +// It returns the build error as is, and a wrapped error when the folder lookup or the upsert fails. +func (m *Dashboard) Deploy() error { + ctx := context.Background() + b, err := m.build() + if err != nil { + return err + } + client := grabana.NewClient(&http.Client{}, m.DeployOpts.GrafanaURL, grabana.WithAPIToken(m.DeployOpts.GrafanaToken)) + fo, err := client.FindOrCreateFolder(ctx, m.DeployOpts.GrafanaFolder) + if err != nil { + // wrap the folder error itself: errors.Wrap returns nil when it is handed a nil error + return errors.Wrap(err, "could not find or create Grafana folder") + } + if _, err := client.UpsertDashboard(ctx, fo, b); err != nil { + return errors.Wrap(err, "failed to upsert the dashboard") + } + return nil +} + +func (m *Dashboard) Add(opts []dashboard.Option) { + m.Opts = append(m.Opts, opts...)
+} + +func (m *Dashboard) AddSDKPanel(panel map[string]interface{}) { + m.SDKPanels = append(m.SDKPanels, panel) +} + +func (m *Dashboard) build() (dashboard.Builder, error) { + b, err := dashboard.New( + m.Name, + m.Opts..., + ) + if err != nil { + return dashboard.Builder{}, errors.Wrap(err, "failed to build the dashboard") + } + return b, nil +} + +// TODO: re-write after forking Grabana, inject foundation SDK components from official schema +func (m *Dashboard) injectSDKPanels(b dashboard.Builder) (dashboard.Builder, error) { + data, err := b.MarshalIndentJSON() + if err != nil { + return dashboard.Builder{}, err + } + var asMap map[string]interface{} + if err := json.Unmarshal(data, &asMap); err != nil { + return dashboard.Builder{}, err + } + asMap["rows"].([]interface{})[0].(map[string]interface{})["panels"] = append(asMap["rows"].([]interface{})[0].(map[string]interface{})["panels"].([]interface{}), m.SDKPanels[0]) + d, err := json.Marshal(asMap) + if err != nil { + return dashboard.Builder{}, err + } + if err := os.WriteFile("generated_ccip_dashboard.json", d, os.ModePerm); err != nil { + return dashboard.Builder{}, err + } + return b, nil +} diff --git a/dashboard-lib/go.mod b/dashboard-lib/go.mod new file mode 100644 index 00000000000..eef60129771 --- /dev/null +++ b/dashboard-lib/go.mod @@ -0,0 +1,22 @@ +module github.com/smartcontractkit/chainlink/dashboard-lib + +go 1.21.7 + +require ( + github.com/K-Phoen/grabana v0.22.1 + github.com/grafana/grafana-foundation-sdk/go v0.0.0-00010101000000-000000000000 + github.com/pkg/errors v0.9.1 + github.com/rs/zerolog v1.32.0 +) + +replace github.com/grafana/grafana-foundation-sdk/go => github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c + +require ( + github.com/K-Phoen/sdk v0.12.4 // indirect + github.com/gosimple/slug v1.13.1 // indirect + github.com/gosimple/unidecode v1.0.1 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.19 // 
indirect + github.com/prometheus/common v0.45.0 // indirect + golang.org/x/sys v0.13.0 // indirect +) diff --git a/dashboard-lib/go.sum b/dashboard-lib/go.sum new file mode 100644 index 00000000000..0af3f10f4fe --- /dev/null +++ b/dashboard-lib/go.sum @@ -0,0 +1,37 @@ +github.com/K-Phoen/grabana v0.22.1 h1:b/O+C3H2H6VNYSeMCYUO4X4wYuwFXgBcRkvYa+fjpQA= +github.com/K-Phoen/grabana v0.22.1/go.mod h1:3LTXrTzQzTKTgvKSXdRjlsJbizSOW/V23Q3iX00R5bU= +github.com/K-Phoen/sdk v0.12.4 h1:j2EYuBJm3zDTD0fGKACVFWxAXtkR0q5QzfVqxmHSeGQ= +github.com/K-Phoen/sdk v0.12.4/go.mod h1:qmM0wO23CtoDux528MXPpYvS4XkRWkWX6rvX9Za8EVU= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gosimple/slug v1.13.1 h1:bQ+kpX9Qa6tHRaK+fZR0A0M2Kd7Pa5eHPPsb1JpHD+Q= +github.com/gosimple/slug v1.13.1/go.mod h1:UiRaFH+GEilHstLUmcBgWcI42viBN7mAb818JrYOeFQ= +github.com/gosimple/unidecode v1.0.1 h1:hZzFTMMqSswvf0LBJZCZgThIZrpDHFXux9KeGmn6T/o= +github.com/gosimple/unidecode v1.0.1/go.mod h1:CP0Cr1Y1kogOtx0bJblKzsVWrqYaqfNOnHzpgWw4Awc= +github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c h1:0vdGmlvHPzjNHx9Tx8soQEKe1ci0WVtA82s00sZDYUs= +github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c/go.mod h1:WtWosval1KCZP9BGa42b8aVoJmVXSg0EvQXi9LDSVZQ= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= +github.com/mattn/go-isatty v0.0.19/go.mod 
h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM=
+github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.32.0 h1:keLypqrlIjaFsbmJOBdB/qvyF8KEtCWHwobLp5l/mQ0=
+github.com/rs/zerolog v1.32.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/dashboard-lib/k8s-pods/component.go b/dashboard-lib/k8s-pods/component.go
new file mode 100644
index 00000000000..4ef90c3012f
--- /dev/null
+++ b/dashboard-lib/k8s-pods/component.go
@@ -0,0 +1,208 @@
+package k8spods
+
+import (
+	"github.com/K-Phoen/grabana/dashboard"
+	"github.com/K-Phoen/grabana/logs"
+	"github.com/K-Phoen/grabana/row"
+	"github.com/K-Phoen/grabana/stat"
+	"github.com/K-Phoen/grabana/target/prometheus"
+	"github.com/K-Phoen/grabana/timeseries"
+	"github.com/K-Phoen/grabana/timeseries/axis"
+	"github.com/K-Phoen/grabana/variable/query"
+)
+
+// Props names the Grafana data sources the K8s pod panels read from.
+type Props struct {
+	LokiDataSource       string
+	PrometheusDataSource string
+}
+
+// vars returns the "namespace" and "pod" dashboard variables; both are
+// multi-value, include an "All" option and query the Prometheus data source.
+func vars(p Props) []dashboard.Option {
+	return []dashboard.Option{
+		dashboard.VariableAsQuery(
+			"namespace",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			query.Request("label_values(namespace)"),
+			query.Sort(query.NumericalAsc),
+		),
+		dashboard.VariableAsQuery(
+			"pod",
+			query.DataSource(p.PrometheusDataSource),
+			query.Multiple(),
+			query.IncludeAll(),
+			// namespace is multi-value, so it is matched as a regex (=~),
+			// consistent with the panel queries in New.
+			query.Request("label_values(kube_pod_container_info{namespace=~\"$namespace\"}, pod)"),
+			query.Sort(query.NumericalAsc),
+		),
+	}
+}
+
+// logsRow returns the collapsed "K8s Logs" row with two Loki log panels for
+// the selected namespace and pods: all logs, and error-level logs only.
+func logsRow(p Props) []dashboard.Option {
+	return []dashboard.Option{
+		dashboard.Row(
+			"K8s Logs",
+			row.Collapse(),
+			row.WithLogs(
+				"All Logs",
+				logs.DataSource(p.LokiDataSource),
+				logs.Span(12),
+				logs.Height("300px"),
+				logs.Transparent(),
+				// =~ because $namespace may expand to several values.
+				logs.WithLokiTarget(`{namespace=~"$namespace", pod=~"${pod:pipe}"}`),
+			),
+			row.WithLogs(
+				"All Errors",
+				logs.DataSource(p.LokiDataSource),
+				logs.Span(12),
+				logs.Height("300px"),
+				logs.Transparent(),
+				logs.WithLokiTarget(`{namespace=~"$namespace", pod=~"${pod:pipe}"} | json | level=~"error|warn|fatal|panic"`),
+			),
+		),
+	}
+}
+
+// New returns the dashboard options of the K8s pods component: the
+// namespace/pod variables, a collapsed "K8s Pods" row with restart, OOM,
+// CPU, memory and network panels, and the "K8s Logs" row.
+func New(p Props) []dashboard.Option {
+	opts := vars(p)
+	opts = append(opts,
+		[]dashboard.Option{
+			dashboard.Row(
+				"K8s Pods",
+				row.Collapse(),
+				row.WithStat(
+					"Pod Restarts",
+					stat.Span(4),
+					stat.Text(stat.TextValueAndName),
+					stat.Orientation(stat.OrientationHorizontal),
+					stat.DataSource(p.PrometheusDataSource),
+					stat.SparkLine(),
+					stat.SparkLineYMin(0),
+					stat.WithPrometheusTarget(
+						`sum(increase(kube_pod_container_status_restarts_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithStat(
+					"OOM Events",
+					stat.Span(4),
+					stat.Text(stat.TextValueAndName),
+					stat.Orientation(stat.OrientationHorizontal),
+					stat.DataSource(p.PrometheusDataSource),
+					stat.SparkLine(),
+					stat.SparkLineYMin(0),
+					stat.WithPrometheusTarget(
+						`sum(container_oom_events_total{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithStat(
+					"OOM Killed",
+					stat.Span(4),
+					stat.Text(stat.TextValueAndName),
+					stat.Orientation(stat.OrientationHorizontal),
+					stat.DataSource(p.PrometheusDataSource),
+					stat.SparkLine(),
+					stat.SparkLineYMin(0),
+					stat.WithPrometheusTarget(
+						`kube_pod_container_status_last_terminated_reason{reason="OOMKilled", pod=~"$pod", namespace=~"${namespace}"}`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithTimeSeries(
+					"CPU Usage",
+					timeseries.Span(6),
+					timeseries.Height("200px"),
+					timeseries.DataSource(p.PrometheusDataSource),
+					timeseries.WithPrometheusTarget(
+						`sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithTimeSeries(
+					"Memory Usage",
+					timeseries.Span(6),
+					timeseries.Height("200px"),
+					timeseries.DataSource(p.PrometheusDataSource),
+					timeseries.Axis(
+						axis.Unit("bytes"),
+						axis.Label("Memory"),
+						axis.SoftMin(0),
+					),
+					timeseries.WithPrometheusTarget(
+						`sum(container_memory_rss{pod=~"$pod", namespace=~"${namespace}", container!=""}) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithTimeSeries(
+					"Receive Bandwidth",
+					timeseries.Span(6),
+					timeseries.Height("200px"),
+					timeseries.DataSource(p.PrometheusDataSource),
+					timeseries.Axis(
+						axis.Unit("Bps"),
+						axis.SoftMin(0),
+					),
+					timeseries.WithPrometheusTarget(
+						`sum(irate(container_network_receive_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithTimeSeries(
+					"Transmit Bandwidth",
+					timeseries.Span(6),
+					timeseries.Height("200px"),
+					timeseries.DataSource(p.PrometheusDataSource),
+					timeseries.Axis(
+						axis.Unit("Bps"),
+						axis.SoftMin(0),
+					),
+					timeseries.WithPrometheusTarget(
+						`sum(irate(container_network_transmit_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithTimeSeries(
+					"Average Container Bandwidth by Namespace: Received",
+					timeseries.Span(6),
+					timeseries.Height("200px"),
+					timeseries.DataSource(p.PrometheusDataSource),
+					timeseries.Axis(
+						axis.Unit("Bps"),
+						axis.SoftMin(0),
+					),
+					timeseries.WithPrometheusTarget(
+						`avg(irate(container_network_receive_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+				row.WithTimeSeries(
+					"Average Container Bandwidth by Namespace: Transmitted",
+					timeseries.Span(6),
+					timeseries.Height("200px"),
+					timeseries.DataSource(p.PrometheusDataSource),
+					timeseries.Axis(
+						axis.Unit("Bps"),
+						axis.SoftMin(0),
+					),
+					timeseries.WithPrometheusTarget(
+						`avg(irate(container_network_transmit_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`,
+						prometheus.Legend("{{pod}}"),
+					),
+				),
+			),
+		}...,
+	)
+	opts = append(opts, logsRow(p)...)
+	return opts
+}
diff --git a/dashboard-lib/log.go b/dashboard-lib/log.go
new file mode 100644
index 00000000000..edb4607a0ba
--- /dev/null
+++ b/dashboard-lib/log.go
@@ -0,0 +1,33 @@
+package dashboard_lib
+
+import (
+	"os"
+
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+const (
+	// LogLevelEnvVar is the environment variable init reads to pick the level of L.
+	LogLevelEnvVar = "DASHBOARD_LOG_LEVEL"
+)
+
+var (
+	// L is the package logger; init points it at a console writer on stderr.
+	L zerolog.Logger
+)
+
+// init configures L from LogLevelEnvVar, defaulting to "info" when the
+// variable is unset or empty. It panics when the value is not accepted by
+// zerolog.ParseLevel.
+func init() {
+	lvlStr := os.Getenv(LogLevelEnvVar)
+	if lvlStr == "" {
+		lvlStr = "info"
+	}
+	lvl, err := zerolog.ParseLevel(lvlStr)
+	if err != nil {
+		panic(err)
+	}
+	L = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}).Level(lvl)
+}
From 7d53abeea08d12de426bb3779d52f44a6c036e1a Mon Sep 17 00:00:00 2001
From: skudasov
Date: Wed, 20 Mar 2024 18:21:50 +0100
Subject: [PATCH 3/3] fix go mod

---
 charts/chainlink-cluster/go.mod | 2 --
 1 file changed, 2 deletions(-)

diff --git a/charts/chainlink-cluster/go.mod b/charts/chainlink-cluster/go.mod
index 58bc332b64b..4a8dd43fd5f 100644
--- a/charts/chainlink-cluster/go.mod
+++ b/charts/chainlink-cluster/go.mod
@@ -20,8 +20,6 @@ require (
 	golang.org/x/sys v0.15.0 // indirect
 )
 
-replace github.com/grafana/grafana-foundation-sdk/go => github.com/grafana/grafana-foundation-sdk/go v0.0.0-20240314112857-a7c9c6d0044c
-
 replace (
 	github.com/go-kit/log => github.com/go-kit/log v0.2.1