From 03eb05e73a927a63a0908001e3c6048738daf558 Mon Sep 17 00:00:00 2001 From: Clement Erena Date: Thu, 5 Dec 2024 11:57:44 +0100 Subject: [PATCH] feat(observability-lib): remove consumers + refactor cmd --- observability-lib/README.md | 74 +- observability-lib/api/notification-policy.go | 44 +- .../api/notification-policy_test.go | 219 + observability-lib/cmd/api/api.go | 31 + .../cmd/api/contact_point/contact-point.go | 14 + .../cmd/api/contact_point/delete.go | 35 + .../cmd/api/contact_point/list.go | 28 + .../cmd/api/dashboard/dashboard.go | 14 + observability-lib/cmd/api/dashboard/delete.go | 35 + .../cmd/api/notification_policy/delete.go | 61 + .../cmd/api/notification_policy/list.go | 26 + .../notification-policy.go | 14 + observability-lib/cmd/builder.go | 115 - observability-lib/cmd/delete.go | 36 - observability-lib/cmd/deploy.go | 109 - observability-lib/cmd/generate.go | 46 - observability-lib/cmd/log.go | 28 - .../cmd/notification-templates.yaml | 65 - observability-lib/cmd/root.go | 23 + .../dashboards/atlas-don/component.go | 790 -- .../dashboards/atlas-don/component_test.go | 81 - .../dashboards/atlas-don/platform.go | 57 - .../dashboards/atlas-don/test-output.json | 1503 ---- .../dashboards/capabilities/component.go | 219 - .../dashboards/capabilities/component_test.go | 77 - .../dashboards/capabilities/test-output.json | 468 -- .../core-node-components/component.go | 210 - .../core-node-components/component_test.go | 81 - .../core-node-components/platform.go | 40 - .../core-node-components/test-output.json | 433 -- .../dashboards/core-node/component.go | 2301 ------ .../dashboards/core-node/component_test.go | 82 - .../dashboards/core-node/platform.go | 55 - .../dashboards/core-node/test-output.json | 6359 ----------------- .../dashboards/k8s-resources/component.go | 416 -- .../k8s-resources/component_test.go | 75 - .../dashboards/k8s-resources/test-output.json | 991 --- .../dashboards/nop-ocr/component.go | 352 - 
.../dashboards/nop-ocr/component_test.go | 76 - .../dashboards/nop-ocr/test-output.json | 687 -- observability-lib/go.mod | 4 - observability-lib/go.sum | 17 - observability-lib/main.go | 19 +- 43 files changed, 577 insertions(+), 15833 deletions(-) create mode 100644 observability-lib/cmd/api/api.go create mode 100644 observability-lib/cmd/api/contact_point/contact-point.go create mode 100644 observability-lib/cmd/api/contact_point/delete.go create mode 100644 observability-lib/cmd/api/contact_point/list.go create mode 100644 observability-lib/cmd/api/dashboard/dashboard.go create mode 100644 observability-lib/cmd/api/dashboard/delete.go create mode 100644 observability-lib/cmd/api/notification_policy/delete.go create mode 100644 observability-lib/cmd/api/notification_policy/list.go create mode 100644 observability-lib/cmd/api/notification_policy/notification-policy.go delete mode 100644 observability-lib/cmd/builder.go delete mode 100644 observability-lib/cmd/delete.go delete mode 100644 observability-lib/cmd/deploy.go delete mode 100644 observability-lib/cmd/generate.go delete mode 100644 observability-lib/cmd/log.go delete mode 100644 observability-lib/cmd/notification-templates.yaml create mode 100644 observability-lib/cmd/root.go delete mode 100644 observability-lib/dashboards/atlas-don/component.go delete mode 100644 observability-lib/dashboards/atlas-don/component_test.go delete mode 100644 observability-lib/dashboards/atlas-don/platform.go delete mode 100644 observability-lib/dashboards/atlas-don/test-output.json delete mode 100644 observability-lib/dashboards/capabilities/component.go delete mode 100644 observability-lib/dashboards/capabilities/component_test.go delete mode 100644 observability-lib/dashboards/capabilities/test-output.json delete mode 100644 observability-lib/dashboards/core-node-components/component.go delete mode 100644 observability-lib/dashboards/core-node-components/component_test.go delete mode 100644 
observability-lib/dashboards/core-node-components/platform.go delete mode 100644 observability-lib/dashboards/core-node-components/test-output.json delete mode 100644 observability-lib/dashboards/core-node/component.go delete mode 100644 observability-lib/dashboards/core-node/component_test.go delete mode 100644 observability-lib/dashboards/core-node/platform.go delete mode 100644 observability-lib/dashboards/core-node/test-output.json delete mode 100644 observability-lib/dashboards/k8s-resources/component.go delete mode 100644 observability-lib/dashboards/k8s-resources/component_test.go delete mode 100644 observability-lib/dashboards/k8s-resources/test-output.json delete mode 100644 observability-lib/dashboards/nop-ocr/component.go delete mode 100644 observability-lib/dashboards/nop-ocr/component_test.go delete mode 100644 observability-lib/dashboards/nop-ocr/test-output.json diff --git a/observability-lib/README.md b/observability-lib/README.md index 27b0dad0f..8c4e4d249 100644 --- a/observability-lib/README.md +++ b/observability-lib/README.md @@ -15,8 +15,7 @@ The observability-lib is structured as follows: ```shell observability-lib/ api/ # Grafana HTTP API Client to interact with resources - cmd/ # CLI to interact deploy or generateJSON from dashboards defined in folder below - dashboards/ # Dashboards definitions + cmd/ # CLI grafana/ # grafana-foundations-sdk abstraction to manipulate grafana resources ``` @@ -89,43 +88,56 @@ func main() { ``` -More advanced examples can be found in the [dashboards](./dashboards) folder : -- [DON OCR](./dashboards/atlas-don/component.go) -- [Capabilities](./dashboards/capabilities/component.go) -- [Node General](./dashboards/core-node/component.go) -- [Node Components](./dashboards/core-node-components/component.go) -- [Kubernetes Resources](./dashboards/k8s-resources/component.go) -- [NOP OCR Health](./dashboards/nop-ocr/component.go) - ## Cmd Usage -The CLI can be used to : -- Deploy dashboards and alerts to grafana -- 
Generate JSON from dashboards defined in the `dashboards` folder +CLI to manipulate grafana resources + +### Contact Point -`func NewDashboard(props *Props)` in each [dashboards](./dashboards) packages is called from [cmd](./cmd/builder.go) to deploy or generate JSON from the dashboard. +#### List -Example to deploy a dashboard to grafana instance using URL and token: ```shell -make build -./observability-lib deploy \ - --dashboard-name DashboardName \ - --dashboard-folder FolderName \ - --grafana-url $GRAFANA_URL \ - --grafana-token $GRAFANA_TOKEN \ - --type core-node \ - --platform kubernetes \ - --metrics-datasource Prometheus +./observability-lib api contact-point list \ + --grafana-url http://localhost:3000 \ + --grafana-token ``` -To see how to get a grafana token you can check this [page](https://grafana.com/docs/grafana/latest/administration/service-accounts/) -Example to generate JSON from a dashboard defined in the `dashboards` folder: +#### Delete + ```shell -make build -./observability-lib generate \ - --dashboard-name DashboardName \ - --type core-node-components \ - --platform kubernetes +./observability-lib api contact-point delete \ + --grafana-url http://localhost:3000 \ + --grafana-token +``` + +### Dashboard + +#### Delete + +```shell +./observability-lib api dashboard delete \ + --grafana-url http://localhost:3000 \ + --grafana-token +``` + +### Notification Policy + +#### List + +```shell +./observability-lib api notification-policy list \ + --grafana-url http://localhost:3000 \ + --grafana-token +``` + +#### Delete + +```shell +./observability-lib api notification-policy delete \ + --grafana-url http://localhost:3000 \ + --grafana-token \ + --matchers key,=,value \ + --matchers key2,=,value2 ``` ## Makefile Usage diff --git a/observability-lib/api/notification-policy.go b/observability-lib/api/notification-policy.go index d20d21a67..5812e81d2 100644 --- a/observability-lib/api/notification-policy.go +++ 
b/observability-lib/api/notification-policy.go @@ -29,6 +29,23 @@ func objectMatchersEqual(a alerting.ObjectMatchers, b alerting.ObjectMatchers) b return true } +func PrintPolicyTree(policy alerting.NotificationPolicy, depth int) { + if depth == 0 { + fmt.Printf("| Root Policy | Receiver: %s\n", *policy.Receiver) + } + + for _, notificationPolicy := range policy.Routes { + for i := 0; i < depth; i++ { + fmt.Print("--") + } + fmt.Printf("| Matchers %s | Receiver: %s\n", *notificationPolicy.ObjectMatchers, *notificationPolicy.Receiver) + + if notificationPolicy.Routes != nil { + PrintPolicyTree(notificationPolicy, depth+1) + } + } +} + func policyExist(parent alerting.NotificationPolicy, newNotificationPolicy alerting.NotificationPolicy) bool { for _, notificationPolicy := range parent.Routes { matchersEqual := false @@ -40,7 +57,7 @@ func policyExist(parent alerting.NotificationPolicy, newNotificationPolicy alert return true } if notificationPolicy.Routes != nil { - policyExist(notificationPolicy, newNotificationPolicy) + return policyExist(notificationPolicy, newNotificationPolicy) } } return false @@ -58,7 +75,7 @@ func updateInPlace(parent *alerting.NotificationPolicy, newNotificationPolicy al return true } if notificationPolicy.Routes != nil { - policyExist(notificationPolicy, newNotificationPolicy) + return updateInPlace(&parent.Routes[key], newNotificationPolicy) } } return false @@ -72,11 +89,18 @@ func deleteInPlace(parent *alerting.NotificationPolicy, newNotificationPolicy al } receiversEqual := reflect.DeepEqual(notificationPolicy.Receiver, newNotificationPolicy.Receiver) if matchersEqual && receiversEqual { - parent.Routes = append(parent.Routes[:key], parent.Routes[key+1:]...) - return true + if len(parent.Routes) == 1 { + parent.Routes = nil + return true + } else if len(parent.Routes) > 1 { + parent.Routes = append(parent.Routes[:key], parent.Routes[key+1:]...) 
+ return true + } else { + return false + } } if notificationPolicy.Routes != nil { - policyExist(notificationPolicy, newNotificationPolicy) + return deleteInPlace(&parent.Routes[key], newNotificationPolicy) } } return false @@ -85,16 +109,14 @@ func deleteInPlace(parent *alerting.NotificationPolicy, newNotificationPolicy al // DeleteNestedPolicy Delete Nested Policy from Notification Policy Tree func (c *Client) DeleteNestedPolicy(newNotificationPolicy alerting.NotificationPolicy) error { notificationPolicyTreeResponse, _, err := c.GetNotificationPolicy() - notificationPolicyTree := alerting.NotificationPolicy(notificationPolicyTreeResponse) - if err != nil { return err } - if policyExist(notificationPolicyTree, newNotificationPolicy) { - deleteInPlace(&notificationPolicyTree, newNotificationPolicy) - } else { - return fmt.Errorf("policy not found") + notificationPolicyTree := alerting.NotificationPolicy(notificationPolicyTreeResponse) + if !policyExist(notificationPolicyTree, newNotificationPolicy) { + return fmt.Errorf("notification policy not found") } + deleteInPlace(&notificationPolicyTree, newNotificationPolicy) _, _, errPutNotificationPolicy := c.PutNotificationPolicy(notificationPolicyTree) if errPutNotificationPolicy != nil { return errPutNotificationPolicy diff --git a/observability-lib/api/notification-policy_test.go b/observability-lib/api/notification-policy_test.go index 16ead9063..3891a0458 100644 --- a/observability-lib/api/notification-policy_test.go +++ b/observability-lib/api/notification-policy_test.go @@ -7,6 +7,10 @@ import ( "github.com/stretchr/testify/require" ) +func Pointer[T any](d T) *T { + return &d +} + func TestObjectMatchersEqual(t *testing.T) { t.Run("returns true if the two object matchers are equal", func(t *testing.T) { a := alerting.ObjectMatchers{{"team", "=", "chainlink"}} @@ -44,3 +48,218 @@ func TestObjectMatchersEqual(t *testing.T) { require.False(t, result) }) } + +func TestPolicyExists(t *testing.T) { + t.Run("policyExists 
return true if policy exists", func(t *testing.T) { + notificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + }, + }, + }, + }, + } + + newNotificationPolicy := alerting.NotificationPolicy{ + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + } + result := policyExist(*notificationPolicyTree, newNotificationPolicy) + require.True(t, result) + }) + + t.Run("policyExists return false if policy does not exists", func(t *testing.T) { + notificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + }, + }, + }, + }, + } + + newNotificationPolicy := alerting.NotificationPolicy{ + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"key", "=", "value"}, + }, + } + result := policyExist(*notificationPolicyTree, newNotificationPolicy) + require.False(t, result) + }) + + t.Run("updateInPlace should update notification policy if already exists", func(t *testing.T) { + notificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + Routes: []alerting.NotificationPolicy{ + { + Receiver: 
Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + }, + }, + }, + }, + } + + newNotificationPolicy := alerting.NotificationPolicy{ + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + Continue: Pointer(true), + } + + expectedNotificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + Continue: Pointer(true), + }, + }, + }, + }, + } + + updateInPlace(notificationPolicyTree, newNotificationPolicy) + require.Equal(t, expectedNotificationPolicyTree, notificationPolicyTree) + }) + + t.Run("deleteInPlace should delete notification policy if exists", func(t *testing.T) { + notificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + }, + { + Receiver: Pointer("slack2"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink2"}, + }, + }, + { + Receiver: Pointer("slack3"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink3"}, + }, + }, + }, + } + + newNotificationPolicy := alerting.NotificationPolicy{ + Receiver: Pointer("slack2"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink2"}, + }, + } + + expectedNotificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + }, 
+ { + Receiver: Pointer("slack3"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink3"}, + }, + }, + }, + } + deleteInPlace(notificationPolicyTree, newNotificationPolicy) + require.Equal(t, expectedNotificationPolicyTree, notificationPolicyTree) + }) + + t.Run("deleteInPlace should delete notification policy if exists", func(t *testing.T) { + notificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + }, + }, + }, + }, + } + + newNotificationPolicy := alerting.NotificationPolicy{ + Receiver: Pointer("pagerduty"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"env", "=", "production"}, + }, + } + + expectedNotificationPolicyTree := &alerting.NotificationPolicy{ + Receiver: Pointer("grafana-default-email"), + Routes: []alerting.NotificationPolicy{ + { + Receiver: Pointer("slack"), + ObjectMatchers: &alerting.ObjectMatchers{ + {"team", "=", "chainlink"}, + }, + }, + }, + } + deleteInPlace(notificationPolicyTree, newNotificationPolicy) + require.Equal(t, expectedNotificationPolicyTree, notificationPolicyTree) + }) + +} diff --git a/observability-lib/cmd/api/api.go b/observability-lib/cmd/api/api.go new file mode 100644 index 000000000..d6c407f76 --- /dev/null +++ b/observability-lib/cmd/api/api.go @@ -0,0 +1,31 @@ +package api + +import ( + "github.com/smartcontractkit/chainlink-common/observability-lib/cmd/api/contact_point" + "github.com/smartcontractkit/chainlink-common/observability-lib/cmd/api/dashboard" + "github.com/smartcontractkit/chainlink-common/observability-lib/cmd/api/notification_policy" + "github.com/spf13/cobra" +) + +var Cmd = &cobra.Command{ + Use: "api [resources]", + Short: 
"Select resources to perform actions", +} + +func init() { + Cmd.AddCommand(contact_point.Cmd) + Cmd.AddCommand(dashboard.Cmd) + Cmd.AddCommand(notification_policy.Cmd) + + Cmd.PersistentFlags().String("grafana-url", "", "Grafana URL") + errURL := Cmd.MarkPersistentFlagRequired("grafana-url") + if errURL != nil { + panic(errURL) + } + + Cmd.PersistentFlags().String("grafana-token", "", "Grafana API token") + errToken := Cmd.MarkPersistentFlagRequired("grafana-token") + if errToken != nil { + panic(errToken) + } +} diff --git a/observability-lib/cmd/api/contact_point/contact-point.go b/observability-lib/cmd/api/contact_point/contact-point.go new file mode 100644 index 000000000..e32f202c6 --- /dev/null +++ b/observability-lib/cmd/api/contact_point/contact-point.go @@ -0,0 +1,14 @@ +package contact_point + +import ( + "github.com/spf13/cobra" +) + +var Cmd = &cobra.Command{ + Use: "contact-point [actions]", + Short: "Perform actions on contact point", +} + +func init() { + Cmd.AddCommand(listCmd, deleteCmd) +} diff --git a/observability-lib/cmd/api/contact_point/delete.go b/observability-lib/cmd/api/contact_point/delete.go new file mode 100644 index 000000000..01af1940e --- /dev/null +++ b/observability-lib/cmd/api/contact_point/delete.go @@ -0,0 +1,35 @@ +package contact_point + +import ( + "errors" + + "github.com/smartcontractkit/chainlink-common/observability-lib/api" + "github.com/spf13/cobra" +) + +var deleteCmd = &cobra.Command{ + Use: "delete [name]", + Short: "Delete contact point by name", + RunE: func(cmd *cobra.Command, args []string) error { + grafanaClient := api.NewClient( + cmd.Flag("grafana-url").Value.String(), + cmd.Flag("grafana-token").Value.String(), + ) + + contactPoint, err := grafanaClient.GetContactPointByName(args[0]) + if err != nil { + return err + } + + if contactPoint == nil { + return errors.New("contact point not found") + } + + _, _, errDelete := grafanaClient.DeleteContactPoint(*contactPoint.Uid) + if errDelete != nil { + return 
errDelete + } + + return nil + }, +} diff --git a/observability-lib/cmd/api/contact_point/list.go b/observability-lib/cmd/api/contact_point/list.go new file mode 100644 index 000000000..ca62b79f7 --- /dev/null +++ b/observability-lib/cmd/api/contact_point/list.go @@ -0,0 +1,28 @@ +package contact_point + +import ( + "github.com/smartcontractkit/chainlink-common/observability-lib/api" + "github.com/spf13/cobra" +) + +var listCmd = &cobra.Command{ + Use: "list", + Short: "List contact point", + RunE: func(cmd *cobra.Command, args []string) error { + grafanaClient := api.NewClient( + cmd.Flag("grafana-url").Value.String(), + cmd.Flag("grafana-token").Value.String(), + ) + + contactPoints, _, err := grafanaClient.GetContactPoints() + if err != nil { + return err + } + + for _, contactPoint := range contactPoints { + cmd.Printf("| Name: %s | UID: %s\n", *contactPoint.Name, *contactPoint.Uid) + } + + return nil + }, +} diff --git a/observability-lib/cmd/api/dashboard/dashboard.go b/observability-lib/cmd/api/dashboard/dashboard.go new file mode 100644 index 000000000..8c46d0abe --- /dev/null +++ b/observability-lib/cmd/api/dashboard/dashboard.go @@ -0,0 +1,14 @@ +package dashboard + +import ( + "github.com/spf13/cobra" +) + +var Cmd = &cobra.Command{ + Use: "dashboard [actions]", + Short: "Perform actions on dashboard", +} + +func init() { + Cmd.AddCommand(deleteCmd) +} diff --git a/observability-lib/cmd/api/dashboard/delete.go b/observability-lib/cmd/api/dashboard/delete.go new file mode 100644 index 000000000..5b40f41a5 --- /dev/null +++ b/observability-lib/cmd/api/dashboard/delete.go @@ -0,0 +1,35 @@ +package dashboard + +import ( + "errors" + + "github.com/smartcontractkit/chainlink-common/observability-lib/api" + "github.com/spf13/cobra" +) + +var deleteCmd = &cobra.Command{ + Use: "delete [name]", + Short: "Delete dashboard by name", + RunE: func(cmd *cobra.Command, args []string) error { + grafanaClient := api.NewClient( + cmd.Flag("grafana-url").Value.String(), + 
cmd.Flag("grafana-token").Value.String(), + ) + + delDashboard, _, err := grafanaClient.GetDashboardByName(args[0]) + if err != nil { + return err + } + + if delDashboard.UID == nil { + return errors.New("contact point not found") + } + + _, errDelete := grafanaClient.DeleteDashboardByUID(*delDashboard.UID) + if errDelete != nil { + return errDelete + } + + return nil + }, +} diff --git a/observability-lib/cmd/api/notification_policy/delete.go b/observability-lib/cmd/api/notification_policy/delete.go new file mode 100644 index 000000000..918a1d5dd --- /dev/null +++ b/observability-lib/cmd/api/notification_policy/delete.go @@ -0,0 +1,61 @@ +package notification_policy + +import ( + "errors" + "strings" + + "github.com/grafana/grafana-foundation-sdk/go/alerting" + "github.com/smartcontractkit/chainlink-common/observability-lib/api" + "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" + "github.com/spf13/cobra" +) + +var deleteCmd = &cobra.Command{ + Use: "delete [receiver]", + Short: "Delete notification policy", + RunE: func(cmd *cobra.Command, args []string) error { + grafanaClient := api.NewClient( + cmd.Flag("grafana-url").Value.String(), + cmd.Flag("grafana-token").Value.String(), + ) + + if len(args) != 1 { + return errors.New("receiver argument missing") + } + + matchers, err := cmd.Flags().GetStringArray("matchers") + if err != nil { + return err + } + if matchers != nil && len(matchers) > 0 { + objectMatchers := alerting.ObjectMatchers{} + notificationPolicy := alerting.NotificationPolicy{ + Receiver: grafana.Pointer(args[0]), + } + for _, matcher := range matchers { + objectMatcher := strings.Split(matcher, ",") + if len(objectMatcher) != 3 { + return errors.New("invalid matcher format must be key,operator,value") + } + + objectMatchers = append(objectMatchers, objectMatcher) + } + notificationPolicy.ObjectMatchers = &objectMatchers + errDelete := grafanaClient.DeleteNestedPolicy(notificationPolicy) + + if errDelete != nil { + return 
errDelete + } + } + + return nil + }, +} + +func init() { + deleteCmd.Flags().StringArray("matchers", []string{}, "Object matchers, in the form of key,operator,value e.g. 'key,=,value'") + errMatchers := deleteCmd.MarkFlagRequired("matchers") + if errMatchers != nil { + panic(errMatchers) + } +} diff --git a/observability-lib/cmd/api/notification_policy/list.go b/observability-lib/cmd/api/notification_policy/list.go new file mode 100644 index 000000000..d20971561 --- /dev/null +++ b/observability-lib/cmd/api/notification_policy/list.go @@ -0,0 +1,26 @@ +package notification_policy + +import ( + "github.com/grafana/grafana-foundation-sdk/go/alerting" + "github.com/smartcontractkit/chainlink-common/observability-lib/api" + "github.com/spf13/cobra" +) + +var listCmd = &cobra.Command{ + Use: "list", + Short: "List notification policy", + RunE: func(cmd *cobra.Command, args []string) error { + grafanaClient := api.NewClient( + cmd.Flag("grafana-url").Value.String(), + cmd.Flag("grafana-token").Value.String(), + ) + + notificationPolicyTree, _, err := grafanaClient.GetNotificationPolicy() + if err != nil { + return err + } + + api.PrintPolicyTree(alerting.NotificationPolicy(notificationPolicyTree), 0) + return nil + }, +} diff --git a/observability-lib/cmd/api/notification_policy/notification-policy.go b/observability-lib/cmd/api/notification_policy/notification-policy.go new file mode 100644 index 000000000..8361c3a2a --- /dev/null +++ b/observability-lib/cmd/api/notification_policy/notification-policy.go @@ -0,0 +1,14 @@ +package notification_policy + +import ( + "github.com/spf13/cobra" +) + +var Cmd = &cobra.Command{ + Use: "notification-policy [actions]", + Short: "Perform actions on notification policy", +} + +func init() { + Cmd.AddCommand(listCmd, deleteCmd) +} diff --git a/observability-lib/cmd/builder.go b/observability-lib/cmd/builder.go deleted file mode 100644 index 4be5289e2..000000000 --- a/observability-lib/cmd/builder.go +++ /dev/null @@ -1,115 +0,0 @@ 
-package cmd - -import ( - "errors" - - atlasdon "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/atlas-don" - "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/capabilities" - corenode "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/core-node" - corenodecomponents "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/core-node-components" - k8sresources "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/k8s-resources" - nopocr "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/nop-ocr" - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -type TypeDashboard string - -const ( - TypeDashboardCoreNode TypeDashboard = "core-node" - TypeDashboardCoreNodeComponents TypeDashboard = "core-node-components" - TypeDashboardCoreNodeResources TypeDashboard = "core-node-resources" - TypeDashboardDONOCR TypeDashboard = "don-ocr" - TypeDashboardDONOCR2 TypeDashboard = "don-ocr2" - TypeDashboardDONOCR3 TypeDashboard = "don-ocr3" - TypeDashboardNOPOCR2 TypeDashboard = "nop-ocr2" - TypeDashboardNOPOCR3 TypeDashboard = "nop-ocr3" - TypeDashboardCapabilities TypeDashboard = "capabilities" -) - -type OCRVersion string - -const ( - OCRVersionOCR OCRVersion = "ocr" - OCRVersionOCR2 OCRVersion = "ocr2" - OCRVersionOCR3 OCRVersion = "ocr3" -) - -type BuildOptions struct { - Name string - Platform grafana.TypePlatform - TypeDashboard TypeDashboard - MetricsDataSource *grafana.DataSource - LogsDataSource *grafana.DataSource - SlackChannel string - SlackWebhookURL string - AlertsTags map[string]string - AlertsFilters string -} - -func BuildDashboardWithType(options *BuildOptions) (*grafana.Observability, error) { - switch options.TypeDashboard { - case TypeDashboardCoreNode: - return corenode.NewDashboard(&corenode.Props{ - Name: options.Name, - Platform: options.Platform, - MetricsDataSource: options.MetricsDataSource, - 
LogsDataSource: options.LogsDataSource, - SlackChannel: options.SlackChannel, - SlackWebhookURL: options.SlackWebhookURL, - AlertsTags: options.AlertsTags, - AlertsFilters: options.AlertsFilters, - }) - case TypeDashboardCoreNodeComponents: - return corenodecomponents.NewDashboard(&corenodecomponents.Props{ - Name: options.Name, - Platform: options.Platform, - MetricsDataSource: options.MetricsDataSource, - LogsDataSource: options.LogsDataSource, - }) - case TypeDashboardCoreNodeResources: - if options.Platform != grafana.TypePlatformKubernetes { - return nil, errors.New("core-node-resources dashboard is only available for kubernetes") - } - return k8sresources.NewDashboard(&k8sresources.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - }) - case TypeDashboardDONOCR: - return atlasdon.NewDashboard(&atlasdon.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - OCRVersion: string(OCRVersionOCR), - }) - case TypeDashboardDONOCR2: - return atlasdon.NewDashboard(&atlasdon.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - OCRVersion: string(OCRVersionOCR2), - }) - case TypeDashboardDONOCR3: - return atlasdon.NewDashboard(&atlasdon.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - OCRVersion: string(OCRVersionOCR3), - }) - case TypeDashboardNOPOCR2: - return nopocr.NewDashboard(&nopocr.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - OCRVersion: string(OCRVersionOCR2), - }) - case TypeDashboardNOPOCR3: - return nopocr.NewDashboard(&nopocr.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - OCRVersion: string(OCRVersionOCR3), - }) - case TypeDashboardCapabilities: - return capabilities.NewDashboard(&capabilities.Props{ - Name: options.Name, - MetricsDataSource: options.MetricsDataSource, - }) - default: - return nil, errors.New("invalid dashboard type") - } -} diff --git a/observability-lib/cmd/delete.go 
b/observability-lib/cmd/delete.go deleted file mode 100644 index f60783c5e..000000000 --- a/observability-lib/cmd/delete.go +++ /dev/null @@ -1,36 +0,0 @@ -package cmd - -import ( - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - "github.com/spf13/cobra" -) - -var DeleteCmd = &cobra.Command{ - Use: "delete", - Short: "Delete Grafana Dashboard", - RunE: func(cmd *cobra.Command, args []string) error { - return grafana.DeleteDashboard(&grafana.DeleteOptions{ - GrafanaURL: cmd.Flag("grafana-url").Value.String(), - GrafanaToken: cmd.Flag("grafana-token").Value.String(), - Name: cmd.Flag("dashboard-name").Value.String(), - }) - }, -} - -func init() { - DeleteCmd.Flags().String("dashboard-name", "", "Name of the dashboard to deploy") - errName := DeleteCmd.MarkFlagRequired("dashboard-name") - if errName != nil { - panic(errName) - } - DeleteCmd.Flags().String("grafana-url", "", "Grafana URL") - errURL := DeleteCmd.MarkFlagRequired("grafana-url") - if errURL != nil { - panic(errURL) - } - DeleteCmd.Flags().String("grafana-token", "", "Grafana API token") - errToken := DeleteCmd.MarkFlagRequired("grafana-token") - if errToken != nil { - panic(errToken) - } -} diff --git a/observability-lib/cmd/deploy.go b/observability-lib/cmd/deploy.go deleted file mode 100644 index 17e2b0633..000000000 --- a/observability-lib/cmd/deploy.go +++ /dev/null @@ -1,109 +0,0 @@ -package cmd - -import ( - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - "github.com/spf13/cobra" -) - -var DeployCmd = &cobra.Command{ - Use: "deploy", - Short: "Deploy Grafana dashboard and associated alerts", - RunE: func(cmd *cobra.Command, args []string) error { - alertsTags, errAlertsTags := cmd.Flags().GetStringToString("alerts-tags") - if errAlertsTags != nil { - return errAlertsTags - } - - var metricsDataSource *grafana.DataSource - if cmd.Flag("metrics-datasource").Value.String() != "" { - var errMetricsDataSource error - metricsDataSource, 
errMetricsDataSource = grafana.GetDataSourceFromGrafana( - cmd.Flag("metrics-datasource").Value.String(), - cmd.Flag("grafana-url").Value.String(), - cmd.Flag("grafana-token").Value.String(), - ) - - if errMetricsDataSource != nil { - return errMetricsDataSource - } - } - - var logsDataSource *grafana.DataSource - if cmd.Flag("logs-datasource").Value.String() != "" { - var errLogsDataSource error - logsDataSource, errLogsDataSource = grafana.GetDataSourceFromGrafana( - cmd.Flag("logs-datasource").Value.String(), - cmd.Flag("grafana-url").Value.String(), - cmd.Flag("grafana-token").Value.String(), - ) - - if errLogsDataSource != nil { - return errLogsDataSource - } - } - - dashboard, err := BuildDashboardWithType(&BuildOptions{ - Name: cmd.Flag("dashboard-name").Value.String(), - Platform: grafana.TypePlatform(cmd.Flag("platform").Value.String()), - TypeDashboard: TypeDashboard(cmd.Flag("type").Value.String()), - MetricsDataSource: metricsDataSource, - LogsDataSource: logsDataSource, - SlackChannel: cmd.Flag("slack-channel").Value.String(), - SlackWebhookURL: cmd.Flag("slack-webhook").Value.String(), - AlertsTags: alertsTags, - AlertsFilters: cmd.Flag("alerts-filters").Value.String(), - }) - if err != nil { - return err - } - - errDeploy := dashboard.DeployToGrafana(&grafana.DeployOptions{ - GrafanaURL: cmd.Flag("grafana-url").Value.String(), - GrafanaToken: cmd.Flag("grafana-token").Value.String(), - FolderName: cmd.Flag("dashboard-folder").Value.String(), - EnableAlerts: cmd.Flag("enable-alerts").Value.String() == "true", - NotificationTemplates: cmd.Flag("notification-templates").Value.String(), - }) - if errDeploy != nil { - return errDeploy - } - - return nil - }, -} - -func init() { - DeployCmd.Flags().String("dashboard-name", "", "Name of the dashboard to deploy") - errName := DeployCmd.MarkFlagRequired("dashboard-name") - if errName != nil { - panic(errName) - } - DeployCmd.Flags().String("dashboard-folder", "", "Dashboard folder") - errFolder := 
DeployCmd.MarkFlagRequired("dashboard-folder") - if errFolder != nil { - panic(errFolder) - } - DeployCmd.Flags().String("grafana-url", "", "Grafana URL") - errURL := DeployCmd.MarkFlagRequired("grafana-url") - if errURL != nil { - panic(errURL) - } - DeployCmd.Flags().String("grafana-token", "", "Grafana API token") - errToken := DeployCmd.MarkFlagRequired("grafana-token") - if errToken != nil { - panic(errToken) - } - DeployCmd.Flags().String("metrics-datasource", "Prometheus", "Metrics datasource name") - DeployCmd.Flags().String("logs-datasource", "", "Logs datasource name") - DeployCmd.Flags().String("platform", "docker", "Platform where the dashboard is deployed (docker or kubernetes)") - DeployCmd.Flags().String("type", "core-node", "Dashboard type can be either core-node | core-node-components | core-node-resources | don-ocr | don-ocr2 | don-ocr3 | nop-ocr2 | nop-ocr3") - DeployCmd.Flags().Bool("enable-alerts", false, "Deploy alerts") - DeployCmd.Flags().StringToString("alerts-tags", map[string]string{ - "team": "chainlink-team", - }, "Alerts tags") - DeployCmd.Flags().String("notification-templates", "", "Filepath in yaml format, will create notification templates depending on key-value pairs in the yaml file") - DeployCmd.Flags().String("slack-channel", "", "Slack channel, required when setting up slack contact points") - DeployCmd.Flags().String("slack-webhook", "", "Slack webhook URL, required when setting up slack contact points") - DeployCmd.Flags().String("slack-token", "", "Slack token, required when setting up slack contact points and slack webhook is not provided") - DeployCmd.Flags().String("alerts-filters", "", "Alerts Filters applied to the queries") -} diff --git a/observability-lib/cmd/generate.go b/observability-lib/cmd/generate.go deleted file mode 100644 index 821c3ee90..000000000 --- a/observability-lib/cmd/generate.go +++ /dev/null @@ -1,46 +0,0 @@ -package cmd - -import ( - "fmt" - - 
"github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - "github.com/spf13/cobra" -) - -var GenerateCmd = &cobra.Command{ - Use: "generate", - Short: "Generate Grafana Dashboard JSON", - RunE: func(cmd *cobra.Command, args []string) error { - dashboard, err := BuildDashboardWithType(&BuildOptions{ - Name: cmd.Flag("dashboard-name").Value.String(), - Platform: grafana.TypePlatform(cmd.Flag("platform").Value.String()), - TypeDashboard: TypeDashboard(cmd.Flag("type").Value.String()), - MetricsDataSource: grafana.NewDataSource(cmd.Flag("metrics-datasource").Value.String(), ""), - LogsDataSource: grafana.NewDataSource(cmd.Flag("logs-datasource").Value.String(), ""), - }) - if err != nil { - return err - } - - dashboardJSON, errDashboardJSON := dashboard.GenerateJSON() - if errDashboardJSON != nil { - return errDashboardJSON - } - - fmt.Print(string(dashboardJSON)) - - return nil - }, -} - -func init() { - GenerateCmd.Flags().String("dashboard-name", "", "Name of the dashboard to deploy") - errName := GenerateCmd.MarkFlagRequired("dashboard-name") - if errName != nil { - panic(errName) - } - GenerateCmd.Flags().String("metrics-datasource", "Prometheus", "Metrics datasource name") - GenerateCmd.Flags().String("logs-datasource", "", "Logs datasource name") - GenerateCmd.Flags().String("platform", "docker", "Platform where the dashboard is deployed (docker or kubernetes)") - GenerateCmd.Flags().String("type", "core-node", "Dashboard type can be either core-node | core-node-components | core-node-resources | don-ocr | don-ocr2 | don-ocr3") -} diff --git a/observability-lib/cmd/log.go b/observability-lib/cmd/log.go deleted file mode 100644 index 8e643f1f9..000000000 --- a/observability-lib/cmd/log.go +++ /dev/null @@ -1,28 +0,0 @@ -package cmd - -import ( - "os" - - "github.com/rs/zerolog" - "github.com/rs/zerolog/log" -) - -const ( - LogLevelEnvVar = "DASHBOARD_LOG_LEVEL" -) - -var ( - Logger zerolog.Logger -) - -func init() { - lvlStr := 
os.Getenv(LogLevelEnvVar) - if lvlStr == "" { - lvlStr = "info" - } - lvl, err := zerolog.ParseLevel(lvlStr) - if err != nil { - panic(err) - } - Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr}).Level(lvl) -} diff --git a/observability-lib/cmd/notification-templates.yaml b/observability-lib/cmd/notification-templates.yaml deleted file mode 100644 index 60df98cc6..000000000 --- a/observability-lib/cmd/notification-templates.yaml +++ /dev/null @@ -1,65 +0,0 @@ -slack: |- - {{ define "slack.chainlink.text" }} - {{- $root := . -}} - {{ range .Alerts }} - {{ template "slack.print_alert" . }} - {{ end }} - {{ end }} - - {{ define "slack.print_alert" }} - *Summary:* ```{{ .Annotations.summary }}``` - {{ if gt (len .Annotations.description) 0 }}*Description:* ```{{ .Annotations.description }}```{{ end }} - *Labels:* ```{{- range .Labels.SortedPairs }} - {{- if and (ne .Name "alertname") (ne .Name "grafana_folder") (ne .Name "severity") }} - • {{ .Name }}: {{ .Value }} - {{- end }} - {{- end }}``` - {{- if gt (len .GeneratorURL ) 0 }} - <{{ .GeneratorURL }}|:grafana: Grafana Alert URL> - {{- end }} - {{- if gt (len .DashboardURL ) 0 }} - <{{ .DashboardURL }}|:desktop_computer: Dashboard URL> - {{- end }} - {{- if gt (len .PanelURL ) 0 }} - <{{ .PanelURL }}|:bar_chart: Panel URL> - {{- end }} - {{- if gt (len .SilenceURL ) 0 }} - <{{ .SilenceURL }}|:no_bell: Silence alert> - {{- end }} - {{- if gt (len .Annotations.runbook_url ) 0 }} - <{{ .Annotations.runbook_url }}|:spiral_note_pad: Runbook> - {{- end }} - {{ end }} - - {{ define "slack.chainlink.color" }} - {{- if ne .Status "firing" -}} - good - {{- else if eq .CommonLabels.severity "critical" -}} - danger - {{- else if eq .CommonLabels.severity "warning" -}} - warning - {{- end -}} - {{ end }} - - {{ define "alert_severity_prefix_emoji" -}} - {{- if ne .Status "firing" -}} - :white_check_mark: - {{- else if eq .CommonLabels.severity "critical" -}} - :red_circle: - {{- else if eq .CommonLabels.severity 
"warning" -}} - :warning: - {{- end -}} - {{- end -}} - - {{ define "slack.chainlink.title" }} - {{ template "alert_severity_prefix_emoji" . }} [{{- if gt (len .Alerts.Resolved) 0}}{{ .Status | toUpper }}{{- else }}{{ .CommonLabels.severity | toUpper }}{{- end }}:{{ .Alerts | len }}] {{ .CommonLabels.alertname }} - {{ end }} - -pagerduty: |- - {{ define "pagerduty.chainlink.title" }} - [{{- if gt (len .Alerts.Resolved) 0}}{{ .Status | toUpper }}{{- else }}{{ .CommonLabels.severity | toUpper }}{{- end }}:{{ .Alerts | len }}] {{ .CommonLabels.alertname }} - {{ end }} - - {{ define "pagerduty.chainlink.severity" }} - {{ if .CommonLabels.severity }}{{ .CommonLabels.severity | toLower }}{{ else }}critical{{ end }} - {{ end }} \ No newline at end of file diff --git a/observability-lib/cmd/root.go b/observability-lib/cmd/root.go new file mode 100644 index 000000000..df7e375f2 --- /dev/null +++ b/observability-lib/cmd/root.go @@ -0,0 +1,23 @@ +package cmd + +import ( + "log" + + "github.com/smartcontractkit/chainlink-common/observability-lib/cmd/api" + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "observability-lib [command]", + Short: "observability-lib CLI to perform actions on observability resources", +} + +func init() { + rootCmd.AddCommand(api.Cmd) +} + +func Execute() { + if err := rootCmd.Execute(); err != nil { + log.Fatalln(err) + } +} diff --git a/observability-lib/dashboards/atlas-don/component.go b/observability-lib/dashboards/atlas-don/component.go deleted file mode 100644 index 098bda871..000000000 --- a/observability-lib/dashboards/atlas-don/component.go +++ /dev/null @@ -1,790 +0,0 @@ -package atlasdon - -import ( - "fmt" - - "github.com/grafana/grafana-foundation-sdk/go/cog" - "github.com/grafana/grafana-foundation-sdk/go/common" - "github.com/grafana/grafana-foundation-sdk/go/dashboard" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -func NewDashboard(props *Props) (*grafana.Observability, error) 
{ - if props.Name == "" { - return nil, fmt.Errorf("Name is required") - } - - if props.OCRVersion == "" { - return nil, fmt.Errorf("OCRVersion is required") - } - - if props.MetricsDataSource == nil { - return nil, fmt.Errorf("MetricsDataSource is required") - } else { - if props.MetricsDataSource.Name == "" { - return nil, fmt.Errorf("MetricsDataSource.Name is required") - } - if props.MetricsDataSource.UID == "" { - return nil, fmt.Errorf("MetricsDataSource.UID is required") - } - } - - props.platformOpts = platformPanelOpts(props.OCRVersion) - if props.Tested { - props.platformOpts.LabelQuery = "" - } - - builder := grafana.NewBuilder(&grafana.BuilderOptions{ - Name: props.Name, - Tags: []string{"DON", props.OCRVersion}, - Refresh: "30s", - TimeFrom: "now-30m", - TimeTo: "now", - }) - - builder.AddVars(vars(props)...) - - builder.AddRow("Summary") - builder.AddPanel(summary(props)...) - - builder.AddRow("OCR Contract Oracle") - builder.AddPanel(ocrContractConfigOracle(props)...) - - builder.AddRow("DON Nodes") - builder.AddPanel(ocrContractConfigNodes(props)...) - - builder.AddRow("Price Reporting") - builder.AddPanel(priceReporting(props)...) - - builder.AddRow("Round / Epoch Progression") - builder.AddPanel(roundEpochProgression(props)...) - - builder.AddRow("OCR Contract Config Delta") - builder.AddPanel(ocrContractConfigDelta(props)...) 
- - return builder.Build() -} - -func vars(p *Props) []cog.Builder[dashboard.VariableModel] { - var variables []cog.Builder[dashboard.VariableModel] - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Job", - Name: "job", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{namespace` + p.platformOpts.LabelFilters["namespace"] + `}, job)`, - })) - - variableFeedID := "feed_id" - if p.OCRVersion == "ocr3" { - variableFeedID = "feed_id_name" - } - - variableQueryContract := grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Contract", - Name: "contract", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(` + p.OCRVersion + `_contract_config_f{job="$job"}, contract)`, - }) - - variableQueryFeedID := grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Feed ID", - Name: variableFeedID, - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(` + p.OCRVersion + `_contract_config_f{job="$job", contract="$contract"}, ` + variableFeedID + `)`, - Multi: true, - }) - - variables = append(variables, variableQueryContract) - - switch p.OCRVersion { - case "ocr2": - variables = append(variables, variableQueryFeedID) - case "ocr3": - variables = append(variables, variableQueryFeedID) - } - - return variables -} - -func summary(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Telemetry Down", - Description: "Which jobs are not receiving any telemetry?", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `bool:` + p.OCRVersion + `_telemetry_down{` + p.platformOpts.LabelQuery + `} == 1`, - Legend: "{{job}} | {{report_type}}", - }, - }, - Threshold: 
&grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "green"}, - {Value: grafana.Pointer[float64](0.99), Color: "red"}, - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Oracle Down", - Description: "Which NOPs are not providing any telemetry?", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `bool:` + p.OCRVersion + `_oracle_telemetry_down_except_telemetry_down{job=~"${job}", oracle!="csa_unknown"} == 1`, - Legend: "{{oracle}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "green"}, - {Value: grafana.Pointer[float64](0.99), Color: "red"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Feeds reporting failure", - Description: "Which feeds are failing to report?", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `bool:` + p.OCRVersion + `_feed_reporting_failure_except_feed_telemetry_down{job=~"${job}", oracle!="csa_unknown"} == 1`, - Legend: "{{feed_id_name}} on {{job}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: 
grafana.Pointer[float64](0), Color: "green"}, - {Value: grafana.Pointer[float64](0.99), Color: "red"}, - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Feed telemetry Down", - Description: "Which feeds are not receiving any telemetry?", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `bool:` + p.OCRVersion + `_feed_telemetry_down_except_telemetry_down{job=~"${job}"} == 1`, - Legend: "{{feed_id_name}} on {{job}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "green"}, - {Value: grafana.Pointer[float64](0.99), Color: "red"}, - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Oracles no observations", - Description: "Which NOPs are not providing observations?", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `bool:` + p.OCRVersion + `_oracle_blind_except_telemetry_down{job=~"${job}"} == 1`, - Legend: "{{oracle}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "green"}, - {Value: grafana.Pointer[float64](0.99), Color: "red"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - 
- panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Oracles not contributing observations to feeds", - Description: "Which oracles are failing to make observations on feeds they should be participating in?", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `bool:` + p.OCRVersion + `_oracle_feed_no_observations_except_oracle_blind_except_feed_reporting_failure_except_feed_telemetry_down{job=~"${job}"} == 1`, - Legend: "{{oracle}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "green"}, - {Value: grafana.Pointer[float64](0.99), Color: "red"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - return panels -} - -func ocrContractConfigOracle(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "OCR Contract Oracle Active", - Description: "set to one as long as an oracle is on a feed", - Span: 24, - Height: 8, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(` + p.OCRVersion + `_contract_oracle_active{` + p.platformOpts.LabelQuery + `}) by (contract, oracle)`, - Legend: "{{oracle}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - 
ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - return panels -} - -func ocrContractConfigNodes(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - var variableFeedID string - switch p.OCRVersion { - case "ocr": - variableFeedID = "contract" - case "ocr2": - variableFeedID = "feed_id" - case "ocr3": - variableFeedID = "feed_id_name" - } - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Number of NOPs", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_n{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + variableFeedID + `}}`, - }, - { - Expr: `` + p.OCRVersion + `_contract_config_r_max{` + p.platformOpts.LabelQuery + `}`, - Legend: `Max nodes`, - }, - { - Expr: `avg(2 * ` + p.OCRVersion + `_contract_config_f{` + p.platformOpts.LabelQuery + `} + 1)`, - Legend: `Min nodes`, - }, - }, - Min: grafana.Pointer[float64](0), - }, - })) - - return panels -} - -func priceReporting(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - telemetryP2PReceivedTotal := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "P2P messages received", - Description: "From an individual node's perspective, how many messages are they receiving from other nodes? 
Uses ocr_telemetry_p2p_received_total", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum by (sender, receiver) (increase(` + p.OCRVersion + `_telemetry_p2p_received_total{job=~"${job}"}[5m]))`, - Legend: `{{sender}} > {{receiver}}`, - }, - }, - }, - }) - - telemetryP2PReceivedTotalRate := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "P2P messages received Rate", - Description: "From an individual node's perspective, how many messages are they receiving from other nodes? Uses ocr_telemetry_p2p_received_total", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum by (sender, receiver) (rate(` + p.OCRVersion + `_telemetry_p2p_received_total{job=~"${job}"}[5m]))`, - Legend: `{{sender}} > {{receiver}}`, - }, - }, - }, - }) - - telemetryObservationAsk := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Ask observation in MessageObserve sent", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_observation_ask{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryObservation := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Price observation in MessageObserve sent", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_observation{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryObservationBid := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Bid observation in MessageObserve sent", - Span: 24, - Height: 6, - 
Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_observation_bid{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryMessageProposeObservationAsk := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Ask MessagePropose observations", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_message_propose_observation_ask{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryMessageProposeObservation := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Price MessagePropose observations", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_message_propose_observation{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryMessageProposeObservationBid := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Bid MessagePropose observations", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_message_propose_observation_bid{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryMessageProposeObservationTotal := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Total number of observations included in MessagePropose", - Description: "How often is a node's observation included in the report?", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + 
`_telemetry_message_propose_observation_total{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - telemetryMessageObserveTotal := grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Total MessageObserve sent", - Description: "From an individual node's perspective, how often are they sending an observation?", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `rate(` + p.OCRVersion + `_telemetry_message_observe_total{` + p.platformOpts.LabelQuery + `}[5m])`, - Legend: `{{oracle}}`, - }, - }, - }, - }) - - switch p.OCRVersion { - case "ocr": - panels = append(panels, telemetryP2PReceivedTotal) - panels = append(panels, telemetryP2PReceivedTotalRate) - panels = append(panels, telemetryObservation) - panels = append(panels, telemetryMessageObserveTotal) - case "ocr2": - panels = append(panels, telemetryP2PReceivedTotal) - panels = append(panels, telemetryP2PReceivedTotalRate) - panels = append(panels, telemetryObservation) - panels = append(panels, telemetryMessageObserveTotal) - case "ocr3": - panels = append(panels, telemetryP2PReceivedTotal) - panels = append(panels, telemetryP2PReceivedTotalRate) - panels = append(panels, telemetryObservationAsk) - panels = append(panels, telemetryObservation) - panels = append(panels, telemetryObservationBid) - panels = append(panels, telemetryMessageProposeObservationAsk) - panels = append(panels, telemetryMessageProposeObservation) - panels = append(panels, telemetryMessageProposeObservationBid) - panels = append(panels, telemetryMessageProposeObservationTotal) - panels = append(panels, telemetryMessageObserveTotal) - } - - return panels -} - -func roundEpochProgression(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - var variableFeedID string - switch p.OCRVersion { - case "ocr": - variableFeedID = "contract" - case "ocr2": - variableFeedID = "feed_id" - case 
"ocr3": - variableFeedID = "feed_id_name" - } - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Agreed Epoch Progression", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "short", - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_feed_agreed_epoch{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + variableFeedID + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Round Epoch Progression", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "short", - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_telemetry_epoch_round{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{oracle}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Rounds Started", - Description: `Tracks individual nodes firing "new round" message via telemetry (not part of P2P messages)`, - Span: 12, - Height: 6, - Decimals: 1, - Unit: "short", - Query: []grafana.Query{ - { - Expr: `rate(` + p.OCRVersion + `_telemetry_round_started_total{` + p.platformOpts.LabelQuery + `}[1m])`, - Legend: `{{oracle}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Telemetry Ingested", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "short", - Query: []grafana.Query{ - { - Expr: `rate(` + p.OCRVersion + `_telemetry_ingested_total{` + p.platformOpts.LabelQuery + `}[1m])`, - Legend: `{{oracle}}`, - }, - }, - }, - })) - - return panels -} - -func ocrContractConfigDelta(p *Props) []*grafana.Panel { - var panels 
[]*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Relative Deviation Threshold", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_alpha{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Max Contract Value Age Seconds", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_delta_c_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Observation Grace Period Seconds", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_delta_grace_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Bad Epoch Timeout Seconds", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_delta_progress_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, 
- Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Resend Interval Seconds", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_delta_resend_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Round Interval Seconds", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_delta_round_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Transmission Stage Timeout Second", - Span: 8, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `` + p.OCRVersion + `_contract_config_delta_stage_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{contract}}", - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - return panels -} diff --git a/observability-lib/dashboards/atlas-don/component_test.go b/observability-lib/dashboards/atlas-don/component_test.go deleted file mode 100644 index 7ce5bb775..000000000 --- a/observability-lib/dashboards/atlas-don/component_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package atlasdon_test - -import ( - "flag" - "os" - "testing" - - "github.com/stretchr/testify/require" - - 
"github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - - atlasdon "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/atlas-don" -) - -var update = flag.Bool("update", false, "update golden test files") - -const fileOutput = "test-output.json" - -func TestGenerateFile(t *testing.T) { - if *update == false { - t.Skip("skipping test") - } - - testDashboard, err := atlasdon.NewDashboard(&atlasdon.Props{ - Name: "DON OCR Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", "1"), - OCRVersion: "ocr2", - Tested: true, - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - if _, errExists := os.Stat(fileOutput); errExists == nil { - errRemove := os.Remove(fileOutput) - if errRemove != nil { - t.Errorf("Error removing file: %v", errRemove) - } - } - file, errFile := os.Create(fileOutput) - if errFile != nil { - panic(errFile) - } - writeString, err := file.WriteString(string(json)) - if err != nil { - t.Errorf("Error writing to file: %v", writeString) - } - t.Cleanup(func() { - file.Close() - }) -} - -func TestNewDashboard(t *testing.T) { - t.Run("NewDashboard creates a dashboard", func(t *testing.T) { - testDashboard, err := atlasdon.NewDashboard(&atlasdon.Props{ - Name: "DON OCR Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", "1"), - OCRVersion: "ocr2", - Tested: true, - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - require.IsType(t, grafana.Observability{}, *testDashboard) - require.Equal(t, "DON OCR Dashboard", *testDashboard.Dashboard.Title) - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - - jsonCompared, errCompared := os.ReadFile(fileOutput) - if errCompared != nil { - t.Errorf("Error reading file: %v", errCompared) - } - - require.JSONEq(t, 
string(jsonCompared), string(json)) - }) -} diff --git a/observability-lib/dashboards/atlas-don/platform.go b/observability-lib/dashboards/atlas-don/platform.go deleted file mode 100644 index dd953a2f7..000000000 --- a/observability-lib/dashboards/atlas-don/platform.go +++ /dev/null @@ -1,57 +0,0 @@ -package atlasdon - -import ( - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -type platformOpts struct { - LabelFilters map[string]string - LabelFilter string - LegendString string - LabelQuery string -} - -type Props struct { - Name string // Name is the name of the dashboard - MetricsDataSource *grafana.DataSource // MetricsDataSource is the datasource for querying metrics - OCRVersion string // OCRVersion is the version of the OCR (ocr, ocr2, ocr3) - platformOpts platformOpts - Tested bool -} - -// PlatformPanelOpts generate different queries depending on params -func platformPanelOpts(ocrVersion string) platformOpts { - po := platformOpts{ - LabelFilters: map[string]string{ - "contract": `=~"${contract}"`, - }, - } - - variableFeedID := "feed_id" - if ocrVersion == "ocr3" { - variableFeedID = "feed_id_name" - } - - switch ocrVersion { - case "ocr2": - po.LabelFilters[variableFeedID] = `=~"${` + variableFeedID + `}"` - case "ocr3": - po.LabelFilters[variableFeedID] = `=~"${` + variableFeedID + `}"` - } - namespace := "otpe" - if ocrVersion == "ocr2" { - namespace = "otpe2" - } else if ocrVersion == "ocr3" { - namespace = "otpe3" - } - - po.LabelFilters["namespace"] = `="` + namespace + `"` - po.LabelFilters["job"] = `=~"${job}"` - po.LabelFilter = "job" - po.LegendString = "job" - - for key, value := range po.LabelFilters { - po.LabelQuery += key + value + ", " - } - return po -} diff --git a/observability-lib/dashboards/atlas-don/test-output.json b/observability-lib/dashboards/atlas-don/test-output.json deleted file mode 100644 index 5e9e19748..000000000 --- a/observability-lib/dashboards/atlas-don/test-output.json +++ /dev/null @@ 
-1,1503 +0,0 @@ -{ - "Dashboard": { - "title": "DON OCR Dashboard", - "tags": [ - "DON", - "ocr2" - ], - "timezone": "browser", - "editable": true, - "graphTooltip": 0, - "time": { - "from": "now-30m", - "to": "now" - }, - "fiscalYearStartMonth": 0, - "refresh": "30s", - "schemaVersion": 39, - "panels": [ - { - "type": "row", - "collapsed": false, - "title": "Summary", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 1, - "targets": [ - { - "expr": "bool:ocr2_telemetry_down{} == 1", - "format": "", - "legendFormat": "{{job}} | {{report_type}}", - "refId": "" - } - ], - "title": "Telemetry Down", - "description": "Which jobs are not receiving any telemetry?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 0, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "green" - }, - { - "value": 0.99, - "color": "red" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 2, - "targets": [ - { - "expr": "bool:ocr2_oracle_telemetry_down_except_telemetry_down{job=~\"${job}\", oracle!=\"csa_unknown\"} == 1", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Oracle Down", - "description": "Which NOPs are not providing any telemetry?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 8, - "y": 1 - }, - 
"transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "green" - }, - { - "value": 0.99, - "color": "red" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 3, - "targets": [ - { - "expr": "bool:ocr2_feed_reporting_failure_except_feed_telemetry_down{job=~\"${job}\", oracle!=\"csa_unknown\"} == 1", - "format": "", - "legendFormat": "{{feed_id_name}} on {{job}}", - "refId": "" - } - ], - "title": "Feeds reporting failure", - "description": "Which feeds are failing to report?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "green" - }, - { - "value": 0.99, - "color": "red" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 4, - "targets": [ - { - "expr": 
"bool:ocr2_feed_telemetry_down_except_telemetry_down{job=~\"${job}\"} == 1", - "format": "", - "legendFormat": "{{feed_id_name}} on {{job}}", - "refId": "" - } - ], - "title": "Feed telemetry Down", - "description": "Which feeds are not receiving any telemetry?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 0, - "y": 5 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "green" - }, - { - "value": 0.99, - "color": "red" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 5, - "targets": [ - { - "expr": "bool:ocr2_oracle_blind_except_telemetry_down{job=~\"${job}\"} == 1", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Oracles no observations", - "description": "Which NOPs are not providing observations?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 8, - "y": 5 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - 
"unit": "", - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "green" - }, - { - "value": 0.99, - "color": "red" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 6, - "targets": [ - { - "expr": "bool:ocr2_oracle_feed_no_observations_except_oracle_blind_except_feed_reporting_failure_except_feed_telemetry_down{job=~\"${job}\"} == 1", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Oracles not contributing observations to feeds", - "description": "Which oracles are failing to make observations on feeds they should be participating in?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 5 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "green" - }, - { - "value": 0.99, - "color": "red" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "OCR Contract Oracle", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 7, - "targets": [ - { - "expr": "sum(ocr2_contract_oracle_active{}) by (contract, oracle)", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } 
- ], - "title": "OCR Contract Oracle Active", - "description": "set to one as long as an oracle is on a feed", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 10 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "DON Nodes", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 8, - "targets": [ - { - "expr": "ocr2_contract_config_n{}", - "format": "", - "legendFormat": "{{feed_id}}", - "refId": "" - }, - { - "expr": "ocr2_contract_config_r_max{}", - "format": "", - "legendFormat": "Max nodes", - "refId": "" - }, - { - "expr": "avg(2 * ocr2_contract_config_f{} + 1)", - "format": "", - "legendFormat": "Min nodes", - "refId": "" - } - ], - "title": "Number of NOPs", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 19 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - 
"decimals": 1, - "min": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Price Reporting", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 9, - "targets": [ - { - "expr": "sum by (sender, receiver) (increase(ocr2_telemetry_p2p_received_total{job=~\"${job}\"}[5m]))", - "format": "", - "legendFormat": "{{sender}} \u003e {{receiver}}", - "refId": "" - } - ], - "title": "P2P messages received", - "description": "From an individual node's perspective, how many messages are they receiving from other nodes? Uses ocr_telemetry_p2p_received_total", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 26 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 10, - "targets": [ - { - "expr": "sum by (sender, receiver) (rate(ocr2_telemetry_p2p_received_total{job=~\"${job}\"}[5m]))", - "format": "", - "legendFormat": "{{sender}} \u003e {{receiver}}", - "refId": "" - } - ], - "title": "P2P messages received Rate", - "description": "From an individual node's perspective, how many messages are they receiving from other nodes? 
Uses ocr_telemetry_p2p_received_total", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 32 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 11, - "targets": [ - { - "expr": "ocr2_telemetry_observation{}", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Price observation in MessageObserve sent", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 38 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 12, - "targets": [ - { - "expr": "rate(ocr2_telemetry_message_observe_total{}[5m])", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Total MessageObserve sent", - "description": "From an individual node's perspective, how often are they sending an observation?", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 44 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - 
"decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Round / Epoch Progression", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 50 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 13, - "targets": [ - { - "expr": "ocr2_telemetry_feed_agreed_epoch{}", - "format": "", - "legendFormat": "{{feed_id}}", - "refId": "" - } - ], - "title": "Agreed Epoch Progression", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 51 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "short", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 14, - "targets": [ - { - "expr": "ocr2_telemetry_epoch_round{}", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Round Epoch Progression", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 51 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "short", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 15, - "targets": [ - { - "expr": "rate(ocr2_telemetry_round_started_total{}[1m])", - "format": "", - "legendFormat": 
"{{oracle}}", - "refId": "" - } - ], - "title": "Rounds Started", - "description": "Tracks individual nodes firing \"new round\" message via telemetry (not part of P2P messages)", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 57 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "short", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 16, - "targets": [ - { - "expr": "rate(ocr2_telemetry_ingested_total{}[1m])", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Telemetry Ingested", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 57 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "short", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "OCR Contract Config Delta", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 63 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 17, - "targets": [ - { - "expr": "ocr2_contract_config_alpha{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Relative Deviation Threshold", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 0, - "y": 64 - }, - 
"options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 18, - "targets": [ - { - "expr": "ocr2_contract_config_delta_c_seconds{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Max Contract Value Age Seconds", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 8, - "y": 64 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 19, - "targets": [ - { - "expr": "ocr2_contract_config_delta_grace_seconds{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Observation Grace Period Seconds", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 64 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - 
"percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 20, - "targets": [ - { - "expr": "ocr2_contract_config_delta_progress_seconds{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Bad Epoch Timeout Seconds", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 0, - "y": 68 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 21, - "targets": [ - { - "expr": "ocr2_contract_config_delta_resend_seconds{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Resend Interval Seconds", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 8, - "y": 68 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 22, - "targets": [ - { - "expr": 
"ocr2_contract_config_delta_round_seconds{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Round Interval Seconds", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 68 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 23, - "targets": [ - { - "expr": "ocr2_contract_config_delta_stage_seconds{}", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Transmission Stage Timeout Second", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 0, - "y": 72 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - } - ], - "templating": { - "list": [ - { - "type": "query", - "name": "job", - "label": "Job", - "description": "", - "query": "label_values(up{namespace=\"otpe2\"}, job)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": 
"query", - "name": "contract", - "label": "Contract", - "description": "", - "query": "label_values(ocr2_contract_config_f{job=\"$job\"}, contract)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "feed_id", - "label": "Feed ID", - "description": "", - "query": "label_values(ocr2_contract_config_f{job=\"$job\", contract=\"$contract\"}, feed_id)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": true, - "sort": 1 - } - ] - }, - "annotations": {} - }, - "Alerts": null, - "AlertGroups": null, - "ContactPoints": null, - "NotificationPolicies": null -} \ No newline at end of file diff --git a/observability-lib/dashboards/capabilities/component.go b/observability-lib/dashboards/capabilities/component.go deleted file mode 100644 index 9c36d9be7..000000000 --- a/observability-lib/dashboards/capabilities/component.go +++ /dev/null @@ -1,219 +0,0 @@ -package capabilities - -import ( - "fmt" - - "github.com/grafana/grafana-foundation-sdk/go/cog" - "github.com/grafana/grafana-foundation-sdk/go/dashboard" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -type Props struct { - Name string // Name is the name of the dashboard - MetricsDataSource *grafana.DataSource // MetricsDataSource is the datasource for querying metrics -} - -// NewDashboard creates a Capabilities dashboard -func NewDashboard(props *Props) (*grafana.Observability, error) { - if props.Name == "" { - return nil, fmt.Errorf("Name is required") - } - - if props.MetricsDataSource == nil { - return nil, fmt.Errorf("MetricsDataSource is required") - } else { - if props.MetricsDataSource.Name == "" { - return nil, fmt.Errorf("MetricsDataSource.Name is required") - } - } - - builder := grafana.NewBuilder(&grafana.BuilderOptions{ - 
Name: props.Name, - Tags: []string{"Capabilities"}, - Refresh: "30s", - TimeFrom: "now-7d", - TimeTo: "now", - }) - - builder.AddVars(vars(props)...) - - builder.AddRow("Common indicators for capabilities") - builder.AddPanel(capabilitiesCommon(props)...) - - return builder.Build() -} - -func vars(p *Props) []cog.Builder[dashboard.VariableModel] { - var variables []cog.Builder[dashboard.VariableModel] - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Environment", - Name: "env", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up, env)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Cluster", - Name: "cluster", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env"}, cluster)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Namespace", - Name: "namespace", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster"}, namespace)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Job", - Name: "job", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", namespace="$namespace"}, job)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Pod", - Name: "pod", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", namespace="$namespace", job="$job"}, pod)`, - Multi: false, - })) - - variables = append(variables, 
grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Capability", - Name: "capability", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", namespace="$namespace", job="$job"}, pod)`, - Multi: false, - })) - - return variables -} - -func capabilitiesCommon(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Execution Time", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `capability_execution_time_ms`, - Legend: "{{capability}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Runs Count", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `capability_runs_count`, - Legend: "{{capability}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Runs Fault Count", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `capability_runs_fault_count`, - Legend: "{{capability}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Runs Invalid Count", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `capability_runs_invalid_count`, - Legend: "{{capability}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: 
&grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Runs Unauthorized Count", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `capability_runs_unauthorized_count`, - Legend: "{{capability}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Runs No Resource Count", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `capability_runs_no_resource_count`, - Legend: "{{capability}}", - }, - }, - }, - })) - - return panels -} diff --git a/observability-lib/dashboards/capabilities/component_test.go b/observability-lib/dashboards/capabilities/component_test.go deleted file mode 100644 index 90ad5ce9e..000000000 --- a/observability-lib/dashboards/capabilities/component_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package capabilities_test - -import ( - "flag" - "os" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - - "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/capabilities" -) - -var update = flag.Bool("update", false, "update golden test files") - -const fileOutput = "test-output.json" - -func TestGenerateFile(t *testing.T) { - if *update == false { - t.Skip("skipping test") - } - - testDashboard, err := capabilities.NewDashboard(&capabilities.Props{ - Name: "Capabilities Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - if _, errExists := os.Stat(fileOutput); errExists == nil { - errRemove := os.Remove(fileOutput) - if errRemove != nil { - t.Errorf("Error removing file: %v", errRemove) - } - } - 
file, errFile := os.Create(fileOutput) - if errFile != nil { - panic(errFile) - } - writeString, err := file.WriteString(string(json)) - if err != nil { - t.Errorf("Error writing to file: %v", writeString) - } - t.Cleanup(func() { - file.Close() - }) -} - -func TestNewDashboard(t *testing.T) { - t.Run("NewDashboard creates a dashboard", func(t *testing.T) { - testDashboard, err := capabilities.NewDashboard(&capabilities.Props{ - Name: "Capabilities Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - require.IsType(t, grafana.Observability{}, *testDashboard) - require.Equal(t, "Capabilities Dashboard", *testDashboard.Dashboard.Title) - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - - jsonCompared, errCompared := os.ReadFile(fileOutput) - if errCompared != nil { - t.Errorf("Error reading file: %v", errCompared) - } - - require.JSONEq(t, string(jsonCompared), string(json)) - }) -} diff --git a/observability-lib/dashboards/capabilities/test-output.json b/observability-lib/dashboards/capabilities/test-output.json deleted file mode 100644 index 5bb361e82..000000000 --- a/observability-lib/dashboards/capabilities/test-output.json +++ /dev/null @@ -1,468 +0,0 @@ -{ - "Dashboard": { - "title": "Capabilities Dashboard", - "tags": [ - "Capabilities" - ], - "timezone": "browser", - "editable": true, - "graphTooltip": 0, - "time": { - "from": "now-7d", - "to": "now" - }, - "fiscalYearStartMonth": 0, - "refresh": "30s", - "schemaVersion": 39, - "panels": [ - { - "type": "row", - "collapsed": false, - "title": "Common indicators for capabilities", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 1, - "targets": [ - { - "expr": "capability_execution_time_ms", - "format": "", - "legendFormat": "{{capability}}", - "refId": "" - } - 
], - "title": "Execution Time", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 1 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 2, - "targets": [ - { - "expr": "capability_runs_count", - "format": "", - "legendFormat": "{{capability}}", - "refId": "" - } - ], - "title": "Runs Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 1 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 3, - "targets": [ - { - "expr": "capability_runs_fault_count", - "format": "", - "legendFormat": "{{capability}}", - "refId": "" - } - ], - "title": "Runs Fault Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 1 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - 
"overrides": null - } - }, - { - "type": "timeseries", - "id": 4, - "targets": [ - { - "expr": "capability_runs_invalid_count", - "format": "", - "legendFormat": "{{capability}}", - "refId": "" - } - ], - "title": "Runs Invalid Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 1 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 5, - "targets": [ - { - "expr": "capability_runs_unauthorized_count", - "format": "", - "legendFormat": "{{capability}}", - "refId": "" - } - ], - "title": "Runs Unauthorized Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 5 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 6, - "targets": [ - { - "expr": "capability_runs_no_resource_count", - "format": "", - "legendFormat": "{{capability}}", - "refId": "" - } - ], - "title": "Runs No Resource Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 5 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": 
[] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - } - ], - "templating": { - "list": [ - { - "type": "query", - "name": "env", - "label": "Environment", - "description": "", - "query": "label_values(up, env)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "cluster", - "label": "Cluster", - "description": "", - "query": "label_values(up{env=\"$env\"}, cluster)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "namespace", - "label": "Namespace", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\"}, namespace)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "job", - "label": "Job", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", namespace=\"$namespace\"}, job)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "pod", - "label": "Pod", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$job\"}, pod)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - 
"name": "capability", - "label": "Capability", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$job\"}, pod)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - } - ] - }, - "annotations": {} - }, - "Alerts": null, - "AlertGroups": null, - "ContactPoints": null, - "NotificationPolicies": null -} \ No newline at end of file diff --git a/observability-lib/dashboards/core-node-components/component.go b/observability-lib/dashboards/core-node-components/component.go deleted file mode 100644 index 6175fd438..000000000 --- a/observability-lib/dashboards/core-node-components/component.go +++ /dev/null @@ -1,210 +0,0 @@ -package corenodecomponents - -import ( - "fmt" - - "github.com/grafana/grafana-foundation-sdk/go/cog" - "github.com/grafana/grafana-foundation-sdk/go/common" - "github.com/grafana/grafana-foundation-sdk/go/dashboard" - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -func NewDashboard(props *Props) (*grafana.Observability, error) { - if props.Name == "" { - return nil, fmt.Errorf("Name is required") - } - - props.platformOpts = platformPanelOpts() - if props.Tested { - props.platformOpts.LabelQuery = "" - } - - builder := grafana.NewBuilder(&grafana.BuilderOptions{ - Name: props.Name, - Tags: []string{"Core", "Node", "Components"}, - Refresh: "30s", - TimeFrom: "now-30m", - TimeTo: "now", - }) - - builder.AddVars(vars(props)...) - builder.AddPanel(panelsGeneralInfo(props)...) 
- - return builder.Build() -} - -func vars(p *Props) []cog.Builder[dashboard.VariableModel] { - var variables []cog.Builder[dashboard.VariableModel] - - variables = append(variables, grafana.NewIntervalVariable(&grafana.IntervalVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Interval", - Name: "interval", - }, - Interval: "30s,1m,5m,15m,30m,1h,6h,12h", - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Environment", - Name: "env", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up, env)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Cluster", - Name: "cluster", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env"}, cluster)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Blockchain", - Name: "blockchain", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster"}, blockchain)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Product", - Name: "product", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", blockchain="$blockchain"}, product)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Network Type", - Name: "network_type", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", blockchain="$blockchain", product="$product"}, network_type)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: 
&grafana.VariableOption{ - Label: "Component", - Name: "component", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", blockchain="$blockchain", network_type="$network_type"}, component)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Service", - Name: "service", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", blockchain="$blockchain", network_type="$network_type", component="$component"}, service)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Service ID", - Name: "service_id", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(health{cluster="$cluster", blockchain="$blockchain", network_type="$network_type", component="$component", service="$service"}, service_id)`, - Multi: true, - IncludeAll: true, - })) - - return variables -} - -func panelsGeneralInfo(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Components Health Avg by Service", - Span: 24, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * avg(avg_over_time(health{` + p.platformOpts.LabelQuery + `service_id=~"${service_id}"}[$interval])) by (service_id, version, service, cluster, env)`, - Legend: "{{service_id}}", - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - }, - GraphMode: 
common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationVertical, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Components Health by Service", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * (health{` + p.platformOpts.LabelQuery + `service_id=~"${service_id}"})`, - Legend: "{{service_id}}", - }, - }, - Min: grafana.Pointer[float64](0), - Max: grafana.Pointer[float64](100), - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Components Health Avg by Service", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * (avg(avg_over_time(health{` + p.platformOpts.LabelQuery + `service_id=~"${service_id}"}[$interval])) by (service_id, version, service, cluster, env))`, - Legend: "{{service_id}}", - }, - }, - Min: grafana.Pointer[float64](0), - Max: grafana.Pointer[float64](100), - }, - })) - - panels = append(panels, grafana.NewLogPanel(&grafana.LogPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.LogsDataSource.Name, - Title: "Logs with severity >= error", - Span: 24, - Height: 6, - Query: []grafana.Query{ - { - Expr: `{env="${env}", cluster="${cluster}", product="${product}", network_type="${network_type}", instance=~"${service}"} | json | level=~"(error|panic|fatal|crit)"`, - Legend: "", - }, - }, - }, - })) - - return panels -} diff --git a/observability-lib/dashboards/core-node-components/component_test.go b/observability-lib/dashboards/core-node-components/component_test.go deleted file mode 100644 index ce257164c..000000000 --- a/observability-lib/dashboards/core-node-components/component_test.go +++ /dev/null @@ -1,81 
+0,0 @@ -package corenodecomponents_test - -import ( - "flag" - "os" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - - corenodecomponents "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/core-node-components" -) - -var update = flag.Bool("update", false, "update golden test files") - -const fileOutput = "test-output.json" - -func TestGenerateFile(t *testing.T) { - if *update == false { - t.Skip("skipping test") - } - - testDashboard, err := corenodecomponents.NewDashboard(&corenodecomponents.Props{ - Name: "Core Node Components Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - LogsDataSource: grafana.NewDataSource("Loki", ""), - Tested: true, - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - if _, errExists := os.Stat(fileOutput); errExists == nil { - errRemove := os.Remove(fileOutput) - if errRemove != nil { - t.Errorf("Error removing file: %v", errRemove) - } - } - file, errFile := os.Create(fileOutput) - if errFile != nil { - panic(errFile) - } - writeString, err := file.WriteString(string(json)) - if err != nil { - t.Errorf("Error writing to file: %v", writeString) - } - t.Cleanup(func() { - file.Close() - }) -} - -func TestNewDashboard(t *testing.T) { - t.Run("NewDashboard creates a dashboard", func(t *testing.T) { - testDashboard, err := corenodecomponents.NewDashboard(&corenodecomponents.Props{ - Name: "Core Node Components Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - LogsDataSource: grafana.NewDataSource("Loki", ""), - Tested: true, - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - require.IsType(t, grafana.Observability{}, *testDashboard) - require.Equal(t, "Core Node Components Dashboard", 
*testDashboard.Dashboard.Title) - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - - jsonCompared, errCompared := os.ReadFile(fileOutput) - if errCompared != nil { - t.Errorf("Error reading file: %v", errCompared) - } - - require.JSONEq(t, string(jsonCompared), string(json)) - }) -} diff --git a/observability-lib/dashboards/core-node-components/platform.go b/observability-lib/dashboards/core-node-components/platform.go deleted file mode 100644 index cd64ad669..000000000 --- a/observability-lib/dashboards/core-node-components/platform.go +++ /dev/null @@ -1,40 +0,0 @@ -package corenodecomponents - -import "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - -type platformOpts struct { - // Platform is infrastructure deployment platform: docker or k8s - Platform string - LabelFilters map[string]string - LabelFilter string - LegendString string - LabelQuery string -} - -type Props struct { - Name string // Name is the name of the dashboard - Platform grafana.TypePlatform // Platform is infrastructure deployment platform: docker or k8s - MetricsDataSource *grafana.DataSource // MetricsDataSource is the datasource for querying metrics - LogsDataSource *grafana.DataSource // LogsDataSource is the datasource for querying logs - platformOpts platformOpts - Tested bool -} - -// PlatformPanelOpts generate different queries for "docker" and "k8s" deployment platforms -func platformPanelOpts() platformOpts { - po := platformOpts{ - LabelFilters: map[string]string{ - "env": `=~"${env}"`, - "cluster": `=~"${cluster}"`, - "blockchain": `=~"${blockchain}"`, - "product": `=~"${product}"`, - "network_type": `=~"${network_type}"`, - "component": `=~"${component}"`, - "service": `=~"${service}"`, - }, - } - for key, value := range po.LabelFilters { - po.LabelQuery += key + value + ", " - } - return po -} diff --git a/observability-lib/dashboards/core-node-components/test-output.json 
b/observability-lib/dashboards/core-node-components/test-output.json deleted file mode 100644 index c7e2a3082..000000000 --- a/observability-lib/dashboards/core-node-components/test-output.json +++ /dev/null @@ -1,433 +0,0 @@ -{ - "Dashboard": { - "title": "Core Node Components Dashboard", - "tags": [ - "Core", - "Node", - "Components" - ], - "timezone": "browser", - "editable": true, - "graphTooltip": 0, - "time": { - "from": "now-30m", - "to": "now" - }, - "fiscalYearStartMonth": 0, - "refresh": "30s", - "schemaVersion": 39, - "panels": [ - { - "type": "stat", - "id": 1, - "targets": [ - { - "expr": "100 * avg(avg_over_time(health{service_id=~\"${service_id}\"}[$interval])) by (service_id, version, service, cluster, env)", - "format": "", - "legendFormat": "{{service_id}}", - "refId": "" - } - ], - "title": "Components Health Avg by Service", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 0 - }, - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "vertical" - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 80, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 2, - "targets": [ - { - "expr": "100 * (health{service_id=~\"${service_id}\"})", - "format": "", - "legendFormat": "{{service_id}}", - "refId": "" - } - ], - "title": "Components Health by Service", - "description": "", - "transparent": 
false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 4 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "min": 0, - "max": 100, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 3, - "targets": [ - { - "expr": "100 * (avg(avg_over_time(health{service_id=~\"${service_id}\"}[$interval])) by (service_id, version, service, cluster, env))", - "format": "", - "legendFormat": "{{service_id}}", - "refId": "" - } - ], - "title": "Components Health Avg by Service", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 10 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "min": 0, - "max": 100, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "logs", - "id": 4, - "targets": [ - { - "expr": "{env=\"${env}\", cluster=\"${cluster}\", product=\"${product}\", network_type=\"${network_type}\", instance=~\"${service}\"} | json | level=~\"(error|panic|fatal|crit)\"", - "format": "", - "legendFormat": "", - "refId": "" - } - ], - "title": "Logs with severity \u003e= error", - "description": "", - "transparent": false, - "datasource": { - "uid": "Loki" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 16 - }, - "options": { - "showLabels": false, - "showCommonLabels": false, - "showTime": false, - 
"showLogContextToggle": false, - "wrapLogMessage": false, - "prettifyLogMessage": false, - "enableLogDetails": false, - "sortOrder": "", - "dedupStrategy": "" - }, - "fieldConfig": { - "defaults": { - "noValue": "No data" - }, - "overrides": null - } - } - ], - "templating": { - "list": [ - { - "type": "interval", - "name": "interval", - "label": "Interval", - "description": "", - "query": "30s,1m,5m,15m,30m,1h,6h,12h", - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - } - }, - { - "type": "query", - "name": "env", - "label": "Environment", - "description": "", - "query": "label_values(up, env)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "cluster", - "label": "Cluster", - "description": "", - "query": "label_values(up{env=\"$env\"}, cluster)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "blockchain", - "label": "Blockchain", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\"}, blockchain)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "product", - "label": "Product", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\"}, product)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "network_type", - "label": "Network Type", - "description": "", - "query": "label_values(up{env=\"$env\", 
cluster=\"$cluster\", blockchain=\"$blockchain\", product=\"$product\"}, network_type)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "component", - "label": "Component", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\"}, component)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "service", - "label": "Service", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\", component=\"$component\"}, service)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "service_id", - "label": "Service ID", - "description": "", - "query": "label_values(health{cluster=\"$cluster\", blockchain=\"$blockchain\", network_type=\"$network_type\", component=\"$component\", service=\"$service\"}, service_id)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": true, - "sort": 1, - "includeAll": true - } - ] - }, - "annotations": {} - }, - "Alerts": null, - "AlertGroups": null, - "ContactPoints": null, - "NotificationPolicies": null -} \ No newline at end of file diff --git a/observability-lib/dashboards/core-node/component.go b/observability-lib/dashboards/core-node/component.go deleted file mode 100644 index 3e69e97a8..000000000 --- a/observability-lib/dashboards/core-node/component.go +++ /dev/null @@ -1,2301 +0,0 @@ -package corenode - 
-import ( - "fmt" - "strconv" - - "github.com/grafana/grafana-foundation-sdk/go/alerting" - "github.com/grafana/grafana-foundation-sdk/go/cog" - "github.com/grafana/grafana-foundation-sdk/go/common" - "github.com/grafana/grafana-foundation-sdk/go/dashboard" - "github.com/grafana/grafana-foundation-sdk/go/expr" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -// NewDashboard creates a DON dashboard for the given OCR version -func NewDashboard(props *Props) (*grafana.Observability, error) { - if props.Name == "" { - return nil, fmt.Errorf("Name is required") - } - - if props.Platform == "" { - return nil, fmt.Errorf("Platform is required") - } - - if props.MetricsDataSource == nil { - return nil, fmt.Errorf("MetricsDataSource is required") - } else { - if props.MetricsDataSource.Name == "" { - return nil, fmt.Errorf("MetricsDataSource.Name is required") - } - if props.MetricsDataSource.UID == "" { - return nil, fmt.Errorf("MetricsDataSource.UID is required") - } - } - - if props.LogsDataSource == nil { - return nil, fmt.Errorf("LogsDataSource is required") - } else { - if props.LogsDataSource.Name == "" { - return nil, fmt.Errorf("LogsDataSource.Name is required") - } - if props.LogsDataSource.UID == "" { - return nil, fmt.Errorf("LogsDataSource.UID is required") - } - } - - props.platformOpts = platformPanelOpts(props.Platform) - if props.Tested { - props.platformOpts.LabelQuery = "" - } - - builder := grafana.NewBuilder(&grafana.BuilderOptions{ - Name: props.Name, - Tags: []string{"Core", "Node"}, - Refresh: "30s", - TimeFrom: "now-30m", - TimeTo: "now", - AlertsTags: props.AlertsTags, - }) - - builder.AddAlertGroup(grafana.NewAlertGroup(&grafana.AlertGroupOptions{ - Title: props.Name, - Interval: 60, - })) - - if props.SlackChannel != "" && props.SlackWebhookURL != "" { - builder.AddContactPoint(grafana.NewContactPoint(&grafana.ContactPointOptions{ - Name: "chainlink-slack", - Type: "slack", - Settings: map[string]interface{}{ - 
"url": props.SlackWebhookURL, - "recipient": props.SlackChannel, - "username": "Chainlink Alerts", - "title": `{{ template "slack.chainlink.title" . }}`, - "text": `{{ template "slack.chainlink.text" . }}`, - "color": `{{ template "slack.chainlink.color" . }}`, - }, - })) - - notificationPolicySlackOptions := &grafana.NotificationPolicyOptions{ - Receiver: "chainlink-slack", - GroupBy: []string{"grafana_folder", "alertname"}, - Continue: grafana.Pointer(true), - } - for name, value := range props.AlertsTags { - notificationPolicySlackOptions.ObjectMatchers = append(notificationPolicySlackOptions.ObjectMatchers, alerting.ObjectMatcher{name, "=", value}) - } - builder.AddNotificationPolicy(grafana.NewNotificationPolicy(notificationPolicySlackOptions)) - } - - builder.AddVars(vars(props)...) - - builder.AddRow("Headlines") - builder.AddPanel(headlines(props)...) - - builder.AddRow("AppDBConnections") - builder.AddPanel(appDBConnections(props)...) - - builder.AddRow("SQLQueries") - builder.AddPanel(sqlQueries(props)...) - - builder.AddRow("HeadTracker") - builder.AddPanel(headTracker(props)...) - - builder.AddRow("HeadReporter") - builder.AddPanel(headReporter(props)...) - - builder.AddRow("TxManager") - builder.AddPanel(txManager(props)...) - - builder.AddRow("LogPoller") - builder.AddPanel(logPoller(props)...) - - builder.AddRow("Feeds Jobs") - builder.AddPanel(feedsJobs(props)...) - - builder.AddRow("Mailbox") - builder.AddPanel(mailbox(props)...) - - builder.AddRow("Logs Counters") - builder.AddPanel(logsCounters(props)...) - - builder.AddRow("Logs Rate") - builder.AddPanel(logsRate(props)...) - - builder.AddRow("EvmPoolLifecycle") - builder.AddPanel(evmPoolLifecycle(props)...) - - builder.AddRow("Node RPC State") - builder.AddPanel(nodesRPC(props)...) - - builder.AddRow("EVM Pool RPC Node Metrics (App)") - builder.AddPanel(evmNodeRPC(props)...) - - builder.AddRow("EVM Pool RPC Node Latencies (App)") - builder.AddPanel(evmPoolRPCNodeLatencies(props)...) 
- - builder.AddRow("Block History Estimator") - builder.AddPanel(evmBlockHistoryEstimator(props)...) - - builder.AddRow("Pipeline Metrics (Runner)") - builder.AddPanel(pipelines(props)...) - - builder.AddRow("HTTP API") - builder.AddPanel(httpAPI(props)...) - - builder.AddRow("PromHTTP") - builder.AddPanel(promHTTP(props)...) - - builder.AddRow("Go Metrics") - builder.AddPanel(goMetrics(props)...) - - return builder.Build() -} - -func vars(p *Props) []cog.Builder[dashboard.VariableModel] { - var variables []cog.Builder[dashboard.VariableModel] - - if p.platformOpts.Platform == "kubernetes" { - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Environment", - Name: "env", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds, env)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Cluster", - Name: "cluster", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env"}, cluster)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Namespace", - Name: "namespace", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env", cluster="$cluster"}, namespace)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Blockchain", - Name: "blockchain", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env", cluster="$cluster", namespace="$namespace"}, blockchain)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Product", - Name: "product", - }, - Datasource: 
p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env", cluster="$cluster", namespace="$namespace", blockchain="$blockchain"}, product)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Network Type", - Name: "network_type", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env", cluster="$cluster", namespace="$namespace", blockchain="$blockchain", product="$product"}, network_type)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Job", - Name: "job", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env", cluster="$cluster", namespace="$namespace", blockchain="$blockchain", product="$product", network_type="$network_type"}, job)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Pod", - Name: "pod", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(uptime_seconds{env="$env", cluster="$cluster", namespace="$namespace", job="$job"}, pod)`, - Multi: true, - IncludeAll: true, - })) - } else { - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Instance", - Name: "instance", - }, - Datasource: p.MetricsDataSource.Name, - Query: fmt.Sprintf("label_values(%s)", p.platformOpts.LabelFilter), - Multi: true, - IncludeAll: true, - })) - } - - return variables -} - -func healthAverageAlertRule(p *Props, threshold float64, tags map[string]string) grafana.AlertOptions { - return grafana.AlertOptions{ - Title: `Health Avg by Service is less than ` + strconv.FormatFloat(threshold, 'f', -1, 64) + `%`, - Summary: `Uptime less than ` + strconv.FormatFloat(threshold, 'f', -1, 64) + `% 
over last 15 minutes on one component in a Node`, - Description: `Component {{ index $labels "service_id" }} uptime in the last 15m is {{ index $values "C" }}%`, - RunbookURL: "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - For: "15m", - Tags: tags, - Query: []grafana.RuleQuery{ - { - Expr: `health{` + p.AlertsFilters + `}`, - RefID: "A", - Datasource: p.MetricsDataSource.UID, - }, - }, - QueryRefCondition: "D", - Condition: []grafana.ConditionQuery{ - { - RefID: "B", - ReduceExpression: &grafana.ReduceExpression{ - Expression: "A", - Reducer: expr.TypeReduceReducerMean, - }, - }, - { - RefID: "C", - MathExpression: &grafana.MathExpression{ - Expression: "$B * 100", - }, - }, - { - RefID: "D", - ThresholdExpression: &grafana.ThresholdExpression{ - Expression: "C", - ThresholdConditionsOptions: grafana.ThresholdConditionsOption{ - Params: []float64{threshold}, - Type: grafana.TypeThresholdTypeLt, - }, - }, - }, - }, - } -} - -func headlines(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "App Version", - Description: "app version with commit and branch links", - Span: 12, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `version{` + p.platformOpts.LabelQuery + `}`, - Legend: "Version: {{version}} https://github.com/smartcontractkit/chainlink/commit/{{commit}} https://github.com/smartcontractkit/chainlink/tree/release/{{version}}", - Instant: true, - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - TextMode: common.BigValueTextModeName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Uptime", - Description: "instance uptime", - Span: 12, - Height: 4, - Decimals: 2, 
- Unit: "s", - Query: []grafana.Query{ - { - Expr: `uptime_seconds{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "ETH Balance Summary", - Span: 12, - Height: 4, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `sum(eth_balance{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `, account)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{account}}`, - Instant: true, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - }, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Solana Balance Summary", - Span: 12, - Height: 4, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `sum(solana_balance{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `, account)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{account}}`, - Instant: true, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - }, - GraphMode: common.BigValueGraphModeLine, - 
TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Health Avg by Service over 15m", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * (avg(avg_over_time(health{` + p.platformOpts.LabelQuery + `}[15m])) by (` + p.platformOpts.LabelFilter + `, service_id, version, service, cluster, env))`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{service_id}}`, - }, - }, - Min: grafana.Pointer[float64](0), - Max: grafana.Pointer[float64](100), - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "green"}, - {Value: grafana.Pointer[float64](50), Color: "red"}, - {Value: grafana.Pointer[float64](70), Color: "orange"}, - {Value: grafana.Pointer[float64](90), Color: "green"}, - }, - }, - }, - ThresholdStyle: common.GraphThresholdsStyleModeDashed, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - AlertsOptions: []grafana.AlertOptions{ - healthAverageAlertRule(p, 90, map[string]string{"severity": "info"}), - healthAverageAlertRule(p, 70, map[string]string{"severity": "warning"}), - healthAverageAlertRule(p, 50, map[string]string{"severity": "critical"}), - }, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Health Avg by Service over 15m with health < 90%", - Description: "Only displays services with health average < 90%", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * avg(avg_over_time(health{` + p.platformOpts.LabelQuery + `}[15m])) by (` + 
p.platformOpts.LabelFilter + `, service_id, version, service, cluster, env) < 90`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{service_id}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "green"}, - {Value: grafana.Pointer[float64](1), Color: "red"}, - {Value: grafana.Pointer[float64](80), Color: "orange"}, - {Value: grafana.Pointer[float64](99), Color: "green"}, - }, - }, - NoValue: "All services healthy", - }, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewLogPanel(&grafana.LogPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.LogsDataSource.Name, - Title: "Logs with severity >= error", - Span: 24, - Height: 10, - Query: []grafana.Query{ - { - Expr: `{env="${env}", cluster="${cluster}", product="${product}", network_type="${network_type}", namespace="${namespace}", pod="${pod}"} | json | level=~"(error|panic|fatal|crit)"`, - Legend: "", - }, - }, - }, - PrettifyJSON: true, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "ETH Balance", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `sum(eth_balance{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `, account)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{account}}`, - }, - }, - }, - AlertsOptions: []grafana.AlertOptions{ - { - Summary: `ETH Balance is lower than threshold`, - Description: `ETH Balance critically low at {{ index $values "A" }} on {{ index $labels "` + p.platformOpts.LabelFilter + `" }}`, - RunbookURL: "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - For: "15m", - NoDataState: alerting.RuleNoDataStateOK, - 
Tags: map[string]string{ - "severity": "critical", - }, - Query: []grafana.RuleQuery{ - { - Expr: `eth_balance{` + p.AlertsFilters + `}`, - Instant: true, - RefID: "A", - Datasource: p.MetricsDataSource.UID, - }, - }, - QueryRefCondition: "B", - Condition: []grafana.ConditionQuery{ - { - RefID: "B", - ThresholdExpression: &grafana.ThresholdExpression{ - Expression: "A", - ThresholdConditionsOptions: grafana.ThresholdConditionsOption{ - Params: []float64{1}, - Type: grafana.TypeThresholdTypeLt, - }, - }, - }, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "SOL Balance", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `sum(solana_balance{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `, account)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{account}}`, - }, - }, - }, - AlertsOptions: []grafana.AlertOptions{ - { - Summary: `Solana Balance is lower than threshold`, - Description: `Solana Balance critically low at {{ index $values "A" }} on {{ index $labels "` + p.platformOpts.LabelFilter + `" }}`, - RunbookURL: "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - For: "15m", - NoDataState: alerting.RuleNoDataStateOK, - Tags: map[string]string{ - "severity": "critical", - }, - Query: []grafana.RuleQuery{ - { - Expr: `solana_balance{` + p.AlertsFilters + `}`, - Instant: true, - RefID: "A", - Datasource: p.MetricsDataSource.UID, - }, - }, - QueryRefCondition: "B", - Condition: []grafana.ConditionQuery{ - { - RefID: "B", - ThresholdExpression: &grafana.ThresholdExpression{ - Expression: "A", - ThresholdConditionsOptions: grafana.ThresholdConditionsOption{ - Params: []float64{1}, - Type: grafana.TypeThresholdTypeLt, - }, - }, - }, - }, - }, - }, - })) - - if p.platformOpts.Platform == "kubernetes" { - panels = append(panels, 
grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "CPU Utilisation (from requests)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: `{{pod}}`, - Instant: true, - }, - }, - }, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "CPU Utilisation (from limits)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: `{{pod}}`, - Instant: true, - }, - }, - }, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory Utilisation (from requests)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", cluster="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: `{{pod}}`, - Instant: true, - }, - }, - }, - })) - - panels = 
append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory Utilisation (from limits)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", cluster="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: `{{pod}}`, - Instant: true, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "CPU Usage", - Span: 12, - Height: 8, - Decimals: 3, - Query: []grafana.Query{ - { - Expr: `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`, - Legend: "{{pod}}", - }, - { - Expr: `sum(kube_pod_container_resource_requests{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="cpu"})`, - Legend: "Requests", - }, - { - Expr: `sum(kube_pod_container_resource_limits{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="cpu"})`, - Legend: "Limits", - }, - }, - }, - ScaleDistribution: common.ScaleDistributionLog, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory Usage", - Span: 12, - Height: 8, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `sum(container_memory_rss{pod=~"$pod", namespace=~"${namespace}", container!=""}) by (pod)`, - Legend: "{{pod}}", - }, - { - Expr: 
`sum(kube_pod_container_resource_requests{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="memory"})`, - Legend: "Requests", - }, - { - Expr: `sum(kube_pod_container_resource_limits{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="memory"})`, - Legend: "Limits", - }, - }, - }, - ScaleDistribution: common.ScaleDistributionLog, - })) - } - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Open File Descriptors", - Span: 6, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `process_open_fds{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - GraphMode: common.BigValueGraphModeArea, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Go Version", - Span: 4, - Height: 4, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `go_info{` + p.platformOpts.LabelQuery + `}`, - Legend: "{{exported_version}}", - Instant: true, - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - TextMode: common.BigValueTextModeName, - })) - - return panels -} - -func appDBConnections(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "DB Connections", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "Conn", - Query: []grafana.Query{ - { - Expr: `sum(db_conns_max{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - Max`, - }, - { - Expr: `sum(db_conns_open{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + 
p.platformOpts.LabelFilter + `}} - Open`, - }, - { - Expr: `sum(db_conns_used{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - Used`, - }, - { - Expr: `sum(db_conns_wait{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - Wait`, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "DB Wait Count", - Span: 12, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(db_wait_count{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "DB Wait Time", - Span: 12, - Height: 6, - Unit: "Sec", - Query: []grafana.Query{ - { - Expr: `sum(db_wait_time_seconds{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - })) - - return panels -} - -func sqlQueries(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "SQL Query Timeout Percent", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `histogram_quantile(0.9, sum(rate(sql_query_timeout_percent_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (le))`, - Legend: "p90", - }, - { - Expr: 
`histogram_quantile(0.95, sum(rate(sql_query_timeout_percent_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (le))`, - Legend: "p95", - }, - { - Expr: `histogram_quantile(0.99, sum(rate(sql_query_timeout_percent_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (le))`, - Legend: "p99", - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - return panels -} - -func headTracker(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Head Tracker Current Head", - Span: 18, - Height: 6, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `sum(head_tracker_current_head{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Head Tracker Current Head Summary", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `head_tracker_current_head{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - Instant: true, - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Head Tracker Heads Received Rate", - Span: 24, - Height: 6, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `rate(head_tracker_heads_received{` + p.platformOpts.LabelQuery + `}[1m])`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - AlertsOptions: []grafana.AlertOptions{ - { - Summary: `No 
Headers Received`, - Description: `{{ index $labels "` + p.platformOpts.LabelFilter + `" }} on ChainID {{ index $labels "ChainID" }} has received {{ index $values "A" }} heads over 10 minutes.`, - RunbookURL: "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - For: "10m", - NoDataState: alerting.RuleNoDataStateOK, - Tags: map[string]string{ - "severity": "critical", - }, - Query: []grafana.RuleQuery{ - { - Expr: `increase(head_tracker_heads_received{` + p.AlertsFilters + `}[10m])`, - Instant: true, - RefID: "A", - Datasource: p.MetricsDataSource.UID, - }, - }, - QueryRefCondition: "B", - Condition: []grafana.ConditionQuery{ - { - RefID: "B", - ThresholdExpression: &grafana.ThresholdExpression{ - Expression: "A", - ThresholdConditionsOptions: grafana.ThresholdConditionsOption{ - Params: []float64{1}, - Type: grafana.TypeThresholdTypeLt, - }, - }, - }, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Head Tracker Very Old Head", - Span: 12, - Height: 6, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `head_tracker_very_old_head{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Head Tracker Connection Errors Rate", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `rate(head_tracker_connection_errors{` + p.platformOpts.LabelQuery + `}[1m])`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - return panels -} - -func headReporter(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: 
&grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Unconfirmed Transactions", - Span: 8, - Height: 6, - Unit: "Tx", - Query: []grafana.Query{ - { - Expr: `sum(unconfirmed_transactions{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Unconfirmed TX Age", - Span: 8, - Height: 6, - Unit: "s", - Query: []grafana.Query{ - { - Expr: `sum(max_unconfirmed_tx_age{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Unconfirmed TX Blocks", - Span: 8, - Height: 6, - Unit: "Blocks", - Query: []grafana.Query{ - { - Expr: `sum(max_unconfirmed_blocks{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - return panels -} - -func txManager(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Confirmed", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_num_confirmed_transactions{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: 
&grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Successful", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_num_successful_transactions{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Reverted", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_num_tx_reverted{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Gas Bumps", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_num_gas_bumps{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Forwarded", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_fwd_tx_count{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: 
&grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Attempts", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_tx_attempt_count{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Gas Bump Exceeds Limit", - Span: 6, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(tx_manager_gas_bump_exceeds_limit{` + p.platformOpts.LabelQuery + `}) by (blockchain, chainID, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Time Until Broadcast", - Description: "The amount of time elapsed from when a transaction is enqueued to until it is broadcast", - Span: 6, - Height: 6, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `histogram_quantile(0.9, sum(rate(tx_manager_time_until_tx_broadcast_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (le, ` + p.platformOpts.LabelFilter + `, blockchain, chainID)) / 1e6`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "TX Manager Time Until Confirmed", - Description: "The amount of time elapsed from a transaction being broadcast to being included in a block", - Span: 6, - Height: 6, - Decimals: 1, - Unit: "ms", 
- Query: []grafana.Query{ - { - Expr: `histogram_quantile(0.9, sum(rate(tx_manager_time_until_tx_confirmed_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (le, ` + p.platformOpts.LabelFilter + `, blockchain, chainID)) / 1e6`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{blockchain}} - {{chainID}}`, - }, - }, - }, - })) - - return panels -} - -func logPoller(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Goroutines per ChainId", - Description: "goroutines per chainId", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `count(log_poller_query_duration_sum{` + p.platformOpts.LabelQuery + `}) by (evmChainID)`, - Legend: "chainId: {{evmChainID}}", - }, - }, - }, - ColorMode: common.BigValueColorModeValue, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "RPS", - Description: "requests per second", - Span: 12, - Height: 6, - Decimals: 2, - Unit: "reqps", - Query: []grafana.Query{ - { - Expr: `avg by (query, ` + p.platformOpts.LabelFilter + `) (sum by (query, job) (rate(log_poller_query_duration_count{` + p.platformOpts.LabelQuery + `}[$__rate_interval])))`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{query}}`, - }, - { - Expr: `avg (sum by(` + p.platformOpts.LabelFilter + `) (rate(log_poller_query_duration_count{` + p.platformOpts.LabelQuery + `}[$__rate_interval])))`, - Legend: "Total", - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - panels = append(panels, 
grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "RPS by Type", - Span: 12, - Height: 6, - Decimals: 2, - Unit: "reqps", - Query: []grafana.Query{ - { - Expr: `avg by (` + p.platformOpts.LabelFilter + `, type) (sum by (type, ` + p.platformOpts.LabelFilter + `) (rate(log_poller_query_duration_count{` + p.platformOpts.LabelQuery + `}[$__rate_interval])))`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{type}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Avg number of logs returned", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `avg by (` + p.platformOpts.LabelFilter + `, query) (log_poller_query_dataset_size{` + p.platformOpts.LabelQuery + `})`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{query}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Max number of logs returned", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `max by (` + p.platformOpts.LabelFilter + `, query) (log_poller_query_dataset_size{` + p.platformOpts.LabelQuery + `})`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{query}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Logs returned by chain", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `max by (evmChainID) (log_poller_query_dataset_size{` + p.platformOpts.LabelQuery + `})`, - Legend: "{{evmChainID}}", - }, - }, - }, - })) - - quantiles := []string{"0.5", "0.9", "0.99"} - for _, 
quantile := range quantiles { - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: `Queries duration by query ` + quantile + ` quantile`, - Span: 24, - Height: 6, - Decimals: 2, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `histogram_quantile(` + quantile + `, sum(rate(log_poller_query_duration_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (le, ` + p.platformOpts.LabelFilter + `, query)) / 1e6`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{query}}`, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - } - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Number of logs inserted", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `avg by (evmChainID) (log_poller_logs_inserted{` + p.platformOpts.LabelQuery + `})`, - Legend: "{{evmChainID}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Logs insertion rate", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `avg by (evmChainID) (rate(log_poller_logs_inserted{` + p.platformOpts.LabelQuery + `}[$__rate_interval]))`, - Legend: "{{evmChainID}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Number of blocks inserted", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `avg by (evmChainID) (log_poller_blocks_inserted{` + p.platformOpts.LabelQuery + `})`, - Legend: 
"{{evmChainID}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Blocks insertion rate", - Span: 12, - Height: 6, - Decimals: 2, - Query: []grafana.Query{ - { - Expr: `avg by (evmChainID) (rate(log_poller_blocks_inserted{` + p.platformOpts.LabelQuery + `}[$__rate_interval]))`, - Legend: "{{evmChainID}}", - }, - }, - }, - })) - - return panels -} - -func feedsJobs(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Feeds Job Proposal Requests", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(feeds_job_proposal_requests{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Feeds Job Proposal Count", - Description: "", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(feeds_job_proposal_count{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - return panels -} - -func mailbox(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Mailbox Load Percent", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `sum(mailbox_load_percent{` + p.platformOpts.LabelQuery + `}) by (capacity, 
name, ` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - Capacity: {{capacity}} - {{name}}`, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - return panels -} - -func logsCounters(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - logStatuses := []string{"panic", "fatal", "critical", "warn", "error"} - for _, status := range logStatuses { - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Logs Counter - " + status, - Span: 8, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(log_` + status + `_count{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - ` + status, - }, - }, - }, - })) - } - - return panels -} - -func logsRate(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - logStatuses := []string{"panic", "fatal", "critical", "warn", "error"} - for _, status := range logStatuses { - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Logs Rate - " + status, - Span: 8, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(rate(log_` + status + `_count{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - error`, - }, - }, - }, - })) - } - - return panels -} - -func evmPoolLifecycle(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool Highest Seen Block", - Span: 12, - Height: 6, - Decimals: 
1, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_highest_seen_block{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool Num Seen Blocks", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_num_seen_blocks{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool Node Polls Total", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_polls_total{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool Node Polls Failed", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_polls_failed{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool Node Polls Success", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "Block", - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_polls_success{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - return 
panels -} - -func nodesRPC(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - nodeRPCStates := []string{"Alive", "Closed", "Dialed", "InvalidChainID", "OutOfSync", "Undialed", "Unreachable", "Unusable"} - for _, state := range nodeRPCStates { - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Node RPC " + state, - Span: 6, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(multi_node_states{` + p.platformOpts.LabelQuery + `state="` + state + `"}) by (` + p.platformOpts.LabelFilter + `, chainId)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{chainId}}`, - }, - }, - }, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - } - - return panels -} - -func evmNodeRPC(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool RPC Node Calls Success Rate", - Span: 24, - Height: 6, - Decimals: 2, - Unit: "percentunit", - Max: grafana.Pointer[float64](1), - Query: []grafana.Query{ - { - Expr: `sum(increase(evm_pool_rpc_node_calls_success{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LabelFilter + `, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LabelFilter + `, evmChainID, nodeName)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{nodeName}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.8), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: 
"green"}, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool RPC Node Dials Failure Rate", - Span: 24, - Height: 6, - Decimals: 2, - Unit: "percentunit", - Max: grafana.Pointer[float64](1), - Query: []grafana.Query{ - { - Expr: `sum(increase(evm_pool_rpc_node_dials_failed{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LabelFilter + `, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LabelFilter + `, evmChainID, nodeName)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{evmChainID}} - {{nodeName}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.8), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool RPC Node Transitions", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_num_transitions_to_alive{` + p.platformOpts.LabelQuery + `}`, - Legend: "Alive", - }, - { - Expr: `evm_pool_rpc_node_num_transitions_to_in_sync{` + p.platformOpts.LabelQuery + `}`, - Legend: "InSync", - }, - { - Expr: `evm_pool_rpc_node_num_transitions_to_out_of_sync{` + 
p.platformOpts.LabelQuery + `}`, - Legend: "OutOfSync", - }, - { - Expr: `evm_pool_rpc_node_num_transitions_to_unreachable{` + p.platformOpts.LabelQuery + `}`, - Legend: "UnReachable", - }, - { - Expr: `evm_pool_rpc_node_num_transitions_to_invalid_chain_id{` + p.platformOpts.LabelQuery + `}`, - Legend: "InvalidChainID", - }, - { - Expr: `evm_pool_rpc_node_num_transitions_to_unusable{` + p.platformOpts.LabelQuery + `}`, - Legend: "TransitionToUnusable", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool RPC Node States", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `evm_pool_rpc_node_states{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - {{evmChainID}} - {{state}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool RPC Node Verifies Success Rate", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `sum(increase(evm_pool_rpc_node_verifies_success{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, evmChainID, nodeName) * 100`, - Legend: `{{` + p.platformOpts.LegendString + `}} - {{evmChainID}} - {{nodeName}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "EVM Pool RPC Node Verifies Failure Rate", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "percentunit", - Query: []grafana.Query{ - { 
- Expr: `sum(increase(evm_pool_rpc_node_verifies_failed{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, evmChainID, nodeName) * 100`, - Legend: `{{` + p.platformOpts.LegendString + `}} - {{evmChainID}} - {{nodeName}}`, - }, - }, - }, - })) - - return panels -} - -func evmPoolRPCNodeLatencies(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - quantiles := []string{"0.90", "0.95", "0.99"} - for _, quantile := range quantiles { - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: `EVM Pool RPC Node Calls Latency ` + quantile + ` quantile`, - Span: 24, - Height: 6, - Decimals: 1, - Unit: "ms", - Query: []grafana.Query{ - { - Expr: `histogram_quantile(` + quantile + `, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LabelFilter + `, le, rpcCallName)) / 1e6`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{rpcCallName}}`, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - } - - return panels -} - -func evmBlockHistoryEstimator(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Gas Updater All Gas Price Percentiles", - Description: "Gas price at given percentile", - Span: 24, - Height: 6, - Unit: "gwei", - Query: []grafana.Query{ - { - Expr: `sum(gas_updater_all_gas_price_percentiles{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `, evmChainID, 
percentile)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{evmChainID}} - {{percentile}}`, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Gas Updater All Tip Cap Percentiles", - Description: "Tip cap at given percentile", - Span: 24, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(gas_updater_all_tip_cap_percentiles{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `, evmChainID, percentile)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}} - {{evmChainID}} - {{percentile}}`, - }, - }, - }, - LegendOptions: &grafana.LegendOptions{ - DisplayMode: common.LegendDisplayModeList, - Placement: common.LegendPlacementRight, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Gas Updater Set Gas Price", - Span: 12, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(gas_updater_set_gas_price{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Gas Updater Set Tip Cap", - Span: 12, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(gas_updater_set_tip_cap{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: 
p.MetricsDataSource.Name, - Title: "Gas Updater Current Base Fee", - Span: 12, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(gas_updater_current_base_fee{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Block History Estimator Connectivity Failure Count", - Span: 12, - Height: 6, - Query: []grafana.Query{ - { - Expr: `sum(block_history_estimator_connectivity_failure_count{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LabelFilter + `)`, - Legend: `{{` + p.platformOpts.LabelFilter + `}}`, - }, - }, - }, - })) - - return panels -} - -func pipelines(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Pipeline Task Execution Time", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "s", - Query: []grafana.Query{ - { - Expr: `pipeline_task_execution_time{` + p.platformOpts.LabelQuery + `} / 1e6`, - Legend: `{{` + p.platformOpts.LegendString + `}} JobID: {{job_id}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Pipeline Run Errors", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `pipeline_run_errors{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} JobID: {{job_id}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Pipeline Run Total 
Time to Completion", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "s", - Query: []grafana.Query{ - { - Expr: `pipeline_run_total_time_to_completion{` + p.platformOpts.LabelQuery + `} / 1e6`, - Legend: `{{` + p.platformOpts.LegendString + `}} JobID: {{job_id}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Pipeline Tasks Total Finished", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `pipeline_tasks_total_finished{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} JobID: {{job_id}}`, - }, - }, - }, - })) - - return panels -} - -func httpAPI(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Request Duration p95", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "s", - Query: []grafana.Query{ - { - Expr: `histogram_quantile(0.95, sum(rate(service_gonic_request_duration_bucket{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, le, path, method))`, - Legend: `{{` + p.platformOpts.LegendString + `}} - {{method}} - {{path}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Request Total Rate over interval", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(rate(service_gonic_requests_total{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, path, method, code)`, - Legend: `{{` + p.platformOpts.LegendString + `}} - {{method}} - {{path}} - {{code}}`, - }, - }, - }, - })) - - panels = append(panels, 
grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Average Request Size", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `avg(rate(service_gonic_request_size_bytes_sum{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `)/avg(rate(service_gonic_request_size_bytes_count{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Response Size", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `avg(rate(service_gonic_response_size_bytes_sum{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `)/avg(rate(service_gonic_response_size_bytes_count{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - return panels -} - -func promHTTP(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "HTTP rate by return code", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(rate(promhttp_metric_handler_requests_total{` + p.platformOpts.LabelQuery + `}[$__rate_interval])) by (` + p.platformOpts.LegendString + `, code)`, - Legend: `{{` + p.platformOpts.LegendString + `}} - {{code}}`, - }, - }, - }, - })) - - return panels -} - -func goMetrics(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - 
- panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Threads", - Span: 24, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `sum(go_threads{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LegendString + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Heap Allocations Stats", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `sum(go_memstats_heap_alloc_bytes{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LegendString + `)`, - Legend: "", - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Heap allocations Graph", - Span: 24, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `sum(go_memstats_heap_alloc_bytes{` + p.platformOpts.LabelQuery + `}) by (` + p.platformOpts.LegendString + `)`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Heap Usage", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `go_memstats_heap_alloc_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Alloc`, - }, - { - Expr: `go_memstats_heap_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Sys`, - }, 
- { - Expr: `go_memstats_heap_idle_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Idle`, - }, - { - Expr: `go_memstats_heap_inuse_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - InUse`, - }, - { - Expr: `go_memstats_heap_released_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Released`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory in Off-Heap", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `go_memstats_mspan_inuse_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Total InUse`, - }, - { - Expr: `go_memstats_mspan_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Total Sys`, - }, - { - Expr: `go_memstats_mcache_inuse_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Cache InUse`, - }, - { - Expr: `go_memstats_mcache_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Cache Sys`, - }, - { - Expr: `go_memstats_buck_hash_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Hash Sys`, - }, - { - Expr: `go_memstats_gc_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - GC Sys`, - }, - { - Expr: `go_memstats_other_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - bytes of memory are used for other runtime allocations`, - }, - { - Expr: `go_memstats_next_gc_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Next GC`, - }, - }, - }, - })) - - panels = 
append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory in Stack", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `go_memstats_stack_inuse_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - InUse`, - }, - { - Expr: `go_memstats_stack_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}} - Sys`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Total Used Memory", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `go_memstats_sys_bytes{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Number of Live Objects", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `go_memstats_mallocs_total{` + p.platformOpts.LabelQuery + `} - go_memstats_frees_total{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Rate of Objects Allocated", - Span: 12, - Height: 6, - Decimals: 1, - Query: []grafana.Query{ - { - Expr: `rate(go_memstats_mallocs_total{` + p.platformOpts.LabelQuery + `}[1m])`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - 
PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Rate of a Pointer Dereferences", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "ops", - Query: []grafana.Query{ - { - Expr: `rate(go_memstats_lookups_total{` + p.platformOpts.LabelQuery + `}[1m])`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Goroutines", - Span: 12, - Height: 6, - Decimals: 1, - Unit: "ops", - Query: []grafana.Query{ - { - Expr: `go_goroutines{` + p.platformOpts.LabelQuery + `}`, - Legend: `{{` + p.platformOpts.LegendString + `}}`, - }, - }, - }, - })) - - return panels -} diff --git a/observability-lib/dashboards/core-node/component_test.go b/observability-lib/dashboards/core-node/component_test.go deleted file mode 100644 index 965acc493..000000000 --- a/observability-lib/dashboards/core-node/component_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package corenode_test - -import ( - "flag" - "os" - "testing" - - corenode "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/core-node" - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - - "github.com/stretchr/testify/require" -) - -var update = flag.Bool("update", false, "update golden test files") - -const fileOutput = "test-output.json" - -func TestGenerateFile(t *testing.T) { - if *update == false { - t.Skip("skipping test") - } - - testDashboard, err := corenode.NewDashboard(&corenode.Props{ - Name: "Core Node Dashboard", - Platform: grafana.TypePlatformDocker, - MetricsDataSource: grafana.NewDataSource("Prometheus", "1"), - LogsDataSource: grafana.NewDataSource("Loki", "2"), - Tested: true, - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating 
JSON: %v", errJSON) - } - if _, errExists := os.Stat(fileOutput); errExists == nil { - errRemove := os.Remove(fileOutput) - if errRemove != nil { - t.Errorf("Error removing file: %v", errRemove) - } - } - file, errFile := os.Create(fileOutput) - if errFile != nil { - panic(errFile) - } - writeString, err := file.WriteString(string(json)) - if err != nil { - t.Errorf("Error writing to file: %v", writeString) - } - t.Cleanup(func() { - file.Close() - }) -} - -func TestNewDashboard(t *testing.T) { - t.Run("NewDashboard creates a dashboard", func(t *testing.T) { - testDashboard, err := corenode.NewDashboard(&corenode.Props{ - Name: "Core Node Dashboard", - Platform: grafana.TypePlatformDocker, - MetricsDataSource: grafana.NewDataSource("Prometheus", "1"), - LogsDataSource: grafana.NewDataSource("Loki", "2"), - Tested: true, - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - require.IsType(t, grafana.Observability{}, *testDashboard) - require.Equal(t, "Core Node Dashboard", *testDashboard.Dashboard.Title) - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - - jsonCompared, errCompared := os.ReadFile(fileOutput) - if errCompared != nil { - t.Errorf("Error reading file: %v", errCompared) - } - - require.JSONEq(t, string(jsonCompared), string(json)) - }) -} diff --git a/observability-lib/dashboards/core-node/platform.go b/observability-lib/dashboards/core-node/platform.go deleted file mode 100644 index fbd7b6c6c..000000000 --- a/observability-lib/dashboards/core-node/platform.go +++ /dev/null @@ -1,55 +0,0 @@ -package corenode - -import ( - "fmt" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -type platformOpts struct { - // Platform is infrastructure deployment platform: docker or k8s - Platform grafana.TypePlatform - LabelFilters map[string]string - LabelFilter string - LegendString string - LabelQuery string -} - -type Props struct { - 
Name string // Name is the name of the dashboard - Platform grafana.TypePlatform // Platform is infrastructure deployment platform: docker or k8s - MetricsDataSource *grafana.DataSource // MetricsDataSource is the datasource for querying metrics - LogsDataSource *grafana.DataSource // LogsDataSource is the datasource for querying logs - SlackChannel string // SlackChannel is the channel to send alerts to - SlackWebhookURL string // SlackWebhookURL is the URL to send alerts to - AlertsTags map[string]string // AlertsTags is the tags to map with notification policy - AlertsFilters string // AlertsFilters is the filters to apply to alerts - platformOpts platformOpts - Tested bool -} - -// PlatformPanelOpts generate different queries for "docker" and "k8s" deployment platforms -func platformPanelOpts(platform grafana.TypePlatform) platformOpts { - po := platformOpts{ - LabelFilters: map[string]string{}, - Platform: platform, - } - switch platform { - case grafana.TypePlatformKubernetes: - po.LabelFilters["namespace"] = `=~"${namespace}"` - po.LabelFilters["job"] = `=~"${job}"` - po.LabelFilters["pod"] = `=~"${pod}"` - po.LabelFilter = "job" - po.LegendString = "pod" - case grafana.TypePlatformDocker: - po.LabelFilters["instance"] = `=~"${instance}"` - po.LabelFilter = "instance" - po.LegendString = "instance" - default: - panic(fmt.Sprintf("failed to generate Platform dependent queries, unknown platform: %s", platform)) - } - for key, value := range po.LabelFilters { - po.LabelQuery += key + value + ", " - } - return po -} diff --git a/observability-lib/dashboards/core-node/test-output.json b/observability-lib/dashboards/core-node/test-output.json deleted file mode 100644 index 717436e9b..000000000 --- a/observability-lib/dashboards/core-node/test-output.json +++ /dev/null @@ -1,6359 +0,0 @@ -{ - "Dashboard": { - "title": "Core Node Dashboard", - "tags": [ - "Core", - "Node" - ], - "timezone": "browser", - "editable": true, - "graphTooltip": 0, - "time": { - "from": 
"now-30m", - "to": "now" - }, - "fiscalYearStartMonth": 0, - "refresh": "30s", - "schemaVersion": 39, - "panels": [ - { - "type": "row", - "collapsed": false, - "title": "Headlines", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 1, - "targets": [ - { - "expr": "version{}", - "instant": true, - "range": false, - "format": "", - "legendFormat": "Version: {{version}} https://github.com/smartcontractkit/chainlink/commit/{{commit}} https://github.com/smartcontractkit/chainlink/tree/release/{{version}}", - "refId": "" - } - ], - "title": "App Version", - "description": "app version with commit and branch links", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 0, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 2, - "targets": [ - { - "expr": "uptime_seconds{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Uptime", - "description": "instance uptime", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - 
"defaults": { - "unit": "s", - "decimals": 2, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 3, - "targets": [ - { - "expr": "sum(eth_balance{}) by (instance, account)", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{instance}} - {{account}}", - "refId": "" - } - ], - "title": "ETH Balance Summary", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 0, - "y": 5 - }, - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 4, - "targets": [ - { - "expr": "sum(solana_balance{}) by (instance, account)", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{instance}} - {{account}}", - "refId": "" - } - ], - "title": "Solana Balance Summary", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 5 - }, - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - 
"unit": "", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 5, - "targets": [ - { - "expr": "100 * (avg(avg_over_time(health{}[15m])) by (instance, service_id, version, service, cluster, env))", - "format": "", - "legendFormat": "{{instance}} - {{service_id}}", - "refId": "" - } - ], - "title": "Health Avg by Service over 15m", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 9 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "min": 0, - "max": 100, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "green" - }, - { - "value": 50, - "color": "red" - }, - { - "value": 70, - "color": "orange" - }, - { - "value": 90, - "color": "green" - } - ] - }, - "noValue": "No data", - "custom": { - "thresholdsStyle": { - "mode": "dashed" - }, - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 6, - "targets": [ - { - "expr": "100 * avg(avg_over_time(health{}[15m])) by (instance, service_id, version, service, cluster, env) \u003c 90", - "format": "", - "legendFormat": "{{instance}} - {{service_id}}", - "refId": "" - } - ], - "title": "Health Avg by Service over 15m with health \u003c 90%", - "description": "Only displays services with health average \u003c 90%", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 15 - }, - "options": { - "graphMode": 
"line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "green" - }, - { - "value": 1, - "color": "red" - }, - { - "value": 80, - "color": "orange" - }, - { - "value": 99, - "color": "green" - } - ] - }, - "noValue": "All services healthy" - }, - "overrides": null - } - }, - { - "type": "logs", - "id": 7, - "targets": [ - { - "expr": "{env=\"${env}\", cluster=\"${cluster}\", product=\"${product}\", network_type=\"${network_type}\", namespace=\"${namespace}\", pod=\"${pod}\"} | json | level=~\"(error|panic|fatal|crit)\"", - "format": "", - "legendFormat": "", - "refId": "" - } - ], - "title": "Logs with severity \u003e= error", - "description": "", - "transparent": false, - "datasource": { - "uid": "Loki" - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 21 - }, - "options": { - "showLabels": false, - "showCommonLabels": false, - "showTime": false, - "showLogContextToggle": false, - "wrapLogMessage": false, - "prettifyLogMessage": true, - "enableLogDetails": false, - "sortOrder": "", - "dedupStrategy": "" - }, - "fieldConfig": { - "defaults": { - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 8, - "targets": [ - { - "expr": "sum(eth_balance{}) by (instance, account)", - "format": "", - "legendFormat": "{{instance}} - {{account}}", - "refId": "" - } - ], - "title": "ETH Balance", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 31 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": 
"bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 9, - "targets": [ - { - "expr": "sum(solana_balance{}) by (instance, account)", - "format": "", - "legendFormat": "{{instance}} - {{account}}", - "refId": "" - } - ], - "title": "SOL Balance", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 31 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 10, - "targets": [ - { - "expr": "process_open_fds{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Open File Descriptors", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 37 - }, - "options": { - "graphMode": "area", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 11, - "targets": [ - { - "expr": "go_info{}", - "instant": true, - "range": false, - 
"format": "", - "legendFormat": "{{exported_version}}", - "refId": "" - } - ], - "title": "Go Version", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 6, - "y": 37 - }, - "options": { - "graphMode": "none", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "AppDBConnections", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 41 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 12, - "targets": [ - { - "expr": "sum(db_conns_max{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - Max", - "refId": "" - }, - { - "expr": "sum(db_conns_open{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - Open", - "refId": "" - }, - { - "expr": "sum(db_conns_used{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - Used", - "refId": "" - }, - { - "expr": "sum(db_conns_wait{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - Wait", - "refId": "" - } - ], - "title": "DB Connections", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 42 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Conn", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": 
null - } - }, - { - "type": "timeseries", - "id": 13, - "targets": [ - { - "expr": "sum(db_wait_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "DB Wait Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 48 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 14, - "targets": [ - { - "expr": "sum(db_wait_time_seconds{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "DB Wait Time", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 48 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Sec", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "SQLQueries", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 54 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 15, - "targets": [ - { - "expr": "histogram_quantile(0.9, sum(rate(sql_query_timeout_percent_bucket{}[$__rate_interval])) by (le))", - "format": "", - "legendFormat": "p90", - "refId": "" - }, - { - "expr": "histogram_quantile(0.95, sum(rate(sql_query_timeout_percent_bucket{}[$__rate_interval])) 
by (le))", - "format": "", - "legendFormat": "p95", - "refId": "" - }, - { - "expr": "histogram_quantile(0.99, sum(rate(sql_query_timeout_percent_bucket{}[$__rate_interval])) by (le))", - "format": "", - "legendFormat": "p99", - "refId": "" - } - ], - "title": "SQL Query Timeout Percent", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 55 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "HeadTracker", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 61 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 16, - "targets": [ - { - "expr": "sum(head_tracker_current_head{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Head Tracker Current Head", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 18, - "x": 0, - "y": 62 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 17, - "targets": [ - { - "expr": "head_tracker_current_head{}", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Head Tracker Current 
Head Summary", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 62 - }, - "options": { - "graphMode": "none", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 18, - "targets": [ - { - "expr": "rate(head_tracker_heads_received{}[1m])", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Head Tracker Heads Received Rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 68 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 19, - "targets": [ - { - "expr": "head_tracker_very_old_head{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Head Tracker Very Old Head", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 74 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 
0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 20, - "targets": [ - { - "expr": "rate(head_tracker_connection_errors{}[1m])", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Head Tracker Connection Errors Rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 74 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "HeadReporter", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 80 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 21, - "targets": [ - { - "expr": "sum(unconfirmed_transactions{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Unconfirmed Transactions", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 81 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Tx", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 22, - "targets": [ - { - "expr": "sum(max_unconfirmed_tx_age{}) by (instance)", - "format": "", - "legendFormat": 
"{{instance}}", - "refId": "" - } - ], - "title": "Unconfirmed TX Age", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 81 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "s", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 23, - "targets": [ - { - "expr": "sum(max_unconfirmed_blocks{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Unconfirmed TX Blocks", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 81 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Blocks", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "TxManager", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 87 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 24, - "targets": [ - { - "expr": "sum(tx_manager_num_confirmed_transactions{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Confirmed", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 88 - }, - "options": { - "legend": { - 
"displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 25, - "targets": [ - { - "expr": "sum(tx_manager_num_successful_transactions{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Successful", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 88 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 26, - "targets": [ - { - "expr": "sum(tx_manager_num_tx_reverted{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Reverted", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 88 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": 
"timeseries", - "id": 27, - "targets": [ - { - "expr": "sum(tx_manager_num_gas_bumps{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Gas Bumps", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 88 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 28, - "targets": [ - { - "expr": "sum(tx_manager_fwd_tx_count{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Forwarded", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 94 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 29, - "targets": [ - { - "expr": "sum(tx_manager_tx_attempt_count{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Attempts", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, 
- "w": 6, - "x": 6, - "y": 94 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 30, - "targets": [ - { - "expr": "sum(tx_manager_gas_bump_exceeds_limit{}) by (blockchain, chainID, instance)", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Gas Bump Exceeds Limit", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 94 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 31, - "targets": [ - { - "expr": "histogram_quantile(0.9, sum(rate(tx_manager_time_until_tx_broadcast_bucket{}[$__rate_interval])) by (le, instance, blockchain, chainID)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Time Until Broadcast", - "description": "The amount of time elapsed from when a transaction is enqueued to until it is broadcast", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 94 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", 
- "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 32, - "targets": [ - { - "expr": "histogram_quantile(0.9, sum(rate(tx_manager_time_until_tx_confirmed_bucket{}[$__rate_interval])) by (le, instance, blockchain, chainID)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{blockchain}} - {{chainID}}", - "refId": "" - } - ], - "title": "TX Manager Time Until Confirmed", - "description": "The amount of time elapsed from a transaction being broadcast to being included in a block", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 100 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "LogPoller", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 106 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 33, - "targets": [ - { - "expr": "count(log_poller_query_duration_sum{}) by (evmChainID)", - "format": "", - "legendFormat": "chainId: {{evmChainID}}", - "refId": "" - } - ], - "title": "Goroutines per ChainId", - "description": "goroutines per chainId", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 107 - }, - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - 
"calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 34, - "targets": [ - { - "expr": "avg by (query, instance) (sum by (query, job) (rate(log_poller_query_duration_count{}[$__rate_interval])))", - "format": "", - "legendFormat": "{{instance}} - {{query}}", - "refId": "" - }, - { - "expr": "avg (sum by(instance) (rate(log_poller_query_duration_count{}[$__rate_interval])))", - "format": "", - "legendFormat": "Total", - "refId": "" - } - ], - "title": "RPS", - "description": "requests per second", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 107 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "reqps", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 35, - "targets": [ - { - "expr": "avg by (instance, type) (sum by (type, instance) (rate(log_poller_query_duration_count{}[$__rate_interval])))", - "format": "", - "legendFormat": "{{instance}} - {{type}}", - "refId": "" - } - ], - "title": "RPS by Type", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 113 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "reqps", - "decimals": 2, - "noValue": "No data", - 
"custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 36, - "targets": [ - { - "expr": "avg by (instance, query) (log_poller_query_dataset_size{})", - "format": "", - "legendFormat": "{{instance}} - {{query}}", - "refId": "" - } - ], - "title": "Avg number of logs returned", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 113 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 37, - "targets": [ - { - "expr": "max by (instance, query) (log_poller_query_dataset_size{})", - "format": "", - "legendFormat": "{{instance}} - {{query}}", - "refId": "" - } - ], - "title": "Max number of logs returned", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 119 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 38, - "targets": [ - { - "expr": "max by (evmChainID) (log_poller_query_dataset_size{})", - "format": "", - "legendFormat": "{{evmChainID}}", - "refId": "" - } - ], - "title": "Logs returned by chain", - "description": "", - "transparent": false, - "datasource": 
{ - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 119 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 39, - "targets": [ - { - "expr": "histogram_quantile(0.5, sum(rate(log_poller_query_duration_bucket{}[$__rate_interval])) by (le, instance, query)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{query}}", - "refId": "" - } - ], - "title": "Queries duration by query 0.5 quantile", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 125 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 40, - "targets": [ - { - "expr": "histogram_quantile(0.9, sum(rate(log_poller_query_duration_bucket{}[$__rate_interval])) by (le, instance, query)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{query}}", - "refId": "" - } - ], - "title": "Queries duration by query 0.9 quantile", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 131 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - 
"fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 41, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(log_poller_query_duration_bucket{}[$__rate_interval])) by (le, instance, query)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{query}}", - "refId": "" - } - ], - "title": "Queries duration by query 0.99 quantile", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 137 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 42, - "targets": [ - { - "expr": "avg by (evmChainID) (log_poller_logs_inserted{})", - "format": "", - "legendFormat": "{{evmChainID}}", - "refId": "" - } - ], - "title": "Number of logs inserted", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 143 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 43, - "targets": [ - { - "expr": "avg by (evmChainID) (rate(log_poller_logs_inserted{}[$__rate_interval]))", - "format": 
"", - "legendFormat": "{{evmChainID}}", - "refId": "" - } - ], - "title": "Logs insertion rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 143 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 44, - "targets": [ - { - "expr": "avg by (evmChainID) (log_poller_blocks_inserted{})", - "format": "", - "legendFormat": "{{evmChainID}}", - "refId": "" - } - ], - "title": "Number of blocks inserted", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 149 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 45, - "targets": [ - { - "expr": "avg by (evmChainID) (rate(log_poller_blocks_inserted{}[$__rate_interval]))", - "format": "", - "legendFormat": "{{evmChainID}}", - "refId": "" - } - ], - "title": "Blocks insertion rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 149 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - 
"fieldConfig": { - "defaults": { - "unit": "", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Feeds Jobs", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 155 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 46, - "targets": [ - { - "expr": "sum(feeds_job_proposal_requests{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Feeds Job Proposal Requests", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 156 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 47, - "targets": [ - { - "expr": "sum(feeds_job_proposal_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Feeds Job Proposal Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 156 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Mailbox", - "gridPos": { - "h": 1, - 
"w": 24, - "x": 0, - "y": 162 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 48, - "targets": [ - { - "expr": "sum(mailbox_load_percent{}) by (capacity, name, instance)", - "format": "", - "legendFormat": "{{instance}} - Capacity: {{capacity}} - {{name}}", - "refId": "" - } - ], - "title": "Mailbox Load Percent", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 163 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Logs Counters", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 169 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 49, - "targets": [ - { - "expr": "sum(log_panic_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - panic", - "refId": "" - } - ], - "title": "Logs Counter - panic", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 170 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 50, - "targets": [ - { - "expr": "sum(log_fatal_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - fatal", - "refId": "" - } - ], - 
"title": "Logs Counter - fatal", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 170 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 51, - "targets": [ - { - "expr": "sum(log_critical_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - critical", - "refId": "" - } - ], - "title": "Logs Counter - critical", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 170 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 52, - "targets": [ - { - "expr": "sum(log_warn_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - warn", - "refId": "" - } - ], - "title": "Logs Counter - warn", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 176 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, 
- "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 53, - "targets": [ - { - "expr": "sum(log_error_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}} - error", - "refId": "" - } - ], - "title": "Logs Counter - error", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 176 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Logs Rate", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 182 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 54, - "targets": [ - { - "expr": "sum(rate(log_panic_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}} - error", - "refId": "" - } - ], - "title": "Logs Rate - panic", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 183 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 55, - "targets": [ - { - "expr": "sum(rate(log_fatal_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}} - error", - "refId": "" - } - ], - 
"title": "Logs Rate - fatal", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 183 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 56, - "targets": [ - { - "expr": "sum(rate(log_critical_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}} - error", - "refId": "" - } - ], - "title": "Logs Rate - critical", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 183 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 57, - "targets": [ - { - "expr": "sum(rate(log_warn_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}} - error", - "refId": "" - } - ], - "title": "Logs Rate - warn", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 189 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No 
data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 58, - "targets": [ - { - "expr": "sum(rate(log_error_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}} - error", - "refId": "" - } - ], - "title": "Logs Rate - error", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 8, - "y": 189 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "EvmPoolLifecycle", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 195 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 59, - "targets": [ - { - "expr": "evm_pool_rpc_node_highest_seen_block{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "EVM Pool Highest Seen Block", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 196 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 60, - "targets": [ - { - "expr": "evm_pool_rpc_node_num_seen_blocks{}", - "format": "", - "legendFormat": "{{instance}}", - 
"refId": "" - } - ], - "title": "EVM Pool Num Seen Blocks", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 196 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 61, - "targets": [ - { - "expr": "evm_pool_rpc_node_polls_total{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "EVM Pool Node Polls Total", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 202 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 62, - "targets": [ - { - "expr": "evm_pool_rpc_node_polls_failed{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "EVM Pool Node Polls Failed", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 202 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 1, - "noValue": "No data", - 
"custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 63, - "targets": [ - { - "expr": "evm_pool_rpc_node_polls_success{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "EVM Pool Node Polls Success", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 208 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "Block", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Node RPC State", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 214 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 64, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"Alive\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC Alive", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 215 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 65, - "targets": [ - { - "expr": 
"sum(multi_node_states{state=\"Closed\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC Closed", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 215 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 66, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"Dialed\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC Dialed", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 215 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 67, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"InvalidChainID\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC InvalidChainID", - "description": "", - "transparent": false, - 
"datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 215 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 68, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"OutOfSync\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC OutOfSync", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 221 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 69, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"Undialed\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC Undialed", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 221 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": 
true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 70, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"Unreachable\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC Unreachable", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 221 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 71, - "targets": [ - { - "expr": "sum(multi_node_states{state=\"Unusable\"}) by (instance, chainId)", - "format": "", - "legendFormat": "{{instance}} - {{chainId}}", - "refId": "" - } - ], - "title": "Node RPC Unusable", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 6, - "x": 18, - "y": 221 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - 
"defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "EVM Pool RPC Node Metrics (App)", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 227 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 72, - "targets": [ - { - "expr": "sum(increase(evm_pool_rpc_node_calls_success{}[$__rate_interval])) by (instance, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{}[$__rate_interval])) by (instance, evmChainID, nodeName)", - "format": "", - "legendFormat": "{{instance}} - {{nodeName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Calls Success Rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 228 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "max": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 73, - "targets": [ - { - "expr": "sum(increase(evm_pool_rpc_node_dials_failed{}[$__rate_interval])) by (instance, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_calls_total{}[$__rate_interval])) by (instance, evmChainID, nodeName)", - "format": "", - "legendFormat": "{{instance}} - {{evmChainID}} - {{nodeName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Dials Failure Rate", - "description": "", - "transparent": false, - "datasource": { - 
"uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 234 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "max": 1, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 74, - "targets": [ - { - "expr": "evm_pool_rpc_node_num_transitions_to_alive{}", - "format": "", - "legendFormat": "Alive", - "refId": "" - }, - { - "expr": "evm_pool_rpc_node_num_transitions_to_in_sync{}", - "format": "", - "legendFormat": "InSync", - "refId": "" - }, - { - "expr": "evm_pool_rpc_node_num_transitions_to_out_of_sync{}", - "format": "", - "legendFormat": "OutOfSync", - "refId": "" - }, - { - "expr": "evm_pool_rpc_node_num_transitions_to_unreachable{}", - "format": "", - "legendFormat": "UnReachable", - "refId": "" - }, - { - "expr": "evm_pool_rpc_node_num_transitions_to_invalid_chain_id{}", - "format": "", - "legendFormat": "InvalidChainID", - "refId": "" - }, - { - "expr": "evm_pool_rpc_node_num_transitions_to_unusable{}", - "format": "", - "legendFormat": "TransitionToUnusable", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Transitions", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 240 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - 
"unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 75, - "targets": [ - { - "expr": "evm_pool_rpc_node_states{}", - "format": "", - "legendFormat": "{{instance}} - {{evmChainID}} - {{state}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node States", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 240 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 76, - "targets": [ - { - "expr": "sum(increase(evm_pool_rpc_node_verifies_success{}[$__rate_interval])) by (instance, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{}[$__rate_interval])) by (instance, evmChainID, nodeName) * 100", - "format": "", - "legendFormat": "{{instance}} - {{evmChainID}} - {{nodeName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Verifies Success Rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 246 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 77, - "targets": [ - { - 
"expr": "sum(increase(evm_pool_rpc_node_verifies_failed{}[$__rate_interval])) by (instance, evmChainID, nodeName) / sum(increase(evm_pool_rpc_node_verifies{}[$__rate_interval])) by (instance, evmChainID, nodeName) * 100", - "format": "", - "legendFormat": "{{instance}} - {{evmChainID}} - {{nodeName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Verifies Failure Rate", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 246 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "EVM Pool RPC Node Latencies (App)", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 252 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 78, - "targets": [ - { - "expr": "histogram_quantile(0.90, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{}[$__rate_interval])) by (instance, le, rpcCallName)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{rpcCallName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Calls Latency 0.90 quantile", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 253 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - 
}, - { - "type": "timeseries", - "id": 79, - "targets": [ - { - "expr": "histogram_quantile(0.95, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{}[$__rate_interval])) by (instance, le, rpcCallName)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{rpcCallName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Calls Latency 0.95 quantile", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 259 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 80, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(evm_pool_rpc_node_rpc_call_time_bucket{}[$__rate_interval])) by (instance, le, rpcCallName)) / 1e6", - "format": "", - "legendFormat": "{{instance}} - {{rpcCallName}}", - "refId": "" - } - ], - "title": "EVM Pool RPC Node Calls Latency 0.99 quantile", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 265 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ms", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Block History Estimator", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 271 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 81, 
- "targets": [ - { - "expr": "sum(gas_updater_all_gas_price_percentiles{}) by (instance, evmChainID, percentile)", - "format": "", - "legendFormat": "{{instance}} - {{evmChainID}} - {{percentile}}", - "refId": "" - } - ], - "title": "Gas Updater All Gas Price Percentiles", - "description": "Gas price at given percentile", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 272 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "gwei", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 82, - "targets": [ - { - "expr": "sum(gas_updater_all_tip_cap_percentiles{}) by (instance, evmChainID, percentile)", - "format": "", - "legendFormat": "{{instance}} - {{evmChainID}} - {{percentile}}", - "refId": "" - } - ], - "title": "Gas Updater All Tip Cap Percentiles", - "description": "Tip cap at given percentile", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 278 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "right", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 83, - "targets": [ - { - "expr": "sum(gas_updater_set_gas_price{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Gas Updater Set Gas Price", - "description": "", - "transparent": false, - 
"datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 284 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 84, - "targets": [ - { - "expr": "sum(gas_updater_set_tip_cap{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Gas Updater Set Tip Cap", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 284 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 85, - "targets": [ - { - "expr": "sum(gas_updater_current_base_fee{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Gas Updater Current Base Fee", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 290 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - 
"overrides": null - } - }, - { - "type": "timeseries", - "id": 86, - "targets": [ - { - "expr": "sum(block_history_estimator_connectivity_failure_count{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Block History Estimator Connectivity Failure Count", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 290 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Pipeline Metrics (Runner)", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 296 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 87, - "targets": [ - { - "expr": "pipeline_task_execution_time{} / 1e6", - "format": "", - "legendFormat": "{{instance}} JobID: {{job_id}}", - "refId": "" - } - ], - "title": "Pipeline Task Execution Time", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 297 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "s", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 88, - "targets": [ - { - "expr": "pipeline_run_errors{}", - "format": "", - "legendFormat": "{{instance}} JobID: {{job_id}}", - "refId": "" - } - ], - "title": 
"Pipeline Run Errors", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 303 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 89, - "targets": [ - { - "expr": "pipeline_run_total_time_to_completion{} / 1e6", - "format": "", - "legendFormat": "{{instance}} JobID: {{job_id}}", - "refId": "" - } - ], - "title": "Pipeline Run Total Time to Completion", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 309 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "s", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 90, - "targets": [ - { - "expr": "pipeline_tasks_total_finished{}", - "format": "", - "legendFormat": "{{instance}} JobID: {{job_id}}", - "refId": "" - } - ], - "title": "Pipeline Tasks Total Finished", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 315 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No 
data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "HTTP API", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 321 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 91, - "targets": [ - { - "expr": "histogram_quantile(0.95, sum(rate(service_gonic_request_duration_bucket{}[$__rate_interval])) by (instance, le, path, method))", - "format": "", - "legendFormat": "{{instance}} - {{method}} - {{path}}", - "refId": "" - } - ], - "title": "Request Duration p95", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 322 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "s", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 92, - "targets": [ - { - "expr": "sum(rate(service_gonic_requests_total{}[$__rate_interval])) by (instance, path, method, code)", - "format": "", - "legendFormat": "{{instance}} - {{method}} - {{path}} - {{code}}", - "refId": "" - } - ], - "title": "Request Total Rate over interval", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 328 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - 
} - }, - { - "type": "timeseries", - "id": 93, - "targets": [ - { - "expr": "avg(rate(service_gonic_request_size_bytes_sum{}[$__rate_interval])) by (instance)/avg(rate(service_gonic_request_size_bytes_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Average Request Size", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 334 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 94, - "targets": [ - { - "expr": "avg(rate(service_gonic_response_size_bytes_sum{}[$__rate_interval])) by (instance)/avg(rate(service_gonic_response_size_bytes_count{}[$__rate_interval])) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Response Size", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 334 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "PromHTTP", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 340 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 95, - "targets": [ - { - 
"expr": "sum(rate(promhttp_metric_handler_requests_total{}[$__rate_interval])) by (instance, code)", - "format": "", - "legendFormat": "{{instance}} - {{code}}", - "refId": "" - } - ], - "title": "HTTP rate by return code", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 341 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Go Metrics", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 347 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 96, - "targets": [ - { - "expr": "sum(go_threads{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Threads", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 348 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 97, - "targets": [ - { - "expr": "sum(go_memstats_heap_alloc_bytes{}) by (instance)", - "format": "", - "legendFormat": "", - "refId": "" - } - ], - "title": "Heap Allocations Stats", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 
0, - "y": 354 - }, - "options": { - "graphMode": "none", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 98, - "targets": [ - { - "expr": "sum(go_memstats_heap_alloc_bytes{}) by (instance)", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Heap allocations Graph", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 24, - "x": 0, - "y": 360 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 99, - "targets": [ - { - "expr": "go_memstats_heap_alloc_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Alloc", - "refId": "" - }, - { - "expr": "go_memstats_heap_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Sys", - "refId": "" - }, - { - "expr": "go_memstats_heap_idle_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Idle", - "refId": "" - }, - { - "expr": "go_memstats_heap_inuse_bytes{}", - "format": "", - "legendFormat": "{{instance}} - InUse", - "refId": "" - }, - { - "expr": "go_memstats_heap_released_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Released", - "refId": "" - } - ], - "title": "Heap Usage", - "description": "", - 
"transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 366 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 100, - "targets": [ - { - "expr": "go_memstats_mspan_inuse_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Total InUse", - "refId": "" - }, - { - "expr": "go_memstats_mspan_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Total Sys", - "refId": "" - }, - { - "expr": "go_memstats_mcache_inuse_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Cache InUse", - "refId": "" - }, - { - "expr": "go_memstats_mcache_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Cache Sys", - "refId": "" - }, - { - "expr": "go_memstats_buck_hash_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Hash Sys", - "refId": "" - }, - { - "expr": "go_memstats_gc_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - GC Sys", - "refId": "" - }, - { - "expr": "go_memstats_other_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - bytes of memory are used for other runtime allocations", - "refId": "" - }, - { - "expr": "go_memstats_next_gc_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Next GC", - "refId": "" - } - ], - "title": "Memory in Off-Heap", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 366 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } 
- }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 101, - "targets": [ - { - "expr": "go_memstats_stack_inuse_bytes{}", - "format": "", - "legendFormat": "{{instance}} - InUse", - "refId": "" - }, - { - "expr": "go_memstats_stack_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}} - Sys", - "refId": "" - } - ], - "title": "Memory in Stack", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 372 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 102, - "targets": [ - { - "expr": "go_memstats_sys_bytes{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Total Used Memory", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 372 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 103, - "targets": [ - { - "expr": "go_memstats_mallocs_total{} - go_memstats_frees_total{}", - "format": "", - "legendFormat": 
"{{instance}}", - "refId": "" - } - ], - "title": "Number of Live Objects", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 378 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 104, - "targets": [ - { - "expr": "rate(go_memstats_mallocs_total{}[1m])", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Rate of Objects Allocated", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 378 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 105, - "targets": [ - { - "expr": "rate(go_memstats_lookups_total{}[1m])", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Rate of a Pointer Dereferences", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 384 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ops", - "decimals": 1, - 
"noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 106, - "targets": [ - { - "expr": "go_goroutines{}", - "format": "", - "legendFormat": "{{instance}}", - "refId": "" - } - ], - "title": "Goroutines", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 384 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "ops", - "decimals": 1, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - } - ], - "templating": { - "list": [ - { - "type": "query", - "name": "instance", - "label": "Instance", - "description": "", - "query": "label_values(instance)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": true, - "sort": 1, - "includeAll": true - } - ] - }, - "annotations": {} - }, - "Alerts": [ - { - "annotations": { - "description": "Component {{ index $labels \"service_id\" }} uptime in the last 15m is {{ index $values \"C\" }}%", - "panel_title": "Health Avg by Service over 15m", - "runbook_url": "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - "summary": "Uptime less than 90% over last 15 minutes on one component in a Node" - }, - "condition": "D", - "data": [ - { - "datasourceUid": "1", - "model": { - "expr": "health{}", - "legendFormat": "__auto", - "refId": "A" - }, - "refId": "A", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "expression": "A", - "intervalMs": 1000, - "maxDataPoints": 43200, - "reducer": 
"mean", - "refId": "B", - "type": "reduce" - }, - "refId": "B", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "expression": "$B * 100", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "C", - "type": "math" - }, - "refId": "C", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "conditions": [ - { - "evaluator": { - "params": [ - 90, - 0 - ], - "type": "lt" - } - } - ], - "expression": "C", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "D", - "type": "threshold" - }, - "refId": "D", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - } - ], - "execErrState": "Alerting", - "folderUID": "", - "for": "15m", - "labels": { - "severity": "info" - }, - "noDataState": "NoData", - "orgID": 0, - "ruleGroup": "", - "title": "Health Avg by Service is less than 90%" - }, - { - "annotations": { - "description": "Component {{ index $labels \"service_id\" }} uptime in the last 15m is {{ index $values \"C\" }}%", - "panel_title": "Health Avg by Service over 15m", - "runbook_url": "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - "summary": "Uptime less than 70% over last 15 minutes on one component in a Node" - }, - "condition": "D", - "data": [ - { - "datasourceUid": "1", - "model": { - "expr": "health{}", - "legendFormat": "__auto", - "refId": "A" - }, - "refId": "A", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "expression": "A", - "intervalMs": 1000, - "maxDataPoints": 43200, - "reducer": "mean", - "refId": "B", - "type": "reduce" - }, - "refId": "B", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "expression": "$B * 100", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "C", - "type": "math" - }, - "refId": "C", - "relativeTimeRange": { - "from": 600, - "to": 0 - 
} - }, - { - "datasourceUid": "__expr__", - "model": { - "conditions": [ - { - "evaluator": { - "params": [ - 70, - 0 - ], - "type": "lt" - } - } - ], - "expression": "C", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "D", - "type": "threshold" - }, - "refId": "D", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - } - ], - "execErrState": "Alerting", - "folderUID": "", - "for": "15m", - "labels": { - "severity": "warning" - }, - "noDataState": "NoData", - "orgID": 0, - "ruleGroup": "", - "title": "Health Avg by Service is less than 70%" - }, - { - "annotations": { - "description": "Component {{ index $labels \"service_id\" }} uptime in the last 15m is {{ index $values \"C\" }}%", - "panel_title": "Health Avg by Service over 15m", - "runbook_url": "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - "summary": "Uptime less than 50% over last 15 minutes on one component in a Node" - }, - "condition": "D", - "data": [ - { - "datasourceUid": "1", - "model": { - "expr": "health{}", - "legendFormat": "__auto", - "refId": "A" - }, - "refId": "A", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "expression": "A", - "intervalMs": 1000, - "maxDataPoints": 43200, - "reducer": "mean", - "refId": "B", - "type": "reduce" - }, - "refId": "B", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "expression": "$B * 100", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "C", - "type": "math" - }, - "refId": "C", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "conditions": [ - { - "evaluator": { - "params": [ - 50, - 0 - ], - "type": "lt" - } - } - ], - "expression": "C", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "D", - "type": "threshold" - }, - "refId": "D", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - } - ], - 
"execErrState": "Alerting", - "folderUID": "", - "for": "15m", - "labels": { - "severity": "critical" - }, - "noDataState": "NoData", - "orgID": 0, - "ruleGroup": "", - "title": "Health Avg by Service is less than 50%" - }, - { - "annotations": { - "description": "ETH Balance critically low at {{ index $values \"A\" }} on {{ index $labels \"instance\" }}", - "panel_title": "ETH Balance", - "runbook_url": "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - "summary": "ETH Balance is lower than threshold" - }, - "condition": "B", - "data": [ - { - "datasourceUid": "1", - "model": { - "expr": "eth_balance{}", - "instant": true, - "range": false, - "legendFormat": "__auto", - "refId": "A" - }, - "refId": "A", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "conditions": [ - { - "evaluator": { - "params": [ - 1, - 0 - ], - "type": "lt" - } - } - ], - "expression": "A", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "B", - "type": "threshold" - }, - "refId": "B", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - } - ], - "execErrState": "Alerting", - "folderUID": "", - "for": "15m", - "labels": { - "severity": "critical" - }, - "noDataState": "OK", - "orgID": 0, - "ruleGroup": "", - "title": "ETH Balance" - }, - { - "annotations": { - "description": "Solana Balance critically low at {{ index $values \"A\" }} on {{ index $labels \"instance\" }}", - "panel_title": "SOL Balance", - "runbook_url": "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - "summary": "Solana Balance is lower than threshold" - }, - "condition": "B", - "data": [ - { - "datasourceUid": "1", - "model": { - "expr": "solana_balance{}", - "instant": true, - "range": false, - "legendFormat": "__auto", - "refId": "A" - }, - "refId": "A", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "conditions": [ - { 
- "evaluator": { - "params": [ - 1, - 0 - ], - "type": "lt" - } - } - ], - "expression": "A", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "B", - "type": "threshold" - }, - "refId": "B", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - } - ], - "execErrState": "Alerting", - "folderUID": "", - "for": "15m", - "labels": { - "severity": "critical" - }, - "noDataState": "OK", - "orgID": 0, - "ruleGroup": "", - "title": "SOL Balance" - }, - { - "annotations": { - "description": "{{ index $labels \"instance\" }} on ChainID {{ index $labels \"ChainID\" }} has received {{ index $values \"A\" }} heads over 10 minutes.", - "panel_title": "Head Tracker Heads Received Rate", - "runbook_url": "https://github.com/smartcontractkit/chainlink-common/tree/main/observability-lib", - "summary": "No Headers Received" - }, - "condition": "B", - "data": [ - { - "datasourceUid": "1", - "model": { - "expr": "increase(head_tracker_heads_received{}[10m])", - "instant": true, - "range": false, - "legendFormat": "__auto", - "refId": "A" - }, - "refId": "A", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - }, - { - "datasourceUid": "__expr__", - "model": { - "conditions": [ - { - "evaluator": { - "params": [ - 1, - 0 - ], - "type": "lt" - } - } - ], - "expression": "A", - "intervalMs": 1000, - "maxDataPoints": 43200, - "refId": "B", - "type": "threshold" - }, - "refId": "B", - "relativeTimeRange": { - "from": 600, - "to": 0 - } - } - ], - "execErrState": "Alerting", - "folderUID": "", - "for": "10m", - "labels": { - "severity": "critical" - }, - "noDataState": "OK", - "orgID": 0, - "ruleGroup": "", - "title": "Head Tracker Heads Received Rate" - } - ], - "AlertGroups": [ - { - "interval": 60, - "title": "Core Node Dashboard" - } - ], - "ContactPoints": null, - "NotificationPolicies": null -} \ No newline at end of file diff --git a/observability-lib/dashboards/k8s-resources/component.go b/observability-lib/dashboards/k8s-resources/component.go deleted file mode 
100644 index 2e3b19d9e..000000000 --- a/observability-lib/dashboards/k8s-resources/component.go +++ /dev/null @@ -1,416 +0,0 @@ -package k8sresources - -import ( - "fmt" - - "github.com/grafana/grafana-foundation-sdk/go/cog" - "github.com/grafana/grafana-foundation-sdk/go/common" - "github.com/grafana/grafana-foundation-sdk/go/dashboard" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -type Props struct { - Name string // Name is the name of the dashboard - MetricsDataSource *grafana.DataSource // MetricsDataSource is the datasource for querying metrics -} - -func NewDashboard(props *Props) (*grafana.Observability, error) { - if props.Name == "" { - return nil, fmt.Errorf("Name is required") - } - - builder := grafana.NewBuilder(&grafana.BuilderOptions{ - Name: props.Name, - Tags: []string{"Core", "Node", "Kubernetes", "Resources"}, - Refresh: "30s", - TimeFrom: "now-30m", - TimeTo: "now", - }) - - builder.AddVars(vars(props)...) - - builder.AddRow("Headlines") - builder.AddPanel(headlines(props)...) - - builder.AddRow("Pod Status") - builder.AddPanel(podStatus(props)...) - - builder.AddRow("Resources Usage") - builder.AddPanel(resourcesUsage(props)...) - - builder.AddRow("Network Usage") - builder.AddPanel(networkUsage(props)...) - - builder.AddRow("Disk Usage") - builder.AddPanel(diskUsage(props)...) 
- - return builder.Build() -} - -func vars(p *Props) []cog.Builder[dashboard.VariableModel] { - var variables []cog.Builder[dashboard.VariableModel] - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Environment", - Name: "env", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up, env)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Cluster", - Name: "cluster", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env"}, cluster)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Namespace", - Name: "namespace", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster"}, namespace)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Job", - Name: "job", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", namespace="$namespace"}, job)`, - Multi: false, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Pod", - Name: "pod", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(up{env="$env", cluster="$cluster", namespace="$namespace", job="$job"}, pod)`, - Multi: false, - })) - - return variables -} - -func headlines(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "CPU Utilisation (from requests)", - Span: 6, - 
Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: "{{pod}}", - Instant: true, - }, - }, - }, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "CPU Utilisation (from limits)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: "{{pod}}", - Instant: true, - }, - }, - }, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory Utilisation (from requests)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", cluster="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: "{{pod}}", - Instant: true, - }, - }, - }, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, 
grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory Utilisation (from limits)", - Span: 6, - Height: 4, - Decimals: 1, - Unit: "percent", - Query: []grafana.Query{ - { - Expr: `100 * sum(container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", cluster="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster="$cluster", namespace="$namespace", pod="$pod"}) by (container)`, - Legend: "{{pod}}", - Instant: true, - }, - }, - }, - Orientation: common.VizOrientationHorizontal, - })) - - return panels -} - -func podStatus(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Pod Restarts", - Description: "Number of pod restarts", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `sum(increase(kube_pod_container_status_restarts_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - Legend: "{{pod}}", - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "OOM Events", - Description: "Out-of-memory number of events", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `sum(container_oom_events_total{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`, - Legend: "{{pod}}", - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - 
Orientation: common.VizOrientationHorizontal, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "OOM Killed", - Span: 8, - Height: 4, - Query: []grafana.Query{ - { - Expr: `kube_pod_container_status_last_terminated_reason{reason="OOMKilled", pod=~"$pod", namespace=~"${namespace}"}`, - Legend: "{{pod}}", - }, - }, - }, - ColorMode: common.BigValueColorModeNone, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - Orientation: common.VizOrientationHorizontal, - })) - - return panels -} - -func resourcesUsage(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "CPU Usage", - Span: 12, - Height: 6, - Decimals: 3, - Query: []grafana.Query{ - { - Expr: `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~"$pod", namespace=~"${namespace}"}) by (pod)`, - Legend: "{{pod}}", - }, - { - Expr: `sum(kube_pod_container_resource_requests{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="cpu"})`, - Legend: "Requests", - }, - { - Expr: `sum(kube_pod_container_resource_limits{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="cpu"})`, - Legend: "Limits", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Memory Usage", - Span: 12, - Height: 6, - Unit: "bytes", - Query: []grafana.Query{ - { - Expr: `sum(container_memory_rss{pod=~"$pod", namespace=~"${namespace}", container!=""}) by (pod)`, - Legend: "{{pod}}", - }, - { - Expr: 
`sum(kube_pod_container_resource_requests{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="memory"})`, - Legend: "Requests", - }, - { - Expr: `sum(kube_pod_container_resource_limits{job="kube-state-metrics", cluster="$cluster", namespace="$namespace", pod="$pod", resource="memory"})`, - Legend: "Limits", - }, - }, - }, - })) - - return panels -} - -func networkUsage(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Receive Bandwidth", - Span: 12, - Height: 6, - Unit: "bps", - Query: []grafana.Query{ - { - Expr: `sum(irate(container_network_receive_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - Legend: "{{pod}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Transmit Bandwidth", - Span: 12, - Height: 6, - Unit: "bps", - Query: []grafana.Query{ - { - Expr: `sum(irate(container_network_transmit_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - Legend: "{{pod}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Average Container Bandwidth: Received", - Span: 12, - Height: 6, - Unit: "bps", - Query: []grafana.Query{ - { - Expr: `avg(irate(container_network_receive_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - Legend: "{{pod}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Average Container 
Bandwidth: Transmitted", - Span: 12, - Height: 6, - Unit: "bps", - Query: []grafana.Query{ - { - Expr: `avg(irate(container_network_transmit_bytes_total{pod=~"$pod", namespace=~"${namespace}"}[$__rate_interval])) by (pod)`, - Legend: "{{pod}}", - }, - }, - }, - })) - - return panels -} - -func diskUsage(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "IOPS(Read+Write)", - Span: 12, - Height: 6, - Decimals: 2, - Unit: "short", - Query: []grafana.Query{ - { - Expr: `ceil(sum by(container, pod) (rate(container_fs_reads_total{job="kubelet", metrics_path="/metrics/cadvisor", container!="", cluster="$cluster", namespace="$namespace", pod="$pod"}[$__rate_interval]) + rate(container_fs_writes_total{job="kubelet", metrics_path="/metrics/cadvisor", container!="", cluster="$cluster", namespace="$namespace", pod="$pod"}[$__rate_interval])))`, - Legend: "{{pod}}", - }, - }, - }, - })) - - panels = append(panels, grafana.NewTimeSeriesPanel(&grafana.TimeSeriesPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "ThroughPut(Read+Write)", - Span: 12, - Height: 6, - Decimals: 2, - Unit: "short", - Query: []grafana.Query{ - { - Expr: `sum by(container, pod) (rate(container_fs_reads_bytes_total{job="kubelet", metrics_path="/metrics/cadvisor", container!="", cluster="$cluster", namespace="$namespace", pod="$pod"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job="kubelet", metrics_path="/metrics/cadvisor", container!="", cluster="$cluster", namespace="$namespace", pod="$pod"}[$__rate_interval]))`, - Legend: "{{pod}}", - }, - }, - }, - })) - - return panels -} diff --git a/observability-lib/dashboards/k8s-resources/component_test.go b/observability-lib/dashboards/k8s-resources/component_test.go deleted file mode 100644 index 
a32c7cda6..000000000 --- a/observability-lib/dashboards/k8s-resources/component_test.go +++ /dev/null @@ -1,75 +0,0 @@ -package k8sresources_test - -import ( - "flag" - "os" - "testing" - - k8sresources "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/k8s-resources" - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - - "github.com/stretchr/testify/require" -) - -var update = flag.Bool("update", false, "update golden test files") -var fileOutput = "test-output.json" - -func TestGenerateFile(t *testing.T) { - if *update == false { - t.Skip("skipping test") - } - - testDashboard, err := k8sresources.NewDashboard(&k8sresources.Props{ - Name: "K8s resources", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - if _, errExists := os.Stat(fileOutput); errExists == nil { - errRemove := os.Remove(fileOutput) - if errRemove != nil { - t.Errorf("Error removing file: %v", errRemove) - } - } - file, errFile := os.Create(fileOutput) - if errFile != nil { - panic(errFile) - } - writeString, err := file.WriteString(string(json)) - if err != nil { - t.Errorf("Error writing to file: %v", writeString) - } - t.Cleanup(func() { - file.Close() - }) -} - -func TestNewDashboard(t *testing.T) { - t.Run("NewDashboard creates a dashboard", func(t *testing.T) { - testDashboard, err := k8sresources.NewDashboard(&k8sresources.Props{ - Name: "K8s resources", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - require.IsType(t, grafana.Observability{}, *testDashboard) - require.Equal(t, "K8s resources", *testDashboard.Dashboard.Title) - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } 
- - jsonCompared, errCompared := os.ReadFile(fileOutput) - if errCompared != nil { - t.Errorf("Error reading file: %v", errCompared) - } - - require.JSONEq(t, string(jsonCompared), string(json)) - }) -} diff --git a/observability-lib/dashboards/k8s-resources/test-output.json b/observability-lib/dashboards/k8s-resources/test-output.json deleted file mode 100644 index 3d9ba2e06..000000000 --- a/observability-lib/dashboards/k8s-resources/test-output.json +++ /dev/null @@ -1,991 +0,0 @@ -{ - "Dashboard": { - "title": "K8s resources", - "tags": [ - "Core", - "Node", - "Kubernetes", - "Resources" - ], - "timezone": "browser", - "editable": true, - "graphTooltip": 0, - "time": { - "from": "now-30m", - "to": "now" - }, - "fiscalYearStartMonth": 0, - "refresh": "30s", - "schemaVersion": 39, - "panels": [ - { - "type": "row", - "collapsed": false, - "title": "Headlines", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 1, - "targets": [ - { - "expr": "100 * sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "CPU Utilisation (from requests)", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 0, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - 
"fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 2, - "targets": [ - { - "expr": "100 * sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "CPU Utilisation (from limits)", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 3, - "targets": [ - { - "expr": "100 * sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Memory Utilisation (from requests)", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 1 - }, - 
"options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 4, - "targets": [ - { - "expr": "100 * sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", - "instant": true, - "range": false, - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Memory Utilisation (from limits)", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 1 - }, - "options": { - "graphMode": "none", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "percent", - "decimals": 1, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Pod Status", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 5, - "targets": [ - { - "expr": "sum(increase(kube_pod_container_status_restarts_total{pod=~\"$pod\", 
namespace=~\"${namespace}\"}[$__rate_interval])) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Pod Restarts", - "description": "Number of pod restarts", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 0, - "y": 6 - }, - "options": { - "graphMode": "line", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 6, - "targets": [ - { - "expr": "sum(container_oom_events_total{pod=~\"$pod\", namespace=~\"${namespace}\"}) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "OOM Events", - "description": "Out-of-memory number of events", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 8, - "y": 6 - }, - "options": { - "graphMode": "line", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 7, - "targets": [ - { - "expr": "kube_pod_container_status_last_terminated_reason{reason=\"OOMKilled\", pod=~\"$pod\", namespace=~\"${namespace}\"}", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "OOM Killed", - "description": "", - 
"transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 4, - "w": 8, - "x": 16, - "y": 6 - }, - "options": { - "graphMode": "line", - "colorMode": "none", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "horizontal" - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 0, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Resources Usage", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 8, - "targets": [ - { - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{pod=~\"$pod\", namespace=~\"${namespace}\"}) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - }, - { - "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"})", - "format": "", - "legendFormat": "Requests", - "refId": "" - }, - { - "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"})", - "format": "", - "legendFormat": "Limits", - "refId": "" - } - ], - "title": "CPU Usage", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 11 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "", - "decimals": 3, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - 
"type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 9, - "targets": [ - { - "expr": "sum(container_memory_rss{pod=~\"$pod\", namespace=~\"${namespace}\", container!=\"\"}) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - }, - { - "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"})", - "format": "", - "legendFormat": "Requests", - "refId": "" - }, - { - "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"})", - "format": "", - "legendFormat": "Limits", - "refId": "" - } - ], - "title": "Memory Usage", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 11 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bytes", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Network Usage", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 10, - "targets": [ - { - "expr": "sum(irate(container_network_receive_bytes_total{pod=~\"$pod\", namespace=~\"${namespace}\"}[$__rate_interval])) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Receive Bandwidth", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 18 - }, - "options": { - "legend": { - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bps", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 11, - "targets": [ - { - "expr": "sum(irate(container_network_transmit_bytes_total{pod=~\"$pod\", namespace=~\"${namespace}\"}[$__rate_interval])) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Transmit Bandwidth", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 18 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bps", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 12, - "targets": [ - { - "expr": "avg(irate(container_network_receive_bytes_total{pod=~\"$pod\", namespace=~\"${namespace}\"}[$__rate_interval])) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Average Container Bandwidth: Received", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 24 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bps", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - 
} - }, - { - "type": "timeseries", - "id": 13, - "targets": [ - { - "expr": "avg(irate(container_network_transmit_bytes_total{pod=~\"$pod\", namespace=~\"${namespace}\"}[$__rate_interval])) by (pod)", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "Average Container Bandwidth: Transmitted", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 24 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "bps", - "decimals": 0, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Disk Usage", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 0, - "panels": null - }, - { - "type": "timeseries", - "id": 14, - "targets": [ - { - "expr": "ceil(sum by(container, pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "IOPS(Read+Write)", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 31 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "short", - "decimals": 2, - "noValue": "No 
data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - }, - { - "type": "timeseries", - "id": 15, - "targets": [ - { - "expr": "sum by(container, pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", - "format": "", - "legendFormat": "{{pod}}", - "refId": "" - } - ], - "title": "ThroughPut(Read+Write)", - "description": "", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 31 - }, - "options": { - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "calcs": [] - }, - "tooltip": { - "mode": "", - "sort": "" - } - }, - "fieldConfig": { - "defaults": { - "unit": "short", - "decimals": 2, - "noValue": "No data", - "custom": { - "fillOpacity": 0, - "scaleDistribution": { - "type": "linear" - } - } - }, - "overrides": null - } - } - ], - "templating": { - "list": [ - { - "type": "query", - "name": "env", - "label": "Environment", - "description": "", - "query": "label_values(up, env)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "cluster", - "label": "Cluster", - "description": "", - "query": "label_values(up{env=\"$env\"}, cluster)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "namespace", - "label": "Namespace", - "description": "", - "query": 
"label_values(up{env=\"$env\", cluster=\"$cluster\"}, namespace)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "job", - "label": "Job", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", namespace=\"$namespace\"}, job)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "pod", - "label": "Pod", - "description": "", - "query": "label_values(up{env=\"$env\", cluster=\"$cluster\", namespace=\"$namespace\", job=\"$job\"}, pod)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - } - ] - }, - "annotations": {} - }, - "Alerts": null, - "AlertGroups": null, - "ContactPoints": null, - "NotificationPolicies": null -} \ No newline at end of file diff --git a/observability-lib/dashboards/nop-ocr/component.go b/observability-lib/dashboards/nop-ocr/component.go deleted file mode 100644 index 9978c5f73..000000000 --- a/observability-lib/dashboards/nop-ocr/component.go +++ /dev/null @@ -1,352 +0,0 @@ -package nopocr - -import ( - "fmt" - - "github.com/grafana/grafana-foundation-sdk/go/cog" - "github.com/grafana/grafana-foundation-sdk/go/common" - "github.com/grafana/grafana-foundation-sdk/go/dashboard" - - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" -) - -type Props struct { - Name string // Name is the name of the dashboard - MetricsDataSource *grafana.DataSource // MetricsDataSource is the datasource for querying metrics - OCRVersion string // OCRVersion is the version of the OCR (ocr, ocr2, ocr3) -} - -func NewDashboard(props *Props) (*grafana.Observability, error) { - if props.Name == 
"" { - return nil, fmt.Errorf("Name is required") - } - - builder := grafana.NewBuilder(&grafana.BuilderOptions{ - Name: props.Name, - Tags: []string{"NOP", "Health", props.OCRVersion}, - Refresh: "30s", - TimeFrom: "now-1d", - TimeTo: "now", - }) - - builder.AddVars(vars(props)...) - - builder.AddRow("Per Contract") - builder.AddPanel(perContract(props)...) - - builder.AddRow("Per NOP") - builder.AddPanel(perNOP(props)...) - - return builder.Build() -} - -func vars(p *Props) []cog.Builder[dashboard.VariableModel] { - var variables []cog.Builder[dashboard.VariableModel] - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Environment", - Name: "env", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(` + p.OCRVersion + `_contract_config_f{}, env)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "Contract", - Name: "contract", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(` + p.OCRVersion + `_contract_oracle_active{env="$env"}, contract)`, - })) - - variables = append(variables, grafana.NewQueryVariable(&grafana.QueryVariableOptions{ - VariableOption: &grafana.VariableOption{ - Label: "NOP", - Name: "oracle", - }, - Datasource: p.MetricsDataSource.Name, - Query: `label_values(` + p.OCRVersion + `_contract_oracle_active{env="$env"}, oracle)`, - })) - - return variables -} - -func perContract(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Rounds Epoch Progression", - Description: "Rounds have stopped progressing for 90 seconds means NOP is unhealthy", - Span: 24, - Height: 10, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: 
`avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_epoch_round{env=~"${env}", contract=~"${contract}"}[90s])) by (env, contract, feed_id, network_name, oracle) >bool 0)[$__range:])`, - Legend: `{{oracle}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Message Observe", - Description: "NOP have stopped sending messages for 3mins means NOP is unhealthy", - Span: 24, - Height: 10, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_message_observe_total{env=~"${env}", contract=~"${contract}"}[3m])) by (env, contract, feed_id, network_name, oracle) >bool 0)[$__range:])`, - Legend: `{{oracle}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: 
common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Observations included in report", - Description: "NOP observations were not including in report for 3mins means NOP is unhealthy", - Span: 24, - Height: 10, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_message_report_req_observation_total{env=~"${env}", contract=~"${contract}"}[3m])) by (env, contract, feed_id, network_name, oracle) >bool 0)[$__range:])`, - Legend: `{{oracle}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - return panels -} - -func perNOP(p *Props) []*grafana.Panel { - var panels []*grafana.Panel - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Rounds Epoch Progression", - Description: "Rounds have stopped progressing for 5mins means NOP is unhealthy", - Span: 24, - Height: 32, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_epoch_round{env=~"${env}", oracle=~"${oracle}"}[90s])) by (env, contract, feed_id, network_name, oracle) >bool 
0)[$__range:])`, - Legend: `{{contract}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Message Observe", - Description: "NOP have stopped sending messages for 3mins means NOP is unhealthy", - Span: 24, - Height: 32, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_message_observe_total{env=~"${env}", oracle=~"${oracle}"}[3m])) by (env, contract, feed_id, network_name, oracle) >bool 0)[$__range:])`, - Legend: `{{contract}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: 
&grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "Observations included in report", - Description: "NOP observations were not including in report for 3mins means NOP is unhealthy", - Span: 24, - Height: 32, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_message_report_req_observation_total{env=~"${env}", oracle=~"${oracle}"}[3m])) by (env, contract, feed_id, network_name, oracle) >bool 0)[$__range:])`, - Legend: `{{contract}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - panels = append(panels, grafana.NewStatPanel(&grafana.StatPanelOptions{ - PanelOptions: &grafana.PanelOptions{ - Datasource: p.MetricsDataSource.Name, - Title: "P2P Connectivity", - Description: "Connectivity got interrupted for 60 seconds received from other nodes", - Span: 24, - Height: 32, - Decimals: 2, - Unit: "percentunit", - Query: []grafana.Query{ - { - Expr: `avg_over_time((sum(changes(` + p.OCRVersion + `_telemetry_p2p_received_total{env=~"${env}", receiver=~"${oracle}"}[3m])) by (sender, receiver) >bool 0)[$__range:])`, - Legend: `{{receiver}} < {{sender}}`, - }, - }, - Threshold: &grafana.ThresholdOptions{ - Mode: dashboard.ThresholdsModeAbsolute, - Steps: []dashboard.Threshold{ - {Value: nil, Color: "default"}, - {Value: grafana.Pointer[float64](0), Color: "red"}, - {Value: 
grafana.Pointer[float64](0.80), Color: "orange"}, - {Value: grafana.Pointer[float64](0.99), Color: "green"}, - }, - }, - Transform: &grafana.TransformOptions{ - ID: "renameByRegex", - Options: map[string]string{ - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "", - }, - }, - }, - TextSize: 10, - ValueSize: 18, - GraphMode: common.BigValueGraphModeLine, - TextMode: common.BigValueTextModeValueAndName, - })) - - return panels -} diff --git a/observability-lib/dashboards/nop-ocr/component_test.go b/observability-lib/dashboards/nop-ocr/component_test.go deleted file mode 100644 index cfa4009a3..000000000 --- a/observability-lib/dashboards/nop-ocr/component_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package nopocr_test - -import ( - "flag" - "os" - "testing" - - nopocr "github.com/smartcontractkit/chainlink-common/observability-lib/dashboards/nop-ocr" - "github.com/smartcontractkit/chainlink-common/observability-lib/grafana" - - "github.com/stretchr/testify/require" -) - -var update = flag.Bool("update", false, "update golden test files") - -const fileOutput = "test-output.json" - -func TestGenerateFile(t *testing.T) { - if *update == false { - t.Skip("skipping test") - } - - testDashboard, err := nopocr.NewDashboard(&nopocr.Props{ - Name: "NOP OCR Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - if _, errExists := os.Stat(fileOutput); errExists == nil { - errRemove := os.Remove(fileOutput) - if errRemove != nil { - t.Errorf("Error removing file: %v", errRemove) - } - } - file, errFile := os.Create(fileOutput) - if errFile != nil { - panic(errFile) - } - writeString, err := file.WriteString(string(json)) - if err != nil { - t.Errorf("Error writing to file: %v", writeString) - } - t.Cleanup(func() { - file.Close() - }) -} - -func TestNewDashboard(t 
*testing.T) { - t.Run("NewDashboard creates a dashboard", func(t *testing.T) { - testDashboard, err := nopocr.NewDashboard(&nopocr.Props{ - Name: "NOP OCR Dashboard", - MetricsDataSource: grafana.NewDataSource("Prometheus", ""), - }) - if err != nil { - t.Errorf("Error creating dashboard: %v", err) - } - require.IsType(t, grafana.Observability{}, *testDashboard) - require.Equal(t, "NOP OCR Dashboard", *testDashboard.Dashboard.Title) - json, errJSON := testDashboard.GenerateJSON() - if errJSON != nil { - t.Errorf("Error generating JSON: %v", errJSON) - } - - jsonCompared, errCompared := os.ReadFile(fileOutput) - if errCompared != nil { - t.Errorf("Error reading file: %v", errCompared) - } - - require.JSONEq(t, string(jsonCompared), string(json)) - }) -} diff --git a/observability-lib/dashboards/nop-ocr/test-output.json b/observability-lib/dashboards/nop-ocr/test-output.json deleted file mode 100644 index 7d2f71e11..000000000 --- a/observability-lib/dashboards/nop-ocr/test-output.json +++ /dev/null @@ -1,687 +0,0 @@ -{ - "Dashboard": { - "title": "NOP OCR Dashboard", - "tags": [ - "NOP", - "Health", - "" - ], - "timezone": "browser", - "editable": true, - "graphTooltip": 0, - "time": { - "from": "now-1d", - "to": "now" - }, - "fiscalYearStartMonth": 0, - "refresh": "30s", - "schemaVersion": 39, - "panels": [ - { - "type": "row", - "collapsed": false, - "title": "Per Contract", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 1, - "targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_epoch_round{env=~\"${env}\", contract=~\"${contract}\"}[90s])) by (env, contract, feed_id, network_name, oracle) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Rounds Epoch Progression", - "description": "Rounds have stopped progressing for 90 seconds means NOP is unhealthy", - "transparent": false, - "datasource": { - "uid": "Prometheus" - 
}, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 1 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 2, - "targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_message_observe_total{env=~\"${env}\", contract=~\"${contract}\"}[3m])) by (env, contract, feed_id, network_name, oracle) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Message Observe", - "description": "NOP have stopped sending messages for 3mins means NOP is unhealthy", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 11 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { 
- "unit": "percentunit", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 3, - "targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_message_report_req_observation_total{env=~\"${env}\", contract=~\"${contract}\"}[3m])) by (env, contract, feed_id, network_name, oracle) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{oracle}}", - "refId": "" - } - ], - "title": "Observations included in report", - "description": "NOP observations were not including in report for 3mins means NOP is unhealthy", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 21 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "row", - "collapsed": false, - "title": "Per NOP", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 0, - "panels": null - }, - { - "type": "stat", - "id": 4, - 
"targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_epoch_round{env=~\"${env}\", oracle=~\"${oracle}\"}[90s])) by (env, contract, feed_id, network_name, oracle) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Rounds Epoch Progression", - "description": "Rounds have stopped progressing for 5mins means NOP is unhealthy", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 32, - "w": 24, - "x": 0, - "y": 32 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 5, - "targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_message_observe_total{env=~\"${env}\", oracle=~\"${oracle}\"}[3m])) by (env, contract, feed_id, network_name, oracle) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Message Observe", - "description": "NOP have stopped sending messages for 3mins means NOP is unhealthy", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 32, - "w": 24, - "x": 0, - "y": 64 - }, - "transformations": [ - { - "id": 
"renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 6, - "targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_message_report_req_observation_total{env=~\"${env}\", oracle=~\"${oracle}\"}[3m])) by (env, contract, feed_id, network_name, oracle) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{contract}}", - "refId": "" - } - ], - "title": "Observations included in report", - "description": "NOP observations were not including in report for 3mins means NOP is unhealthy", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 32, - "w": 24, - "x": 0, - "y": 96 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "thresholds": 
{ - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - }, - { - "type": "stat", - "id": 7, - "targets": [ - { - "expr": "avg_over_time((sum(changes(_telemetry_p2p_received_total{env=~\"${env}\", receiver=~\"${oracle}\"}[3m])) by (sender, receiver) \u003ebool 0)[$__range:])", - "format": "", - "legendFormat": "{{receiver}} \u003c {{sender}}", - "refId": "" - } - ], - "title": "P2P Connectivity", - "description": "Connectivity got interrupted for 60 seconds received from other nodes", - "transparent": false, - "datasource": { - "uid": "Prometheus" - }, - "gridPos": { - "h": 32, - "w": 24, - "x": 0, - "y": 128 - }, - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "/^(.*[\\\\\\/])/", - "renamePattern": "" - } - } - ], - "options": { - "graphMode": "line", - "colorMode": "value", - "justifyMode": "auto", - "textMode": "value_and_name", - "wideLayout": true, - "showPercentChange": false, - "reduceOptions": { - "calcs": [ - "last" - ] - }, - "text": { - "titleSize": 10, - "valueSize": 18 - }, - "percentChangeColorMode": "standard", - "orientation": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "percentunit", - "decimals": 2, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "value": null, - "color": "default" - }, - { - "value": 0, - "color": "red" - }, - { - "value": 0.8, - "color": "orange" - }, - { - "value": 0.99, - "color": "green" - } - ] - }, - "noValue": "No data" - }, - "overrides": null - } - } - ], - "templating": { - "list": [ - { - "type": "query", - "name": "env", - "label": "Environment", - "description": "", - "query": "label_values(_contract_config_f{}, env)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] 
- }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "contract", - "label": "Contract", - "description": "", - "query": "label_values(_contract_oracle_active{env=\"$env\"}, contract)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - }, - { - "type": "query", - "name": "oracle", - "label": "NOP", - "description": "", - "query": "label_values(_contract_oracle_active{env=\"$env\"}, oracle)", - "datasource": { - "uid": "Prometheus" - }, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "multi": false, - "sort": 1 - } - ] - }, - "annotations": {} - }, - "Alerts": null, - "AlertGroups": null, - "ContactPoints": null, - "NotificationPolicies": null -} \ No newline at end of file diff --git a/observability-lib/go.mod b/observability-lib/go.mod index e03ce2f63..544957751 100644 --- a/observability-lib/go.mod +++ b/observability-lib/go.mod @@ -5,7 +5,6 @@ go 1.21.4 require ( github.com/go-resty/resty/v2 v2.15.3 github.com/grafana/grafana-foundation-sdk/go v0.0.0-20241009194022-923b32e3e69b - github.com/rs/zerolog v1.33.0 github.com/spf13/cobra v1.8.1 github.com/stretchr/testify v1.9.0 gopkg.in/yaml.v3 v3.0.1 @@ -15,12 +14,9 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/kr/pretty v0.3.1 // indirect - github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/spf13/pflag v1.0.5 // indirect golang.org/x/net v0.30.0 // indirect - golang.org/x/sys v0.26.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect ) diff --git a/observability-lib/go.sum b/observability-lib/go.sum index f59de2c33..02dadb6ce 100644 --- a/observability-lib/go.sum +++ 
b/observability-lib/go.sum @@ -1,11 +1,9 @@ -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-resty/resty/v2 v2.15.3 h1:bqff+hcqAflpiF591hhJzNdkRsFhlB96CYfBwSFvql8= github.com/go-resty/resty/v2 v2.15.3/go.mod h1:0fHAoK7JoBy/Ch36N8VFeMsK7xQOHhvWaC3iOktwmIU= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/grafana/grafana-foundation-sdk/go v0.0.0-20241009194022-923b32e3e69b h1:YxlugK0wL5hh86wT0hZSGw9cPTvacOUmHxjP15fsIlE= github.com/grafana/grafana-foundation-sdk/go v0.0.0-20241009194022-923b32e3e69b/go.mod h1:WtWosval1KCZP9BGa42b8aVoJmVXSg0EvQXi9LDSVZQ= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -17,22 +15,12 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= -github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= -github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= -github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8= -github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= @@ -42,11 +30,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= -golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= 
golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/observability-lib/main.go b/observability-lib/main.go index 73a5d750d..0a1350eb7 100644 --- a/observability-lib/main.go +++ b/observability-lib/main.go @@ -1,26 +1,9 @@ package main import ( - "log" - - "github.com/spf13/cobra" - "github.com/smartcontractkit/chainlink-common/observability-lib/cmd" ) -var rootCmd = &cobra.Command{ - Use: "observability-lib", - Short: "observability-lib is a library for creating and deploying Grafana dashboards and alerts", -} - -func init() { - rootCmd.AddCommand(cmd.DeployCmd) - rootCmd.AddCommand(cmd.DeleteCmd) - rootCmd.AddCommand(cmd.GenerateCmd) -} - func main() { - if err := rootCmd.Execute(); err != nil { - log.Fatalln(err) - } + cmd.Execute() }