diff --git a/charts/chainlink-cluster/README.md b/charts/chainlink-cluster/README.md index 0fbbd5d16df..a0a4b8a78cc 100644 --- a/charts/chainlink-cluster/README.md +++ b/charts/chainlink-cluster/README.md @@ -151,6 +151,9 @@ We are using [Grabana](https://github.com/K-Phoen/grabana) lib to create dashboa You can also select dashboard platform in `INFRA_PLATFORM` either `kubernetes` or `docker` +You can select the dashboard panels with `PANELS_INCLUDED` which is a list of panel names separated by comma +If you don't specify it will include core panels by default + ``` export LOKI_TENANT_ID=promtail export LOKI_URL=... @@ -159,7 +162,7 @@ export GRAFANA_TOKEN=... export PROMETHEUS_DATA_SOURCE_NAME=Thanos export LOKI_DATA_SOURCE_NAME=Loki export INFRA_PLATFORM=kubernetes -export GRAFANA_FOLDER=CRIB +export GRAFANA_FOLDER=DashboardCoreDebug export DASHBOARD_NAME=CL-Cluster devspace run dashboard_deploy diff --git a/charts/chainlink-cluster/dashboard/cmd/delete.go b/charts/chainlink-cluster/dashboard/cmd/delete.go new file mode 100644 index 00000000000..45b4b11d67f --- /dev/null +++ b/charts/chainlink-cluster/dashboard/cmd/delete.go @@ -0,0 +1,19 @@ +package main + +import ( + lib "github.com/smartcontractkit/chainlink/dashboard-lib" +) + +func main() { + cfg := lib.ReadEnvDeployOpts() + db := lib.NewDashboard(cfg.Name, cfg, nil) + err := db.Delete() + if err != nil { + lib.L.Fatal().Err(err).Msg("failed to delete the dashboard") + } + lib.L.Info(). + Str("Name", db.Name). + Str("GrafanaURL", db.DeployOpts.GrafanaURL). + Str("GrafanaFolder", db.DeployOpts.GrafanaFolder). 
+ Msg("Dashboard deleted") +} diff --git a/charts/chainlink-cluster/dashboard/cmd/deploy.go b/charts/chainlink-cluster/dashboard/cmd/deploy.go index 883c1939a6b..24c3af4589b 100644 --- a/charts/chainlink-cluster/dashboard/cmd/deploy.go +++ b/charts/chainlink-cluster/dashboard/cmd/deploy.go @@ -3,31 +3,48 @@ package main import ( "github.com/K-Phoen/grabana/dashboard" lib "github.com/smartcontractkit/chainlink/dashboard-lib" + atlas_don "github.com/smartcontractkit/chainlink/dashboard-lib/atlas-don" core_don "github.com/smartcontractkit/chainlink/dashboard-lib/core-don" k8spods "github.com/smartcontractkit/chainlink/dashboard-lib/k8s-pods" waspdb "github.com/smartcontractkit/wasp/dashboard" -) - -const ( - DashboardName = "Chainlink Cluster (DON)" + "strings" ) func main() { cfg := lib.ReadEnvDeployOpts() - db := lib.NewDashboard(DashboardName, cfg, + db := lib.NewDashboard(cfg.Name, cfg, []dashboard.Option{ dashboard.AutoRefresh("10s"), dashboard.Tags([]string{"generated"}), }, ) - db.Add( - core_don.New( - core_don.Props{ - PrometheusDataSource: cfg.DataSources.Prometheus, - PlatformOpts: core_don.PlatformPanelOpts(cfg.Platform), - }, - ), - ) + if len(cfg.PanelsIncluded) == 0 || cfg.PanelsIncluded["core"] { + db.Add( + core_don.New( + core_don.Props{ + PrometheusDataSource: cfg.DataSources.Prometheus, + PlatformOpts: core_don.PlatformPanelOpts(cfg.Platform), + }, + ), + ) + // TODO: refactor as a component later + addWASPRows(db, cfg) + } + if cfg.PanelsIncluded["ocr"] || cfg.PanelsIncluded["ocr2"] || cfg.PanelsIncluded["ocr3"] { + for key := range cfg.PanelsIncluded { + if strings.Contains(key, "ocr") { + db.Add( + atlas_don.New( + atlas_don.Props{ + PrometheusDataSource: cfg.DataSources.Prometheus, + PlatformOpts: atlas_don.PlatformPanelOpts(cfg.Platform, key), + OcrVersion: key, + }, + ), + ) + } + } + } if cfg.Platform == "kubernetes" { db.Add( k8spods.New( @@ -38,13 +55,11 @@ func main() { ), ) } - // TODO: refactor as a component later - addWASPRows(db, 
cfg) if err := db.Deploy(); err != nil { lib.L.Fatal().Err(err).Msg("failed to deploy the dashboard") } lib.L.Info(). - Str("Name", DashboardName). + Str("Name", db.Name). Str("GrafanaURL", db.DeployOpts.GrafanaURL). Str("GrafanaFolder", db.DeployOpts.GrafanaFolder). Msg("Dashboard deployed") diff --git a/dashboard-lib/atlas-don/component.go b/dashboard-lib/atlas-don/component.go new file mode 100644 index 00000000000..39218c7aea8 --- /dev/null +++ b/dashboard-lib/atlas-don/component.go @@ -0,0 +1,611 @@ +package atlas_don + +import ( + "fmt" + "github.com/K-Phoen/grabana/dashboard" + "github.com/K-Phoen/grabana/row" + "github.com/K-Phoen/grabana/stat" + "github.com/K-Phoen/grabana/target/prometheus" + "github.com/K-Phoen/grabana/timeseries" + "github.com/K-Phoen/grabana/timeseries/axis" + "github.com/K-Phoen/grabana/variable/query" +) + +type Props struct { + PrometheusDataSource string + PlatformOpts PlatformOpts + OcrVersion string +} + +func vars(p Props) []dashboard.Option { + variableFeedId := "feed_id" + if p.OcrVersion == "ocr3" { + variableFeedId = "feed_id_name" + } + + variableQueryContract := dashboard.VariableAsQuery( + "contract", + query.DataSource(p.PrometheusDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request(fmt.Sprintf(`label_values(`+p.OcrVersion+`_contract_config_f{job="$job"}, %s)`, "contract")), + query.Sort(query.NumericalAsc), + ) + + variableQueryFeedId := dashboard.VariableAsQuery( + variableFeedId, + query.DataSource(p.PrometheusDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request(fmt.Sprintf(`label_values(`+p.OcrVersion+`_contract_config_f{job="$job", contract="$contract"}, %s)`, variableFeedId)), + query.Sort(query.NumericalAsc), + ) + + variables := []dashboard.Option{ + variableQueryContract, + } + + switch p.OcrVersion { + case "ocr": + break + case "ocr2": + variables = append(variables, variableQueryFeedId) + break + case "ocr3": + variables = append(variables, variableQueryFeedId) + break + } 
+ + return variables +} + +func summary(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("Summary", + row.WithStat( + "Telemetry Down", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("Which jobs are not receiving any telemetry?"), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(12), + stat.Span(4), + stat.WithPrometheusTarget( + `bool:`+p.OcrVersion+`_telemetry_down{`+p.PlatformOpts.LabelQuery+`} == 1`, + prometheus.Legend("{{job}} | {{report_type}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#008000", Value: float64Ptr(0.0)}, + {Color: "#FF0000", Value: float64Ptr(1.0)}, + }), + ), + row.WithStat( + "Oracles Down", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("Which NOPs are not providing any telemetry?"), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(12), + stat.Span(4), + stat.ValueType(stat.Last), + stat.WithPrometheusTarget( + `bool:`+p.OcrVersion+`_oracle_telemetry_down_except_telemetry_down{job=~"${job}", oracle!="csa_unknown"} == 1`, + prometheus.Legend("{{oracle}} | {{report_type}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#008000", Value: float64Ptr(0.0)}, + {Color: "#FF0000", Value: float64Ptr(1.0)}, + }), + ), + row.WithStat( + "Feeds reporting failure", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("Which feeds are failing to report?"), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(12), + stat.Span(4), + stat.ValueType(stat.Last), + stat.WithPrometheusTarget( + `bool:`+p.OcrVersion+`_feed_reporting_failure_except_feed_telemetry_down{job=~"${job}", oracle!="csa_unknown"} == 1`, + prometheus.Legend("{{feed_id_name}} on {{job}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#008000", Value: 
float64Ptr(0.0)}, + {Color: "#FF0000", Value: float64Ptr(1.0)}, + }), + ), + row.WithStat( + "Feed telemetry Down", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("Which feeds are not receiving any telemetry?"), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(12), + stat.Span(4), + stat.ValueType(stat.Last), + stat.WithPrometheusTarget( + `bool:`+p.OcrVersion+`_feed_telemetry_down_except_telemetry_down{job=~"${job}"} == 1`, + prometheus.Legend("{{feed_id_name}} on {{job}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#008000", Value: float64Ptr(0.0)}, + {Color: "#FF0000", Value: float64Ptr(1.0)}, + }), + ), + row.WithStat( + "Oracles no observations", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("Which NOPs are not providing observations?"), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(12), + stat.Span(4), + stat.ValueType(stat.Last), + stat.WithPrometheusTarget( + `bool:`+p.OcrVersion+`_oracle_blind_except_telemetry_down{job=~"${job}"} == 1`, + prometheus.Legend("{{oracle}} | {{report_type}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#008000", Value: float64Ptr(0.0)}, + {Color: "#FF0000", Value: float64Ptr(1.0)}, + }), + ), + row.WithStat( + "Oracles not contributing observations to feeds", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("Which oracles are failing to make observations on feeds they should be participating in?"), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(12), + stat.Span(4), + stat.ValueType(stat.Last), + stat.WithPrometheusTarget( + `bool:`+p.OcrVersion+`_oracle_feed_no_observations_except_oracle_blind_except_feed_reporting_failure_except_feed_telemetry_down{job=~"${job}"} == 1`, + prometheus.Legend("{{oracle}} | 
{{report_type}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#008000", Value: float64Ptr(0.0)}, + {Color: "#FF0000", Value: float64Ptr(1.0)}, + }), + ), + ), + } +} + +func ocrContractConfigOracle(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("OCR Contract Oracle", + row.Collapse(), + row.WithStat( + "OCR Contract Oracle Active", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextName), + stat.Description("set to one as long as an oracle is on a feed"), + stat.Orientation(stat.OrientationHorizontal), + stat.ValueFontSize(12), + stat.Span(12), + stat.WithPrometheusTarget( + `sum(`+p.OcrVersion+`_contract_oracle_active{`+p.PlatformOpts.LabelQuery+`}) by (contract, oracle)`, + prometheus.Legend("{{ contract }} - {{oracle}}"), + ), + stat.AbsoluteThresholds([]stat.ThresholdStep{ + {Color: "#FF0000", Value: float64Ptr(0.0)}, + {Color: "#008000", Value: float64Ptr(1.0)}, + }), + ), + ), + } +} + +func ocrContractConfigNodes(p Props) []dashboard.Option { + variableFeedId := "feed_id" + if p.OcrVersion == "ocr3" { + variableFeedId = "feed_id_name" + } + + var options []timeseries.Option + + options = append(options, timeseries.Span(12), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Legend(timeseries.ToTheRight), + timeseries.Axis( + axis.Min(0), + ), + ) + + switch p.OcrVersion { + case "ocr": + options = append(options, timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_n{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{contract}}"), + )) + break + case "ocr2": + options = append(options, timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_n{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{"+variableFeedId+"}}"), + )) + break + case "ocr3": + options = append(options, timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_message_observe_total_nop_count{contract=~"${contract}", `+variableFeedId+`=~"${`+variableFeedId+`}", 
job=~"${job}"}`, + prometheus.Legend("{{"+variableFeedId+"}}"), + )) + break + } + + options = append(options, + timeseries.WithPrometheusTarget( + `avg(2 * `+p.OcrVersion+`_contract_config_r_max{`+p.PlatformOpts.LabelQuery+`} + 4)`, + prometheus.Legend("Max nodes"), + ), + timeseries.WithPrometheusTarget( + `avg(2 * `+p.OcrVersion+`_contract_config_f{`+p.PlatformOpts.LabelQuery+`} + 1)`, + prometheus.Legend("Min nodes"), + ), + ) + + return []dashboard.Option{ + dashboard.Row("DON Nodes", + row.Collapse(), + row.WithTimeSeries( + "Number of NOPs", + options..., + ), + ), + } +} + +func priceReporting(p Props) []dashboard.Option { + telemetryP2PReceivedTotal := row.WithTimeSeries( + "P2P messages received", + timeseries.Span(12), + timeseries.Height("600px"), + timeseries.Description("From an individual node's perspective, how many messages are they receiving from other nodes? Uses ocr_telemetry_p2p_received_total"), + timeseries.Axis( + axis.Min(0), + ), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `sum by (sender, receiver) (increase(`+p.OcrVersion+`_telemetry_p2p_received_total{job=~"${job}"}[5m]))`, + prometheus.Legend("{{sender}} > {{receiver}}"), + ), + ) + + telemetryP2PReceivedTotalRate := row.WithTimeSeries( + "P2P messages received Rate", + timeseries.Span(12), + timeseries.Height("600px"), + timeseries.Description("From an individual node's perspective, how many messages are they receiving from other nodes? 
Uses ocr_telemetry_p2p_received_total"), + timeseries.Axis( + axis.Min(0), + ), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `sum by (sender, receiver) (rate(`+p.OcrVersion+`_telemetry_p2p_received_total{job=~"${job}"}[5m]))`, + prometheus.Legend("{{sender}} > {{receiver}}"), + ), + ) + + telemetryObservationAsk := row.WithTimeSeries( + "Ask observation in MessageObserve sent", + timeseries.Span(12), + timeseries.Legend(timeseries.ToTheRight), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_observation_ask{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryObservation := row.WithTimeSeries( + "Price observation in MessageObserve sent", + timeseries.Span(12), + timeseries.Legend(timeseries.ToTheRight), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_observation{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryObservationBid := row.WithTimeSeries( + "Bid observation in MessageObserve sent", + timeseries.Span(12), + timeseries.Legend(timeseries.ToTheRight), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_observation_bid{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryMessageProposeObservationAsk := row.WithTimeSeries( + "Ask MessagePropose observations", + timeseries.Span(12), + timeseries.Legend(timeseries.ToTheRight), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_message_propose_observation_ask{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryMessageProposeObservation := row.WithTimeSeries( + "Price MessagePropose observations", + timeseries.Span(12), + timeseries.Legend(timeseries.ToTheRight), + 
timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_message_propose_observation{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryMessageProposeObservationBid := row.WithTimeSeries( + "Bid MessagePropose observations", + timeseries.Span(12), + timeseries.Legend(timeseries.ToTheRight), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_message_propose_observation_bid{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryMessageProposeObservationTotal := row.WithTimeSeries( + "Total number of observations included in MessagePropose", + timeseries.Span(12), + timeseries.Description("How often is a node's observation included in the report?"), + timeseries.Legend(timeseries.ToTheRight), + timeseries.Axis( + axis.Min(0), + ), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `rate(`+p.OcrVersion+`_telemetry_message_propose_observation_total{`+p.PlatformOpts.LabelQuery+`}[5m])`, + prometheus.Legend("{{oracle}}"), + ), + ) + + telemetryMessageObserveTotal := row.WithTimeSeries( + "Total MessageObserve sent", + timeseries.Span(12), + timeseries.Description("From an individual node's perspective, how often are they sending an observation?"), + timeseries.Legend(timeseries.ToTheRight), + timeseries.Axis( + axis.Min(0), + ), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.WithPrometheusTarget( + `rate(`+p.OcrVersion+`_telemetry_message_observe_total{`+p.PlatformOpts.LabelQuery+`}[5m])`, + prometheus.Legend("{{oracle}}"), + ), + ) + + panels := []row.Option{ + row.Collapse(), + } + + switch p.OcrVersion { + case "ocr": + panels = append(panels, telemetryP2PReceivedTotal) + panels = append(panels, telemetryP2PReceivedTotalRate) + panels = append(panels, telemetryObservation) + panels = append(panels, telemetryMessageObserveTotal) + 
break + case "ocr2": + panels = append(panels, telemetryP2PReceivedTotal) + panels = append(panels, telemetryP2PReceivedTotalRate) + panels = append(panels, telemetryObservation) + panels = append(panels, telemetryMessageObserveTotal) + break + case "ocr3": + panels = append(panels, telemetryP2PReceivedTotal) + panels = append(panels, telemetryP2PReceivedTotalRate) + panels = append(panels, telemetryObservationAsk) + panels = append(panels, telemetryObservation) + panels = append(panels, telemetryObservationBid) + panels = append(panels, telemetryMessageProposeObservationAsk) + panels = append(panels, telemetryMessageProposeObservation) + panels = append(panels, telemetryMessageProposeObservationBid) + panels = append(panels, telemetryMessageProposeObservationTotal) + panels = append(panels, telemetryMessageObserveTotal) + break + } + + return []dashboard.Option{ + dashboard.Row("Price Reporting", panels...), + } +} + +func ocrContractConfigDelta(p Props) []dashboard.Option { + return []dashboard.Option{ + dashboard.Row("OCR Contract Config Delta", + row.Collapse(), + row.WithStat( + "relativeDeviationThreshold", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_alpha{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + row.WithStat( + "maxContractValueAgeSeconds", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_delta_c_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + row.WithStat( + "observationGracePeriodSeconds", + 
stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_delta_grace_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + row.WithStat( + "badEpochTimeoutSeconds", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_delta_progress_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + row.WithStat( + "resendIntervalSeconds", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_delta_resend_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + row.WithStat( + "roundIntervalSeconds", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + ``+p.OcrVersion+`_contract_config_delta_round_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + row.WithStat( + "transmissionStageTimeoutSeconds", + stat.DataSource(p.PrometheusDataSource), + stat.Text(stat.TextValueAndName), + stat.Orientation(stat.OrientationHorizontal), + stat.TitleFontSize(12), + stat.ValueFontSize(28), + stat.Span(4), + stat.SparkLine(), + stat.WithPrometheusTarget( + 
``+p.OcrVersion+`_contract_config_delta_stage_seconds{`+p.PlatformOpts.LabelQuery+`}`, + prometheus.Legend("{{ contract }}"), + ), + ), + ), + } +} + +func roundEpochProgression(p Props) []dashboard.Option { + variableFeedId := "feed_id" + if p.OcrVersion == "ocr3" { + variableFeedId = "feed_id_name" + } + + return []dashboard.Option{ + dashboard.Row("Round / Epoch Progression", + row.Collapse(), + row.WithTimeSeries( + "Agreed Epoch Progression", + timeseries.Span(4), + timeseries.Height("300px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("short"), + ), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_feed_agreed_epoch{`+variableFeedId+`=~"${`+variableFeedId+`}"}`, + prometheus.Legend("{{"+variableFeedId+"}}"), + ), + ), + row.WithTimeSeries( + "Round Epoch Progression", + timeseries.Span(4), + timeseries.Height("300px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("short"), + ), + timeseries.WithPrometheusTarget( + ``+p.OcrVersion+`_telemetry_epoch_round{`+variableFeedId+`=~"${`+variableFeedId+`}"}`, + prometheus.Legend("{{oracle}}"), + ), + ), + row.WithTimeSeries( + "Rounds Started", + timeseries.Description("Tracks individual nodes firing \"new round\" message via telemetry (not part of P2P messages)"), + timeseries.Span(4), + timeseries.Height("300px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("short"), + ), + timeseries.WithPrometheusTarget( + `rate(`+p.OcrVersion+`_telemetry_round_started_total{`+variableFeedId+`=~"${`+variableFeedId+`}"}[1m])`, + prometheus.Legend("{{oracle}}"), + ), + ), + row.WithTimeSeries( + "Telemetry Ingested", + timeseries.Span(12), + timeseries.Height("300px"), + timeseries.DataSource(p.PrometheusDataSource), + timeseries.Axis( + axis.Unit("short"), + ), + timeseries.Legend(timeseries.ToTheRight), + timeseries.WithPrometheusTarget( + 
// PlatformOpts holds the platform-dependent query fragments used by the
// atlas-don panels.
type PlatformOpts struct {
	// Platform is infrastructure deployment platform: docker or kubernetes
	Platform string
	// LabelFilters maps a label name to its matcher expression,
	// e.g. "contract" -> `=~"${contract}"`.
	LabelFilters map[string]string
	// LabelFilter is the primary label of the platform ("job" or "instance").
	LabelFilter string
	// LegendString is the label used in legends ("pod" or "instance").
	LegendString string
	// LabelQuery is LabelFilters rendered as `<label><matcher>, ` pairs, ready
	// to be placed between the braces of a selector.
	LabelQuery string
}

// PlatformPanelOpts generates the label filters and the rendered label query
// for the "docker" and "kubernetes" deployment platforms.
//
// The "contract" filter is always present. ocrVersion "ocr2" adds a "feed_id"
// filter and "ocr3" adds a "feed_id_name" filter; any other version adds
// none. "kubernetes" then adds namespace, job and pod filters, while "docker"
// adds an instance filter.
//
// LabelQuery is rendered in the order the filters were added, so the result
// is identical on every call. (Ranging over the LabelFilters map would yield
// a different order from run to run.) Each pair keeps its trailing ", "
// separator, so the string format is unchanged.
//
// It panics when platform is neither "kubernetes" nor "docker".
func PlatformPanelOpts(platform string, ocrVersion string) PlatformOpts {
	po := PlatformOpts{LabelFilters: map[string]string{}}

	// order records insertion order; the map alone has no stable ordering.
	var order []string
	addFilter := func(label string) {
		// Every filter matches the label against the dashboard variable of
		// the same name.
		po.LabelFilters[label] = `=~"${` + label + `}"`
		order = append(order, label)
	}

	addFilter("contract")

	switch ocrVersion {
	case "ocr2":
		addFilter("feed_id")
	case "ocr3":
		addFilter("feed_id_name")
	}

	switch platform {
	case "kubernetes":
		addFilter("namespace")
		addFilter("job")
		addFilter("pod")
		po.LabelFilter = "job"
		po.LegendString = "pod"
	case "docker":
		addFilter("instance")
		po.LabelFilter = "instance"
		po.LegendString = "instance"
	default:
		panic(fmt.Sprintf("failed to generate Platform dependent queries, unknown platform: %s", platform))
	}

	for _, label := range order {
		po.LabelQuery += label + po.LabelFilters[label] + ", "
	}
	return po
}
b/dashboard-lib/dashboard.go @@ -3,6 +3,7 @@ package dashboard_lib import ( "context" "encoding/json" + "fmt" "github.com/K-Phoen/grabana" "github.com/K-Phoen/grabana/dashboard" "github.com/pkg/errors" @@ -72,6 +73,36 @@ func (m *Dashboard) AddSDKPanel(panel map[string]interface{}) { m.SDKPanels = append(m.SDKPanels, panel) } +func (m *Dashboard) Delete() error { + ctx := context.Background() + var client *grabana.Client + if m.DeployOpts.GrafanaBasicAuthUser != "" && m.DeployOpts.GrafanaBasicAuthPassword != "" { + L.Info().Msg("Authorizing using BasicAuth") + client = grabana.NewClient( + &http.Client{}, + m.DeployOpts.GrafanaURL, + grabana.WithBasicAuth(m.DeployOpts.GrafanaBasicAuthUser, m.DeployOpts.GrafanaBasicAuthPassword), + ) + } else { + L.Info().Msg("Authorizing using Bearer token") + client = grabana.NewClient( + &http.Client{}, + m.DeployOpts.GrafanaURL, + grabana.WithAPIToken(m.DeployOpts.GrafanaToken), + ) + } + db, err := client.GetDashboardByTitle(ctx, m.Name) + if err != nil { + return errors.Wrap(err, "failed to get the dashboard") + } + fmt.Println(db.UID) + errDelete := client.DeleteDashboard(ctx, db.UID) + if errDelete != nil { + return errors.Wrap(errDelete, "failed to delete the dashboard") + } + return nil +} + func (m *Dashboard) build() (dashboard.Builder, error) { b, err := dashboard.New( m.Name, diff --git a/dashboard-lib/k8s-pods/component.go b/dashboard-lib/k8s-pods/component.go index 4ef90c3012f..df9a6ac6a69 100644 --- a/dashboard-lib/k8s-pods/component.go +++ b/dashboard-lib/k8s-pods/component.go @@ -26,12 +26,20 @@ func vars(p Props) []dashboard.Option { query.Request("label_values(namespace)"), query.Sort(query.NumericalAsc), ), + dashboard.VariableAsQuery( + "job", + query.DataSource(p.PrometheusDataSource), + query.Multiple(), + query.IncludeAll(), + query.Request(`label_values(up{namespace="$namespace"}, job)`), + query.Sort(query.NumericalAsc), + ), dashboard.VariableAsQuery( "pod", query.DataSource(p.PrometheusDataSource), 
query.Multiple(), query.IncludeAll(), - query.Request("label_values(kube_pod_container_info{namespace=\"$namespace\"}, pod)"), + query.Request(`label_values(up{namespace="$namespace", job="$job"}, pod)`), query.Sort(query.NumericalAsc), ), } diff --git a/sonar-project.properties b/sonar-project.properties index 616d7883e19..b0e773b1048 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -58,8 +58,8 @@ sonar.cpd.exclusions=\ **/integration-tests/contracts/ethereum_keeper_contracts.go,\ integration-tests/contracts/ethereum_contracts_seth.go,\ integration-tests/contracts/ethereum_contracts_seth.go,\ -integration-tests/actions/seth/actions.go -dashboard/** +integration-tests/actions/seth/actions.go,\ +dashboard-lib/** # Tests' root folder, inclusions (tests to check and count) and exclusions sonar.tests=.