diff --git a/cmd/start/cmd.go b/cmd/start/cmd.go
index a4a7f60b..9153d56f 100644
--- a/cmd/start/cmd.go
+++ b/cmd/start/cmd.go
@@ -135,6 +135,7 @@ func enqueueMissingBlocks(exportQueue types.HeightQueue, ctx *parser.Context) {
 	lastDbBlockHeight, err := ctx.Database.GetLastBlockHeight()
 	if err != nil {
 		ctx.Logger.Error("failed to get last block height from database", "error", err)
+		logging.SignalDBOperationError()
 	}
 
 	// Get the start height, default to the config's height
@@ -207,6 +208,7 @@ func mustGetLatestHeight(ctx *parser.Context) int64 {
 		}
 
 		ctx.Logger.Error("failed to get last block from rpc client", "err", err, "retry count", retryCount)
+		logging.SignalRPCRequestError()
 
 		time.Sleep(ctx.Config.GetAvgBlockTime() * time.Duration(retryCount))
 	}
diff --git a/logging/prometheus.go b/logging/prometheus.go
index 96dea5a9..8380712f 100644
--- a/logging/prometheus.go
+++ b/logging/prometheus.go
@@ -1,6 +1,8 @@
 package logging
 
 import (
+	"fmt"
+
 	"github.com/prometheus/client_golang/prometheus"
 )
 
@@ -45,6 +47,31 @@ var DbBlockCount = prometheus.NewGaugeVec(
 	[]string{"total_blocks_in_db"},
 )
 
+// RPC Liveness
+var RpcRequestErrors = prometheus.NewCounter(
+	prometheus.CounterOpts{
+		Name: "juno_rpc_errors_total",
+		Help: "Total number of errors occurred during RPC requests",
+	},
+)
+
+// Database Liveness
+var DbOperationErrors = prometheus.NewCounter(
+	prometheus.CounterOpts{
+		Name: "juno_db_errors_total",
+		Help: "Total number of errors occurred during database operations",
+	},
+)
+
+// Block parsing
+var FetchBlockErrorCount = prometheus.NewCounterVec(
+	prometheus.CounterOpts{
+		Name: "juno_block_errors_total",
+		Help: "Total number of errors per block",
+	},
+	[]string{"block"},
+)
+
 // DbLatestHeight represents the Telemetry counter used to track the last indexed height in the database
 var DbLatestHeight = prometheus.NewGaugeVec(
 	prometheus.GaugeOpts{
@@ -54,34 +81,31 @@ var DbLatestHeight = prometheus.NewGaugeVec(
 	[]string{"db_latest_height"},
 )
 
-func init() {
-	err := prometheus.Register(StartHeight)
-	if err != nil {
-		panic(err)
-	}
-
-	err = prometheus.Register(WorkerCount)
-	if err != nil {
-		panic(err)
-	}
-
-	err = prometheus.Register(WorkerHeight)
-	if err != nil {
-		panic(err)
-	}
+// SignalRPCRequestError signals that a new RPC request error occurred.
+func SignalRPCRequestError() {
+	RpcRequestErrors.Inc()
+}
 
-	err = prometheus.Register(ErrorCount)
-	if err != nil {
-		panic(err)
-	}
+// SignalDBOperationError signals that a new error occurred while interacting
+// with the database.
+func SignalDBOperationError() {
+	DbOperationErrors.Inc()
+}
 
-	err = prometheus.Register(DbBlockCount)
-	if err != nil {
-		panic(err)
-	}
+// SignalBlockError increments the error counter of the given height; each distinct height adds a label value.
+func SignalBlockError(blockHeight int64) {
+	blockStr := fmt.Sprintf("%d", blockHeight)
+	FetchBlockErrorCount.WithLabelValues(blockStr).Inc()
+}
 
-	err = prometheus.Register(DbLatestHeight)
-	if err != nil {
-		panic(err)
-	}
+func init() {
+	prometheus.MustRegister(StartHeight)
+	prometheus.MustRegister(WorkerCount)
+	prometheus.MustRegister(WorkerHeight)
+	prometheus.MustRegister(ErrorCount)
+	prometheus.MustRegister(DbBlockCount)
+	prometheus.MustRegister(DbLatestHeight)
+	prometheus.MustRegister(RpcRequestErrors)
+	prometheus.MustRegister(DbOperationErrors)
+	prometheus.MustRegister(FetchBlockErrorCount)
 }
diff --git a/parser/worker.go b/parser/worker.go
index 49cba0bf..2099b445 100644
--- a/parser/worker.go
+++ b/parser/worker.go
@@ -78,6 +78,9 @@ func (w Worker) Start() {
 		err = w.ProcessIfNotExists(i.Height)
 		if err != nil {
 			go func() {
+				// Signal that an error occurred while processing this block
+				logging.SignalBlockError(i.Height)
+
 				// Build the block with the updated retry count and log the error
 				newBlock := i.IncrementRetryCount()
 				w.logger.Debug("re-enqueuing failed block", "height", i.Height, "err", err, "count", newBlock.RetryCount)