From 9c7c758bda941bc959b8c953f44843fb2b3887bc Mon Sep 17 00:00:00 2001 From: Dmytro Date: Tue, 7 Nov 2023 15:50:36 +0300 Subject: [PATCH] added snapshot sync diagnostic information, updated diagnostic channel (#8645) --- cmd/devnet/main.go | 5 +-- diagnostics/diagnostic.go | 54 ++++++++++++++++++++++++++++++ diagnostics/peers.go | 4 +-- diagnostics/setup.go | 4 +++ diagnostics/snapshot_sync.go | 18 ++++++++++ erigon-lib/diagnostics/entities.go | 19 +++++++++++ erigon-lib/diagnostics/provider.go | 1 - turbo/snapshotsync/snapshotsync.go | 38 +++++++++++++++++++++ 8 files changed, 138 insertions(+), 5 deletions(-) create mode 100644 diagnostics/diagnostic.go create mode 100644 diagnostics/snapshot_sync.go diff --git a/cmd/devnet/main.go b/cmd/devnet/main.go index 8d47c09a3ea..d241040c09a 100644 --- a/cmd/devnet/main.go +++ b/cmd/devnet/main.go @@ -2,8 +2,6 @@ package main import ( "fmt" - "github.com/ledgerwatch/erigon/cmd/devnet/services" - "github.com/ledgerwatch/erigon/cmd/devnet/services/polygon" "os" "os/signal" "path/filepath" @@ -12,6 +10,9 @@ import ( "syscall" "time" + "github.com/ledgerwatch/erigon/cmd/devnet/services" + "github.com/ledgerwatch/erigon/cmd/devnet/services/polygon" + "github.com/ledgerwatch/erigon-lib/chain/networkname" "github.com/ledgerwatch/erigon-lib/common/metrics" "github.com/ledgerwatch/erigon/cmd/devnet/accounts" diff --git a/diagnostics/diagnostic.go b/diagnostics/diagnostic.go new file mode 100644 index 00000000000..67b18d53783 --- /dev/null +++ b/diagnostics/diagnostic.go @@ -0,0 +1,54 @@ +package diagnostics + +import ( + "context" + "net/http" + + "github.com/ledgerwatch/erigon-lib/common" + diaglib "github.com/ledgerwatch/erigon-lib/diagnostics" + "github.com/ledgerwatch/erigon/turbo/node" + "github.com/ledgerwatch/log/v3" + "github.com/urfave/cli/v2" +) + +type DiagnosticClient struct { + ctx *cli.Context + metricsMux *http.ServeMux + node *node.ErigonNode + + snapshotDownload map[string]diaglib.DownloadStatistics +} + +func NewDiagnosticClient(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) *DiagnosticClient { + return &DiagnosticClient{ctx: ctx, metricsMux: metricsMux, node: node, snapshotDownload: map[string]diaglib.DownloadStatistics{}} +} + +func (d *DiagnosticClient) Setup() { + d.runSnapshotListener() +} + +func (d *DiagnosticClient) runSnapshotListener() { + go func() { + ctx, ch, _ /*cancel*/ := diaglib.Context[diaglib.DownloadStatistics](context.Background(), 1) + + rootCtx, _ := common.RootContext() + + diaglib.StartProviders(ctx, diaglib.TypeOf(diaglib.DownloadStatistics{}), log.Root()) + for { + select { + case <-rootCtx.Done(): + return + case info := <-ch: + d.snapshotDownload[info.StagePrefix] = info + if info.DownloadFinished { + return + } + } + } + + }() +} + +func (d *DiagnosticClient) SnapshotDownload() map[string]diaglib.DownloadStatistics { + return d.snapshotDownload +} diff --git a/diagnostics/peers.go b/diagnostics/peers.go index 260c60b3456..e65e3713d2f 100644 --- a/diagnostics/peers.go +++ b/diagnostics/peers.go @@ -36,11 +36,11 @@ type PeerResponse struct { Protocols map[string]interface{} `json:"protocols"` // Sub-protocol specific metadata fields } -func SetupPeersAccess(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) { +func SetupPeersAccess(ctxclient *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) { metricsMux.HandleFunc("/peers", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Content-Type", "application/json") - writePeers(w, ctx, node) + writePeers(w, ctxclient, node) }) } diff --git a/diagnostics/setup.go b/diagnostics/setup.go index 022e7eb7e56..44fc74570fc 100644 --- a/diagnostics/setup.go +++ b/diagnostics/setup.go @@ -11,6 +11,9 @@ import ( func Setup(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) { debugMux := http.NewServeMux() + diagnostic := NewDiagnosticClient(ctx, debugMux, node) + diagnostic.Setup() + metricsMux.HandleFunc("/debug/", func(w http.ResponseWriter, r *http.Request) { r.URL.Path = strings.TrimPrefix(r.URL.Path, "/debug") r.URL.RawPath = strings.TrimPrefix(r.URL.RawPath, "/debug") @@ -27,5 +30,6 @@ func Setup(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) { SetupNodeInfoAccess(debugMux, node) SetupPeersAccess(ctx, debugMux, node) SetupBootnodesAccess(debugMux, node) + SetupStagesAccess(debugMux, diagnostic) } diff --git a/diagnostics/snapshot_sync.go b/diagnostics/snapshot_sync.go new file mode 100644 index 00000000000..66bb2a8a392 --- /dev/null +++ b/diagnostics/snapshot_sync.go @@ -0,0 +1,18 @@ +package diagnostics + +import ( + "encoding/json" + "net/http" +) + +func SetupStagesAccess(metricsMux *http.ServeMux, diag *DiagnosticClient) { + metricsMux.HandleFunc("/snapshot-sync", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Content-Type", "application/json") + writeStages(w, diag) + }) +} + +func writeStages(w http.ResponseWriter, diag *DiagnosticClient) { + json.NewEncoder(w).Encode(diag.SnapshotDownload()) +} diff --git a/erigon-lib/diagnostics/entities.go b/erigon-lib/diagnostics/entities.go index 9b03d7813f2..d8b8172fa14 100644 --- a/erigon-lib/diagnostics/entities.go +++ b/erigon-lib/diagnostics/entities.go @@ -28,3 +28,22 @@ type PeerStatistics struct { TypeBytesIn map[string]uint64 TypeBytesOut map[string]uint64 } + +type DownloadStatistics struct { + Downloaded uint64 `json:"downloaded"` + Total uint64 `json:"total"` + TotalTime float64 `json:"totalTime"` + DownloadRate uint64 `json:"downloadRate"` + UploadRate uint64 `json:"uploadRate"` + Peers int32 `json:"peers"` + Files int32 `json:"files"` + Connections uint64 `json:"connections"` + Alloc uint64 `json:"alloc"` + Sys uint64 `json:"sys"` + DownloadFinished bool `json:"downloadFinished"` + StagePrefix string `json:"stagePrefix"` +} + +func (ti DownloadStatistics) Type() Type { + return TypeOf(ti) +} diff --git a/erigon-lib/diagnostics/provider.go b/erigon-lib/diagnostics/provider.go index 2982df16338..ef9b3f045f5 100644 --- a/erigon-lib/diagnostics/provider.go +++ b/erigon-lib/diagnostics/provider.go @@ -103,7 +103,6 @@ func StartProviders(ctx context.Context, infoType Type, logger log.Logger) { providerMutex.Lock() reg := providers[infoType] - if reg == nil { reg = ®istry{} providers[infoType] = reg diff --git a/turbo/snapshotsync/snapshotsync.go b/turbo/snapshotsync/snapshotsync.go index 2298ba8ed79..b1d28436c63 100644 --- a/turbo/snapshotsync/snapshotsync.go +++ b/turbo/snapshotsync/snapshotsync.go @@ -12,6 +12,7 @@ import ( "github.com/ledgerwatch/erigon-lib/chain/snapcfg" "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/dbg" + "github.com/ledgerwatch/erigon-lib/diagnostics" "github.com/ledgerwatch/erigon-lib/downloader/downloadergrpc" "github.com/ledgerwatch/erigon-lib/downloader/snaptype" proto_downloader "github.com/ledgerwatch/erigon-lib/gointerfaces/downloader" @@ -130,6 +131,7 @@ func WaitForDownloader(logPrefix string, ctx context.Context, histV3 bool, capli // send all hashes to the Downloader service preverifiedBlockSnapshots := snapcfg.KnownCfg(cc.ChainName, []string{} /* whitelist */, snHistInDB).Preverified downloadRequest := make([]services.DownloadRequest, 0, len(preverifiedBlockSnapshots)+len(missingSnapshots)) + // build all download requests // builds preverified snapshots request for _, p := range preverifiedBlockSnapshots { @@ -182,6 +184,10 @@ func WaitForDownloader(logPrefix string, ctx context.Context, histV3 bool, capli defer logEvery.Stop() var m runtime.MemStats + /*diagnostics.RegisterProvider(diagnostics.ProviderFunc(func(ctx context.Context) error { + return nil + }), diagnostics.TypeOf(diagnostics.DownloadStatistics{}), log.Root())*/ + // Check once without delay, for faster erigon re-start stats, err := snapshotDownloader.Stats(ctx, &proto_downloader.StatsRequest{}) if err == nil && stats.Completed { @@ -205,6 +211,22 @@ Loop: } } */ + + diagnostics.Send(diagnostics.DownloadStatistics{ + Downloaded: stats.BytesCompleted, + Total: stats.BytesTotal, + TotalTime: time.Since(downloadStartTime).Round(time.Second).Seconds(), + DownloadRate: stats.DownloadRate, + UploadRate: stats.UploadRate, + Peers: stats.PeersUnique, + Files: stats.FilesTotal, + Connections: stats.ConnectionsTotal, + Alloc: m.Alloc, + Sys: m.Sys, + DownloadFinished: stats.Completed, + StagePrefix: logPrefix, + }) + log.Info(fmt.Sprintf("[%s] download finished", logPrefix), "time", time.Since(downloadStartTime).String()) break Loop } else { @@ -218,6 +240,22 @@ Loop: if stats.Progress > 0 && stats.DownloadRate == 0 { suffix += " (or verifying)" } + + diagnostics.Send(diagnostics.DownloadStatistics{ + Downloaded: stats.BytesCompleted, + Total: stats.BytesTotal, + TotalTime: time.Since(downloadStartTime).Round(time.Second).Seconds(), + DownloadRate: stats.DownloadRate, + UploadRate: stats.UploadRate, + Peers: stats.PeersUnique, + Files: stats.FilesTotal, + Connections: stats.ConnectionsTotal, + Alloc: m.Alloc, + Sys: m.Sys, + DownloadFinished: stats.Completed, + StagePrefix: logPrefix, + }) + log.Info(fmt.Sprintf("[%s] %s", logPrefix, suffix), "progress", fmt.Sprintf("%.2f%% %s/%s", stats.Progress, common.ByteCount(stats.BytesCompleted), common.ByteCount(stats.BytesTotal)), "time-left", downloadTimeLeft,