diff --git a/chain/metric.go b/chain/metric.go new file mode 100644 index 000000000..8c9a764d4 --- /dev/null +++ b/chain/metric.go @@ -0,0 +1,12 @@ +// Copyright (c) 2024 The VeChainThor developers +// +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package chain + +import "github.com/vechain/thor/v2/metrics" + +var ( + metricCacheHitMiss = metrics.LazyLoadCounterVec("repo_cache_hit_miss_count", []string{"type", "event"}) +) diff --git a/chain/repository.go b/chain/repository.go index 2460d6a3c..81b1f6af4 100644 --- a/chain/repository.go +++ b/chain/repository.go @@ -322,23 +322,29 @@ func (r *Repository) GetMaxBlockNum() (uint32, error) { // GetBlockSummary get block summary by block id. func (r *Repository) GetBlockSummary(id thor.Bytes32) (summary *BlockSummary, err error) { - var cached interface{} - if cached, err = r.caches.summaries.GetOrLoad(id, func() (interface{}, error) { + var blk interface{} + result := "hit" + if blk, err = r.caches.summaries.GetOrLoad(id, func() (interface{}, error) { + result = "miss" return loadBlockSummary(r.data, id) }); err != nil { return } - return cached.(*BlockSummary), nil + metricCacheHitMiss().AddWithLabel(1, map[string]string{"type": "blocks", "event": result}) + return blk.(*BlockSummary), nil } func (r *Repository) getTransaction(key txKey) (*tx.Transaction, error) { - cached, err := r.caches.txs.GetOrLoad(key, func() (interface{}, error) { + result := "hit" + trx, err := r.caches.txs.GetOrLoad(key, func() (interface{}, error) { + result = "miss" return loadTransaction(r.data, key) }) if err != nil { return nil, err } - return cached.(*tx.Transaction), nil + metricCacheHitMiss().AddWithLabel(1, map[string]string{"type": "transaction", "event": result}) + return trx.(*tx.Transaction), nil } // GetBlockTransactions get all transactions of the block for given block id. @@ -377,13 +383,17 @@ func (r *Repository) GetBlock(id thor.Bytes32) (*block.Block, error) { } func (r *Repository) getReceipt(key txKey) (*tx.Receipt, error) { - cached, err := r.caches.receipts.GetOrLoad(key, func() (interface{}, error) { + result := "hit" + receipt, err := r.caches.receipts.GetOrLoad(key, func() (interface{}, error) { + result = "miss" return loadReceipt(r.data, key) }) if err != nil { return nil, err } - return cached.(*tx.Receipt), nil + metricCacheHitMiss().AddWithLabel(1, map[string]string{"type": "receipt", "event": result}) + + return receipt.(*tx.Receipt), nil } // GetBlockReceipts get all tx receipts of the block for given block id. diff --git a/metrics/noop.go b/metrics/noop.go index 6eb909ff9..a9e24ab2c 100644 --- a/metrics/noop.go +++ b/metrics/noop.go @@ -5,7 +5,9 @@ package metrics -import "net/http" +import ( + "net/http" +) // noopMetrics implements a no operations metrics service type noopMetrics struct{} diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 50745752c..15447f6dc 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -6,8 +6,15 @@ package metrics import ( + "bufio" + "fmt" "net/http" + "os" + "runtime" + "strconv" + "strings" "sync" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -24,6 +31,8 @@ func InitializePrometheusMetrics() { // don't allow for reset if _, ok := metrics.(*prometheusMetrics); !ok { metrics = newPrometheusMetrics() + // collection disk io metrics every 5 seconds + go metrics.(*prometheusMetrics).collectDiskIO(5 * time.Second) } } @@ -123,6 +132,62 @@ func (o *prometheusMetrics) GetOrCreateGaugeVecMeter(name string, labels []strin return meter } +func getIOLineValue(line string) int64 { + fields := strings.Fields(line) + if len(fields) != 2 { + logger.Warn("this io file line is malformed", "err", line) + return 0 + } + value, err := strconv.ParseInt(fields[1], 10, 64) + if err != nil { + logger.Warn("unable to parse int", "err", err) + return 0 + } + + return value +} + +func getDiskIOData() (int64, int64, error) { + pid := os.Getpid() + ioFilePath := fmt.Sprintf("/proc/%d/io", pid) + file, err := os.Open(ioFilePath) + if err != nil { + return 0, 0, err + } + + // Parse the file line by line + scanner := bufio.NewScanner(file) + var reads, writes int64 + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "syscr") { + reads = getIOLineValue(line) + } else if strings.HasPrefix(line, "syscw") { + writes = getIOLineValue(line) + } + } + + return reads, writes, nil +} + +func (o *prometheusMetrics) collectDiskIO(refresh time.Duration) { + if runtime.GOOS != "linux" { + return + } + for { + reads, writes, err := getDiskIOData() + if err == nil { + readsMeter := o.GetOrCreateGaugeMeter("disk_reads") + readsMeter.Set(reads) + + writesMeter := o.GetOrCreateGaugeMeter("disk_writes") + writesMeter.Set(writes) + } + + time.Sleep(refresh) + } +} + func (o *prometheusMetrics) newHistogramMeter(name string, buckets []int64) HistogramMeter { var floatBuckets []float64 for _, bucket := range buckets { diff --git a/muxdb/internal/trie/cache.go b/muxdb/internal/trie/cache.go index cc7bca300..d7d78aaae 100644 --- a/muxdb/internal/trie/cache.go +++ b/muxdb/internal/trie/cache.go @@ -45,12 +45,16 @@ func (c *Cache) log() { last := atomic.SwapInt64(&c.lastLogTime, now) if now-last > int64(time.Second*20) { - log1, ok1 := c.nodeStats.ShouldLog("node cache stats") - log2, ok2 := c.rootStats.ShouldLog("root cache stats") - - if ok1 || ok2 { - log1() - log2() + logNode, hitNode, missNode, okNode := c.nodeStats.shouldLog("node cache stats") + logRoot, hitRoot, missRoot, okRoot := c.rootStats.shouldLog("root cache stats") + + if okNode || okRoot { + logNode() + metricCacheHitMissGaugeVec().SetWithLabel(hitNode, map[string]string{"type": "node", "event": "hit"}) + metricCacheHitMissGaugeVec().SetWithLabel(missNode, map[string]string{"type": "node", "event": "miss"}) + logRoot() + metricCacheHitMissGaugeVec().SetWithLabel(hitRoot, map[string]string{"type": "root", "event": "hit"}) + metricCacheHitMissGaugeVec().SetWithLabel(missRoot, map[string]string{"type": "root", "event": "miss"}) } } else { atomic.CompareAndSwapInt64(&c.lastLogTime, now, last) @@ -189,7 +193,7 @@ type cacheStats struct { func (cs *cacheStats) Hit() int64 { return atomic.AddInt64(&cs.hit, 1) } func (cs *cacheStats) Miss() int64 { return atomic.AddInt64(&cs.miss, 1) } -func (cs *cacheStats) ShouldLog(msg string) (func(), bool) { +func (cs *cacheStats) shouldLog(msg string) (func(), int64, int64, bool) { hit := atomic.LoadInt64(&cs.hit) miss := atomic.LoadInt64(&cs.miss) lookups := hit + miss @@ -209,5 +213,5 @@ func (cs *cacheStats) ShouldLog(msg string) (func(), bool) { "hitrate", str, ) atomic.StoreInt32(&cs.flag, flag) - }, atomic.LoadInt32(&cs.flag) != flag + }, hit, miss, atomic.LoadInt32(&cs.flag) != flag } diff --git a/muxdb/internal/trie/metrics.go b/muxdb/internal/trie/metrics.go new file mode 100644 index 000000000..6db862df9 --- /dev/null +++ b/muxdb/internal/trie/metrics.go @@ -0,0 +1,12 @@ +// Copyright (c) 2024 The VeChainThor developers +// +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package trie + +import ( + "github.com/vechain/thor/v2/metrics" +) + +var metricCacheHitMissGaugeVec = metrics.LazyLoadGaugeVec("cache_hit_miss_count", []string{"type", "event"})