Skip to content

Commit

Permalink
Add a scraper to scrape disk usage metrics (#612)
Browse files Browse the repository at this point in the history
Signed-off-by: Nghia Tran <[email protected]>
  • Loading branch information
tcnghia authored Oct 28, 2024
1 parent 2450811 commit 97b27ad
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 0 deletions.
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/google/go-github/v61 v61.0.0
github.com/prometheus/client_golang v1.20.5
github.com/sethvargo/go-envconfig v1.1.0
github.com/shirou/gopsutil v3.21.11+incompatible
github.com/snabb/httpreaderat v1.0.1
go.opentelemetry.io/contrib/detectors/gcp v1.31.0
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0
Expand Down Expand Up @@ -69,6 +70,7 @@ require (
github.com/go-jose/go-jose/v4 v4.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/flatbuffers v23.5.26+incompatible // indirect
Expand Down Expand Up @@ -102,6 +104,7 @@ require (
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/skeema/knownhosts v1.2.2 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.56.0 // indirect
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
Expand Down Expand Up @@ -297,6 +299,8 @@ github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/sethvargo/go-envconfig v1.1.0 h1:cWZiJxeTm7AlCvzGXrEXaSTCNgip5oJepekh/BOQuog=
github.com/sethvargo/go-envconfig v1.1.0/go.mod h1:JLd0KFWQYzyENqnEPWWZ49i4vzZo/6nRidxI8YvGiHw=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/skeema/knownhosts v1.2.2 h1:Iug2P4fLmDw9f41PB6thxUkNUkJzB5i+1/exaj40L3A=
Expand All @@ -323,6 +327,8 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
Expand Down Expand Up @@ -432,6 +438,7 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
111 changes: 111 additions & 0 deletions pkg/httpmetrics/disk_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package httpmetrics

import (
"context"
"os"
"strings"
"sync"
"time"

"github.com/chainguard-dev/clog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/shirou/gopsutil/disk"
)

// Configuration for the background disk usage scraper.
const (
	// DiskUsageScrapeIntervalEnv is the name of the environment variable that
	// scrapeInterval consults to override the scrape interval. Its value is
	// parsed with time.ParseDuration (for example "30s" or "1m30s").
	DiskUsageScrapeIntervalEnv = "DISK_USAGE_SCRAPE_INTERVAL"

	// DiskUsageScrapeInterval is the scrape interval used when the
	// environment does not supply a usable override.
	DiskUsageScrapeInterval = 5 * time.Second
)

var (
	// once is used by ScrapeDiskUsage so that the disk usage scraper is
	// started at most once per process.
	once = &sync.Once{}

	// diskUsageScrapeFailures counts the scrapes that were abandoned because
	// the partition list could not be read.
	diskUsageScrapeFailures = promauto.NewCounter(prometheus.CounterOpts{
		Name: "disk_usage_scrape_failures",
		Help: "The number of failures when scraping disk usage.",
	})

	// diskUsageBytesGauge holds the most recently scraped number of used
	// bytes, labelled by mount point.
	diskUsageBytesGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "disk_usage_bytes",
		Help: "Disk usage in bytes.",
	}, []string{"mount"})
)

// scrapeDiskUsage returns the number of bytes used on each mounted partition,
// keyed by mount point.
//
// On Cloud Run the mounted partitions are:
//   - /tmp
//   - /
//   - /dev
//   - /dev/shm
//   - any additional volume mounts that come from the Revision config.
//
// /dev and everything mounted beneath it is skipped; /, /tmp and any
// additional volume mounts are kept. Only /dev itself and paths under /dev/
// are filtered, so an unrelated mount such as /devdata is still reported.
// Partitions whose usage cannot be read, or that report a total size of zero,
// are skipped as well.
//
// If the partition list itself cannot be read, the scrape failure counter is
// incremented and nil is returned.
func scrapeDiskUsage() map[string]uint64 {
	parts, err := disk.Partitions(true)
	if err != nil {
		// Count the failure instead of logging or panicking: this runs on
		// every tick, and missing metrics are preferable to log spam.
		diskUsageScrapeFailures.Inc()
		return nil
	}

	usage := make(map[string]uint64, len(parts))
	for _, p := range parts {
		mount := p.Mountpoint
		if mount == "/dev" || strings.HasPrefix(mount, "/dev/") {
			// Nothing useful under /dev. Filter before asking for usage so we
			// do not pay for stats that would be thrown away.
			continue
		}
		s, err := disk.Usage(mount)
		if err != nil || s == nil || s.Total == 0 {
			// Some Cloud Run partitions don't implement usage stats.
			continue
		}
		usage[mount] = s.Used
	}
	return usage
}

// ScrapeDiskUsage periodically records disk usage in the disk_usage_bytes
// gauge, one sample per mount point, until ctx is cancelled.
//
// The scrape interval comes from scrapeInterval, which honours the
// DISK_USAGE_SCRAPE_INTERVAL environment variable and otherwise uses
// DiskUsageScrapeInterval.
//
// Only the first call in a process runs the scrape loop; that call blocks
// until ctx is done, so it is meant to be run in its own goroutine. Every
// later call returns immediately without doing anything.
func ScrapeDiskUsage(ctx context.Context) {
	// Use the Once only to elect the single caller that runs the loop. Running
	// the loop inside once.Do would make every other caller block until the
	// first caller's context is cancelled.
	elected := false
	once.Do(func() { elected = true })
	if !elected {
		return
	}

	interval := scrapeInterval()
	if interval <= 0 {
		// time.NewTicker panics on a non-positive interval; fall back to the
		// default rather than crash the process over a bad override.
		interval = DiskUsageScrapeInterval
	}

	logger := clog.FromContext(ctx)
	// Log the interval actually in use, not the compiled-in default.
	logger.Info("Starting disk usage scraper with interval", "interval", interval)

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			// A failed scrape yields a nil map, so nothing is reported for
			// that tick.
			for mount, used := range scrapeDiskUsage() {
				diskUsageBytesGauge.WithLabelValues(mount).Set(float64(used))
				logger.Info("Disk usage reported", "mount", mount, "used", used)
			}
		case <-ctx.Done():
			return
		}
	}
}

func scrapeInterval() time.Duration {
interval := DiskUsageScrapeInterval

if s, ok := os.LookupEnv(DiskUsageScrapeIntervalEnv); ok {
if i, err := time.ParseDuration(s); err == nil {
interval = i
}
}
return interval
}
38 changes: 38 additions & 0 deletions pkg/httpmetrics/disk_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package httpmetrics

import (
"os"
"strings"
"testing"
"time"
)

// TestScrapeDiskUsage checks that scraping the machine running the test
// reports at least one mount point and never reports /dev or a mount beneath
// it.
func TestScrapeDiskUsage(t *testing.T) {
	t.Parallel()

	usage := scrapeDiskUsage()
	if len(usage) == 0 {
		t.Error("expected disk usage, got none")
	}

	for mount := range usage {
		// Match /dev itself and paths under /dev/ only. A bare "/dev" prefix
		// check would also flag unrelated mounts such as /devdata.
		if mount == "/dev" || strings.HasPrefix(mount, "/dev/") {
			t.Errorf("expected non-dev mount, got %q", mount)
		}
	}
}

func TestScrapeInterval(t *testing.T) {
t.Parallel()

if got, want := scrapeInterval(), DiskUsageScrapeInterval; got != want {
t.Errorf("expected positive scrape interval, want %v got %v", want, got)
}

// set the env
os.Setenv(DiskUsageScrapeIntervalEnv, "1m30s")
defer os.Unsetenv(DiskUsageScrapeIntervalEnv)
if got, want := scrapeInterval(), 90*time.Second; got != want {
t.Errorf("expected positive scrape interval, want %v got %v", want, got)
}
}
3 changes: 3 additions & 0 deletions pkg/httpmetrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ func ServeMetrics() {
ReadHeaderTimeout: 10 * time.Second,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ScrapeDiskUsage(ctx)
if err := srv.ListenAndServe(); err != nil {
slog.Error("listen and serve for http /metrics", "error", err)
}
Expand Down

0 comments on commit 97b27ad

Please sign in to comment.