Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a scraper to scrape disk usage metrics #612

Merged
merged 2 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/google/go-github/v61 v61.0.0
github.com/prometheus/client_golang v1.20.5
github.com/sethvargo/go-envconfig v1.1.0
github.com/shirou/gopsutil v3.21.11+incompatible
github.com/snabb/httpreaderat v1.0.1
go.opentelemetry.io/contrib/detectors/gcp v1.31.0
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0
Expand Down Expand Up @@ -69,6 +70,7 @@ require (
github.com/go-jose/go-jose/v4 v4.0.4 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/google/flatbuffers v23.5.26+incompatible // indirect
Expand Down Expand Up @@ -102,6 +104,7 @@ require (
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/skeema/knownhosts v1.2.2 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.56.0 // indirect
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
Expand Down Expand Up @@ -297,6 +299,8 @@ github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4=
github.com/sethvargo/go-envconfig v1.1.0 h1:cWZiJxeTm7AlCvzGXrEXaSTCNgip5oJepekh/BOQuog=
github.com/sethvargo/go-envconfig v1.1.0/go.mod h1:JLd0KFWQYzyENqnEPWWZ49i4vzZo/6nRidxI8YvGiHw=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/skeema/knownhosts v1.2.2 h1:Iug2P4fLmDw9f41PB6thxUkNUkJzB5i+1/exaj40L3A=
Expand All @@ -323,6 +327,8 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
Expand Down Expand Up @@ -432,6 +438,7 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
111 changes: 111 additions & 0 deletions pkg/httpmetrics/disk_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package httpmetrics

import (
"context"
"os"
"strings"
"sync"
"time"

"github.com/chainguard-dev/clog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/shirou/gopsutil/disk"
)

const (
	// DiskUsageScrapeInterval is the default period between two disk usage
	// scrapes.
	DiskUsageScrapeInterval time.Duration = 5 * time.Second

	// DiskUsageScrapeIntervalEnv names the environment variable that
	// scrapeInterval reads to override DiskUsageScrapeInterval. Its value is
	// parsed with time.ParseDuration (e.g. "30s", "1m30s").
	DiskUsageScrapeIntervalEnv = "DISK_USAGE_SCRAPE_INTERVAL"
)

var (
	// diskUsageBytesGauge holds the most recently scraped used-bytes value
	// for each mount point; the mount point is carried in the "mount" label.
	diskUsageBytesGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Help: "Disk usage in bytes.",
			Name: "disk_usage_bytes",
		},
		[]string{"mount"},
	)

	// diskUsageScrapeFailures counts scrapes that could not list the mounted
	// partitions.
	diskUsageScrapeFailures = promauto.NewCounter(
		prometheus.CounterOpts{
			Help: "The number of failures when scraping disk usage.",
			Name: "disk_usage_scrape_failures",
		},
	)

	// once guards ScrapeDiskUsage so that the scraper loop is only ever
	// started a single time per process.
	once = &sync.Once{}
)

// scrapeDiskUsage returns the disk usage of all mounted partitions.
//
// On Cloud Run, there are 4 partitions:
// - /tmp
// - /
// - /dev
// - /dev/shm
// - and any additional volume mounts.
//
// We ignore all the /dev partitions here, keeping /, /tmp, and any additional
// volumn mounts that may come from the Revision config.
func scrapeDiskUsage() map[string]uint64 {
parts, err := disk.Partitions(true)
if err != nil {
// It is better to be silent here and missing metrics, than to be spam log
// here, and/or panic.
diskUsageScrapeFailures.Inc()
return nil
tcnghia marked this conversation as resolved.
Show resolved Hide resolved
}
usage := make(map[string]uint64, len(parts))
for _, p := range parts {
device := p.Mountpoint
s, err := disk.Usage(device)
if err != nil || s == nil || s.Total == 0 {
// Some Cloud Run partitions don't implement usage stats.
continue
}
if strings.HasPrefix(device, "/dev") {
// Ignore /dev partitions, nothing useful there.
continue
}
usage[device] = s.Used
}
return usage
}

// ScrapeDiskUsage periodically scrapes disk usage and publishes it through the
// disk_usage_bytes gauge, one series per mount point, until ctx is cancelled.
//
// The period is taken from scrapeInterval, i.e. DiskUsageScrapeInterval unless
// overridden through the DiskUsageScrapeIntervalEnv environment variable.
//
// The call blocks for as long as the scraper runs, so callers normally invoke
// it in its own goroutine. The loop runs inside a sync.Once: only the first
// call starts a scraper, later calls never start a second one.
func ScrapeDiskUsage(ctx context.Context) {
	once.Do(func() {
		// Resolve the interval first so the startup log reports the value
		// that is actually used, including any environment override.
		interval := scrapeInterval()
		clog.FromContext(ctx).Info("Starting disk usage scraper with interval", "interval", interval)

		ticker := time.NewTicker(interval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				// A failed scrape yields a nil map; ranging over it is a
				// no-op, so the previous gauge values are left in place.
				for mount, used := range scrapeDiskUsage() {
					diskUsageBytesGauge.WithLabelValues(mount).Set(float64(used))
					clog.FromContext(ctx).Info("Disk usage reported", "mount", mount, "used", used)
				}
			case <-ctx.Done():
				return
			}
		}
	})
}

func scrapeInterval() time.Duration {
interval := DiskUsageScrapeInterval

if s, ok := os.LookupEnv(DiskUsageScrapeIntervalEnv); ok {
if i, err := time.ParseDuration(s); err == nil {
interval = i
}
}
return interval
}
38 changes: 38 additions & 0 deletions pkg/httpmetrics/disk_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package httpmetrics

import (
"os"
"strings"
"testing"
"time"
)

// TestScrapeDiskUsage scrapes the machine running the test and checks that at
// least one mount is reported and that nothing mounted at or below /dev is.
func TestScrapeDiskUsage(t *testing.T) {
	t.Parallel()

	usage := scrapeDiskUsage()
	if len(usage) == 0 {
		t.Error("expected disk usage, got none")
	}

	for mount := range usage {
		// Compare on a path boundary: only /dev and its children are device
		// mounts, a path such as /devdata merely shares the leading letters.
		if mount == "/dev" || strings.HasPrefix(mount, "/dev/") {
			t.Errorf("expected non-dev mount, got %q", mount)
		}
	}
}

func TestScrapeInterval(t *testing.T) {
t.Parallel()

if got, want := scrapeInterval(), DiskUsageScrapeInterval; got != want {
t.Errorf("expected positive scrape interval, want %v got %v", want, got)
}

// set the env
os.Setenv(DiskUsageScrapeIntervalEnv, "1m30s")
defer os.Unsetenv(DiskUsageScrapeIntervalEnv)
if got, want := scrapeInterval(), 90*time.Second; got != want {
t.Errorf("expected positive scrape interval, want %v got %v", want, got)
}
}
3 changes: 3 additions & 0 deletions pkg/httpmetrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ func ServeMetrics() {
ReadHeaderTimeout: 10 * time.Second,
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ScrapeDiskUsage(ctx)
if err := srv.ListenAndServe(); err != nil {
slog.Error("listen and serve for http /metrics", "error", err)
}
Expand Down
Loading