diff --git a/server/cmd/server/config.go b/server/cmd/server/config.go index 1cde13e867c..3896e558c7a 100644 --- a/server/cmd/server/config.go +++ b/server/cmd/server/config.go @@ -22,7 +22,7 @@ import ( "os" "runtime" - "github.com/grafana/pyroscope-go" + pyroscope "github.com/grafana/pyroscope-go" yaml "gopkg.in/yaml.v2" ) @@ -32,6 +32,7 @@ type Config struct { ContinuousProfile ContinuousProfile `yaml:"continuous-profile"` Profiler bool `yaml:"profiler"` MaxCPUs int `yaml:"max-cpus"` + MonitorPaths []string `yaml:"monitor-paths"` } type ContinuousProfile struct { @@ -55,6 +56,7 @@ func loadConfig(path string) *Config { BlockRate: 5, LogEnabled: true, }, + MonitorPaths: []string{"/", "/mnt", "/var/log"}, } configBytes, err := ioutil.ReadFile(path) if err != nil { diff --git a/server/cmd/server/main.go b/server/cmd/server/main.go index 2085b586846..c2f6e6451d8 100644 --- a/server/cmd/server/main.go +++ b/server/cmd/server/main.go @@ -111,7 +111,7 @@ func main() { go querier.Start(*configPath, cfg.LogFile) closers := ingester.Start(*configPath, shared) - common.NewMonitor() + common.NewMonitor(cfg.MonitorPaths) // TODO: loghandle提取出来,并增加log // setup system signal diff --git a/server/common/monitor.go b/server/common/monitor.go index 7630b7cc554..6b438133952 100644 --- a/server/common/monitor.go +++ b/server/common/monitor.go @@ -22,6 +22,7 @@ import ( "github.com/deepflowio/deepflow/server/libs/stats" "github.com/deepflowio/deepflow/server/libs/utils" + "github.com/shirou/gopsutil/disk" "github.com/shirou/gopsutil/load" "github.com/shirou/gopsutil/v3/cpu" "github.com/shirou/gopsutil/v3/net" @@ -99,7 +100,7 @@ type Counter struct { CPUNum uint64 `statsd:"cpu-num"` } -func NewMonitor() (*Monitor, error) { +func NewMonitor(paths []string) (*Monitor, error) { p, err := process.NewProcess(int32(os.Getpid())) if err != nil { return nil, err @@ -109,6 +110,8 @@ func NewMonitor() (*Monitor, error) { } myNodeIP, _ := os.LookupEnv(ENV_K8S_NODE_IP) 
stats.RegisterCountable("monitor", m, stats.OptionStatTags{"host_ip": myNodeIP}) + NewDiskMonitor(paths, myNodeIP) + return m, nil }
@@ -134,3 +137,60 @@ func (m *Monitor) GetCounter() interface{} {
 func (m *Monitor) Stop() {
 	m.Close()
 }
+
+// DiskMonitor reports the disk usage of a single path. One instance is
+// registered per monitored path by NewDiskMonitor.
+type DiskMonitor struct {
+	path string // passed to disk.Usage on every GetCounter call
+	utils.Closable
+}
+
+// DiskCounter holds the usage values emitted for one monitored path; every
+// field is copied from the result of disk.Usage.
+type DiskCounter struct {
+	Total       uint64  `statsd:"total"`
+	Free        uint64  `statsd:"free"`
+	Used        uint64  `statsd:"used"`
+	UsedPercent float64 `statsd:"used-percent"`
+}
+
+// GetCounter queries the current usage of the monitored path and returns it
+// as a *DiskCounter.
+func (m *DiskMonitor) GetCounter() interface{} {
+	usage, err := disk.Usage(m.path)
+	if err != nil {
+		// Best effort: the error is not surfaced here, so a path that cannot
+		// be queried is reported as an all-zero counter.
+		return &DiskCounter{}
+	}
+	return &DiskCounter{
+		Total:       usage.Total,
+		Free:        usage.Free,
+		Used:        usage.Used,
+		UsedPercent: usage.UsedPercent,
+	}
+}
+
+// Stop calls Close, which comes from the embedded utils.Closable.
+func (m *DiskMonitor) Stop() {
+	m.Close()
+}
+
+// NewDiskMonitor registers one DiskMonitor per distinct, non-empty entry of
+// paths under the "monitor_disk" stats module, tagged with hostIp and the path.
+// Empty and repeated entries are skipped so that a sloppy monitor-paths list
+// does not register a path that can never be queried or the same path twice.
+func NewDiskMonitor(paths []string, hostIp string) {
+	seen := make(map[string]struct{}, len(paths))
+	for _, path := range paths {
+		if path == "" {
+			continue
+		}
+		if _, dup := seen[path]; dup {
+			continue
+		}
+		seen[path] = struct{}{}
+		m := &DiskMonitor{path: path}
+		stats.RegisterCountable("monitor_disk", m, stats.OptionStatTags{"host_ip": hostIp, "path": path})
+	}
+}
diff --git a/server/server.yaml b/server/server.yaml
index dcd7eec5b80..6bf1b22677b 100644
--- a/server/server.yaml
+++ b/server/server.yaml
@@ -18,7 +18,7 @@ log-level: info
 # block-rate: 5 # valid when ProfileTypes contains 'block_count' or 'block_duration'
 # log-enabled: true # whether record profile debug logs
 
-# extract an integer (generally used timestamp) from traceId as an additional index to speed up traceId queries.
+## extract an integer (generally used timestamp) from traceId as an additional index to speed up traceId queries.
 #trace-id-with-index:
 # enabled: false
 # type: hash # hash/incremental-id
@@ -28,6 +28,9 @@ log-level: info
 # length: 13 # eg. 1701419226688, length is 13
 # format: decimal # hex/decimal
 
+## paths whose disk usage is reported by the server; the default is shown below
+#monitor-paths: [/,/mnt,/var/log]
+
 controller:
   ## controller http listenport
   #listen-port: 20417