diff --git a/README.md b/README.md index 339f5079..db280867 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Usage: prometheus-podman-exporter [flags] Flags: + -t, --collector.cache_duration int Duration (seconds) to retrieve container, size and refresh the cache (default 3600) -a, --collector.enable-all Enable all collectors by default. -i, --collector.image Enable image collector. -n, --collector.network Enable network collector. @@ -76,8 +77,6 @@ The table below list all existing collector and their description. # HELP podman_container_info Container information. # TYPE podman_container_info gauge podman_container_info{id="19286a13dc23",image="docker.io/library/sonarqube:latest",name="sonar01",pod_id="",pod_name="",ports="0.0.0.0:9000->9000/tcp"} 1 -podman_container_info{id="22e3d69be889",image="localhost/podman-pause:4.1.0-1651853754",name="959a0a3530db-infra",pod_id="959a0a3530db",pod_name="pod02",ports=""} 1 -podman_container_info{id="390ac740fa80",image="localhost/podman-pause:4.1.0-1651853754",name="d05cda23085a-infra",pod_id="d05cda23085a",pod_name="pod03",ports=""} 1 podman_container_info{id="482113b805f7",image="docker.io/library/httpd:latest",name="web_server",pod_id="",pod_name="",ports="0.0.0.0:8000->80/tcp"} 1 podman_container_info{id="642490688d9c",image="docker.io/grafana/grafana:latest",name="grafana",pod_id="",pod_name="",ports="0.0.0.0:3000->3000/tcp"} 1 podman_container_info{id="ad36e85960a1",image="docker.io/library/busybox:latest",name="busybox01",pod_id="3e8bae64e9af",pod_name="pod01",ports=""} 1 @@ -86,8 +85,6 @@ podman_container_info{id="dda983cc3ecf",image="localhost/podman-pause:4.1.0-1651 # HELP podman_container_state Container current state (-1=unknown,0=created,1=initialized,2=running,3=stopped,4=paused,5=exited,6=removing,7=stopping). 
# TYPE podman_container_state gauge podman_container_state{id="19286a13dc23",pod_id="",pod_name=""} 2 -podman_container_state{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_state{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 5 podman_container_state{id="482113b805f7",pod_id="",pod_name=""} 4 podman_container_state{id="642490688d9c",pod_id="",pod_name=""} 2 podman_container_state{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 5 @@ -96,8 +93,6 @@ podman_container_state{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} # HELP podman_container_block_input_total Container block input. # TYPE podman_container_block_input_total counter podman_container_block_input_total{id="19286a13dc23",pod_id="",pod_name=""} 49152 -podman_container_block_input_total{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_block_input_total{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_block_input_total{id="482113b805f7",pod_id="",pod_name=""} 0 podman_container_block_input_total{id="642490688d9c",pod_id="",pod_name=""} 1.41533184e+08 podman_container_block_input_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -106,8 +101,6 @@ podman_container_block_input_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_n # HELP podman_container_block_output_total Container block output. 
# TYPE podman_container_block_output_total counter podman_container_block_output_total{id="19286a13dc23",pod_id="",pod_name=""} 1.790976e+06 -podman_container_block_output_total{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_block_output_total{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_block_output_total{id="482113b805f7",pod_id="",pod_name=""} 8192 podman_container_block_output_total{id="642490688d9c",pod_id="",pod_name=""} 4.69248e+07 podman_container_block_output_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -116,8 +109,6 @@ podman_container_block_output_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_ # HELP podman_container_cpu_seconds_total total CPU time spent for container in seconds. # TYPE podman_container_cpu_seconds_total counter podman_container_cpu_seconds_total{id="19286a13dc23",pod_id="",pod_name=""} 83.231904 -podman_container_cpu_seconds_total{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_cpu_seconds_total{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_cpu_seconds_total{id="482113b805f7",pod_id="",pod_name=""} 0.069712 podman_container_cpu_seconds_total{id="642490688d9c",pod_id="",pod_name=""} 3.028685 podman_container_cpu_seconds_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -126,8 +117,6 @@ podman_container_cpu_seconds_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_n # HELP podman_container_cpu_system_seconds_total total system CPU time spent for container in seconds. 
# TYPE podman_container_cpu_system_seconds_total counter podman_container_cpu_system_seconds_total{id="19286a13dc23",pod_id="",pod_name=""} 0.007993418 -podman_container_cpu_system_seconds_total{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_cpu_system_seconds_total{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_cpu_system_seconds_total{id="482113b805f7",pod_id="",pod_name=""} 4.8591e-05 podman_container_cpu_system_seconds_total{id="642490688d9c",pod_id="",pod_name=""} 0.00118734 podman_container_cpu_system_seconds_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -136,8 +125,6 @@ podman_container_cpu_system_seconds_total{id="dda983cc3ecf",pod_id="3e8bae64e9af # HELP podman_container_created_seconds Container creation time in unixtime. # TYPE podman_container_created_seconds gauge podman_container_created_seconds{id="19286a13dc23",pod_id="",pod_name=""} 1.655859887e+09 -podman_container_created_seconds{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 1.655484892e+09 -podman_container_created_seconds{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 1.655489348e+09 podman_container_created_seconds{id="482113b805f7",pod_id="",pod_name=""} 1.655859728e+09 podman_container_created_seconds{id="642490688d9c",pod_id="",pod_name=""} 1.655859511e+09 podman_container_created_seconds{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.655859858e+09 @@ -146,8 +133,6 @@ podman_container_created_seconds{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_nam # HELP podman_container_started_seconds Container started time in unixtime. 
# TYPE podman_container_started_seconds gauge podman_container_started_seconds{id="19286a13dc23",pod_id="",pod_name=""} 1.659253804e+09 -podman_container_started_seconds{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} -6.21355968e+10 -podman_container_started_seconds{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 1.66064284e+09 podman_container_started_seconds{id="482113b805f7",pod_id="",pod_name=""} 1.659253804e+09 podman_container_started_seconds{id="642490688d9c",pod_id="",pod_name=""} 1.660642996e+09 podman_container_started_seconds{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.66064284e+09 @@ -156,8 +141,6 @@ podman_container_started_seconds{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_nam # HELP podman_container_exit_code Container exit code, if the container has not exited or restarted then the exit code will be 0. # TYPE podman_container_exit_code gauge podman_container_exit_code{id="19286a13dc23",pod_id="",pod_name=""} 0 -podman_container_exit_code{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_exit_code{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 1 podman_container_exit_code{id="482113b805f7",pod_id="",pod_name=""} 0 podman_container_exit_code{id="642490688d9c",pod_id="",pod_name=""} 0 podman_container_exit_code{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 130 @@ -166,8 +149,6 @@ podman_container_exit_code{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod # HELP podman_container_exited_seconds Container exited time in unixtime. 
# TYPE podman_container_exited_seconds gauge podman_container_exited_seconds{id="19286a13dc23",pod_id="",pod_name=""} 1.659253805e+09 -podman_container_exited_seconds{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} -6.21355968e+10 -podman_container_exited_seconds{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 1.660643511e+09 podman_container_exited_seconds{id="482113b805f7",pod_id="",pod_name=""} 1.659253805e+09 podman_container_exited_seconds{id="642490688d9c",pod_id="",pod_name=""} 1.659253804e+09 podman_container_exited_seconds{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.660643511e+09 @@ -176,8 +157,6 @@ podman_container_exited_seconds{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name # HELP podman_container_mem_limit_bytes Container memory limit. # TYPE podman_container_mem_limit_bytes gauge podman_container_mem_limit_bytes{id="19286a13dc23",pod_id="",pod_name=""} 9.713655808e+09 -podman_container_mem_limit_bytes{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_mem_limit_bytes{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_mem_limit_bytes{id="482113b805f7",pod_id="",pod_name=""} 9.713655808e+09 podman_container_mem_limit_bytes{id="642490688d9c",pod_id="",pod_name=""} 9.713655808e+09 podman_container_mem_limit_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -186,8 +165,6 @@ podman_container_mem_limit_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_nam # HELP podman_container_mem_usage_bytes Container memory usage. 
# TYPE podman_container_mem_usage_bytes gauge podman_container_mem_usage_bytes{id="19286a13dc23",pod_id="",pod_name=""} 1.029062656e+09 -podman_container_mem_usage_bytes{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_mem_usage_bytes{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_mem_usage_bytes{id="482113b805f7",pod_id="",pod_name=""} 2.748416e+06 podman_container_mem_usage_bytes{id="642490688d9c",pod_id="",pod_name=""} 3.67616e+07 podman_container_mem_usage_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -196,8 +173,6 @@ podman_container_mem_usage_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_nam # HELP podman_container_net_input_total Container network input. # TYPE podman_container_net_input_total counter podman_container_net_input_total{id="19286a13dc23",pod_id="",pod_name=""} 430 -podman_container_net_input_total{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_net_input_total{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_net_input_total{id="482113b805f7",pod_id="",pod_name=""} 430 podman_container_net_input_total{id="642490688d9c",pod_id="",pod_name=""} 4323 podman_container_net_input_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -206,8 +181,6 @@ podman_container_net_input_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_nam # HELP podman_container_net_output_total Container network output. 
# TYPE podman_container_net_output_total counter podman_container_net_output_total{id="19286a13dc23",pod_id="",pod_name=""} 110 -podman_container_net_output_total{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_net_output_total{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_net_output_total{id="482113b805f7",pod_id="",pod_name=""} 110 podman_container_net_output_total{id="642490688d9c",pod_id="",pod_name=""} 12071 podman_container_net_output_total{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 @@ -216,12 +189,26 @@ podman_container_net_output_total{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_na # HELP podman_container_pids Container pid number. # TYPE podman_container_pids gauge podman_container_pids{id="19286a13dc23",pod_id="",pod_name=""} 94 -podman_container_pids{id="22e3d69be889",pod_id="959a0a3530db",pod_name="pod02"} 0 -podman_container_pids{id="390ac740fa80",pod_id="d05cda23085a",pod_name="pod03"} 0 podman_container_pids{id="482113b805f7",pod_id="",pod_name=""} 82 podman_container_pids{id="642490688d9c",pod_id="",pod_name=""} 14 podman_container_pids{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 0 podman_container_pids{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1 + +# HELP podman_container_rootfs_size_bytes Container root filesystem size in bytes. +# TYPE podman_container_rootfs_size_bytes gauge +podman_container_rootfs_size_bytes{id="19286a13dc23",pod_id="",pod_name=""} 1.452382e+06 +podman_container_rootfs_size_bytes{id="482113b805f7",pod_id="",pod_name=""} 1.135744e+06 +podman_container_rootfs_size_bytes{id="642490688d9c",pod_id="",pod_name=""} 1.72771905e+08 +podman_container_rootfs_size_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 1.135744e+06 +podman_container_rootfs_size_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 1.035744e+06 + +# HELP podman_container_rw_size_bytes Container top read-write layer size in bytes. 
+# TYPE podman_container_rw_size_bytes gauge +podman_container_rw_size_bytes{id="19286a13dc23",pod_id="",pod_name=""} 0 +podman_container_rw_size_bytes{id="482113b805f7",pod_id="",pod_name=""} 0 +podman_container_rw_size_bytes{id="642490688d9c",pod_id="",pod_name=""} 26261 +podman_container_rw_size_bytes{id="ad36e85960a1",pod_id="3e8bae64e9af",pod_name="pod01"} 3551 +podman_container_rw_size_bytes{id="dda983cc3ecf",pod_id="3e8bae64e9af",pod_name="pod01"} 0 ``` #### `pod` diff --git a/cmd/root.go b/cmd/root.go index 85af28b3..71cdb2d4 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -11,7 +11,8 @@ import ( ) const ( - maxRequest int = 40 + maxRequest int = 40 + cacheDuration int64 = 3600 ) var ( @@ -50,13 +51,15 @@ func preRun(cmd *cobra.Command, _ []string) error { func run(cmd *cobra.Command, args []string) { if err := exporter.Start(cmd, args); err != nil { - log.Panic(err.Error()) + log.Print(err.Error()) + os.Exit(1) } } // Execute adds all child commands to the root command and sets flags appropriately. 
func Execute() { if err := rootCmd.Execute(); err != nil { + log.Print(err.Error()) os.Exit(1) } } @@ -94,4 +97,6 @@ func init() { "Comma separated list of pod/container/image labels to be converted\n"+ "to labels on prometheus metrics for each pod/container/image.\n"+ "collector.store_labels must be set to false for this to take effect.") + rootCmd.Flags().Int64P("collector.cache_duration", "t", cacheDuration, + "Duration (seconds) to retrieve container, size and refresh the cache") } diff --git a/collector/container.go b/collector/container.go index 213d2867..2634dd05 100644 --- a/collector/container.go +++ b/collector/container.go @@ -23,6 +23,8 @@ type containerCollector struct { netOutput typedDesc blockInput typedDesc blockOutput typedDesc + rwSize typedDesc + rootFsSize typedDesc logger log.Logger } @@ -142,6 +144,20 @@ func NewContainerStatsCollector(logger log.Logger) (Collector, error) { []string{"id", "pod_id", "pod_name"}, nil, ), prometheus.CounterValue, }, + rwSize: typedDesc{ + prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "rw_size_bytes"), + "Container top read-write layer size in bytes.", + []string{"id", "pod_id", "pod_name"}, nil, + ), prometheus.GaugeValue, + }, + rootFsSize: typedDesc{ + prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "rootfs_size_bytes"), + "Container root filesystem size in bytes.", + []string{"id", "pod_id", "pod_name"}, nil, + ), prometheus.GaugeValue, + }, logger: logger, }, nil } @@ -170,6 +186,8 @@ func (c *containerCollector) Update(ch chan<- prometheus.Metric) error { ch <- c.started.mustNewConstMetric(float64(rep.Started), rep.ID, rep.PodID, rep.PodName) ch <- c.exited.mustNewConstMetric(float64(rep.Exited), rep.ID, rep.PodID, rep.PodName) ch <- c.exitCode.mustNewConstMetric(float64(rep.ExitCode), rep.ID, rep.PodID, rep.PodName) + ch <- c.rwSize.mustNewConstMetric(float64(rep.RwSize), rep.ID, rep.PodID, rep.PodName) + ch <- 
c.rootFsSize.mustNewConstMetric(float64(rep.RootFsSize), rep.ID, rep.PodID, rep.PodName) if cntStat != nil { ch <- c.pids.mustNewConstMetric(float64(cntStat.PIDs), rep.ID, rep.PodID, rep.PodName) diff --git a/exporter/exporter.go b/exporter/exporter.go index 9fa5a612..28c840c6 100644 --- a/exporter/exporter.go +++ b/exporter/exporter.go @@ -1,7 +1,9 @@ package exporter import ( + "errors" "net/http" + "strconv" "time" "github.com/containers/prometheus-podman-exporter/collector" @@ -13,57 +15,60 @@ import ( "github.com/spf13/cobra" ) +const minCacheDuration int64 = 5 + +var errMinCacheDurtion = errors.New("invalid cache duration value, shall be >= " + strconv.Itoa(int(minCacheDuration))) + +type exporterOptions struct { + debug bool + webListen string + webMaxRequests int + webTelemetryPath string + webDisableExporterMetrics bool + webConfigFile string + cacheDuration int64 + enableAll bool + storeLabels bool + whiteListedLabels string + enableImages bool + enablePods bool + enableVolumes bool + enableNetworks bool + enableSystem bool +} + // Start starts prometheus exporter. 
func Start(cmd *cobra.Command, _ []string) error { // setup exporter - promlogConfig := &promlog.Config{Level: &promlog.AllowedLevel{}} - logLevel := "info" + promlogConfig := &promlog.Config{Level: &promlog.AllowedLevel{}} - debug, _ := cmd.Flags().GetBool("debug") - if debug { - logLevel = "debug" - } - - if err := promlogConfig.Level.Set(logLevel); err != nil { - return err - } - - webListen, err := cmd.Flags().GetString("web.listen-address") - if err != nil { - return err - } - - webMaxRequests, err := cmd.Flags().GetInt("web.max-requests") - if err != nil { - return err - } - - webTelemetryPath, err := cmd.Flags().GetString("web.telemetry-path") + cmdOptions, err := parseOptions(cmd) if err != nil { return err } - webDisableExporterMetrics, err := cmd.Flags().GetBool("web.disable-exporter-metrics") - if err != nil { - return err + if cmdOptions.debug { + logLevel = "debug" } - webConfigFile, err := cmd.Flags().GetString("web.config.file") - if err != nil { + if err := promlogConfig.Level.Set(logLevel); err != nil { return err } logger := promlog.New(promlogConfig) - if err := setEnabledCollectors(cmd); err != nil { + if err := setEnabledCollectors(cmdOptions); err != nil { level.Error(logger).Log("msg", "cannot set enabled collectors", "err", err) return err } level.Info(logger).Log("msg", "Starting podman-prometheus-exporter", "version", version.Info()) - http.Handle(webTelemetryPath, newHandler(webDisableExporterMetrics, webMaxRequests, logger)) + http.Handle( + cmdOptions.webTelemetryPath, + newHandler(cmdOptions.webDisableExporterMetrics, cmdOptions.webMaxRequests, logger), + ) http.HandleFunc("/", func(w http.ResponseWriter, _ *http.Request) { w.Write([]byte(` Podman Exporter @@ -77,20 +82,26 @@ func Start(cmd *cobra.Command, _ []string) error { // setup podman registry pdcs.SetupRegistry() // start podman event streamer and initiate first update. 
- pdcs.StartEventStreamer(logger) + updateImages := false + if cmdOptions.enableAll || cmdOptions.enableImages { + updateImages = true + } + + pdcs.StartEventStreamer(logger, updateImages) + pdcs.StartCacheSizeTicker(logger, cmdOptions.cacheDuration) - level.Info(logger).Log("msg", "Listening on", "address", webListen) + level.Info(logger).Log("msg", "Listening on", "address", cmdOptions.webListen) server := &http.Server{ ReadHeaderTimeout: 3 * time.Second, //nolint:gomnd } serverSystemd := false - serverWebListen := []string{webListen} + serverWebListen := []string{cmdOptions.webListen} toolkitFlag := new(web.FlagConfig) toolkitFlag.WebSystemdSocket = &serverSystemd toolkitFlag.WebListenAddresses = &serverWebListen - toolkitFlag.WebConfigFile = &webConfigFile + toolkitFlag.WebConfigFile = &cmdOptions.webConfigFile if err := web.ListenAndServe(server, toolkitFlag, logger); err != nil { return err @@ -99,34 +110,19 @@ func Start(cmd *cobra.Command, _ []string) error { return nil } -func setEnabledCollectors(cmd *cobra.Command) error { +func setEnabledCollectors(opts *exporterOptions) error { enList := []string{"container"} - enableAll, err := cmd.Flags().GetBool("collector.enable-all") - if err != nil { - return err - } + collector.RegisterVariableLabels(opts.storeLabels, opts.whiteListedLabels) - storeLabels, err := cmd.Flags().GetBool("collector.store_labels") - if err != nil { - return err - } - - whiteListedLabels, err := cmd.Flags().GetString("collector.whitelisted_labels") - if err != nil { - return err - } - - collector.RegisterVariableLabels(storeLabels, whiteListedLabels) - - if enableAll { + if opts.enableAll { enList = append(enList, "pod") enList = append(enList, "image") enList = append(enList, "volume") enList = append(enList, "network") enList = append(enList, "system") } else { - enList = append(enList, getEnabledCollectors(cmd)...) + enList = append(enList, getEnabledCollectors(opts)...) 
} // set podman collector state @@ -137,46 +133,127 @@ func setEnabledCollectors(cmd *cobra.Command) error { return nil } -func getEnabledCollectors(cmd *cobra.Command) []string { +func getEnabledCollectors(opts *exporterOptions) []string { enCollectors := make([]string, 0) - enimage := command{cmd}.isEnabled("collector.image") - if enimage { + if opts.enableImages { enCollectors = append(enCollectors, "image") } - enpod := command{cmd}.isEnabled("collector.pod") - if enpod { + if opts.enablePods { enCollectors = append(enCollectors, "pod") } - envolume := command{cmd}.isEnabled("collector.volume") - if envolume { + if opts.enableVolumes { enCollectors = append(enCollectors, "volume") } - ennetwork := command{cmd}.isEnabled("collector.network") - if ennetwork { + if opts.enableNetworks { enCollectors = append(enCollectors, "network") } - ensystem := command{cmd}.isEnabled("collector.system") - if ensystem { + if opts.enableSystem { enCollectors = append(enCollectors, "system") } return enCollectors } -type command struct { - *cobra.Command -} +func parseOptions(cmd *cobra.Command) (*exporterOptions, error) { //nolint:cyclop + debug, err := cmd.Flags().GetBool("debug") + if err != nil { + return nil, err + } -func (c command) isEnabled(name string) bool { - enable, err := c.Flags().GetBool(name) + webListen, err := cmd.Flags().GetString("web.listen-address") + if err != nil { + return nil, err + } + + webMaxRequests, err := cmd.Flags().GetInt("web.max-requests") + if err != nil { + return nil, err + } + + webTelemetryPath, err := cmd.Flags().GetString("web.telemetry-path") + if err != nil { + return nil, err + } + + webDisableExporterMetrics, err := cmd.Flags().GetBool("web.disable-exporter-metrics") + if err != nil { + return nil, err + } + + webConfigFile, err := cmd.Flags().GetString("web.config.file") + if err != nil { + return nil, err + } + + enableAll, err := cmd.Flags().GetBool("collector.enable-all") + if err != nil { + return nil, err + } + + storeLabels, 
err := cmd.Flags().GetBool("collector.store_labels") if err != nil { - return false + return nil, err + } + + whiteListedLabels, err := cmd.Flags().GetString("collector.whitelisted_labels") + if err != nil { + return nil, err + } + + enableImages, err := cmd.Flags().GetBool("collector.image") + if err != nil { + return nil, err + } + + enablePods, err := cmd.Flags().GetBool("collector.pod") + if err != nil { + return nil, err + } + + enableVolumes, err := cmd.Flags().GetBool("collector.volume") + if err != nil { + return nil, err + } + + enableNetworks, err := cmd.Flags().GetBool("collector.network") + if err != nil { + return nil, err + } + + enableSystem, err := cmd.Flags().GetBool("collector.system") + if err != nil { + return nil, err + } + + cacheDuration, err := cmd.Flags().GetInt64("collector.cache_duration") + if err != nil { + return nil, err + } + + if cacheDuration < minCacheDuration { + return nil, errMinCacheDurtion } - return enable + return &exporterOptions{ + debug: debug, + webListen: webListen, + webMaxRequests: webMaxRequests, + webTelemetryPath: webTelemetryPath, + webDisableExporterMetrics: webDisableExporterMetrics, + webConfigFile: webConfigFile, + enableAll: enableAll, + storeLabels: storeLabels, + whiteListedLabels: whiteListedLabels, + enableImages: enableImages, + enablePods: enablePods, + enableVolumes: enableVolumes, + enableNetworks: enableNetworks, + enableSystem: enableSystem, + cacheDuration: cacheDuration, + }, nil } diff --git a/pdcs/container.go b/pdcs/container.go index 361d0b4a..b3588e4f 100644 --- a/pdcs/container.go +++ b/pdcs/container.go @@ -2,32 +2,39 @@ package pdcs import ( "context" + "sync" "time" "github.com/containers/podman/v4/cmd/podman/registry" "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/pkg/domain/entities" + klog "github.com/go-kit/log" + "github.com/go-kit/log/level" ) const ( nano float64 = 1e+9 ) +var cntSizeCache containerSizeCache + // Container implements container's 
basic information and its state. type Container struct { - ID string - PodID string // if container is part of pod - PodName string // if container is part of pod - Name string - Labels map[string]string - Image string - Created int64 - Started int64 - Exited int64 - ExitCode int32 - Ports string - State int - Health int + ID string + PodID string // if container is part of pod + PodName string // if container is part of pod + Name string + Labels map[string]string + Image string + Created int64 + Started int64 + Exited int64 + ExitCode int32 + Ports string + State int + Health int + RwSize int64 + RootFsSize int64 } // ContainerStat implements container's stat. @@ -45,6 +52,17 @@ type ContainerStat struct { BlockOutput uint64 } +type containerSizeCache struct { + cacheLock sync.Mutex + cacheError error + cache map[string]containerSize +} + +type containerSize struct { + rwSize int64 + rootFsSize int64 +} + // Containers returns list of containers (Container). func Containers() ([]Container, error) { containers := make([]Container, 0) @@ -54,24 +72,50 @@ func Containers() ([]Container, error) { entities.ContainerListOptions{All: true, Pod: true}, ) if err != nil { - return containers, err + return nil, err + } + + cntSizeCache.cacheLock.Lock() + + cacheSizeInfo := cntSizeCache.cache + cacheErr := cntSizeCache.cacheError + + cntSizeCache.cacheLock.Unlock() + + if cacheErr != nil { + return nil, cacheErr } for _, rep := range reports { + var ( + rwSize int64 + rootFsSize int64 + ) + + cntID := getID(rep.ID) + + cntSizeInfo, ok := cacheSizeInfo[cntID] + if ok { + rwSize = cntSizeInfo.rwSize + rootFsSize = cntSizeInfo.rootFsSize + } + containers = append(containers, Container{ - ID: getID(rep.ID), - PodID: getID(rep.Pod), - PodName: rep.PodName, - Name: rep.Names[0], - Image: rep.Image, - Created: rep.Created.Unix(), - Started: rep.StartedAt, - Exited: rep.ExitedAt, - ExitCode: rep.ExitCode, - State: conReporter{rep}.state(), - Health: conReporter{rep}.health(), - Ports: 
conReporter{rep}.ports(), - Labels: rep.Labels, + ID: cntID, + PodID: getID(rep.Pod), + PodName: rep.PodName, + Name: rep.Names[0], + Image: rep.Image, + Created: rep.Created.Unix(), + Started: rep.StartedAt, + Exited: rep.ExitedAt, + ExitCode: rep.ExitCode, + State: conReporter{rep}.state(), + Health: conReporter{rep}.health(), + Ports: conReporter{rep}.ports(), + Labels: rep.Labels, + RwSize: rwSize, + RootFsSize: rootFsSize, }) } @@ -128,3 +172,56 @@ func ContainersStats() ([]ContainerStat, error) { return stat, nil } + +// updateContainerSize rebuilds the container size cache from a fresh podman listing. +func updateContainerSize() { + // Query podman before locking so a slow size scan does not block Containers(). + reports, err := registry.ContainerEngine().ContainerList( + registry.Context(), + entities.ContainerListOptions{All: true, Pod: false, Size: true}, + ) + if err != nil { + cntSizeCache.cacheLock.Lock() + cntSizeCache.cacheError = err + cntSizeCache.cacheLock.Unlock() + + return + } + + // Fill a new map instead of mutating the published one: Containers() reads + // the map after releasing the lock, so in-place writes would race with it. + sizes := make(map[string]containerSize, len(reports)) + + for _, cnt := range reports { + var cntSz containerSize + + if cnt.Size != nil { + cntSz.rwSize = cnt.Size.RwSize + cntSz.rootFsSize = cnt.Size.RootFsSize + } + + sizes[getID(cnt.ID)] = cntSz + } + + cntSizeCache.cacheLock.Lock() + cntSizeCache.cache = sizes + cntSizeCache.cacheError = nil // a successful refresh clears any earlier failure + cntSizeCache.cacheLock.Unlock() +} + +// StartCacheSizeTicker starts container cache refresh routine. 
+func StartCacheSizeTicker(logger klog.Logger, duration int64) { + level.Debug(logger).Log("msg", "starting container size cache ticker", "duration", duration) + level.Debug(logger).Log("msg", "update container size cache") + updateContainerSize() + + ticker := time.NewTicker(time.Duration(duration) * time.Second) + + go func() { + for { + <-ticker.C + level.Debug(logger).Log("msg", "update container size cache") + updateContainerSize() + } + }() +} diff --git a/pdcs/events.go b/pdcs/events.go index 5e2794d6..86a11738 100644 --- a/pdcs/events.go +++ b/pdcs/events.go @@ -11,12 +11,15 @@ import ( "github.com/go-kit/log/level" ) -func StartEventStreamer(logger klog.Logger) { +func StartEventStreamer(logger klog.Logger, updateImage bool) { var eventOptions entities.EventsOptions level.Debug(logger).Log("msg", "starting podman event streamer") - level.Debug(logger).Log("msg", "update images") - updateImages() + + if updateImage { + level.Debug(logger).Log("msg", "update images") + updateImages() + } eventChannel := make(chan *events.Event, 1) eventOptions.EventChan = eventChannel @@ -41,7 +44,7 @@ func StartEventStreamer(logger klog.Logger) { continue } - if event.Type == events.Image { + if updateImage && event.Type == events.Image { level.Debug(logger).Log("msg", "update images") updateImages() } diff --git a/pdcs/events_test.go b/pdcs/events_test.go index 0ef45062..b4277603 100644 --- a/pdcs/events_test.go +++ b/pdcs/events_test.go @@ -17,7 +17,7 @@ var _ = Describe("Pdcs/Events", func() { imageCount01 := len(podmanImages) logger := promlog.New(&promlog.Config{}) - pdcs.StartEventStreamer(logger) + pdcs.StartEventStreamer(logger, true) testImage := "quay.io/libpod/alpine" diff --git a/pdcs/registry.go b/pdcs/registry.go index 6ac2504b..2ef2d3de 100644 --- a/pdcs/registry.go +++ b/pdcs/registry.go @@ -25,4 +25,6 @@ func SetupRegistry() { if err != nil { log.Fatal(err) } + + cntSizeCache.cache = make(map[string]containerSize) } diff --git a/test/e2e/container_test.go 
b/test/e2e/container_test.go index c638fcb3..65a6f07c 100644 --- a/test/e2e/container_test.go +++ b/test/e2e/container_test.go @@ -90,5 +90,22 @@ var _ = Describe("Container", func() { Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01Info))) Expect(response).Should(ContainElement(ContainSubstring(expectedCnt02Info))) + // podman_container_rw_size_bytes + expectedCnt01RwSize := fmt.Sprintf("podman_container_rw_size_bytes{id=\"%s\",pod_id=\"%s\",pod_name=\"%s\"} 0", + cnt01Inpect[0].ID[0:12], cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + expectedCnt02RwSize := fmt.Sprintf("podman_container_rw_size_bytes{id=\"%s\",pod_id=\"\",pod_name=\"\"} 0", + cnt02Inpect[0].ID[0:12]) + + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01RwSize))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt02RwSize))) + + // podman_container_rootfs_size_bytes + expectedCnt01RootFsSize := fmt.Sprintf("podman_container_rootfs_size_bytes{id=\"%s\",pod_id=\"%s\",pod_name=\"%s\"}", + cnt01Inpect[0].ID[0:12], cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + expectedCnt02RootFsSize := fmt.Sprintf("podman_container_rootfs_size_bytes{id=\"%s\",pod_id=\"\",pod_name=\"\"}", + cnt02Inpect[0].ID[0:12]) + + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01RootFsSize))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt02RootFsSize))) }) }) diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index d661a7d6..782fbb1d 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -16,7 +16,8 @@ import ( ) var ( - URL = "http://127.0.0.1:9882/metrics" + endpointURL = "http://127.0.0.1:9882/metrics" + cacheDuration int64 = 3600 ) func TestE2e(t *testing.T) { @@ -46,6 +47,7 @@ var _ = BeforeSuite(func() { rootCmd.Flags().BoolP("collector.system", "s", false, "") rootCmd.Flags().BoolP("collector.store_labels", "b", false, "") 
rootCmd.Flags().StringP("collector.whitelisted_labels", "w", "", "") + rootCmd.Flags().Int64P("collector.cache_duration", "t", cacheDuration, "") go func() { err := exporter.Start(rootCmd, nil) @@ -77,7 +79,7 @@ func extractLabelValue(line string, label string) string { } func queryEndPoint() []string { - req, err := http.NewRequest("GET", URL, nil) + req, err := http.NewRequest("GET", endpointURL, nil) Expect(err).To(BeNil()) res, err := http.DefaultClient.Do(req)