From 0ba4be3cefce16d55f2fd89748cde4c212ce5161 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:52:15 +0200 Subject: [PATCH] Add support for podman metrics in docker module (#41889) (#41966) * Add support for podman metrics (cherry picked from commit 1fefdbb2c26128c18e339a78866d4b756548b6c2) Co-authored-by: Michael Katsoulis --- metricbeat/docs/modules/docker.asciidoc | 9 +++++ metricbeat/metricbeat.reference.yml | 3 ++ .../module/docker/_meta/config.reference.yml | 3 ++ metricbeat/module/docker/_meta/config.yml | 3 ++ metricbeat/module/docker/_meta/docs.asciidoc | 6 +++ metricbeat/module/docker/config.go | 8 ++-- metricbeat/module/docker/cpu/cpu.go | 4 +- metricbeat/module/docker/diskio/diskio.go | 2 +- metricbeat/module/docker/docker.go | 40 +++++++++++++++---- metricbeat/module/docker/memory/memory.go | 4 +- metricbeat/module/docker/network/network.go | 2 +- .../docker/network_summary/network_summary.go | 2 +- metricbeat/modules.d/docker.yml.disabled | 3 ++ x-pack/metricbeat/metricbeat.reference.yml | 3 ++ 14 files changed, 76 insertions(+), 16 deletions(-) diff --git a/metricbeat/docs/modules/docker.asciidoc b/metricbeat/docs/modules/docker.asciidoc index cd5e2a207a83..a8ae9cbf4882 100644 --- a/metricbeat/docs/modules/docker.asciidoc +++ b/metricbeat/docs/modules/docker.asciidoc @@ -22,6 +22,9 @@ The Docker module is currently tested on Linux and Mac with the community edition engine, versions 1.11 and 17.09.0-ce. It is not tested on Windows, but it should also work there. +The Docker module supports collection of metrics from Podman's Docker-compatible API. +It has been tested on Linux and Mac with Podman Rest API v2.0.0 and above. + [float] === Module-specific configuration notes @@ -30,6 +33,9 @@ It is strongly recommended that you run Docker metricsets with a Docker API already takes up to 2 seconds. 
Specifying less than 3 seconds will result in requests that timeout, and no data will be reported for those requests. +In the case of Podman, the configuration parameter `podman` should be set to `true`. +This enables streaming of container stats output, which allows for more accurate +CPU percentage calculations when using Podman. :edit_url: @@ -62,6 +68,9 @@ metricbeat.modules: # If set to true, replace dots in labels with `_`. #labels.dedot: false + # Docker module supports metrics collection from podman's docker compatible API. In case of podman set to true. + # podman: false + # Skip metrics for certain device major numbers in docker/diskio. # Necessary on systems with software RAID, device mappers, # or other configurations where virtual disks will sum metrics from other disks. diff --git a/metricbeat/metricbeat.reference.yml b/metricbeat/metricbeat.reference.yml index b87cdb049fe6..aaa85594ffc9 100644 --- a/metricbeat/metricbeat.reference.yml +++ b/metricbeat/metricbeat.reference.yml @@ -258,6 +258,9 @@ metricbeat.modules: # If set to true, replace dots in labels with `_`. #labels.dedot: false + # Docker module supports metrics collection from podman's docker compatible API. In case of podman set to true. + # podman: false + # Skip metrics for certain device major numbers in docker/diskio. # Necessary on systems with software RAID, device mappers, # or other configurations where virtual disks will sum metrics from other disks. diff --git a/metricbeat/module/docker/_meta/config.reference.yml b/metricbeat/module/docker/_meta/config.reference.yml index 8d11201983cf..184d6592bf17 100644 --- a/metricbeat/module/docker/_meta/config.reference.yml +++ b/metricbeat/module/docker/_meta/config.reference.yml @@ -17,6 +17,9 @@ # If set to true, replace dots in labels with `_`. #labels.dedot: false + # Docker module supports metrics collection from podman's docker compatible API. In case of podman set to true. 
+ # podman: false + # Skip metrics for certain device major numbers in docker/diskio. # Necessary on systems with software RAID, device mappers, # or other configurations where virtual disks will sum metrics from other disks. diff --git a/metricbeat/module/docker/_meta/config.yml b/metricbeat/module/docker/_meta/config.yml index da3c1e02a068..a7b9b9196fca 100644 --- a/metricbeat/module/docker/_meta/config.yml +++ b/metricbeat/module/docker/_meta/config.yml @@ -15,6 +15,9 @@ # If set to true, replace dots in labels with `_`. #labels.dedot: false + # Docker module supports metrics collection from podman's Docker-compatible API. In case of podman set to true. + # podman: false + # Skip metrics for certain device major numbers in docker/diskio. # Necessary on systems with software RAID, device mappers, # or other configurations where virtual disks will sum metrics from other disks. diff --git a/metricbeat/module/docker/_meta/docs.asciidoc b/metricbeat/module/docker/_meta/docs.asciidoc index e1d5437572a4..ca2da0ea26dc 100644 --- a/metricbeat/module/docker/_meta/docs.asciidoc +++ b/metricbeat/module/docker/_meta/docs.asciidoc @@ -11,6 +11,9 @@ The Docker module is currently tested on Linux and Mac with the community edition engine, versions 1.11 and 17.09.0-ce. It is not tested on Windows, but it should also work there. +The Docker module supports collection of metrics from Podman's Docker-compatible API. +It has been tested on Linux and Mac with Podman Rest API v2.0.0 and above. + [float] === Module-specific configuration notes @@ -19,3 +22,6 @@ It is strongly recommended that you run Docker metricsets with a Docker API already takes up to 2 seconds. Specifying less than 3 seconds will result in requests that timeout, and no data will be reported for those requests. +In the case of Podman, the configuration parameter `podman` should be set to `true`. 
+This enables streaming of container stats output, which allows for more accurate +CPU percentage calculations when using Podman. diff --git a/metricbeat/module/docker/config.go b/metricbeat/module/docker/config.go index 40698cb0baf9..b9bee9b35e9d 100644 --- a/metricbeat/module/docker/config.go +++ b/metricbeat/module/docker/config.go @@ -19,14 +19,16 @@ package docker // Config contains the config needed for the docker type Config struct { - TLS *TLSConfig `config:"ssl"` - DeDot bool `config:"labels.dedot"` + TLS *TLSConfig `config:"ssl"` + DeDot bool `config:"labels.dedot"` + Podman bool `config:"podman"` } // DefaultConfig returns default module config func DefaultConfig() Config { return Config{ - DeDot: true, + DeDot: true, + Podman: false, } } diff --git a/metricbeat/module/docker/cpu/cpu.go b/metricbeat/module/docker/cpu/cpu.go index a29ee8a00cc2..6869dd30a461 100644 --- a/metricbeat/module/docker/cpu/cpu.go +++ b/metricbeat/module/docker/cpu/cpu.go @@ -40,6 +40,7 @@ type MetricSet struct { cpuService *CPUService dockerClient *client.Client dedot bool + podman bool } // New creates a new instance of the docker cpu MetricSet. @@ -68,12 +69,13 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { dockerClient: client, cpuService: &CPUService{Cores: cpuConfig.Cores}, dedot: config.DeDot, + podman: config.Podman, }, nil } // Fetch returns a list of docker CPU stats. 
func (m *MetricSet) Fetch(r mb.ReporterV2) error { - stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout) + stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout, m.podman, m.Logger()) if err != nil { return fmt.Errorf("failed to get docker stats: %w", err) } diff --git a/metricbeat/module/docker/diskio/diskio.go b/metricbeat/module/docker/diskio/diskio.go index df5a0f2dff51..a59de2d52683 100644 --- a/metricbeat/module/docker/diskio/diskio.go +++ b/metricbeat/module/docker/diskio/diskio.go @@ -89,7 +89,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { // Fetch creates list of events with diskio stats for all containers. func (m *MetricSet) Fetch(r mb.ReporterV2) error { - stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout) + stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout, false, m.Logger()) if err != nil { return fmt.Errorf("failed to get docker stats: %w", err) } diff --git a/metricbeat/module/docker/docker.go b/metricbeat/module/docker/docker.go index 2020df91975b..d4595ef8e5ed 100644 --- a/metricbeat/module/docker/docker.go +++ b/metricbeat/module/docker/docker.go @@ -34,6 +34,7 @@ import ( "github.com/elastic/beats/v7/metricbeat/mb" "github.com/elastic/beats/v7/metricbeat/mb/parse" "github.com/elastic/elastic-agent-autodiscover/docker" + "github.com/elastic/elastic-agent-libs/logp" ) // HostParser is a TCP host parser function for docker tcp host addresses @@ -91,7 +92,7 @@ func NewDockerClient(endpoint string, config Config) (*client.Client, error) { } // FetchStats returns a list of running containers with all related stats inside -func FetchStats(client *client.Client, timeout time.Duration) ([]Stat, error) { +func FetchStats(client *client.Client, timeout time.Duration, stream bool, logger *logp.Logger) ([]Stat, error) { ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() containers, err := client.ContainerList(ctx, 
container.ListOptions{}) @@ -108,7 +109,7 @@ func FetchStats(client *client.Client, timeout time.Duration) ([]Stat, error) { for _, container := range containers { go func(container types.Container) { defer wg.Done() - statsQueue <- exportContainerStats(ctx, client, &container) + statsQueue <- exportContainerStats(ctx, client, &container, stream, logger) }(container) } @@ -133,18 +134,41 @@ func FetchStats(client *client.Client, timeout time.Duration) ([]Stat, error) { // This is currently very inefficient as docker calculates the average for each request, // means each request will take at least 2s: https://github.com/docker/docker/blob/master/cli/command/container/stats_helpers.go#L148 // Getting all stats at once is implemented here: https://github.com/docker/docker/pull/25361 -func exportContainerStats(ctx context.Context, client *client.Client, container *types.Container) Stat { +// In case stream is true, we get a stream of results for container stats. From the stream we keep the second result. +// This is needed for the podman use case where, in case stream is false, no precpu stats are returned. The precpu stats +// are required for the cpu percentage calculation. We keep the second result because, in the first result, the stats are not correct. 
+func exportContainerStats(ctx context.Context, client *client.Client, container *types.Container, stream bool, logger *logp.Logger) Stat { var event Stat event.Container = container - - containerStats, err := client.ContainerStats(ctx, container.ID, false) + containerStats, err := client.ContainerStats(ctx, container.ID, stream) if err != nil { + logger.Debugf("Failed fetching container stats: %v", err) return event } - defer containerStats.Body.Close() - decoder := json.NewDecoder(containerStats.Body) - decoder.Decode(&event.Stats) + // JSON decoder + decoder := json.NewDecoder(containerStats.Body) + if !stream { + if err := decoder.Decode(&event.Stats); err != nil { + logger.Debugf("Failed decoding event: %v", err) + return event + } + } else { + // handle stream. Take the second result. + count := 0 + for decoder.More() { + if err := decoder.Decode(&event.Stats); err != nil { + logger.Debugf("Failed decoding event: %v", err) + return event + } + + count++ + // Exit after the second result + if count == 2 { + break + } + } + } return event } diff --git a/metricbeat/module/docker/memory/memory.go b/metricbeat/module/docker/memory/memory.go index 140383de833e..5c90c09d39c2 100644 --- a/metricbeat/module/docker/memory/memory.go +++ b/metricbeat/module/docker/memory/memory.go @@ -43,6 +43,7 @@ type MetricSet struct { memoryService *MemoryService dockerClient *client.Client dedot bool + podman bool logger *logp.Logger } @@ -64,13 +65,14 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { memoryService: &MemoryService{}, dockerClient: dockerClient, dedot: config.DeDot, + podman: config.Podman, logger: logger, }, nil } // Fetch creates a list of memory events for each container. 
func (m *MetricSet) Fetch(r mb.ReporterV2) error { - stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout) + stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout, m.podman, m.Logger()) if err != nil { return fmt.Errorf("failed to get docker stats: %w", err) } diff --git a/metricbeat/module/docker/network/network.go b/metricbeat/module/docker/network/network.go index 8a70fd124466..34487aa49835 100644 --- a/metricbeat/module/docker/network/network.go +++ b/metricbeat/module/docker/network/network.go @@ -66,7 +66,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { // Fetch methods creates a list of network events for each container. func (m *MetricSet) Fetch(r mb.ReporterV2) error { - stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout) + stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout, false, m.Logger()) if err != nil { return fmt.Errorf("failed to get docker stats: %w", err) } diff --git a/metricbeat/module/docker/network_summary/network_summary.go b/metricbeat/module/docker/network_summary/network_summary.go index 9753b449adde..9052e38580b0 100644 --- a/metricbeat/module/docker/network_summary/network_summary.go +++ b/metricbeat/module/docker/network_summary/network_summary.go @@ -84,7 +84,7 @@ func New(base mb.BaseMetricSet) (mb.MetricSet, error) { // of an error set the Error field of mb.Event or simply call report.Error(). 
func (m *MetricSet) Fetch(ctx context.Context, report mb.ReporterV2) error { - stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout) + stats, err := docker.FetchStats(m.dockerClient, m.Module().Config().Timeout, false, m.Logger()) if err != nil { return fmt.Errorf("failed to get docker stats: %w", err) } diff --git a/metricbeat/modules.d/docker.yml.disabled b/metricbeat/modules.d/docker.yml.disabled index acae1fde2598..03215d2e8659 100644 --- a/metricbeat/modules.d/docker.yml.disabled +++ b/metricbeat/modules.d/docker.yml.disabled @@ -18,6 +18,9 @@ # If set to true, replace dots in labels with `_`. #labels.dedot: false + # Docker module supports metrics collection from podman's Docker-compatible API. In case of podman set to true. + # podman: false + # Skip metrics for certain device major numbers in docker/diskio. # Necessary on systems with software RAID, device mappers, # or other configurations where virtual disks will sum metrics from other disks. diff --git a/x-pack/metricbeat/metricbeat.reference.yml b/x-pack/metricbeat/metricbeat.reference.yml index 240acb2cfd6a..58e7150b650d 100644 --- a/x-pack/metricbeat/metricbeat.reference.yml +++ b/x-pack/metricbeat/metricbeat.reference.yml @@ -503,6 +503,9 @@ metricbeat.modules: # If set to true, replace dots in labels with `_`. #labels.dedot: false + # Docker module supports metrics collection from podman's docker compatible API. In case of podman set to true. + # podman: false + # Skip metrics for certain device major numbers in docker/diskio. # Necessary on systems with software RAID, device mappers, # or other configurations where virtual disks will sum metrics from other disks.