From bbd6281ecc7d3b95513a936d5998a6e5cc7d7de2 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 17 Oct 2023 16:26:56 +0200 Subject: [PATCH] libpod: restart+userns cleanup netns correctly When a userns and netns is used we need to let the runtime create the netns otherwise the netns is not owned by the right userns and thus the capabilities would not be correct. The current restart logic tries to reuse the netns which is fine if no userns is used but when one is used we setup a new netns (which is correct) but forgot to cleanup the old netns. This resulted in leaked network namespaces and because no teardown was ever called leaked ipam assignments, thus a quickly restarting container will run out of ip space very fast. Fixes #18615 Signed-off-by: Paul Holzinger --- libpod/container_internal.go | 11 +++++++++ test/system/500-networking.bats | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 6e8306a240..4e3bd8e9e9 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -288,6 +288,17 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err } } }() + + // Now this is a bit of a mess, normally we try to reuse the netns but if a userns + // is used this is not possible as it must be owned by the userns which is created + // by the oci runtime. Thus we need to teardown the netns so that the runtime + // creates the users+netns and then we setup in completeNetworkSetup() again. + if c.config.PostConfigureNetNS { + if err := c.cleanupNetwork(); err != nil { + return false, err + } + } + if err := c.prepare(); err != nil { return false, err } diff --git a/test/system/500-networking.bats b/test/system/500-networking.bats index 6b6a86251d..a34d854244 100644 --- a/test/system/500-networking.bats +++ b/test/system/500-networking.bats @@ -862,4 +862,44 @@ EOF fi } +# Test for https://github.com/containers/podman/issues/18615 +@test "podman network cleanup --userns + --restart" { + skip_if_cgroupsv1 "run --uidmap fails on cgroups v1 (issue 15025, wontfix)" + userns="--userns=keep-id" + if ! is_rootless; then + userns="--uidmap=0:1111111:65536 --gidmap=0:1111111:65536" + fi + + local net1=a-$(random_string 10) + # use /29 subnet to limt available ip space, a 29 gives 5 useable addresses (6 - 1 for the gw) + local subnet="$(random_rfc1918_subnet).0/29" + run_podman network create --subnet $subnet $net1 + local cname=con-$(random_string 10) + + local netns_count= + if ! is_rootless; then + netns_count=$(ls /run/netns | wc -l) + fi + + # This will cause 7 containers runs with the restart policy (one more than the on failure limit) + # Since they run sequentially they should run fine without allocating all ips. + run_podman 1 run --name $cname --network $net1 --restart on-failure:6 --userns keep-id $IMAGE false + + # Previously this would fail as the container would run out of ips after 5 restarts. + run_podman inspect --format "{{.RestartCount}}" $cname + assert "$output" == "6" "RestartCount for failing container" + + # Now make sure we can still run a contianer with free ips. + run_podman run --rm --network $net1 $IMAGE true + + if ! is_rootless; then + # This is racy if other programs modify /run/netns while the test is running. + # However I think the risk is minimal and I think checking for this is important. + assert "$(ls /run/netns | wc -l)" == "$netns_count" "/run/netns has no leaked netns files" + fi + + run_podman rm -f -t0 $cname + run_podman network rm $net1 +} + # vim: filetype=sh