From 04a444a45334c605c31b361c192af4bb129b40f6 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 12 Dec 2024 11:52:04 -0500 Subject: [PATCH] cni: use check command when restoring from restart When the Nomad client restarts and restores allocations, the network namespace for an allocation may exist but no longer be correctly configured. For example, if the host is rebooted and the task was a Docker task using a pause container, the network namespace may be recreated by the docker daemon. When we restore an allocation, use the CNI "check" command to verify that any existing network namespace matches the expected configuration. This requires CNI plugins of at least version 1.2.0 to avoid a bug in older plugin versions that would cause the check to fail. If the check fails, destroy the network namespace and try to recreate it from scratch once. If that fails in the second pass, fail the restore so that the allocation can be recreated (rather than silently having networking fail). This should fix the gap left #24650 for Docker task drivers and any other drivers with the `MustInitiateNetwork` capability. Fixes: https://github.com/hashicorp/nomad/issues/24292 Ref: https://github.com/hashicorp/nomad/pull/24650 --- .changelog/24658.txt | 3 + client/allocrunner/network_hook.go | 23 ++- client/allocrunner/network_hook_test.go | 146 +++++++++++++++++- client/allocrunner/networking.go | 8 +- client/allocrunner/networking_bridge_linux.go | 4 +- client/allocrunner/networking_cni.go | 42 ++++- .../allocrunner/networking_cni_mock_test.go | 12 +- client/allocrunner/networking_cni_test.go | 2 +- 8 files changed, 228 insertions(+), 12 deletions(-) create mode 100644 .changelog/24658.txt diff --git a/.changelog/24658.txt b/.changelog/24658.txt new file mode 100644 index 00000000000..f46da955490 --- /dev/null +++ b/.changelog/24658.txt @@ -0,0 +1,3 @@ +```release-note:bug +networking: check network namespaces on Linux during client restarts and fail the allocation if an existing namespace is invalid +``` diff --git a/client/allocrunner/network_hook.go b/client/allocrunner/network_hook.go index 570943b3795..d8d998d9fd3 100644 --- a/client/allocrunner/network_hook.go +++ b/client/allocrunner/network_hook.go @@ -5,6 +5,7 @@ package allocrunner import ( "context" + "errors" "fmt" hclog "github.com/hashicorp/go-hclog" @@ -132,6 +133,9 @@ func (h *networkHook) Prerun() error { Hostname: interpolatedNetworks[0].Hostname, } + var checkedOnce bool + +CREATE: spec, created, err := h.manager.CreateNetwork(h.alloc.ID, &networkCreateReq) if err != nil { return fmt.Errorf("failed to create network for alloc: %v", err) @@ -142,11 +146,26 @@ func (h *networkHook) Prerun() error { h.isolationSetter.SetNetworkIsolation(spec) } - if created { - status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec) + if spec != nil { + status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec, created) if err != nil { + // if the netns already existed but is invalid, we get + // ErrCNICheckFailed. We'll try to recover from this one time by + // recreating the netns from scratch before giving up + if errors.Is(err, ErrCNICheckFailed) && !checkedOnce { + checkedOnce = true + destroyErr := h.manager.DestroyNetwork(h.alloc.ID, spec) + if destroyErr != nil { + return fmt.Errorf("%w: destroying network to retry failed: %v", err, destroyErr) + } + goto CREATE + } + return fmt.Errorf("failed to configure networking for alloc: %v", err) } + if status == nil { + return nil // netns already existed and was correctly configured + } // If the driver set the sandbox hostname label, then we will use that // to set the HostsConfig.Hostname. Otherwise, identify the sandbox diff --git a/client/allocrunner/network_hook_test.go b/client/allocrunner/network_hook_test.go index 99845f875bc..9e4be2da89a 100644 --- a/client/allocrunner/network_hook_test.go +++ b/client/allocrunner/network_hook_test.go @@ -4,6 +4,7 @@ package allocrunner import ( + "fmt" "testing" "github.com/hashicorp/nomad/ci" @@ -14,6 +15,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" "github.com/hashicorp/nomad/plugins/drivers/testutils" + "github.com/hashicorp/nomad/testutil" "github.com/shoenig/test" "github.com/shoenig/test/must" ) @@ -65,7 +67,7 @@ func TestNetworkHook_Prerun_Postrun_group(t *testing.T) { MockNetworkManager: testutils.MockNetworkManager{ CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) { test.Eq(t, alloc.ID, allocID) - return spec, false, nil + return spec, true, nil }, DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error { @@ -137,3 +139,145 @@ func TestNetworkHook_Prerun_Postrun_host(t *testing.T) { must.NoError(t, hook.Postrun()) must.True(t, destroyCalled) } + +// TestNetworkHook_Prerun_Postrun_ExistingNetNS tests that the prerun and +// postrun hooks call the Setup and Destroy with the expected behaviors when the +// network namespace already exists (typical of agent restarts and host reboots) +func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { + ci.Parallel(t) + + alloc := mock.Alloc() + alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{ + {Mode: "bridge"}, + } + + spec := &drivers.NetworkIsolationSpec{ + Mode: drivers.NetIsolationModeGroup, + Path: "test", + Labels: map[string]string{"abc": "123"}, + } + isolationSetter := &mockNetworkIsolationSetter{t: t, expectedSpec: spec} + statusSetter := &mockNetworkStatusSetter{t: t, expectedStatus: nil} + + callCounts := testutil.NewCallCounter() + + nm := &testutils.MockDriver{ + MockNetworkManager: testutils.MockNetworkManager{ + CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) { + test.Eq(t, alloc.ID, allocID) + callCounts.Inc("CreateNetwork") + return spec, false, nil + }, + + DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error { + test.Eq(t, alloc.ID, allocID) + test.Eq(t, spec, netSpec) + callCounts.Inc("DestroyNetwork") + return nil + }, + }, + } + + fakePlugin := newMockCNIPlugin() + + configurator := &cniNetworkConfigurator{ + nodeAttrs: map[string]string{ + "plugins.cni.version.bridge": "1.6.1", + }, + nodeMeta: map[string]string{}, + logger: testlog.HCLogger(t), + cni: fakePlugin, + nsOpts: &nsOpts{}, + } + + envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region) + + testCases := []struct { + name string + cniVersion string + checkErrs []error + setupErrs []string + expectPrerunCreateNetworkCalls int + expectPrerunDestroyNetworkCalls int + expectCheckCalls int + expectSetupCalls int + expectPostrunDestroyNetworkCalls int + expectPrerunError string + }{ + { + name: "good check", + cniVersion: "1.6.1", + expectPrerunCreateNetworkCalls: 1, + expectPrerunDestroyNetworkCalls: 0, + expectCheckCalls: 1, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 1, + }, + { + name: "initial check fails", + cniVersion: "1.6.1", + checkErrs: []error{fmt.Errorf("whatever")}, + expectPrerunCreateNetworkCalls: 2, + expectPrerunDestroyNetworkCalls: 1, + expectCheckCalls: 2, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 2, + }, + { + name: "check fails twice", + cniVersion: "1.6.1", + checkErrs: []error{ + fmt.Errorf("whatever"), + fmt.Errorf("whatever"), + }, + expectPrerunCreateNetworkCalls: 2, + expectPrerunDestroyNetworkCalls: 1, + expectCheckCalls: 2, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 2, + expectPrerunError: "failed to configure networking for alloc: network namespace already exists but was misconfigured: whatever", + }, + { + name: "old CNI version skips check", + cniVersion: "1.2.0", + expectPrerunCreateNetworkCalls: 1, + expectPrerunDestroyNetworkCalls: 0, + expectCheckCalls: 0, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 1, + }, + } + + for _, tc := range testCases { + + t.Run(tc.name, func(t *testing.T) { + callCounts.Reset() + fakePlugin.counter.Reset() + fakePlugin.checkErrors = tc.checkErrs + configurator.nodeAttrs["plugins.cni.version.bridge"] = tc.cniVersion + hook := newNetworkHook(testlog.HCLogger(t), isolationSetter, + alloc, nm, configurator, statusSetter, envBuilder.Build()) + + err := hook.Prerun() + if tc.expectPrerunError == "" { + must.NoError(t, err) + } else { + must.EqError(t, err, tc.expectPrerunError) + } + + test.Eq(t, tc.expectPrerunDestroyNetworkCalls, + callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after prerun")) + test.Eq(t, tc.expectPrerunCreateNetworkCalls, + callCounts.Get()["CreateNetwork"], test.Sprint("CreateNetwork calls after prerun")) + + test.Eq(t, tc.expectCheckCalls, fakePlugin.counter.Get()["Check"], test.Sprint("Check calls")) + test.Eq(t, tc.expectSetupCalls, fakePlugin.counter.Get()["Setup"], test.Sprint("Setup calls")) + + must.NoError(t, hook.Postrun()) + test.Eq(t, tc.expectPostrunDestroyNetworkCalls, + callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after postrun")) + + }) + + } +} diff --git a/client/allocrunner/networking.go b/client/allocrunner/networking.go index e98af0b4f89..07a98660795 100644 --- a/client/allocrunner/networking.go +++ b/client/allocrunner/networking.go @@ -14,7 +14,7 @@ import ( // NetworkConfigurator sets up and tears down the interfaces, routes, firewall // rules, etc for the configured networking mode of the allocation. type NetworkConfigurator interface { - Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) + Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error } @@ -23,7 +23,7 @@ type NetworkConfigurator interface { // require further configuration type hostNetworkConfigurator struct{} -func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error) { return nil, nil } func (h *hostNetworkConfigurator) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error { @@ -41,10 +41,10 @@ type synchronizedNetworkConfigurator struct { nc NetworkConfigurator } -func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) { networkingGlobalMutex.Lock() defer networkingGlobalMutex.Unlock() - return s.nc.Setup(ctx, allocation, spec) + return s.nc.Setup(ctx, allocation, spec, created) } func (s *synchronizedNetworkConfigurator) Teardown(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) error { diff --git a/client/allocrunner/networking_bridge_linux.go b/client/allocrunner/networking_bridge_linux.go index 63a11c8f093..1204837b0dd 100644 --- a/client/allocrunner/networking_bridge_linux.go +++ b/client/allocrunner/networking_bridge_linux.go @@ -120,12 +120,12 @@ func (b *bridgeNetworkConfigurator) ensureForwardingRules() error { } // Setup calls the CNI plugins with the add action -func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) { if err := b.ensureForwardingRules(); err != nil { return nil, fmt.Errorf("failed to initialize table forwarding rules: %v", err) } - return b.cni.Setup(ctx, alloc, spec) + return b.cni.Setup(ctx, alloc, spec, created) } // Teardown calls the CNI plugins with the delete action diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 1c877fbe2ff..e981f1521e2 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -28,6 +28,7 @@ import ( consulIPTables "github.com/hashicorp/consul/sdk/iptables" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-set/v3" + "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/envoy" @@ -138,8 +139,22 @@ func addNomadWorkloadCNIArgs(logger log.Logger, alloc *structs.Allocation, cniAr } } +var ( + supportsCNICheck = mustCNICheckConstraint() + ErrCNICheckFailed = errors.New("network namespace already exists but was misconfigured") +) + +func mustCNICheckConstraint() version.Constraints { + version, err := version.NewConstraint(">= 1.3.0") + if err != nil { + panic(err) + } + return version +} + // Setup calls the CNI plugins with the add action -func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) { + if err := c.ensureCNIInitialized(); err != nil { return nil, fmt.Errorf("cni not initialized: %w", err) } @@ -171,6 +186,31 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc cniArgs[ConsulIPTablesConfigEnvVar] = string(iptablesCfg) } + if !created { + // The netns will not be created if it already exists, typically on + // agent restart. If the configuration of a prexisting netns is wrong + // (ex. after a host reboot for docker created netns), networking will + // be broken. CNI's ADD command is not idempotent so we can't simply try + // again. Run CHECK to verify the network is still valid. Older plugins + // have a broken CHECK, so we have to allow the buggy behavior in the + // case of a host reboot with docker-created netns there. + cniVersion, err := version.NewSemver(c.nodeAttrs["plugins.cni.version.bridge"]) + if err == nil && supportsCNICheck.Check(cniVersion) { + err := c.cni.Check(ctx, alloc.ID, spec.Path, + c.nsOpts.withCapabilityPortMap(portMaps.ports), + c.nsOpts.withArgs(cniArgs), + ) + if err != nil { + return nil, fmt.Errorf("%w: %w", ErrCNICheckFailed, err) + } + } else { + c.logger.Trace("network namespace exists but could not check if networking is valid because bridge plugin version was <1.3.0: continuing anyways") + return nil, nil + } + c.logger.Trace("network namespace exists and passed check: skipping setup") + return nil, nil + } + // Depending on the version of bridge cni plugin used, a known race could occure // where two alloc attempt to create the nomad bridge at the same time, resulting // in one of them to fail. This rety attempts to overcome those erroneous failures. diff --git a/client/allocrunner/networking_cni_mock_test.go b/client/allocrunner/networking_cni_mock_test.go index 9d8e67e22a9..2bc06e06819 100644 --- a/client/allocrunner/networking_cni_mock_test.go +++ b/client/allocrunner/networking_cni_mock_test.go @@ -18,6 +18,7 @@ var _ cni.CNI = &mockCNIPlugin{} type mockCNIPlugin struct { counter *testutil.CallCounter setupErrors []string + checkErrors []error } func newMockCNIPlugin() *mockCNIPlugin { @@ -25,7 +26,9 @@ func newMockCNIPlugin() *mockCNIPlugin { callCounts := testutil.NewCallCounter() callCounts.Reset() return &mockCNIPlugin{ - counter: callCounts, + counter: callCounts, + setupErrors: []string{}, + checkErrors: []error{}, } } @@ -65,6 +68,13 @@ func (f *mockCNIPlugin) Remove(ctx context.Context, id, path string, opts ...cni } func (f *mockCNIPlugin) Check(ctx context.Context, id, path string, opts ...cni.NamespaceOpts) error { + if f.counter != nil { + f.counter.Inc("Check") + } + numOfCalls := f.counter.Get()["Check"] + if numOfCalls <= len(f.checkErrors) { + return f.checkErrors[numOfCalls-1] + } return nil } diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index 798d641adb4..f76f2b2462e 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -296,7 +296,7 @@ func TestSetup(t *testing.T) { } // method under test - result, err := c.Setup(context.Background(), alloc, spec) + result, err := c.Setup(context.Background(), alloc, spec, true) if tc.expectErr == "" { must.NoError(t, err) must.Eq(t, tc.expectResult, result)