From c5d5f7ab0c6bbd65e6f25f40619fefb42e19a776 Mon Sep 17 00:00:00 2001 From: Leonard Cohnen Date: Fri, 3 Nov 2023 16:52:08 +0100 Subject: [PATCH] daemon: push own CiliumNode later When WireGuard node-to-node encryption is enabled and the control-planes are encrypted, this leads to the KubeAPI becoming unresponsive. This happens when the second control-plane with a stacked etcd architecture joins because the second etcd will join the first and then the first control-plane consumes the published CiliumNode CR and adds the node to its WireGuard interface and IPCache so that all traffic is now routed over it. This includes the etcd traffic. The second node does not yet have the first control-plane added to the WireGuard interface, hence the etcd traffic is dropped. This leads to an unresponsive KubeAPI when the second node now queries the CiliumNode CR it has created and the daemon setup never reaches the inclusion of the first node in the WireGuard interface. Therefore, we re-order the setup logic to first enable the CiliumNode watchers and push the node's own CiliumNode resource later. Fixes: #28965 Signed-off-by: Leonard Cohnen --- daemon/cmd/daemon.go | 70 ++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/daemon/cmd/daemon.go b/daemon/cmd/daemon.go index ead2b6f13e508..5b3d0903191a9 100644 --- a/daemon/cmd/daemon.go +++ b/daemon/cmd/daemon.go @@ -682,41 +682,6 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams bootstrapStats.fqdn.End(true) - if params.Clientset.IsEnabled() { - bootstrapStats.k8sInit.Start() - // Errors are handled inside WaitForCRDsToRegister. It will fatal on a - // context deadline or if the context has been cancelled, the context's - // error will be returned. Otherwise, it succeeded. 
- if !option.Config.DryMode { - if err := d.k8sWatcher.WaitForCRDsToRegister(d.ctx); err != nil { - return nil, restoredEndpoints, err - } - } - - if option.Config.IPAM == ipamOption.IPAMClusterPool || - option.Config.IPAM == ipamOption.IPAMMultiPool { - // Create the CiliumNode custom resource. This call will block until - // the custom resource has been created - d.nodeDiscovery.UpdateCiliumNodeResource() - } - - if err := agentK8s.WaitForNodeInformation(d.ctx, log, params.Resources.LocalNode, params.Resources.LocalCiliumNode); err != nil { - log.WithError(err).Error("unable to connect to get node spec from apiserver") - return nil, nil, fmt.Errorf("unable to connect to get node spec from apiserver: %w", err) - } - - // Kubernetes demands that the localhost can always reach local - // pods. Therefore unless the AllowLocalhost policy is set to a - // specific mode, always allow localhost to reach local - // endpoints. - if option.Config.AllowLocalhost == option.AllowLocalhostAuto { - option.Config.AllowLocalhost = option.AllowLocalhostAlways - log.Info("k8s mode: Allowing localhost to reach local endpoints") - } - - bootstrapStats.k8sInit.End(true) - } - if params.WGAgent != nil && option.Config.EnableWireguard { if err := params.WGAgent.Init(d.ipcache, d.mtuConfig); err != nil { log.WithError(err).Error("failed to initialize WireGuard agent") @@ -840,6 +805,41 @@ func newDaemon(ctx context.Context, cleaner *daemonCleanup, params *daemonParams close(params.CacheStatus) } + if params.Clientset.IsEnabled() { + bootstrapStats.k8sInit.Start() + // Errors are handled inside WaitForCRDsToRegister. It will fatal on a + // context deadline or if the context has been cancelled, the context's + // error will be returned. Otherwise, it succeeded. 
+ if !option.Config.DryMode { + if err := d.k8sWatcher.WaitForCRDsToRegister(d.ctx); err != nil { + return nil, restoredEndpoints, err + } + } + + if option.Config.IPAM == ipamOption.IPAMClusterPool || + option.Config.IPAM == ipamOption.IPAMMultiPool { + // Create the CiliumNode custom resource. This call will block until + // the custom resource has been created + d.nodeDiscovery.UpdateCiliumNodeResource() + } + + if err := agentK8s.WaitForNodeInformation(d.ctx, log, params.Resources.LocalNode, params.Resources.LocalCiliumNode); err != nil { + log.WithError(err).Error("unable to connect to get node spec from apiserver") + return nil, nil, fmt.Errorf("unable to connect to get node spec from apiserver: %w", err) + } + + // Kubernetes demands that the localhost can always reach local + // pods. Therefore unless the AllowLocalhost policy is set to a + // specific mode, always allow localhost to reach local + // endpoints. + if option.Config.AllowLocalhost == option.AllowLocalhostAuto { + option.Config.AllowLocalhost = option.AllowLocalhostAlways + log.Info("k8s mode: Allowing localhost to reach local endpoints") + } + + bootstrapStats.k8sInit.End(true) + } + bootstrapStats.cleanup.Start() err = clearCiliumVeths() bootstrapStats.cleanup.EndError(err)