From 0d4d3c3d8386b18874a5a8f9f62739580b57d635 Mon Sep 17 00:00:00 2001
From: Leonard Cohnen
Date: Fri, 3 Nov 2023 20:45:32 +0100
Subject: [PATCH] remove cilium restart fix

---
 bootstrapper/cmd/bootstrapper/run.go          |  4 +-
 bootstrapper/cmd/bootstrapper/test.go         |  2 +-
 .../internal/kubernetes/k8sapi/k8sutil.go     | 67 -----------------
 bootstrapper/internal/kubernetes/k8sutil.go   |  2 -
 .../internal/kubernetes/kubernetes.go         | 24 +------
 .../internal/kubernetes/kubernetes_test.go    |  8 ---
 cli/internal/helm/BUILD.bazel                 |  4 --
 cli/internal/helm/actionfactory.go            | 29 --------
 cli/internal/helm/ciliumhelper.go             | 72 -------------------
 9 files changed, 4 insertions(+), 208 deletions(-)
 delete mode 100644 cli/internal/helm/ciliumhelper.go

diff --git a/bootstrapper/cmd/bootstrapper/run.go b/bootstrapper/cmd/bootstrapper/run.go
index f3639bd599..c036b5ba9f 100644
--- a/bootstrapper/cmd/bootstrapper/run.go
+++ b/bootstrapper/cmd/bootstrapper/run.go
@@ -51,7 +51,7 @@ func run(issuer atls.Issuer, openDevice vtpm.TPMOpenFunc, fileHandler file.Handl
 	}
 
 	if nodeBootstrapped {
-		if err := kube.StartKubelet(log); err != nil {
+		if err := kube.StartKubelet(); err != nil {
 			log.With(zap.Error(err)).Fatalf("Failed to restart kubelet")
 		}
 		return
@@ -93,7 +93,7 @@ func getDiskUUID() (string, error) {
 type clusterInitJoiner interface {
 	joinclient.ClusterJoiner
 	initserver.ClusterInitializer
-	StartKubelet(*logger.Logger) error
+	StartKubelet() error
 }
 
 type metadataAPI interface {
diff --git a/bootstrapper/cmd/bootstrapper/test.go b/bootstrapper/cmd/bootstrapper/test.go
index bdf03bbd3c..3317326ae6 100644
--- a/bootstrapper/cmd/bootstrapper/test.go
+++ b/bootstrapper/cmd/bootstrapper/test.go
@@ -33,7 +33,7 @@ func (c *clusterFake) JoinCluster(context.Context, *kubeadm.BootstrapTokenDiscov
 }
 
 // StartKubelet starts the kubelet service.
-func (c *clusterFake) StartKubelet(*logger.Logger) error {
+func (c *clusterFake) StartKubelet() error {
 	return nil
 }
 
diff --git a/bootstrapper/internal/kubernetes/k8sapi/k8sutil.go b/bootstrapper/internal/kubernetes/k8sapi/k8sutil.go
index 230e5958f2..bf0b0ee805 100644
--- a/bootstrapper/internal/kubernetes/k8sapi/k8sutil.go
+++ b/bootstrapper/internal/kubernetes/k8sapi/k8sutil.go
@@ -14,11 +14,9 @@ import (
 	"errors"
 	"fmt"
 	"net"
-	"net/http"
 	"os"
 	"os/exec"
 	"path/filepath"
-	"strings"
 	"time"
 
 	"github.com/edgelesssys/constellation/v2/bootstrapper/internal/certificate"
@@ -192,71 +190,6 @@ type SetupPodNetworkInput struct {
 	LoadBalancerPort string
 }
 
-// WaitForCilium waits until Cilium reports a healthy status over its /healthz endpoint.
-func (k *KubernetesUtil) WaitForCilium(ctx context.Context, log *logger.Logger) error {
-	// wait for cilium pod to be healthy
-	client := http.Client{}
-	for {
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		default:
-			time.Sleep(5 * time.Second)
-			req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://127.0.0.1:9879/healthz", http.NoBody)
-			if err != nil {
-				return fmt.Errorf("unable to create request: %w", err)
-			}
-			resp, err := client.Do(req)
-			if err != nil {
-				log.With(zap.Error(err)).Infof("Waiting for local Cilium DaemonSet - Pod not healthy yet")
-				continue
-			}
-			resp.Body.Close()
-			if resp.StatusCode == 200 {
-				return nil
-			}
-		}
-	}
-}
-
-// FixCilium fixes https://github.com/cilium/cilium/issues/19958
-// Instead of a rollout restart of the Cilium DaemonSet, it only restarts the local Cilium Pod.
-func (k *KubernetesUtil) FixCilium(ctx context.Context) error {
-	// get cilium container id
-	out, err := exec.CommandContext(ctx, "/run/state/bin/crictl", "ps", "--name", "cilium-agent", "-q").CombinedOutput()
-	if err != nil {
-		return fmt.Errorf("getting cilium container id failed: %s", out)
-	}
-	outLines := strings.Split(string(out), "\n")
-	if len(outLines) < 2 {
-		return fmt.Errorf("getting cilium container id returned invalid output: %s", out)
-	}
-	containerID := outLines[len(outLines)-2]
-
-	// get cilium pod id
-	out, err = exec.CommandContext(ctx, "/run/state/bin/crictl", "inspect", "-o", "go-template", "--template", "{{ .info.sandboxID }}", containerID).CombinedOutput()
-	if err != nil {
-		return fmt.Errorf("getting Cilium Pod ID failed: %s", out)
-	}
-	outLines = strings.Split(string(out), "\n")
-	if len(outLines) < 2 {
-		return fmt.Errorf("getting Cilium Pod ID returned invalid output: %s", out)
-	}
-	podID := outLines[len(outLines)-2]
-
-	// stop and delete pod
-	out, err = exec.CommandContext(ctx, "/run/state/bin/crictl", "stopp", podID).CombinedOutput()
-	if err != nil {
-		return fmt.Errorf("stopping Cilium agent Pod failed: %s", out)
-	}
-	out, err = exec.CommandContext(ctx, "/run/state/bin/crictl", "rmp", podID).CombinedOutput()
-	if err != nil {
-		return fmt.Errorf("removing Cilium agent Pod failed: %s", out)
-	}
-
-	return nil
-}
-
 // JoinCluster joins existing Kubernetes cluster using kubeadm join.
 func (k *KubernetesUtil) JoinCluster(ctx context.Context, joinConfig []byte, log *logger.Logger) error {
 	// TODO(3u13r): audit policy should be user input
diff --git a/bootstrapper/internal/kubernetes/k8sutil.go b/bootstrapper/internal/kubernetes/k8sutil.go
index 070f2a1de2..3c7b55718c 100644
--- a/bootstrapper/internal/kubernetes/k8sutil.go
+++ b/bootstrapper/internal/kubernetes/k8sutil.go
@@ -18,7 +18,5 @@ type clusterUtil interface {
 	InstallComponents(ctx context.Context, kubernetesComponents components.Components) error
 	InitCluster(ctx context.Context, initConfig []byte, nodeName, clusterName string, ips []net.IP, conformanceMode bool, log *logger.Logger) ([]byte, error)
 	JoinCluster(ctx context.Context, joinConfig []byte, log *logger.Logger) error
-	WaitForCilium(ctx context.Context, log *logger.Logger) error
-	FixCilium(ctx context.Context) error
 	StartKubelet() error
 }
diff --git a/bootstrapper/internal/kubernetes/kubernetes.go b/bootstrapper/internal/kubernetes/kubernetes.go
index d2b86972eb..193c7cb452 100644
--- a/bootstrapper/internal/kubernetes/kubernetes.go
+++ b/bootstrapper/internal/kubernetes/kubernetes.go
@@ -242,17 +242,6 @@ func (k *KubeWrapper) JoinCluster(ctx context.Context, args *kubeadm.BootstrapTo
 		return fmt.Errorf("joining cluster: %v; %w ", string(joinConfigYAML), err)
 	}
 
-	log.Infof("Waiting for Cilium to become healthy")
-	if err := k.clusterUtil.WaitForCilium(context.Background(), log); err != nil {
-		return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
-	}
-
-	log.Infof("Restarting Cilium")
-	if err := k.clusterUtil.FixCilium(context.Background()); err != nil {
-		log.With(zap.Error(err)).Errorf("FixCilium failed")
-		// Continue and don't throw an error here - things might be okay.
-	}
-
 	return nil
 }
 
@@ -307,22 +296,11 @@ func k8sCompliantHostname(in string) (string, error) {
 }
 
 // StartKubelet starts the kubelet service.
-func (k *KubeWrapper) StartKubelet(log *logger.Logger) error {
+func (k *KubeWrapper) StartKubelet() error {
 	if err := k.clusterUtil.StartKubelet(); err != nil {
 		return fmt.Errorf("starting kubelet: %w", err)
 	}
 
-	log.Infof("Waiting for Cilium to become healthy")
-	if err := k.clusterUtil.WaitForCilium(context.Background(), log); err != nil {
-		return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
-	}
-
-	log.Infof("Restarting Cilium")
-	if err := k.clusterUtil.FixCilium(context.Background()); err != nil {
-		log.With(zap.Error(err)).Errorf("FixCilium failed")
-		// Continue and don't throw an error here - things might be okay.
-	}
-
 	return nil
 }
 
diff --git a/bootstrapper/internal/kubernetes/kubernetes_test.go b/bootstrapper/internal/kubernetes/kubernetes_test.go
index 76e1ef258e..39e35a80bc 100644
--- a/bootstrapper/internal/kubernetes/kubernetes_test.go
+++ b/bootstrapper/internal/kubernetes/kubernetes_test.go
@@ -474,14 +474,6 @@ func (s *stubClusterUtil) StartKubelet() error {
 	return s.startKubeletErr
 }
 
-func (s *stubClusterUtil) WaitForCilium(_ context.Context, _ *logger.Logger) error {
-	return nil
-}
-
-func (s *stubClusterUtil) FixCilium(_ context.Context) error {
-	return nil
-}
-
 type stubConfigProvider struct {
 	initConfig k8sapi.KubeadmInitYAML
 	joinConfig k8sapi.KubeadmJoinYAML
diff --git a/cli/internal/helm/BUILD.bazel b/cli/internal/helm/BUILD.bazel
index 9099211a12..c392c2257a 100644
--- a/cli/internal/helm/BUILD.bazel
+++ b/cli/internal/helm/BUILD.bazel
@@ -7,7 +7,6 @@ go_library(
         "action.go",
         "actionfactory.go",
         "chartutil.go",
-        "ciliumhelper.go",
         "helm.go",
         "loader.go",
         "overrides.go",
@@ -467,9 +466,6 @@ go_library(
         "//internal/semver",
         "//internal/versions",
         "@com_github_pkg_errors//:errors",
-        "@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
-        "@io_k8s_client_go//kubernetes",
-        "@io_k8s_client_go//tools/clientcmd",
        "@io_k8s_client_go//util/retry",
         "@sh_helm_helm//pkg/ignore",
         "@sh_helm_helm_v3//pkg/action",
diff --git a/cli/internal/helm/actionfactory.go b/cli/internal/helm/actionfactory.go
index 177e5b23b7..d36dcc761e 100644
--- a/cli/internal/helm/actionfactory.go
+++ b/cli/internal/helm/actionfactory.go
@@ -11,10 +11,8 @@ import (
 	"errors"
 	"fmt"
 	"strings"
-	"time"
 
 	"github.com/edgelesssys/constellation/v2/internal/compatibility"
-	"github.com/edgelesssys/constellation/v2/internal/constants"
 	"github.com/edgelesssys/constellation/v2/internal/semver"
 	"helm.sh/helm/v3/pkg/action"
 	"helm.sh/helm/v3/pkg/chart"
@@ -133,36 +131,9 @@ func (a actionFactory) appendNewAction(release Release, configTargetVersion semv
 
 func (a actionFactory) newInstall(release Release) *installAction {
 	action := &installAction{helmAction: newHelmInstallAction(a.cfg, release), release: release, log: a.log}
-	if action.ReleaseName() == ciliumInfo.releaseName {
-		action.postInstall = func(ctx context.Context) error {
-			return ciliumPostInstall(ctx, a.log)
-		}
-	}
 	return action
 }
 
-func ciliumPostInstall(ctx context.Context, log debugLog) error {
-	log.Debugf("Waiting for Cilium to become ready")
-	helper, err := newK8sCiliumHelper(constants.AdminConfFilename)
-	if err != nil {
-		return fmt.Errorf("creating Kubernetes client: %w", err)
-	}
-	timeToStartWaiting := time.Now()
-	// TODO(3u13r): Reduce the timeout when we switched the package repository - this is only this high because we once
-	// saw polling times of ~16 minutes when hitting a slow PoP from Fastly (GitHub's / ghcr.io CDN).
- if err := helper.WaitForDS(ctx, "kube-system", "cilium", log); err != nil { - return fmt.Errorf("waiting for Cilium to become healthy: %w", err) - } - timeUntilFinishedWaiting := time.Since(timeToStartWaiting) - log.Debugf("Cilium became healthy after %s", timeUntilFinishedWaiting.String()) - - log.Debugf("Fix Cilium through restart") - if err := helper.RestartDS("kube-system", "cilium"); err != nil { - return fmt.Errorf("restarting Cilium: %w", err) - } - return nil -} - func (a actionFactory) newUpgrade(release Release) *upgradeAction { action := &upgradeAction{helmAction: newHelmUpgradeAction(a.cfg), release: release, log: a.log} if release.ReleaseName == constellationOperatorsInfo.releaseName { diff --git a/cli/internal/helm/ciliumhelper.go b/cli/internal/helm/ciliumhelper.go deleted file mode 100644 index c36dcdcc64..0000000000 --- a/cli/internal/helm/ciliumhelper.go +++ /dev/null @@ -1,72 +0,0 @@ -/* -Copyright (c) Edgeless Systems GmbH - -SPDX-License-Identifier: AGPL-3.0-only -*/ - -package helm - -import ( - "context" - "fmt" - "time" - - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/clientcmd" -) - -type k8sDsClient struct { - clientset *kubernetes.Clientset -} - -func newK8sCiliumHelper(kubeconfigPath string) (*k8sDsClient, error) { - config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) - if err != nil { - return nil, err - } - clientset, err := kubernetes.NewForConfig(config) - if err != nil { - return nil, err - } - return &k8sDsClient{clientset: clientset}, nil -} - -// WaitForDS waits for a DaemonSet to become ready. -func (h *k8sDsClient) WaitForDS(ctx context.Context, namespace, name string, log debugLog) error { - for { - select { - case <-ctx.Done(): - return fmt.Errorf("context expired before DaemonSet %q became ready", name) - default: - ds, err := h.clientset.AppsV1().DaemonSets(namespace).Get(ctx, name, v1.GetOptions{}) - if err != nil { - return err - } - - if ds.Status.NumberReady == ds.Status.DesiredNumberScheduled { - log.Debugf("DaemonSet %s is ready\n", name) - return nil - } - - log.Debugf("Waiting for DaemonSet %s to become ready...\n", name) - time.Sleep(10 * time.Second) - } - } -} - -// RestartDS restarts all pods of a DaemonSet by updating its template. -func (h *k8sDsClient) RestartDS(namespace, name string) error { - ds, err := h.clientset.AppsV1().DaemonSets(namespace).Get(context.Background(), name, v1.GetOptions{}) - if err != nil { - return err - } - - ds.Spec.Template.ObjectMeta.Annotations["restartTimestamp"] = fmt.Sprintf("%d", time.Now().Unix()) - _, err = h.clientset.AppsV1().DaemonSets(namespace).Update(context.Background(), ds, v1.UpdateOptions{}) - if err != nil { - return err - } - - return nil -}
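
Note (appended for context, not part of the diff above): this patch removes both
Cilium restart paths, the bootstrapper-side WaitForCilium/FixCilium workaround
for https://github.com/cilium/cilium/issues/19958 and the CLI-side
ciliumPostInstall hook. If the old behavior is ever needed by hand, the CLI-side
restart was equivalent to "kubectl -n kube-system rollout restart daemonset
cilium", and the bootstrapper-side fix amounted to stopping and removing the
local cilium-agent pod sandbox with crictl ("crictl stopp" / "crictl rmp"),
after which the kubelet recreates it.

For reference, a minimal client-go sketch of the DaemonSet restart that the
deleted RestartDS helper performed. It bumps the standard
kubectl.kubernetes.io/restartedAt pod-template annotation (the same mechanism
"kubectl rollout restart" uses) instead of the custom restartTimestamp key, and
guards against a nil annotations map, which RestartDS assumed was non-nil. The
kubeconfig path is only an example:

    package main

    import (
        "context"
        "time"

        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/tools/clientcmd"
    )

    func main() {
        // Build a client from an admin kubeconfig; the path is an example.
        config, err := clientcmd.BuildConfigFromFlags("", "/etc/kubernetes/admin.conf")
        if err != nil {
            panic(err)
        }
        clientset, err := kubernetes.NewForConfig(config)
        if err != nil {
            panic(err)
        }

        ctx := context.Background()
        ds, err := clientset.AppsV1().DaemonSets("kube-system").Get(ctx, "cilium", metav1.GetOptions{})
        if err != nil {
            panic(err)
        }

        // Bumping a pod-template annotation makes the DaemonSet controller roll
        // all pods; kubectl rollout restart sets this same annotation.
        if ds.Spec.Template.ObjectMeta.Annotations == nil {
            ds.Spec.Template.ObjectMeta.Annotations = map[string]string{}
        }
        ds.Spec.Template.ObjectMeta.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339)

        if _, err := clientset.AppsV1().DaemonSets("kube-system").Update(ctx, ds, metav1.UpdateOptions{}); err != nil {
            panic(err)
        }
    }

Unlike the crictl-based FixCilium, which only recreated the Cilium pod on the
local node, this annotation bump restarts the DaemonSet cluster-wide.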