Skip to content

Commit

Permalink
remove cilium restart fix
Browse files Browse the repository at this point in the history
  • Loading branch information
3u13r committed Nov 14, 2023
1 parent 0a3aba5 commit 0d4d3c3
Show file tree
Hide file tree
Showing 9 changed files with 4 additions and 208 deletions.
4 changes: 2 additions & 2 deletions bootstrapper/cmd/bootstrapper/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func run(issuer atls.Issuer, openDevice vtpm.TPMOpenFunc, fileHandler file.Handl
}

if nodeBootstrapped {
if err := kube.StartKubelet(log); err != nil {
if err := kube.StartKubelet(); err != nil {
log.With(zap.Error(err)).Fatalf("Failed to restart kubelet")
}
return
Expand Down Expand Up @@ -93,7 +93,7 @@ func getDiskUUID() (string, error) {
type clusterInitJoiner interface {
joinclient.ClusterJoiner
initserver.ClusterInitializer
StartKubelet(*logger.Logger) error
StartKubelet() error
}

type metadataAPI interface {
Expand Down
2 changes: 1 addition & 1 deletion bootstrapper/cmd/bootstrapper/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func (c *clusterFake) JoinCluster(context.Context, *kubeadm.BootstrapTokenDiscov
}

// StartKubelet starts the kubelet service.
func (c *clusterFake) StartKubelet(*logger.Logger) error {
func (c *clusterFake) StartKubelet() error {
return nil
}

Expand Down
67 changes: 0 additions & 67 deletions bootstrapper/internal/kubernetes/k8sapi/k8sutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ import (
"errors"
"fmt"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/edgelesssys/constellation/v2/bootstrapper/internal/certificate"
Expand Down Expand Up @@ -192,71 +190,6 @@ type SetupPodNetworkInput struct {
LoadBalancerPort string
}

// WaitForCilium blocks until the local Cilium health endpoint
// (http://127.0.0.1:9879/healthz) answers with HTTP 200.
//
// The endpoint is probed every 5 seconds; the first probe happens after one
// full interval. Transport errors and non-200 responses are logged and retried.
// The wait between probes is interruptible: as soon as ctx is done the function
// returns ctx.Err(). A failure to construct the request is returned wrapped.
func (k *KubernetesUtil) WaitForCilium(ctx context.Context, log *logger.Logger) error {
	const (
		healthzURL   = "http://127.0.0.1:9879/healthz"
		pollInterval = 5 * time.Second
		probeTimeout = 5 * time.Second
	)

	// Bound every single probe. Callers may hand in a context without a
	// deadline, in which case a stalled connection would otherwise block forever.
	client := http.Client{Timeout: probeTimeout}

	// A timer (instead of time.Sleep) lets cancellation interrupt the wait.
	timer := time.NewTimer(pollInterval)
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
		}

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthzURL, http.NoBody)
		if err != nil {
			return fmt.Errorf("unable to create request: %w", err)
		}

		resp, err := client.Do(req)
		if err != nil {
			log.With(zap.Error(err)).Infof("Waiting for local Cilium DaemonSet - Pod not healthy yet")
		} else {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
			log.Infof("Waiting for local Cilium DaemonSet - health endpoint returned status %d", resp.StatusCode)
		}

		// timer.C was drained by the select above, so Reset is safe here.
		// Resetting after the probe keeps a full interval between attempts.
		timer.Reset(pollInterval)
	}
}

// FixCilium works around https://github.com/cilium/cilium/issues/19958.
// Instead of a rollout restart of the Cilium DaemonSet, it only acts on the
// local Cilium Pod: it looks up the cilium-agent container via crictl, resolves
// the ID of the pod sandbox that container runs in, and then stops and removes
// that sandbox (crictl stopp / rmp).
// NOTE(review): presumably the kubelet recreates the Pod afterwards - confirm.
//
// Every returned error wraps the underlying exec error and carries the
// combined stdout/stderr of the failing crictl invocation.
func (k *KubernetesUtil) FixCilium(ctx context.Context) error {
	const crictl = "/run/state/bin/crictl"

	// get cilium container id
	out, err := exec.CommandContext(ctx, crictl, "ps", "--name", "cilium-agent", "-q").CombinedOutput()
	if err != nil {
		return fmt.Errorf("getting cilium container id failed: %w: %s", err, out)
	}
	containerID, ok := lastOutputLine(out)
	if !ok {
		return fmt.Errorf("getting cilium container id returned invalid output: %s", out)
	}

	// get cilium pod id
	out, err = exec.CommandContext(ctx, crictl, "inspect", "-o", "go-template", "--template", "{{ .info.sandboxID }}", containerID).CombinedOutput()
	if err != nil {
		return fmt.Errorf("getting Cilium Pod ID failed: %w: %s", err, out)
	}
	podID, ok := lastOutputLine(out)
	if !ok {
		return fmt.Errorf("getting Cilium Pod ID returned invalid output: %s", out)
	}

	// stop and delete pod
	out, err = exec.CommandContext(ctx, crictl, "stopp", podID).CombinedOutput()
	if err != nil {
		return fmt.Errorf("stopping Cilium agent Pod failed: %w: %s", err, out)
	}
	out, err = exec.CommandContext(ctx, crictl, "rmp", podID).CombinedOutput()
	if err != nil {
		return fmt.Errorf("removing Cilium agent Pod failed: %w: %s", err, out)
	}

	return nil
}

// lastOutputLine returns the last non-blank line of out with surrounding
// whitespace removed. ok is false when out contains no non-blank line. Unlike
// indexing the result of a split on "\n", this does not depend on the output
// ending in exactly one trailing newline.
func lastOutputLine(out []byte) (line string, ok bool) {
	line = strings.TrimSpace(string(out))
	if i := strings.LastIndexByte(line, '\n'); i >= 0 {
		line = strings.TrimSpace(line[i+1:])
	}
	return line, line != ""
}

// JoinCluster joins existing Kubernetes cluster using kubeadm join.
func (k *KubernetesUtil) JoinCluster(ctx context.Context, joinConfig []byte, log *logger.Logger) error {
// TODO(3u13r): audit policy should be user input
Expand Down
2 changes: 0 additions & 2 deletions bootstrapper/internal/kubernetes/k8sutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,5 @@ type clusterUtil interface {
InstallComponents(ctx context.Context, kubernetesComponents components.Components) error
InitCluster(ctx context.Context, initConfig []byte, nodeName, clusterName string, ips []net.IP, conformanceMode bool, log *logger.Logger) ([]byte, error)
JoinCluster(ctx context.Context, joinConfig []byte, log *logger.Logger) error
WaitForCilium(ctx context.Context, log *logger.Logger) error
FixCilium(ctx context.Context) error
StartKubelet() error
}
24 changes: 1 addition & 23 deletions bootstrapper/internal/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,17 +242,6 @@ func (k *KubeWrapper) JoinCluster(ctx context.Context, args *kubeadm.BootstrapTo
return fmt.Errorf("joining cluster: %v; %w ", string(joinConfigYAML), err)
}

log.Infof("Waiting for Cilium to become healthy")
if err := k.clusterUtil.WaitForCilium(context.Background(), log); err != nil {
return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
}

log.Infof("Restarting Cilium")
if err := k.clusterUtil.FixCilium(context.Background()); err != nil {
log.With(zap.Error(err)).Errorf("FixCilium failed")
// Continue and don't throw an error here - things might be okay.
}

return nil
}

Expand Down Expand Up @@ -307,22 +296,11 @@ func k8sCompliantHostname(in string) (string, error) {
}

// StartKubelet starts the kubelet service.
func (k *KubeWrapper) StartKubelet(log *logger.Logger) error {
func (k *KubeWrapper) StartKubelet() error {
if err := k.clusterUtil.StartKubelet(); err != nil {
return fmt.Errorf("starting kubelet: %w", err)
}

log.Infof("Waiting for Cilium to become healthy")
if err := k.clusterUtil.WaitForCilium(context.Background(), log); err != nil {
return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
}

log.Infof("Restarting Cilium")
if err := k.clusterUtil.FixCilium(context.Background()); err != nil {
log.With(zap.Error(err)).Errorf("FixCilium failed")
// Continue and don't throw an error here - things might be okay.
}

return nil
}

Expand Down
8 changes: 0 additions & 8 deletions bootstrapper/internal/kubernetes/kubernetes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,14 +474,6 @@ func (s *stubClusterUtil) StartKubelet() error {
return s.startKubeletErr
}

// WaitForCilium is a test stub: it ignores its arguments and always reports success.
func (s *stubClusterUtil) WaitForCilium(context.Context, *logger.Logger) error {
	return nil
}

// FixCilium is a test stub: it ignores its argument and always reports success.
func (s *stubClusterUtil) FixCilium(context.Context) error {
	return nil
}

type stubConfigProvider struct {
initConfig k8sapi.KubeadmInitYAML
joinConfig k8sapi.KubeadmJoinYAML
Expand Down
4 changes: 0 additions & 4 deletions cli/internal/helm/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ go_library(
"action.go",
"actionfactory.go",
"chartutil.go",
"ciliumhelper.go",
"helm.go",
"loader.go",
"overrides.go",
Expand Down Expand Up @@ -467,9 +466,6 @@ go_library(
"//internal/semver",
"//internal/versions",
"@com_github_pkg_errors//:errors",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
"@io_k8s_client_go//kubernetes",
"@io_k8s_client_go//tools/clientcmd",
"@io_k8s_client_go//util/retry",
"@sh_helm_helm//pkg/ignore",
"@sh_helm_helm_v3//pkg/action",
Expand Down
29 changes: 0 additions & 29 deletions cli/internal/helm/actionfactory.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ import (
"errors"
"fmt"
"strings"
"time"

"github.com/edgelesssys/constellation/v2/internal/compatibility"
"github.com/edgelesssys/constellation/v2/internal/constants"
"github.com/edgelesssys/constellation/v2/internal/semver"
"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/chart"
Expand Down Expand Up @@ -133,36 +131,9 @@ func (a actionFactory) appendNewAction(release Release, configTargetVersion semv

// newInstall builds the install action for the given release. When the
// action's release name equals ciliumInfo.releaseName, a post-install hook is
// attached that runs ciliumPostInstall with the factory's logger.
func (a actionFactory) newInstall(release Release) *installAction {
	install := &installAction{
		helmAction: newHelmInstallAction(a.cfg, release),
		release:    release,
		log:        a.log,
	}

	// Only the Cilium release needs a post-install step.
	if install.ReleaseName() != ciliumInfo.releaseName {
		return install
	}

	install.postInstall = func(ctx context.Context) error {
		return ciliumPostInstall(ctx, a.log)
	}
	return install
}

// ciliumPostInstall is the post-install step for the Cilium release. It
// creates a helper from the admin kubeconfig (constants.AdminConfFilename),
// calls WaitForDS for the "cilium" DaemonSet in "kube-system", logs how long
// that took, and finally calls RestartDS on the same DaemonSet.
// Any failing step is returned as a wrapped error.
func ciliumPostInstall(ctx context.Context, log debugLog) error {
	log.Debugf("Waiting for Cilium to become ready")

	ciliumHelper, err := newK8sCiliumHelper(constants.AdminConfFilename)
	if err != nil {
		return fmt.Errorf("creating Kubernetes client: %w", err)
	}

	waitStart := time.Now()
	// TODO(3u13r): Reduce the timeout once the package repository has been
	// switched. It is only this high because polling once took ~16 minutes when
	// hitting a slow PoP from Fastly (GitHub's / ghcr.io CDN).
	if err := ciliumHelper.WaitForDS(ctx, "kube-system", "cilium", log); err != nil {
		return fmt.Errorf("waiting for Cilium to become healthy: %w", err)
	}
	log.Debugf("Cilium became healthy after %s", time.Since(waitStart).String())

	log.Debugf("Fix Cilium through restart")
	if err := ciliumHelper.RestartDS("kube-system", "cilium"); err != nil {
		return fmt.Errorf("restarting Cilium: %w", err)
	}

	return nil
}

func (a actionFactory) newUpgrade(release Release) *upgradeAction {
action := &upgradeAction{helmAction: newHelmUpgradeAction(a.cfg), release: release, log: a.log}
if release.ReleaseName == constellationOperatorsInfo.releaseName {
Expand Down
72 changes: 0 additions & 72 deletions cli/internal/helm/ciliumhelper.go

This file was deleted.

0 comments on commit 0d4d3c3

Please sign in to comment.