-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ Detect OOMkilled status for the operator itself (#939)
* ✨ Report on OOMkilled status * ✨ Detect OOMkilled status for the operator itself --------- Signed-off-by: Christian Zunker <[email protected]>
- Loading branch information
Showing
12 changed files
with
460 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// Copyright (c) Mondoo, Inc. | ||
// SPDX-License-Identifier: BUSL-1.1 | ||
|
||
package operator | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/go-logr/logr" | ||
"k8s.io/apimachinery/pkg/api/errors" | ||
|
||
k8sv1alpha2 "go.mondoo.com/mondoo-operator/api/v1alpha2" | ||
"go.mondoo.com/mondoo-operator/controllers" | ||
"go.mondoo.com/mondoo-operator/controllers/status" | ||
"go.mondoo.com/mondoo-operator/pkg/utils/k8s" | ||
"go.mondoo.com/mondoo-operator/pkg/utils/mondoo" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/labels" | ||
"k8s.io/apimachinery/pkg/types" | ||
k8sversion "k8s.io/apimachinery/pkg/version" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
) | ||
|
||
func checkForTerminatedState(ctx context.Context, nonCacheClient client.Client, v *k8sversion.Info, logger logr.Logger) error { | ||
statusReport := status.NewStatusReporter(nonCacheClient, controllers.MondooClientBuilder, v) | ||
|
||
var err error | ||
config := &k8sv1alpha2.MondooOperatorConfig{} | ||
if err = nonCacheClient.Get(ctx, types.NamespacedName{Name: k8sv1alpha2.MondooOperatorConfigName}, config); err != nil { | ||
if errors.IsNotFound(err) { | ||
logger.Info("MondooOperatorConfig not found, using defaults") | ||
} else { | ||
logger.Error(err, "Failed to check for MondooOpertorConfig") | ||
return err | ||
} | ||
} | ||
|
||
mondooAuditConfigs := &k8sv1alpha2.MondooAuditConfigList{} | ||
if err := nonCacheClient.List(ctx, mondooAuditConfigs); err != nil { | ||
logger.Error(err, "error listing MondooAuditConfigs") | ||
return err | ||
} | ||
|
||
for _, mondooAuditConfig := range mondooAuditConfigs.Items { | ||
mondooAuditConfigCopy := mondooAuditConfig.DeepCopy() | ||
|
||
podList := &corev1.PodList{} | ||
listOpts := &client.ListOptions{ | ||
Namespace: mondooAuditConfig.Namespace, | ||
LabelSelector: labels.SelectorFromSet(map[string]string{ | ||
"app.kubernetes.io/name": "mondoo-operator", | ||
}), | ||
} | ||
if err := nonCacheClient.List(ctx, podList, listOpts); err != nil { | ||
logger.Error(err, "failed to list pods", "Mondoo.Namespace", mondooAuditConfig.Namespace, "Mondoo.Name", mondooAuditConfig.Name) | ||
return err | ||
} | ||
|
||
currentPod := k8s.GetNewestPodFromList(podList) | ||
for _, containerStatus := range currentPod.Status.ContainerStatuses { | ||
if containerStatus.Name != "manager" { | ||
continue | ||
} | ||
stateUpdate := false | ||
if containerStatus.State.Terminated != nil || containerStatus.LastTerminationState.Terminated != nil { | ||
logger.Info("mondoo-operator was terminated before") | ||
// Update status | ||
updateOperatorConditions(&mondooAuditConfig, true, currentPod) | ||
stateUpdate = true | ||
} else if containerStatus.RestartCount == 0 && containerStatus.State.Terminated == nil { | ||
logger.Info("mondoo-operator is running or starting", "state", containerStatus.State) | ||
updateOperatorConditions(&mondooAuditConfig, false, &corev1.Pod{}) | ||
stateUpdate = true | ||
} | ||
if stateUpdate { | ||
err := mondoo.UpdateMondooAuditStatus(ctx, nonCacheClient, mondooAuditConfigCopy, &mondooAuditConfig, logger) | ||
if err != nil { | ||
logger.Error(err, "failed to update status for MondooAuditConfig") | ||
return err | ||
} | ||
// Report upstream before we get OOMkilled again | ||
err = statusReport.Report(ctx, mondooAuditConfig, *config) | ||
if err != nil { | ||
logger.Error(err, "failed to report status upstream") | ||
return err | ||
} | ||
break | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
func updateOperatorConditions(config *k8sv1alpha2.MondooAuditConfig, degradedStatus bool, pod *corev1.Pod) { | ||
msg := "Mondoo Operator controller is available" | ||
reason := "MondooOperatorAvailable" | ||
status := corev1.ConditionFalse | ||
updateCheck := mondoo.UpdateConditionIfReasonOrMessageChange | ||
affectedPods := []string{} | ||
memoryLimit := "" | ||
if degradedStatus { | ||
msg = "Mondoo Operator controller is unavailable" | ||
for i, containerStatus := range pod.Status.ContainerStatuses { | ||
if (containerStatus.LastTerminationState.Terminated != nil && containerStatus.LastTerminationState.Terminated.ExitCode == 137) || | ||
(containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode == 137) { | ||
msg = "Mondoo Operator controller is unavailable due to OOM" | ||
affectedPods = append(affectedPods, pod.Name) | ||
memoryLimit = pod.Spec.Containers[i].Resources.Limits.Memory().String() | ||
break | ||
} | ||
} | ||
|
||
reason = "MondooOperatorUnavailable" | ||
status = corev1.ConditionTrue | ||
} | ||
|
||
config.Status.Conditions = mondoo.SetMondooAuditCondition(config.Status.Conditions, k8sv1alpha2.MondooOperaotrDegraded, status, reason, msg, updateCheck, affectedPods, memoryLimit) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
// Copyright (c) Mondoo, Inc. | ||
// SPDX-License-Identifier: BUSL-1.1 | ||
|
||
package operator | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
"time" | ||
|
||
"github.com/go-logr/zapr" | ||
"github.com/golang/mock/gomock" | ||
"github.com/stretchr/testify/suite" | ||
"go.uber.org/zap" | ||
|
||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/resource" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
"k8s.io/apimachinery/pkg/version" | ||
clientgoscheme "k8s.io/client-go/kubernetes/scheme" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
"sigs.k8s.io/controller-runtime/pkg/client/fake" | ||
|
||
mondoov1alpha2 "go.mondoo.com/mondoo-operator/api/v1alpha2" | ||
scanapistoremock "go.mondoo.com/mondoo-operator/controllers/resource_monitor/scan_api_store/mock" | ||
"go.mondoo.com/mondoo-operator/controllers/scanapi" | ||
"go.mondoo.com/mondoo-operator/pkg/utils/mondoo" | ||
fakeMondoo "go.mondoo.com/mondoo-operator/pkg/utils/mondoo/fake" | ||
"go.mondoo.com/mondoo-operator/pkg/utils/test" | ||
"go.mondoo.com/mondoo-operator/tests/framework/utils" | ||
) | ||
|
||
type DeploymentHandlerSuite struct { | ||
suite.Suite | ||
ctx context.Context | ||
scheme *runtime.Scheme | ||
containerImageResolver mondoo.ContainerImageResolver | ||
|
||
auditConfig mondoov1alpha2.MondooAuditConfig | ||
fakeClientBuilder *fake.ClientBuilder | ||
mockCtrl *gomock.Controller | ||
scanApiStoreMock *scanapistoremock.MockScanApiStore | ||
} | ||
|
||
func (s *DeploymentHandlerSuite) SetupSuite() { | ||
s.ctx = context.Background() | ||
s.scheme = clientgoscheme.Scheme | ||
s.Require().NoError(mondoov1alpha2.AddToScheme(s.scheme)) | ||
s.containerImageResolver = fakeMondoo.NewNoOpContainerImageResolver() | ||
s.mockCtrl = gomock.NewController(s.T()) | ||
s.scanApiStoreMock = scanapistoremock.NewMockScanApiStore(s.mockCtrl) | ||
} | ||
|
||
func (s *DeploymentHandlerSuite) BeforeTest(suiteName, testName string) { | ||
s.auditConfig = utils.DefaultAuditConfig("mondoo-operator", true, false, false, false) | ||
s.fakeClientBuilder = fake.NewClientBuilder().WithObjects(&corev1.Secret{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: scanapi.TokenSecretName(s.auditConfig.Name), | ||
Namespace: s.auditConfig.Namespace, | ||
}, | ||
Data: map[string][]byte{"token": []byte("token")}, | ||
}, test.TestKubeSystemNamespace()) | ||
} | ||
|
||
func (s *DeploymentHandlerSuite) AfterTest(suiteName, testName string) { | ||
s.mockCtrl.Finish() | ||
} | ||
|
||
func (s *DeploymentHandlerSuite) TestOOMDetect() { | ||
mondooAuditConfig := &s.auditConfig | ||
|
||
oomPod := &corev1.Pod{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "mondoo-operator-123", | ||
Namespace: s.auditConfig.Namespace, | ||
Labels: map[string]string{"app.kubernetes.io/name": "mondoo-operator"}, | ||
CreationTimestamp: metav1.Time{ | ||
Time: time.Now(), | ||
}, | ||
}, | ||
Spec: corev1.PodSpec{ | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: "manager", | ||
Resources: corev1.ResourceRequirements{ | ||
Limits: corev1.ResourceList{ | ||
corev1.ResourceMemory: *resource.NewQuantity(1, resource.BinarySI), | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
Status: corev1.PodStatus{ | ||
ContainerStatuses: []corev1.ContainerStatus{ | ||
{ | ||
Name: "manager", | ||
LastTerminationState: corev1.ContainerState{ | ||
Terminated: &corev1.ContainerStateTerminated{ | ||
ExitCode: 137, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
// This is needed because of https://github.com/kubernetes-sigs/controller-runtime/issues/2362 | ||
objs := []client.Object{mondooAuditConfig, oomPod} | ||
k8sClient := s.fakeClientBuilder.WithScheme(clientgoscheme.Scheme).WithStatusSubresource(objs...).WithObjects(objs...).Build() | ||
|
||
v := &version.Info{} | ||
cfg := zap.NewDevelopmentConfig() | ||
cfg.InitialFields = map[string]interface{}{ | ||
"controller": "terminated-test", | ||
} | ||
zapLog, err := cfg.Build() | ||
s.Require().NoError(err, "failed to set up logging for test cases") | ||
testLogger := zapr.NewLogger(zapLog) | ||
|
||
err = checkForTerminatedState(s.ctx, k8sClient, v, testLogger) | ||
s.NoError(err) | ||
|
||
mondooAuditConfigs := &mondoov1alpha2.MondooAuditConfigList{} | ||
err = k8sClient.List(s.ctx, mondooAuditConfigs) | ||
s.NoError(err) | ||
s.Len(mondooAuditConfigs.Items, 1) | ||
|
||
condition := mondooAuditConfigs.Items[0].Status.Conditions[0] | ||
s.Equal("Mondoo Operator controller is unavailable due to OOM", condition.Message) | ||
s.Len(condition.AffectedPods, 1) | ||
s.Contains(condition.AffectedPods, "mondoo-operator-123") | ||
containerMemory := oomPod.Spec.Containers[0].Resources.Limits.Memory() | ||
s.Equal(containerMemory.String(), condition.MemoryLimit) | ||
s.Equal("MondooOperatorUnavailable", condition.Reason) | ||
s.Equal(corev1.ConditionTrue, condition.Status) | ||
|
||
oomPod.Status.ContainerStatuses[0].LastTerminationState = corev1.ContainerState{} | ||
oomPod.Status.ContainerStatuses[0].State.Running = &corev1.ContainerStateRunning{} | ||
s.NoError(k8sClient.Status().Update(s.ctx, oomPod)) | ||
|
||
err = checkForTerminatedState(s.ctx, k8sClient, v, testLogger) | ||
s.NoError(err) | ||
|
||
mondooAuditConfigs = &mondoov1alpha2.MondooAuditConfigList{} | ||
err = k8sClient.List(s.ctx, mondooAuditConfigs) | ||
s.NoError(err) | ||
s.Len(mondooAuditConfigs.Items, 1) | ||
|
||
condition = mondooAuditConfigs.Items[0].Status.Conditions[0] | ||
s.Equal("Mondoo Operator controller is available", condition.Message) | ||
s.Len(condition.AffectedPods, 0) | ||
s.Equal("", condition.MemoryLimit) | ||
s.Equal("MondooOperatorAvailable", condition.Reason) | ||
s.Equal(corev1.ConditionFalse, condition.Status) | ||
} | ||
|
||
func TestOperatorSuite(t *testing.T) { | ||
suite.Run(t, new(DeploymentHandlerSuite)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.