diff --git a/cmd/handler/main.go b/cmd/handler/main.go index cb6b56b36..f007b89ad 100644 --- a/cmd/handler/main.go +++ b/cmd/handler/main.go @@ -207,6 +207,7 @@ func setupHandlerControllers(mgr manager.Manager) error { APIClient: apiClient, Log: ctrl.Log.WithName("controllers").WithName("NodeNetworkConfigurationPolicy"), Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor(fmt.Sprintf("%s.nmstate-handler", environment.NodeName())), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create NodeNetworkConfigurationPolicy controller", "controller", "NMState") return err diff --git a/controllers/handler/nodenetworkconfigurationpolicy_controller.go b/controllers/handler/nodenetworkconfigurationpolicy_controller.go index 5fee65679..1cca421f2 100644 --- a/controllers/handler/nodenetworkconfigurationpolicy_controller.go +++ b/controllers/handler/nodenetworkconfigurationpolicy_controller.go @@ -25,6 +25,7 @@ import ( "github.com/go-logr/logr" "github.com/pkg/errors" + "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" @@ -60,6 +61,10 @@ import ( "github.com/nmstate/kubernetes-nmstate/pkg/selectors" ) +const ( + ReconcileFailed = "ReconcileFailed" +) + var ( nodeName string nodeRunningUpdateRetryTime = 5 * time.Second @@ -103,6 +108,7 @@ type NodeNetworkConfigurationPolicyReconciler struct { APIClient client.Client Log logr.Logger Scheme *runtime.Scheme + Recorder record.EventRecorder } func init() { @@ -121,7 +127,8 @@ func init() { // Note: // The Controller will requeue the Request to be processed again if the returned error is non-nil or // Result.Requeue is true, otherwise upon completion it will remove the work from the queue. 
-//nolint: funlen, gocyclo +// +//nolint:funlen,gocyclo func (r *NodeNetworkConfigurationPolicyReconciler) Reconcile(_ context.Context, request ctrl.Request) (ctrl.Result, error) { _ = context.Background() log := r.Log.WithValues("nodenetworkconfigurationpolicy", request.NamespacedName) @@ -218,9 +225,13 @@ func (r *NodeNetworkConfigurationPolicyReconciler) Reconcile(_ context.Context, nmstateOutput, err := nmstate.ApplyDesiredState(r.APIClient, enactmentInstance.Status.DesiredState) if err != nil { - errmsg := fmt.Errorf("error reconciling NodeNetworkConfigurationPolicy at desired state apply: %s,\n %v", nmstateOutput, err) + errmsg := fmt.Errorf("error reconciling NodeNetworkConfigurationPolicy on node %s at desired state apply: %q,\n %v", + nodeName, nmstateOutput, err) enactmentConditions.NotifyFailedToConfigure(errmsg) log.Error(errmsg, fmt.Sprintf("Rolling back network configuration, manual intervention needed: %s", nmstateOutput)) + if r.Recorder != nil { + r.Recorder.Event(instance, corev1.EventTypeWarning, ReconcileFailed, errmsg.Error()) + } return ctrl.Result{}, nil } log.Info("nmstate", "output", nmstateOutput) diff --git a/deploy/handler/role.yaml b/deploy/handler/role.yaml index b3b6ed104..12ef6606f 100644 --- a/deploy/handler/role.yaml +++ b/deploy/handler/role.yaml @@ -128,3 +128,16 @@ rules: verbs: - use {{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{template "handlerPrefix" .}}nmstate-handler-events + namespace: default +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - 'create' diff --git a/deploy/handler/role_binding.yaml b/deploy/handler/role_binding.yaml index 8d6f0f6d0..444c726d2 100644 --- a/deploy/handler/role_binding.yaml +++ b/deploy/handler/role_binding.yaml @@ -27,3 +27,18 @@ roleRef: kind: ClusterRole name: {{template "handlerPrefix" .}}nmstate-handler apiGroup: rbac.authorization.k8s.io +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: 
{{template "handlerPrefix" .}}nmstate-handler-events + namespace: default +subjects: +- kind: ServiceAccount + name: {{template "handlerPrefix" .}}nmstate-handler + namespace: {{ .HandlerNamespace }} +roleRef: + kind: Role + apiGroup: rbac.authorization.k8s.io + name: {{template "handlerPrefix" .}}nmstate-handler-events + namespace: default diff --git a/docs/user-guide/103-troubleshooting.md b/docs/user-guide/103-troubleshooting.md index c40148c5c..3bc6ef095 100644 --- a/docs/user-guide/103-troubleshooting.md +++ b/docs/user-guide/103-troubleshooting.md @@ -11,7 +11,8 @@ the operator protect the user from breaking the cluster networking. ## Invalid configuration If any of the following cases render the configuration faulty, the setup will be -automatically rolled back and Enactment will report the failure. +automatically rolled back and Enactment will report the failure. In addition, +an event for the NodeNetworkConfigurationPolicy will be created. * Configuration fails to be applied on the host (due to missing interfaces, inability to obtain IP, invalid attributes, ...) * Connectivity to the default gateway is broken @@ -110,6 +111,28 @@ the `ERROR` log line: Connection activation failed on connection_id eth666: error=nm-manager-error-quark: No suitable device found for this connection ``` +In addition, events are generated in the default namespace: +```shell +$ kubectl get events | grep Warning +6m55s Warning ReconcileFailed nodenetworkconfigurationpolicy/eth666 error reconciling NodeNetworkConfigurationPolicy on node node02 at desired state apply: "",... 
+``` + +These events are also attached to the NodeNetworkConfigurationPolicy itself and are displayed by `kubectl describe`: +```shell +kubectl describe nodenetworkconfigurationpolicies.nmstate.io eth666 +``` + +``` +# output truncated +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning ReconcileFailed 8m25s node02.nmstate-handler error reconciling NodeNetworkConfigurationPolicy on node node02 at desired state apply: "", +(...) +NmstateError: InvalidArgument: Ethernet interface eth666 does not exists +' +``` + The configuration therefore failed due to absence of NIC `eth666` on the node. Now we can either fix the Policy to edit an available interface or safely remove it: