From 5211620f9d726ec72b474013b9bc3ab7f50f3e47 Mon Sep 17 00:00:00 2001 From: zerospiel Date: Tue, 21 Jan 2025 18:02:21 +0100 Subject: [PATCH] Create backups on release upgrade * add new flag to scheduled backups to enable autobackups during upgrades * create auto backups if velero is installed and upgrade is progressing * block upgrades until all of the backups are completed * new indexer for backups with autoupgrades * new RBAC permission for apps/deployments --- api/v1alpha1/indexers.go | 23 +++-- api/v1alpha1/management_backup_types.go | 9 ++ internal/controller/backup/reconcile.go | 1 + internal/controller/management_controller.go | 89 +++++++++++++++++++ ...0rdent.mirantis.com_managementbackups.yaml | 6 ++ .../kcm/templates/rbac/controller/roles.yaml | 6 ++ 6 files changed, 128 insertions(+), 6 deletions(-) diff --git a/api/v1alpha1/indexers.go b/api/v1alpha1/indexers.go index f962de362..66e3402d0 100644 --- a/api/v1alpha1/indexers.go +++ b/api/v1alpha1/indexers.go @@ -36,6 +36,7 @@ func SetupIndexers(ctx context.Context, mgr ctrl.Manager) error { setupMultiClusterServiceServicesIndexer, setupOwnerReferenceIndexers, setupManagementBackupIndexer, + setupManagementBackupAutoUpgradesIndexer, } { merr = errors.Join(merr, f(ctx, mgr)) } @@ -252,15 +253,25 @@ func setupManagementBackupIndexer(ctx context.Context, mgr ctrl.Manager) error { return nil } - const trueVal = "true" - if mb.Spec.Schedule != "" { - return []string{trueVal} + if mb.Spec.Schedule != "" || !mb.IsCompleted() { + return []string{"true"} } - if mb.Status.LastBackup == nil || mb.Status.LastBackup.CompletionTimestamp.IsZero() { - return []string{trueVal} + return nil + }) +} + +// ManagementBackupAutoUpgradeIndexKey indexer field name to extract only [ManagementBackup] objects +// with schedule and auto-upgrade set. 
+const ManagementBackupAutoUpgradeIndexKey = "k0rdent.management-backup-upgrades" + +func setupManagementBackupAutoUpgradesIndexer(ctx context.Context, mgr ctrl.Manager) error { + return mgr.GetFieldIndexer().IndexField(ctx, &ManagementBackup{}, ManagementBackupAutoUpgradeIndexKey, func(o client.Object) []string { + mb, ok := o.(*ManagementBackup) + if !ok || mb.Spec.Schedule == "" || !mb.Spec.PerformOnManagementUpgrade { + return nil } - return nil + return []string{"true"} }) } diff --git a/api/v1alpha1/management_backup_types.go b/api/v1alpha1/management_backup_types.go index c79db3bad..0199cebbf 100644 --- a/api/v1alpha1/management_backup_types.go +++ b/api/v1alpha1/management_backup_types.go @@ -37,6 +37,10 @@ type ManagementBackupSpec struct { // Schedule is a Cron expression defining when to run the scheduled [ManagementBackup]. // If not set, the object is considered to be run only once. Schedule string `json:"schedule,omitempty"` + // PerformOnManagementUpgrade indicates that a single [ManagementBackup] + // should be created and stored in the [ManagementBackup] storage location if not default + // before the [Management] release upgrade. + PerformOnManagementUpgrade bool `json:"performOnManagementUpgrade,omitempty"` } // ManagementBackupStatus defines the observed state of ManagementBackup @@ -59,6 +63,11 @@ func (s *ManagementBackup) IsSchedule() bool { return s.Spec.Schedule != "" } +// IsCompleted checks if the latest underlying backup has been completed. +func (s *ManagementBackup) IsCompleted() bool { + return s.Status.LastBackup != nil && !s.Status.LastBackup.CompletionTimestamp.IsZero() +} + // TimestampedBackupName returns the backup name related to scheduled [ManagementBackup] based on the given timestamp. 
func (s *ManagementBackup) TimestampedBackupName(timestamp time.Time) string { return s.Name + "-" + timestamp.Format("20060102150405") diff --git a/internal/controller/backup/reconcile.go b/internal/controller/backup/reconcile.go index ec1c07165..62e1c78c4 100644 --- a/internal/controller/backup/reconcile.go +++ b/internal/controller/backup/reconcile.go @@ -51,6 +51,7 @@ func (r *Reconciler) ReconcileBackup(ctx context.Context, mgmtBackup *kcmv1alpha isDue, nextAttemptTime := getNextAttemptTime(mgmtBackup, cronSchedule) // here we can put as many conditions as we want, e.g. if upgrade is progressing + // TODO: add a condition to check if management upgrade is progressing isOkayToCreateBackup := isDue && !r.isVeleroBackupProgressing(ctx, mgmtBackup) if isOkayToCreateBackup { diff --git a/internal/controller/management_controller.go b/internal/controller/management_controller.go index 019c9bb94..3ba5a3967 100644 --- a/internal/controller/management_controller.go +++ b/internal/controller/management_controller.go @@ -27,6 +27,7 @@ import ( fluxconditions "github.com/fluxcd/pkg/runtime/conditions" sourcev1 "github.com/fluxcd/source-controller/api/v1" "helm.sh/helm/v3/pkg/chartutil" + appsv1 "k8s.io/api/apps/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -104,6 +105,16 @@ func (r *ManagementReconciler) Update(ctx context.Context, management *kcm.Manag return ctrl.Result{}, err } + requeueAutoUpgradeBackups, err := r.ensureUpgradeBackup(ctx, management) + if err != nil { + l.Error(err, "failed to ensure release backups before upgrades") + return ctrl.Result{}, err + } + if requeueAutoUpgradeBackups { + l.Info("Still creating or waiting for backups to be completed before the upgrade", "current_release", management.Status.Release, "new_release", management.Spec.Release) + return ctrl.Result{Requeue: true}, nil + } + if err := 
r.ensureAccessManagement(ctx, management); err != nil { l.Error(err, "failed to ensure AccessManagement is created") return ctrl.Result{}, err @@ -646,6 +657,84 @@ func (r *ManagementReconciler) enableAdditionalComponents(ctx context.Context, m return nil } +func (r *ManagementReconciler) ensureUpgradeBackup(ctx context.Context, mgmt *kcm.Management) (requeue bool, _ error) { + if mgmt.Status.Release == "" { + return false, nil + } + if mgmt.Spec.Release == mgmt.Status.Release { + return false, nil + } + + // check if velero is enabled but with real objects + deploys := new(appsv1.DeploymentList) + if err := r.Client.List(ctx, deploys, + client.MatchingLabels{"component": "velero"}, + client.Limit(1)); err != nil { + return false, fmt.Errorf("failed to list Deployments to find velero: %w", err) + } + + if len(deploys.Items) == 0 { + return false, nil // velero is not enabled, nothing to do + } + + autoUpgradeBackups := new(kcm.ManagementBackupList) + if err := r.Client.List(ctx, autoUpgradeBackups, client.MatchingFields{kcm.ManagementBackupAutoUpgradeIndexKey: "true"}); err != nil { + return false, fmt.Errorf("failed to list ManagementBackup with schedule set: %w", err) + } + + if len(autoUpgradeBackups.Items) == 0 { + return false, nil // no autoupgrades, nothing to do + } + + singleName2Location := make(map[string]string, len(autoUpgradeBackups.Items)) + for _, v := range autoUpgradeBackups.Items { + // TODO: check for name length? 
+ singleName2Location[v.Name+"-"+mgmt.Status.Release] = v.Spec.StorageLocation + } + + requeue = false + for name, location := range singleName2Location { + mb := new(kcm.ManagementBackup) + err := r.Client.Get(ctx, client.ObjectKey{Name: name}, mb) + isNotFoundErr := apierrors.IsNotFound(err) + if err != nil && !isNotFoundErr { + return false, fmt.Errorf("failed to get ManagementBackup %s: %w", name, err) + } + + // have to create + if isNotFoundErr { + mb = &kcm.ManagementBackup{ + TypeMeta: metav1.TypeMeta{ + APIVersion: kcm.GroupVersion.String(), + Kind: "ManagementBackup", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + // TODO: generalize the label? + Labels: map[string]string{"k0rdent.mirantis.com/release-backup": mgmt.Status.Release}, + }, + Spec: kcm.ManagementBackupSpec{ + StorageLocation: location, + }, + } + + if err := r.Client.Create(ctx, mb); err != nil { + return false, fmt.Errorf("failed to create a single ManagementBackup %s: %w", name, err) + } + + // a fresh backup is not completed, so the next statement will set requeue + } + + // + if !mb.IsCompleted() { + requeue = true // let us continue with creation of others if any, then requeue + continue + } + } + + return requeue, nil +} + type mgmtStatusAccumulator struct { components map[string]kcm.ComponentStatus compatibilityContracts map[string]kcm.CompatibilityContracts diff --git a/templates/provider/kcm/templates/crds/k0rdent.mirantis.com_managementbackups.yaml b/templates/provider/kcm/templates/crds/k0rdent.mirantis.com_managementbackups.yaml index 1e2efe25d..d9cad6e16 100644 --- a/templates/provider/kcm/templates/crds/k0rdent.mirantis.com_managementbackups.yaml +++ b/templates/provider/kcm/templates/crds/k0rdent.mirantis.com_managementbackups.yaml @@ -65,6 +65,12 @@ spec: spec: description: ManagementBackupSpec defines the desired state of ManagementBackup properties: + performOnManagementUpgrade: + description: |- + PerformOnManagementUpgrade indicates that a single [ManagementBackup] 
+ should be created and stored in the [ManagementBackup] storage location if not default + before the [Management] release upgrade. + type: boolean schedule: description: |- Schedule is a Cron expression defining when to run the scheduled [ManagementBackup]. diff --git a/templates/provider/kcm/templates/rbac/controller/roles.yaml b/templates/provider/kcm/templates/rbac/controller/roles.yaml index 56ff627f9..962bb4ec9 100644 --- a/templates/provider/kcm/templates/rbac/controller/roles.yaml +++ b/templates/provider/kcm/templates/rbac/controller/roles.yaml @@ -244,6 +244,12 @@ rules: verbs: - '*' # managementbackups-ctrl +- apiGroups: # required for autobackup on upgrade + - apps + resources: + - deployments + verbs: + - list --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role