diff --git a/internal/controller/postgrescluster/pgbackrest.go b/internal/controller/postgrescluster/pgbackrest.go index 21dbcc84f..d470daab3 100644 --- a/internal/controller/postgrescluster/pgbackrest.go +++ b/internal/controller/postgrescluster/pgbackrest.go @@ -2491,6 +2491,8 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context, }) backupJob.ObjectMeta.Labels = labels backupJob.ObjectMeta.Annotations = annotations + // K8SPG-703 + backupJob.Finalizers = []string{pNaming.FinalizerKeepJob} // K8SPG-613 initImage, err := k8s.InitImage(ctx, r.Client, postgresCluster, &postgresCluster.Spec.Backups.PGBackRest) diff --git a/percona/controller/pgbackup/controller.go b/percona/controller/pgbackup/controller.go index 5b4e6a4f3..57c617f29 100644 --- a/percona/controller/pgbackup/controller.go +++ b/percona/controller/pgbackup/controller.go @@ -131,7 +131,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re } // start backup only if backup job doesn't exist - _, err := findBackupJob(ctx, r.Client, pgCluster, pgBackup) + _, err := findBackupJob(ctx, r.Client, pgBackup) if err != nil { if !errors.Is(err, ErrBackupJobNotFound) { return reconcile.Result{}, errors.Wrap(err, "find backup job") @@ -184,7 +184,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{}, errors.Errorf("PostgresCluster %s is not found", pgBackup.Spec.PGCluster) } - job, err := findBackupJob(ctx, r.Client, pgCluster, pgBackup) + job, err := findBackupJob(ctx, r.Client, pgBackup) if err != nil { if errors.Is(err, ErrBackupJobNotFound) { log.Info("Waiting for backup to start") @@ -231,6 +231,15 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re job := &batchv1.Job{} err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Status.JobName, Namespace: pgBackup.Namespace}, job) if err != nil { + // If something has deleted the job even with the finalizer, we should fail the backup. + if k8serrors.IsNotFound(err) { + if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) { + bcp.Status.State = v2.BackupFailed + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update PGBackup status") + } + return reconcile.Result{}, nil + } return reconcile.Result{}, errors.Wrap(err, "get backup job") } @@ -265,6 +274,21 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re return reconcile.Result{}, nil case v2.BackupSucceeded: + job, err := findBackupJob(ctx, r.Client, pgBackup) + if err == nil && len(job.Finalizers) > 0 { + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + j := new(batchv1.Job) + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(job), j); err != nil { + return errors.Wrap(err, "get job") + } + j.Finalizers = []string{} + + return r.Client.Update(ctx, j) + }); err != nil { + return reconcile.Result{}, errors.Wrap(err, "update PGBackup status") + } + } + if pgCluster == nil { return reconcile.Result{}, nil } @@ -636,8 +660,11 @@ func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) e return nil } -func findBackupJob(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) (*batchv1.Job, error) { - if jobName := pb.GetAnnotations()[pNaming.AnnotationPGBackrestBackupJobName]; jobName != "" { +func findBackupJob(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) (*batchv1.Job, error) { + if jobName := pb.GetAnnotations()[pNaming.AnnotationPGBackrestBackupJobName]; jobName != "" || pb.Status.JobName != "" { + if jobName == "" { + jobName = pb.Status.JobName + } job := new(batchv1.Job) err := c.Get(ctx, types.NamespacedName{Name: jobName, Namespace: pb.Namespace}, job) if err != nil { @@ -648,9 +675,9 @@ func findBackupJob(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster jobList := &batchv1.JobList{} err := c.List(ctx, jobList, - client.InNamespace(pg.Namespace), + client.InNamespace(pb.Namespace), client.MatchingLabelsSelector{ - Selector: naming.PGBackRestBackupJobSelector(pg.Name, pb.Spec.RepoName, naming.BackupManual), + Selector: naming.PGBackRestBackupJobSelector(pb.Spec.PGCluster, pb.Spec.RepoName, naming.BackupManual), }) if err != nil { return nil, errors.Wrap(err, "get backup jobs") diff --git a/percona/controller/pgcluster/backup.go b/percona/controller/pgcluster/backup.go index 44639f1b1..5b432283e 100644 --- a/percona/controller/pgcluster/backup.go +++ b/percona/controller/pgcluster/backup.go @@ -5,6 +5,7 @@ import ( "github.com/pkg/errors" batchv1 "k8s.io/api/batch/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/retry" @@ -87,14 +88,18 @@ func (r *PGClusterReconciler) cleanupOutdatedBackups(ctx context.Context, cr *v2 // After the pg-backup is deleted, the job is not deleted immediately. // We need to set the DeletionTimestamp for a job so that `reconcileBackupJob` doesn't create a new pg-backup before the job deletion. job := new(batchv1.Job) - if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Status.JobName, Namespace: pgBackup.Namespace}, job); err != nil { + err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Status.JobName, Namespace: pgBackup.Namespace}, job) + if client.IgnoreNotFound(err) != nil { return errors.Wrap(err, "get backup job") } - prop := metav1.DeletePropagationForeground - if err := r.Client.Delete(ctx, job, &client.DeleteOptions{ - PropagationPolicy: &prop, - }); err != nil { - return errors.Wrapf(err, "delete job %s/%s", job.Name, job.Namespace) + // The job may be deleted earlier due to ttlSecondsAfterFinished + if !k8serrors.IsNotFound(err) { + prop := metav1.DeletePropagationForeground + if err := r.Client.Delete(ctx, job, &client.DeleteOptions{ + PropagationPolicy: &prop, + }); err != nil { + return errors.Wrapf(err, "delete job %s/%s", job.Name, job.Namespace) + } } if err := r.Client.Delete(ctx, &pgBackup); err != nil { return errors.Wrapf(err, "delete backup %s/%s", pgBackup.Name, pgBackup.Namespace) diff --git a/percona/naming/finalizers.go b/percona/naming/finalizers.go index 604f809b3..d2fa2858b 100644 --- a/percona/naming/finalizers.go +++ b/percona/naming/finalizers.go @@ -19,3 +19,8 @@ const ( const ( FinalizerDeleteBackup = PrefixPerconaInternal + "delete-backup" //nolint:gosec ) + +// PerconaPGBackup job finalizers +const ( + FinalizerKeepJob = PrefixPerconaInternal + "keep-job" //nolint:gosec +)