From 391d36568006b0495fe8a1f775df322ae1f56d8c Mon Sep 17 00:00:00 2001 From: Lalatendu Das Date: Wed, 15 May 2024 06:39:51 +0000 Subject: [PATCH] pb-6681: Add support for PSA in kdmp job - Added a check and collected the PSA info from the namespace - If PSA is enabled, extracted the UID and GID used by the pod. Here the pod is chosen based on whichever PVC is used by that pod - Applied that UID and GID to all relevant job pod specs - This is done only if the PSA mode is enforced with the "restricted" value - For baseline and privileged modes there is no restriction on UID/GID, and the default SELinux and seccomp settings are adopted in the pod spec Signed-off-by: Lalatendu Das --- pkg/controllers/dataexport/reconcile.go | 34 ++++++++++++++-- pkg/controllers/resourceexport/reconcile.go | 12 ++++++ pkg/drivers/kopiabackup/kopiabackup.go | 8 ++++ pkg/drivers/kopiarestore/kopiarestore.go | 9 ++++- pkg/drivers/nfsbackup/nfsbackup.go | 14 ++++++- pkg/drivers/nfscsirestore/nfscsirestore.go | 7 ++++ pkg/drivers/nfsrestore/nfsrestore.go | 9 +++++ pkg/drivers/options.go | 28 +++++++++++++ pkg/drivers/utils/utils.go | 45 +++++++++++++++++++++ 9 files changed, 161 insertions(+), 5 deletions(-) diff --git a/pkg/controllers/dataexport/reconcile.go b/pkg/controllers/dataexport/reconcile.go index d3277151a..c881efff4 100644 --- a/pkg/controllers/dataexport/reconcile.go +++ b/pkg/controllers/dataexport/reconcile.go @@ -1137,9 +1137,9 @@ func (c *Controller) stageLocalSnapshotRestore(ctx context.Context, dataExport * } // Already done with max retries, so moving to kdmp restore anyway data := updateDataExportDetail{ - stage: kdmpapi.DataExportStageTransferScheduled, - status: kdmpapi.DataExportStatusInitial, - reason: "switching to restore from objectstore bucket as restoring from local snapshot did not happen", + stage: kdmpapi.DataExportStageTransferScheduled, + status: kdmpapi.DataExportStatusInitial, + reason: "switching to restore from objectstore bucket as restoring from local snapshot did not happen", 
resetLocalSnapshotRestore: true, } logrus.Infof("%v: In stageLocalSnapshotRestore stage, local snapshot restore failed, trying KDMP restore.", dataExport.Name) @@ -1872,6 +1872,9 @@ func startTransferJob( nfsServerAddr string nfsExportPath string nfsMountOption string + psaIsEnabled string + psaJobUid string + psaJobGid string ) if backupLocation != nil { @@ -1879,6 +1882,11 @@ func startTransferJob( nfsExportPath = backupLocation.Location.NFSConfig.SubPath nfsMountOption = backupLocation.Location.NFSConfig.MountOptions } + if dataExport != nil { + psaIsEnabled = getAnnotationValue(dataExport, utils.PsaEnabledKey) + psaJobUid = getAnnotationValue(dataExport, utils.PsaUIDKey) + psaJobGid = getAnnotationValue(dataExport, utils.PsaGIDKey) + } switch drv.Name() { case drivers.Rsync: return drv.StartJob( @@ -1927,6 +1935,9 @@ func startTransferJob( drivers.WithNfsServer(nfsServerAddr), drivers.WithNfsExportDir(nfsExportPath), drivers.WithNfsMountOption(nfsMountOption), + drivers.WithPsaIsEnabled(psaIsEnabled), + drivers.WithPodUserId(psaJobUid), + drivers.WithPodGroupId(psaJobGid), ) case drivers.KopiaRestore: return drv.StartJob( @@ -1945,6 +1956,9 @@ func startTransferJob( drivers.WithJobConfigMapNs(jobConfigMapNs), drivers.WithNfsServer(nfsServerAddr), drivers.WithNfsExportDir(nfsExportPath), + drivers.WithPsaIsEnabled(psaIsEnabled), + drivers.WithPodUserId(psaJobUid), + drivers.WithPodGroupId(psaJobGid), ) } @@ -2353,6 +2367,17 @@ func startNfsCSIRestoreVolumeJob( bl *storkapi.BackupLocation, ) (string, error) { + var ( + psaIsEnabled string + psaJobUid string + psaJobGid string + ) + + if de != nil { + psaIsEnabled = getAnnotationValue(de, utils.PsaEnabledKey) + psaJobUid = getAnnotationValue(de, utils.PsaUIDKey) + psaJobGid = getAnnotationValue(de, utils.PsaGIDKey) + } jobName := utils.GetCsiRestoreJobName(drivers.NFSCSIRestore, de.Name) err := utils.CreateNfsSecret(utils.GetCredSecretName(jobName), bl, de.Namespace, nil) if err != nil { @@ -2371,6 +2396,9 @@ func 
startNfsCSIRestoreVolumeJob( drivers.WithNfsExportDir(bl.Location.NFSConfig.SubPath), drivers.WithNfsMountOption(bl.Location.NFSConfig.MountOptions), drivers.WithNfsSubPath(bl.Location.Path), + drivers.WithPsaIsEnabled(psaIsEnabled), + drivers.WithPodUserId(psaJobUid), + drivers.WithPodGroupId(psaJobGid), ) } return "", fmt.Errorf("unknown driver for nfs csi volume restore: %s", drv.Name()) diff --git a/pkg/controllers/resourceexport/reconcile.go b/pkg/controllers/resourceexport/reconcile.go index 56fb7b4a9..c1842f4f2 100644 --- a/pkg/controllers/resourceexport/reconcile.go +++ b/pkg/controllers/resourceexport/reconcile.go @@ -38,6 +38,14 @@ type updateResourceExportFields struct { LargeResourceEnabled bool } +func getAnnotationValue(re *kdmpapi.ResourceExport, key string) string { + var val string + if _, ok := re.Annotations[key]; ok { + val = re.Annotations[key] + } + return val +} + func (c *Controller) process(ctx context.Context, in *kdmpapi.ResourceExport) (bool, error) { funct := "resourceExport.process" if in == nil { @@ -400,6 +408,8 @@ func startNfsResourceJob( bl *storkapi.BackupLocation, ) (string, error) { + isPsaEnabled := getAnnotationValue(re, utils.PsaEnabledKey) + err := utils.CreateNfsSecret(utils.GetCredSecretName(re.Name), bl, re.Namespace, nil) if err != nil { logrus.Errorf("failed to create NFS cred secret: %v", err) @@ -425,6 +435,7 @@ func startNfsResourceJob( drivers.WithNfsExportDir(bl.Location.NFSConfig.SubPath), drivers.WithJobConfigMap(jobConfigMap), drivers.WithJobConfigMapNs(jobConfigMapNs), + drivers.WithPsaIsEnabled(isPsaEnabled), ) case drivers.NFSRestore: return drv.StartJob( @@ -443,6 +454,7 @@ func startNfsResourceJob( drivers.WithNfsExportDir(bl.Location.NFSConfig.SubPath), drivers.WithJobConfigMap(jobConfigMap), drivers.WithJobConfigMapNs(jobConfigMapNs), + drivers.WithPsaIsEnabled(isPsaEnabled), ) } return "", fmt.Errorf("unknown data transfer driver: %s", drv.Name()) diff --git a/pkg/drivers/kopiabackup/kopiabackup.go 
b/pkg/drivers/kopiabackup/kopiabackup.go index c9e17ff43..ba5053f21 100644 --- a/pkg/drivers/kopiabackup/kopiabackup.go +++ b/pkg/drivers/kopiabackup/kopiabackup.go @@ -318,6 +318,7 @@ func jobFor( logrus.Errorf("failed to get the toleration details: %v", err) return nil, fmt.Errorf("failed to get the toleration details for job [%s/%s]", jobOption.Namespace, jobName) } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, @@ -384,6 +385,13 @@ func jobFor( }, }, } + // Add security Context only if the PSA is enabled. + if jobOption.PsaIsEnabled == "true" { + job, err = utils.AddSecurityContextToJob(job, jobOption.PodUserId, jobOption.PodGroupId) + if err != nil { + return nil, err + } + } if len(nodeName) != 0 { job.Spec.Template.Spec.NodeName = nodeName diff --git a/pkg/drivers/kopiarestore/kopiarestore.go b/pkg/drivers/kopiarestore/kopiarestore.go index 771f3961a..32439c997 100644 --- a/pkg/drivers/kopiarestore/kopiarestore.go +++ b/pkg/drivers/kopiarestore/kopiarestore.go @@ -219,6 +219,7 @@ func jobFor( logrus.Errorf("failed to get the toleration details: %v", err) return nil, fmt.Errorf("failed to get the toleration details for job [%s/%s]", jobOption.Namespace, jobName) } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, @@ -285,7 +286,13 @@ func jobFor( }, }, } - + // Add security Context only if the PSA is enabled. + if jobOption.PsaIsEnabled == "true" { + job, err = utils.AddSecurityContextToJob(job, jobOption.PodUserId, jobOption.PodGroupId) + if err != nil { + return nil, err + } + } // Add the image secret in job spec only if it is present in the stork deployment. 
if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobName)) diff --git a/pkg/drivers/nfsbackup/nfsbackup.go b/pkg/drivers/nfsbackup/nfsbackup.go index 8a35494fb..9cb10e52f 100644 --- a/pkg/drivers/nfsbackup/nfsbackup.go +++ b/pkg/drivers/nfsbackup/nfsbackup.go @@ -199,7 +199,6 @@ func jobForBackupResource( }, " ") labels := addJobLabels(jobOption) - nfsExecutorImage, imageRegistrySecret, err := utils.GetExecutorImageAndSecret(drivers.NfsExecutorImage, jobOption.NfsImageExecutorSource, jobOption.NfsImageExecutorSourceNs, @@ -216,6 +215,7 @@ func jobForBackupResource( logrus.Errorf("failed to get the toleration details: %v", err) return nil, fmt.Errorf("failed to get the toleration details for job [%s/%s]", jobOption.Namespace, jobOption.RestoreExportName) } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: jobOption.RestoreExportName, @@ -270,6 +270,18 @@ func jobForBackupResource( }, }, } + + // Add security Context only if the PSA is enabled. + if jobOption.PsaIsEnabled == "true" { + // The Job is intended to backup resources to NFS backuplocation + // and it doesn't need a specific JOB uid/gid since it will be sqaushed at NFS server + // hence used a global hardcoded UID/GID. + job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, utils.KdmpJobGid) + if err != nil { + return nil, err + } + } + // Add the image secret in job spec only if it is present in the stork deployment. if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobOption.RestoreExportName)) diff --git a/pkg/drivers/nfscsirestore/nfscsirestore.go b/pkg/drivers/nfscsirestore/nfscsirestore.go index 520c84973..cbbe439a1 100644 --- a/pkg/drivers/nfscsirestore/nfscsirestore.go +++ b/pkg/drivers/nfscsirestore/nfscsirestore.go @@ -266,6 +266,13 @@ func jobForRestoreCSISnapshot( }, }, } + // Add security Context only if the PSA is enabled. 
+ if jobOption.PsaIsEnabled == "true" { + job, err = utils.AddSecurityContextToJob(job, jobOption.PodUserId, jobOption.PodGroupId) + if err != nil { + return nil, err + } + } // Add the image secret in job spec only if it is present in the stork deployment. if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobName)) diff --git a/pkg/drivers/nfsrestore/nfsrestore.go b/pkg/drivers/nfsrestore/nfsrestore.go index 711ad59bf..eadcbf93e 100644 --- a/pkg/drivers/nfsrestore/nfsrestore.go +++ b/pkg/drivers/nfsrestore/nfsrestore.go @@ -313,6 +313,15 @@ func jobForRestoreResource( }, }, } + + // Add security Context only if the PSA is enabled. + if jobOption.PsaIsEnabled == "true" { + job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, utils.KdmpJobGid) + if err != nil { + return nil, err + } + } + // Add the image secret in job spec only if it is present in the stork deployment. if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobOption.RestoreExportName)) diff --git a/pkg/drivers/options.go b/pkg/drivers/options.go index cf4a0b161..40f271e02 100644 --- a/pkg/drivers/options.go +++ b/pkg/drivers/options.go @@ -59,6 +59,10 @@ type JobOpts struct { ResoureBackupName string ResoureBackupNamespace string S3DisableSSL bool + // PSA-specific options to be used by the job + PsaIsEnabled string + PodUserId string + PodGroupId string } // WithS3DisableSSL is job parameter @@ -517,3 +521,27 @@ func WithNodeAffinity(l map[string]string) JobOption { return nil } } + +// WithPsaIsEnabled is job parameter. +func WithPsaIsEnabled(psaIsEnabled string) JobOption { + return func(opts *JobOpts) error { + opts.PsaIsEnabled = psaIsEnabled + return nil + } +} + +// WithPodUserId is job parameter. 
+func WithPodUserId(podUserId string) JobOption { + return func(opts *JobOpts) error { + opts.PodUserId = podUserId + return nil + } +} + +// WithPodGroupId is job parameter. +func WithPodGroupId(PodGroupId string) JobOption { + return func(opts *JobOpts) error { + opts.PodGroupId = PodGroupId + return nil + } +} diff --git a/pkg/drivers/utils/utils.go b/pkg/drivers/utils/utils.go index a80986d74..1024fad93 100644 --- a/pkg/drivers/utils/utils.go +++ b/pkg/drivers/utils/utils.go @@ -25,6 +25,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/validation" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/pointer" ) const ( @@ -67,6 +68,11 @@ const ( IstioInjectLabel = "sidecar.istio.io/inject" // ProcessVMResourceSuccessMsg - vm resources processed successfully ProcessVMResourceSuccessMsg = "vm resources processed successfully" + PsaEnabledKey = "portworx.io/psa-enabled" + PsaUIDKey = "portworx.io/psa-uid" + PsaGIDKey = "portworx.io/psa-gid" + KdmpJobUid = "1013" + KdmpJobGid = "1013" ) var ( @@ -966,3 +972,42 @@ func GetShortUID(uid string) string { } return uid[:8] } + +// AddSecurityContextToJob adds a container security context to the job pod if the PSA is enabled. +// If static IDs like KdmpJobUid or KdmpJobGid are used, that means +// they are dummy IDs used for backing up resources to the backuplocation, +// which doesn't need UID-specific permissions. 
+func AddSecurityContextToJob(job *batchv1.Job, podUserId, podGroupId string) (*batchv1.Job, error) { + if job != nil { + if podUserId != "" { + uid, err := strconv.ParseInt(podUserId, 10, 64) + if err != nil { + logrus.Errorf("failed to convert the UID to int: %v", err) + return nil, fmt.Errorf("failed to convert the UID to int: %v", err) + } + job.Spec.Template.Spec.SecurityContext.RunAsUser = &uid + } + if podGroupId != "" { + gid, err := strconv.ParseInt(podGroupId, 10, 64) + if err != nil { + logrus.Errorf("failed to convert the GID to int: %v", err) + return nil, fmt.Errorf("failed to convert the GID to int: %v", err) + } + job.Spec.Template.Spec.SecurityContext.RunAsGroup = &gid + } + // Add RunAsNonRoot to true and drop all capabilities and seccomp profile and allowPrivilegeEscalation to false + job.Spec.Template.Spec.Containers[0].SecurityContext.RunAsNonRoot = pointer.Bool(true) + job.Spec.Template.Spec.Containers[0].SecurityContext.AllowPrivilegeEscalation = pointer.Bool(false) + job.Spec.Template.Spec.Containers[0].SecurityContext.SeccompProfile = &corev1.SeccompProfile{ + Type: "RuntimeDefault", + } + job.Spec.Template.Spec.Containers[0].SecurityContext.Capabilities = &corev1.Capabilities{ + Drop: []corev1.Capability{ + "ALL", + }, + } + } else { + return job, fmt.Errorf("recieved a nil job object to add security context") + } + return job, nil +}