From a382276223590c163622ca5fe35eee544fe0a871 Mon Sep 17 00:00:00 2001 From: Lalatendu Das Date: Wed, 15 May 2024 06:39:51 +0000 Subject: [PATCH] pb-6681: Add support for PSA in kdmp job - if PSA is enabled, extract the UID and GID used by the pod. Here the pod is chosen based on whichever PVC is used by that pod - apply those UID and GID to all relevant job pod specs - for baseline and privileged mode there is no restriction on UID/GID, and the default settings of SELinux and seccomp are adopted in the job pod spec Signed-off-by: Lalatendu Das --- pkg/controllers/dataexport/reconcile.go | 21 +++++ pkg/controllers/resourceexport/reconcile.go | 8 ++ pkg/drivers/kopiabackup/kopiabackup.go | 8 ++ pkg/drivers/kopiarestore/kopiarestore.go | 9 +- pkg/drivers/nfsbackup/nfsbackup.go | 11 ++- pkg/drivers/nfscsirestore/nfscsirestore.go | 7 ++ pkg/drivers/nfsrestore/nfsrestore.go | 4 + pkg/drivers/options.go | 19 ++++ pkg/drivers/utils/utils.go | 96 +++++++++++++++++++++ 9 files changed, 181 insertions(+), 2 deletions(-) diff --git a/pkg/controllers/dataexport/reconcile.go b/pkg/controllers/dataexport/reconcile.go index c764df49a..25ab340bf 100644 --- a/pkg/controllers/dataexport/reconcile.go +++ b/pkg/controllers/dataexport/reconcile.go @@ -1872,6 +1872,8 @@ func startTransferJob( nfsServerAddr string nfsExportPath string nfsMountOption string + psaJobUid string + psaJobGid string ) if backupLocation != nil { @@ -1879,6 +1881,10 @@ func startTransferJob( nfsExportPath = backupLocation.Location.NFSConfig.SubPath nfsMountOption = backupLocation.Location.NFSConfig.MountOptions } + if dataExport != nil { + psaJobUid = getAnnotationValue(dataExport, utils.PsaUIDKey) + psaJobGid = getAnnotationValue(dataExport, utils.PsaGIDKey) + } switch drv.Name() { case drivers.Rsync: return drv.StartJob( @@ -1927,6 +1933,8 @@ func startTransferJob( drivers.WithNfsServer(nfsServerAddr), drivers.WithNfsExportDir(nfsExportPath), drivers.WithNfsMountOption(nfsMountOption), + drivers.WithPodUserId(psaJobUid), + 
drivers.WithPodGroupId(psaJobGid), ) case drivers.KopiaRestore: return drv.StartJob( @@ -1945,6 +1953,8 @@ func startTransferJob( drivers.WithJobConfigMapNs(jobConfigMapNs), drivers.WithNfsServer(nfsServerAddr), drivers.WithNfsExportDir(nfsExportPath), + drivers.WithPodUserId(psaJobUid), + drivers.WithPodGroupId(psaJobGid), ) } @@ -2353,6 +2363,15 @@ func startNfsCSIRestoreVolumeJob( bl *storkapi.BackupLocation, ) (string, error) { + var ( + psaJobUid string + psaJobGid string + ) + + if de != nil { + psaJobUid = getAnnotationValue(de, utils.PsaUIDKey) + psaJobGid = getAnnotationValue(de, utils.PsaGIDKey) + } jobName := utils.GetCsiRestoreJobName(drivers.NFSCSIRestore, de.Name) err := utils.CreateNfsSecret(utils.GetCredSecretName(jobName), bl, de.Namespace, nil) if err != nil { @@ -2371,6 +2390,8 @@ func startNfsCSIRestoreVolumeJob( drivers.WithNfsExportDir(bl.Location.NFSConfig.SubPath), drivers.WithNfsMountOption(bl.Location.NFSConfig.MountOptions), drivers.WithNfsSubPath(bl.Location.Path), + drivers.WithPodUserId(psaJobUid), + drivers.WithPodGroupId(psaJobGid), ) } return "", fmt.Errorf("unknown driver for nfs csi volume restore: %s", drv.Name()) diff --git a/pkg/controllers/resourceexport/reconcile.go b/pkg/controllers/resourceexport/reconcile.go index 56fb7b4a9..1e6316990 100644 --- a/pkg/controllers/resourceexport/reconcile.go +++ b/pkg/controllers/resourceexport/reconcile.go @@ -38,6 +38,14 @@ type updateResourceExportFields struct { LargeResourceEnabled bool } +func getAnnotationValue(re *kdmpapi.ResourceExport, key string) string { + var val string + if _, ok := re.Annotations[key]; ok { + val = re.Annotations[key] + } + return val +} + func (c *Controller) process(ctx context.Context, in *kdmpapi.ResourceExport) (bool, error) { funct := "resourceExport.process" if in == nil { diff --git a/pkg/drivers/kopiabackup/kopiabackup.go b/pkg/drivers/kopiabackup/kopiabackup.go index c9e17ff43..9ec53af97 100644 --- a/pkg/drivers/kopiabackup/kopiabackup.go +++ 
b/pkg/drivers/kopiabackup/kopiabackup.go @@ -318,6 +318,7 @@ func jobFor( logrus.Errorf("failed to get the toleration details: %v", err) return nil, fmt.Errorf("failed to get the toleration details for job [%s/%s]", jobOption.Namespace, jobName) } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, @@ -384,6 +385,13 @@ func jobFor( }, }, } + // Add security Context only if the PSA is enabled. + if jobOption.PodUserId != "" || jobOption.PodGroupId != "" { + job, err = utils.AddSecurityContextToJob(job, jobOption.PodUserId, jobOption.PodGroupId) + if err != nil { + return nil, err + } + } if len(nodeName) != 0 { job.Spec.Template.Spec.NodeName = nodeName diff --git a/pkg/drivers/kopiarestore/kopiarestore.go b/pkg/drivers/kopiarestore/kopiarestore.go index 771f3961a..bc3160e2a 100644 --- a/pkg/drivers/kopiarestore/kopiarestore.go +++ b/pkg/drivers/kopiarestore/kopiarestore.go @@ -219,6 +219,7 @@ func jobFor( logrus.Errorf("failed to get the toleration details: %v", err) return nil, fmt.Errorf("failed to get the toleration details for job [%s/%s]", jobOption.Namespace, jobName) } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, @@ -285,7 +286,13 @@ func jobFor( }, }, } - + // Add security Context only if the PSA is enabled. + if jobOption.PodUserId != "" || jobOption.PodGroupId != "" { + job, err = utils.AddSecurityContextToJob(job, jobOption.PodUserId, jobOption.PodGroupId) + if err != nil { + return nil, err + } + } // Add the image secret in job spec only if it is present in the stork deployment. 
if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobName)) diff --git a/pkg/drivers/nfsbackup/nfsbackup.go b/pkg/drivers/nfsbackup/nfsbackup.go index 8a35494fb..48c6f8e88 100644 --- a/pkg/drivers/nfsbackup/nfsbackup.go +++ b/pkg/drivers/nfsbackup/nfsbackup.go @@ -199,7 +199,6 @@ func jobForBackupResource( }, " ") labels := addJobLabels(jobOption) - nfsExecutorImage, imageRegistrySecret, err := utils.GetExecutorImageAndSecret(drivers.NfsExecutorImage, jobOption.NfsImageExecutorSource, jobOption.NfsImageExecutorSourceNs, @@ -216,6 +215,7 @@ func jobForBackupResource( logrus.Errorf("failed to get the toleration details: %v", err) return nil, fmt.Errorf("failed to get the toleration details for job [%s/%s]", jobOption.Namespace, jobOption.RestoreExportName) } + job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: jobOption.RestoreExportName, @@ -270,6 +270,15 @@ func jobForBackupResource( }, }, } + + // The Job is intended to backup resources to NFS backuplocation + // and it doesn't need a specific JOB uid/gid since it will be sqaushed at NFS server + // hence used a global hardcoded UID/GID. + job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, utils.KdmpJobGid) + if err != nil { + return nil, err + } + // Add the image secret in job spec only if it is present in the stork deployment. if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobOption.RestoreExportName)) diff --git a/pkg/drivers/nfscsirestore/nfscsirestore.go b/pkg/drivers/nfscsirestore/nfscsirestore.go index 520c84973..3a4d4cd1b 100644 --- a/pkg/drivers/nfscsirestore/nfscsirestore.go +++ b/pkg/drivers/nfscsirestore/nfscsirestore.go @@ -266,6 +266,13 @@ func jobForRestoreCSISnapshot( }, }, } + // Add security Context only if the PSA is enabled. 
+ if jobOption.PodUserId != "" || jobOption.PodGroupId != "" { + job, err = utils.AddSecurityContextToJob(job, jobOption.PodUserId, jobOption.PodGroupId) + if err != nil { + return nil, err + } + } // Add the image secret in job spec only if it is present in the stork deployment. if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobName)) diff --git a/pkg/drivers/nfsrestore/nfsrestore.go b/pkg/drivers/nfsrestore/nfsrestore.go index 711ad59bf..fbff30e03 100644 --- a/pkg/drivers/nfsrestore/nfsrestore.go +++ b/pkg/drivers/nfsrestore/nfsrestore.go @@ -313,6 +313,10 @@ func jobForRestoreResource( }, }, } + job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, utils.KdmpJobGid) + if err != nil { + return nil, err + } // Add the image secret in job spec only if it is present in the stork deployment. if len(imageRegistrySecret) != 0 { job.Spec.Template.Spec.ImagePullSecrets = utils.ToImagePullSecret(utils.GetImageSecretName(jobOption.RestoreExportName)) diff --git a/pkg/drivers/options.go b/pkg/drivers/options.go index cf4a0b161..49ce3925a 100644 --- a/pkg/drivers/options.go +++ b/pkg/drivers/options.go @@ -59,6 +59,9 @@ type JobOpts struct { ResoureBackupName string ResoureBackupNamespace string S3DisableSSL bool + // psa specifc option to be used by job + PodUserId string + PodGroupId string } // WithS3DisableSSL is job parameter @@ -517,3 +520,19 @@ func WithNodeAffinity(l map[string]string) JobOption { return nil } } + +// WithPodUserId is job parameter. +func WithPodUserId(podUserId string) JobOption { + return func(opts *JobOpts) error { + opts.PodUserId = podUserId + return nil + } +} + +// WithPodGroupId is job parameter. 
+func WithPodGroupId(podGroupId string) JobOption {
+	return func(opts *JobOpts) error {
+		opts.PodGroupId = podGroupId
+		return nil
+	}
+}
diff --git a/pkg/drivers/utils/utils.go b/pkg/drivers/utils/utils.go
index a80986d74..feb9048c1 100644
--- a/pkg/drivers/utils/utils.go
+++ b/pkg/drivers/utils/utils.go
@@ -25,6 +25,7 @@ import (
 	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/validation"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/utils/ptr"
 )
 
 const (
@@ -67,6 +68,13 @@ const (
 	IstioInjectLabel = "sidecar.istio.io/inject"
 	// ProcessVMResourceSuccessMsg - vm resources processed successfully
 	ProcessVMResourceSuccessMsg = "vm resources processed successfully"
+	PsaUIDKey                   = "portworx.io/psa-uid"
+	PsaGIDKey                   = "portworx.io/psa-gid"
+	KdmpJobUid                  = "1013"
+	KdmpJobGid                  = "1013"
+	OcpUidRangeAnnotationKey    = "openshift.io/sa.scc.uid-range"
+	OcpGidRangeAnnotationKey    = "openshift.io/sa.scc.supplemental-groups"
+	kopiaBackupString           = "kopiaexecutor backup"
 )
 
 var (
@@ -966,3 +974,146 @@ func GetShortUID(uid string) string {
 	}
 	return uid[:8]
 }
+
+// AddSecurityContextToJob adds a restrictive security context to the first
+// container of the job's pod template; it is meant for jobs that run where
+// PSA (Pod Security Admission) is enabled.
+//
+// podUserId and podGroupId are decimal strings; an empty value leaves the
+// matching RunAsUser/RunAsGroup unset. When the job's namespace carries the
+// OpenShift SCC range annotations, the IDs taken from them by GetOcpNsUidGid
+// override the supplied ones. Static IDs such as KdmpJobUid and KdmpJobGid are
+// dummy IDs for jobs that back up resources to a backuplocation and need no
+// permissions of a specific user.
+//
+// It returns the job it was given, or nil and an error when the job is nil or
+// has no containers, the namespace lookup fails, or an ID is not an integer.
+func AddSecurityContextToJob(job *batchv1.Job, podUserId, podGroupId string) (*batchv1.Job, error) {
+	if job == nil {
+		return nil, fmt.Errorf("recieved a nil job object to add security context")
+	}
+	podSpec := &job.Spec.Template.Spec
+	// Guard the Containers[0] access below; an empty pod template used to panic.
+	if len(podSpec.Containers) == 0 {
+		return nil, fmt.Errorf("job [%s/%s] has no container to add security context", job.Namespace, job.Name)
+	}
+	container := &podSpec.Containers[0]
+	if container.SecurityContext == nil {
+		container.SecurityContext = &corev1.SecurityContext{}
+	}
+	// On an OCP cluster the pod cannot run with a hardcoded UID/GID or with the
+	// UID/GID preserved in the backup CR; it has to use the IDs from the
+	// namespace annotations.
+	ocpUid, ocpGid, isOcp, err := GetOcpNsUidGid(job.Namespace, podUserId, podGroupId)
+	if err != nil {
+		return nil, err
+	}
+	if isOcp {
+		podUserId = ocpUid
+		podGroupId = ocpGid
+	}
+
+	if podUserId != "" {
+		uid, err := parseSecurityID("UID", podUserId)
+		if err != nil {
+			return nil, err
+		}
+		container.SecurityContext.RunAsUser = &uid
+
+		// Use the same ID as fsGroup, except for a kopia backup, where it would
+		// alter the permissions of the backup pod filesystem. Fields already set
+		// in the pod security context are kept instead of being overwritten.
+		if !isKopiaBackupContainer(container) {
+			if podSpec.SecurityContext == nil {
+				podSpec.SecurityContext = &corev1.PodSecurityContext{}
+			}
+			fsGroup := uid
+			podSpec.SecurityContext.FSGroup = &fsGroup
+		}
+	}
+	if podGroupId != "" {
+		gid, err := parseSecurityID("GID", podGroupId)
+		if err != nil {
+			return nil, err
+		}
+		container.SecurityContext.RunAsGroup = &gid
+	}
+	// Run as non-root, forbid privilege escalation, use the runtime default
+	// seccomp profile and drop all capabilities.
+	container.SecurityContext.RunAsNonRoot = ptr.To(true)
+	container.SecurityContext.AllowPrivilegeEscalation = ptr.To(false)
+	container.SecurityContext.SeccompProfile = &corev1.SeccompProfile{
+		Type: "RuntimeDefault",
+	}
+	container.SecurityContext.Capabilities = &corev1.Capabilities{
+		Drop: []corev1.Capability{
+			"ALL",
+		},
+	}
+	return job, nil
+}
+
+// parseSecurityID converts a decimal UID/GID string to an int64. kind ("UID" or
+// "GID") only selects the wording of the log and error message.
+func parseSecurityID(kind, value string) (int64, error) {
+	id, err := strconv.ParseInt(value, 10, 64)
+	if err != nil {
+		logrus.Errorf("failed to convert the %s to int: %v", kind, err)
+		return 0, fmt.Errorf("failed to convert the %s to int: %w", kind, err)
+	}
+	return id, nil
+}
+
+// isKopiaBackupContainer reports whether the command line of the container, i.e.
+// Command followed by Args joined with spaces, contains kopiaBackupString. The
+// whole command line is searched, not just Command[0], so the match also works
+// when the executor is started through a wrapper such as a shell, and a
+// container without a command cannot cause an index-out-of-range panic.
+func isKopiaBackupContainer(container *corev1.Container) bool {
+	argv := make([]string, 0, len(container.Command)+len(container.Args))
+	argv = append(argv, container.Command...)
+	argv = append(argv, container.Args...)
+	return strings.Contains(strings.Join(argv, " "), kopiaBackupString)
+}
+
+// GetOcpNsUidGid returns the UID and GID that a job pod should use in the
+// namespace nsName.
+//
+// If the namespace has the openshift.io/sa.scc.uid-range annotation, the start of
+// its first block replaces psaJobUid; the start of the first block of
+// openshift.io/sa.scc.supplemental-groups replaces psaJobGid in the same way.
+// The returned bool is true when at least one of the two annotations is set,
+// which is treated as "this is an OCP namespace". Without the annotations the
+// supplied psaJobUid and psaJobGid are returned unchanged. An error is returned
+// when nsName is empty or the namespace cannot be fetched.
+func GetOcpNsUidGid(nsName string, psaJobUid string, psaJobGid string) (string, string, bool, error) {
+	if nsName == "" {
+		return "", "", false, fmt.Errorf("namespace name is empty")
+	}
+
+	ns, err := core.Instance().GetNamespace(nsName)
+	if err != nil {
+		return "", "", false, fmt.Errorf("failed to get namespace %s: %w", nsName, err)
+	}
+	isOcp := false
+	// Indexing a nil annotation map is safe and yields "".
+	if uidRange := ns.Annotations[OcpUidRangeAnnotationKey]; uidRange != "" {
+		psaJobUid = firstSccRangeValue(uidRange)
+		isOcp = true
+	}
+	if gidRange := ns.Annotations[OcpGidRangeAnnotationKey]; gidRange != "" {
+		psaJobGid = firstSccRangeValue(gidRange)
+		isOcp = true
+	}
+	return psaJobUid, psaJobGid, isOcp, nil
+}
+
+// firstSccRangeValue returns the start of the first block of an SCC range
+// annotation value. A block is written "<start>/<length>" or "<start>-<end>",
+// and several blocks may be separated by commas.
+func firstSccRangeValue(annotation string) string {
+	if end := strings.IndexAny(annotation, "/-,"); end >= 0 {
+		annotation = annotation[:end]
+	}
+	return strings.TrimSpace(annotation)
+}