Skip to content

Commit

Permalink
pb-7504: make NFS job pod to use root for resource backup
Browse files Browse the repository at this point in the history
- When a GCP-based Filestore is used as the NFS backup location, the job
  pod using it does not have group write permission, which causes a
  permission-denied error for the non-root user during backup and restore.

- This is GKE-specific behaviour, hence a check is added to force all job pods
  to run as the root user, eliminating the permission-denied error.

Signed-off-by: Lalatendu Das <[email protected]>
  • Loading branch information
lalat-das committed Jul 30, 2024
1 parent 1f487d1 commit 423ce1f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
24 changes: 21 additions & 3 deletions pkg/drivers/nfsbackup/nfsbackup.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,32 @@ func jobForBackupResource(
},
}

uid := utils.KdmpJobUid
// For GCP based clusters the NFS PVC is mounted with anomalous GID permissions (i.e. without GID write permission),
// hence we avoid passing any specific UID or GID so that the job pod always runs as the root user.
// Note: this makes the job pod fail on GCP based clusters in a PSA enabled environment.

// check the cluster is GCP based or not
isGcpBasedCluster, err := utils.IsGcpHostedCluster()
if err != nil {
logrus.Errorf("failed to check the cluster is GCP based or not: %v", err)
return nil, fmt.Errorf("failed to check the cluster is GCP based or not for job [%s/%s]", jobOption.Namespace, jobOption.RestoreExportName)
}
if isGcpBasedCluster {
logrus.Debugf("Found a GCP based cluster hence not adding any specific UID/GID to the job, it will run with root user")
uid = ""
}

// The Job is intended to back up resources to an NFS backup location
// and it doesn't need a specific job UID/GID since it will be squashed at the NFS server,
// hence a global hardcoded UID/GID is used.
// Not passing the groupId as we do not want to set the RunAsGroup field in the securityContext
// This helps us in setting the primaryGroup ID to root for the user ID.
job, err = utils.AddSecurityContextToJob(job, utils.KdmpJobUid, "")
if err != nil {
return nil, err
if uid != "" {
job, err = utils.AddSecurityContextToJob(job, uid, "")
if err != nil {
return nil, err
}
}

// Add the image secret in job spec only if it is present in the stork deployment.
Expand Down
20 changes: 20 additions & 0 deletions pkg/drivers/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ const (
OcpUidRangeAnnotationKey = "openshift.io/sa.scc.uid-range"
OcpGidRangeAnnotationKey = "openshift.io/sa.scc.supplemental-groups"
kopiaBackupString = "kopiaexecutor backup"
// GCPBasedClusterString is the prefix of a node's Spec.ProviderID on a GCP hosted cluster.
GCPBasedClusterString = "gce://"
)

var (
Expand Down Expand Up @@ -1088,3 +1090,21 @@ func GetOcpNsUidGid(nsName string, psaJobUid string, psaJobGid string) (string,
}
return psaJobUid, psaJobGid, isOcp, nil
}

// IsGcpHostedCluster reports whether any node in the cluster has a
// Spec.ProviderID that starts with GCPBasedClusterString.
//
// Any GCP hosted cluster, be it vanilla, OCP or GKE, is expected to carry a
// provider ID with that prefix on its nodes. An error is returned only when
// the node list cannot be fetched; in that case the boolean result is false
// and carries no meaning.
func IsGcpHostedCluster() (bool, error) {
	nodes, err := core.Instance().GetNodes()
	if err != nil {
		// Wrap with %w so callers can inspect the underlying error
		// via errors.Is / errors.As; the message text is unchanged.
		return false, fmt.Errorf("failed to get nodes: %w", err)
	}

	// Index into the slice rather than ranging by value so that each
	// node object is not copied just to read a single field.
	for i := range nodes.Items {
		if strings.HasPrefix(nodes.Items[i].Spec.ProviderID, GCPBasedClusterString) {
			// One matching node is enough to classify the cluster.
			return true, nil
		}
	}
	return false, nil
}

0 comments on commit 423ce1f

Please sign in to comment.