Skip to content

Commit

Permalink
Merge branch 'master' into akstest
Browse files Browse the repository at this point in the history
  • Loading branch information
gejain committed Aug 14, 2024
2 parents 46fb50e + 4846a1e commit 6a03049
Show file tree
Hide file tree
Showing 7 changed files with 308 additions and 9 deletions.
6 changes: 6 additions & 0 deletions deployments/deploy-ssh.sh
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,8 @@ spec:
value: "${K8S_VENDOR}"
- name: TORPEDO_SSH_USER
value: "${TORPEDO_SSH_USER}"
- name: LB_SUBNET_KEY
value: "${LB_SUBNET_KEY}"
- name: TORPEDO_SSH_PASSWORD
value: "${TORPEDO_SSH_PASSWORD}"
- name: TORPEDO_SSH_KEY
Expand Down Expand Up @@ -806,6 +808,10 @@ spec:
value: "${EKS_CLUSTER_REGION}"
- name: EKS_PX_NODEGROUP_NAME
value: "${EKS_PX_NODEGROUP_NAME}"
- name: EKS_MAX_UNAVAILABLE_NODES_VALUE
value: "${EKS_MAX_UNAVAILABLE_NODES_VALUE}"
- name: EKS_MAX_UNAVAILABLE_PERCENTAGE_VALUE
value: "${EKS_MAX_UNAVAILABLE_PERCENTAGE_VALUE}"
- name: IKS_CLUSTER_NAME
value: "${IKS_CLUSTER_NAME}"
- name: IKS_PX_WORKERPOOL_NAME
Expand Down
7 changes: 6 additions & 1 deletion drivers/node/vsphere/vsphere.go
Original file line number Diff line number Diff line change
Expand Up @@ -1018,8 +1018,13 @@ func filterTargetDatastores(ctx context.Context, sourceDatastore *object.Datasto
availableSpace := dsProps.Summary.FreeSpace / 1024
spaceAfterMove := availableSpace - totalDiskSizeToMove
maxAllowedUsage := (dsProps.Summary.Capacity / 1024) * 90 / 100
log.Infof("Available Space is: %v", availableSpace)
log.Infof("Total Disk size to move is: %v", totalDiskSizeToMove)
log.Infof("Space after move is: %v", spaceAfterMove)
log.Infof("Max Allowed Usage is: %v", maxAllowedUsage)

if spaceAfterMove < 0 || spaceAfterMove > maxAllowedUsage {
usedSpace := (dsProps.Summary.Capacity / 1024) - availableSpace
if spaceAfterMove < 0 || (usedSpace+totalDiskSizeToMove > maxAllowedUsage) {
log.Infof("Datastore %v does not have enough space or will exceed 90 percent capacity.", ds.Name())
continue
}
Expand Down
106 changes: 102 additions & 4 deletions drivers/scheduler/eks/eks.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ package eks
import (
"context"
"fmt"
"os"
"strconv"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/autoscaling"
Expand All @@ -19,9 +24,6 @@ import (
"github.com/portworx/torpedo/drivers/scheduler"
kube "github.com/portworx/torpedo/drivers/scheduler/k8s"
"github.com/portworx/torpedo/pkg/log"
"os"
"strings"
"time"
)

const (
Expand Down Expand Up @@ -143,6 +145,96 @@ func (e *EKS) UpgradeNodeGroup(nodeGroupName string, version string) error {
return nil
}

// SetMaxUnavailableForUpgradeInNodeGroup updates Max Unavailable values for upgrades for a given node group
func (e *EKS) SetMaxUnavailableForUpgradeInNodeGroup(nodeGroupName string) error {
updateNodeGroup := false

log.Info("Checking if we need to configure EKS Node Group Max Unavailable values for surge upgrade..")
// Get Max Unavailable values from ENV vars, if present
maxUnavailableNodes := os.Getenv("EKS_MAX_UNAVAILABLE_NODES_VALUE")
maxUnavailablePercentage := os.Getenv("EKS_MAX_UNAVAILABLE_PERCENTAGE_VALUE")

// MaxUnavailable: aws.Int32(1), // Set the maximum number of nodes that can be unavailable during the update
// MaxUnavailablePercentage: aws.Int32(25), // Set the maximum percentage of nodes that can be unavailable during the update
updateConfig := &types.NodegroupUpdateConfig{}

if maxUnavailableNodes != "" {
log.Infof("Setting MaxUnavailable to [%s]", maxUnavailableNodes)
maxUnavailableNodesInt, err := strconv.Atoi(maxUnavailableNodes)
if err != nil {
return fmt.Errorf("failed to convert maxUnavailableNode string [%s] to int, Err: %v", maxUnavailableNodes, err)
}
updateConfig.MaxUnavailable = aws.Int32(int32(maxUnavailableNodesInt))
updateNodeGroup = true
}

if maxUnavailablePercentage != "" {
log.Infof("Setting MaxUnavailablePercentage to [%s]", maxUnavailablePercentage)
maxUnavailablePercentageInt, err := strconv.Atoi(maxUnavailablePercentage)
if err != nil {
return fmt.Errorf("failed to convert maxUnavailablePercentage string [%s] to int, Err: %v", maxUnavailablePercentage, err)
}
updateConfig.MaxUnavailablePercentage = aws.Int32(int32(maxUnavailablePercentageInt))
updateNodeGroup = true
}

if !updateNodeGroup {
log.Info("Skipping updating Node Group Max Unavailable values as none were passed..")
return nil
}

log.Infof("Updating Node Group [%s] with Max Unavailable values for surge upgrade for EKS cluster [%s]", nodeGroupName, e.clusterName)
_, err := e.eksClient.UpdateNodegroupConfig(context.TODO(), &eks.UpdateNodegroupConfigInput{
ClusterName: aws.String(e.clusterName),
NodegroupName: aws.String(nodeGroupName),
UpdateConfig: updateConfig,
})
if err != nil {
return fmt.Errorf("failed to configure Max Unavailable for EKS cluster [%s] node group [%s], Err: [%v]", e.clusterName, nodeGroupName, err)
}

if err := e.WaitForNodeGroupUpdate(nodeGroupName); err != nil {
return err
}

log.Infof("Successfully configured EKS cluster [%s] node group [%s] with Max Unavailable values for surge upgrade", e.clusterName, nodeGroupName)
return nil
}

// WaitForNodeGroupUpdate waits for Node Group to be updated and have expected status
func (e *EKS) WaitForNodeGroupUpdate(nodeGroupName string) error {
log.Infof("Waiting for EKS cluster [%s] Node Group [%s] to be updated", e.clusterName, nodeGroupName)
expectedUpgradeStatus := types.NodegroupStatusActive
t := func() (interface{}, bool, error) {
eksDescribeNodegroupOutput, err := e.eksClient.DescribeNodegroup(
context.TODO(),
&eks.DescribeNodegroupInput{
ClusterName: aws.String(e.clusterName),
NodegroupName: aws.String(nodeGroupName),
},
)
if err != nil {
return nil, false, err
}
if eksDescribeNodegroupOutput.Nodegroup == nil {
return nil, false, fmt.Errorf("failed to describe EKS cluster [%s] Node Group [%s], node group not found", e.clusterName, nodeGroupName)
}
status := eksDescribeNodegroupOutput.Nodegroup.Status
if status == expectedUpgradeStatus {
return nil, false, nil
} else {
return nil, true, fmt.Errorf("waiting for EKS cluster [%s] Node Group [%s] update to complete, expected status [%s], actual status [%s]", e.clusterName, nodeGroupName, expectedUpgradeStatus, status)
}
}
_, err := task.DoRetryWithTimeout(t, defaultEKSUpgradeTimeout, defaultEKSUpgradeRetryInterval)
if err != nil {
return fmt.Errorf("failed to update EKS cluster [%s] Node Group [%s], Err: [%v]", e.clusterName, nodeGroupName, err)
}

log.Infof("Successfully updated EKS cluster [%s] Node Group [%s]", e.clusterName, nodeGroupName)
return nil
}

// WaitForNodeGroupToUpgrade waits for the EKS node group to be upgraded to the specified version
func (e *EKS) WaitForNodeGroupToUpgrade(nodeGroupName string, version string) error {
log.Infof("Waiting for EKS cluster [%s] node group [%s] to be upgraded to [%s]", e.clusterName, nodeGroupName, version)
Expand Down Expand Up @@ -325,6 +417,12 @@ func (e *EKS) UpgradeScheduler(version string) error {
return fmt.Errorf("failed to wait for EKS cluster [%s] control plane to be upgraded to [%s], Err: [%v]", e.clusterName, version, err)
}

// Update Max Unavailable values for Node Group
err = e.SetMaxUnavailableForUpgradeInNodeGroup(e.pxNodeGroupName)
if err != nil {
return fmt.Errorf("failed to configure EKS cluster [%s] node group [%s] Max Unavailable values, Err: [%v]", e.clusterName, e.pxNodeGroupName, err)
}

// Upgrade Node Group
err = e.UpgradeNodeGroup(e.pxNodeGroupName, version)
if err != nil {
Expand Down Expand Up @@ -659,4 +757,4 @@ func (e *EKS) connect(n node.Node, listCmdsInput *ssm.ListCommandInvocationsInpu
func init() {
e := &EKS{}
scheduler.Register(SchedName, e)
}
}
13 changes: 10 additions & 3 deletions drivers/scheduler/k8s/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -5593,10 +5593,17 @@ func (k *K8s) createVirtualMachineObjects(
if err != nil {
return nil, fmt.Errorf("failed to retrieve VM after creating/waiting for DataVolumes: %v", err)
}
// Check if the VM is in 'Running' phase.
if !vm.Status.Ready {
return nil, fmt.Errorf("VM is not in the expected 'Running' state")
t := func() (interface{}, bool, error) {
vm, err = k8sKubevirt.GetVirtualMachine(obj.Name, obj.Namespace)
if err != nil {
return nil, true, err
}
if vm.Status.Ready {
return nil, false, nil
}
return nil, true, fmt.Errorf("waiting for VM [%s] in namespace [%s] to be ready", obj.Name, obj.Namespace)
}
_, err = task.DoRetryWithTimeout(t, cdiImageImportTimeout, cdiImageImportRetry)
return vm, nil
}
}
Expand Down
78 changes: 77 additions & 1 deletion tests/backup/backup_azure_credential_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import (
. "github.com/onsi/ginkgo/v2"
"github.com/pborman/uuid"
api "github.com/portworx/px-backup-api/pkg/apis/v1"
"github.com/portworx/sched-ops/task"
"github.com/portworx/torpedo/drivers/backup"
"github.com/portworx/torpedo/drivers/scheduler"
"github.com/portworx/torpedo/pkg/log"
. "github.com/portworx/torpedo/tests"
"golang.org/x/sync/errgroup"
"strings"
)

// This testcase creates Azure cloud account with mandatory and non-mandatory fields and take backup & restore
Expand Down Expand Up @@ -51,7 +53,7 @@ var _ = Describe("{AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields}"
)

JustBeforeEach(func() {
StartPxBackupTorpedoTest("AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields", "Azure cloud account with mandatory and non mandatory fields", nil, 31661, Sagrawal, Q2FY25)
StartPxBackupTorpedoTest("AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields", "Azure cloud account with mandatory and non mandatory fields", nil, 300036, Sagrawal, Q2FY25)
backupLocationMap1 = make(map[string]string)
cloudCredentialMap1 = make(map[string]string)
backupLocationMap2 = make(map[string]string)
Expand Down Expand Up @@ -229,3 +231,77 @@ var _ = Describe("{AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields}"
log.FailOnError(err, "Data validations failed")
})
})

// This testcase verifies the error message while creating immutable backup location with few mandatory parameters missing for immutable backup location while creating cloud cred
var _ = Describe("{AzureCloudAccountForLockedBucket}", Label(TestCaseLabelsMap[AzureCloudAccountForLockedBucket]...), func() {

var (
credUidWithMandatoryFields string
azureCredNameWithMandatoryFields string
azureBackupLocationNameWithMandatoryFields string
backupLocationMandatoryFieldsUID string
containerLevelStorageAccount string
containerLevelStorageAccountKey string
azureImmutableBucket string
backupLocationMap map[string]string
cloudCredentialMap map[string]string
azureConfigFields *api.AzureConfig
scheduledAppContexts []*scheduler.Context
)

JustBeforeEach(func() {
StartPxBackupTorpedoTest("AzureCloudAccountForLockedBucket", "Azure cloud account for immutable bucket", nil, 300037, Sagrawal, Q2FY25)
backupLocationMap = make(map[string]string)
cloudCredentialMap = make(map[string]string)
})

It("Azure cloud account with mandatory and non mandatory fields", func() {
ctx, err := backup.GetAdminCtxFromSecret()
log.FailOnError(err, "Fetching px-central-admin ctx")
_, containerLevelStorageAccount, containerLevelStorageAccountKey, _, _, _, _ = GetAzureImmutabilityCredsFromEnv()

Step("Creating azure cloud account with only mandatory fields", func() {
log.InfoD("Creating azure cloud account with only mandatory fields")
credUidWithMandatoryFields = uuid.New()
azureConfigFields = &api.AzureConfig{
AccountName: containerLevelStorageAccount,
AccountKey: containerLevelStorageAccountKey,
}
azureCredNameWithMandatoryFields = fmt.Sprintf("%s-azure-cred-with-mandatory-fields-immutable", RandomString(5))
err = CreateAzureCloudCredential(azureCredNameWithMandatoryFields, credUidWithMandatoryFields, BackupOrgID, azureConfigFields, ctx)
dash.VerifyFatal(err, nil, fmt.Sprintf("Verifying creation of azure cloud credential named [%s] for org [%s] having only mandatory fields for immutable bucket", azureCredNameWithMandatoryFields, BackupOrgID))
cloudCredentialMap[azureCredNameWithMandatoryFields] = credUidWithMandatoryFields

log.InfoD("Creating azure immutable bucket")
azureImmutableBucket = RandomString(5) + "azure-immutable-bucket"
CreateAzureBucket(azureImmutableBucket, true, Container_level, 2, true)

log.InfoD("Creating azure immutable backup location with mandatory fields in azure credentials")
azureBackupLocationNameWithMandatoryFields = fmt.Sprintf("azure-immutable-bkp-loc-mandatory-fields-%v", RandomString(5))
backupLocationMandatoryFieldsUID = uuid.New()
backupLocationMap[backupLocationMandatoryFieldsUID] = azureBackupLocationNameWithMandatoryFields
err = CreateBackupLocation("azure", azureBackupLocationNameWithMandatoryFields, backupLocationMandatoryFieldsUID, azureCredNameWithMandatoryFields, credUidWithMandatoryFields, azureImmutableBucket, BackupOrgID, "", true)
dash.VerifyFatal(strings.Contains(err.Error(), fmt.Sprintf("error in obtaining object-lock info for the bucket %v: secret can't be empty string", azureBackupLocationNameWithMandatoryFields)), true, fmt.Sprintf("Verifying message while creating immutable backup location %v with few mandatory parameters missing for immutable backup location while creating cloud cred", azureBackupLocationNameWithMandatoryFields))
})
})

JustAfterEach(func() {
defer EndPxBackupTorpedoTest(scheduledAppContexts)
ctx, err := backup.GetAdminCtxFromSecret()
log.FailOnError(err, "Fetching px-central-admin ctx")
err = DeleteCloudCredentialWithContext(azureCredNameWithMandatoryFields, BackupOrgID, credUidWithMandatoryFields, ctx)
Inst().Dash.VerifyFatal(err, nil, fmt.Sprintf("Verifying deletion of cloud cred [%s]", azureCredNameWithMandatoryFields))
cloudCredDeleteStatus := func() (interface{}, bool, error) {
status, err := IsCloudCredPresent(azureCredNameWithMandatoryFields, ctx, BackupOrgID)
if err != nil {
return "", true, fmt.Errorf("cloud cred %s still present with error %v", azureCredNameWithMandatoryFields, err)
}
if status {
return "", true, fmt.Errorf("cloud cred %s is not deleted yet", azureCredNameWithMandatoryFields)
}
return "", false, nil
}
_, err = task.DoRetryWithTimeout(cloudCredDeleteStatus, CloudAccountDeleteTimeout, CloudAccountDeleteRetryTime)
Inst().Dash.VerifySafely(err, nil, fmt.Sprintf("Deleting cloud cred %s", azureCredNameWithMandatoryFields))
})
})
2 changes: 2 additions & 0 deletions tests/backup/backup_test_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ const (
PartialBackupSuccessWithAzureEndpoint TestCaseName = "PartialBackupSuccessWithAzureEndpoint"
PSALowerPrivilegeToHigherPrivilegeWithProjectMapping TestCaseName = "PSALowerPrivilegeToHigherPrivilegeWithProjectMapping"
AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields TestCaseName = "AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields"
AzureCloudAccountForLockedBucket TestCaseName = "AzureCloudAccountForLockedBucket"
)

// Test case labels
Expand Down Expand Up @@ -269,6 +270,7 @@ const (
PsaTakeBackupInLowerPrevilegeRestoreInHigherPrivilege TestCaseLabel = "PsaTakeBackupInLowerPrevilegeRestoreInHigherPrivilege"
PSALowerPrivilegeToHigherPrivilegeWithProjectMappingLabel TestCaseLabel = "PSALowerPrivilegeToHigherPrivilegeWithProjectMapping"
AzureCloudAccountCreationWithMandatoryAndNonMandatoryFieldsLabel TestCaseLabel = "AzureCloudAccountCreationWithMandatoryAndNonMandatoryFields"
AzureCloudAccountForLockedBucketLabel TestCaseLabel = "AzureCloudAccountForLockedBucket"
)

// Common Labels
Expand Down
Loading

0 comments on commit 6a03049

Please sign in to comment.