Skip to content

Commit

Permalink
K0-314: Added support for batch scale-down (#279)
Browse files Browse the repository at this point in the history
* Added support for batch scale-down

Co-authored-by: Sudhanshu Ranjan <[email protected]>
  • Loading branch information
abhishekdwivedi3060 and sud82 authored Apr 17, 2024
1 parent f004170 commit fec77eb
Show file tree
Hide file tree
Showing 13 changed files with 410 additions and 87 deletions.
5 changes: 4 additions & 1 deletion api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,12 @@ type RackConfig struct { //nolint:govet // for readability
// Racks is the list of all racks
// +nullable
Racks []Rack `json:"racks,omitempty"`
// RollingUpdateBatchSize is the percentage/number of rack pods that will be restarted simultaneously
// RollingUpdateBatchSize is the percentage/number of rack pods that can be restarted simultaneously
// +optional
RollingUpdateBatchSize *intstr.IntOrString `json:"rollingUpdateBatchSize,omitempty"`
// ScaleDownBatchSize is the percentage/number of rack pods that can be scaled down simultaneously
// +optional
ScaleDownBatchSize *intstr.IntOrString `json:"scaleDownBatchSize,omitempty"`
// MaxIgnorablePods is the maximum number/percentage of pending/failed pods in a rack that are ignored while
// assessing cluster stability. Pods identified using this value are not considered part of the cluster.
// Additionally, in SC mode clusters, these pods are removed from the roster.
Expand Down
100 changes: 66 additions & 34 deletions api/v1/aerospikecluster_validating_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -602,38 +602,15 @@ func (c *AerospikeCluster) validateRackConfig(_ logr.Logger) error {
}

// Validate batch upgrade/restart param
if c.Spec.RackConfig.RollingUpdateBatchSize != nil {
if err := validateIntOrStringField(c.Spec.RackConfig.RollingUpdateBatchSize,
"spec.rackConfig.rollingUpdateBatchSize"); err != nil {
return err
}

if len(c.Spec.RackConfig.Racks) < 2 {
return fmt.Errorf("can not use rackConfig.RollingUpdateBatchSize when number of racks is less than two")
}

nsConfsNamespaces := c.getNsConfsForNamespaces()
for ns, nsConf := range nsConfsNamespaces {
if !isNameExist(c.Spec.RackConfig.Namespaces, ns) {
return fmt.Errorf(
"can not use rackConfig.RollingUpdateBatchSize when there is any non-rack enabled namespace %s", ns,
)
}

if nsConf.noOfRacksForNamespaces <= 1 {
return fmt.Errorf(
"can not use rackConfig.RollingUpdateBatchSize when namespace `%s` is configured in only one rack",
ns,
)
}
if err := c.validateBatchSize(c.Spec.RackConfig.RollingUpdateBatchSize,
"spec.rackConfig.rollingUpdateBatchSize"); err != nil {
return err
}

if nsConf.replicationFactor <= 1 {
return fmt.Errorf(
"can not use rackConfig.RollingUpdateBatchSize when namespace `%s` is configured with replication-factor 1",
ns,
)
}
}
// Validate batch scaleDown param
if err := c.validateBatchSize(c.Spec.RackConfig.ScaleDownBatchSize,
"spec.rackConfig.scaleDownBatchSize"); err != nil {
return err
}

// Validate MaxIgnorablePods param
Expand All @@ -652,11 +629,11 @@ type nsConf struct {
replicationFactor int
}

func (c *AerospikeCluster) getNsConfsForNamespaces() map[string]nsConf {
func getNsConfForNamespaces(rackConfig RackConfig) map[string]nsConf {
nsConfs := map[string]nsConf{}

for idx := range c.Spec.RackConfig.Racks {
rack := &c.Spec.RackConfig.Racks[idx]
for idx := range rackConfig.Racks {
rack := &rackConfig.Racks[idx]
nsList := rack.AerospikeConfig.Value["namespaces"].([]interface{})

for _, nsInterface := range nsList {
Expand Down Expand Up @@ -2173,6 +2150,61 @@ func (c *AerospikeCluster) validateNetworkPolicy(namespace string) error {
return nil
}

// validateBatchSize validates a rack-level batch-size field such as
// spec.rackConfig.rollingUpdateBatchSize or spec.rackConfig.scaleDownBatchSize.
//
// A nil batchSize means the field is not set and is always accepted. Otherwise
// the value itself is checked with validateIntOrStringField, and the rack
// configuration must satisfy all of the following:
//   - there are at least two racks;
//   - every namespace reported by getNsConfForNamespaces is listed in
//     rackConfig.Namespaces;
//   - every such namespace is configured in more than one rack;
//   - every such namespace has a replication factor greater than 1.
//
// The rack configuration from the spec is always checked. When
// c.Status.AerospikeConfig is non-nil, the rack configuration recorded in the
// status is checked as well, so that a batch size cannot be introduced while
// the previously applied rack configuration does not qualify for it.
//
// fieldPath is the path of the field being validated; it is only used to build
// error messages. The returned error is nil when the field is valid.
func (c *AerospikeCluster) validateBatchSize(batchSize *intstr.IntOrString, fieldPath string) error {
	if batchSize == nil {
		return nil
	}

	if err := validateIntOrStringField(batchSize, fieldPath); err != nil {
		return err
	}

	// Shared by the spec and status checks below; closes over fieldPath so
	// that both produce messages naming the same field.
	validateRacksForBatchSize := func(rackConfig RackConfig) error {
		if len(rackConfig.Racks) < 2 {
			return fmt.Errorf("can not use %s when number of racks is less than two", fieldPath)
		}

		nsConfsNamespaces := getNsConfForNamespaces(rackConfig)
		for ns, nsConf := range nsConfsNamespaces {
			if !isNameExist(rackConfig.Namespaces, ns) {
				return fmt.Errorf(
					"can not use %s when there is any non-rack enabled namespace %s", fieldPath, ns,
				)
			}

			if nsConf.noOfRacksForNamespaces <= 1 {
				return fmt.Errorf(
					"can not use %s when namespace `%s` is configured in only one rack", fieldPath, ns,
				)
			}

			if nsConf.replicationFactor <= 1 {
				return fmt.Errorf(
					"can not use %s when namespace `%s` is configured with replication-factor 1", fieldPath,
					ns,
				)
			}
		}

		return nil
	}

	// Validate rackConfig from spec.
	if err := validateRacksForBatchSize(c.Spec.RackConfig); err != nil {
		return err
	}

	// If the status is populated, also validate rackConfig from status to
	// restrict a batch-size update while the old rackConfig is not valid for
	// batch-size.
	if c.Status.AerospikeConfig != nil {
		if err := validateRacksForBatchSize(c.Status.RackConfig); err != nil {
			// %w keeps the rendered message unchanged while preserving the
			// underlying error for errors.Is / errors.As.
			return fmt.Errorf("status invalid for %s: update, %w", fieldPath, err)
		}
	}

	return nil
}

func validateIntOrStringField(value *intstr.IntOrString, fieldPath string) error {
randomNumber := 100
// Just validate if value is valid number or string.
Expand Down
5 changes: 5 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 16 additions & 2 deletions config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8690,7 +8690,14 @@ spec:
- type: integer
- type: string
description: RollingUpdateBatchSize is the percentage/number of
rack pods that will be restarted simultaneously
rack pods that can be restarted simultaneously
x-kubernetes-int-or-string: true
scaleDownBatchSize:
anyOf:
- type: integer
- type: string
description: ScaleDownBatchSize is the percentage/number of rack
pods that can be scaled down simultaneously
x-kubernetes-int-or-string: true
type: object
rosterNodeBlockList:
Expand Down Expand Up @@ -18143,7 +18150,14 @@ spec:
- type: integer
- type: string
description: RollingUpdateBatchSize is the percentage/number of
rack pods that will be restarted simultaneously
rack pods that can be restarted simultaneously
x-kubernetes-int-or-string: true
scaleDownBatchSize:
anyOf:
- type: integer
- type: string
description: ScaleDownBatchSize is the percentage/number of rack
pods that can be scaled down simultaneously
x-kubernetes-int-or-string: true
type: object
resources:
Expand Down
4 changes: 2 additions & 2 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -897,10 +897,10 @@ func (r *SingleClusterReconciler) getClusterPodList() (
return podList, nil
}

func (r *SingleClusterReconciler) isAnyPodInImageFailedState(podList []corev1.Pod, ignorablePodNames sets.Set[string],
func (r *SingleClusterReconciler) isAnyPodInImageFailedState(podList []*corev1.Pod, ignorablePodNames sets.Set[string],
) bool {
for idx := range podList {
pod := &podList[idx]
pod := podList[idx]
if ignorablePodNames.Has(pod.Name) {
continue
}
Expand Down
Loading

0 comments on commit fec77eb

Please sign in to comment.