KO-165: Added support for simultaneous scale-down and rolling restart of an SC cluster #233

Merged — 7 commits merged on Oct 3, 2023
Changes from 3 commits
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
@@ -1,8 +1,12 @@
 repos:
   - repo: https://github.com/tekwizely/pre-commit-golang
-    rev: v1.0.0-rc.1 # change this to the latest version
+    rev: v1.0.0-rc.1
     hooks:
       - id: go-mod-tidy

   - repo: https://github.com/golangci/golangci-lint
     rev: v1.52.2
+    hooks:
+      - id: golangci-lint
+        args:
+          - --config=.golangci.yml
2 changes: 1 addition & 1 deletion Makefile
@@ -327,5 +327,5 @@ catalog: opm ## Generate a file-based catalog and its dockerfile.
 docker-buildx-catalog: catalog
 	- docker buildx create --name project-v3-builder
 	docker buildx use project-v3-builder
-	- docker buildx build --push --no-cache --platform=$(PLATFORMS) --tag ${CATALOG_IMG} -f $(CATALOG_DIR).Dockerfile .
+	- docker buildx build --push --no-cache --platform=$(PLATFORMS) --tag $(CATALOG_IMG) -f $(CATALOG_DIR).Dockerfile .
 	- docker buildx rm project-v3-builder
6 changes: 3 additions & 3 deletions api/v1/aerospikecluster_validating_webhook.go
@@ -272,7 +272,7 @@ func (c *AerospikeCluster) validateSCNamespaces() error {
 	for _, nsConfInterface := range nsList {
 		nsConf := nsConfInterface.(map[string]interface{})

-		isEnabled := isNSSCEnabled(nsConf)
+		isEnabled := IsNSSCEnabled(nsConf)
 		if isEnabled {
 			tmpSCNamespaceSet.Insert(nsConf["name"].(string))

@@ -1244,7 +1244,7 @@ func validateNamespaceReplicationFactor(
 		return err
 	}

-	scEnabled := isNSSCEnabled(nsConf)
+	scEnabled := IsNSSCEnabled(nsConf)

 	// clSize < rf is allowed in AP mode but not in sc mode
 	if scEnabled && (clSize < rf) {
@@ -1255,7 +1255,7 @@

 	return nil
 }
-func isNSSCEnabled(nsConf map[string]interface{}) bool {
+func IsNSSCEnabled(nsConf map[string]interface{}) bool {
 	scEnabled, ok := nsConf["strong-consistency"]
 	if !ok {
 		return false
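The rename from isNSSCEnabled to IsNSSCEnabled exports the helper so that code outside api/v1 (the controllers package later in this diff) can check whether a namespace configuration has strong-consistency enabled. A minimal usage sketch — the namespace map values below are made up for illustration and are not taken from the PR:

```go
package main

import (
	"fmt"

	asdbv1 "github.com/aerospike/aerospike-kubernetes-operator/api/v1"
)

func main() {
	// Hypothetical namespace config block, shaped like one entry of
	// spec.aerospikeConfig.namespaces after unmarshalling.
	nsConf := map[string]interface{}{
		"name":               "test",
		"strong-consistency": true,
	}

	if asdbv1.IsNSSCEnabled(nsConf) {
		fmt.Println("namespace", nsConf["name"], "is strong-consistency (SC) enabled")
	}
}
```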
2 changes: 1 addition & 1 deletion api/v1/utils.go
@@ -476,7 +476,7 @@ func IsClusterSCEnabled(aeroCluster *AerospikeCluster) bool {

 	nsList := rack.AerospikeConfig.Value["namespaces"].([]interface{})
 	for _, nsConfInterface := range nsList {
-		isEnabled := isNSSCEnabled(nsConfInterface.(map[string]interface{}))
+		isEnabled := IsNSSCEnabled(nsConfInterface.(map[string]interface{}))
 		if isEnabled {
 			return true
 		}
16 changes: 5 additions & 11 deletions controllers/aero_info_calls.go
@@ -34,7 +34,7 @@ import (
 // The ignorablePods list should be a list of failed or pending pods that are going to be
 // deleted eventually and are safe to ignore in stability checks.
 func (r *SingleClusterReconciler) waitForMultipleNodesSafeStopReady(
-	pods []*corev1.Pod, ignorablePods []corev1.Pod, setRoster bool,
+	pods []*corev1.Pod, ignorablePods []corev1.Pod,
 ) reconcileResult {
 	if len(pods) == 0 {
 		return reconcileSuccess()
@@ -63,16 +63,10 @@ func (r *SingleClusterReconciler) waitForMultipleNodesSafeStopReady(
 		return res
 	}

-	if setRoster {
-		// Setup roster after migration.
-		if err = r.getAndSetRoster(policy, r.aeroCluster.Spec.RosterNodeBlockList, ignorablePods); err != nil {
-			r.Log.Error(err, "Failed to set roster for cluster")
-			return reconcileRequeueAfter(1)
-		}
-	} else {
-		if err := r.validateSCClusterState(policy, ignorablePods); err != nil {
-			return reconcileError(err)
-		}
+	// Setup roster after migration.
+	if err = r.getAndSetRoster(policy, r.aeroCluster.Spec.RosterNodeBlockList, ignorablePods); err != nil {
+		r.Log.Error(err, "Failed to set roster for cluster")
+		return reconcileRequeueAfter(1)
 	}

 	if err := r.quiescePods(policy, allHostConns, pods, ignorablePods); err != nil {
4 changes: 2 additions & 2 deletions controllers/pod.go
@@ -163,7 +163,7 @@ func (r *SingleClusterReconciler) rollingRestartPods(
 	if len(activePods) != 0 {
 		r.Log.Info("Restart active pods", "pods", getPodNames(activePods))

-		if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods, false); !res.isSuccess {
+		if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods); !res.isSuccess {
 			return res
 		}

@@ -376,7 +376,7 @@ func (r *SingleClusterReconciler) safelyDeletePodsAndEnsureImageUpdated(
 	if len(activePods) != 0 {
 		r.Log.Info("Restart active pods with updated container image", "pods", getPodNames(activePods))

-		if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods, false); !res.isSuccess {
+		if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods); !res.isSuccess {
 			return res
 		}

2 changes: 1 addition & 1 deletion controllers/rack.go
@@ -788,7 +788,7 @@ func (r *SingleClusterReconciler) scaleDownRack(
 		// Ignore safe stop check if pod is not running.
 		// Ignore migrate-fill-delay if pod is not running. Deleting this pod will not lead to any migration.
 		if isPodRunningAndReady {
-			if res := r.waitForMultipleNodesSafeStopReady([]*corev1.Pod{pod}, ignorablePods, true); !res.isSuccess {
+			if res := r.waitForMultipleNodesSafeStopReady([]*corev1.Pod{pod}, ignorablePods); !res.isSuccess {
 				// The pod is running and is unsafe to terminate.
 				return found, res
 			}
58 changes: 54 additions & 4 deletions controllers/strong_consistency.go
@@ -2,7 +2,9 @@ package controllers

 import (
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"

+	asdbv1 "github.com/aerospike/aerospike-kubernetes-operator/api/v1"
 	"github.com/aerospike/aerospike-management-lib/deployment"
 	as "github.com/ashishshinde/aerospike-client-go/v6"
 )
@@ -16,12 +18,12 @@ func (r *SingleClusterReconciler) getAndSetRoster(
 		return err
 	}

-	removedNSes, err := r.removedNamespaces(allHostConns)
+	ignorableNamespaces, err := r.getIgnorableNamespaces(allHostConns)
 	if err != nil {
 		return err
 	}

-	return deployment.GetAndSetRoster(r.Log, allHostConns, policy, rosterNodeBlockList, removedNSes)
+	return deployment.GetAndSetRoster(r.Log, allHostConns, policy, rosterNodeBlockList, ignorableNamespaces)
 }

 func (r *SingleClusterReconciler) validateSCClusterState(policy *as.ClientPolicy, ignorablePods []corev1.Pod) error {
@@ -30,10 +32,58 @@ func (r *SingleClusterReconciler) validateSCClusterState(policy *as.ClientPolicy
 		return err
 	}

-	removedNSes, err := r.removedNamespaces(allHostConns)
+	ignorableNamespaces, err := r.getIgnorableNamespaces(allHostConns)
 	if err != nil {
 		return err
 	}

-	return deployment.ValidateSCClusterState(r.Log, allHostConns, policy, removedNSes)
+	return deployment.ValidateSCClusterState(r.Log, allHostConns, policy, ignorableNamespaces)
 }

+func (r *SingleClusterReconciler) addedSCNamespaces(allHostConns []*deployment.HostConn) ([]string, error) {
+	var (
+		specSCNamespaces     = sets.NewString()
+		newAddedSCNamespaces = sets.NewString()
+	)
+
+	// Look inside only 1st rack. SC namespaces should be same across all the racks
+	rack := r.aeroCluster.Spec.RackConfig.Racks[0]
+	nsList := rack.AerospikeConfig.Value["namespaces"].([]interface{})
+
+	for _, nsConfInterface := range nsList {
+		if asdbv1.IsNSSCEnabled(nsConfInterface.(map[string]interface{})) {
+			specSCNamespaces.Insert(nsConfInterface.(map[string]interface{})["name"].(string))
+		}
+	}
+
+	nodesNamespaces, err := deployment.GetClusterNamespaces(r.Log, r.getClientPolicy(), allHostConns)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check if SC namespaces are present in all node's namespaces, if not then it's a new SC namespace
+	for _, namespaces := range nodesNamespaces {
+		nodeNamespaces := sets.NewString(namespaces...)
+		newAddedSCNamespaces.Insert(specSCNamespaces.Difference(nodeNamespaces).List()...)
+	}
+
+	return newAddedSCNamespaces.List(), nil
+}
+
+func (r *SingleClusterReconciler) getIgnorableNamespaces(allHostConns []*deployment.HostConn) (
+	sets.Set[string], error) {
+	removedNSes, err := r.removedNamespaces(allHostConns)
Review comment (Contributor): we call deployment.GetClusterNamespaces in both removedNamespaces and r.addedSCNamespaces; see if we can reduce the calls here.

+	if err != nil {
+		return nil, err
+	}
+
+	addSCNamespaces, err := r.addedSCNamespaces(allHostConns)
Review comment (Contributor): Is it OK to get the SC namespaces from all the hosts even if there are some entries in r.aeroCluster.Spec.RosterNodeBlockList? Similarly in validateSCClusterState.

Reply (Collaborator, author): As per the discussion, it is not very useful, so the extra checks for RosterNodeBlockList are skipped.

+	if err != nil {
+		return nil, err
+	}
+
+	ignorableNamespaces := sets.New[string](removedNSes...)
+	ignorableNamespaces.Insert(addSCNamespaces...)
+
+	return ignorableNamespaces, nil
+}
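To make the set arithmetic in addedSCNamespaces and getIgnorableNamespaces concrete: an SC namespace declared in the spec but not yet reported by every node is treated as newly added, and together with removed namespaces it becomes ignorable for roster and SC cluster-state checks. Below is a small self-contained sketch of that detection logic; the namespace names and per-node lists are invented for illustration, while the real code derives them from the first rack's config and deployment.GetClusterNamespaces:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// SC-enabled namespaces declared in the cluster spec (hypothetical values).
	specSCNamespaces := sets.NewString("sc-ns1", "sc-ns2")

	// Namespaces currently reported by each node (hypothetical values);
	// the second node has not yet picked up "sc-ns2".
	nodesNamespaces := [][]string{
		{"sc-ns1", "sc-ns2", "ap-ns"},
		{"sc-ns1", "ap-ns"},
	}

	newAddedSCNamespaces := sets.NewString()
	for _, namespaces := range nodesNamespaces {
		nodeNamespaces := sets.NewString(namespaces...)
		// Spec SC namespaces missing on this node count as newly added.
		newAddedSCNamespaces.Insert(specSCNamespaces.Difference(nodeNamespaces).List()...)
	}

	// Newly added SC namespaces are merged with removed namespaces into the
	// ignorable set (removed namespaces omitted here for brevity).
	ignorableNamespaces := sets.New[string](newAddedSCNamespaces.List()...)

	fmt.Println(ignorableNamespaces.UnsortedList()) // [sc-ns2]
}
```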
2 changes: 1 addition & 1 deletion go.mod
@@ -3,7 +3,7 @@ module github.com/aerospike/aerospike-kubernetes-operator
 go 1.19

 require (
-	github.com/aerospike/aerospike-management-lib v0.0.0-20230803080950-65d07e4ee37c
+	github.com/aerospike/aerospike-management-lib v0.0.0-20230814122406-a5247ec7486a
 	github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
 	github.com/ashishshinde/aerospike-client-go/v6 v6.0.1-0.20220606044039-77304169d3a4
 	github.com/evanphx/json-patch v4.12.0+incompatible
4 changes: 2 additions & 2 deletions go.sum
@@ -33,8 +33,8 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9
 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
-github.com/aerospike/aerospike-management-lib v0.0.0-20230803080950-65d07e4ee37c h1:hOERw+GeP1d6Vd+RIQZmkh7oEAPJV5i/VkJkfUT76TM=
-github.com/aerospike/aerospike-management-lib v0.0.0-20230803080950-65d07e4ee37c/go.mod h1:An7udaZ/C3TiDGI24gc4nOEomeMDD6ObBnLYpjjH/+A=
+github.com/aerospike/aerospike-management-lib v0.0.0-20230814122406-a5247ec7486a h1:A6KnC0czL5xt3HGcTQL1KAQgtme4zlo2hncLg/B4XqI=
+github.com/aerospike/aerospike-management-lib v0.0.0-20230814122406-a5247ec7486a/go.mod h1:An7udaZ/C3TiDGI24gc4nOEomeMDD6ObBnLYpjjH/+A=
 github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
 github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=