Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KO-165: Added support for simultaneous scale-down and rolling restart SC cluster #233

Merged
merged 7 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
repos:
- repo: https://github.com/tekwizely/pre-commit-golang
rev: v1.0.0-rc.1 # change this to the latest version
rev: v1.0.0-rc.1
hooks:
- id: go-mod-tidy

- repo: https://github.com/golangci/golangci-lint
rev: v1.52.2
hooks:
- id: golangci-lint
args:
- --config=.golangci.yml
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -332,5 +332,5 @@ catalog: opm ## Generate a file-based catalog and its dockerfile.
docker-buildx-catalog: catalog
- docker buildx create --name project-v3-builder
docker buildx use project-v3-builder
- docker buildx build --push --no-cache --platform=$(PLATFORMS) --tag ${CATALOG_IMG} -f $(CATALOG_DIR).Dockerfile .
- docker buildx build --push --no-cache --platform=$(PLATFORMS) --tag $(CATALOG_IMG) -f $(CATALOG_DIR).Dockerfile .
- docker buildx rm project-v3-builder
6 changes: 3 additions & 3 deletions api/v1/aerospikecluster_validating_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ func (c *AerospikeCluster) validateSCNamespaces() error {
for _, nsConfInterface := range nsList {
nsConf := nsConfInterface.(map[string]interface{})

isEnabled := isNSSCEnabled(nsConf)
isEnabled := IsNSSCEnabled(nsConf)
if isEnabled {
tmpSCNamespaceSet.Insert(nsConf["name"].(string))

Expand Down Expand Up @@ -1244,7 +1244,7 @@ func validateNamespaceReplicationFactor(
return err
}

scEnabled := isNSSCEnabled(nsConf)
scEnabled := IsNSSCEnabled(nsConf)

// clSize < rf is allowed in AP mode but not in sc mode
if scEnabled && (clSize < rf) {
Expand All @@ -1255,7 +1255,7 @@ func validateNamespaceReplicationFactor(

return nil
}
func isNSSCEnabled(nsConf map[string]interface{}) bool {
func IsNSSCEnabled(nsConf map[string]interface{}) bool {
scEnabled, ok := nsConf["strong-consistency"]
if !ok {
return false
Expand Down
2 changes: 1 addition & 1 deletion api/v1/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ func IsClusterSCEnabled(aeroCluster *AerospikeCluster) bool {

nsList := rack.AerospikeConfig.Value["namespaces"].([]interface{})
for _, nsConfInterface := range nsList {
isEnabled := isNSSCEnabled(nsConfInterface.(map[string]interface{}))
isEnabled := IsNSSCEnabled(nsConfInterface.(map[string]interface{}))
if isEnabled {
return true
}
Expand Down
28 changes: 11 additions & 17 deletions controllers/aero_info_calls.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
// The ignorablePods list should be a list of failed or pending pods that are going to be
// deleted eventually and are safe to ignore in stability checks.
func (r *SingleClusterReconciler) waitForMultipleNodesSafeStopReady(
pods []*corev1.Pod, ignorablePods []corev1.Pod, setRoster bool,
pods []*corev1.Pod, ignorablePods []corev1.Pod,
) reconcileResult {
if len(pods) == 0 {
return reconcileSuccess()
Expand Down Expand Up @@ -63,16 +63,10 @@ func (r *SingleClusterReconciler) waitForMultipleNodesSafeStopReady(
return res
}

if setRoster {
// Setup roster after migration.
if err = r.getAndSetRoster(policy, r.aeroCluster.Spec.RosterNodeBlockList, ignorablePods); err != nil {
r.Log.Error(err, "Failed to set roster for cluster")
return reconcileRequeueAfter(1)
}
} else {
if err := r.validateSCClusterState(policy, ignorablePods); err != nil {
return reconcileError(err)
}
// Setup roster after migration.
if err = r.getAndSetRoster(policy, r.aeroCluster.Spec.RosterNodeBlockList, ignorablePods); err != nil {
r.Log.Error(err, "Failed to set roster for cluster")
return reconcileRequeueAfter(1)
}

if err := r.quiescePods(policy, allHostConns, pods, ignorablePods); err != nil {
Expand All @@ -85,11 +79,6 @@ func (r *SingleClusterReconciler) waitForMultipleNodesSafeStopReady(
func (r *SingleClusterReconciler) quiescePods(
policy *as.ClientPolicy, allHostConns []*deployment.HostConn, pods []*corev1.Pod, ignorablePods []corev1.Pod,
) error {
removedNSes, err := r.removedNamespaces(allHostConns)
if err != nil {
return err
}

podList := make([]corev1.Pod, 0, len(pods))

for idx := range pods {
Expand All @@ -101,7 +90,12 @@ func (r *SingleClusterReconciler) quiescePods(
return err
}

return deployment.InfoQuiesce(r.Log, policy, allHostConns, selectedHostConns, removedNSes)
nodesNamespaces, err := deployment.GetClusterNamespaces(r.Log, r.getClientPolicy(), allHostConns)
if err != nil {
return err
}

return deployment.InfoQuiesce(r.Log, policy, allHostConns, selectedHostConns, r.removedNamespaces(nodesNamespaces))
}

// TODO: Check only for migration
Expand Down
4 changes: 2 additions & 2 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (r *SingleClusterReconciler) rollingRestartPods(
if len(activePods) != 0 {
r.Log.Info("Restart active pods", "pods", getPodNames(activePods))

if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods, false); !res.isSuccess {
if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods); !res.isSuccess {
return res
}

Expand Down Expand Up @@ -376,7 +376,7 @@ func (r *SingleClusterReconciler) safelyDeletePodsAndEnsureImageUpdated(
if len(activePods) != 0 {
r.Log.Info("Restart active pods with updated container image", "pods", getPodNames(activePods))

if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods, false); !res.isSuccess {
if res := r.waitForMultipleNodesSafeStopReady(activePods, ignorablePods); !res.isSuccess {
return res
}

Expand Down
2 changes: 1 addition & 1 deletion controllers/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ func (r *SingleClusterReconciler) scaleDownRack(
// Ignore safe stop check if pod is not running.
// Ignore migrate-fill-delay if pod is not running. Deleting this pod will not lead to any migration.
if isPodRunningAndReady {
if res := r.waitForMultipleNodesSafeStopReady([]*corev1.Pod{pod}, ignorablePods, true); !res.isSuccess {
if res := r.waitForMultipleNodesSafeStopReady([]*corev1.Pod{pod}, ignorablePods); !res.isSuccess {
// The pod is running and is unsafe to terminate.
return found, res
}
Expand Down
9 changes: 2 additions & 7 deletions controllers/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -705,12 +705,7 @@ func (r *SingleClusterReconciler) checkPreviouslyFailedCluster() (bool, error) {
return false, nil
}

func (r *SingleClusterReconciler) removedNamespaces(allHostConns []*deployment.HostConn) ([]string, error) {
nodesNamespaces, err := deployment.GetClusterNamespaces(r.Log, r.getClientPolicy(), allHostConns)
if err != nil {
return nil, err
}

func (r *SingleClusterReconciler) removedNamespaces(nodesNamespaces map[string][]string) []string {
statusNamespaces := sets.NewString()
for _, namespaces := range nodesNamespaces {
statusNamespaces.Insert(namespaces...)
Expand All @@ -727,7 +722,7 @@ func (r *SingleClusterReconciler) removedNamespaces(allHostConns []*deployment.H

removedNamespaces := statusNamespaces.Difference(specNamespaces)

return removedNamespaces.List(), nil
return removedNamespaces.List()
}

func (r *SingleClusterReconciler) IsStatusEmpty() bool {
Expand Down
51 changes: 47 additions & 4 deletions controllers/strong_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package controllers

import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"

as "github.com/aerospike/aerospike-client-go/v6"
asdbv1 "github.com/aerospike/aerospike-kubernetes-operator/api/v1"
"github.com/aerospike/aerospike-management-lib/deployment"
)

Expand All @@ -16,12 +18,12 @@ func (r *SingleClusterReconciler) getAndSetRoster(
return err
}

removedNSes, err := r.removedNamespaces(allHostConns)
ignorableNamespaces, err := r.getIgnorableNamespaces(allHostConns)
if err != nil {
return err
}

return deployment.GetAndSetRoster(r.Log, allHostConns, policy, rosterNodeBlockList, removedNSes)
return deployment.GetAndSetRoster(r.Log, allHostConns, policy, rosterNodeBlockList, ignorableNamespaces)
}

func (r *SingleClusterReconciler) validateSCClusterState(policy *as.ClientPolicy, ignorablePods []corev1.Pod) error {
Expand All @@ -30,10 +32,51 @@ func (r *SingleClusterReconciler) validateSCClusterState(policy *as.ClientPolicy
return err
}

removedNSes, err := r.removedNamespaces(allHostConns)
ignorableNamespaces, err := r.getIgnorableNamespaces(allHostConns)
if err != nil {
return err
}

return deployment.ValidateSCClusterState(r.Log, allHostConns, policy, removedNSes)
return deployment.ValidateSCClusterState(r.Log, allHostConns, policy, ignorableNamespaces)
}

func (r *SingleClusterReconciler) addedSCNamespaces(nodesNamespaces map[string][]string) []string {
var (
specSCNamespaces = sets.NewString()
newAddedSCNamespaces = sets.NewString()
)

// Look inside only 1st rack. SC namespaces should be same across all the racks
rack := r.aeroCluster.Spec.RackConfig.Racks[0]
nsList := rack.AerospikeConfig.Value["namespaces"].([]interface{})

for _, nsConfInterface := range nsList {
if asdbv1.IsNSSCEnabled(nsConfInterface.(map[string]interface{})) {
specSCNamespaces.Insert(nsConfInterface.(map[string]interface{})["name"].(string))
}
}

// Check if SC namespaces are present in all node's namespaces, if not then it's a new SC namespace
for _, namespaces := range nodesNamespaces {
nodeNamespaces := sets.NewString(namespaces...)
newAddedSCNamespaces.Insert(specSCNamespaces.Difference(nodeNamespaces).List()...)
}

return newAddedSCNamespaces.List()
}

func (r *SingleClusterReconciler) getIgnorableNamespaces(allHostConns []*deployment.HostConn) (
sets.Set[string], error) {
nodesNamespaces, err := deployment.GetClusterNamespaces(r.Log, r.getClientPolicy(), allHostConns)
if err != nil {
return nil, err
}

removedNamespaces := r.removedNamespaces(nodesNamespaces)
addSCNamespaces := r.addedSCNamespaces(nodesNamespaces)

ignorableNamespaces := sets.New[string](removedNamespaces...)
ignorableNamespaces.Insert(addSCNamespaces...)

return ignorableNamespaces, nil
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.19

require (
github.com/aerospike/aerospike-client-go/v6 v6.14.0
github.com/aerospike/aerospike-management-lib v0.0.0-20230919064932-78dffc17b22f
github.com/aerospike/aerospike-management-lib v0.0.0-20231003114052-c91758138c9f
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/evanphx/json-patch v4.12.0+incompatible
github.com/go-logr/logr v1.2.3
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/aerospike/aerospike-client-go/v6 v6.14.0 h1:Z3FcGWJda1sagzdc6Akz4EJ13Pq55Uyn6qtFLrVUDd0=
github.com/aerospike/aerospike-client-go/v6 v6.14.0/go.mod h1:/0Wm81GhMqem+9flWcpazPKoRfjFeG6WrQdXGiMNi0A=
github.com/aerospike/aerospike-management-lib v0.0.0-20230919064932-78dffc17b22f h1:88Kb5XNy9+qS1/m8fr1WsbI8SlXhd2SSSKJXlg7mtz0=
github.com/aerospike/aerospike-management-lib v0.0.0-20230919064932-78dffc17b22f/go.mod h1:GddTWFXSpiOUmuyG32pfjdQT7yjWTGNSZKunEFK4osc=
github.com/aerospike/aerospike-management-lib v0.0.0-20231003114052-c91758138c9f h1:bR4R6TYDV8j2q+jOjQvS0LQ/eHdSHwkJWja+d7WrJuM=
github.com/aerospike/aerospike-management-lib v0.0.0-20231003114052-c91758138c9f/go.mod h1:GddTWFXSpiOUmuyG32pfjdQT7yjWTGNSZKunEFK4osc=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
Expand Down