Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[KO-293] - Adding support for dynamic config change. #262

Merged
merged 27 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
495db95
Adding support for dynamic config change.
tanmayja Dec 11, 2023
3751956
Code structure change
tanmayja Dec 14, 2023
7de7c95
Supporting logging dynamic changes.
tanmayja Dec 20, 2023
f62c38f
Moving dynamic field validation in management lib.
tanmayja Dec 22, 2023
4a31423
version 2, handing diff in more detailed way
tanmayja Dec 27, 2023
8422b3b
Merge branch 'master' into dynamicConfig
tanmayja Jan 3, 2024
0eac6fa
Disallow DC and Namespace addition/removal dynamically from xdr.
tanmayja Jan 9, 2024
7ec3d63
storing status in annotation of pod for error handling
tanmayja Jan 30, 2024
a18475d
Giving pod update permission in cluster role and adding testcases.
tanmayja Feb 2, 2024
d5feeef
Merge branch 'master' into dynamicConfig
tanmayja Feb 5, 2024
c064dfe
Merge branch 'master' into dynamicConfig
tanmayja Feb 6, 2024
05ffaef
adressing comments
tanmayja Feb 12, 2024
5ebb6f2
fixing lint
tanmayja Feb 12, 2024
b73f541
addressing comments
tanmayja Feb 14, 2024
2573a38
addressing comments
tanmayja Feb 15, 2024
ea10a6f
fixing tests
tanmayja Feb 17, 2024
afe0e13
fixing failed pod handling
tanmayja Feb 23, 2024
22a4781
Merge branch 'master' into dynamicConfig
tanmayja Mar 4, 2024
4f28102
Merge branch 'master' into dynamicConfig
tanmayja Mar 22, 2024
c52d173
Merge branch 'master' into dynamicConfig
tanmayja Mar 26, 2024
dd8412f
Using latest lib and disallowing dynamic config change in xdr
tanmayja Mar 26, 2024
5fdde7c
Modifying test case
tanmayja Mar 26, 2024
46ce1ae
address comments
tanmayja Mar 28, 2024
53aa9ca
Merge branch 'master' into dynamicConfig
tanmayja Apr 3, 2024
ce05234
fixing testcases
tanmayja Apr 3, 2024
d612203
addressing comments and adding testcases
tanmayja Apr 10, 2024
3ee87e1
Updating init tag
tanmayja Apr 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions api/v1/aerospikecluster_mutating_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (c *AerospikeCluster) setDefaults(asLog logr.Logger) error {

// Set common aerospikeConfig defaults
// Update configMap
if err := c.setDefaultAerospikeConfigs(asLog, *c.Spec.AerospikeConfig, DefaultRackID); err != nil {
if err := c.setDefaultAerospikeConfigs(asLog, *c.Spec.AerospikeConfig, nil); err != nil {
return err
}

Expand Down Expand Up @@ -280,7 +280,7 @@ func (c *AerospikeCluster) updateRacksAerospikeConfigFromGlobal(asLog logr.Logge

// Set defaults in updated rack config
// Above merge function may have overwritten defaults.
if err := c.setDefaultAerospikeConfigs(asLog, AerospikeConfigSpec{Value: m}, rack.ID); err != nil {
if err := c.setDefaultAerospikeConfigs(asLog, AerospikeConfigSpec{Value: m}, &rack.ID); err != nil {
return err
}

Expand All @@ -291,7 +291,7 @@ func (c *AerospikeCluster) updateRacksAerospikeConfigFromGlobal(asLog logr.Logge
}

func (c *AerospikeCluster) setDefaultAerospikeConfigs(asLog logr.Logger,
configSpec AerospikeConfigSpec, rackID int) error {
configSpec AerospikeConfigSpec, rackID *int) error {
config := configSpec.Value

// namespace conf
Expand Down Expand Up @@ -362,7 +362,8 @@ func (n *AerospikeNetworkPolicy) setNetworkNamespace(namespace string) {
// Helper
// *****************************************************************************

func setDefaultNsConf(asLog logr.Logger, configSpec AerospikeConfigSpec, rackEnabledNsList []string, rackID int) error {
func setDefaultNsConf(asLog logr.Logger, configSpec AerospikeConfigSpec,
rackEnabledNsList []string, rackID *int) error {
config := configSpec.Value
// namespace conf
nsConf, ok := config["namespaces"]
Expand Down Expand Up @@ -397,16 +398,28 @@ func setDefaultNsConf(asLog logr.Logger, configSpec AerospikeConfigSpec, rackEna
}

if nsName, ok := nsMap["name"]; ok {
if _, ok := nsName.(string); ok {
if isNameExist(rackEnabledNsList, nsName.(string)) {
nsMap["rack-id"] = rackID
if name, ok := nsName.(string); ok {
if isNameExist(rackEnabledNsList, name) {
// Add rack-id only for rackEnabled namespaces
if rackID != nil {
// Add rack-id only in rack specific config, not in global config
defaultConfs := map[string]interface{}{"rack-id": *rackID}
if err := setDefaultsInConfigMap(
asLog, nsMap, defaultConfs,
); err != nil {
return fmt.Errorf(
"failed to set default aerospikeConfig.namespaces rack config: %v",
err,
)
}
}
} else {
// User may have added this key or may have patched object with new smaller rackEnabledNamespace list
// but left namespace defaults. This key should be removed then only controller will detect
// that some namespace is removed from rackEnabledNamespace list and cluster needs rolling restart
asLog.Info(
"Name aerospikeConfig.namespaces.name not found in rackEnabled namespace list. "+
"Namespace will not have defaultRackID",
"Namespace will not have any rackID",
"nsName", nsName, "rackEnabledNamespaces",
rackEnabledNsList,
)
Expand Down
2 changes: 1 addition & 1 deletion api/v1/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ const (
AerospikeInitContainerRegistryEnvVar = "AEROSPIKE_KUBERNETES_INIT_REGISTRY"
AerospikeInitContainerDefaultRegistry = "docker.io"
AerospikeInitContainerDefaultRegistryNamespace = "tanmayj10"
AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.1.0-dev1"
AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.1.0-dev3"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Create a new init tag and use that. Init changes are merged.

AerospikeAppLabel = "app"
AerospikeCustomResourceLabel = "aerospike.com/cr"
AerospikeRackIDLabel = "aerospike.com/rack-id"
Expand Down
6 changes: 3 additions & 3 deletions controllers/aero_info_calls.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ func (r *SingleClusterReconciler) setDynamicConfig(
return reconcileError(err)
}

r.Log.Info("Generated dynamic commands", "commands", fmt.Sprintf("%v", asConfCmds), "pod", podName)
r.Log.Info("Generated dynamic config commands", "commands", fmt.Sprintf("%v", asConfCmds), "pod", podName)

if err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns,
[]*deployment.HostConn{host}, asConfCmds); err != nil {
Expand All @@ -362,13 +362,13 @@ func (r *SingleClusterReconciler) setDynamicConfig(
if patchErr := r.patchPodStatus(
context.TODO(), patches,
); patchErr != nil {
return reconcileError(fmt.Errorf("error updating status: %v", patchErr))
return reconcileError(fmt.Errorf("error updating status: %v, dynamic config command error: %v", patchErr, err))
}

return reconcileError(err)
}

if err := r.updatePod(podName); err != nil {
if err := r.updateAerospikeConfInPod(podName); err != nil {
return reconcileError(err)
}
}
Expand Down
12 changes: 6 additions & 6 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,
return nil, nil, err
}

v, err := lib.CompareVersions(version, "7.0.0")
v, err := lib.CompareVersions(version, "6.0.0")
if err != nil {
return nil, nil, err
}

// If version >= 7.0.0, then we can update config dynamically.
// If version >= 6.0.0, then we can update config dynamically.
if v >= 0 {
// If dynamic commands have failed in previous retry, then we should not try to update config dynamically.
if !podStatus.DynamicConfigFailed {
Expand All @@ -128,7 +128,7 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,
}
}
} else {
r.Log.Info("Dynamic config change not supported for version < 7.0.0", "currentVersion", version)
r.Log.Info("Dynamic config change not supported for version < 6.0.0", "currentVersion", version)
}
}

Expand Down Expand Up @@ -394,14 +394,14 @@ func (r *SingleClusterReconciler) restartPods(
return reconcileSuccess()
}

func (r *SingleClusterReconciler) updatePod(podName string) error {
r.Log.Info("Updating pod", "pod", podName)
func (r *SingleClusterReconciler) updateAerospikeConfInPod(podName string) error {
r.Log.Info("Updating aerospike config file in pod", "pod", podName)

if err := r.restartASDOrUpdateAerospikeConf(podName, noRestartUpdateConf); err != nil {
return err
}

r.Log.V(1).Info("Pod Updated", "podName", podName)
r.Log.V(1).Info("Updated aerospike config file in pod", "podName", podName)

return nil
}
Expand Down
46 changes: 30 additions & 16 deletions controllers/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,14 +397,13 @@ func (r *SingleClusterReconciler) upgradeOrRollingRestartRack(found *appsv1.Stat
return found, res
}
} else {
var needRollingRestartRack, needDynamicUpdateRack, restartTypeMap,
dynamicConfDiffPerPod, nErr = r.needRollingRestartRack(rackState, ignorablePodNames)
var rollingRestartInfo, nErr = r.getRollingRestartInfo(rackState, ignorablePodNames)
if nErr != nil {
return found, reconcileError(nErr)
}

if needRollingRestartRack {
found, res = r.rollingRestartRack(found, rackState, ignorablePodNames, restartTypeMap, failedPods)
if rollingRestartInfo.needRestart {
found, res = r.rollingRestartRack(found, rackState, ignorablePodNames, rollingRestartInfo.restartTypeMap, failedPods)
if !res.isSuccess {
if res.err != nil {
r.Log.Error(
Expand All @@ -424,8 +423,9 @@ func (r *SingleClusterReconciler) upgradeOrRollingRestartRack(found *appsv1.Stat
}
}

if len(failedPods) == 0 && needDynamicUpdateRack {
res = r.updateDynamicConfig(rackState, ignorablePodNames, restartTypeMap, dynamicConfDiffPerPod)
if len(failedPods) == 0 && rollingRestartInfo.needUpdateConf {
res = r.updateDynamicConfig(rackState, ignorablePodNames,
rollingRestartInfo.restartTypeMap, rollingRestartInfo.dynamicConfDiffPerPod)
if !res.isSuccess {
if res.err != nil {
r.Log.Error(
Expand Down Expand Up @@ -469,8 +469,8 @@ func (r *SingleClusterReconciler) updateDynamicConfig(rackState *RackState,
r.Log.Info("Update dynamic config in Aerospike pods")

r.Recorder.Eventf(
r.aeroCluster, corev1.EventTypeNormal, "RackDynamicUpdate",
"[rack-%d] Started Dynamic update", rackState.Rack.ID,
r.aeroCluster, corev1.EventTypeNormal, "DynamicConfigUpdate",
"[rack-%d] Started dynamic config update", rackState.Rack.ID,
)

var (
Expand Down Expand Up @@ -504,8 +504,8 @@ func (r *SingleClusterReconciler) updateDynamicConfig(rackState *RackState,
}

r.Recorder.Eventf(
sud82 marked this conversation as resolved.
Show resolved Hide resolved
r.aeroCluster, corev1.EventTypeNormal, "RackDynamicUpdate",
"[rack-%d] Finished Dynamic update", rackState.Rack.ID,
r.aeroCluster, corev1.EventTypeNormal, "DynamicConfigUpdate",
"[rack-%d] Finished Dynamic config update", rackState.Rack.ID,
)

return reconcileSuccess()
Expand Down Expand Up @@ -1215,15 +1215,22 @@ func (r *SingleClusterReconciler) handleK8sNodeBlockListPods(statefulSet *appsv1
return statefulSet, reconcileSuccess()
}

func (r *SingleClusterReconciler) needRollingRestartRack(rackState *RackState, ignorablePodNames sets.Set[string]) (
needRestart, needUpdateConf bool, restartTypeMap map[string]RestartType,
dynamicConfDiffPerPod map[string]asconfig.DynamicConfigMap, err error,
type rollingRestartInfo struct {
restartTypeMap map[string]RestartType
dynamicConfDiffPerPod map[string]asconfig.DynamicConfigMap
needRestart, needUpdateConf bool
}

func (r *SingleClusterReconciler) getRollingRestartInfo(rackState *RackState, ignorablePodNames sets.Set[string]) (
info *rollingRestartInfo, err error,
) {
restartTypeMap, dynamicConfDiffPerPod, err = r.getRollingRestartTypeMap(rackState, ignorablePodNames)
restartTypeMap, dynamicConfDiffPerPod, err := r.getRollingRestartTypeMap(rackState, ignorablePodNames)
if err != nil {
return needRestart, needUpdateConf, nil, nil, err
return nil, err
}

needRestart, needUpdateConf := false, false

for _, restartType := range restartTypeMap {
switch restartType {
case noRestart:
Expand All @@ -1235,7 +1242,14 @@ func (r *SingleClusterReconciler) needRollingRestartRack(rackState *RackState, i
}
}

return needRestart, needUpdateConf, restartTypeMap, dynamicConfDiffPerPod, nil
info = &rollingRestartInfo{
needRestart: needRestart,
needUpdateConf: needUpdateConf,
restartTypeMap: restartTypeMap,
dynamicConfDiffPerPod: dynamicConfDiffPerPod,
}

return info, nil
}

func (r *SingleClusterReconciler) isRackUpgradeNeeded(rackID int, ignorablePodNames sets.Set[string]) (
Expand Down
11 changes: 7 additions & 4 deletions controllers/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,10 +463,13 @@ func (r *SingleClusterReconciler) updateAccessControlStatus() error {
return err
}

// AerospikeAccessControl
statusAerospikeAccessControl := lib.DeepCopy(
r.aeroCluster.Spec.AerospikeAccessControl,
).(*asdbv1.AerospikeAccessControlSpec)
var statusAerospikeAccessControl *asdbv1.AerospikeAccessControlSpec
if r.aeroCluster.Spec.AerospikeAccessControl != nil {
// AerospikeAccessControl
statusAerospikeAccessControl = lib.DeepCopy(
r.aeroCluster.Spec.AerospikeAccessControl,
).(*asdbv1.AerospikeAccessControlSpec)
}

newAeroCluster.Status.AerospikeClusterStatusSpec.AerospikeAccessControl = statusAerospikeAccessControl

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.21
toolchain go1.21.8

require (
github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe9872e
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/evanphx/json-patch v4.12.0+incompatible
github.com/go-logr/logr v1.3.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ github.com/aerospike/aerospike-client-go/v7 v7.1.0 h1:yvCTKdbpqZxHvv7sWsFHV1j49j
github.com/aerospike/aerospike-client-go/v7 v7.1.0/go.mod h1:AkHiKvCbqa1c16gCNGju3c5X/yzwLVvblNczqjxNwNk=
github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe9872e h1:Q/AfYe++0ouO5csLS8l99kCQqJJvDKlfHwhuWbECpaQ=
github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe9872e/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687 h1:d7oDvHmiKhq4rzcD/w3z9tP3wH0+iaDvxKDk3IYuqeU=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
Expand Down
80 changes: 79 additions & 1 deletion test/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,68 @@ var _ = Describe(
ScaleDownWithMigrateFillDelay(ctx)
},
)
Context(
"UpdateClusterPre600", func() {
UpdateClusterPre600(ctx)
},
)
},
)

func UpdateClusterPre600(ctx goctx.Context) {
Context(
"UpdateClusterPre600", func() {
clusterNamespacedName := getNamespacedName(
"deploy-cluster-pre6", namespace,
)

BeforeEach(
func() {
image := fmt.Sprintf(
"aerospike/aerospike-server-enterprise:%s", pre6Version,
)
aeroCluster, err := getAeroClusterConfig(
clusterNamespacedName, image,
)
Expect(err).ToNot(HaveOccurred())

err = deployCluster(k8sClient, ctx, aeroCluster)
Expect(err).ToNot(HaveOccurred())
},
)

AfterEach(
func() {
aeroCluster, err := getCluster(
k8sClient, ctx, clusterNamespacedName,
)
Expect(err).ToNot(HaveOccurred())

_ = deleteCluster(k8sClient, ctx, aeroCluster)
},
)

It(
"UpdateReplicationFactor: should fail for updating namespace replication-factor on server"+
"before 6.0.0. Cannot be updated", func() {
aeroCluster, err := getCluster(k8sClient, ctx, clusterNamespacedName)
Expect(err).ToNot(HaveOccurred())

namespaceConfig :=
aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{})[0].(map[string]interface{})
namespaceConfig["replication-factor"] = 5
aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{})[0] = namespaceConfig

err = k8sClient.Update(
ctx, aeroCluster,
)
Expect(err).Should(HaveOccurred())
},
)
},
)
}

func ScaleDownWithMigrateFillDelay(ctx goctx.Context) {
Context(
"ScaleDownWithMigrateFillDelay", func() {
Expand Down Expand Up @@ -920,6 +979,24 @@ func UpdateClusterTest(ctx goctx.Context) {
)
Expect(err).ToNot(HaveOccurred())

By("UpdateReplicationFactor: should update namespace replication-factor on non-SC namespace")

aeroCluster, err := getCluster(
k8sClient, ctx,
clusterNamespacedName,
)
Expect(err).ToNot(HaveOccurred())
nsList := aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{})
namespaceConfig := nsList[len(nsList)-1].(map[string]interface{})
// aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{})[0].(map[string]interface{})
namespaceConfig["replication-factor"] = 3
aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{})[len(nsList)-1] = namespaceConfig

err = k8sClient.Update(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use update with wait. The current call will only check for validation and not actual dynamic setting of rf

ctx, aeroCluster,
)
Expect(err).ToNot(HaveOccurred())

By("Scaling up along with modifying Namespace storage Dynamically")

err = scaleUpClusterTestWithNSDeviceHandling(
Expand Down Expand Up @@ -1067,7 +1144,8 @@ func UpdateClusterTest(ctx goctx.Context) {
Context(
"Namespace", func() {
It(
"UpdateReplicationFactor: should fail for updating namespace replication-factor. Cannot be updated",
"UpdateReplicationFactor: should fail for updating namespace"+
"replication-factor on SC namespace. Cannot be updated",
func() {
aeroCluster, err := getCluster(
k8sClient, ctx,
Expand Down
4 changes: 1 addition & 3 deletions test/services_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,7 @@ func createLoadBalancer() *asdbv1.LoadBalancerSpec {
),
)

result := lib.DeepCopy(&lb).(*asdbv1.LoadBalancerSpec)

return result
return lib.DeepCopy(&lb).(*asdbv1.LoadBalancerSpec)
}

func loadBalancerName(aeroCluster *asdbv1.AerospikeCluster) types.NamespacedName {
Expand Down
Loading