Skip to content

Commit

Permalink
Added EnableDynamicUpdate flag to enable dynamic update flow (#284)
Browse files Browse the repository at this point in the history
* Added EnableDynamicUpdate flag to enable dynamic update flow
  • Loading branch information
abhishekdwivedi3060 authored Apr 15, 2024
1 parent 6285ed9 commit f004170
Show file tree
Hide file tree
Showing 11 changed files with 83 additions and 22 deletions.
19 changes: 17 additions & 2 deletions api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ const (
AerospikeClusterError AerospikeClusterPhase = "Error"
)

// DynamicConfigUpdateStatus is the status of a dynamic config update operation.
// Its allowed values are "Failed", "PartiallyFailed" and "" (empty).
// +kubebuilder:validation:Enum=Failed;PartiallyFailed;""
type DynamicConfigUpdateStatus string

// Allowed values of DynamicConfigUpdateStatus.
const (
// Failed is recorded when applying the dynamic config commands returned an error
// and no command was reported as succeeded.
Failed DynamicConfigUpdateStatus = "Failed"
// PartiallyFailed is recorded when applying the dynamic config commands returned an error
// although at least one command was reported as succeeded.
PartiallyFailed DynamicConfigUpdateStatus = "PartiallyFailed"
// Empty is the zero value; an empty status means the update was successful.
Empty DynamicConfigUpdateStatus = ""
)

// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// AerospikeClusterSpec defines the desired state of AerospikeCluster
Expand Down Expand Up @@ -77,6 +86,11 @@ type AerospikeClusterSpec struct { //nolint:govet // for readability
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Aerospike Server Configuration"
// +kubebuilder:pruning:PreserveUnknownFields
AerospikeConfig *AerospikeConfigSpec `json:"aerospikeConfig"`
// EnableDynamicConfigUpdate enables dynamic config update flow of the operator.
// If enabled, operator will try to update the Aerospike config dynamically.
// In case of inconsistent state during dynamic config update, operator falls back to rolling restart.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable Dynamic Config Update"
EnableDynamicConfigUpdate *bool `json:"enableDynamicConfigUpdate,omitempty"`
// ValidationPolicy controls validation of the Aerospike cluster resource.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Validation Policy"
ValidationPolicy *ValidationPolicySpec `json:"validationPolicy,omitempty"`
Expand Down Expand Up @@ -872,8 +886,9 @@ type AerospikePodStatus struct { //nolint:govet // for readability
// PodSpecHash is ripemd160 hash of PodSpec used by this pod
PodSpecHash string `json:"podSpecHash"`

// DynamicConfigFailed is true if aerospike config change failed to apply dynamically.
DynamicConfigFailed bool `json:"dynamicConfigFailed,omitempty"`
// DynamicConfigUpdateStatus is the status of dynamic config update operation.
// Empty "" status means successful update.
DynamicConfigUpdateStatus DynamicConfigUpdateStatus `json:"dynamicConfigUpdateStatus,omitempty"`

// IsSecurityEnabled is true if security is enabled in the pod
IsSecurityEnabled bool `json:"isSecurityEnabled"`
Expand Down
2 changes: 1 addition & 1 deletion api/v1/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ const (
AerospikeInitContainerRegistryEnvVar = "AEROSPIKE_KUBERNETES_INIT_REGISTRY"
AerospikeInitContainerDefaultRegistry = "docker.io"
AerospikeInitContainerDefaultRegistryNamespace = "aerospike"
AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.2.0-dev2"
AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.2.0-dev3"
AerospikeAppLabel = "app"
AerospikeCustomResourceLabel = "aerospike.com/cr"
AerospikeRackIDLabel = "aerospike.com/rack-id"
Expand Down
5 changes: 5 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 15 additions & 4 deletions config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ spec:
- customInterface
type: string
type: object
enableDynamicConfigUpdate:
description: EnableDynamicConfigUpdate enables dynamic config update
flow of the operator. If enabled, operator will try to update the
Aerospike config dynamically. In case of inconsistent state during
dynamic config update, operator falls back to rolling restart.
type: boolean
image:
description: Aerospike server image
type: string
Expand Down Expand Up @@ -14238,10 +14244,15 @@ spec:
items:
type: string
type: array
dynamicConfigFailed:
description: DynamicConfigFailed is true if aerospike config
change failed to apply dynamically.
type: boolean
dynamicConfigUpdateStatus:
description: DynamicConfigUpdateStatus is the status of dynamic
config update operation. Empty "" status means successful
update.
enum:
- Failed
- PartiallyFailed
- ""
type: string
hostExternalIP:
description: HostExternalIP of the K8s host this pod is scheduled
on.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ spec:
the Aerospike cluster.
displayName: Aerospike Network Policy
path: aerospikeNetworkPolicy
- description: EnableDynamicConfigUpdate enables dynamic config update flow of the
operator. If enabled, operator will try to update the Aerospike config dynamically.
In case of inconsistent state during dynamic config update, operator falls
back to rolling restart.
displayName: Enable Dynamic Config Update
path: enableDynamicConfigUpdate
- description: Aerospike server image
displayName: Server Image
path: image
Expand Down
13 changes: 10 additions & 3 deletions controllers/aero_info_calls.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,14 +348,21 @@ func (r *SingleClusterReconciler) setDynamicConfig(

r.Log.Info("Generated dynamic config commands", "commands", fmt.Sprintf("%v", asConfCmds), "pod", podName)

if err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns,
if succeededCmds, err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns,
[]*deployment.HostConn{host}, asConfCmds); err != nil {
errorStatus := asdbv1.Failed

// If succeededCmds is non-empty even though an error was returned, the update only partially failed.
if len(succeededCmds) != 0 {
errorStatus = asdbv1.PartiallyFailed
}

var patches []jsonpatch.PatchOperation

patch := jsonpatch.PatchOperation{
Operation: "replace",
Path: "/status/pods/" + podName + "/dynamicConfigFailed",
Value: true,
Path: "/status/pods/" + podName + "/dynamicConfigUpdateStatus",
Value: errorStatus,
}
patches = append(patches, patch)

Expand Down
10 changes: 7 additions & 3 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,11 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,

// If version >= 6.0.0, then we can update config dynamically.
if v >= 0 {
// If dynamic commands have failed in previous retry, then we should not try to update config dynamically.
if !podStatus.DynamicConfigFailed {
// Attempt a dynamic config update only if EnableDynamicConfigUpdate is set and the
// previous dynamic config command execution did not partially fail.
// A PartiallyFailed DynamicConfigUpdateStatus skips the dynamic update and falls back to rolling restart;
// a Failed DynamicConfigUpdateStatus still retries the dynamic update.
if asdbv1.GetBool(r.aeroCluster.Spec.EnableDynamicConfigUpdate) &&
podStatus.DynamicConfigUpdateStatus != asdbv1.PartiallyFailed {
// Fetching all dynamic config change.
dynamicConfDiffPerPod[pods[idx].Name], err = r.handleDynamicConfigChange(rackState, pods[idx], version)
if err != nil {
Expand All @@ -135,7 +138,8 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,
restartTypeMap[pods[idx].Name] = r.getRollingRestartTypePod(rackState, pods[idx], confMap, addedNSDevices,
len(dynamicConfDiffPerPod[pods[idx].Name]) > 0)

if podStatus.DynamicConfigFailed {
// Fallback to rolling restart in case of partial failure to recover with the desired Aerospike config
if podStatus.DynamicConfigUpdateStatus == asdbv1.PartiallyFailed {
restartTypeMap[pods[idx].Name] = mergeRestartType(restartTypeMap[pods[idx].Name], quickRestart)
}
}
Expand Down
2 changes: 2 additions & 0 deletions controllers/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ func (r *SingleClusterReconciler) reconcileRacks() reconcileResult {
}

r.Log.Info("Restarted the failed pods in the Rack", "rackID", state.Rack.ID, "failedPods", failedPods)
// Requeue after 1 second to fetch latest CR object with updated pod status
return reconcileRequeueAfter(1)
}
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.21
toolchain go1.21.8

require (
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412130613-2bc07a8654c3
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/evanphx/json-patch v4.12.0+incompatible
github.com/go-logr/logr v1.3.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
github.com/aerospike/aerospike-client-go/v7 v7.1.0 h1:yvCTKdbpqZxHvv7sWsFHV1j49jZcC8yXRooWsDFqKtA=
github.com/aerospike/aerospike-client-go/v7 v7.1.0/go.mod h1:AkHiKvCbqa1c16gCNGju3c5X/yzwLVvblNczqjxNwNk=
github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe9872e h1:Q/AfYe++0ouO5csLS8l99kCQqJJvDKlfHwhuWbECpaQ=
github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe9872e/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687 h1:d7oDvHmiKhq4rzcD/w3z9tP3wH0+iaDvxKDk3IYuqeU=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412042741-c7d631bbfa43 h1:7xdFCD3e1rdy5GiznyTrxinShah6cNJAMZZAphGEPZs=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412042741-c7d631bbfa43/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412130613-2bc07a8654c3 h1:buzjr9iDYSuI0jy/A8366pfPXalBP3Gke3MN+rO/Vzo=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412130613-2bc07a8654c3/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ spec:
- customInterface
type: string
type: object
enableDynamicConfigUpdate:
description: EnableDynamicConfigUpdate enables dynamic config update
flow of the operator. If enabled, operator will try to update the
Aerospike config dynamically. In case of inconsistent state during
dynamic config update, operator falls back to rolling restart.
type: boolean
image:
description: Aerospike server image
type: string
Expand Down Expand Up @@ -14238,10 +14244,15 @@ spec:
items:
type: string
type: array
dynamicConfigFailed:
description: DynamicConfigFailed is true if aerospike config
change failed to apply dynamically.
type: boolean
dynamicConfigUpdateStatus:
description: DynamicConfigUpdateStatus is the status of dynamic
config update operation. Empty "" status means successful
update.
enum:
- Failed
- PartiallyFailed
- ""
type: string
hostExternalIP:
description: HostExternalIP of the K8s host this pod is scheduled
on.
Expand Down

0 comments on commit f004170

Please sign in to comment.