Skip to content

Commit

Permalink
Added EnableDynamicUpdate flag to enable dynamic update flow
Browse files Browse the repository at this point in the history
  • Loading branch information
abhishekdwivedi3060 committed Apr 12, 2024
1 parent 27f19b6 commit 23e7901
Show file tree
Hide file tree
Showing 11 changed files with 90 additions and 19 deletions.
19 changes: 17 additions & 2 deletions api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ const (
AerospikeClusterError AerospikeClusterPhase = "Error"
)

// DynamicConfigUpdateStatus describes the outcome of a dynamic Aerospike config
// update attempt on a pod. The allowed values are restricted by the kubebuilder
// enum marker below.
// +kubebuilder:validation:Enum=Failed;PartiallyFailed;""
type DynamicConfigUpdateStatus string

const (
// Failed marks a dynamic config update that returned an error.
// NOTE(review): the reconciler appears to use it when no command was recorded as passed — confirm.
Failed DynamicConfigUpdateStatus = "Failed"
// PartiallyFailed marks a dynamic config update that returned an error.
// NOTE(review): the reconciler appears to use it when at least one command passed — confirm.
PartiallyFailed DynamicConfigUpdateStatus = "PartiallyFailed"
// Empty is the empty-string status.
// NOTE(review): presumably this denotes a successful update — confirm against the status field docs.
Empty DynamicConfigUpdateStatus = ""
)

// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// AerospikeClusterSpec defines the desired state of AerospikeCluster
Expand Down Expand Up @@ -77,6 +86,11 @@ type AerospikeClusterSpec struct { //nolint:govet // for readability
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Aerospike Server Configuration"
// +kubebuilder:pruning:PreserveUnknownFields
AerospikeConfig *AerospikeConfigSpec `json:"aerospikeConfig"`
// EnableDynamicUpdate enables dynamic config update flow of the operator.
// If enabled, operator will try to update the Aerospike config dynamically.
// In case of inconsistent state during dynamic config update, operator falls back to rolling restart.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="EnableDynamicUpdate"
EnableDynamicUpdate *bool `json:"enableDynamicUpdate,omitempty"`
// ValidationPolicy controls validation of the Aerospike cluster resource.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Validation Policy"
ValidationPolicy *ValidationPolicySpec `json:"validationPolicy,omitempty"`
Expand Down Expand Up @@ -872,8 +886,9 @@ type AerospikePodStatus struct { //nolint:govet // for readability
// PodSpecHash is ripemd160 hash of PodSpec used by this pod
PodSpecHash string `json:"podSpecHash"`

// DynamicConfigFailed is true if aerospike config change failed to apply dynamically.
DynamicConfigFailed bool `json:"dynamicConfigFailed,omitempty"`
// DynamicConfigUpdateStatus is the status of dynamic config update operation.
// Empty "" status means successful update.
DynamicConfigUpdateStatus DynamicConfigUpdateStatus `json:"dynamicConfigUpdateStatus,omitempty"`

// IsSecurityEnabled is true if security is enabled in the pod
IsSecurityEnabled bool `json:"isSecurityEnabled"`
Expand Down
4 changes: 2 additions & 2 deletions api/v1/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ const (
AerospikeInitContainerName = "aerospike-init"
AerospikeInitContainerRegistryEnvVar = "AEROSPIKE_KUBERNETES_INIT_REGISTRY"
AerospikeInitContainerDefaultRegistry = "docker.io"
AerospikeInitContainerDefaultRegistryNamespace = "aerospike"
AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.2.0-dev2"
AerospikeInitContainerDefaultRegistryNamespace = "abhishekdwivedi3060"
AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.2.0-dev3"
AerospikeAppLabel = "app"
AerospikeCustomResourceLabel = "aerospike.com/cr"
AerospikeRackIDLabel = "aerospike.com/rack-id"
Expand Down
5 changes: 5 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 15 additions & 4 deletions config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ spec:
- customInterface
type: string
type: object
enableDynamicUpdate:
description: EnableDynamicUpdate enables dynamic config update flow
of the operator. If enabled, operator will try to update the Aerospike
config dynamically. In case of inconsistent state during dynamic
config update, operator falls back to rolling restart.
type: boolean
image:
description: Aerospike server image
type: string
Expand Down Expand Up @@ -14238,10 +14244,15 @@ spec:
items:
type: string
type: array
dynamicConfigFailed:
description: DynamicConfigFailed is true if aerospike config
change failed to apply dynamically.
type: boolean
dynamicConfigUpdateStatus:
description: DynamicConfigUpdateStatus is the status of dynamic
config update operation. Empty "" status means successful
update.
enum:
- Failed
- PartiallyFailed
- ""
type: string
hostExternalIP:
description: HostExternalIP of the K8s host this pod is scheduled
on.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ spec:
the Aerospike cluster.
displayName: Aerospike Network Policy
path: aerospikeNetworkPolicy
- description: EnableDynamicUpdate enables dynamic config update flow of the
operator. If enabled, operator will try to update the Aerospike config dynamically.
In case of inconsistent state during dynamic config update, operator falls
back to rolling restart.
displayName: EnableDynamicUpdate
path: enableDynamicUpdate
- description: Aerospike server image
displayName: Server Image
path: image
Expand Down
21 changes: 18 additions & 3 deletions controllers/aero_info_calls.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,14 +348,29 @@ func (r *SingleClusterReconciler) setDynamicConfig(

r.Log.Info("Generated dynamic config commands", "commands", fmt.Sprintf("%v", asConfCmds), "pod", podName)

if err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns,
if cmdStatus, err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns,
[]*deployment.HostConn{host}, asConfCmds); err != nil {
errorStatus := asdbv1.Failed

// Calculate the number of passed commands and based on that set Failed or PartiallyFailed status.
var passedCounter int

for _, passed := range cmdStatus {
if passed {
passedCounter++
}
}

if passedCounter != 0 {
errorStatus = asdbv1.PartiallyFailed
}

var patches []jsonpatch.PatchOperation

patch := jsonpatch.PatchOperation{
Operation: "replace",
Path: "/status/pods/" + podName + "/dynamicConfigFailed",
Value: true,
Path: "/status/pods/" + podName + "/dynamicConfigUpdateStatus",
Value: errorStatus,
}
patches = append(patches, patch)

Expand Down
10 changes: 7 additions & 3 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,11 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,

// If version >= 6.0.0, then we can update config dynamically.
if v >= 0 {
// If dynamic commands have failed in previous retry, then we should not try to update config dynamically.
if !podStatus.DynamicConfigFailed {
// Attempt a dynamic config update only when EnableDynamicUpdate is set and the previous
// attempt did not end in PartiallyFailed; a partial failure falls back to rolling restart.
// A Failed DynamicConfigUpdateStatus does not block another dynamic update attempt.
if asdbv1.GetBool(r.aeroCluster.Spec.EnableDynamicUpdate) &&
podStatus.DynamicConfigUpdateStatus != asdbv1.PartiallyFailed {
// Fetching all dynamic config change.
dynamicConfDiffPerPod[pods[idx].Name], err = r.handleDynamicConfigChange(rackState, pods[idx], version)
if err != nil {
Expand All @@ -135,7 +138,8 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,
restartTypeMap[pods[idx].Name] = r.getRollingRestartTypePod(rackState, pods[idx], confMap, addedNSDevices,
len(dynamicConfDiffPerPod[pods[idx].Name]) > 0)

if podStatus.DynamicConfigFailed {
// Fallback to rolling restart in case of partial failure to recover with the desired Aerospike config
if podStatus.DynamicConfigUpdateStatus == asdbv1.PartiallyFailed {
restartTypeMap[pods[idx].Name] = mergeRestartType(restartTypeMap[pods[idx].Name], quickRestart)
}
}
Expand Down
2 changes: 2 additions & 0 deletions controllers/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ func (r *SingleClusterReconciler) reconcileRacks() reconcileResult {
}

r.Log.Info("Restarted the failed pods in the Rack", "rackID", state.Rack.ID, "failedPods", failedPods)
// Requeue after 1 second to fetch latest CR object with updated pod status
return reconcileRequeueAfter(1)
}
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.21
toolchain go1.21.8

require (
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412042741-c7d631bbfa43
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/evanphx/json-patch v4.12.0+incompatible
github.com/go-logr/logr v1.3.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe987
github.com/aerospike/aerospike-management-lib v1.2.1-0.20240325134810-f8046fe9872e/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687 h1:d7oDvHmiKhq4rzcD/w3z9tP3wH0+iaDvxKDk3IYuqeU=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412042741-c7d631bbfa43 h1:7xdFCD3e1rdy5GiznyTrxinShah6cNJAMZZAphGEPZs=
github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412042741-c7d631bbfa43/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ spec:
- customInterface
type: string
type: object
enableDynamicUpdate:
description: EnableDynamicUpdate enables dynamic config update flow
of the operator. If enabled, operator will try to update the Aerospike
config dynamically. In case of inconsistent state during dynamic
config update, operator falls back to rolling restart.
type: boolean
image:
description: Aerospike server image
type: string
Expand Down Expand Up @@ -14238,10 +14244,15 @@ spec:
items:
type: string
type: array
dynamicConfigFailed:
description: DynamicConfigFailed is true if aerospike config
change failed to apply dynamically.
type: boolean
dynamicConfigUpdateStatus:
description: DynamicConfigUpdateStatus is the status of dynamic
config update operation. Empty "" status means successful
update.
enum:
- Failed
- PartiallyFailed
- ""
type: string
hostExternalIP:
description: HostExternalIP of the K8s host this pod is scheduled
on.
Expand Down

0 comments on commit 23e7901

Please sign in to comment.