diff --git a/.github/workflows/docker-image-release.yaml b/.github/workflows/docker-image-release.yaml new file mode 100644 index 000000000..7d8908aad --- /dev/null +++ b/.github/workflows/docker-image-release.yaml @@ -0,0 +1,60 @@ +name: Release Container Image +on: + push: + tags: + - '[0-9]+.[0-9]+.[0-9]+' + +jobs: + build-and-push: + name: Build and push image + runs-on: ubuntu-latest + env: + IMG_BASE: ${{ github.repository }} + TAG: ${{ github.ref_name }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + submodules: 'true' + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Login to Quay.io + uses: docker/login-action@v2 + with: + registry: quay.io + username: ${{ secrets.QUAY_USERNAME }} + password: ${{ secrets.QUAY_TOKEN }} + + - name: Build and push image on Dockerhub + run: make docker-buildx IMG=${{ env.IMG_BASE }}:${{ env.TAG }} VERSION=${{ env.TAG }} + + - name: Build and push image on Quay + run: make docker-buildx IMG=quay.io/${{ env.IMG_BASE }}:${{ env.TAG }} VERSION=${{ env.TAG }} + + - name: Run Snyk image security scan + uses: snyk/actions/docker@master + continue-on-error: true + id: docker-image-scan + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + image: ${{ env.IMG_BASE }}:${{ env.TAG }} + args: --file=Dockerfile --severity-threshold=high --fail-on=all # fail on vulnerabilities with fix available + + - name: Upload result to GitHub Code Scanning + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: snyk.sarif + + - name: Check docker image scan status + if: ${{ steps.docker-image-scan.outcome == 'failure' }} + run: exit 1 \ No newline at end of file diff --git a/Makefile b/Makefile index ded61ce7c..318d5af75 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,6 @@ endif # Image URL to use all 
building/pushing operator manager image targets IMG ?= controller:latest -IMG_TAGS ?= "" # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. ENVTEST_K8S_VERSION = 1.26 @@ -164,7 +163,7 @@ docker-buildx: ## Build and push docker image for the manager for cross-platform docker-buildx-openshift: ## Build and push docker image for the manager for openshift cross-platform support - docker buildx create --name project-v3-builder docker buildx use project-v3-builder - - docker buildx build --push --no-cache --provenance=false --platform=$(PLATFORMS) --tag ${IMG} --tag ${IMG_TAGS} --build-arg VERSION=$(VERSION) --build-arg USER=1001 . + - docker buildx build --push --no-cache --provenance=false --platform=$(PLATFORMS) --tag ${IMG} --build-arg VERSION=$(VERSION) --build-arg USER=1001 . - docker buildx rm project-v3-builder .PHONY: docker-push diff --git a/api/v1/aerospikecluster_types.go b/api/v1/aerospikecluster_types.go index d5de29b16..27627f043 100644 --- a/api/v1/aerospikecluster_types.go +++ b/api/v1/aerospikecluster_types.go @@ -47,6 +47,15 @@ const ( AerospikeClusterError AerospikeClusterPhase = "Error" ) +// +kubebuilder:validation:Enum=Failed;PartiallyFailed;"" +type DynamicConfigUpdateStatus string + +const ( + Failed DynamicConfigUpdateStatus = "Failed" + PartiallyFailed DynamicConfigUpdateStatus = "PartiallyFailed" + Empty DynamicConfigUpdateStatus = "" +) + // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. // AerospikeClusterSpec defines the desired state of AerospikeCluster @@ -77,6 +86,11 @@ type AerospikeClusterSpec struct { //nolint:govet // for readability // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Aerospike Server Configuration" // +kubebuilder:pruning:PreserveUnknownFields AerospikeConfig *AerospikeConfigSpec `json:"aerospikeConfig"` + // EnableDynamicConfigUpdate enables dynamic config update flow of the operator. 
+ // If enabled, operator will try to update the Aerospike config dynamically. + // In case of inconsistent state during dynamic config update, operator falls back to rolling restart. + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Enable Dynamic Config Update" + EnableDynamicConfigUpdate *bool `json:"enableDynamicConfigUpdate,omitempty"` // ValidationPolicy controls validation of the Aerospike cluster resource. // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Validation Policy" ValidationPolicy *ValidationPolicySpec `json:"validationPolicy,omitempty"` @@ -875,8 +889,9 @@ type AerospikePodStatus struct { //nolint:govet // for readability // PodSpecHash is ripemd160 hash of PodSpec used by this pod PodSpecHash string `json:"podSpecHash"` - // DynamicConfigFailed is true if aerospike config change failed to apply dynamically. - DynamicConfigFailed bool `json:"dynamicConfigFailed,omitempty"` + // DynamicConfigUpdateStatus is the status of dynamic config update operation. + // Empty "" status means successful update. 
+ DynamicConfigUpdateStatus DynamicConfigUpdateStatus `json:"dynamicConfigUpdateStatus,omitempty"` // IsSecurityEnabled is true if security is enabled in the pod IsSecurityEnabled bool `json:"isSecurityEnabled"` diff --git a/api/v1/utils.go b/api/v1/utils.go index f4062b48f..ed5c65040 100644 --- a/api/v1/utils.go +++ b/api/v1/utils.go @@ -63,7 +63,7 @@ const ( AerospikeInitContainerRegistryEnvVar = "AEROSPIKE_KUBERNETES_INIT_REGISTRY" AerospikeInitContainerDefaultRegistry = "docker.io" AerospikeInitContainerDefaultRegistryNamespace = "aerospike" - AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.2.0-dev2" + AerospikeInitContainerDefaultRepoAndTag = "aerospike-kubernetes-init:2.2.0-dev3" AerospikeAppLabel = "app" AerospikeCustomResourceLabel = "aerospike.com/cr" AerospikeRackIDLabel = "aerospike.com/rack-id" diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index d4afc22b4..fad9aac52 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -169,6 +169,11 @@ func (in *AerospikeClusterSpec) DeepCopyInto(out *AerospikeClusterSpec) { in, out := &in.AerospikeConfig, &out.AerospikeConfig *out = (*in).DeepCopy() } + if in.EnableDynamicConfigUpdate != nil { + in, out := &in.EnableDynamicConfigUpdate, &out.EnableDynamicConfigUpdate + *out = new(bool) + **out = **in + } if in.ValidationPolicy != nil { in, out := &in.ValidationPolicy, &out.ValidationPolicy *out = new(ValidationPolicySpec) diff --git a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml index 2066ce116..bef8aea20 100644 --- a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml +++ b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml @@ -281,6 +281,12 @@ spec: - customInterface type: string type: object + enableDynamicConfigUpdate: + description: EnableDynamicConfigUpdate enables dynamic config update + flow of the operator. 
If enabled, operator will try to update the + Aerospike config dynamically. In case of inconsistent state during + dynamic config update, operator falls back to rolling restart. + type: boolean image: description: Aerospike server image type: string @@ -14245,10 +14251,15 @@ spec: items: type: string type: array - dynamicConfigFailed: - description: DynamicConfigFailed is true if aerospike config - change failed to apply dynamically. - type: boolean + dynamicConfigUpdateStatus: + description: DynamicConfigUpdateStatus is the status of dynamic + config update operation. Empty "" status means successful + update. + enum: + - Failed + - PartiallyFailed + - "" + type: string hostExternalIP: description: HostExternalIP of the K8s host this pod is scheduled on. diff --git a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml index 64dcdffae..cd2b711bb 100644 --- a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml @@ -47,6 +47,12 @@ spec: the Aerospike cluster. displayName: Aerospike Network Policy path: aerospikeNetworkPolicy + - description: EnableDynamicConfigUpdate enables dynamic config update flow of the + operator. If enabled, operator will try to update the Aerospike config dynamically. + In case of inconsistent state during dynamic config update, operator falls + back to rolling restart.
+ displayName: Enable Dynamic Config Update + path: enableDynamicConfigUpdate - description: Aerospike server image displayName: Server Image path: image diff --git a/controllers/aero_info_calls.go b/controllers/aero_info_calls.go index df016e31a..e7537a7eb 100644 --- a/controllers/aero_info_calls.go +++ b/controllers/aero_info_calls.go @@ -348,14 +348,21 @@ func (r *SingleClusterReconciler) setDynamicConfig( r.Log.Info("Generated dynamic config commands", "commands", fmt.Sprintf("%v", asConfCmds), "pod", podName) - if err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns, + if succeededCmds, err := deployment.SetConfigCommandsOnHosts(r.Log, r.getClientPolicy(), allHostConns, []*deployment.HostConn{host}, asConfCmds); err != nil { + errorStatus := asdbv1.Failed + + // if the len of succeededCmds is not 0 along with error, then it is partially failed. + if len(succeededCmds) != 0 { + errorStatus = asdbv1.PartiallyFailed + } + var patches []jsonpatch.PatchOperation patch := jsonpatch.PatchOperation{ Operation: "replace", - Path: "/status/pods/" + podName + "/dynamicConfigFailed", - Value: true, + Path: "/status/pods/" + podName + "/dynamicConfigUpdateStatus", + Value: errorStatus, } patches = append(patches, patch) diff --git a/controllers/pod.go b/controllers/pod.go index 7fe37d1c5..29d9d27e2 100644 --- a/controllers/pod.go +++ b/controllers/pod.go @@ -119,8 +119,11 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState, // If version >= 6.0.0, then we can update config dynamically. if v >= 0 { - // If dynamic commands have failed in previous retry, then we should not try to update config dynamically. - if !podStatus.DynamicConfigFailed { + // If EnableDynamicConfigUpdate is set and dynamic config command exec partially failed in previous try + // then skip dynamic config update and fall back to rolling restart.
+ // Continue with dynamic config update in case of Failed DynamicConfigUpdateStatus + if asdbv1.GetBool(r.aeroCluster.Spec.EnableDynamicConfigUpdate) && + podStatus.DynamicConfigUpdateStatus != asdbv1.PartiallyFailed { // Fetching all dynamic config change. dynamicConfDiffPerPod[pods[idx].Name], err = r.handleDynamicConfigChange(rackState, pods[idx], version) if err != nil { @@ -135,7 +138,8 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState, restartTypeMap[pods[idx].Name] = r.getRollingRestartTypePod(rackState, pods[idx], confMap, addedNSDevices, len(dynamicConfDiffPerPod[pods[idx].Name]) > 0) - if podStatus.DynamicConfigFailed { + // Fallback to rolling restart in case of partial failure to recover with the desired Aerospike config + if podStatus.DynamicConfigUpdateStatus == asdbv1.PartiallyFailed { restartTypeMap[pods[idx].Name] = mergeRestartType(restartTypeMap[pods[idx].Name], quickRestart) } } diff --git a/controllers/rack.go b/controllers/rack.go index 2198f5b14..f4600741c 100644 --- a/controllers/rack.go +++ b/controllers/rack.go @@ -122,6 +122,8 @@ func (r *SingleClusterReconciler) reconcileRacks() reconcileResult { } r.Log.Info("Restarted the failed pods in the Rack", "rackID", state.Rack.ID, "failedPods", failedPods) + // Requeue after 1 second to fetch latest CR object with updated pod status + return reconcileRequeueAfter(1) } } diff --git a/go.mod b/go.mod index df7b4e00c..e0b0469e8 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,7 @@ go 1.21 toolchain go1.21.8 require ( - github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687 + github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412130613-2bc07a8654c3 github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d github.com/evanphx/json-patch v4.12.0+incompatible github.com/go-logr/logr v1.3.0 diff --git a/go.sum b/go.sum index f51cff785..8130ff961 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ 
github.com/aerospike/aerospike-client-go/v7 v7.1.0 h1:yvCTKdbpqZxHvv7sWsFHV1j49jZcC8yXRooWsDFqKtA= github.com/aerospike/aerospike-client-go/v7 v7.1.0/go.mod h1:AkHiKvCbqa1c16gCNGju3c5X/yzwLVvblNczqjxNwNk= -github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687 h1:d7oDvHmiKhq4rzcD/w3z9tP3wH0+iaDvxKDk3IYuqeU= -github.com/aerospike/aerospike-management-lib v1.3.1-0.20240404063536-2adfbedf9687/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ= +github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412130613-2bc07a8654c3 h1:buzjr9iDYSuI0jy/A8366pfPXalBP3Gke3MN+rO/Vzo= +github.com/aerospike/aerospike-management-lib v1.3.1-0.20240412130613-2bc07a8654c3/go.mod h1:E4dk798IikCp9a8fugpYoeQVIXuvdxogHvt6sKhaORQ= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ= diff --git a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml index 2066ce116..bef8aea20 100644 --- a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml +++ b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml @@ -281,6 +281,12 @@ spec: - customInterface type: string type: object + enableDynamicConfigUpdate: + description: EnableDynamicConfigUpdate enables dynamic config update + flow of the operator. If enabled, operator will try to update the + Aerospike config dynamically. In case of inconsistent state during + dynamic config update, operator falls back to rolling restart. 
+ type: boolean image: description: Aerospike server image type: string @@ -14245,10 +14251,15 @@ spec: items: type: string type: array - dynamicConfigFailed: - description: DynamicConfigFailed is true if aerospike config - change failed to apply dynamically. - type: boolean + dynamicConfigUpdateStatus: + description: DynamicConfigUpdateStatus is the status of dynamic + config update operation. Empty "" status means successful + update. + enum: + - Failed + - PartiallyFailed + - "" + type: string hostExternalIP: description: HostExternalIP of the K8s host this pod is scheduled on.