Skip to content

Commit

Permalink
[KO-211] A way to trigger a warm and cold rolling restart on demand (#…
Browse files Browse the repository at this point in the history
…294)

* Added a way to trigger a warm and cold rolling restart on demand using the `Operations` field in the CR

* Allowing only a single operation at a time.

* Disallow operation modification

* Blocking image upgrade and rack scaleup along with on-demand operations
  • Loading branch information
tanmayja authored Jul 11, 2024
1 parent 03662f3 commit efa1ba4
Show file tree
Hide file tree
Showing 24 changed files with 1,130 additions and 217 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM --platform=$BUILDPLATFORM golang:1.21 as builder
FROM --platform=$BUILDPLATFORM golang:1.21 AS builder

# OS and Arch args
ARG TARGETOS
Expand Down
4 changes: 2 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ pipeline {
sh "./snyk-linux test --severity-threshold=high --fail-on=all"

// Scan the operator images
sh "./snyk-linux container test ${OPERATOR_CONTAINER_IMAGE_CANDIDATE_NAME} --severity-threshold=high --file=Dockerfile --policy-path=.snyk --fail-on=all"
sh "./snyk-linux container test ${OPERATOR_BUNDLE_IMAGE_CANDIDATE_NAME} --severity-threshold=high --file=Dockerfile --policy-path=.snyk --fail-on=all"
sh "./snyk-linux container test ${OPERATOR_CONTAINER_IMAGE_CANDIDATE_NAME} --severity-threshold=high --file=Dockerfile --policy-path=.snyk --fail-on=all"
sh "./snyk-linux container test ${OPERATOR_BUNDLE_IMAGE_CANDIDATE_NAME} --severity-threshold=high --file=Dockerfile --policy-path=.snyk --fail-on=all"
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ go-lint: golanci-lint ## Run golangci-lint against code.
.PHONY: test
test: manifests generate fmt vet envtest ## Run tests.
# KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test ./... -coverprofile cover.out
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" cd $(shell pwd)/test; go run github.com/onsi/ginkgo/v2/ginkgo -coverprofile cover.out -progress -v -timeout=12h0m0s -focus=${FOCUS} --junit-report="junit.xml" -- ${ARGS}
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" cd $(shell pwd)/test; go run github.com/onsi/ginkgo/v2/ginkgo -coverprofile cover.out -show-node-events -v -timeout=12h0m0s -focus=${FOCUS} --junit-report="junit.xml" -- ${ARGS}

##@ Build

Expand Down
38 changes: 38 additions & 0 deletions api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,31 @@ type AerospikeClusterSpec struct { //nolint:govet // for readability
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Kubernetes Node BlockList"
// +kubebuilder:validation:MinItems:=1
K8sNodeBlockList []string `json:"k8sNodeBlockList,omitempty"`
// Operations is a list of on-demand operations to be performed on the Aerospike cluster.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Operations"
// +kubebuilder:validation:MaxItems:=1
Operations []OperationSpec `json:"operations,omitempty"`
}

type OperationKind string

const (
// OperationWarmRestart is the on-demand operation that leads to the warm restart of the aerospike pods
// (Restarting ASD in the pods). https://aerospike.com/docs/cloud/kubernetes/operator/Warm-restart
OperationWarmRestart OperationKind = "WarmRestart"

// OperationPodRestart is the on-demand operation that leads to the restart of aerospike pods.
OperationPodRestart OperationKind = "PodRestart"
)

type OperationSpec struct {
// Kind is the type of operation to be performed on the Aerospike cluster.
// +kubebuilder:validation:Enum=WarmRestart;PodRestart
Kind OperationKind `json:"kind"`
// +kubebuilder:validation:MaxLength=20
// +kubebuilder:validation:MinLength=1
ID string `json:"id"`
PodList []string `json:"podList,omitempty"`
}

type SeedsFinderServices struct {
Expand Down Expand Up @@ -699,6 +724,8 @@ type AerospikeClusterStatusSpec struct { //nolint:govet // for readability
RosterNodeBlockList []string `json:"rosterNodeBlockList,omitempty"`
// K8sNodeBlockList is a list of Kubernetes nodes which are not used for Aerospike pods.
K8sNodeBlockList []string `json:"k8sNodeBlockList,omitempty"`
// Operations is a list of on-demand operation to be performed on the Aerospike cluster.
Operations []OperationSpec `json:"operations,omitempty"`
}

// AerospikeClusterStatus defines the observed state of AerospikeCluster
Expand Down Expand Up @@ -1044,6 +1071,12 @@ func CopySpecToStatus(spec *AerospikeClusterSpec) (*AerospikeClusterStatusSpec,
status.K8sNodeBlockList = *k8sNodeBlockList
}

if len(spec.Operations) != 0 {
operations := lib.DeepCopy(&spec.Operations).(*[]OperationSpec)

status.Operations = *operations
}

return &status, nil
}

Expand Down Expand Up @@ -1143,5 +1176,10 @@ func CopyStatusToSpec(status *AerospikeClusterStatusSpec) (*AerospikeClusterSpec
spec.K8sNodeBlockList = *k8sNodeBlockList
}

if len(status.Operations) != 0 {
operations := lib.DeepCopy(&status.Operations).(*[]OperationSpec)
spec.Operations = *operations
}

return &spec, nil
}
98 changes: 68 additions & 30 deletions api/v1/aerospikecluster_validating_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ func (c *AerospikeCluster) ValidateUpdate(oldObj runtime.Object) (admission.Warn
return nil, err
}

if err := validateOperationUpdate(
&old.Spec, &c.Spec, &c.Status,
); err != nil {
return nil, err
}

// Validate AerospikeConfig update
if err := validateAerospikeConfigUpdate(
aslog, incomingVersion, outgoingVersion,
Expand Down Expand Up @@ -191,6 +197,10 @@ func (c *AerospikeCluster) validate(aslog logr.Logger) error {
return err
}

if err := c.validateOperation(); err != nil {
return err
}

// Storage should be validated before validating aerospikeConfig and fileStorage
if err := validateStorage(&c.Spec.Storage, &c.Spec.PodSpec); err != nil {
return err
Expand Down Expand Up @@ -263,6 +273,19 @@ func (c *AerospikeCluster) validate(aslog logr.Logger) error {
return c.validateSCNamespaces()
}

func (c *AerospikeCluster) validateOperation() error {
// Nothing to validate if no operation
if len(c.Spec.Operations) == 0 {
return nil
}

if c.Status.AerospikeConfig == nil {
return fmt.Errorf("operation cannot be added during aerospike cluster creation")
}

return nil
}

func (c *AerospikeCluster) validateSCNamespaces() error {
scNamespaceSet := sets.NewString()

Expand Down Expand Up @@ -1292,20 +1315,22 @@ func validateSecurityConfigUpdate(
func validateEnableSecurityConfig(newConfSpec, oldConfSpec *AerospikeConfigSpec) error {
newConf := newConfSpec.Value
oldConf := oldConfSpec.Value

oldSec, oldSecConfFound := oldConf["security"]
newSec, newSecConfFound := newConf["security"]
if !oldSecConfFound {
return nil
}

if oldSecConfFound && !newSecConfFound {
newSec, newSecConfFound := newConf["security"]
if !newSecConfFound {
return fmt.Errorf("cannot remove cluster security config")
}

if oldSecConfFound && newSecConfFound {
oldSecFlag, oldEnableSecurityFlagFound := oldSec.(map[string]interface{})["enable-security"]
newSecFlag, newEnableSecurityFlagFound := newSec.(map[string]interface{})["enable-security"]
oldSecFlag, oldEnableSecurityFlagFound := oldSec.(map[string]interface{})["enable-security"]
newSecFlag, newEnableSecurityFlagFound := newSec.(map[string]interface{})["enable-security"]

if oldEnableSecurityFlagFound && oldSecFlag.(bool) && (!newEnableSecurityFlagFound || !newSecFlag.(bool)) {
return fmt.Errorf("cannot disable cluster security in running cluster")
}
if oldEnableSecurityFlagFound && oldSecFlag.(bool) && (!newEnableSecurityFlagFound || !newSecFlag.(bool)) {
return fmt.Errorf("cannot disable cluster security in running cluster")
}

return nil
Expand Down Expand Up @@ -2360,33 +2385,46 @@ func (c *AerospikeCluster) validateEnableDynamicConfigUpdate() error {
return nil
}

func getMinRunningInitVersion(pods map[string]AerospikePodStatus) (string, error) {
minVersion := ""
func validateOperationUpdate(oldSpec, newSpec *AerospikeClusterSpec, status *AerospikeClusterStatus) error {
if len(newSpec.Operations) == 0 {
return nil
}

for idx := range pods {
if pods[idx].InitImage != "" {
version, err := GetImageVersion(pods[idx].InitImage)
if err != nil {
return "", err
}
newOp := &newSpec.Operations[0]

if minVersion == "" {
minVersion = version
continue
}
var oldOp *OperationSpec

val, err := lib.CompareVersions(version, minVersion)
if err != nil {
return "", fmt.Errorf("failed to check image version: %v", err)
}
if len(oldSpec.Operations) != 0 {
oldOp = &oldSpec.Operations[0]
}

if val < 0 {
minVersion = version
}
} else {
return baseInitVersion, nil
if oldOp != nil && oldOp.ID == newOp.ID && !reflect.DeepEqual(oldOp, newOp) {
return fmt.Errorf("operation %s cannot be updated", newOp.ID)
}

allPodNames := GetAllPodNames(status.Pods)

podSet := sets.New(newSpec.Operations[0].PodList...)
if !allPodNames.IsSuperset(podSet) {
return fmt.Errorf("invalid pod names in operation %v", podSet.Difference(allPodNames).UnsortedList())
}

// Don't allow any on-demand operation along with these cluster change:
// 1- scale up
// 2- racks added or removed
// 3- image update
// New pods won't be available for operation
if !reflect.DeepEqual(newSpec.Operations, status.Operations) {
switch {
case newSpec.Size > status.Size:
return fmt.Errorf("cannot change Spec.Operations along with cluster scale-up")
case len(newSpec.RackConfig.Racks) != len(status.RackConfig.Racks) ||
len(newSpec.RackConfig.Racks) != len(oldSpec.RackConfig.Racks):
return fmt.Errorf("cannot change Spec.Operations along with rack addition/removal")
case newSpec.Image != status.Image || newSpec.Image != oldSpec.Image:
return fmt.Errorf("cannot change Spec.Operations along with image update")
}
}

return minVersion, nil
return nil
}
61 changes: 61 additions & 0 deletions api/v1/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"regexp"
"strings"

"k8s.io/apimachinery/pkg/util/sets"

v1 "k8s.io/api/core/v1"
"k8s.io/utils/ptr"

Expand Down Expand Up @@ -536,3 +538,62 @@ func GetDefaultPasswordFilePath(aerospikeConfigSpec *AerospikeConfigSpec) *strin

return &passFile
}

func getMinRunningInitVersion(pods map[string]AerospikePodStatus) (string, error) {
minVersion := ""

for idx := range pods {
if pods[idx].InitImage != "" {
version, err := GetImageVersion(pods[idx].InitImage)
if err != nil {
return "", err
}

if minVersion == "" {
minVersion = version
continue
}

val, err := lib.CompareVersions(version, minVersion)
if err != nil {
return "", fmt.Errorf("failed to check image version: %v", err)
}

if val < 0 {
minVersion = version
}
} else {
return baseInitVersion, nil
}
}

return minVersion, nil
}

func DistributeItems(totalItems, totalGroups int) []int {
itemsPerGroup, extraItems := totalItems/totalGroups, totalItems%totalGroups

// Distributing nodes in given racks
var topology []int

for groupIdx := 0; groupIdx < totalGroups; groupIdx++ {
itemsForThisGroup := itemsPerGroup
if groupIdx < extraItems {
itemsForThisGroup++
}

topology = append(topology, itemsForThisGroup)
}

return topology
}

func GetAllPodNames(pods map[string]AerospikePodStatus) sets.Set[string] {
podNames := make(sets.Set[string])

for podName := range pods {
podNames.Insert(podName)
}

return podNames
}
34 changes: 34 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit efa1ba4

Please sign in to comment.