Skip to content

Commit

Permalink
Use correct order-of-operations when performing certificate rotation …
Browse files Browse the repository at this point in the history
…of K3s/RKE2 clusters (rancher#46940)


Signed-off-by: Chris Kim <[email protected]>
  • Loading branch information
Oats87 authored Sep 4, 2024
1 parent a1a0d2e commit a463c34
Show file tree
Hide file tree
Showing 4 changed files with 236 additions and 23 deletions.
19 changes: 18 additions & 1 deletion pkg/capr/planner/certificaterotation.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ func (p *Planner) rotateCertificates(controlPlane *rkev1.RKEControlPlane, status
return status, nil
}

for _, node := range collect(clusterPlan, anyRole) {
// Assemble our list of nodes in order of etcd-only, etcd with controlplane, controlplane-only, and everything else
orderedEntriesToRotate := collectOrderedCertificateRotationEntries(clusterPlan)

for _, node := range orderedEntriesToRotate {
if !shouldRotateEntry(controlPlane.Spec.RotateCertificates, node) {
continue
}
Expand Down Expand Up @@ -56,6 +59,14 @@ func (p *Planner) rotateCertificates(controlPlane *rkev1.RKEControlPlane, status
return status, errWaiting("certificate rotation done")
}

func collectOrderedCertificateRotationEntries(clusterPlan *plan.Plan) []*planEntry {
orderedEntriesToRotate := collect(clusterPlan, IsOnlyEtcd) // etcd or etcd + worker
orderedEntriesToRotate = append(orderedEntriesToRotate, collect(clusterPlan, roleAnd(isControlPlane, isEtcd))...) // etcd + controlplane or etcd + controlplane+worker
orderedEntriesToRotate = append(orderedEntriesToRotate, collect(clusterPlan, isOnlyControlPlane)...) // controlplane or controlplane + worker
orderedEntriesToRotate = append(orderedEntriesToRotate, collect(clusterPlan, isOnlyWorker)...) // worker
return orderedEntriesToRotate
}

// shouldRotate `true` if the cluster is ready and the generation is stale
func shouldRotate(cp *rkev1.RKEControlPlane) bool {
// if a spec is not defined there is nothing to do
Expand Down Expand Up @@ -94,6 +105,12 @@ func (p *Planner) rotateCertificatesPlan(controlPlane *rkev1.RKEControlPlane, to
return rotatePlan, joinedServer, nil
}

rotatePlan.Instructions = append(rotatePlan.Instructions, idempotentStopInstruction(
controlPlane,
"certificate-rotation/stop",
strconv.FormatInt(rotation.Generation, 10),
capr.GetRuntimeServerUnit(controlPlane.Spec.KubernetesVersion)))

args := []string{
"certificate",
"rotate",
Expand Down
206 changes: 196 additions & 10 deletions pkg/capr/planner/certificaterotation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"github.com/rancher/rancher/pkg/capr"
"github.com/rancher/rancher/pkg/provisioningv2/image"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
capi "sigs.k8s.io/cluster-api/api/v1beta1"
)

func Test_shouldRotateEntry(t *testing.T) {
Expand Down Expand Up @@ -52,6 +54,190 @@ func Test_shouldRotateEntry(t *testing.T) {
}
}

func Test_certificateRotationOrderedEntriesPlan(t *testing.T) {
tests := []struct {
name string
clusterPlan *plan.Plan
expectedNumberOfCollectedEntries int
expectedOrder []string
}{
{
name: "one all-in-one",
clusterPlan: &plan.Plan{
Machines: map[string]*capi.Machine{
"node1": {ObjectMeta: metav1.ObjectMeta{Name: "node1"}},
},
Metadata: map[string]*plan.Metadata{
"node1": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.ControlPlaneRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
},
},
expectedNumberOfCollectedEntries: 1,
expectedOrder: []string{
"node1",
},
},
{
name: "various dedicated roles",
clusterPlan: &plan.Plan{
Machines: map[string]*capi.Machine{
"node1": {ObjectMeta: metav1.ObjectMeta{Name: "node1"}},
"node2": {ObjectMeta: metav1.ObjectMeta{Name: "node2"}},
"node3": {ObjectMeta: metav1.ObjectMeta{Name: "node3"}},
},
Metadata: map[string]*plan.Metadata{
"node1": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node2": {Labels: map[string]string{capr.ControlPlaneRoleLabel: "true"}},
"node3": {Labels: map[string]string{capr.EtcdRoleLabel: "true"}},
},
},
expectedNumberOfCollectedEntries: 3,
expectedOrder: []string{
"node3",
"node2",
"node1",
},
},
{
name: "combined control and dedicated worker roles",
clusterPlan: &plan.Plan{
Machines: map[string]*capi.Machine{
"node1": {ObjectMeta: metav1.ObjectMeta{Name: "node1"}},
"node2": {ObjectMeta: metav1.ObjectMeta{Name: "node2"}},
"node3": {ObjectMeta: metav1.ObjectMeta{Name: "node3"}},
"node4": {ObjectMeta: metav1.ObjectMeta{Name: "node4"}},
"node5": {ObjectMeta: metav1.ObjectMeta{Name: "node5"}},
"node6": {ObjectMeta: metav1.ObjectMeta{Name: "node6"}},
},
Metadata: map[string]*plan.Metadata{
"node1": {Labels: map[string]string{capr.ControlPlaneRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
"node2": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node3": {Labels: map[string]string{capr.ControlPlaneRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
"node4": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node5": {Labels: map[string]string{capr.ControlPlaneRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
"node6": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
},
},
expectedNumberOfCollectedEntries: 6,
expectedOrder: []string{
"node1",
"node3",
"node5",
"node2",
"node4",
"node6",
},
},
{
name: "etcd-worker and dedicated control and worker roles",
clusterPlan: &plan.Plan{
Machines: map[string]*capi.Machine{
"node1": {ObjectMeta: metav1.ObjectMeta{Name: "node1"}},
"node2": {ObjectMeta: metav1.ObjectMeta{Name: "node2"}},
"node3": {ObjectMeta: metav1.ObjectMeta{Name: "node3"}},
"node4": {ObjectMeta: metav1.ObjectMeta{Name: "node4"}},
"node5": {ObjectMeta: metav1.ObjectMeta{Name: "node5"}},
"node6": {ObjectMeta: metav1.ObjectMeta{Name: "node6"}},
"node7": {ObjectMeta: metav1.ObjectMeta{Name: "node7"}},
},
Metadata: map[string]*plan.Metadata{
"node1": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
"node2": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node3": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
"node4": {Labels: map[string]string{capr.ControlPlaneRoleLabel: "true"}},
"node5": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node6": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.EtcdRoleLabel: "true"}},
"node7": {Labels: map[string]string{capr.EtcdRoleLabel: "true"}},
},
},
expectedNumberOfCollectedEntries: 7,
expectedOrder: []string{
"node1",
"node3",
"node6",
"node7",
"node4",
"node2",
"node5",
},
},
{
name: "control-worker and dedicated etcd and worker roles",
clusterPlan: &plan.Plan{
Machines: map[string]*capi.Machine{
"node1": {ObjectMeta: metav1.ObjectMeta{Name: "node1"}},
"node2": {ObjectMeta: metav1.ObjectMeta{Name: "node2"}},
"node3": {ObjectMeta: metav1.ObjectMeta{Name: "node3"}},
"node4": {ObjectMeta: metav1.ObjectMeta{Name: "node4"}},
"node5": {ObjectMeta: metav1.ObjectMeta{Name: "node5"}},
"node6": {ObjectMeta: metav1.ObjectMeta{Name: "node6"}},
"node7": {ObjectMeta: metav1.ObjectMeta{Name: "node7"}},
},
Metadata: map[string]*plan.Metadata{
"node1": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.ControlPlaneRoleLabel: "true"}},
"node2": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node3": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.ControlPlaneRoleLabel: "true"}},
"node4": {Labels: map[string]string{capr.EtcdRoleLabel: "true"}},
"node5": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node6": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node7": {Labels: map[string]string{capr.WorkerRoleLabel: "true", capr.ControlPlaneRoleLabel: "true"}},
},
},
expectedNumberOfCollectedEntries: 7,
expectedOrder: []string{
"node4",
"node1",
"node3",
"node7",
"node2",
"node5",
"node6",
},
},
{
name: "traditional architecture with a no role node",
clusterPlan: &plan.Plan{
Machines: map[string]*capi.Machine{
"node1": {ObjectMeta: metav1.ObjectMeta{Name: "node1"}},
"node2": {ObjectMeta: metav1.ObjectMeta{Name: "node2"}},
"node3": {ObjectMeta: metav1.ObjectMeta{Name: "node3"}},
"node4": {ObjectMeta: metav1.ObjectMeta{Name: "node4"}},
"node5": {ObjectMeta: metav1.ObjectMeta{Name: "node5"}},
"node6": {ObjectMeta: metav1.ObjectMeta{Name: "node6"}},
"node7": {ObjectMeta: metav1.ObjectMeta{Name: "node7"}},
},
Metadata: map[string]*plan.Metadata{
"node1": {Labels: map[string]string{capr.EtcdRoleLabel: "true", capr.ControlPlaneRoleLabel: "true"}},
"node2": {Labels: map[string]string{capr.EtcdRoleLabel: "true", capr.ControlPlaneRoleLabel: "true"}},
"node3": {Labels: map[string]string{capr.EtcdRoleLabel: "true", capr.ControlPlaneRoleLabel: "true"}},
"node4": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node5": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node6": {Labels: map[string]string{capr.WorkerRoleLabel: "true"}},
"node7": {Labels: map[string]string{}},
},
},
expectedNumberOfCollectedEntries: 6,
expectedOrder: []string{
"node1",
"node2",
"node3",
"node4",
"node5",
"node6",
},
},
}

for _, tt := range tests {
t.Run((tt.name), func(t *testing.T) {
collected := collectOrderedCertificateRotationEntries(tt.clusterPlan)
assert.Equal(t, tt.expectedNumberOfCollectedEntries, len(collected))
for i, n := range tt.expectedOrder {
assert.Equal(t, n, collected[i].Machine.Name)
}
})
}
}

func Test_rotateCertificatesPlan(t *testing.T) {
type expected struct {
otiIndex int
Expand Down Expand Up @@ -82,7 +268,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
joinServer: "my-magic-joinserver",
setup: genericSetup,
expected: expected{
otiIndex: 1,
otiIndex: 2,
oti: &[]plan.OneTimeInstruction{idempotentInstruction(
createTestControlPlane("v1.25.7+k3s1"),
"certificate-rotation/rm-kcm-cert",
Expand All @@ -94,7 +280,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
},
[]string{},
)}[0],
otiCount: 7,
otiCount: 8,
joinServer: "my-magic-joinserver",
},
},
Expand All @@ -108,7 +294,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
Generation: 244,
},
expected: expected{
otiIndex: 1,
otiIndex: 2,
oti: &[]plan.OneTimeInstruction{idempotentInstruction(
createTestControlPlane("v1.25.7+rke2r1"),
"certificate-rotation/rm-kcm-cert",
Expand All @@ -120,7 +306,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
},
[]string{},
)}[0],
otiCount: 10, // the extra removal instructions are for removing the static pod manifests for RKE2
otiCount: 11, // the extra removal instructions are for removing the static pod manifests for RKE2
joinServer: "my-magic-joinserver",
},
},
Expand All @@ -131,7 +317,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
joinServer: "my-magic-joinserver",
setup: genericSetup,
expected: expected{
otiIndex: 3,
otiIndex: 4,
oti: &[]plan.OneTimeInstruction{idempotentInstruction(
createTestControlPlane("v1.25.7+k3s1"),
"certificate-rotation/rm-ks-cert",
Expand All @@ -143,7 +329,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
},
[]string{},
)}[0],
otiCount: 7,
otiCount: 8,
joinServer: "my-magic-joinserver",
},
},
Expand All @@ -154,7 +340,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
joinServer: "my-magic-joinserver",
setup: genericSetup,
expected: expected{
otiIndex: 4,
otiIndex: 5,
oti: &[]plan.OneTimeInstruction{idempotentInstruction(
createTestControlPlane("v1.25.7+rke2r1"),
"certificate-rotation/rm-ks-cert",
Expand All @@ -166,7 +352,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
},
[]string{},
)}[0],
otiCount: 10, // the extra removal instructions are for removing the static pod manifests for RKE2
otiCount: 11, // the extra removal instructions are for removing the static pod manifests for RKE2
joinServer: "my-magic-joinserver",
},
},
Expand Down Expand Up @@ -214,7 +400,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
},
},
expected: expected{
otiIndex: 1,
otiIndex: 2,
oti: &[]plan.OneTimeInstruction{idempotentInstruction(
createTestControlPlane("v1.25.7+k3s1"),
"certificate-rotation/rm-kcm-cert",
Expand All @@ -226,7 +412,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
},
[]string{},
)}[0],
otiCount: 7,
otiCount: 8,
joinServer: "my-magic-joinserver",
},
},
Expand Down
21 changes: 20 additions & 1 deletion pkg/capr/planner/idempotent.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ func idempotentInstruction(controlPlane *rkev1.RKEControlPlane, identifier, valu

// convertToIdempotentInstruction converts a OneTimeInstruction to a OneTimeInstruction wrapped with the idempotent script.
// This is useful when an instruction may be used in various phases, without needing idempotency in all cases.
// identifier is expected to be a unique key for tracking, and value should be something like the generation of the attempt
// (and is what we track to determine whether we should run the instruction or not)
func convertToIdempotentInstruction(controlPlane *rkev1.RKEControlPlane, identifier, value string, instruction plan.OneTimeInstruction) plan.OneTimeInstruction {
newInstruction := idempotentInstruction(controlPlane, identifier, value, instruction.Command, instruction.Args, instruction.Env)
newInstruction.Image = instruction.Image
Expand All @@ -86,7 +88,8 @@ func convertToIdempotentInstruction(controlPlane *rkev1.RKEControlPlane, identif
}

// idempotentRestartInstructions generates an idempotent restart instructions for the given runtimeUnit. It checks the
// unit for failure, resets it if necessary, and restarts the unit.
// unit for failure, resets it if necessary, and restarts the unit. identifier is expected to be a unique key for tracking,
// and value should be something like the generation of the attempt (and is what we track to determine whether we should run the instruction or not)
func idempotentRestartInstructions(controlPlane *rkev1.RKEControlPlane, identifier, value, runtimeUnit string) []plan.OneTimeInstruction {
return []plan.OneTimeInstruction{
idempotentInstruction(
Expand All @@ -113,3 +116,19 @@ func idempotentRestartInstructions(controlPlane *rkev1.RKEControlPlane, identifi
),
}
}

// idempotentStopInstruction generates an idempotent stop instruction for the given runtimeUnit. It simply calls systemctl stop <runtime-unit>
// identifier is expected to be a unique key for tracking, and value should be something like the generation of the attempt (and is what we track to determine whether we should run the instruction or not)
func idempotentStopInstruction(controlPlane *rkev1.RKEControlPlane, identifier, value, runtimeUnit string) plan.OneTimeInstruction {
return idempotentInstruction(
controlPlane,
identifier+"-stop",
value,
"systemctl",
[]string{
"stop",
runtimeUnit,
},
[]string{},
)
}
13 changes: 2 additions & 11 deletions scripts/provisioning-tests
Original file line number Diff line number Diff line change
Expand Up @@ -46,23 +46,14 @@ eval "$(grep '^ENV CATTLE_WINS_AGENT' package/Dockerfile | awk '{print "export "
eval "$(grep '^ENV CATTLE_CSI_PROXY_AGENT' package/Dockerfile | awk '{print "export " $2 "=" $3}')"
eval "$(grep '^ENV CATTLE_KDM_BRANCH' package/Dockerfile | awk '{print "export " $2 "=" $3}')"

#if [ -z "${SOME_K8S_VERSION}" ]; then
if [ -z "${SOME_K8S_VERSION}" ]; then
# Only set SOME_K8S_VERSION if it is empty -- for KDM provisioning tests, this value should already be populated
# Get the last release for $DIST, which is usually the latest version or an experimental version.
# Previously this would use channels, but channels no longer reflect the latest version since
# https://github.com/rancher/rancher/issues/36827 has added appDefaults. We do not use appDefaults
# here for simplicity's sake, as it requires semver parsing & matching. The last release should
# be good enough for our needs.
#export SOME_K8S_VERSION=$(curl -sS https://raw.githubusercontent.com/rancher/kontainer-driver-metadata/dev-v2.9/data/data.json | jq -r ".$DIST.releases[-1].version")
#fi

# Remove the following K8s version setting after the root cause of issues/rancher/45577 is determined
if [ -z "${SOME_K8S_VERSION}" ]; then
if [ "$DIST" = "rke2" ]; then
export SOME_K8S_VERSION="v1.27.10+rke2r1"
else
export SOME_K8S_VERSION=$(curl https://raw.githubusercontent.com/rancher/kontainer-driver-metadata/dev-v2.10/data/data.json | jq -r ".$DIST.releases[-1].version")
fi
export SOME_K8S_VERSION=$(curl -sS https://raw.githubusercontent.com/rancher/kontainer-driver-metadata/dev-v2.10/data/data.json | jq -r ".$DIST.releases[-1].version")
fi

if [ -z "${CATTLE_CHART_DEFAULT_URL}" ]; then
Expand Down

0 comments on commit a463c34

Please sign in to comment.