Support synchronizing resources directly to downstream clusters (rancher#46722)

* conditionally bootstrap controllers based on condition

* finish pre-bootstrap templating

* change to feature-flag for feature and implement controllers

* update tests to expect second call to planner#mgmtClusters.Get(...)

* code review comments

Co-authored-by: Jake Hyde <[email protected]>

* refactor bootstrapManifests into an InfoFunction

---------

Co-authored-by: Jake Hyde <[email protected]>
thatmidwesterncoder and jakefhyde authored Oct 2, 2024
1 parent bb98b23 commit 0e82900
Showing 18 changed files with 374 additions and 30 deletions.
5 changes: 5 additions & 0 deletions pkg/agent/rancher/rancher.go
@@ -70,6 +70,11 @@ type handler struct {
}

func (h *handler) startRancher() {
if features.ProvisioningPreBootstrap.Enabled() {
logrus.Debugf("not starting embedded rancher due to pre-bootstrap...")
return
}

clientConfig := kubeconfig.GetNonInteractiveClientConfig("")
server, err := rancher.New(h.ctx, clientConfig, &rancher.Options{
HTTPListenPort: 80,
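Per the commit message, this gate moved from a cluster condition to a feature flag, consumed here through features.ProvisioningPreBootstrap.Enabled(). For orientation, a rancher feature flag declaration typically looks like the sketch below; the flag name, description, defaults, and the exact newFeature signature are assumptions, not taken from this diff:

```go
// pkg/features (illustrative sketch only, not the declaration from this commit)
var ProvisioningPreBootstrap = newFeature(
	"provisioningprebootstrap", // assumed flag name
	"Synchronize resources to downstream clusters before bootstrapping.", // assumed description
	false, // default: disabled (assumed)
	true,  // dynamic: toggleable at runtime (assumed)
	false, // install: not an install-time option (assumed)
)
```

With the flag on, the embedded Rancher in the agent stays down so the pre-bootstrap flow owns that phase of startup.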
@@ -47,7 +47,7 @@ func (ch *ClusterImport) ClusterImportHandler(resp http.ResponseWriter, req *http.Request) {
}

if err = systemtemplate.SystemTemplate(resp, image.Resolve(settings.AgentImage.Get()), authImage, "", token, url,
- false, cluster, nil, nil, nil); err != nil {
+ false, false, cluster, nil, nil, nil); err != nil {
resp.WriteHeader(500)
resp.Write([]byte(err.Error()))
}
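SystemTemplate gains a boolean directly after the Windows flag; this call site hard-codes it to false, while clusterdeploy below passes !apimgmtv3.ClusterConditionPreBootstrapped.IsTrue(cluster). A sketch of the implied signature, with parameter names and types inferred from the two call sites in this commit — only the added bool is established by the diff:

```go
// Assumed shape of the updated helper in pkg/systemtemplate.
func SystemTemplate(
	resp io.Writer, // rendered agent manifest is written here
	agentImage, authImage, clusterName, token, url string,
	isWindowsCluster bool,
	preBootstrap bool, // new: true while the cluster has not finished pre-bootstrapping
	cluster *apimgmtv3.Cluster,
	features map[string]bool,
	taints []corev1.Taint,
	secretLister v1.SecretLister,
) error
```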
19 changes: 10 additions & 9 deletions pkg/apis/management.cattle.io/v3/cluster_types.go
@@ -36,15 +36,16 @@ const (
ClusterActionSaveAsTemplate = "saveAsTemplate"

// ClusterConditionReady Cluster ready to serve API (healthy when true, unhealthy when false)
ClusterConditionReady condition.Cond = "Ready"
ClusterConditionPending condition.Cond = "Pending"
ClusterConditionCertsGenerated condition.Cond = "CertsGenerated"
ClusterConditionEtcd condition.Cond = "etcd"
+ ClusterConditionPreBootstrapped condition.Cond = "PreBootstrapped"
ClusterConditionProvisioned condition.Cond = "Provisioned"
ClusterConditionUpdated condition.Cond = "Updated"
ClusterConditionUpgraded condition.Cond = "Upgraded"
ClusterConditionWaiting condition.Cond = "Waiting"
ClusterConditionRemoved condition.Cond = "Removed"
// ClusterConditionNoDiskPressure true when all cluster nodes have sufficient disk
ClusterConditionNoDiskPressure condition.Cond = "NoDiskPressure"
// ClusterConditionNoMemoryPressure true when all cluster nodes have sufficient memory
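The new PreBootstrapped condition is driven through the usual condition.Cond helpers. The first two calls below are the ones this commit makes (in pkg/clustermanager and the cluster-connected checker further down); the final True call is the unblocking step some component must eventually perform, and where that happens is not shown in this diff:

```go
// Seed the condition as Unknown if the cluster object doesn't carry it yet.
apimgmtv3.ClusterConditionPreBootstrapped.CreateUnknownIfNotExists(cluster)

// Gate work until pre-bootstrap has completed.
if !apimgmtv3.ClusterConditionPreBootstrapped.IsTrue(cluster) {
	// run (or wait on) the pre-bootstrap phase
}

// Whoever finishes the downstream sync flips the condition (assumed; the
// updater is outside this diff).
apimgmtv3.ClusterConditionPreBootstrapped.True(cluster)
```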
2 changes: 1 addition & 1 deletion pkg/capr/planner/agent.go
@@ -14,7 +14,7 @@ func (p *Planner) generateClusterAgentManifest(controlPlane *rkev1.RKEControlPlane
return nil, nil
}

- tokens, err := p.clusterRegistrationTokenCache.GetByIndex(clusterRegToken, controlPlane.Spec.ManagementClusterName)
+ tokens, err := p.clusterRegistrationTokenCache.GetByIndex(ClusterRegToken, controlPlane.Spec.ManagementClusterName)
if err != nil {
return nil, err
}
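The lowercase clusterRegToken becomes the exported ClusterRegToken, so code outside the planner package — such as the new prebootstrap retriever wired up in pkg/controllers/capr/controllers.go below — can query the same ClusterRegistrationToken cache index. A hedged usage sketch; the function, cache type, and error handling are assumptions:

```go
// registrationToken resolves a cluster's registration token via the index
// the planner registers in New (tokens are indexed by Spec.ClusterName).
func registrationToken(cache mgmtcontrollers.ClusterRegistrationTokenCache, mgmtClusterName string) (string, error) {
	tokens, err := cache.GetByIndex(planner.ClusterRegToken, mgmtClusterName)
	if err != nil {
		return "", err
	}
	if len(tokens) == 0 {
		return "", fmt.Errorf("no registration token found for cluster %s", mgmtClusterName)
	}
	return tokens[0].Status.Token, nil
}
```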
7 changes: 4 additions & 3 deletions pkg/capr/planner/certificaterotation_test.go
@@ -247,7 +247,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
}

genericSetup := func(mp *mockPlanner) {
- mp.clusterRegistrationTokenCache.EXPECT().GetByIndex(clusterRegToken, "somecluster").Return([]*v3.ClusterRegistrationToken{{Status: v3.ClusterRegistrationTokenStatus{Token: "lol"}}}, nil)
+ mp.clusterRegistrationTokenCache.EXPECT().GetByIndex(ClusterRegToken, "somecluster").Return([]*v3.ClusterRegistrationToken{{Status: v3.ClusterRegistrationTokenStatus{Token: "lol"}}}, nil)
mp.managementClusters.EXPECT().Get("somecluster").Return(&v3.Cluster{}, nil)
}

@@ -423,8 +423,9 @@
tt := tt
t.Run(tt.name, func(t *testing.T) {
mockPlanner := newMockPlanner(t, InfoFunctions{
SystemAgentImage: func() string { return "system-agent" },
ImageResolver: image.ResolveWithControlPlane,
+ GetBootstrapManifests: func(plane *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil },
})
if tt.setup != nil {
tt.setup(mockPlanner)
26 changes: 24 additions & 2 deletions pkg/capr/planner/planner.go
@@ -42,7 +42,7 @@ import (
)

const (
- clusterRegToken = "clusterRegToken"
+ ClusterRegToken = "clusterRegToken"

EtcdSnapshotConfigMapKey = "provisioning-cluster-spec"

@@ -135,10 +135,11 @@ type InfoFunctions struct {
ReleaseData func(context.Context, *rkev1.RKEControlPlane) *model.Release
SystemAgentImage func() string
SystemPodLabelSelectors func(plane *rkev1.RKEControlPlane) []string
GetBootstrapManifests func(plane *rkev1.RKEControlPlane) ([]plan.File, error)
}

func New(ctx context.Context, clients *wrangler.Context, functions InfoFunctions) *Planner {
- clients.Mgmt.ClusterRegistrationToken().Cache().AddIndexer(clusterRegToken, func(obj *v3.ClusterRegistrationToken) ([]string, error) {
+ clients.Mgmt.ClusterRegistrationToken().Cache().AddIndexer(ClusterRegToken, func(obj *v3.ClusterRegistrationToken) ([]string, error) {
return []string{obj.Spec.ClusterName}, nil
})
store := NewStore(clients.Core.Secret(),
@@ -868,6 +869,11 @@ func (p *Planner) reconcile(controlPlane *rkev1.RKEControlPlane, tokensSecret plan.Secret
return err
}

preBootstrapManifests, err := p.retrievalFunctions.GetBootstrapManifests(controlPlane)
if err != nil {
return err
}

for _, r := range reconcilables {
logrus.Tracef("[planner] rkecluster %s/%s reconcile tier %s - processing machine entry: %s/%s", controlPlane.Namespace, controlPlane.Name, tierName, r.entry.Machine.Namespace, r.entry.Machine.Name)
// we exclude here and not in collect to ensure that include matched at least one node
@@ -961,6 +967,9 @@
} else if !kubeletVersionUpToDate(controlPlane, r.entry.Machine) {
outOfSync = append(outOfSync, r.entry.Machine.Name)
messages[r.entry.Machine.Name] = append(messages[r.entry.Machine.Name], "waiting for kubelet to update")
} else if isControlPlane(r.entry) && len(preBootstrapManifests) > 0 {
outOfSync = append(outOfSync, r.entry.Machine.Name)
messages[r.entry.Machine.Name] = append(messages[r.entry.Machine.Name], "waiting for cluster pre-bootstrap to complete")
} else if isControlPlane(r.entry) && !controlPlane.Status.AgentConnected {
// If the control plane nodes are currently being provisioned/updated, then it should be ensured that cluster-agent is connected.
// Without the agent connected, the controllers running in Rancher, including CAPI, can't communicate with the downstream cluster.
@@ -1021,6 +1030,7 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlane
nodePlan plan.NodePlan
err error
)

if !controlPlane.Spec.UnmanagedConfig {
nodePlan, reg, err = p.commonNodePlan(controlPlane, plan.NodePlan{})
if err != nil {
@@ -1030,11 +1040,22 @@
joinedServer string
config map[string]interface{}
)

nodePlan, config, joinedServer, err = p.addConfigFile(nodePlan, controlPlane, entry, tokensSecret, joinServer, reg, renderS3)
if err != nil {
return nodePlan, config, joinedServer, err
}

bootstrapManifests, err := p.retrievalFunctions.GetBootstrapManifests(controlPlane)
if err != nil {
return nodePlan, config, joinedServer, err
}
if len(bootstrapManifests) > 0 {
logrus.Debugf("[planner] adding pre-bootstrap manifests")
nodePlan.Files = append(nodePlan.Files, bootstrapManifests...)
return nodePlan, config, joinedServer, err
}

nodePlan, err = p.addManifests(nodePlan, controlPlane, entry)
if err != nil {
return nodePlan, config, joinedServer, err
Expand All @@ -1058,6 +1079,7 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan

return nodePlan, config, joinedServer, err
}

return plan.NodePlan{}, map[string]interface{}{}, "", nil
}

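Two behaviors fall out of the planner changes: while GetBootstrapManifests returns any files, generatePlanWithConfigFiles returns early after the config-file step, so nodes get only the machine config plus the pre-bootstrap manifests instead of the full manifest/registry set; and reconcile keeps control-plane machines out of sync ("waiting for cluster pre-bootstrap to complete") until the function returns an empty slice. A sketch of what a returned plan.File might carry — the field names and manifest path are assumptions, not taken from this diff:

```go
// A pre-bootstrap manifest is delivered like any other planner file:
// base64-encoded content written to a path the distro's manifest
// controller watches (path shown is an assumed RKE2 location).
func bootstrapFile(manifestYAML []byte) plan.File {
	return plan.File{
		Content: base64.StdEncoding.EncodeToString(manifestYAML),
		Path:    "/var/lib/rancher/rke2/server/manifests/pre-bootstrap-cluster-agent.yaml",
		Dynamic: true,
	}
}
```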
2 changes: 2 additions & 0 deletions pkg/capr/planner/planner_test.go
@@ -148,6 +148,7 @@ func TestPlanner_addInstruction(t *testing.T) {
entry := createTestPlanEntry(tt.args.os)
planner.retrievalFunctions.SystemAgentImage = func() string { return "system-agent" }
planner.retrievalFunctions.ImageResolver = image.ResolveWithControlPlane
planner.retrievalFunctions.GetBootstrapManifests = func(cp *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil }
// act
p, err := planner.addInstallInstructionWithRestartStamp(plan.NodePlan{}, controlPlane, entry)

@@ -518,6 +519,7 @@ func Test_getInstallerImage(t *testing.T) {
var planner Planner
planner.retrievalFunctions.ImageResolver = image.ResolveWithControlPlane
planner.retrievalFunctions.SystemAgentImage = func() string { return "rancher/system-agent-installer-" }
planner.retrievalFunctions.GetBootstrapManifests = func(cp *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil }

assert.Equal(t, tt.expected, planner.getInstallerImage(tt.controlPlane))
})
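Because plan generation now calls GetBootstrapManifests unconditionally, every test that builds a Planner must stub it or hit a nil-function panic. The no-op below (the same stub these tests add) keeps them on the legacy path; returning files instead would exercise the new "waiting for cluster pre-bootstrap to complete" branch in reconcile:

```go
// No pre-bootstrap manifests: plans are generated exactly as before.
planner.retrievalFunctions.GetBootstrapManifests = func(*rkev1.RKEControlPlane) ([]plan.File, error) {
	return nil, nil
}
```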
11 changes: 11 additions & 0 deletions pkg/clustermanager/manager.go
@@ -219,6 +219,17 @@ func (m *Manager) doStart(rec *record, clusterOwner bool) (exit error) {
defer m.startSem.Release(1)

transaction := controller.NewHandlerTransaction(rec.ctx)

// pre-bootstrap the cluster if it's not already bootstrapped
apimgmtv3.ClusterConditionPreBootstrapped.CreateUnknownIfNotExists(rec.clusterRec)
if !apimgmtv3.ClusterConditionPreBootstrapped.IsTrue(rec.clusterRec) {
err := clusterController.PreBootstrap(transaction, m.ScaledContext, rec.cluster, rec.clusterRec, m)
if err != nil {
transaction.Rollback()
return err
}
}

if clusterOwner {
if err := clusterController.Register(transaction, m.ScaledContext, rec.cluster, rec.clusterRec, m); err != nil {
transaction.Rollback()
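doStart now refuses to register downstream controllers until PreBootstrapped is true, and a pre-bootstrap failure rolls back the whole handler transaction so startup is retried cleanly. For the gate to ever open, something must persist the condition as true once the sync finishes; a hedged sketch of that updater, which is not part of this diff:

```go
// Illustrative only: mark the management cluster pre-bootstrapped once the
// downstream sync has completed. Client plumbing here is assumed.
apimgmtv3.ClusterConditionPreBootstrapped.True(clusterRec)
if _, err := scaledContext.Management.Clusters("").Update(clusterRec); err != nil {
	return fmt.Errorf("failed to mark cluster %s pre-bootstrapped: %w", clusterRec.Name, err)
}
```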
2 changes: 2 additions & 0 deletions pkg/controllers/capr/controllers.go
@@ -19,6 +19,7 @@ import (
"github.com/rancher/rancher/pkg/features"
"github.com/rancher/rancher/pkg/provisioningv2/image"
"github.com/rancher/rancher/pkg/provisioningv2/kubeconfig"
"github.com/rancher/rancher/pkg/provisioningv2/prebootstrap"
"github.com/rancher/rancher/pkg/provisioningv2/systeminfo"
"github.com/rancher/rancher/pkg/settings"
"github.com/rancher/rancher/pkg/wrangler"
@@ -30,6 +31,7 @@ func Register(ctx context.Context, clients *wrangler.Context, kubeconfigManager
ReleaseData: capr.GetKDMReleaseData,
SystemAgentImage: settings.SystemAgentInstallerImage.Get,
SystemPodLabelSelectors: systeminfo.NewRetriever(clients).GetSystemPodLabelSelectors,
GetBootstrapManifests: prebootstrap.NewRetriever(clients).GeneratePreBootstrapClusterAgentManifest,
})
if features.MCM.Enabled() {
dynamicschema.Register(ctx, clients)
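The planner's new InfoFunction is satisfied by a retriever from the new pkg/provisioningv2/prebootstrap package, whose implementation is not shown in this diff. The commit message's "second call to planner#mgmtClusters.Get(...)" suggests it fetches the v3 management cluster to decide whether manifests are still needed; a plausible shape, with everything beyond the method name assumed:

```go
// Hypothetical sketch of the retriever's decision logic.
func (r *retriever) GeneratePreBootstrapClusterAgentManifest(cp *rkev1.RKEControlPlane) ([]plan.File, error) {
	mgmtCluster, err := r.mgmtClusterCache.Get(cp.Spec.ManagementClusterName) // the extra Get the tests now expect
	if err != nil {
		return nil, err
	}
	// Once the cluster is pre-bootstrapped, return nothing so the planner
	// falls through to the full plan (manifests, registries, probes, ...).
	if apimgmtv3.ClusterConditionPreBootstrapped.IsTrue(mgmtCluster) {
		return nil, nil
	}
	return renderClusterAgent(mgmtCluster) // hypothetical rendering helper
}
```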
@@ -103,6 +103,14 @@ func (c *checker) checkCluster(cluster *v3.Cluster) error {
return nil
}

// RKE2: wait to update the connected condition until it is pre-bootstrapped
if cluster.Annotations["provisioning.cattle.io/administrated"] == "true" &&
!v3.ClusterConditionPreBootstrapped.IsTrue(cluster) {
// overriding it to be disconnected until bootstrapping is done
logrus.Debugf("[pre-bootstrap][%v] Waiting for cluster to be pre-bootstrapped - not marking agent connected", cluster.Name)
return c.updateClusterConnectedCondition(cluster, false)
}

return c.updateClusterConnectedCondition(cluster, hasSession)
}

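The override only applies to clusters carrying the provisioning.cattle.io/administrated annotation; all other clusters keep the old connected-condition behavior. The gate reduces to a small predicate, restated from the code above:

```go
// waitingForPreBootstrap reports whether the agent-connected condition
// should be held at false while pre-bootstrap is still in flight.
func waitingForPreBootstrap(cluster *v3.Cluster) bool {
	return cluster.Annotations["provisioning.cattle.io/administrated"] == "true" &&
		!v3.ClusterConditionPreBootstrapped.IsTrue(cluster)
}
```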
3 changes: 2 additions & 1 deletion pkg/controllers/management/clusterdeploy/clusterdeploy.go
@@ -445,7 +445,8 @@ func (cd *clusterDeploy) getYAML(cluster *apimgmtv3.Cluster, agentImage, authImage
}

buf := &bytes.Buffer{}
- err = systemtemplate.SystemTemplate(buf, agentImage, authImage, cluster.Name, token, url, cluster.Spec.WindowsPreferedCluster,
+ err = systemtemplate.SystemTemplate(buf, agentImage, authImage, cluster.Name, token, url,
+ cluster.Spec.WindowsPreferedCluster, !apimgmtv3.ClusterConditionPreBootstrapped.IsTrue(cluster),
cluster, features, taints, cd.secretLister)

return buf.Bytes(), err
18 changes: 17 additions & 1 deletion pkg/controllers/managementuser/controllers.go
@@ -2,6 +2,7 @@ package managementuser

import (
"context"
"fmt"

apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
"github.com/rancher/rancher/pkg/controllers/managementlegacy/compose/common"
@@ -30,7 +31,7 @@ func Register(ctx context.Context, mgmt *config.ScaledContext, cluster *config.UserContext
healthsyncer.Register(ctx, cluster)
networkpolicy.Register(ctx, cluster)
nodesyncer.Register(ctx, cluster, kubeConfigGetter)
- secret.Register(ctx, cluster)
+ secret.Register(ctx, mgmt, cluster, clusterRec)
resourcequota.Register(ctx, cluster)
certsexpiration.Register(ctx, cluster)
windows.Register(ctx, clusterRec, cluster)
@@ -85,3 +86,18 @@ func registerImpersonationCaches(cluster *config.UserContext) {
cluster.Core.ServiceAccounts("").Controller()
cluster.Core.Namespaces("").Controller()
}

// PreBootstrap runs the functions that _need_ to complete before the rest of the controllers start.
// Each function returns an error on failure, and the start of the controllers is blocked until all of them succeed.
func PreBootstrap(ctx context.Context, mgmt *config.ScaledContext, cluster *config.UserContext, clusterRec *apimgmtv3.Cluster, kubeConfigGetter common.KubeConfigGetter) error {
if cluster.ClusterName == "local" {
return nil
}

err := secret.Bootstrap(ctx, mgmt, cluster, clusterRec)
if err != nil {
return fmt.Errorf("failed to bootstrap secrets: %w", err)
}

return nil
}
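secret.Bootstrap, like the updated secret.Register, now receives the management context and the cluster record — the handles needed to copy resources straight into the downstream cluster, per the commit title. Its body is not included in this diff; a minimal illustration of the kind of management-to-downstream sync it implies, with all names and client plumbing assumed:

```go
// Illustrative only — not the actual secret.Bootstrap implementation.
func syncSecretDownstream(mgmt *config.ScaledContext, cluster *config.UserContext, ns, name string) error {
	src, err := mgmt.Core.Secrets(ns).Get(name, metav1.GetOptions{})
	if err != nil {
		return err
	}
	out := src.DeepCopy()
	out.ResourceVersion = "" // must be cleared before creating in another cluster
	if _, err := cluster.Core.Secrets(ns).Create(out); err != nil && !apierrors.IsAlreadyExists(err) {
		return err
	}
	return nil
}
```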