Skip to content

Commit

Permalink
[backport 2.8] Support synchronizing resources directly to downstream…
Browse files Browse the repository at this point in the history
… clusters (rancher#46722) (rancher#47356)

* Support synchronizing resources directly to downstream clusters (rancher#46722)

* conditionally bootstrap controllers based on condition

* finish pre-bootstrap templating

* change to feature-flag for feature and implement controllers

* update tests to expect second call to planner#mgmtClusters.Get(...)

* code review comments



* refactor bootstrapManifests into an InfoFunction

---------



* only skip marking connected if pre-bootstrap feature is enabled

support replacing strings in synchronized-downstream secrets

change provisioning-tests to use rancher-agent:head rather than 2.9-head

---------

Co-authored-by: Jake Hyde <[email protected]>
  • Loading branch information
thatmidwesterncoder and jakefhyde authored Oct 9, 2024
1 parent d0cdc69 commit 5e7d7aa
Show file tree
Hide file tree
Showing 21 changed files with 455 additions and 30 deletions.
5 changes: 5 additions & 0 deletions pkg/agent/rancher/rancher.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ type handler struct {
}

func (h *handler) startRancher() {
if features.ProvisioningPreBootstrap.Enabled() {
logrus.Debugf("not starting embedded rancher due to pre-bootstrap...")
return
}

clientConfig := kubeconfig.GetNonInteractiveClientConfig("")
server, err := rancher.New(h.ctx, clientConfig, &rancher.Options{
HTTPListenPort: 80,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (ch *ClusterImport) ClusterImportHandler(resp http.ResponseWriter, req *htt
}

if err = systemtemplate.SystemTemplate(resp, image.Resolve(settings.AgentImage.Get()), authImage, "", token, url,
false, cluster, nil, nil, nil); err != nil {
false, false, cluster, nil, nil, nil); err != nil {
resp.WriteHeader(500)
resp.Write([]byte(err.Error()))
}
Expand Down
19 changes: 10 additions & 9 deletions pkg/apis/management.cattle.io/v3/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,16 @@ const (
ClusterActionSaveAsTemplate = "saveAsTemplate"

// ClusterConditionReady Cluster ready to serve API (healthy when true, unhealthy when false)
ClusterConditionReady condition.Cond = "Ready"
ClusterConditionPending condition.Cond = "Pending"
ClusterConditionCertsGenerated condition.Cond = "CertsGenerated"
ClusterConditionEtcd condition.Cond = "etcd"
ClusterConditionProvisioned condition.Cond = "Provisioned"
ClusterConditionUpdated condition.Cond = "Updated"
ClusterConditionUpgraded condition.Cond = "Upgraded"
ClusterConditionWaiting condition.Cond = "Waiting"
ClusterConditionRemoved condition.Cond = "Removed"
ClusterConditionReady condition.Cond = "Ready"
ClusterConditionPending condition.Cond = "Pending"
ClusterConditionCertsGenerated condition.Cond = "CertsGenerated"
ClusterConditionEtcd condition.Cond = "etcd"
ClusterConditionPreBootstrapped condition.Cond = "PreBootstrapped"
ClusterConditionProvisioned condition.Cond = "Provisioned"
ClusterConditionUpdated condition.Cond = "Updated"
ClusterConditionUpgraded condition.Cond = "Upgraded"
ClusterConditionWaiting condition.Cond = "Waiting"
ClusterConditionRemoved condition.Cond = "Removed"
// ClusterConditionNoDiskPressure true when all cluster nodes have sufficient disk
ClusterConditionNoDiskPressure condition.Cond = "NoDiskPressure"
// ClusterConditionNoMemoryPressure true when all cluster nodes have sufficient memory
Expand Down
13 changes: 13 additions & 0 deletions pkg/capr/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ import (
"time"

"github.com/rancher/channelserver/pkg/model"
v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
provv1 "github.com/rancher/rancher/pkg/apis/provisioning.cattle.io/v1"
rkev1 "github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1"
"github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1/plan"
"github.com/rancher/rancher/pkg/channelserver"
"github.com/rancher/rancher/pkg/features"
capicontrollers "github.com/rancher/rancher/pkg/generated/controllers/cluster.x-k8s.io/v1beta1"
rkecontroller "github.com/rancher/rancher/pkg/generated/controllers/rke.cattle.io/v1"
"github.com/rancher/rancher/pkg/serviceaccounttoken"
Expand All @@ -30,6 +32,7 @@ import (
corecontrollers "github.com/rancher/wrangler/v2/pkg/generated/controllers/core/v1"
"github.com/rancher/wrangler/v2/pkg/generic"
"github.com/rancher/wrangler/v2/pkg/name"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -639,3 +642,13 @@ func ParseSnapshotClusterSpecOrError(snapshot *rkev1.ETCDSnapshot) (*provv1.Clus
}
return nil, fmt.Errorf("unable to find and decode snapshot ClusterSpec for snapshot")
}

// PreBootstrap reports whether the given management cluster still has to go
// through the pre-bootstrap flow.
//
// It returns false whenever the ProvisioningPreBootstrap feature flag is
// disabled. With the flag enabled it returns true until the cluster's
// PreBootstrapped condition is true.
func PreBootstrap(mgmtCluster *v3.Cluster) bool {
	if features.ProvisioningPreBootstrap.Enabled() {
		// feature is on: pre-bootstrap is pending as long as the condition has not been set to true.
		alreadyBootstrapped := v3.ClusterConditionPreBootstrapped.IsTrue(mgmtCluster)
		return !alreadyBootstrapped
	}

	// the upstream rancher does not have pre-bootstrapping enabled, so there is never anything to wait for.
	logrus.Debug("[pre-bootstrap] feature-flag disabled, skipping pre-bootstrap flow")
	return false
}
2 changes: 1 addition & 1 deletion pkg/capr/planner/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func (p *Planner) generateClusterAgentManifest(controlPlane *rkev1.RKEControlPla
return nil, nil
}

tokens, err := p.clusterRegistrationTokenCache.GetByIndex(clusterRegToken, controlPlane.Spec.ManagementClusterName)
tokens, err := p.clusterRegistrationTokenCache.GetByIndex(ClusterRegToken, controlPlane.Spec.ManagementClusterName)
if err != nil {
return nil, err
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/capr/planner/certificaterotation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func Test_rotateCertificatesPlan(t *testing.T) {
}

genericSetup := func(mp *mockPlanner) {
mp.clusterRegistrationTokenCache.EXPECT().GetByIndex(clusterRegToken, "somecluster").Return([]*v3.ClusterRegistrationToken{{Status: v3.ClusterRegistrationTokenStatus{Token: "lol"}}}, nil)
mp.clusterRegistrationTokenCache.EXPECT().GetByIndex(ClusterRegToken, "somecluster").Return([]*v3.ClusterRegistrationToken{{Status: v3.ClusterRegistrationTokenStatus{Token: "lol"}}}, nil)
mp.managementClusters.EXPECT().Get("somecluster").Return(&v3.Cluster{}, nil)
}

Expand Down Expand Up @@ -410,8 +410,9 @@ func Test_rotateCertificatesPlan(t *testing.T) {
tt := tt
t.Run(tt.name, func(t *testing.T) {
mockPlanner := newMockPlanner(t, InfoFunctions{
SystemAgentImage: func() string { return "system-agent" },
ImageResolver: image.ResolveWithControlPlane,
SystemAgentImage: func() string { return "system-agent" },
ImageResolver: image.ResolveWithControlPlane,
GetBootstrapManifests: func(plane *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil },
})
if tt.setup != nil {
tt.setup(mockPlanner)
Expand Down
26 changes: 24 additions & 2 deletions pkg/capr/planner/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import (
)

const (
clusterRegToken = "clusterRegToken"
ClusterRegToken = "clusterRegToken"

EtcdSnapshotConfigMapKey = "provisioning-cluster-spec"

Expand Down Expand Up @@ -135,10 +135,11 @@ type InfoFunctions struct {
ReleaseData func(context.Context, *rkev1.RKEControlPlane) *model.Release
SystemAgentImage func() string
SystemPodLabelSelectors func(plane *rkev1.RKEControlPlane) []string
GetBootstrapManifests func(plane *rkev1.RKEControlPlane) ([]plan.File, error)
}

func New(ctx context.Context, clients *wrangler.Context, functions InfoFunctions) *Planner {
clients.Mgmt.ClusterRegistrationToken().Cache().AddIndexer(clusterRegToken, func(obj *v3.ClusterRegistrationToken) ([]string, error) {
clients.Mgmt.ClusterRegistrationToken().Cache().AddIndexer(ClusterRegToken, func(obj *v3.ClusterRegistrationToken) ([]string, error) {
return []string{obj.Spec.ClusterName}, nil
})
store := NewStore(clients.Core.Secret(),
Expand Down Expand Up @@ -920,6 +921,11 @@ func (p *Planner) reconcile(controlPlane *rkev1.RKEControlPlane, tokensSecret pl
return err
}

preBootstrapManifests, err := p.retrievalFunctions.GetBootstrapManifests(controlPlane)
if err != nil {
return err
}

for _, r := range reconcilables {
logrus.Tracef("[planner] rkecluster %s/%s reconcile tier %s - processing machine entry: %s/%s", controlPlane.Namespace, controlPlane.Name, tierName, r.entry.Machine.Namespace, r.entry.Machine.Name)
// we exclude here and not in collect to ensure that include matched at least one node
Expand Down Expand Up @@ -1013,6 +1019,9 @@ func (p *Planner) reconcile(controlPlane *rkev1.RKEControlPlane, tokensSecret pl
} else if !kubeletVersionUpToDate(controlPlane, r.entry.Machine) {
outOfSync = append(outOfSync, r.entry.Machine.Name)
messages[r.entry.Machine.Name] = append(messages[r.entry.Machine.Name], "waiting for kubelet to update")
} else if isControlPlane(r.entry) && len(preBootstrapManifests) > 0 {
outOfSync = append(outOfSync, r.entry.Machine.Name)
messages[r.entry.Machine.Name] = append(messages[r.entry.Machine.Name], "waiting for cluster pre-bootstrap to complete")
} else if isControlPlane(r.entry) && !controlPlane.Status.AgentConnected {
// If the control plane nodes are currently being provisioned/updated, then it should be ensured that cluster-agent is connected.
// Without the agent connected, the controllers running in Rancher, including CAPI, can't communicate with the downstream cluster.
Expand Down Expand Up @@ -1073,6 +1082,7 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan
nodePlan plan.NodePlan
err error
)

if !controlPlane.Spec.UnmanagedConfig {
nodePlan, reg, err = p.commonNodePlan(controlPlane, plan.NodePlan{})
if err != nil {
Expand All @@ -1082,11 +1092,22 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan
joinedServer string
config map[string]interface{}
)

nodePlan, config, joinedServer, err = p.addConfigFile(nodePlan, controlPlane, entry, tokensSecret, joinServer, reg, renderS3)
if err != nil {
return nodePlan, config, joinedServer, err
}

bootstrapManifests, err := p.retrievalFunctions.GetBootstrapManifests(controlPlane)
if err != nil {
return nodePlan, config, joinedServer, err
}
if len(bootstrapManifests) > 0 {
logrus.Debugf("[planner] adding pre-bootstrap manifests")
nodePlan.Files = append(nodePlan.Files, bootstrapManifests...)
return nodePlan, config, joinedServer, err
}

nodePlan, err = p.addManifests(nodePlan, controlPlane, entry)
if err != nil {
return nodePlan, config, joinedServer, err
Expand All @@ -1103,6 +1124,7 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan

return nodePlan, config, joinedServer, err
}

return plan.NodePlan{}, map[string]interface{}{}, "", nil
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/capr/planner/planner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ func TestPlanner_addInstruction(t *testing.T) {
entry := createTestPlanEntry(tt.args.os)
planner.retrievalFunctions.SystemAgentImage = func() string { return "system-agent" }
planner.retrievalFunctions.ImageResolver = image.ResolveWithControlPlane
planner.retrievalFunctions.GetBootstrapManifests = func(cp *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil }
// act
p, err := planner.addInstallInstructionWithRestartStamp(plan.NodePlan{}, controlPlane, entry)

Expand Down Expand Up @@ -518,6 +519,7 @@ func Test_getInstallerImage(t *testing.T) {
var planner Planner
planner.retrievalFunctions.ImageResolver = image.ResolveWithControlPlane
planner.retrievalFunctions.SystemAgentImage = func() string { return "rancher/system-agent-installer-" }
planner.retrievalFunctions.GetBootstrapManifests = func(cp *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil }

assert.Equal(t, tt.expected, planner.getInstallerImage(tt.controlPlane))
})
Expand Down
12 changes: 12 additions & 0 deletions pkg/clustermanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/rancher/norman/httperror"
"github.com/rancher/norman/types"
apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
"github.com/rancher/rancher/pkg/capr"
"github.com/rancher/rancher/pkg/clusterrouter"
"github.com/rancher/rancher/pkg/controllers/management/secretmigrator"
clusterController "github.com/rancher/rancher/pkg/controllers/managementuser"
Expand Down Expand Up @@ -219,6 +220,17 @@ func (m *Manager) doStart(rec *record, clusterOwner bool) (exit error) {
defer m.startSem.Release(1)

transaction := controller.NewHandlerTransaction(rec.ctx)

// pre-bootstrap the cluster if it's not already bootstrapped
apimgmtv3.ClusterConditionPreBootstrapped.CreateUnknownIfNotExists(rec.clusterRec)
if capr.PreBootstrap(rec.clusterRec) {
err := clusterController.PreBootstrap(transaction, m.ScaledContext, rec.cluster, rec.clusterRec, m)
if err != nil {
transaction.Rollback()
return err
}
}

if clusterOwner {
if err := clusterController.Register(transaction, m.ScaledContext, rec.cluster, rec.clusterRec, m); err != nil {
transaction.Rollback()
Expand Down
2 changes: 2 additions & 0 deletions pkg/controllers/capr/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/rancher/rancher/pkg/features"
"github.com/rancher/rancher/pkg/provisioningv2/image"
"github.com/rancher/rancher/pkg/provisioningv2/kubeconfig"
"github.com/rancher/rancher/pkg/provisioningv2/prebootstrap"
"github.com/rancher/rancher/pkg/provisioningv2/systeminfo"
"github.com/rancher/rancher/pkg/settings"
"github.com/rancher/rancher/pkg/wrangler"
Expand All @@ -30,6 +31,7 @@ func Register(ctx context.Context, clients *wrangler.Context, kubeconfigManager
ReleaseData: capr.GetKDMReleaseData,
SystemAgentImage: settings.SystemAgentInstallerImage.Get,
SystemPodLabelSelectors: systeminfo.NewRetriever(clients).GetSystemPodLabelSelectors,
GetBootstrapManifests: prebootstrap.NewRetriever(clients).GeneratePreBootstrapClusterAgentManifest,
})
if features.MCM.Enabled() {
dynamicschema.Register(ctx, clients)
Expand Down
10 changes: 10 additions & 0 deletions pkg/controllers/management/clusterconnected/clusterconnected.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/rancher/rancher/pkg/api/steve/proxy"
v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
"github.com/rancher/rancher/pkg/capr"
managementcontrollers "github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3"
"github.com/rancher/rancher/pkg/wrangler"
"github.com/rancher/remotedialer"
Expand Down Expand Up @@ -103,6 +104,15 @@ func (c *checker) checkCluster(cluster *v3.Cluster) error {
return nil
}

// RKE2: wait to update the connected condition until it is pre-bootstrapped
if capr.PreBootstrap(cluster) &&
cluster.Annotations["provisioning.cattle.io/administrated"] == "true" &&
cluster.Name != "local" {
// overriding it to be disconnected until bootstrapping is done
logrus.Debugf("[pre-bootstrap][%v] Waiting for cluster to be pre-bootstrapped - not marking agent connected", cluster.Name)
return c.updateClusterConnectedCondition(cluster, false)
}

return c.updateClusterConnectedCondition(cluster, hasSession)
}

Expand Down
5 changes: 4 additions & 1 deletion pkg/controllers/management/clusterdeploy/clusterdeploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"sync"
"time"

"github.com/rancher/rancher/pkg/capr"

"github.com/pkg/errors"
"github.com/rancher/norman/types"
apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
Expand Down Expand Up @@ -458,7 +460,8 @@ func (cd *clusterDeploy) getYAML(cluster *apimgmtv3.Cluster, agentImage, authIma
}

buf := &bytes.Buffer{}
err = systemtemplate.SystemTemplate(buf, agentImage, authImage, cluster.Name, token, url, cluster.Spec.WindowsPreferedCluster,
err = systemtemplate.SystemTemplate(buf, agentImage, authImage, cluster.Name, token, url,
cluster.Spec.WindowsPreferedCluster, capr.PreBootstrap(cluster),
cluster, features, taints, cd.secretLister)

return buf.Bytes(), err
Expand Down
18 changes: 17 additions & 1 deletion pkg/controllers/managementuser/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package managementuser

import (
"context"
"fmt"

apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3"
"github.com/rancher/rancher/pkg/controllers/managementlegacy/compose/common"
Expand Down Expand Up @@ -33,7 +34,7 @@ func Register(ctx context.Context, mgmt *config.ScaledContext, cluster *config.U
networkpolicy.Register(ctx, cluster)
nodesyncer.Register(ctx, cluster, kubeConfigGetter)
podsecuritypolicy.Register(ctx, cluster)
secret.Register(ctx, cluster)
secret.Register(ctx, mgmt, cluster, clusterRec)
resourcequota.Register(ctx, cluster)
certsexpiration.Register(ctx, cluster)
windows.Register(ctx, clusterRec, cluster)
Expand Down Expand Up @@ -88,3 +89,18 @@ func RegisterFollower(cluster *config.UserContext) error {
cluster.RBAC.Roles("").Controller()
return nil
}

// PreBootstrap runs the steps that need to succeed before the rest of the
// controllers for a downstream cluster are started. The cluster named "local"
// is skipped. For every other cluster the secrets are bootstrapped via
// secret.Bootstrap, and any failure is returned wrapped so the caller can
// block controller startup on it.
//
// kubeConfigGetter is accepted but not used by the current implementation.
func PreBootstrap(ctx context.Context, mgmt *config.ScaledContext, cluster *config.UserContext, clusterRec *apimgmtv3.Cluster, kubeConfigGetter common.KubeConfigGetter) error {
	if cluster.ClusterName != "local" {
		if bootstrapErr := secret.Bootstrap(ctx, mgmt, cluster, clusterRec); bootstrapErr != nil {
			return fmt.Errorf("failed to bootstrap secrets: %w", bootstrapErr)
		}
	}

	return nil
}
Loading

0 comments on commit 5e7d7aa

Please sign in to comment.