From 5e7d7aa2c5437ee2bc9e3a9a13ca84e10ea98b48 Mon Sep 17 00:00:00 2001 From: Jacob Lindgren Date: Wed, 9 Oct 2024 10:36:16 -0600 Subject: [PATCH] [backport 2.8] Support synchronizing resources directly to downstream clusters (#46722) (#47356) * Support synchronizing resources directly to downstream clusters (#46722) * conditionally bootstrap controllers based on condition * finish pre-bootstrap templating * change to feature-flag for feature and implement controllers * update tests to expect second call to planner#mgmtClusters.Get(...) * code review comments * refactor boostrapManifests into an InfoFunction --------- * only skip marking connected if pre-bootstrap feature is enabled support replacing strings in synchronized-downstream secrets change provisioning-tests to use rancher-agent:head rather than 2.9-head --------- Co-authored-by: Jake Hyde <33796120+jakefhyde@users.noreply.github.com> --- pkg/agent/rancher/rancher.go | 5 + .../clusterregistrationtokens/import.go | 2 +- .../management.cattle.io/v3/cluster_types.go | 19 +- pkg/capr/common.go | 13 ++ pkg/capr/planner/agent.go | 2 +- pkg/capr/planner/certificaterotation_test.go | 7 +- pkg/capr/planner/planner.go | 26 ++- pkg/capr/planner/planner_test.go | 2 + pkg/clustermanager/manager.go | 12 ++ pkg/controllers/capr/controllers.go | 2 + .../clusterconnected/clusterconnected.go | 10 + .../management/clusterdeploy/clusterdeploy.go | 5 +- pkg/controllers/managementuser/controllers.go | 18 +- .../managementuser/secret/secret.go | 184 +++++++++++++++++- .../managementuser/secret/secret_test.go | 28 +++ pkg/features/feature.go | 6 + pkg/provisioningv2/prebootstrap/resolve.go | 80 ++++++++ pkg/systemtemplate/import.go | 23 ++- pkg/systemtemplate/import_test.go | 3 +- pkg/systemtemplate/template.go | 30 ++- scripts/provisioning-tests | 8 + 21 files changed, 455 insertions(+), 30 deletions(-) create mode 100644 pkg/controllers/managementuser/secret/secret_test.go create mode 100644 pkg/provisioningv2/prebootstrap/resolve.go diff --git a/pkg/agent/rancher/rancher.go b/pkg/agent/rancher/rancher.go index 351aecb7e8b..99c92771ab7 100644 --- a/pkg/agent/rancher/rancher.go +++ b/pkg/agent/rancher/rancher.go @@ -70,6 +70,11 @@ type handler struct { } func (h *handler) startRancher() { + if features.ProvisioningPreBootstrap.Enabled() { + logrus.Debugf("not starting embedded rancher due to pre-bootstrap...") + return + } + clientConfig := kubeconfig.GetNonInteractiveClientConfig("") server, err := rancher.New(h.ctx, clientConfig, &rancher.Options{ HTTPListenPort: 80, diff --git a/pkg/api/norman/customization/clusterregistrationtokens/import.go b/pkg/api/norman/customization/clusterregistrationtokens/import.go index d7660b7fa26..1d9589785ed 100644 --- a/pkg/api/norman/customization/clusterregistrationtokens/import.go +++ b/pkg/api/norman/customization/clusterregistrationtokens/import.go @@ -47,7 +47,7 @@ func (ch *ClusterImport) ClusterImportHandler(resp http.ResponseWriter, req *htt } if err = systemtemplate.SystemTemplate(resp, image.Resolve(settings.AgentImage.Get()), authImage, "", token, url, - false, cluster, nil, nil, nil); err != nil { + false, false, cluster, nil, nil, nil); err != nil { resp.WriteHeader(500) resp.Write([]byte(err.Error())) } diff --git a/pkg/apis/management.cattle.io/v3/cluster_types.go b/pkg/apis/management.cattle.io/v3/cluster_types.go index f2acb59b73a..c744294e97b 100644 --- a/pkg/apis/management.cattle.io/v3/cluster_types.go +++ b/pkg/apis/management.cattle.io/v3/cluster_types.go @@ -40,15 +40,16 @@ const ( ClusterActionSaveAsTemplate = "saveAsTemplate" // ClusterConditionReady Cluster ready to serve API (healthy when true, unhealthy when false) - ClusterConditionReady condition.Cond = "Ready" - ClusterConditionPending condition.Cond = "Pending" - ClusterConditionCertsGenerated condition.Cond = "CertsGenerated" - ClusterConditionEtcd condition.Cond = "etcd" - ClusterConditionProvisioned condition.Cond = "Provisioned" - ClusterConditionUpdated condition.Cond = "Updated" - ClusterConditionUpgraded condition.Cond = "Upgraded" - ClusterConditionWaiting condition.Cond = "Waiting" - ClusterConditionRemoved condition.Cond = "Removed" + ClusterConditionReady condition.Cond = "Ready" + ClusterConditionPending condition.Cond = "Pending" + ClusterConditionCertsGenerated condition.Cond = "CertsGenerated" + ClusterConditionEtcd condition.Cond = "etcd" + ClusterConditionPreBootstrapped condition.Cond = "PreBootstrapped" + ClusterConditionProvisioned condition.Cond = "Provisioned" + ClusterConditionUpdated condition.Cond = "Updated" + ClusterConditionUpgraded condition.Cond = "Upgraded" + ClusterConditionWaiting condition.Cond = "Waiting" + ClusterConditionRemoved condition.Cond = "Removed" // ClusterConditionNoDiskPressure true when all cluster nodes have sufficient disk ClusterConditionNoDiskPressure condition.Cond = "NoDiskPressure" // ClusterConditionNoMemoryPressure true when all cluster nodes have sufficient memory diff --git a/pkg/capr/common.go b/pkg/capr/common.go index df2fe0c7848..ffb64f0f857 100644 --- a/pkg/capr/common.go +++ b/pkg/capr/common.go @@ -17,10 +17,12 @@ import ( "time" "github.com/rancher/channelserver/pkg/model" + v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" provv1 "github.com/rancher/rancher/pkg/apis/provisioning.cattle.io/v1" rkev1 "github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1" "github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1/plan" "github.com/rancher/rancher/pkg/channelserver" + "github.com/rancher/rancher/pkg/features" capicontrollers "github.com/rancher/rancher/pkg/generated/controllers/cluster.x-k8s.io/v1beta1" rkecontroller "github.com/rancher/rancher/pkg/generated/controllers/rke.cattle.io/v1" "github.com/rancher/rancher/pkg/serviceaccounttoken" @@ -30,6 +32,7 @@ import ( corecontrollers "github.com/rancher/wrangler/v2/pkg/generated/controllers/core/v1" "github.com/rancher/wrangler/v2/pkg/generic" "github.com/rancher/wrangler/v2/pkg/name" + "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -639,3 +642,13 @@ func ParseSnapshotClusterSpecOrError(snapshot *rkev1.ETCDSnapshot) (*provv1.Clus } return nil, fmt.Errorf("unable to find and decode snapshot ClusterSpec for snapshot") } + +func PreBootstrap(mgmtCluster *v3.Cluster) bool { + // if the upstream rancher _does not_ have pre-bootstrapping enabled just always return false. + if !features.ProvisioningPreBootstrap.Enabled() { + logrus.Debug("[pre-bootstrap] feature-flag disabled, skipping pre-bootstrap flow") + return false + } + + return !v3.ClusterConditionPreBootstrapped.IsTrue(mgmtCluster) +} diff --git a/pkg/capr/planner/agent.go b/pkg/capr/planner/agent.go index 23e1cdfd811..6162f60b6e9 100644 --- a/pkg/capr/planner/agent.go +++ b/pkg/capr/planner/agent.go @@ -14,7 +14,7 @@ func (p *Planner) generateClusterAgentManifest(controlPlane *rkev1.RKEControlPla return nil, nil } - tokens, err := p.clusterRegistrationTokenCache.GetByIndex(clusterRegToken, controlPlane.Spec.ManagementClusterName) + tokens, err := p.clusterRegistrationTokenCache.GetByIndex(ClusterRegToken, controlPlane.Spec.ManagementClusterName) if err != nil { return nil, err } diff --git a/pkg/capr/planner/certificaterotation_test.go b/pkg/capr/planner/certificaterotation_test.go index 1f57c028e56..a506115bc1b 100644 --- a/pkg/capr/planner/certificaterotation_test.go +++ b/pkg/capr/planner/certificaterotation_test.go @@ -247,7 +247,7 @@ func Test_rotateCertificatesPlan(t *testing.T) { } genericSetup := func(mp *mockPlanner) { - mp.clusterRegistrationTokenCache.EXPECT().GetByIndex(clusterRegToken, "somecluster").Return([]*v3.ClusterRegistrationToken{{Status: v3.ClusterRegistrationTokenStatus{Token: "lol"}}}, nil) + mp.clusterRegistrationTokenCache.EXPECT().GetByIndex(ClusterRegToken, "somecluster").Return([]*v3.ClusterRegistrationToken{{Status: v3.ClusterRegistrationTokenStatus{Token: "lol"}}}, nil) mp.managementClusters.EXPECT().Get("somecluster").Return(&v3.Cluster{}, nil) } @@ -410,8 +410,9 @@ func Test_rotateCertificatesPlan(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { mockPlanner := newMockPlanner(t, InfoFunctions{ - SystemAgentImage: func() string { return "system-agent" }, - ImageResolver: image.ResolveWithControlPlane, + SystemAgentImage: func() string { return "system-agent" }, + ImageResolver: image.ResolveWithControlPlane, + GetBootstrapManifests: func(plane *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil }, }) if tt.setup != nil { tt.setup(mockPlanner) diff --git a/pkg/capr/planner/planner.go b/pkg/capr/planner/planner.go index 33a6eb0f090..7ae25e958d3 100644 --- a/pkg/capr/planner/planner.go +++ b/pkg/capr/planner/planner.go @@ -42,7 +42,7 @@ import ( ) const ( - clusterRegToken = "clusterRegToken" + ClusterRegToken = "clusterRegToken" EtcdSnapshotConfigMapKey = "provisioning-cluster-spec" @@ -135,10 +135,11 @@ type InfoFunctions struct { ReleaseData func(context.Context, *rkev1.RKEControlPlane) *model.Release SystemAgentImage func() string SystemPodLabelSelectors func(plane *rkev1.RKEControlPlane) []string + GetBootstrapManifests func(plane *rkev1.RKEControlPlane) ([]plan.File, error) } func New(ctx context.Context, clients *wrangler.Context, functions InfoFunctions) *Planner { - clients.Mgmt.ClusterRegistrationToken().Cache().AddIndexer(clusterRegToken, func(obj *v3.ClusterRegistrationToken) ([]string, error) { + clients.Mgmt.ClusterRegistrationToken().Cache().AddIndexer(ClusterRegToken, func(obj *v3.ClusterRegistrationToken) ([]string, error) { return []string{obj.Spec.ClusterName}, nil }) store := NewStore(clients.Core.Secret(), @@ -920,6 +921,11 @@ func (p *Planner) reconcile(controlPlane *rkev1.RKEControlPlane, tokensSecret pl return err } + preBootstrapManifests, err := p.retrievalFunctions.GetBootstrapManifests(controlPlane) + if err != nil { + return err + } + for _, r := range reconcilables { logrus.Tracef("[planner] rkecluster %s/%s reconcile tier %s - processing machine entry: %s/%s", controlPlane.Namespace, controlPlane.Name, tierName, r.entry.Machine.Namespace, r.entry.Machine.Name) // we exclude here and not in collect to ensure that include matched at least one node @@ -1013,6 +1019,9 @@ func (p *Planner) reconcile(controlPlane *rkev1.RKEControlPlane, tokensSecret pl } else if !kubeletVersionUpToDate(controlPlane, r.entry.Machine) { outOfSync = append(outOfSync, r.entry.Machine.Name) messages[r.entry.Machine.Name] = append(messages[r.entry.Machine.Name], "waiting for kubelet to update") + } else if isControlPlane(r.entry) && len(preBootstrapManifests) > 0 { + outOfSync = append(outOfSync, r.entry.Machine.Name) + messages[r.entry.Machine.Name] = append(messages[r.entry.Machine.Name], "waiting for cluster pre-bootstrap to complete") } else if isControlPlane(r.entry) && !controlPlane.Status.AgentConnected { // If the control plane nodes are currently being provisioned/updated, then it should be ensured that cluster-agent is connected. // Without the agent connected, the controllers running in Rancher, including CAPI, can't communicate with the downstream cluster. @@ -1073,6 +1082,7 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan nodePlan plan.NodePlan err error ) + if !controlPlane.Spec.UnmanagedConfig { nodePlan, reg, err = p.commonNodePlan(controlPlane, plan.NodePlan{}) if err != nil { @@ -1082,11 +1092,22 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan joinedServer string config map[string]interface{} ) + nodePlan, config, joinedServer, err = p.addConfigFile(nodePlan, controlPlane, entry, tokensSecret, joinServer, reg, renderS3) if err != nil { return nodePlan, config, joinedServer, err } + bootstrapManifests, err := p.retrievalFunctions.GetBootstrapManifests(controlPlane) + if err != nil { + return nodePlan, config, joinedServer, err + } + if len(bootstrapManifests) > 0 { + logrus.Debugf("[planner] adding pre-bootstrap manifests") + nodePlan.Files = append(nodePlan.Files, bootstrapManifests...) + return nodePlan, config, joinedServer, err + } + nodePlan, err = p.addManifests(nodePlan, controlPlane, entry) if err != nil { return nodePlan, config, joinedServer, err @@ -1103,6 +1124,7 @@ func (p *Planner) generatePlanWithConfigFiles(controlPlane *rkev1.RKEControlPlan return nodePlan, config, joinedServer, err } + return plan.NodePlan{}, map[string]interface{}{}, "", nil } diff --git a/pkg/capr/planner/planner_test.go b/pkg/capr/planner/planner_test.go index 76c7672a239..ec087527322 100644 --- a/pkg/capr/planner/planner_test.go +++ b/pkg/capr/planner/planner_test.go @@ -148,6 +148,7 @@ func TestPlanner_addInstruction(t *testing.T) { entry := createTestPlanEntry(tt.args.os) planner.retrievalFunctions.SystemAgentImage = func() string { return "system-agent" } planner.retrievalFunctions.ImageResolver = image.ResolveWithControlPlane + planner.retrievalFunctions.GetBootstrapManifests = func(cp *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil } // act p, err := planner.addInstallInstructionWithRestartStamp(plan.NodePlan{}, controlPlane, entry) @@ -518,6 +519,7 @@ func Test_getInstallerImage(t *testing.T) { var planner Planner planner.retrievalFunctions.ImageResolver = image.ResolveWithControlPlane planner.retrievalFunctions.SystemAgentImage = func() string { return "rancher/system-agent-installer-" } + planner.retrievalFunctions.GetBootstrapManifests = func(cp *rkev1.RKEControlPlane) ([]plan.File, error) { return nil, nil } assert.Equal(t, tt.expected, planner.getInstallerImage(tt.controlPlane)) }) diff --git a/pkg/clustermanager/manager.go b/pkg/clustermanager/manager.go index 0a54d8aeeb4..1d3db574198 100644 --- a/pkg/clustermanager/manager.go +++ b/pkg/clustermanager/manager.go @@ -17,6 +17,7 @@ import ( "github.com/rancher/norman/httperror" "github.com/rancher/norman/types" apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/capr" "github.com/rancher/rancher/pkg/clusterrouter" "github.com/rancher/rancher/pkg/controllers/management/secretmigrator" clusterController "github.com/rancher/rancher/pkg/controllers/managementuser" @@ -219,6 +220,17 @@ func (m *Manager) doStart(rec *record, clusterOwner bool) (exit error) { defer m.startSem.Release(1) transaction := controller.NewHandlerTransaction(rec.ctx) + + // pre-bootstrap the cluster if it's not already bootstrapped + apimgmtv3.ClusterConditionPreBootstrapped.CreateUnknownIfNotExists(rec.clusterRec) + if capr.PreBootstrap(rec.clusterRec) { + err := clusterController.PreBootstrap(transaction, m.ScaledContext, rec.cluster, rec.clusterRec, m) + if err != nil { + transaction.Rollback() + return err + } + } + if clusterOwner { if err := clusterController.Register(transaction, m.ScaledContext, rec.cluster, rec.clusterRec, m); err != nil { transaction.Rollback() diff --git a/pkg/controllers/capr/controllers.go b/pkg/controllers/capr/controllers.go index daabbf1749d..53a1f8dc81b 100644 --- a/pkg/controllers/capr/controllers.go +++ b/pkg/controllers/capr/controllers.go @@ -19,6 +19,7 @@ import ( "github.com/rancher/rancher/pkg/features" "github.com/rancher/rancher/pkg/provisioningv2/image" "github.com/rancher/rancher/pkg/provisioningv2/kubeconfig" + "github.com/rancher/rancher/pkg/provisioningv2/prebootstrap" "github.com/rancher/rancher/pkg/provisioningv2/systeminfo" "github.com/rancher/rancher/pkg/settings" "github.com/rancher/rancher/pkg/wrangler" @@ -30,6 +31,7 @@ func Register(ctx context.Context, clients *wrangler.Context, kubeconfigManager ReleaseData: capr.GetKDMReleaseData, SystemAgentImage: settings.SystemAgentInstallerImage.Get, SystemPodLabelSelectors: systeminfo.NewRetriever(clients).GetSystemPodLabelSelectors, + GetBootstrapManifests: prebootstrap.NewRetriever(clients).GeneratePreBootstrapClusterAgentManifest, }) if features.MCM.Enabled() { dynamicschema.Register(ctx, clients) diff --git a/pkg/controllers/management/clusterconnected/clusterconnected.go b/pkg/controllers/management/clusterconnected/clusterconnected.go index df61df1779e..b6788ae3a73 100644 --- a/pkg/controllers/management/clusterconnected/clusterconnected.go +++ b/pkg/controllers/management/clusterconnected/clusterconnected.go @@ -9,6 +9,7 @@ import ( "github.com/rancher/rancher/pkg/api/steve/proxy" v3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/capr" managementcontrollers "github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3" "github.com/rancher/rancher/pkg/wrangler" "github.com/rancher/remotedialer" @@ -103,6 +104,15 @@ func (c *checker) checkCluster(cluster *v3.Cluster) error { return nil } + // RKE2: wait to update the connected condition until it is pre-bootstrapped + if capr.PreBootstrap(cluster) && + cluster.Annotations["provisioning.cattle.io/administrated"] == "true" && + cluster.Name != "local" { + // overriding it to be disconnected until bootstrapping is done + logrus.Debugf("[pre-bootstrap][%v] Waiting for cluster to be pre-bootstrapped - not marking agent connected", cluster.Name) + return c.updateClusterConnectedCondition(cluster, false) + } + return c.updateClusterConnectedCondition(cluster, hasSession) } diff --git a/pkg/controllers/management/clusterdeploy/clusterdeploy.go b/pkg/controllers/management/clusterdeploy/clusterdeploy.go index 3369e35e751..c8c36e7c80d 100644 --- a/pkg/controllers/management/clusterdeploy/clusterdeploy.go +++ b/pkg/controllers/management/clusterdeploy/clusterdeploy.go @@ -10,6 +10,8 @@ import ( "sync" "time" + "github.com/rancher/rancher/pkg/capr" + "github.com/pkg/errors" "github.com/rancher/norman/types" apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" @@ -458,7 +460,8 @@ func (cd *clusterDeploy) getYAML(cluster *apimgmtv3.Cluster, agentImage, authIma } buf := &bytes.Buffer{} - err = systemtemplate.SystemTemplate(buf, agentImage, authImage, cluster.Name, token, url, cluster.Spec.WindowsPreferedCluster, + err = systemtemplate.SystemTemplate(buf, agentImage, authImage, cluster.Name, token, url, + cluster.Spec.WindowsPreferedCluster, capr.PreBootstrap(cluster), cluster, features, taints, cd.secretLister) return buf.Bytes(), err diff --git a/pkg/controllers/managementuser/controllers.go b/pkg/controllers/managementuser/controllers.go index daf2029ba6b..6a0f7a6a58d 100644 --- a/pkg/controllers/managementuser/controllers.go +++ b/pkg/controllers/managementuser/controllers.go @@ -2,6 +2,7 @@ package managementuser import ( "context" + "fmt" apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" "github.com/rancher/rancher/pkg/controllers/managementlegacy/compose/common" @@ -33,7 +34,7 @@ func Register(ctx context.Context, mgmt *config.ScaledContext, cluster *config.U networkpolicy.Register(ctx, cluster) nodesyncer.Register(ctx, cluster, kubeConfigGetter) podsecuritypolicy.Register(ctx, cluster) - secret.Register(ctx, cluster) + secret.Register(ctx, mgmt, cluster, clusterRec) resourcequota.Register(ctx, cluster) certsexpiration.Register(ctx, cluster) windows.Register(ctx, clusterRec, cluster) @@ -88,3 +89,18 @@ func RegisterFollower(cluster *config.UserContext) error { cluster.RBAC.Roles("").Controller() return nil } + +// PreBootstrap is a list of functions that _need_ to be run before the rest of the controllers start +// the functions should return an error if they fail, and the start of the controllers will be blocked until all of them succeed +func PreBootstrap(ctx context.Context, mgmt *config.ScaledContext, cluster *config.UserContext, clusterRec *apimgmtv3.Cluster, kubeConfigGetter common.KubeConfigGetter) error { + if cluster.ClusterName == "local" { + return nil + } + + err := secret.Bootstrap(ctx, mgmt, cluster, clusterRec) + if err != nil { + return fmt.Errorf("failed to bootstrap secrets: %w", err) + } + + return nil +} diff --git a/pkg/controllers/managementuser/secret/secret.go b/pkg/controllers/managementuser/secret/secret.go index 40cb9b669f0..cb720eb103c 100644 --- a/pkg/controllers/managementuser/secret/secret.go +++ b/pkg/controllers/managementuser/secret/secret.go @@ -1,14 +1,19 @@ package secret import ( + "bytes" "context" "fmt" "reflect" + "slices" "strings" + "time" "k8s.io/apimachinery/pkg/runtime" "github.com/rancher/norman/controller" + apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/capr" v1 "github.com/rancher/rancher/pkg/generated/norman/core/v1" v3 "github.com/rancher/rancher/pkg/generated/norman/management.cattle.io/v3" "github.com/rancher/rancher/pkg/types/config" @@ -32,6 +37,18 @@ const ( update = "update" projectNamespaceAnnotation = "management.cattle.io/system-namespace" userSecretAnnotation = "secret.user.cattle.io/secret" + + syncAnnotation = "provisioning.cattle.io/sync" + syncPreBootstrapAnnotation = "provisioning.cattle.io/sync-bootstrap" + syncNamespaceAnnotation = "provisioning.cattle.io/sync-target-namespace" + syncNameAnnotation = "provisioning.cattle.io/sync-target-name" + syncedAtAnnotation = "provisioning.cattle.io/synced-at" +) + +var ( + // as of right now kube-system is the only appropriate target for this functionality. + // also included is "" to support copying into the same ns the secret is in + approvedPreBootstrapTargetNamespaces = []string{"kube-system", ""} ) type Controller struct { @@ -43,7 +60,25 @@ type Controller struct { clusterName string } -func Register(ctx context.Context, cluster *config.UserContext) { +type ResourceSyncController struct { + upstreamSecrets v1.SecretInterface + downstreamSecrets v1.SecretInterface + clusterName string + clusterId string +} + +func Bootstrap(ctx context.Context, mgmt *config.ScaledContext, cluster *config.UserContext, clusterRec *apimgmtv3.Cluster) error { + c := &ResourceSyncController{ + upstreamSecrets: mgmt.Core.Secrets(clusterRec.Spec.FleetWorkspaceName), + downstreamSecrets: cluster.Core.Secrets(""), + clusterName: clusterRec.Spec.DisplayName, + clusterId: clusterRec.Name, + } + + return c.bootstrap(mgmt.Management.Clusters(""), clusterRec) +} + +func Register(ctx context.Context, mgmt *config.ScaledContext, cluster *config.UserContext, clusterRec *apimgmtv3.Cluster) { starter := cluster.DeferredStart(ctx, func(ctx context.Context) error { registerDeferred(ctx, cluster) return nil @@ -69,6 +104,153 @@ func Register(ctx context.Context, cluster *config.UserContext) { return obj, nil }) + + resourceSyncController := &ResourceSyncController{ + upstreamSecrets: mgmt.Core.Secrets(clusterRec.Spec.FleetWorkspaceName), + downstreamSecrets: cluster.Core.Secrets(""), + clusterName: clusterRec.Spec.DisplayName, + } + + resourceSyncController.upstreamSecrets.AddHandler(ctx, "secret-resource-synced", resourceSyncController.sync) +} + +func (c *ResourceSyncController) bootstrap(mgmtClusterClient v3.ClusterInterface, mgmtCluster *apimgmtv3.Cluster) error { + secrets, err := c.upstreamSecrets.List(metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list secrets in %v namespace: %w", mgmtCluster.Spec.FleetWorkspaceName, err) + } + + logrus.Debugf("[pre-bootstrap][secrets] looking for secrets-to synchronize to cluster %v", c.clusterName) + + for _, sec := range secrets.Items { + s := &sec + + if !c.bootstrapSyncable(s) { + continue + } + + logrus.Debugf("[pre-bootstrap-sync][secrets] syncing secret %v/%v to cluster %v", s.Namespace, s.Name, c.clusterName) + + _, err = c.sync("", s) + if err != nil { + return fmt.Errorf("failed to synchronize secret %v/%v to cluster %v: %w", s.Namespace, s.Name, c.clusterName, err) + } + + logrus.Debugf("[pre-boostrap-sync][secret] successfully synced secret %v/%v to downstream cluster %v", s.Namespace, s.Name, c.clusterName) + } + + apimgmtv3.ClusterConditionPreBootstrapped.True(mgmtCluster) + _, err = mgmtClusterClient.Update(mgmtCluster) + if err != nil { + return fmt.Errorf("failed to update cluster bootstrap condition for %v: %w", c.clusterName, err) + } + + return nil +} + +func (c *ResourceSyncController) syncable(obj *corev1.Secret) bool { + // no sync annotations, we don't care about this secret + if obj.Annotations[syncAnnotation] == "" && obj.Annotations[syncPreBootstrapAnnotation] == "" { + return false + } + + // if secret is authorized to be synchronized to the cluster + if !slices.Contains(strings.Split(obj.Annotations[capr.AuthorizedObjectAnnotation], ","), c.clusterName) { + return false + } + + // if the secret is not in a namespace that we are allowed to sync to + if !slices.Contains(approvedPreBootstrapTargetNamespaces, obj.Annotations[syncNamespaceAnnotation]) { + return false + } + + return true +} + +func (c *ResourceSyncController) bootstrapSyncable(obj *corev1.Secret) bool { + // only difference between sync and bootstrapSync is requiring the boostrap sync annotation to be set to "true" + return c.syncable(obj) && obj.Annotations[syncPreBootstrapAnnotation] == "true" +} + +func (c *ResourceSyncController) injectClusterIdIntoSecretData(sec *corev1.Secret) *corev1.Secret { + for key, value := range sec.Data { + if bytes.Contains(value, []byte("{{clusterId}}")) { + sec.Data[key] = bytes.ReplaceAll(value, []byte("{{clusterId}}"), []byte(c.clusterId)) + } + } + + return sec +} + +func (c *ResourceSyncController) removeClusterIdFromSecretData(sec *corev1.Secret) *corev1.Secret { + for key, value := range sec.Data { + if bytes.Contains(value, []byte(c.clusterId)) { + sec.Data[key] = bytes.ReplaceAll(value, []byte(c.clusterId), []byte("{{clusterId}}")) + } + } + + return sec +} + +func (c *ResourceSyncController) sync(key string, obj *corev1.Secret) (runtime.Object, error) { + if obj == nil { + return nil, nil + } + + if !c.syncable(obj) { + return obj, nil + } + + name := obj.Annotations[syncNameAnnotation] + if name == "" { + name = obj.Name + } + ns := obj.Annotations[syncNamespaceAnnotation] + if ns == "" { + ns = obj.Namespace + } + + logrus.Debugf("[resource-sync][secret] synchronizing %v/%v to %v/%v for cluster %v", obj.Namespace, obj.Name, ns, name, c.clusterName) + + var targetSecret *corev1.Secret + var err error + if targetSecret, err = c.downstreamSecrets.GetNamespaced(ns, name, metav1.GetOptions{}); err != nil && !errors.IsNotFound(err) { + return nil, fmt.Errorf("failed to get downstream secret %v/%v in cluster %v: %w", ns, name, c.clusterName, err) + } + + if targetSecret == nil || errors.IsNotFound(err) { + logrus.Debugf("[resource-sync][secret] creating secret %v/%v in cluster %v", ns, name, c.clusterName) + + newSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns}, + Data: obj.Data, + } + + newSecret = c.injectClusterIdIntoSecretData(newSecret) + + _, err = c.downstreamSecrets.Create(newSecret) + if err != nil { + return nil, fmt.Errorf("failed to create secret %v/%v in cluster %v: %w", ns, name, c.clusterName, err) + } + } else if !reflect.DeepEqual(c.removeClusterIdFromSecretData(targetSecret).Data, obj.Data) { + logrus.Debugf("[resource-sync][secret] updating secret %v/%v in cluster %v", ns, name, c.clusterName) + + targetSecret.Data = obj.Data + targetSecret = c.injectClusterIdIntoSecretData(targetSecret) + + _, err = c.downstreamSecrets.Update(targetSecret) + if err != nil { + return nil, fmt.Errorf("failed to update secret %v/%v in cluster %v: %w", ns, name, c.clusterName, err) + } + } else { + logrus.Debugf("[resource-sync][secret] skipping downstream update - contents are the same") + return obj, nil + } + + logrus.Debugf("[resource-sync][secret] successfully synchronized secret %v/%v to %v/%v for cluster %v", obj.Namespace, obj.Name, ns, name, c.clusterName) + + obj.Annotations[syncedAtAnnotation] = time.Now().Format(time.RFC3339) + return obj, nil } func registerDeferred(ctx context.Context, cluster *config.UserContext) { diff --git a/pkg/controllers/managementuser/secret/secret_test.go b/pkg/controllers/managementuser/secret/secret_test.go new file mode 100644 index 00000000000..b9c47b250f0 --- /dev/null +++ b/pkg/controllers/managementuser/secret/secret_test.go @@ -0,0 +1,28 @@ +package secret + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" +) + +func TestInjectClusterIdIntoSecretData(t *testing.T) { + sec := &corev1.Secret{Data: map[string][]byte{ + "bazqux": []byte("{{clusterId}}"), + }} + + c := ResourceSyncController{clusterId: "foobar"} + + assert.Equal(t, c.injectClusterIdIntoSecretData(sec).Data["bazqux"], []byte("foobar")) +} + +func TestRemoveClusterIdFromSecretData(t *testing.T) { + sec := &corev1.Secret{Data: map[string][]byte{ + "bazqux": []byte("foobar"), + }} + + c := ResourceSyncController{clusterId: "foobar"} + + assert.Equal(t, c.removeClusterIdFromSecretData(sec).Data["bazqux"], []byte("{{clusterId}}")) +} diff --git a/pkg/features/feature.go b/pkg/features/feature.go index 67e7cbc705d..c4c55870dec 100644 --- a/pkg/features/feature.go +++ b/pkg/features/feature.go @@ -125,6 +125,12 @@ var ( true, true, true) + ProvisioningPreBootstrap = newFeature( + "provisioningprebootstrap", + "Support running pre-bootstrap workloads on downstream clusters", + false, + true, + true) ) type Feature struct { diff --git a/pkg/provisioningv2/prebootstrap/resolve.go b/pkg/provisioningv2/prebootstrap/resolve.go new file mode 100644 index 00000000000..a23cc786749 --- /dev/null +++ b/pkg/provisioningv2/prebootstrap/resolve.go @@ -0,0 +1,80 @@ +package prebootstrap + +import ( + "encoding/base64" + "fmt" + "sort" + + rkev1 "github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1" + "github.com/rancher/rancher/pkg/apis/rke.cattle.io/v1/plan" + "github.com/rancher/rancher/pkg/capr" + "github.com/rancher/rancher/pkg/capr/planner" + mgmtcontrollers "github.com/rancher/rancher/pkg/generated/controllers/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/systemtemplate" + "github.com/rancher/rancher/pkg/wrangler" + corecontrollers "github.com/rancher/wrangler/v2/pkg/generated/controllers/core/v1" + "github.com/sirupsen/logrus" +) + +func NewRetriever(clients *wrangler.Context) *Retriever { + return &Retriever{ + mgmtClusterCache: clients.Mgmt.Cluster().Cache(), + clusterRegistrationTokenCache: clients.Mgmt.ClusterRegistrationToken().Cache(), + secretCache: clients.Core.Secret().Cache(), + } + +} + +type Retriever struct { + mgmtClusterCache mgmtcontrollers.ClusterCache + clusterRegistrationTokenCache mgmtcontrollers.ClusterRegistrationTokenCache + secretCache corecontrollers.SecretCache +} + +func (r *Retriever) GeneratePreBootstrapClusterAgentManifest(controlPlane *rkev1.RKEControlPlane) ([]plan.File, error) { + shouldDo, err := r.preBootstrapCluster(controlPlane) + if !shouldDo { + return nil, nil + } + + tokens, err := r.clusterRegistrationTokenCache.GetByIndex(planner.ClusterRegToken, controlPlane.Spec.ManagementClusterName) + if err != nil { + return nil, err + } + + if len(tokens) == 0 { + return nil, fmt.Errorf("no cluster registration token found") + } + + sort.Slice(tokens, func(i, j int) bool { + return tokens[i].Name < tokens[j].Name + }) + + mgmtCluster, err := r.mgmtClusterCache.Get(controlPlane.Spec.ManagementClusterName) + if err != nil { + return nil, fmt.Errorf("failed to get mgmt Cluster %v: %w", controlPlane.Spec.ManagementClusterName, err) + } + + // passing in nil for taints since prebootstrapping involves specific taints to uninitialized nodes + data, err := systemtemplate.ForCluster(mgmtCluster, tokens[0].Status.Token, nil, r.secretCache) + if err != nil { + return nil, fmt.Errorf("failed to generate pre-bootstrap cluster-agent manifest: %w", err) + } + + return []plan.File{{ + Content: base64.StdEncoding.EncodeToString(data), + Path: fmt.Sprintf("/var/lib/rancher/%s/server/manifests/rancher/cluster-agent.yaml", capr.GetRuntime(controlPlane.Spec.KubernetesVersion)), + Dynamic: true, + Minor: true, + }}, nil +} + +func (r *Retriever) preBootstrapCluster(cp *rkev1.RKEControlPlane) (bool, error) { + mgmtCluster, err := r.mgmtClusterCache.Get(cp.Spec.ManagementClusterName) + if err != nil { + logrus.Warnf("[pre-bootstrap] failed to get management cluster [%v] for rke control plane [%v]: %v", cp.Spec.ManagementClusterName, cp.Name, err) + return false, fmt.Errorf("failed to get mgmt Cluster %v: %w", cp.Spec.ManagementClusterName, err) + } + + return capr.PreBootstrap(mgmtCluster), nil +} diff --git a/pkg/systemtemplate/import.go b/pkg/systemtemplate/import.go index a7adde01057..1cc913c7c9b 100644 --- a/pkg/systemtemplate/import.go +++ b/pkg/systemtemplate/import.go @@ -14,6 +14,7 @@ import ( "github.com/Masterminds/sprig/v3" apimgmtv3 "github.com/rancher/rancher/pkg/apis/management.cattle.io/v3" + "github.com/rancher/rancher/pkg/capr" util "github.com/rancher/rancher/pkg/cluster" "github.com/rancher/rancher/pkg/features" v1 "github.com/rancher/rancher/pkg/generated/norman/core/v1" @@ -41,6 +42,7 @@ type context struct { Namespace string URLPlain string IsWindowsCluster bool + IsPreBootstrap bool IsRKE bool PrivateRegistryConfig string Tolerations string @@ -73,7 +75,7 @@ func toFeatureString(features map[string]bool) string { return buf.String() } -func SystemTemplate(resp io.Writer, agentImage, authImage, namespace, token, url string, isWindowsCluster bool, +func SystemTemplate(resp io.Writer, agentImage, authImage, namespace, token, url string, isWindowsCluster bool, isPreBootstrap bool, cluster *apimgmtv3.Cluster, features map[string]bool, taints []corev1.Taint, secretLister v1.SecretLister) error { var tolerations, agentEnvVars, agentAppendTolerations, agentAffinity, agentResourceRequirements string d := md5.Sum([]byte(url + token + namespace)) @@ -155,6 +157,7 @@ func SystemTemplate(resp io.Writer, agentImage, authImage, namespace, token, url Namespace: base64.StdEncoding.EncodeToString([]byte(namespace)), URLPlain: url, IsWindowsCluster: isWindowsCluster, + IsPreBootstrap: isPreBootstrap, IsRKE: cluster != nil && cluster.Status.Driver == apimgmtv3.ClusterDriverRKE, PrivateRegistryConfig: registryConfig, Tolerations: tolerations, @@ -169,13 +172,14 @@ func SystemTemplate(resp io.Writer, agentImage, authImage, namespace, token, url func GetDesiredFeatures(cluster *apimgmtv3.Cluster) map[string]bool { return map[string]bool{ - features.MCM.Name(): false, - features.MCMAgent.Name(): true, - features.Fleet.Name(): false, - features.RKE2.Name(): false, - features.ProvisioningV2.Name(): false, - features.EmbeddedClusterAPI.Name(): false, - features.MonitoringV1.Name(): cluster.Spec.EnableClusterMonitoring, + features.MCM.Name(): false, + features.MCMAgent.Name(): true, + features.Fleet.Name(): false, + features.RKE2.Name(): false, + features.ProvisioningV2.Name(): false, + features.EmbeddedClusterAPI.Name(): false, + features.MonitoringV1.Name(): cluster.Spec.EnableClusterMonitoring, + features.ProvisioningPreBootstrap.Name(): capr.PreBootstrap(cluster), } } @@ -183,7 +187,8 @@ func ForCluster(cluster *apimgmtv3.Cluster, token string, taints []corev1.Taint, buf := &bytes.Buffer{} err := SystemTemplate(buf, GetDesiredAgentImage(cluster), GetDesiredAuthImage(cluster), - cluster.Name, token, settings.ServerURL.Get(), cluster.Spec.WindowsPreferedCluster, + cluster.Name, token, settings.ServerURL.Get(), + cluster.Spec.WindowsPreferedCluster, capr.PreBootstrap(cluster), cluster, GetDesiredFeatures(cluster), taints, secretLister) return buf.Bytes(), err } diff --git a/pkg/systemtemplate/import_test.go b/pkg/systemtemplate/import_test.go index cc85522eaca..67820e5cba3 100644 --- a/pkg/systemtemplate/import_test.go +++ b/pkg/systemtemplate/import_test.go @@ -53,6 +53,7 @@ func TestSystemTemplate_systemtemplate(t *testing.T) { token string url string isWindowsCluster bool + isPreBootstrap bool features map[string]bool taints []corev1.Taint secrets map[string]*corev1.Secret @@ -183,7 +184,7 @@ func TestSystemTemplate_systemtemplate(t *testing.T) { mockSecrets = tt.secrets var b bytes.Buffer - err := SystemTemplate(&b, tt.agentImage, tt.authImage, tt.namespace, tt.token, tt.url, tt.isWindowsCluster, tt.cluster, tt.features, tt.taints, secretLister) + err := SystemTemplate(&b, tt.agentImage, tt.authImage, tt.namespace, tt.token, tt.url, tt.isWindowsCluster, tt.isPreBootstrap, tt.cluster, tt.features, tt.taints, secretLister) assert.Nil(t, err) decoder := scheme.Codecs.UniversalDeserializer() diff --git a/pkg/systemtemplate/template.go b/pkg/systemtemplate/template.go index 757ecb4dcc2..52ba51a44d2 100644 --- a/pkg/systemtemplate/template.go +++ b/pkg/systemtemplate/template.go @@ -111,7 +111,11 @@ rules: apiVersion: apps/v1 kind: Deployment metadata: + {{- if .IsPreBootstrap }} + name: cattle-cluster-agent-bootstrap + {{- else }} name: cattle-cluster-agent + {{- end }} namespace: cattle-system annotations: management.cattle.io/scale-available: "2" @@ -130,7 +134,24 @@ spec: {{- end }} serviceAccountName: cattle tolerations: - {{- if .Tolerations }} + {{- if .IsPreBootstrap }} + # tolerations wrt running on the pre-bootstrapped node + - effect: NoSchedule + key: node-role.kubernetes.io/controlplane + value: "true" + - effect: NoSchedule + key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + - effect: NoSchedule + key: "node-role.kubernetes.io/master" + operator: "Exists" + - effect: NoExecute + key: "node-role.kubernetes.io/etcd" + operator: "Exists" + - effect: NoSchedule + key: node.cloudprovider.kubernetes.io/uninitialized + operator: "Exists" + {{- else if .Tolerations }} # Tolerations added based on found taints on controlplane nodes {{ .Tolerations | indent 6 }} {{- else }} @@ -172,6 +193,9 @@ spec: value: "true" - name: CATTLE_CLUSTER_REGISTRY value: "{{.ClusterRegistry}}" + {{- if .IsPreBootstrap }} + # since we're on the host network, talk to the apiserver over localhost + {{- end }} {{- if .AgentEnvVars}} {{ .AgentEnvVars | indent 10 }} {{- end }} @@ -184,6 +208,10 @@ spec: imagePullSecrets: - name: cattle-private-registry {{- end }} + {{- if .IsPreBootstrap }} + # use hostNetwork since the CNI (and coreDNS) is not up yet + hostNetwork: true + {{- end }} volumes: - name: cattle-credentials secret: diff --git a/scripts/provisioning-tests b/scripts/provisioning-tests index 1ddb94e5c8d..dfc5d0ab31e 100755 --- a/scripts/provisioning-tests +++ b/scripts/provisioning-tests @@ -41,6 +41,14 @@ export SOME_K8S_VERSION=${SOME_K8S_VERSION} export TB_ORG=${TB_ORG} export CATTLE_CHART_DEFAULT_URL=${CATTLE_CHART_DEFAULT_URL} +# Tell Rancher to use the recently-built Rancher cluster agent image. This image is built as part of CI and will be +# copied to the in-cluster registry during test setup below. +source ./scripts/version +# export CATTLE_AGENT_IMAGE="rancher/rancher-agent:${AGENT_TAG}" +# using :head for now while figuring out why loading from the docker cache isn't working +export CATTLE_AGENT_IMAGE="rancher/rancher-agent:v2.8-head" +echo "Using Rancher agent image $CATTLE_AGENT_IMAGE" + eval "$(grep '^ENV CATTLE_SYSTEM_AGENT' package/Dockerfile | awk '{print "export " $2 "=" $3}')" eval "$(grep '^ENV CATTLE_WINS_AGENT' package/Dockerfile | awk '{print "export " $2 "=" $3}')" eval "$(grep '^ENV CATTLE_CSI_PROXY_AGENT' package/Dockerfile | awk '{print "export " $2 "=" $3}')"