diff --git a/.github/scripts/deploy-fleet.sh b/.github/scripts/deploy-fleet.sh index 37982117bc..189b8981be 100755 --- a/.github/scripts/deploy-fleet.sh +++ b/.github/scripts/deploy-fleet.sh @@ -22,6 +22,10 @@ else agentTag="dev" fi +host=$(kubectl get node k3d-upstream-server-0 -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') +ca=$( kubectl config view --flatten -o jsonpath='{.clusters[?(@.name == "k3d-upstream")].cluster.certificate-authority-data}' | base64 -d ) +server="https://$host:6443" + eventually helm upgrade --install fleet-crd charts/fleet-crd \ --atomic \ -n cattle-fleet-system \ @@ -34,7 +38,9 @@ eventually helm upgrade --install fleet charts/fleet \ --set image.tag="$fleetTag" \ --set agentImage.repository="$agentRepo" \ --set agentImage.tag="$agentTag" \ - --set agentImage.imagePullPolicy=IfNotPresent + --set agentImage.imagePullPolicy=IfNotPresent \ + --set apiServerCA="$ca" \ + --set apiServerURL="$server" \ # wait for controller and agent rollout kubectl -n cattle-fleet-system rollout status deploy/fleet-controller diff --git a/.github/workflows/e2e-ci.yml b/.github/workflows/e2e-ci.yml index 2c02636077..b12883022c 100644 --- a/.github/workflows/e2e-ci.yml +++ b/.github/workflows/e2e-ci.yml @@ -76,7 +76,7 @@ jobs: # k3d will automatically create a network named k3d-test-cluster-1 with the range 172.18.0.0/16 with: k3d-version: ${{ env.SETUP_K3D_VERSION }} - cluster-name: "k3s-default" + cluster-name: "upstream" args: >- --agents 1 --network "nw01" @@ -84,7 +84,7 @@ jobs: - name: Import Images Into k3d run: | - ./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev nginx-git:test + ./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev nginx-git:test -c upstream - name: Set Up Tmate Debug Session if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.enable_tmate == 'true' }} diff --git a/.github/workflows/e2e-multicluster-ci.yml 
b/.github/workflows/e2e-multicluster-ci.yml index 41e36e6bad..3b37852ff0 100644 --- a/.github/workflows/e2e-multicluster-ci.yml +++ b/.github/workflows/e2e-multicluster-ci.yml @@ -71,7 +71,7 @@ jobs: --agents 1 --network "nw01" - - name: Provision k3d Downstream Cluster + name: Provision k3d Downstream Cluster for agent-initiated registration uses: AbsaOSS/k3d-action@v2 with: k3d-version: ${{ env.SETUP_K3D_VERSION }} @@ -82,11 +82,24 @@ jobs: --api-port 6644 --agents 1 --network "nw01" + - + name: Provision k3d Downstream Cluster for manager-initiated registration + uses: AbsaOSS/k3d-action@v2 + with: + k3d-version: ${{ env.SETUP_K3D_VERSION }} + cluster-name: "managed-downstream" + args: >- + -p "82:80@agent:0:direct" + -p "445:443@agent:0:direct" + --api-port 6645 + --agents 1 + --network "nw01" - name: Import Images Into k3d run: | ./.github/scripts/k3d-import-retry.sh rancher/fleet:dev rancher/fleet-agent:dev -c upstream ./.github/scripts/k3d-import-retry.sh rancher/fleet-agent:dev -c downstream + ./.github/scripts/k3d-import-retry.sh rancher/fleet-agent:dev -c managed-downstream - name: Set Up Tmate Debug Session if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.enable_tmate == 'true' }} @@ -140,6 +153,61 @@ jobs: while [ $(kubectl -n fleet-default get cluster -o jsonpath='{.items[0].status.summary.ready}') -ne 1 ]; do sleep 1 done + + - + name: Deploy and Register Managed Downstream Fleet + run: | + kubectl config use-context k3d-managed-downstream + host=$(kubectl get node k3d-managed-downstream-server-0 -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}') + ca=$( kubectl config view --flatten -o jsonpath='{.clusters[?(@.name == "k3d-managed-downstream")].cluster.certificate-authority-data}' ) + client_cert=$( kubectl config view --flatten -o jsonpath='{.users[?(@.name == "admin@k3d-managed-downstream")].user.client-certificate-data}' ) + token=$( kubectl config view --flatten -o jsonpath='{.users[?(@.name == 
"admin@k3d-managed-downstream")].user.client-key-data}' ) + server="https://$host:6443" + + kubectl config use-context k3d-upstream + + value=$(cat < diff --git a/charts/fleet-agent/templates/configmap.yaml b/charts/fleet-agent/templates/configmap.yaml index ce61a87568..f3e83a89cc 100644 --- a/charts/fleet-agent/templates/configmap.yaml +++ b/charts/fleet-agent/templates/configmap.yaml @@ -8,5 +8,6 @@ data: {{ if .Values.labels }} "labels":{{toJson .Values.labels}}, {{ end }} - "clientID":"{{.Values.clientID}}" + "clientID":"{{.Values.clientID}}", + "agentTLSMode": "{{.Values.agentTLSMode}}" } diff --git a/charts/fleet-agent/values.yaml b/charts/fleet-agent/values.yaml index c653e2b000..b3e6035fc6 100644 --- a/charts/fleet-agent/values.yaml +++ b/charts/fleet-agent/values.yaml @@ -11,6 +11,10 @@ apiServerURL: "" # If left empty it is assumed this Kubernetes API TLS is signed by a well known CA. apiServerCA: "" +# Determines whether the agent should trust CA bundles from the operating system's trust store when connecting to a +# management cluster. True in `system-store` mode, false in `strict` mode. +agentTLSMode: "system-store" + # The cluster registration value token: "" diff --git a/charts/fleet-crd/templates/crds.yaml b/charts/fleet-crd/templates/crds.yaml index a476e8dddd..8ae8101218 100644 --- a/charts/fleet-crd/templates/crds.yaml +++ b/charts/fleet-crd/templates/crds.yaml @@ -4463,6 +4463,14 @@ spec: configuration, used to detect changes. nullable: true type: string + agentTLSMode: + description: 'AgentTLSMode supports two values: `system-store` and + `strict`. If set to `system-store`, instructs the agent to trust + CA bundles from the operating system''s store. If set to `strict`, + then the agent shall only connect to a server which uses the exact + CA configured when creating/updating the agent.' 
+ nullable: true + type: string agentTolerationsHash: description: AgentTolerationsHash is a hash of the agent's tolerations configuration, used to detect changes. diff --git a/charts/fleet/templates/configmap.yaml b/charts/fleet/templates/configmap.yaml index 07f1b5924d..3fd0b15cf8 100644 --- a/charts/fleet/templates/configmap.yaml +++ b/charts/fleet/templates/configmap.yaml @@ -11,6 +11,7 @@ data: "apiServerURL": "{{.Values.apiServerURL}}", "apiServerCA": "{{b64enc .Values.apiServerCA}}", "agentCheckinInterval": "{{.Values.agentCheckinInterval}}", + "agentTLSMode": "{{.Values.agentTLSMode}}", "ignoreClusterRegistrationLabels": {{.Values.ignoreClusterRegistrationLabels}}, "bootstrap": { "paths": "{{.Values.bootstrap.paths}}", diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml index 8c3a9c596c..7ee9f14c51 100644 --- a/charts/fleet/values.yaml +++ b/charts/fleet/values.yaml @@ -16,6 +16,10 @@ apiServerURL: "" # If left empty it is assumed this Kubernetes API TLS is signed by a well known CA. apiServerCA: "" +# Determines whether the agent should trust CA bundles from the operating system's trust store when connecting to a +# management cluster. True in `system-store` mode, false in `strict` mode. +agentTLSMode: "system-store" + # A duration string for how often agents should report a heartbeat agentCheckinInterval: "15m" diff --git a/e2e/multi-cluster/installation/agent_test.go b/e2e/multi-cluster/installation/agent_test.go new file mode 100644 index 0000000000..c030d8cba5 --- /dev/null +++ b/e2e/multi-cluster/installation/agent_test.go @@ -0,0 +1,100 @@ +package installation_test + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/onsi/gomega/matchers" + "github.com/rancher/fleet/e2e/testenv/kubectl" +) + +var ( + agentMode string + kd kubectl.Command +) + +var _ = Describe("Fleet installation with TLS agent modes", func() { + BeforeEach(func() { + kd = env.Kubectl.Context(env.Downstream) + }) + + JustBeforeEach(func() { + out, err := ku.Patch( + "configmap", + "fleet-controller", + "-n", + "cattle-fleet-system", + "--type=merge", + "-p", + fmt.Sprintf( + `{"data":{"config":"{\"apiServerURL\": \"https://google.com\", \"apiServerCA\": \"\", \"agentTLSMode\": \"%s\"}"}}`, + agentMode, + ), + ) + Expect(err).ToNot(HaveOccurred(), string(out)) + + }) + + Context("with non-strict agent TLS mode", func() { + When("fetching fleet-agent-register logs", func() { + BeforeEach(func() { + agentMode = "system-store" + }) + + It("reaches the server without cert issues", func() { + Eventually(func() bool { + logs, err := kd.Namespace("cattle-fleet-system").Logs( + "-l", + "app=fleet-agent", + "--tail=-1", + ) + if err != nil { + return false + } + + regexMatcher := matchers.MatchRegexpMatcher{ + Regexp: "Failed to register agent.*could not find the requested resource", + } + reachesServerWithoutCertIssue, err := regexMatcher.Match(logs) + if err != nil { + return false + } + + return reachesServerWithoutCertIssue + }).Should(BeTrue()) + }) + }) + }) + + Context("with strict agent TLS mode", func() { + When("fetching fleet-agent-register logs", func() { + BeforeEach(func() { + agentMode = "strict" + }) + + It("cannot reach the server because the cert is signed by an unknown authority", func() { + Eventually(func() bool { + logs, err := kd.Namespace("cattle-fleet-system").Logs( + "-l", + "app=fleet-agent", + "--tail=-1", + ) + if err != nil { + return false + } + + regexMatcher := matchers.MatchRegexpMatcher{ + Regexp: "Failed to register agent.*signed by unknown authority", + } + reachesServerWithoutCertIssue, err := regexMatcher.Match(logs) + if err != nil { + return 
false + } + + return reachesServerWithoutCertIssue + }).Should(BeTrue()) + }) + }) + }) +}) diff --git a/e2e/multi-cluster/installation/suite_test.go b/e2e/multi-cluster/installation/suite_test.go new file mode 100644 index 0000000000..003a08fec5 --- /dev/null +++ b/e2e/multi-cluster/installation/suite_test.go @@ -0,0 +1,61 @@ +// Package installation contains e2e tests deploying Fleet to multiple clusters. The tests use kubectl to apply +// manifests. Expectations are verified by checking cluster resources. +package installation_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/rancher/fleet/e2e/testenv" + "github.com/rancher/fleet/e2e/testenv/kubectl" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "E2E Installation Suite for Multi-Cluster") +} + +var ( + env *testenv.Env + ku kubectl.Command + config string +) + +var _ = BeforeSuite(func() { + SetDefaultEventuallyTimeout(testenv.Timeout) + testenv.SetRoot("../..") + + env = testenv.New() + ku = env.Kubectl.Context(env.Upstream) + + // Save initial state of `fleet-controller` config map + cfg, err := ku.Get( + "configmap", + "fleet-controller", + "-n", + "cattle-fleet-system", + "-o", + "jsonpath={.data.config}") + Expect(err).ToNot(HaveOccurred(), cfg) + + cfg = strings.ReplaceAll(cfg, `"`, `\"`) + config = strings.ReplaceAll(cfg, "\n", "") +}) + +var _ = AfterSuite(func() { + // Restore initial state of config map + out, err := ku.Patch( + "configmap", + "fleet-controller", + "-n", + "cattle-fleet-system", + "--type=merge", + "-p", + fmt.Sprintf(`{"data":{"config":"%s"}}`, config), + ) + Expect(err).ToNot(HaveOccurred(), string(out)) +}) diff --git a/e2e/testenv/kubectl/kubectl.go b/e2e/testenv/kubectl/kubectl.go index b5e0162ff9..ca89bbb20f 100644 --- a/e2e/testenv/kubectl/kubectl.go +++ b/e2e/testenv/kubectl/kubectl.go @@ -62,6 +62,10 @@ func (c Command) Create(args ...string) (string, error) { return 
c.Run(append([]string{"create"}, args...)...) } +func (c Command) Logs(args ...string) (string, error) { + return c.Run(append([]string{"logs"}, args...)...) +} + func (c Command) Patch(args ...string) (string, error) { return c.Run(append([]string{"patch"}, args...)...) } diff --git a/internal/cmd/agent/register/register.go b/internal/cmd/agent/register/register.go index 8fac2ea58d..30a9a66dba 100644 --- a/internal/cmd/agent/register/register.go +++ b/internal/cmd/agent/register/register.go @@ -129,7 +129,12 @@ func runRegistration(ctx context.Context, k8s coreInterface, namespace, clusterI return nil, fmt.Errorf("looking up secret %s/%s: %w", namespace, config.AgentBootstrapConfigName, err) } - clientConfig := createClientConfigFromSecret(secret) + cfg, err := config.Lookup(ctx, secret.Namespace, config.AgentConfigName, k8s.ConfigMap()) + if err != nil { + return nil, fmt.Errorf("failed to look up client config %s/%s: %w", secret.Namespace, config.AgentConfigName, err) + } + + clientConfig := createClientConfigFromSecret(secret, cfg.AgentTLSMode == config.AgentTLSModeSystemStore) ns, _, err := clientConfig.Namespace() if err != nil { @@ -141,11 +146,6 @@ func runRegistration(ctx context.Context, k8s coreInterface, namespace, clusterI return nil, err } - cfg, err := config.Lookup(ctx, secret.Namespace, config.AgentConfigName, k8s.ConfigMap()) - if err != nil { - return nil, fmt.Errorf("failed to look up client config %s/%s: %w", secret.Namespace, config.AgentConfigName, err) - } - fleetK8s, err := kubernetes.NewForConfig(kc) if err != nil { return nil, err @@ -278,15 +278,30 @@ func values(data map[string][]byte) map[string][]byte { // createClientConfigFromSecret reads the fleet-agent-bootstrap secret and // creates a clientConfig to access the upstream cluster -func createClientConfigFromSecret(secret *corev1.Secret) clientcmd.ClientConfig { +func createClientConfigFromSecret(secret *corev1.Secret, trustSystemStoreCAs bool) clientcmd.ClientConfig { data := 
values(secret.Data) apiServerURL := string(data[config.APIServerURLKey]) apiServerCA := data[config.APIServerCAKey] namespace := string(data[ClusterNamespace]) token := string(data[Token]) - if _, err := http.Get(apiServerURL); err == nil { - apiServerCA = nil + if trustSystemStoreCAs { // Save a request to the API server URL if system CAs are not to be trusted. + if _, err := http.Get(apiServerURL); err == nil { + apiServerCA = nil + } + } else { + // Bypass the OS trust store through env vars, see https://pkg.go.dev/crypto/x509#SystemCertPool + // We set values to paths belonging to the root filesystem, which is read-only, to prevent tampering. + // Note: this will not work on Windows nor Mac OS. Agents are expected to run on Linux nodes. + err := os.Setenv("SSL_CERT_FILE", "/dev/null") + if err != nil { + logrus.Errorf("failed to set env var SSL_CERT_FILE: %s", err.Error()) + } + + err = os.Setenv("SSL_CERT_DIR", "/dev/null") + if err != nil { + logrus.Errorf("failed to set env var SSL_CERT_DIR: %s", err.Error()) + } } cfg := clientcmdapi.Config{ diff --git a/internal/cmd/controller/agent/config.go b/internal/cmd/controller/agent/config.go index b8ad7b26cc..c88983a8f5 100644 --- a/internal/cmd/controller/agent/config.go +++ b/internal/cmd/controller/agent/config.go @@ -12,8 +12,9 @@ import ( ) type ConfigOptions struct { - Labels map[string]string - ClientID string + Labels map[string]string + ClientID string + AgentTLSMode string } func agentConfig(ctx context.Context, agentNamespace, controllerNamespace string, cg *client.Getter, opts *ConfigOptions) ([]runtime.Object, error) { @@ -32,13 +33,14 @@ func agentConfig(ctx context.Context, agentNamespace, controllerNamespace string return nil, err } - return configObjects(agentNamespace, opts.Labels, opts.ClientID) + return configObjects(agentNamespace, opts) } -func configObjects(controllerNamespace string, clusterLabels map[string]string, clientID string) ([]runtime.Object, error) { +func 
configObjects(controllerNamespace string, co *ConfigOptions) ([]runtime.Object, error) { cm, err := config.ToConfigMap(controllerNamespace, config.AgentConfigName, &config.Config{ - Labels: clusterLabels, - ClientID: clientID, + Labels: co.Labels, + ClientID: co.ClientID, + AgentTLSMode: co.AgentTLSMode, }) if err != nil { return nil, err diff --git a/internal/cmd/controller/controllers/cluster/import.go b/internal/cmd/controller/controllers/cluster/import.go index 013a46e1a6..7db4e76714 100644 --- a/internal/cmd/controller/controllers/cluster/import.go +++ b/internal/cmd/controller/controllers/cluster/import.go @@ -92,7 +92,12 @@ func (i *importHandler) onConfig(config *config.Config) error { if cluster.Spec.KubeConfigSecret == "" { continue } - if config.APIServerURL != cluster.Status.APIServerURL || hashStatusField(config.APIServerCA) != cluster.Status.APIServerCAHash { + + hasConfigChanged := config.APIServerURL != cluster.Status.APIServerURL || + hashStatusField(config.APIServerCA) != cluster.Status.APIServerCAHash || + config.AgentTLSMode != cluster.Status.AgentTLSMode + + if hasConfigChanged { logrus.Infof("API server config changed, trigger cluster import for cluster %s/%s", cluster.Namespace, cluster.Name) c := cluster.DeepCopy() c.Status.AgentConfigChanged = true @@ -261,7 +266,19 @@ func (i *importHandler) importCluster(cluster *fleet.Cluster, status fleet.Clust apiServerCA = cfg.APIServerCA } - restConfig, err := i.restConfigFromKubeConfig(secret.Data[config.KubeConfigSecretValueKey]) + if cfg.AgentTLSMode != config.AgentTLSModeStrict && cfg.AgentTLSMode != config.AgentTLSModeSystemStore { + return status, + fmt.Errorf( + "provided config value for agentTLSMode is none of [%q,%q]", + config.AgentTLSModeStrict, + config.AgentTLSModeSystemStore, + ) + } + + restConfig, err := i.restConfigFromKubeConfig( + secret.Data[config.KubeConfigSecretValueKey], + cfg.AgentTLSMode == config.AgentTLSModeSystemStore, + ) if err != nil { return status, err } @@ -328,8 
+345,9 @@ func (i *importHandler) importCluster(cluster *fleet.Cluster, status fleet.Clust APIServerCA: apiServerCA, APIServerURL: apiServerURL, ConfigOptions: agent.ConfigOptions{ - ClientID: cluster.Spec.ClientID, - Labels: clusterLabels, + ClientID: cluster.Spec.ClientID, + Labels: clusterLabels, + AgentTLSMode: cfg.AgentTLSMode, }, ManifestOptions: agent.ManifestOptions{ AgentEnvVars: cluster.Spec.AgentEnvVars, @@ -396,6 +414,8 @@ func (i *importHandler) importCluster(cluster *fleet.Cluster, status fleet.Clust status.AgentConfigChanged = false status.APIServerURL = apiServerURL status.APIServerCAHash = hashStatusField(apiServerCA) + status.AgentTLSMode = cfg.AgentTLSMode + return status, nil } @@ -410,8 +430,9 @@ func isLegacyAgentNamespaceSelectedByUser() bool { cfg.Bootstrap.AgentNamespace == config.LegacyDefaultNamespace } -// restConfigFromKubeConfig checks kubeconfig data and tries to connect to server. If server is behind public CA, remove CertificateAuthorityData in kubeconfig file. -func (i *importHandler) restConfigFromKubeConfig(data []byte) (*rest.Config, error) { +// restConfigFromKubeConfig checks kubeconfig data and tries to connect to server. If server is behind public CA, remove +// CertificateAuthorityData in kubeconfig file unless strict TLS mode is enabled. 
+func (i *importHandler) restConfigFromKubeConfig(data []byte, trustSystemStoreCAs bool) (*rest.Config, error) { clientConfig, err := clientcmd.NewClientConfigFromBytes(data) if err != nil { return nil, err @@ -422,11 +443,10 @@ func (i *importHandler) restConfigFromKubeConfig(data []byte) (*rest.Config, err return nil, err } - if raw.Contexts[raw.CurrentContext] != nil { + if trustSystemStoreCAs && raw.Contexts[raw.CurrentContext] != nil { cluster := raw.Contexts[raw.CurrentContext].Cluster if raw.Clusters[cluster] != nil { - _, err := http.Get(raw.Clusters[cluster].Server) - if err == nil { + if _, err := http.Get(raw.Clusters[cluster].Server); err == nil { raw.Clusters[cluster].CertificateAuthorityData = nil } } diff --git a/internal/config/config.go b/internal/config/config.go index 7c91b8acc2..6cf970407e 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -20,6 +20,8 @@ const ( ManagerConfigName = "fleet-controller" AgentConfigName = "fleet-agent" AgentBootstrapConfigName = "fleet-agent-bootstrap" + AgentTLSModeStrict = "strict" + AgentTLSModeSystemStore = "system-store" Key = "config" // DefaultNamespace is the default for the system namespace, which // contains the manager and agent @@ -101,6 +103,11 @@ type Config struct { // IgnoreClusterRegistrationLabels if set to true, the labels on the cluster registration resource will not be copied to the cluster resource. IgnoreClusterRegistrationLabels bool `json:"ignoreClusterRegistrationLabels,omitempty"` + + // AgentTLSMode supports two values: `system-store` and `strict`. If set to `system-store`, instructs the agent + // to trust CA bundles from the operating system's store. If set to `strict`, then the agent shall only connect + // to a server which uses the exact CA configured when creating/updating the agent. 
+ AgentTLSMode string `json:"agentTLSMode,omitempty"` } type Bootstrap struct { diff --git a/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go b/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go index c96772fead..dff411684c 100644 --- a/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go +++ b/pkg/apis/fleet.cattle.io/v1alpha1/cluster_types.go @@ -153,6 +153,13 @@ type ClusterStatus struct { // APIServerCAHash is a hash of the upstream API server CA, used to detect changes. APIServerCAHash string `json:"apiServerCAHash,omitempty"` + // AgentTLSMode supports two values: `system-store` and `strict`. If set to + // `system-store`, instructs the agent to trust CA bundles from the operating + // system's store. If set to `strict`, then the agent shall only connect to a + // server which uses the exact CA configured when creating/updating the agent. + // +nullable + AgentTLSMode string `json:"agentTLSMode,omitempty"` + // Display contains the number of ready bundles, nodes and a summary state. Display ClusterDisplay `json:"display,omitempty"` // AgentStatus contains information about the agent.