From 910002d9a93f25baac6a4871e4856ff04e74cb47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corentin=20N=C3=A9au?= Date: Mon, 9 Dec 2024 17:59:18 +0100 Subject: [PATCH] Make max concurrent reconciles configurable for agent and remaining controllers (#3094) * Make worker count configurable for cluster, clustergroup and imagescan Reconcilers for clusters, cluster groups and image scans now have configurable `controller-runtime` worker counts, similarly to what is already supported for gitrepo, bundle and bundle deployment reconcilers. * Remove workers env vars from cleanup container Fleet's cleanup logic does not make use of controller-runtime reconcilers, hence does not need environment variables specifying how many workers such reconcilers can use. * Make worker count configurable for agent reconcilers Reconcilers living in the Fleet agent now have configurable `controller-runtime` worker counts, similarly to what is already supported in the Fleet controller. --- charts/fleet-agent/templates/deployment.yaml | 8 +++++ charts/fleet-agent/values.yaml | 8 +++++ charts/fleet/templates/deployment.yaml | 24 +++++++-------- charts/fleet/values.yaml | 3 ++ .../controller/bundledeployment_controller.go | 4 ++- .../cmd/agent/controller/drift_controller.go | 4 +++ internal/cmd/agent/operator.go | 14 ++++++++- internal/cmd/agent/root.go | 26 ++++++++++++++++- internal/cmd/controller/operator.go | 6 ++++ .../reconciler/cluster_controller.go | 4 ++- .../reconciler/clustergroup_controller.go | 4 ++- .../reconciler/imagescan_controller.go | 4 ++- internal/cmd/controller/root.go | 29 ++++++++++++++++++- 13 files changed, 119 insertions(+), 19 deletions(-) diff --git a/charts/fleet-agent/templates/deployment.yaml b/charts/fleet-agent/templates/deployment.yaml index 571f346783..8ca21dc724 100644 --- a/charts/fleet-agent/templates/deployment.yaml +++ b/charts/fleet-agent/templates/deployment.yaml @@ -43,6 +43,14 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace + {{- if $.Values.agent.reconciler.workers.bundledeployment }} + - name: BUNDLEDEPLOYMENT_RECONCILER_WORKERS + value: {{ quote $.Values.agent.reconciler.workers.bundledeployment }} + {{- end }} + {{- if $.Values.agent.reconciler.workers.drift }} + - name: DRIFT_RECONCILER_WORKERS + value: {{ quote $.Values.agent.reconciler.workers.drift }} + {{- end }} image: '{{ template "system_default_registry" . }}{{.Values.image.repository}}:{{.Values.image.tag}}' name: fleet-agent command: diff --git a/charts/fleet-agent/values.yaml b/charts/fleet-agent/values.yaml index 74dffada17..f3ef3771ff 100644 --- a/charts/fleet-agent/values.yaml +++ b/charts/fleet-agent/values.yaml @@ -73,3 +73,11 @@ global: debug: false debugLevel: 0 disableSecurityContext: false + +## Fleet agent configuration +agent: + reconciler: + # The number of workers that are allowed for each type of reconciler + workers: + bundledeployment: "50" + drift: "50" diff --git a/charts/fleet/templates/deployment.yaml b/charts/fleet/templates/deployment.yaml index 5f207b6301..2951a84494 100644 --- a/charts/fleet/templates/deployment.yaml +++ b/charts/fleet/templates/deployment.yaml @@ -69,6 +69,18 @@ spec: - name: BUNDLEDEPLOYMENT_RECONCILER_WORKERS value: {{ quote $.Values.controller.reconciler.workers.bundledeployment }} {{- end }} + {{- if $.Values.controller.reconciler.workers.cluster }} + - name: CLUSTER_RECONCILER_WORKERS + value: {{ quote $.Values.controller.reconciler.workers.cluster }} + {{- end }} + {{- if $.Values.controller.reconciler.workers.clustergroup }} + - name: CLUSTERGROUP_RECONCILER_WORKERS + value: {{ quote $.Values.controller.reconciler.workers.clustergroup }} + {{- end }} + {{- if $.Values.controller.reconciler.workers.imagescan }} + - name: IMAGESCAN_RECONCILER_WORKERS + value: {{ quote $.Values.controller.reconciler.workers.imagescan }} + {{- end }} {{- if $.Values.extraEnv }} {{ toYaml $.Values.extraEnv | indent 8}} {{- end }} @@ -128,18 +140,6 @@ spec: - name: CATTLE_ELECTION_RENEW_DEADLINE value: {{$.Values.leaderElection.renewDeadline}} {{- end }} - {{- if $.Values.controller.reconciler.workers.gitrepo }} - - name: GITREPO_RECONCILER_WORKERS - value: {{ quote $.Values.controller.reconciler.workers.gitrepo }} - {{- end }} - {{- if $.Values.controller.reconciler.workers.bundle }} - - name: BUNDLE_RECONCILER_WORKERS - value: {{ quote $.Values.controller.reconciler.workers.bundle }} - {{- end }} - {{- if $.Values.controller.reconciler.workers.bundledeployment }} - - name: BUNDLEDEPLOYMENT_RECONCILER_WORKERS - value: {{ quote $.Values.controller.reconciler.workers.bundledeployment }} - {{- end }} image: '{{ template "system_default_registry" $ }}{{ $.Values.image.repository }}:{{ $.Values.image.tag }}' name: fleet-cleanup imagePullPolicy: "{{ $.Values.image.imagePullPolicy }}" diff --git a/charts/fleet/values.yaml b/charts/fleet/values.yaml index 6e9d874454..61d3157d5f 100644 --- a/charts/fleet/values.yaml +++ b/charts/fleet/values.yaml @@ -104,6 +104,9 @@ controller: gitrepo: "50" bundle: "50" bundledeployment: "50" + cluster: "50" + clustergroup: "50" + imagescan: "50" # Extra environment variables passed to the fleet pods. # extraEnv: diff --git a/internal/cmd/agent/controller/bundledeployment_controller.go b/internal/cmd/agent/controller/bundledeployment_controller.go index e9d91b8dc2..f659f4349f 100644 --- a/internal/cmd/agent/controller/bundledeployment_controller.go +++ b/internal/cmd/agent/controller/bundledeployment_controller.go @@ -41,6 +41,8 @@ type BundleDeploymentReconciler struct { // AgentInfo is the labelSuffix used by the helm deployer AgentScope string + + Workers int } var DefaultRetry = wait.Backoff{ @@ -75,7 +77,7 @@ func (r *BundleDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error { }, }, )). - WithOptions(controller.Options{MaxConcurrentReconciles: 50}). + WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). Complete(r) } diff --git a/internal/cmd/agent/controller/drift_controller.go b/internal/cmd/agent/controller/drift_controller.go index d60ed6a598..36ea8dfbbd 100644 --- a/internal/cmd/agent/controller/drift_controller.go +++ b/internal/cmd/agent/controller/drift_controller.go @@ -16,6 +16,7 @@ import ( errutil "k8s.io/apimachinery/pkg/util/errors" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" @@ -31,6 +32,8 @@ type DriftReconciler struct { DriftDetect *driftdetect.DriftDetect DriftChan chan event.GenericEvent + + Workers int } // SetupWithManager sets up the controller with the Manager. @@ -39,6 +42,7 @@ func (r *DriftReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). Named("drift-reconciler"). WatchesRawSource(src). + WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). Complete(r) } diff --git a/internal/cmd/agent/operator.go b/internal/cmd/agent/operator.go index 2eccc335e6..9e6fc4d970 100644 --- a/internal/cmd/agent/operator.go +++ b/internal/cmd/agent/operator.go @@ -52,7 +52,13 @@ func init() { // start the fleet agent // systemNamespace is the namespace the agent is running in, e.g. cattle-fleet-system -func start(ctx context.Context, localConfig *rest.Config, systemNamespace, agentScope string) error { +func start( + ctx context.Context, + localConfig *rest.Config, + systemNamespace, + agentScope string, + workersOpts AgentReconcilerWorkers, +) error { // Registration is done in an init container. If we are here, we are already registered. // Retrieve the existing config from the registration. // Cannot start without kubeconfig for upstream cluster: @@ -98,6 +104,7 @@ func start(ctx context.Context, localConfig *rest.Config, systemNamespace, agent agentScope, agentConfig, driftChan, + workersOpts.BundleDeployment, ) if err != nil { setupLog.Error(err, "unable to set up bundledeployment reconciler") @@ -121,6 +128,8 @@ func start(ctx context.Context, localConfig *rest.Config, systemNamespace, agent DriftDetect: reconciler.DriftDetect, DriftChan: driftChan, + + Workers: workersOpts.Drift, } if err = driftReconciler.SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "BundleDeployment") @@ -188,6 +197,7 @@ func newReconciler( agentScope string, agentConfig config.Config, driftChan chan event.GenericEvent, + workers int, ) (*controller.BundleDeploymentReconciler, error) { upstreamClient := mgr.GetClient() @@ -281,6 +291,8 @@ func newReconciler( DefaultNamespace: defaultNamespace, AgentScope: agentScope, + + Workers: workers, }, nil } diff --git a/internal/cmd/agent/root.go b/internal/cmd/agent/root.go index ea398b4d90..2030c02d73 100644 --- a/internal/cmd/agent/root.go +++ b/internal/cmd/agent/root.go @@ -5,6 +5,8 @@ import ( "fmt" glog "log" "net/http" + "os" + "strconv" "github.com/spf13/cobra" @@ -27,6 +29,11 @@ type FleetAgent struct { AgentScope string `usage:"An identifier used to scope the agent bundleID names, typically the same as namespace" env:"AGENT_SCOPE"` } +type AgentReconcilerWorkers struct { + BundleDeployment int + Drift int +} + var ( setupLog = ctrl.Log.WithName("setup") zopts = &zap.Options{ @@ -47,6 +54,23 @@ func (a *FleetAgent) Run(cmd *cobra.Command, args []string) error { ctx := log.IntoContext(cmd.Context(), ctrl.Log) localConfig := ctrl.GetConfigOrDie() + workersOpts := AgentReconcilerWorkers{} + + if d := os.Getenv("BUNDLEDEPLOYMENT_RECONCILER_WORKERS"); d != "" { + w, err := strconv.Atoi(d) + if err != nil { + setupLog.Error(err, "failed to parse BUNDLEDEPLOYMENT_RECONCILER_WORKERS", "value", d) + } + workersOpts.BundleDeployment = w + } + + if d := os.Getenv("DRIFT_RECONCILER_WORKERS"); d != "" { + w, err := strconv.Atoi(d) + if err != nil { + setupLog.Error(err, "failed to parse DRIFT_RECONCILER_WORKERS", "value", d) + } + workersOpts.Drift = w + } go func() { glog.Println(http.ListenAndServe("localhost:6060", nil)) // nolint:gosec // Debugging only @@ -55,7 +79,7 @@ func (a *FleetAgent) Run(cmd *cobra.Command, args []string) error { if a.Namespace == "" { return fmt.Errorf("--namespace or env NAMESPACE is required to be set") } - if err := start(ctx, localConfig, a.Namespace, a.AgentScope); err != nil { + if err := start(ctx, localConfig, a.Namespace, a.AgentScope, workersOpts); err != nil { return err } diff --git a/internal/cmd/controller/operator.go b/internal/cmd/controller/operator.go index c119c756fa..73881c62a9 100644 --- a/internal/cmd/controller/operator.go +++ b/internal/cmd/controller/operator.go @@ -98,6 +98,8 @@ func start( Query: builder, ShardID: shardID, + + Workers: workersOpts.Cluster, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Cluster") return err @@ -125,6 +127,8 @@ func start( Client: mgr.GetClient(), Scheme: mgr.GetScheme(), ShardID: shardID, + + Workers: workersOpts.ClusterGroup, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "ClusterGroup") return err @@ -148,6 +152,8 @@ func start( Scheduler: sched, ShardID: shardID, + + Workers: workersOpts.ImageScan, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "ImageScan") return err diff --git a/internal/cmd/controller/reconciler/cluster_controller.go b/internal/cmd/controller/reconciler/cluster_controller.go index 67ffb23543..14fe99e3ec 100644 --- a/internal/cmd/controller/reconciler/cluster_controller.go +++ b/internal/cmd/controller/reconciler/cluster_controller.go @@ -55,6 +55,8 @@ type ClusterReconciler struct { Query BundleQuery ShardID string + + Workers int } // SetupWithManager sets up the controller with the Manager. @@ -101,7 +103,7 @@ func (r *ClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { }), ). WithEventFilter(sharding.FilterByShardID(r.ShardID)). - WithOptions(controller.Options{MaxConcurrentReconciles: 50}). + WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). Complete(r) } diff --git a/internal/cmd/controller/reconciler/clustergroup_controller.go b/internal/cmd/controller/reconciler/clustergroup_controller.go index 7d44a174e1..4f1cb0bb13 100644 --- a/internal/cmd/controller/reconciler/clustergroup_controller.go +++ b/internal/cmd/controller/reconciler/clustergroup_controller.go @@ -39,6 +39,8 @@ type ClusterGroupReconciler struct { client.Client Scheme *runtime.Scheme ShardID string + + Workers int } const MaxReportedNonReadyClusters = 10 @@ -68,7 +70,7 @@ func (r *ClusterGroupReconciler) SetupWithManager(mgr ctrl.Manager) error { handler.EnqueueRequestsFromMapFunc(r.mapClusterToClusterGroup), ). WithEventFilter(sharding.FilterByShardID(r.ShardID)). - WithOptions(controller.Options{MaxConcurrentReconciles: 50}). + WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). Complete(r) } diff --git a/internal/cmd/controller/reconciler/imagescan_controller.go b/internal/cmd/controller/reconciler/imagescan_controller.go index 7e2ced1e76..e99ec053ad 100644 --- a/internal/cmd/controller/reconciler/imagescan_controller.go +++ b/internal/cmd/controller/reconciler/imagescan_controller.go @@ -27,6 +27,8 @@ type ImageScanReconciler struct { Scheduler quartz.Scheduler ShardID string + + Workers int } // SetupWithManager sets up the controller with the Manager. @@ -43,7 +45,7 @@ func (r *ImageScanReconciler) SetupWithManager(mgr ctrl.Manager) error { predicate.LabelChangedPredicate{}, ), )). - WithOptions(controller.Options{MaxConcurrentReconciles: 50}). + WithOptions(controller.Options{MaxConcurrentReconciles: r.Workers}). Complete(r) } diff --git a/internal/cmd/controller/root.go b/internal/cmd/controller/root.go index 4e4c7906bc..d6dd2c1270 100644 --- a/internal/cmd/controller/root.go +++ b/internal/cmd/controller/root.go @@ -32,9 +32,11 @@ type FleetController struct { } type ControllerReconcilerWorkers struct { - GitRepo int Bundle int BundleDeployment int + Cluster int + ClusterGroup int + ImageScan int } type BindAddresses struct { @@ -88,6 +90,7 @@ func (f *FleetController) Run(cmd *cobra.Command, args []string) error { } workersOpts.Bundle = w } + if d := os.Getenv("BUNDLEDEPLOYMENT_RECONCILER_WORKERS"); d != "" { w, err := strconv.Atoi(d) if err != nil { @@ -96,6 +99,30 @@ func (f *FleetController) Run(cmd *cobra.Command, args []string) error { workersOpts.BundleDeployment = w } + if d := os.Getenv("CLUSTER_RECONCILER_WORKERS"); d != "" { + w, err := strconv.Atoi(d) + if err != nil { + setupLog.Error(err, "failed to parse CLUSTER_RECONCILER_WORKERS", "value", d) + } + workersOpts.Cluster = w + } + + if d := os.Getenv("CLUSTERGROUP_RECONCILER_WORKERS"); d != "" { + w, err := strconv.Atoi(d) + if err != nil { + setupLog.Error(err, "failed to parse CLUSTERGROUP_RECONCILER_WORKERS", "value", d) + } + workersOpts.ClusterGroup = w + } + + if d := os.Getenv("IMAGESCAN_RECONCILER_WORKERS"); d != "" { + w, err := strconv.Atoi(d) + if err != nil { + setupLog.Error(err, "failed to parse IMAGESCAN_RECONCILER_WORKERS", "value", d) + } + workersOpts.ImageScan = w + } + go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) // nolint:gosec // Debugging only }()