diff --git a/changelog/fragments/1696249276-Start-stop-monitoring-server-based-on-monitoring-config.yaml b/changelog/fragments/1696249276-Start-stop-monitoring-server-based-on-monitoring-config.yaml new file mode 100644 index 00000000000..141c18715bf --- /dev/null +++ b/changelog/fragments/1696249276-Start-stop-monitoring-server-based-on-monitoring-config.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Start/stop monitoring server based on monitoring config + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: elastic-agent + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 3492 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: 2735 diff --git a/internal/pkg/agent/application/coordinator/coordinator.go b/internal/pkg/agent/application/coordinator/coordinator.go index e6a2dbc182f..856c624076f 100644 --- a/internal/pkg/agent/application/coordinator/coordinator.go +++ b/internal/pkg/agent/application/coordinator/coordinator.go @@ -158,6 +158,10 @@ type ComponentsModifier func(comps []component.Component, cfg map[string]interfa // CoordinatorShutdownTimeout is how long the coordinator will wait during shutdown to receive a "clean" shutdown from other components var CoordinatorShutdownTimeout = time.Second * 5 +type configReloader interface { + Reload(*config.Config) error +} + // Coordinator manages the entire state of the Elastic Agent. // // All configuration changes, update variables, and upgrade actions are managed and controlled by the coordinator. @@ -173,6 +177,8 @@ type Coordinator struct { upgradeMgr UpgradeManager monitorMgr MonitorManager + monitoringServerReloader configReloader + runtimeMgr RuntimeManager configMgr ConfigManager varsMgr VarsManager @@ -365,6 +371,10 @@ func (c *Coordinator) State() State { return c.stateBroadcaster.Get() } +func (c *Coordinator) RegisterMonitoringServer(s configReloader) { + c.monitoringServerReloader = s +} + // StateSubscribe returns a channel that reports changes in Coordinator state. // // bufferLen specifies how many state changes should be queued in addition to @@ -1008,6 +1018,12 @@ func (c *Coordinator) generateAST(cfg *config.Config) (err error) { } } + if c.monitoringServerReloader != nil { + if err := c.monitoringServerReloader.Reload(cfg); err != nil { + return fmt.Errorf("failed to reload monitor manager configuration: %w", err) + } + } + c.ast = rawAst return nil } diff --git a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go index 1a49d0b7927..7e6d2b224a5 100644 --- a/internal/pkg/agent/application/coordinator/coordinator_unit_test.go +++ b/internal/pkg/agent/application/coordinator/coordinator_unit_test.go @@ -29,6 +29,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" "github.com/elastic/elastic-agent/internal/pkg/agent/transpiler" "github.com/elastic/elastic-agent/internal/pkg/config" + monitoringCfg "github.com/elastic/elastic-agent/internal/pkg/core/monitoring/config" "github.com/elastic/elastic-agent/pkg/component" "github.com/elastic/elastic-agent/pkg/component/runtime" agentclient "github.com/elastic/elastic-agent/pkg/control/v2/client" @@ -992,3 +993,25 @@ func emptyAST(t *testing.T) *transpiler.AST { require.NoError(t, err, "AST creation must succeed") return ast } + +type fakeMonitoringServer struct { + startTriggered bool + stopTriggered bool + isRunning bool +} + +func (fs *fakeMonitoringServer) Start() { + fs.startTriggered = true + fs.isRunning = true +} + +func (fs *fakeMonitoringServer) Stop() error { + fs.stopTriggered = true + fs.isRunning = false + return nil +} + +func (fs *fakeMonitoringServer) Reset() { + fs.stopTriggered = false + fs.startTriggered = false +} diff --git a/internal/pkg/agent/application/monitoring/server.go b/internal/pkg/agent/application/monitoring/server.go index 390a472d5ed..7f43bf1866b 100644 --- a/internal/pkg/agent/application/monitoring/server.go +++ b/internal/pkg/agent/application/monitoring/server.go @@ -20,6 +20,9 @@ import ( "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/monitoring" "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/reload" + "github.com/elastic/elastic-agent/internal/pkg/agent/errors" + monitoringCfg "github.com/elastic/elastic-agent/internal/pkg/core/monitoring/config" "github.com/elastic/elastic-agent/pkg/core/logger" ) @@ -32,7 +35,8 @@ func NewServer( coord *coordinator.Coordinator, enableProcessStats bool, operatingSystem string, -) (*api.Server, error) { + mcfg *monitoringCfg.MonitoringConfig, +) (*reload.ServerReloader, error) { if err := createAgentMonitoringDrop(endpointConfig.Host); err != nil { // log but ignore log.Errorf("failed to create monitoring drop: %v", err) @@ -43,7 +47,7 @@ func NewServer( return nil, err } - return exposeMetricsEndpoint(log, cfg, ns, tracer, coord, enableProcessStats, operatingSystem) + return exposeMetricsEndpoint(log, cfg, ns, tracer, coord, enableProcessStats, operatingSystem, mcfg) } func exposeMetricsEndpoint( @@ -54,7 +58,8 @@ func exposeMetricsEndpoint( coord *coordinator.Coordinator, enableProcessStats bool, operatingSystem string, -) (*api.Server, error) { + mcfg *monitoringCfg.MonitoringConfig, +) (*reload.ServerReloader, error) { r := mux.NewRouter() if tracer != nil { r.Use(apmgorilla.Middleware(apmgorilla.WithTracer(tracer))) @@ -72,7 +77,15 @@ func exposeMetricsEndpoint( mux := http.NewServeMux() mux.Handle("/", r) - return api.New(log, mux, config) + newServerFn := func() (reload.ServerController, error) { + apiServer, err := api.New(log, mux, config) + if err != nil { + return nil, errors.New(err, "failed to create api server") + } + return apiServer, nil + } + + return reload.NewServerReloader(newServerFn, log, mcfg), nil } func createAgentMonitoringDrop(drop string) error { diff --git a/internal/pkg/agent/cmd/run.go b/internal/pkg/agent/cmd/run.go index 91d470ac00b..353c9d1e7a7 100644 --- a/internal/pkg/agent/cmd/run.go +++ b/internal/pkg/agent/cmd/run.go @@ -32,6 +32,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/application/filelock" "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" "github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/monitoring/reload" "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec" "github.com/elastic/elastic-agent/internal/pkg/agent/application/secret" @@ -248,12 +249,15 @@ func run(override cfgOverrider, testingMode bool, fleetInitTimeout time.Duration } defer composable.Close() - serverStopFn, err := setupMetrics(l, cfg.Settings.DownloadConfig.OS(), cfg.Settings.MonitoringConfig, tracer, coord) + monitoringServer, err := setupMetrics(l, cfg.Settings.DownloadConfig.OS(), cfg.Settings.MonitoringConfig, tracer, coord) if err != nil { return err } + coord.RegisterMonitoringServer(monitoringServer) defer func() { - _ = serverStopFn() + if monitoringServer != nil { + _ = monitoringServer.Stop() + } }() diagHooks := diagnostics.GlobalHooks() @@ -547,7 +551,7 @@ func setupMetrics( cfg *monitoringCfg.MonitoringConfig, tracer *apm.Tracer, coord *coordinator.Coordinator, -) (func() error, error) { +) (*reload.ServerReloader, error) { if err := report.SetupMetrics(logger, agentName, version.GetDefaultVersion()); err != nil { return nil, err } @@ -558,14 +562,12 @@ func setupMetrics( Host: monitoring.AgentMonitoringEndpoint(operatingSystem, cfg), } - s, err := monitoring.NewServer(logger, endpointConfig, monitoringLib.GetNamespace, tracer, coord, isProcessStatsEnabled(cfg), operatingSystem) + s, err := monitoring.NewServer(logger, endpointConfig, monitoringLib.GetNamespace, tracer, coord, isProcessStatsEnabled(cfg), operatingSystem, cfg) if err != nil { return nil, errors.New(err, "could not start the HTTP server for the API") } - s.Start() - // return server stopper - return s.Stop, nil + return s, nil } func isProcessStatsEnabled(cfg *monitoringCfg.MonitoringConfig) bool {