Enable log errors check test and filter for acceptable errors (#3616)
(cherry picked from commit 96d46f9)
michalpristas authored and mergify[bot] committed Oct 19, 2023
1 parent 11c32ba commit be7233b
Showing 9 changed files with 36 additions and 11 deletions.
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/dispatcher/dispatcher.go
@@ -248,7 +248,7 @@ func (ad *ActionDispatcher) scheduleRetry(ctx context.Context, action fleetapi.R
attempt := action.RetryAttempt()
d, err := ad.rt.GetWait(attempt)
if err != nil {
ad.log.Errorf("No more reties for action id %s: %v", action.ID(), err)
ad.log.Errorf("No more retries for action id %s: %v", action.ID(), err)
action.SetRetryAttempt(-1)
if err := acker.Ack(ctx, action); err != nil {
ad.log.Errorf("Unable to ack action failure (id %s) to fleet-server: %v", action.ID(), err)
@@ -217,7 +217,7 @@ func (f *FleetGateway) doExecute(ctx context.Context, bo backoff.Backoff) (*flee

if f.checkinFailCounter > 0 {
// Log at same level as error logs above so subsequent successes are visible when log level is set to 'error'.
f.log.Errorf("Checkin request to fleet-server succeeded after %d failures", f.checkinFailCounter)
f.log.Warnf("Checkin request to fleet-server succeeded after %d failures", f.checkinFailCounter)
}

f.checkinFailCounter = 0
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/monitoring/server.go
@@ -35,7 +35,7 @@ func NewServer(
) (*api.Server, error) {
if err := createAgentMonitoringDrop(endpointConfig.Host); err != nil {
// log but ignore
log.Errorf("failed to create monitoring drop: %v", err)
log.Warnf("failed to create monitoring drop: %v", err)
}

cfg, err := config.NewConfigFrom(endpointConfig)
2 changes: 1 addition & 1 deletion internal/pkg/agent/storage/store/action_store.go
@@ -33,7 +33,7 @@ func newActionStore(log *logger.Logger, store storeLoad) (*actionStore, error) {
// and return an empty store.
reader, err := store.Load()
if err != nil {
log.Errorf("failed to load action store, returning empty contents: %v", err.Error())
log.Warnf("failed to load action store, returning empty contents: %v", err.Error())
return &actionStore{log: log, store: store}, nil
}
defer reader.Close()
2 changes: 1 addition & 1 deletion internal/pkg/agent/storage/store/state_store.go
@@ -95,7 +95,7 @@ func NewStateStore(log *logger.Logger, store storeLoad) (*StateStore, error) {
// and return an empty store.
reader, err := store.Load()
if err != nil {
log.Errorf("failed to load state store, returning empty contents: %v", err.Error())
log.Warnf("failed to load state store, returning empty contents: %v", err.Error())
return &StateStore{log: log, store: store}, nil
}
defer reader.Close()
2 changes: 1 addition & 1 deletion internal/pkg/capabilities/upgrade.go
@@ -65,7 +65,7 @@ func allowUpgrade(
for _, cap := range upgradeCaps {
result, err := cap.condition.Eval(varStore, true)
if err != nil {
log.Errorf("failed evaluating eql formula %q, skipping: %v", cap.conditionStr, err)
log.Warnf("failed evaluating eql formula %q, skipping: %v", cap.conditionStr, err)
continue
}
if result {
2 changes: 1 addition & 1 deletion internal/pkg/fleetapi/acker/lazy/lazy_acker.go
@@ -87,7 +87,7 @@ func (f *Acker) Commit(ctx context.Context) (err error) {
// If request failed enqueue all actions with retrier if it is set
if err != nil {
if f.retrier != nil {
f.log.Errorf("lazy acker: failed ack batch, enqueue for retry: %s", actions)
f.log.Warnf("lazy acker: failed ack batch, enqueue for retry: %s", actions)
f.retrier.Enqueue(actions)
return nil
}
7 changes: 7 additions & 0 deletions pkg/testing/tools/estools/elasticsearch.go
@@ -201,6 +201,13 @@ func CheckForErrorsInLogsWithContext(ctx context.Context, client elastictranspor
"log.level": "error",
},
},
+ {
+ "term": map[string]interface{}{
+ "data_stream.namespace": map[string]interface{}{
+ "value": namespace,
+ },
+ },
+ },
},
"must_not": excludeStatements,
},
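The added term clause scopes the error search to the test's data stream namespace, so runs against a shared cluster only match their own logs. Below is a rough Go sketch of the query body this helper builds after the change; buildErrorQuery is a hypothetical name, and the wrapping "must"/"match" keys plus the type of excludeStatements are assumptions inferred from the visible context lines, not taken verbatim from the file.

// Sketch only: approximate shape of the search body assembled by
// CheckForErrorsInLogsWithContext after this commit. The "must" and
// "match" wrappers are assumed; the hunk only shows the log.level value,
// the new data_stream.namespace term, and the must_not exclusions.
func buildErrorQuery(namespace string, excludeStatements []map[string]interface{}) map[string]interface{} {
	return map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"must": []map[string]interface{}{
					{"match": map[string]interface{}{"log.level": "error"}},
					{"term": map[string]interface{}{
						"data_stream.namespace": map[string]interface{}{"value": namespace},
					}},
				},
				"must_not": excludeStatements,
			},
		},
	}
}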
26 changes: 22 additions & 4 deletions testing/integration/monitoring_logs_test.go
@@ -38,7 +38,6 @@ func TestMonitoringLogsShipped(t *testing.T) {
ctx := context.Background()

t.Logf("got namespace: %s", info.Namespace)
t.Skip("Test is flaky; see https://github.com/elastic/elastic-agent/issues/3081")

agentFixture, err := define.NewFixture(t, define.Version())
require.NoError(t, err)
@@ -90,7 +89,7 @@ func TestMonitoringLogsShipped(t *testing.T) {
require.NotZero(t, len(docs.Hits.Hits))
t.Logf("metricbeat: Got %d documents", len(docs.Hits.Hits))

- // Stage 4: make sure all components are health
+ // Stage 4: make sure all components are healthy
t.Log("Making sure all components are healthy")
status, err := agentFixture.ExecStatus(ctx)
require.NoError(t, err,
@@ -101,15 +100,34 @@ func TestMonitoringLogsShipped(t *testing.T) {
c.Name, client.Healthy, client.State(c.State))
}

- // Stage 5: Make sure we have message confirming central management is running
+ // Stage 5: Make sure there are no errors in logs
t.Log("Making sure there are no error logs")
docs = findESDocs(t, func() (estools.Documents, error) {
return estools.CheckForErrorsInLogs(info.ESClient, info.Namespace, []string{
// acceptable error messages (include reason)
"Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated
"Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues
"Failed to download artifact",
"Failed to initialize artifact",
"Failed to apply initial policy from on disk configuration",
"elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart
})
})
t.Logf("errors: Got %d documents", len(docs.Hits.Hits))
for _, doc := range docs.Hits.Hits {
t.Logf("%#v", doc.Source)
}
require.Empty(t, docs.Hits.Hits)

// Stage 6: Make sure we have message confirming central management is running
t.Log("Making sure we have message confirming central management is running")
docs = findESDocs(t, func() (estools.Documents, error) {
return estools.FindMatchingLogLines(info.ESClient, info.Namespace,
"Parsed configuration and determined agent is managed by Fleet")
})
require.NotZero(t, len(docs.Hits.Hits))

- // Stage 6: verify logs from the monitoring components are not sent to the output
+ // Stage 7: verify logs from the monitoring components are not sent to the output
t.Log("Check monitoring logs")
hostname, err := os.Hostname()
if err != nil {
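As a usage note, the new stage can also call the estools helper directly rather than through findESDocs. The sketch below sits in the same test context (t, info, and the testify require package come from the surrounding integration test); the two exclusion strings are examples taken from the acceptable-error list in the diff above.

// Sketch: invoking the namespace-scoped error check added in this commit.
// CheckForErrorsInLogs returns (estools.Documents, error), matching the
// findESDocs callback signature used in the test.
docs, err := estools.CheckForErrorsInLogs(info.ESClient, info.Namespace, []string{
	"Failed to download artifact",
	"Failed to initialize artifact",
})
require.NoError(t, err, "error-log query should succeed")
for _, doc := range docs.Hits.Hits {
	t.Logf("unexpected error log: %#v", doc.Source)
}
require.Empty(t, docs.Hits.Hits, "expected no error-level logs in namespace %s", info.Namespace)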
