Enable log errors check test and filter for acceptable errors #3616

Merged (8 commits) on Oct 19, 2023
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/dispatcher/dispatcher.go
@@ -248,7 +248,7 @@ func (ad *ActionDispatcher) scheduleRetry(ctx context.Context, action fleetapi.R
attempt := action.RetryAttempt()
d, err := ad.rt.GetWait(attempt)
if err != nil {
-ad.log.Errorf("No more reties for action id %s: %v", action.ID(), err)
+ad.log.Errorf("No more retries for action id %s: %v", action.ID(), err)
action.SetRetryAttempt(-1)
if err := acker.Ack(ctx, action); err != nil {
ad.log.Errorf("Unable to ack action failure (id %s) to fleet-server: %v", action.ID(), err)
@@ -217,7 +217,7 @@ func (f *FleetGateway) doExecute(ctx context.Context, bo backoff.Backoff) (*flee

if f.checkinFailCounter > 0 {
// Log at same level as error logs above so subsequent successes are visible when log level is set to 'error'.
-f.log.Errorf("Checkin request to fleet-server succeeded after %d failures", f.checkinFailCounter)
+f.log.Warnf("Checkin request to fleet-server succeeded after %d failures", f.checkinFailCounter)
}

f.checkinFailCounter = 0
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/monitoring/server.go
@@ -35,7 +35,7 @@ func NewServer(
) (*api.Server, error) {
if err := createAgentMonitoringDrop(endpointConfig.Host); err != nil {
// log but ignore
-log.Errorf("failed to create monitoring drop: %v", err)
+log.Warnf("failed to create monitoring drop: %v", err)
}

cfg, err := config.NewConfigFrom(endpointConfig)
2 changes: 1 addition & 1 deletion internal/pkg/agent/storage/store/action_store.go
@@ -33,7 +33,7 @@ func newActionStore(log *logger.Logger, store storeLoad) (*actionStore, error) {
// and return an empty store.
reader, err := store.Load()
if err != nil {
-log.Errorf("failed to load action store, returning empty contents: %v", err.Error())
+log.Warnf("failed to load action store, returning empty contents: %v", err.Error())
return &actionStore{log: log, store: store}, nil
}
defer reader.Close()
2 changes: 1 addition & 1 deletion internal/pkg/agent/storage/store/state_store.go
@@ -95,7 +95,7 @@ func NewStateStore(log *logger.Logger, store storeLoad) (*StateStore, error) {
// and return an empty store.
reader, err := store.Load()
if err != nil {
-log.Errorf("failed to load state store, returning empty contents: %v", err.Error())
+log.Warnf("failed to load state store, returning empty contents: %v", err.Error())
return &StateStore{log: log, store: store}, nil
}
defer reader.Close()
2 changes: 1 addition & 1 deletion internal/pkg/capabilities/upgrade.go
@@ -65,7 +65,7 @@ func allowUpgrade(
for _, cap := range upgradeCaps {
result, err := cap.condition.Eval(varStore, true)
if err != nil {
-log.Errorf("failed evaluating eql formula %q, skipping: %v", cap.conditionStr, err)
+log.Warnf("failed evaluating eql formula %q, skipping: %v", cap.conditionStr, err)
continue
}
if result {
2 changes: 1 addition & 1 deletion internal/pkg/fleetapi/acker/lazy/lazy_acker.go
@@ -87,7 +87,7 @@ func (f *Acker) Commit(ctx context.Context) (err error) {
// If request failed enqueue all actions with retrier if it is set
if err != nil {
if f.retrier != nil {
-f.log.Errorf("lazy acker: failed ack batch, enqueue for retry: %s", actions)
+f.log.Warnf("lazy acker: failed ack batch, enqueue for retry: %s", actions)
f.retrier.Enqueue(actions)
return nil
}
7 changes: 7 additions & 0 deletions pkg/testing/tools/estools/elasticsearch.go
@@ -201,6 +201,13 @@ func CheckForErrorsInLogsWithContext(ctx context.Context, client elastictranspor
"log.level": "error",
},
},
+{
+"term": map[string]interface{}{
+"data_stream.namespace": map[string]interface{}{
+"value": namespace,
+},
+},
+},
},
"must_not": excludeStatements,
},
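For context, the added term clause scopes the error search to the current test run's data stream namespace, so the check no longer picks up error logs written by other runs. A rough sketch of the bool query it appears to slot into is shown below; only the `log.level` clause, the new `data_stream.namespace` term, and the `must_not: excludeStatements` field are visible in this diff, so the function name, the `filter` key, and the overall shape are assumptions for illustration.

```go
// buildErrorQuery is a hedged reconstruction, not the repository's actual helper.
// Only the "log.level" clause, the new "data_stream.namespace" term, and the
// "must_not": excludeStatements field are confirmed by the diff above; the
// "filter" key and the surrounding structure are assumed.
func buildErrorQuery(namespace string, excludeStatements []map[string]interface{}) map[string]interface{} {
	return map[string]interface{}{
		"query": map[string]interface{}{
			"bool": map[string]interface{}{
				"filter": []map[string]interface{}{
					{
						// only documents logged at error level
						"match": map[string]interface{}{
							"log.level": "error",
						},
					},
					{
						// the new clause: restrict hits to the current test namespace
						"term": map[string]interface{}{
							"data_stream.namespace": map[string]interface{}{
								"value": namespace,
							},
						},
					},
				},
				// acceptable/expected errors supplied by the caller are excluded
				"must_not": excludeStatements,
			},
		},
	}
}
```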
26 changes: 22 additions & 4 deletions testing/integration/monitoring_logs_test.go
@@ -38,7 +38,6 @@ func TestMonitoringLogsShipped(t *testing.T) {
ctx := context.Background()

t.Logf("got namespace: %s", info.Namespace)
-t.Skip("Test is flaky; see https://github.com/elastic/elastic-agent/issues/3081")

agentFixture, err := define.NewFixture(t, define.Version())
require.NoError(t, err)
@@ -90,7 +89,7 @@ func TestMonitoringLogsShipped(t *testing.T) {
require.NotZero(t, len(docs.Hits.Hits))
t.Logf("metricbeat: Got %d documents", len(docs.Hits.Hits))

-// Stage 4: make sure all components are health
+// Stage 4: make sure all components are healthy
t.Log("Making sure all components are healthy")
status, err := agentFixture.ExecStatus(ctx)
require.NoError(t, err,
@@ -101,15 +100,34 @@ func TestMonitoringLogsShipped(t *testing.T) {
c.Name, client.Healthy, client.State(c.State))
}

-// Stage 5: Make sure we have message confirming central management is running
+// Stage 5: Make sure there are no errors in logs
+t.Log("Making sure there are no error logs")
+docs = findESDocs(t, func() (estools.Documents, error) {
+return estools.CheckForErrorsInLogs(info.ESClient, info.Namespace, []string{
+// acceptable error messages (include reason)
+"Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated
+"Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues
+"Failed to download artifact",
+"Failed to initialize artifact",
+"Failed to apply initial policy from on disk configuration",
+"elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart
+})
+})
+t.Logf("errors: Got %d documents", len(docs.Hits.Hits))
+for _, doc := range docs.Hits.Hits {
+t.Logf("%#v", doc.Source)
+}
+require.Empty(t, docs.Hits.Hits)
+
+// Stage 6: Make sure we have message confirming central management is running
t.Log("Making sure we have message confirming central management is running")
docs = findESDocs(t, func() (estools.Documents, error) {
return estools.FindMatchingLogLines(info.ESClient, info.Namespace,
"Parsed configuration and determined agent is managed by Fleet")
})
require.NotZero(t, len(docs.Hits.Hits))

-// Stage 6: verify logs from the monitoring components are not sent to the output
+// Stage 7: verify logs from the monitoring components are not sent to the output
t.Log("Check monitoring logs")
hostname, err := os.Hostname()
if err != nil {
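The acceptable-error strings passed to estools.CheckForErrorsInLogs in Stage 5 above presumably end up as the excludeStatements consumed by the query's must_not section shown earlier. A minimal sketch of one way such clauses could be built is below; the helper name and the choice of a match_phrase query on the message field are assumptions, since that conversion is not part of this diff.

```go
// toExcludeStatements is a hypothetical helper, not code from this PR: it shows
// one plausible way the acceptable-error strings could be turned into "must_not"
// clauses for the bool query sketched earlier. Matching the "message" field with
// match_phrase is an assumption.
func toExcludeStatements(acceptableErrors []string) []map[string]interface{} {
	clauses := make([]map[string]interface{}, 0, len(acceptableErrors))
	for _, msg := range acceptableErrors {
		clauses = append(clauses, map[string]interface{}{
			"match_phrase": map[string]interface{}{
				"message": msg,
			},
		})
	}
	return clauses
}
```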