From 76104ad527056e51076b4970205aa00e3241b98a Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 14 Nov 2023 13:18:00 +0100 Subject: [PATCH 01/12] Fix index pattern when querying ES The pattern used to query ES was not working on serverless; this commit updates it to a pattern that works on both stateful and serverless. --- pkg/testing/tools/estools/elasticsearch.go | 2 +- testing/integration/logs_ingestion_test.go | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pkg/testing/tools/estools/elasticsearch.go b/pkg/testing/tools/estools/elasticsearch.go index 1c85ed788f3..974434898fb 100644 --- a/pkg/testing/tools/estools/elasticsearch.go +++ b/pkg/testing/tools/estools/elasticsearch.go @@ -488,7 +488,7 @@ func GetLogsForDatasetWithContext(ctx context.Context, client elastictransport.I }, } - return performQueryForRawQuery(ctx, indexQuery, "*ds-logs*", client) + return performQueryForRawQuery(ctx, indexQuery, "logs*", client) } // GetPing performs a basic ping and returns ES config info diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index d9fb2f511a8..a78c28d42c2 100644 --- a/testing/integration/logs_ingestion_test.go +++ b/testing/integration/logs_ingestion_test.go @@ -104,15 +104,11 @@ func testMonitoringLogsAreShipped( ) { // Stage 1: Make sure metricbeat logs are populated t.Log("Making sure metricbeat logs are populated") - require.Eventually(t, - func() bool { - docs := findESDocs(t, func() (estools.Documents, error) { - return estools.GetLogsForDataset(info.ESClient, "elastic_agent.metricbeat") - }) - return len(docs.Hits.Hits) > 0 - }, - 1*time.Minute, 500*time.Millisecond, - "there should be metricbeats logs by now") + docs := findESDocs(t, func() (estools.Documents, error) { + return estools.GetLogsForDataset(info.ESClient, "elastic_agent.metricbeat") + }) + t.Logf("metricbeat: Got %d documents", len(docs.Hits.Hits)) + require.NotZero(t, len(docs.Hits.Hits)) // 
Stage 2: make sure all components are healthy t.Log("Making sure all components are healthy") @@ -127,7 +123,7 @@ func testMonitoringLogsAreShipped( // Stage 3: Make sure there are no errors in logs t.Log("Making sure there are no error logs") - docs := findESDocs(t, func() (estools.Documents, error) { + docs = findESDocs(t, func() (estools.Documents, error) { return estools.CheckForErrorsInLogs(info.ESClient, info.Namespace, []string{ // acceptable error messages (include reason) "Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated From ef03e68dfb53900d31a83fdc65aa036cb396b344 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Tue, 14 Nov 2023 17:42:46 +0100 Subject: [PATCH 02/12] increase query time and add debug --- testing/integration/logs_ingestion_test.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index a78c28d42c2..e12acf1adde 100644 --- a/testing/integration/logs_ingestion_test.go +++ b/testing/integration/logs_ingestion_test.go @@ -195,14 +195,21 @@ func testMonitoringLogsAreShipped( func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { var docs estools.Documents + start := time.Now() + defer func() { + fmt.Println(">>>>>>>>>>>>>>>>>>>> Took", time.Now().Sub(start)) + }() + count := 0 require.Eventually( t, func() bool { + count++ + fmt.Println(">>>>>>>>>>>>>>>>>>>> Iteration ", count) var err error docs, err = findFn() return err == nil }, - 3*time.Minute, + 8*time.Minute, 15*time.Second, ) From 10aabb3c59fa5fe3c654863dfea3135cf8a12e67 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 15 Nov 2023 08:18:23 +0100 Subject: [PATCH 03/12] more debug --- .buildkite/scripts/steps/integration_tests.sh | 2 +- testing/integration/logs_ingestion_test.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git 
a/.buildkite/scripts/steps/integration_tests.sh b/.buildkite/scripts/steps/integration_tests.sh index 2a129193267..cfec71148a5 100755 --- a/.buildkite/scripts/steps/integration_tests.sh +++ b/.buildkite/scripts/steps/integration_tests.sh @@ -23,7 +23,7 @@ AGENT_PACKAGE_VERSION="${OVERRIDE_AGENT_PACKAGE_VERSION}" DEV=true EXTERNAL=true # Run integration tests set +e -AGENT_VERSION="${OVERRIDE_TEST_AGENT_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET +AGENT_VERSION="${OVERRIDE_TEST_AGENT_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage -v $MAGE_TARGET $MAGE_SUBTARGET TESTS_EXIT_STATUS=$? set -e diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index e12acf1adde..d707d2504f8 100644 --- a/testing/integration/logs_ingestion_test.go +++ b/testing/integration/logs_ingestion_test.go @@ -207,6 +207,9 @@ func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools. 
fmt.Println(">>>>>>>>>>>>>>>>>>>> Iteration ", count) var err error docs, err = findFn() + if err != nil { + fmt.Println(">>>>>>>>>>>>>>>>>>>> error tying to fetch ES docs: ", err) + } return err == nil }, 8*time.Minute, From 867a3d8f6f6de6889132f7929559d7b906943f92 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Wed, 15 Nov 2023 10:24:01 +0100 Subject: [PATCH 04/12] remove debug --- .buildkite/scripts/steps/integration_tests.sh | 2 +- testing/integration/logs_ingestion_test.go | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/.buildkite/scripts/steps/integration_tests.sh b/.buildkite/scripts/steps/integration_tests.sh index cfec71148a5..2a129193267 100755 --- a/.buildkite/scripts/steps/integration_tests.sh +++ b/.buildkite/scripts/steps/integration_tests.sh @@ -23,7 +23,7 @@ AGENT_PACKAGE_VERSION="${OVERRIDE_AGENT_PACKAGE_VERSION}" DEV=true EXTERNAL=true # Run integration tests set +e -AGENT_VERSION="${OVERRIDE_TEST_AGENT_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage -v $MAGE_TARGET $MAGE_SUBTARGET +AGENT_VERSION="${OVERRIDE_TEST_AGENT_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET TESTS_EXIT_STATUS=$? 
set -e diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index d707d2504f8..68418646215 100644 --- a/testing/integration/logs_ingestion_test.go +++ b/testing/integration/logs_ingestion_test.go @@ -195,21 +195,11 @@ func testMonitoringLogsAreShipped( func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { var docs estools.Documents - start := time.Now() - defer func() { - fmt.Println(">>>>>>>>>>>>>>>>>>>> Took", time.Now().Sub(start)) - }() - count := 0 require.Eventually( t, func() bool { - count++ - fmt.Println(">>>>>>>>>>>>>>>>>>>> Iteration ", count) var err error docs, err = findFn() - if err != nil { - fmt.Println(">>>>>>>>>>>>>>>>>>>> error tying to fetch ES docs: ", err) - } return err == nil }, 8*time.Minute, From 999d579f7f39619a6abbb4fbd7b4d04558a526bd Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 16 Nov 2023 13:05:43 +0100 Subject: [PATCH 05/12] try fixing endpoint tests by isolating them --- testing/integration/endpoint_security_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/testing/integration/endpoint_security_test.go b/testing/integration/endpoint_security_test.go index e58afa72c45..46534f91d9b 100644 --- a/testing/integration/endpoint_security_test.go +++ b/testing/integration/endpoint_security_test.go @@ -76,7 +76,7 @@ func TestInstallAndCLIUninstallWithEndpointSecurity(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: false, + Isolate: true, Sudo: true, // requires Agent installation OS: []define.OS{ {Type: define.Linux}, @@ -102,7 +102,7 @@ func TestInstallAndUnenrollWithEndpointSecurity(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: false, + Isolate: true, Sudo: true, // requires Agent installation OS: []define.OS{ {Type: 
define.Linux}, @@ -130,7 +130,7 @@ func TestInstallWithEndpointSecurityAndRemoveEndpointIntegration(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: false, + Isolate: true, Sudo: true, // requires Agent installation OS: []define.OS{ {Type: define.Linux}, @@ -493,7 +493,7 @@ func TestEndpointSecurityNonDefaultBasePath(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: false, + Isolate: true, Sudo: true, // requires Agent installation }) From a45f70a1fc6bdabdea240010d23ac238c9ff8b1c Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 20 Nov 2023 12:57:38 +0100 Subject: [PATCH 06/12] fix batchID length, revert endpoint test changes --- testing/integration/endpoint_security_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/testing/integration/endpoint_security_test.go b/testing/integration/endpoint_security_test.go index 46534f91d9b..e58afa72c45 100644 --- a/testing/integration/endpoint_security_test.go +++ b/testing/integration/endpoint_security_test.go @@ -76,7 +76,7 @@ func TestInstallAndCLIUninstallWithEndpointSecurity(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: true, + Isolate: false, Sudo: true, // requires Agent installation OS: []define.OS{ {Type: define.Linux}, @@ -102,7 +102,7 @@ func TestInstallAndUnenrollWithEndpointSecurity(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: true, + Isolate: false, Sudo: true, // requires Agent installation OS: []define.OS{ {Type: define.Linux}, @@ -130,7 +130,7 @@ func TestInstallWithEndpointSecurityAndRemoveEndpointIntegration(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, 
Local: false, // requires Agent installation - Isolate: true, + Isolate: false, Sudo: true, // requires Agent installation OS: []define.OS{ {Type: define.Linux}, @@ -493,7 +493,7 @@ func TestEndpointSecurityNonDefaultBasePath(t *testing.T) { info := define.Require(t, define.Requirements{ Stack: &define.Stack{}, Local: false, // requires Agent installation - Isolate: true, + Isolate: false, Sudo: true, // requires Agent installation }) From bbfb31968919ff68400c0c8d193078b51f8e0a09 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 23 Nov 2023 12:49:10 +0100 Subject: [PATCH 07/12] only query Elastic-Agent logs datastream --- pkg/testing/tools/estools/elasticsearch.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/testing/tools/estools/elasticsearch.go b/pkg/testing/tools/estools/elasticsearch.go index 974434898fb..8874eec3c44 100644 --- a/pkg/testing/tools/estools/elasticsearch.go +++ b/pkg/testing/tools/estools/elasticsearch.go @@ -362,7 +362,7 @@ func FindMatchingLogLinesWithContext(ctx context.Context, client elastictranspor return Documents{}, fmt.Errorf("error creating ES query: %w", err) } - return performQueryForRawQuery(ctx, queryRaw, "*ds-logs*", client) + return performQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) } @@ -434,7 +434,7 @@ func CheckForErrorsInLogsWithContext(ctx context.Context, client elastictranspor return Documents{}, fmt.Errorf("error creating ES query: %w", err) } - return performQueryForRawQuery(ctx, queryRaw, "*ds-logs*", client) + return performQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) } @@ -461,7 +461,7 @@ func GetLogsForAgentID(client elastictransport.Interface, id string) (Documents, es := esapi.New(client) res, err := es.Search( - es.Search.WithIndex("*.ds-logs*"), + es.Search.WithIndex("logs-elastic_agent*"), es.Search.WithExpandWildcards("all"), es.Search.WithBody(&buf), es.Search.WithTrackTotalHits(true), From 1dd88ce0c55b501a28a7d8cff20fa5992f5df48a Mon Sep 
17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 23 Nov 2023 15:02:47 +0100 Subject: [PATCH 08/12] revert eventually time --- testing/integration/logs_ingestion_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index 68418646215..a78c28d42c2 100644 --- a/testing/integration/logs_ingestion_test.go +++ b/testing/integration/logs_ingestion_test.go @@ -202,7 +202,7 @@ func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools. docs, err = findFn() return err == nil }, - 8*time.Minute, + 3*time.Minute, 15*time.Second, ) From 7f273ff82f753d7acace4b5a61081d1f25b54acf Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Thu, 23 Nov 2023 15:11:49 +0100 Subject: [PATCH 09/12] last index/ds name update --- pkg/testing/tools/estools/elasticsearch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/testing/tools/estools/elasticsearch.go b/pkg/testing/tools/estools/elasticsearch.go index 8874eec3c44..5e4a8c3f7b0 100644 --- a/pkg/testing/tools/estools/elasticsearch.go +++ b/pkg/testing/tools/estools/elasticsearch.go @@ -488,7 +488,7 @@ func GetLogsForDatasetWithContext(ctx context.Context, client elastictransport.I }, } - return performQueryForRawQuery(ctx, indexQuery, "logs*", client) + return performQueryForRawQuery(ctx, indexQuery, "logs-elastic_agent*", client) } // GetPing performs a basic ping and returns ES config info From d610f901ec15f2d378bded62960fc7dcc802c891 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 27 Nov 2023 10:01:37 +0100 Subject: [PATCH 10/12] improve test error message --- testing/integration/logs_ingestion_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index a78c28d42c2..9757c11db05 100644 --- a/testing/integration/logs_ingestion_test.go +++ 
b/testing/integration/logs_ingestion_test.go @@ -108,7 +108,8 @@ func testMonitoringLogsAreShipped( return estools.GetLogsForDataset(info.ESClient, "elastic_agent.metricbeat") }) t.Logf("metricbeat: Got %d documents", len(docs.Hits.Hits)) - require.NotZero(t, len(docs.Hits.Hits)) + require.NotZero(t, len(docs.Hits.Hits), + "Looking for logs in dataset 'elastic_agent.metricbeat'") // Stage 2: make sure all components are healthy t.Log("Making sure all components are healthy") From a081090da71999190b80e4fd800cf789e9770e64 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 27 Nov 2023 10:01:51 +0100 Subject: [PATCH 11/12] debug logs --- pkg/testing/tools/estools/elasticsearch.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pkg/testing/tools/estools/elasticsearch.go b/pkg/testing/tools/estools/elasticsearch.go index 5e4a8c3f7b0..36788dd39d7 100644 --- a/pkg/testing/tools/estools/elasticsearch.go +++ b/pkg/testing/tools/estools/elasticsearch.go @@ -533,7 +533,22 @@ func performQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{ return Documents{}, fmt.Errorf("error performing ES search: %w", err) } - return handleDocsResponse(res) + docs, err := handleDocsResponse(res) + + fmt.Println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + fmt.Println("Query:") + query, debugErr := json.MarshalIndent(queryRaw, "|", " ") + if debugErr != nil { + fmt.Println("Error marshalling 'queryRaw':", debugErr) + return docs, err + } + fmt.Println("Raw Query") + fmt.Println(string(query)) + fmt.Println("Documents docs.Hits.Total.Value: ", docs.Hits.Total.Value) + fmt.Println("Documents len(docs.Hits.Hits): ", len(docs.Hits.Hits)) + fmt.Println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + + return docs, err } // GetLogsForDatastream returns any logs associated with the datastream From ff4edf2824dc9d3ff7b9a230bebe6c2d1c85a0a0 Mon Sep 17 00:00:00 2001 From: Tiago Queiroz Date: Mon, 27 Nov 2023 15:18:12 +0100 
Subject: [PATCH 12/12] findESDocs ensures non-zero documents findESDocs now ensures the function returns at least one document. Debug logs are also removed. --- pkg/testing/tools/estools/elasticsearch.go | 18 +----------- testing/integration/logs_ingestion_test.go | 32 ++++++++++++++++++++-- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pkg/testing/tools/estools/elasticsearch.go b/pkg/testing/tools/estools/elasticsearch.go index 36788dd39d7..c87172ff011 100644 --- a/pkg/testing/tools/estools/elasticsearch.go +++ b/pkg/testing/tools/estools/elasticsearch.go @@ -435,7 +435,6 @@ func CheckForErrorsInLogsWithContext(ctx context.Context, client elastictranspor } return performQueryForRawQuery(ctx, queryRaw, "logs-elastic_agent*", client) - } // GetLogsForDataset returns any logs associated with the datastream @@ -533,22 +532,7 @@ func performQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{ return Documents{}, fmt.Errorf("error performing ES search: %w", err) } - docs, err := handleDocsResponse(res) - - fmt.Println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") - fmt.Println("Query:") - query, debugErr := json.MarshalIndent(queryRaw, "|", " ") - if debugErr != nil { - fmt.Println("Error marshalling 'queryRaw':", debugErr) - return docs, err - } - fmt.Println("Raw Query") - fmt.Println(string(query)) - fmt.Println("Documents docs.Hits.Total.Value: ", docs.Hits.Total.Value) - fmt.Println("Documents len(docs.Hits.Hits): ", len(docs.Hits.Hits)) - fmt.Println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") - - return docs, err + return handleDocsResponse(res) } // GetLogsForDatastream returns any logs associated with the datastream diff --git a/testing/integration/logs_ingestion_test.go b/testing/integration/logs_ingestion_test.go index 9757c11db05..b0dcbc9b4c3 100644 --- a/testing/integration/logs_ingestion_test.go +++ b/testing/integration/logs_ingestion_test.go @@ -124,7 +124,7 @@ func testMonitoringLogsAreShipped( // 
Stage 3: Make sure there are no errors in logs t.Log("Making sure there are no error logs") - docs = findESDocs(t, func() (estools.Documents, error) { + docs = queryESDocs(t, func() (estools.Documents, error) { return estools.CheckForErrorsInLogs(info.ESClient, info.Namespace, []string{ // acceptable error messages (include reason) "Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated @@ -164,7 +164,6 @@ func testMonitoringLogsAreShipped( // this field is not mapped. There is an issue for that: // https://github.com/elastic/integrations/issues/6545 // TODO: use runtime fields while the above issue is not resolved. - docs = findESDocs(t, func() (estools.Documents, error) { return estools.GetLogsForAgentID(info.ESClient, agentID) }) @@ -194,13 +193,18 @@ func testMonitoringLogsAreShipped( } } -func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { +// queryESDocs runs `findFn` until it returns no error. Zero documents returned +// is considered a success. +func queryESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { var docs estools.Documents require.Eventually( t, func() bool { var err error docs, err = findFn() + if err != nil { + t.Logf("got an error querying ES, retrying. Error: %s", err) + } return err == nil }, 3*time.Minute, @@ -210,6 +214,28 @@ func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools. return docs } +// findESDocs runs `findFn` until at least one document is returned and there is no error +func findESDocs(t *testing.T, findFn func() (estools.Documents, error)) estools.Documents { + var docs estools.Documents + require.Eventually( + t, + func() bool { + var err error + docs, err = findFn() + if err != nil { + t.Logf("got an error querying ES, retrying. 
Error: %s", err) + return false + } + + return docs.Hits.Total.Value != 0 + }, + 3*time.Minute, + 15*time.Second, + ) + + return docs +} + func testFlattenedDatastreamFleetPolicy( t *testing.T, ctx context.Context,