Skip to content

Commit

Permalink
Fix TestInstallAndCLIUninstallWithEndpointSecurity integration test…
Browse files Browse the repository at this point in the history
… flakiness (#3410)

* Return error if more than one Agent with same hostname is found in Fleet

* Fix make call

* Un-enroll Agent on test cleanup

* Use policy ID and hostname to uniquely find Agent

* Passing policy ID in more calls

* Fix implicit memory aliasing

* [Testing] Increasing timeout for status check to 10m

* Get Agent by ID

* Revert "Get Agent by ID"

This reverts commit 1c7da6e.
  • Loading branch information
ycombinator authored Sep 18, 2023
1 parent 23fe2ef commit b19e950
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 45 deletions.
43 changes: 27 additions & 16 deletions pkg/testing/tools/agents.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,57 +19,68 @@ import (
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
)

// GetAgentByHostnameFromList get an agent by the local_metadata.host.name property, reading from the agents list
func GetAgentByHostnameFromList(client *kibana.Client, hostname string) (*kibana.AgentExisting, error) {
// GetAgentByPolicyIDAndHostnameFromList get an agent by the local_metadata.host.name property, reading from the agents list
func GetAgentByPolicyIDAndHostnameFromList(client *kibana.Client, policyID, hostname string) (*kibana.AgentExisting, error) {
listAgentsResp, err := client.ListAgents(context.Background(), kibana.ListAgentsRequest{})
if err != nil {
return nil, err
}

for _, item := range listAgentsResp.Items {
hostnameAgents := make([]*kibana.AgentExisting, 0)
for i, item := range listAgentsResp.Items {
agentHostname := item.LocalMetadata.Host.Hostname
if agentHostname == hostname {
return &item, nil
agentPolicyID := item.PolicyID

if agentHostname == hostname && agentPolicyID == policyID {
hostnameAgents = append(hostnameAgents, &listAgentsResp.Items[i])
}
}

return nil, fmt.Errorf("unable to find agent with hostname [%s]", hostname)
if len(hostnameAgents) == 0 {
return nil, fmt.Errorf("unable to find agent with hostname [%s]", hostname)
}

if len(hostnameAgents) > 1 {
return nil, fmt.Errorf("found %d agents with hostname [%s]; expected to find only one", len(hostnameAgents), hostname)
}

return hostnameAgents[0], nil
}

func GetAgentStatus(client *kibana.Client) (string, error) {
func GetAgentStatus(client *kibana.Client, policyID string) (string, error) {
hostname, err := os.Hostname()
if err != nil {
return "", err
}

agent, err := GetAgentByHostnameFromList(client, hostname)
agent, err := GetAgentByPolicyIDAndHostnameFromList(client, policyID, hostname)
if err != nil {
return "", err
}

return agent.Status, nil
}

func GetAgentVersion(client *kibana.Client) (string, error) {
func GetAgentVersion(client *kibana.Client, policyID string) (string, error) {
hostname, err := os.Hostname()
if err != nil {
return "", err
}

agent, err := GetAgentByHostnameFromList(client, hostname)
agent, err := GetAgentByPolicyIDAndHostnameFromList(client, policyID, hostname)
if err != nil {
return "", err
}

return agent.Agent.Version, err
}

func UnEnrollAgent(client *kibana.Client) error {
func UnEnrollAgent(client *kibana.Client, policyID string) error {
hostname, err := os.Hostname()
if err != nil {
return err
}
agentID, err := GetAgentIDByHostname(client, hostname)
agentID, err := GetAgentIDByHostname(client, policyID, hostname)
if err != nil {
return err
}
Expand All @@ -86,20 +97,20 @@ func UnEnrollAgent(client *kibana.Client) error {
return nil
}

func GetAgentIDByHostname(client *kibana.Client, hostname string) (string, error) {
agent, err := GetAgentByHostnameFromList(client, hostname)
func GetAgentIDByHostname(client *kibana.Client, policyID, hostname string) (string, error) {
agent, err := GetAgentByPolicyIDAndHostnameFromList(client, policyID, hostname)
if err != nil {
return "", err
}
return agent.Agent.ID, nil
}

func UpgradeAgent(client *kibana.Client, version string) error {
func UpgradeAgent(client *kibana.Client, policyID, version string) error {
hostname, err := os.Hostname()
if err != nil {
return err
}
agentID, err := GetAgentIDByHostname(client, hostname)
agentID, err := GetAgentIDByHostname(client, policyID, hostname)
if err != nil {
return err
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/testing/tools/tools.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ import (
// WaitForAgentStatus returns a niladic function that returns true if the agent
// has reached expectedStatus; false otherwise. The returned function is intended
// for use with assert.Eventually or require.Eventually.
func WaitForAgentStatus(t *testing.T, client *kibana.Client, expectedStatus string) func() bool {
func WaitForAgentStatus(t *testing.T, client *kibana.Client, policyID string, expectedStatus string) func() bool {
return func() bool {
currentStatus, err := GetAgentStatus(client)
currentStatus, err := GetAgentStatus(client, policyID)
if err != nil {
t.Errorf("unable to determine agent status: %s", err.Error())
return false
Expand Down Expand Up @@ -128,14 +128,14 @@ func InstallAgentForPolicy(t *testing.T, ctx context.Context,
}
t.Logf(">>> Ran Enroll. Output: %s", output)

timeout := 5 * time.Minute
timeout := 10 * time.Minute
if deadline, ok := ctx.Deadline(); ok {
timeout = time.Until(deadline)
}
// Wait for Agent to be healthy
require.Eventually(
t,
WaitForAgentStatus(t, kibClient, "online"),
WaitForAgentStatus(t, kibClient, policyID, "online"),
timeout,
10*time.Second,
"Elastic Agent status is not online",
Expand Down
8 changes: 7 additions & 1 deletion testing/integration/endpoint_security_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"text/template"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/google/uuid"
Expand Down Expand Up @@ -135,6 +136,11 @@ func testInstallAndCLIUninstallWithEndpointSecurity(t *testing.T, info *define.I
installOpts, fixture, info.KibanaClient, createPolicyReq)
require.NoError(t, err, "failed to install agent with policy")

t.Cleanup(func() {
t.Log("Un-enrolling Elastic Agent...")
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID))
})

t.Log("Installing Elastic Defend")
pkgPolicyResp, err := installElasticDefendPackage(t, info, policy.ID)
require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp)
Expand Down Expand Up @@ -238,7 +244,7 @@ func testInstallAndUnenrollWithEndpointSecurity(t *testing.T, info *define.Info,
hostname, err := os.Hostname()
require.NoError(t, err)

agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, hostname)
agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, policy.ID, hostname)
require.NoError(t, err)

_, err = info.KibanaClient.UnEnrollAgent(ctx, kibana.UnEnrollAgentRequest{ID: agentID})
Expand Down
36 changes: 18 additions & 18 deletions testing/integration/fqdn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,6 @@ func TestFQDN(t *testing.T) {
origHostname, err := getHostname(context.Background())
require.NoError(t, err)

t.Cleanup(func() {
t.Log("Un-enrolling Elastic Agent...")
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient))

t.Log("Restoring hostname...")
err := setHostname(context.Background(), origHostname, t.Log)
require.NoError(t, err)

t.Log("Restoring original /etc/hosts...")
err = setEtcHosts(origEtcHosts)
require.NoError(t, err)
})

ctx := context.Background()
kibClient := info.KibanaClient

Expand Down Expand Up @@ -101,8 +88,21 @@ func TestFQDN(t *testing.T) {
policy, err := tools.InstallAgentWithPolicy(t, ctx, installOpts, agentFixture, kibClient, createPolicyReq)
require.NoError(t, err)

t.Cleanup(func() {
t.Log("Un-enrolling Elastic Agent...")
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID))

t.Log("Restoring hostname...")
err := setHostname(context.Background(), origHostname, t.Log)
require.NoError(t, err)

t.Log("Restoring original /etc/hosts...")
err = setEtcHosts(origEtcHosts)
require.NoError(t, err)
})

t.Log("Verify that agent name is short hostname")
agent := verifyAgentName(t, shortName, info.KibanaClient)
agent := verifyAgentName(t, policy.ID, shortName, info.KibanaClient)

t.Log("Verify that hostname in `logs-*` and `metrics-*` is short hostname")
verifyHostNameInIndices(t, "logs-*", shortName, info.Namespace, info.ESClient)
Expand Down Expand Up @@ -133,7 +133,7 @@ func TestFQDN(t *testing.T) {
)

t.Log("Verify that agent name is FQDN")
verifyAgentName(t, fqdn, info.KibanaClient)
verifyAgentName(t, policy.ID, fqdn, info.KibanaClient)

t.Log("Verify that hostname in `logs-*` and `metrics-*` is FQDN")
verifyHostNameInIndices(t, "logs-*", fqdn, info.Namespace, info.ESClient)
Expand Down Expand Up @@ -164,7 +164,7 @@ func TestFQDN(t *testing.T) {
)

t.Log("Verify that agent name is short hostname again")
verifyAgentName(t, shortName, info.KibanaClient)
verifyAgentName(t, policy.ID, shortName, info.KibanaClient)

// TODO: Re-enable assertion once https://github.com/elastic/elastic-agent/issues/3078 is
// investigated for root cause and resolved.
Expand All @@ -173,7 +173,7 @@ func TestFQDN(t *testing.T) {
// verifyHostNameInIndices(t, "metrics-*", shortName, info.ESClient)
}

func verifyAgentName(t *testing.T, hostname string, kibClient *kibana.Client) *kibana.AgentExisting {
func verifyAgentName(t *testing.T, policyID, hostname string, kibClient *kibana.Client) *kibana.AgentExisting {
t.Helper()

var agent *kibana.AgentExisting
Expand All @@ -182,7 +182,7 @@ func verifyAgentName(t *testing.T, hostname string, kibClient *kibana.Client) *k
require.Eventually(
t,
func() bool {
agent, err = tools.GetAgentByHostnameFromList(kibClient, hostname)
agent, err = tools.GetAgentByPolicyIDAndHostnameFromList(kibClient, policyID, hostname)
return err == nil && agent != nil
},
5*time.Minute,
Expand Down
2 changes: 1 addition & 1 deletion testing/integration/monitoring_logs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func TestMonitoringLogsShipped(t *testing.T) {
t.Fatalf("could not get hostname to filter Agent: %s", err)
}

agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, hostname)
agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, policy.ID, hostname)
require.NoError(t, err, "could not get Agent ID by hostname")
t.Logf("Agent ID: %q", agentID)

Expand Down
10 changes: 5 additions & 5 deletions testing/integration/upgrade_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,25 +158,25 @@ func testUpgradeFleetManagedElasticAgent(t *testing.T, ctx context.Context, info
require.NoError(t, err)
t.Cleanup(func() {
t.Log("Un-enrolling Elastic Agent...")
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient))
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID))
})

t.Log(`Waiting for enrolled Agent status to be "online"...`)
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, "online"), 2*time.Minute, 10*time.Second, "Agent status is not online")
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 2*time.Minute, 10*time.Second, "Agent status is not online")

t.Logf("Upgrade Elastic Agent to version %s...", toVersion)
err = tools.UpgradeAgent(kibClient, toVersion)
err = tools.UpgradeAgent(kibClient, policy.ID, toVersion)
require.NoError(t, err)

t.Log(`Waiting for enrolled Agent status to be "online"...`)
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online")
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online")

// We remove the `-SNAPSHOT` suffix because, post-upgrade, the version reported
// by the Agent will not contain this suffix, even if a `-SNAPSHOT`-suffixed
// version was used as the target version for the upgrade.
require.Eventually(t, func() bool {
t.Log("Getting Agent version...")
newVersion, err := tools.GetAgentVersion(kibClient)
newVersion, err := tools.GetAgentVersion(kibClient, policy.ID)
if err != nil {
t.Logf("error getting agent version: %v", err)
return false
Expand Down

0 comments on commit b19e950

Please sign in to comment.