Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[action] [PR:13066] [BGP Memory Leak] Add FRR memory checker in the continuous link flap test #14234

Merged
merged 1 commit into from
Aug 28, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions tests/platform_tests/link_flap/test_cont_link_flap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import logging
import pytest
import time
import math

from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common import port_toggle
Expand All @@ -28,6 +30,13 @@ class TestContLinkFlap(object):
TestContLinkFlap class for continuous link flap
"""

def get_frr_daemon_memory_usage(self, duthost, daemon):
    """Return the "Used ordinary blocks" figure reported by vtysh for an FRR daemon.

    Runs ``vtysh -c "show memory <daemon>"`` once on the DUT, logs the full
    output, and extracts the value from that same output so that the logged
    snapshot and the returned number always describe the same sample.

    Args:
        duthost: Host object exposing ``shell(cmd)`` that returns a mapping
            with a ``"stdout"`` entry.
        daemon: Name of the FRR daemon to query (e.g. ``"bgpd"``, ``"zebra"``).

    Returns:
        The second-to-last whitespace-separated token of the last line
        containing ``"Used ordinary blocks"``, as a string.
        NOTE(review): presumably this is the numeric part of something like
        ``"11 MiB"``; the unit token is dropped, so callers comparing two
        readings assume the unit did not change -- confirm against FRR output.

    Raises:
        ValueError: If the output has no ``"Used ordinary blocks"`` line.
    """
    frr_daemon_memory_output = duthost.shell(f'vtysh -c "show memory {daemon}"')["stdout"]
    # Lazy %-style arguments: the message is identical, but a literal "%" in
    # the daemon name can no longer be misread as a format directive.
    logging.info("%s memory status: \n%s", daemon, frr_daemon_memory_output)

    # Parse the output already captured instead of issuing a second remote
    # "vtysh ... | grep" call. Walk backwards so the last matching line wins,
    # which is what taking [-2] of the whole grep output selected.
    for line in reversed(frr_daemon_memory_output.splitlines()):
        if "Used ordinary blocks" in line:
            return line.split()[-2]

    raise ValueError(
        f'"Used ordinary blocks" not found in "show memory {daemon}" output')

def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hwsku_frontend_hostname,
fanouthosts, bring_up_dut_interfaces, tbinfo):
"""
Expand All @@ -38,10 +47,10 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws
to cause BGP Flaps.
2.) Flap all interfaces on peer (FanOutLeaf) one by one 1-3 iteration
to cause BGP Flaps.
3.) Watch for memory (show system-memory) ,orchagent CPU Utilization
and Redis_memory.
3.) Watch for memory (show system-memory), FRR daemons memory (vtysh -c "show memory bgpd/zebra"),
orchagent CPU Utilization and Redis_memory.

Pass Criteria: All routes must be re-learned with < 5% increase in Redis and
Pass Criteria: All routes must be re-learned with < 5% increase in Redis/FRR memory usage and
ORCH agent CPU consumption below threshold after 3 mins after stopping flaps.
"""
duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]
Expand Down Expand Up @@ -69,6 +78,13 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws
logging.info("IPv4 routes: start {}, summary {}".format(start_time_ipv4_route_counts, sumv4))
logging.info("IPv6 routes: start {}, summary {}".format(start_time_ipv6_route_counts, sumv6))

# Record FRR daemons memory status at start
frr_demons_to_check = ['bgpd', 'zebra']
start_time_frr_daemon_memory = {}
for daemon in frr_demons_to_check:
start_time_frr_daemon_memory[daemon] = self.get_frr_daemon_memory_usage(duthost, daemon)
logging.info(f"{daemon} memory usage at start: \n%s", start_time_frr_daemon_memory[daemon])

# Make Sure Orch CPU < orch_cpu_threshold before starting test.
logging.info("Make Sure orchagent CPU utilization is less that %d before link flap", orch_cpu_threshold)
pytest_assert(wait_until(100, 2, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold),
Expand Down Expand Up @@ -124,10 +140,42 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws

pytest.fail(str(failmsg))

# Wait 30s for the memory usage to be stable
time.sleep(30)

# Record memory status at end
memory_output = duthost.shell("show system-memory")["stdout"]
logging.info("Memory Status at end: %s", memory_output)

# Check the FRR daemons memory usage at end
end_time_frr_daemon_memory = {}
incr_frr_daemon_memory_threshold = {}
for daemon in frr_demons_to_check:
incr_frr_daemon_memory_threshold[daemon] = 10 if tbinfo["topo"]["type"] in ["m0", "mx"] else 5
min_threshold_percent = 1 / float(start_time_frr_daemon_memory[daemon]) * 100
if min_threshold_percent > incr_frr_daemon_memory_threshold[daemon]:
incr_frr_daemon_memory_threshold[daemon] = math.ceil(min_threshold_percent)
logging.info(f"The memory increment threshold for frr daemon {daemon} "
f"is {incr_frr_daemon_memory_threshold[daemon]}%")
for daemon in frr_demons_to_check:
# Record FRR daemon memory status at end
end_time_frr_daemon_memory[daemon] = self.get_frr_daemon_memory_usage(duthost, daemon)
logging.info(f"{daemon} memory usage at end: \n%s", end_time_frr_daemon_memory[daemon])

# Calculate diff in FRR daemon memory
incr_frr_daemon_memory = \
float(end_time_frr_daemon_memory[daemon]) - float(start_time_frr_daemon_memory[daemon])
logging.info(f"{daemon} absolute difference: %d", incr_frr_daemon_memory)

# Check FRR daemon memory only if it is increased else default to pass
if incr_frr_daemon_memory > 0:
percent_incr_frr_daemon_memory = \
(incr_frr_daemon_memory / float(start_time_frr_daemon_memory[daemon])) * 100
logging.info(f"{daemon} memory percentage increase: %d", percent_incr_frr_daemon_memory)
pytest_assert(percent_incr_frr_daemon_memory < incr_frr_daemon_memory_threshold[daemon],
f"{daemon} memory increase more than expected: "
f"{incr_frr_daemon_memory_threshold[daemon]}%")

# Record orchagent CPU utilization at end
orch_cpu = duthost.shell(
"COLUMNS=512 show processes cpu | grep orchagent | awk '{print $1, $9}'")["stdout_lines"]
Expand Down
Loading