Skip to content

Commit

Permalink
[BGP Memory Leak] Add FRR memory checker in the continuous link flap …
Browse files Browse the repository at this point in the history
…test (#13066)

* Add bgp memory checker in the continuous link flap test
  • Loading branch information
congh-nvidia authored and mssonicbld committed Aug 28, 2024
1 parent 11304b4 commit 7f07449
Showing 1 changed file with 51 additions and 3 deletions.
54 changes: 51 additions & 3 deletions tests/platform_tests/link_flap/test_cont_link_flap.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

import logging
import pytest
import time
import math

from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common import port_toggle
Expand All @@ -28,6 +30,13 @@ class TestContLinkFlap(object):
TestContLinkFlap class for continuous link flap
"""

def get_frr_daemon_memory_usage(self, duthost, daemon):
    """
    Return the "Used ordinary blocks" figure reported by an FRR daemon.

    Runs ``vtysh -c "show memory <daemon>"`` once on the DUT, logs the full
    output, and extracts the value from the "Used ordinary blocks" line of
    that same output, so the logged report and the returned value always
    come from a single snapshot.

    Args:
        duthost: DUT host object; its ``shell`` method must return a mapping
            with a "stdout" entry holding the command output.
        daemon: FRR daemon name to query, e.g. "bgpd" or "zebra".

    Returns:
        str: the second-to-last whitespace-separated token of the
        "Used ordinary blocks" line (the value preceding the final token,
        presumably the unit -- the unit itself is not returned or normalized).

    Raises:
        ValueError: if the output contains no "Used ordinary blocks" line.
    """
    marker = "Used ordinary blocks"
    memory_report = duthost.shell(f'vtysh -c "show memory {daemon}"')["stdout"]
    logging.info("%s memory status: \n%s", daemon, memory_report)

    # Filter locally instead of issuing a second remote `... | grep` command.
    matching_lines = [line for line in memory_report.splitlines() if marker in line]
    if not matching_lines:
        raise ValueError(f'"{marker}" not found in "show memory {daemon}" output')

    # Same token selection as splitting the grep output: second-to-last token
    # of the matched text.
    return " ".join(matching_lines).split()[-2]

def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hwsku_frontend_hostname,
fanouthosts, bring_up_dut_interfaces, tbinfo):
"""
Expand All @@ -38,10 +47,10 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws
to cause BGP Flaps.
2.) Flap all interfaces on peer (FanOutLeaf) one by one 1-3 iteration
to cause BGP Flaps.
3.) Watch for memory (show system-memory) ,orchagent CPU Utilization
and Redis_memory.
3.) Watch for memory (show system-memory), FRR daemons memory (vtysh -c "show memory bgpd/zebra"),
orchagent CPU Utilization and Redis_memory.
Pass Criteria: All routes must be re-learned with < 5% increase in Redis and
Pass Criteria: All routes must be re-learned with < 5% increase in Redis/FRR memory usage and
ORCH agent CPU consumption below threshold after 3 mins after stopping flaps.
"""
duthost = duthosts[enum_rand_one_per_hwsku_frontend_hostname]
Expand Down Expand Up @@ -69,6 +78,13 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws
logging.info("IPv4 routes: start {}, summary {}".format(start_time_ipv4_route_counts, sumv4))
logging.info("IPv6 routes: start {}, summary {}".format(start_time_ipv6_route_counts, sumv6))

# Record FRR daemons memory status at start
frr_demons_to_check = ['bgpd', 'zebra']
start_time_frr_daemon_memory = {}
for daemon in frr_demons_to_check:
start_time_frr_daemon_memory[daemon] = self.get_frr_daemon_memory_usage(duthost, daemon)
logging.info(f"{daemon} memory usage at start: \n%s", start_time_frr_daemon_memory[daemon])

# Make Sure Orch CPU < orch_cpu_threshold before starting test.
logging.info("Make Sure orchagent CPU utilization is less that %d before link flap", orch_cpu_threshold)
pytest_assert(wait_until(100, 2, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold),
Expand Down Expand Up @@ -124,10 +140,42 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws

pytest.fail(str(failmsg))

# Wait 30s for the memory usage to be stable
time.sleep(30)

# Record memory status at end
memory_output = duthost.shell("show system-memory")["stdout"]
logging.info("Memory Status at end: %s", memory_output)

# Check the FRR daemons memory usage at end
end_time_frr_daemon_memory = {}
incr_frr_daemon_memory_threshold = {}
for daemon in frr_demons_to_check:
incr_frr_daemon_memory_threshold[daemon] = 10 if tbinfo["topo"]["type"] in ["m0", "mx"] else 5
min_threshold_percent = 1 / float(start_time_frr_daemon_memory[daemon]) * 100
if min_threshold_percent > incr_frr_daemon_memory_threshold[daemon]:
incr_frr_daemon_memory_threshold[daemon] = math.ceil(min_threshold_percent)
logging.info(f"The memory increment threshold for frr daemon {daemon} "
f"is {incr_frr_daemon_memory_threshold[daemon]}%")
for daemon in frr_demons_to_check:
# Record FRR daemon memory status at end
end_time_frr_daemon_memory[daemon] = self.get_frr_daemon_memory_usage(duthost, daemon)
logging.info(f"{daemon} memory usage at end: \n%s", end_time_frr_daemon_memory[daemon])

# Calculate diff in FRR daemon memory
incr_frr_daemon_memory = \
float(end_time_frr_daemon_memory[daemon]) - float(start_time_frr_daemon_memory[daemon])
logging.info(f"{daemon} absolute difference: %d", incr_frr_daemon_memory)

# Check FRR daemon memory only if it increased; otherwise pass by default
if incr_frr_daemon_memory > 0:
percent_incr_frr_daemon_memory = \
(incr_frr_daemon_memory / float(start_time_frr_daemon_memory[daemon])) * 100
logging.info(f"{daemon} memory percentage increase: %d", percent_incr_frr_daemon_memory)
pytest_assert(percent_incr_frr_daemon_memory < incr_frr_daemon_memory_threshold[daemon],
f"{daemon} memory increase more than expected: "
f"{incr_frr_daemon_memory_threshold[daemon]}%")

# Record orchagent CPU utilization at end
orch_cpu = duthost.shell(
"COLUMNS=512 show processes cpu | grep orchagent | awk '{print $1, $9}'")["stdout_lines"]
Expand Down

0 comments on commit 7f07449

Please sign in to comment.