Skip to content

Commit 039e62e

Browse files
authored
Merge pull request #163 from redhat-partner-solutions/performance
Performance Testing Implementation
2 parents 1b945e4 + 12086fa commit 039e62e

File tree

8 files changed

+371
-21
lines changed

8 files changed

+371
-21
lines changed

.github/workflows/e2e.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,10 @@ jobs:
6767
cd sriov/tests
6868
cp /config/*.yaml ./
6969
if [[ "${mode}" == "full" ]]; then
70-
pytest -v --html=report.html --self-contained-html SR_IOV_* common
70+
pytest -v --html=report.html --self-contained-html SR_IOV_* common --ignore-glob="*test_SR_IOV_Performance.py"
7171
echo "generated=true" >> $GITHUB_ENV
7272
elif [[ ${#tests[@]} -ne 0 ]]; then
73-
test_string="pytest -v --html=report.html --self-contained-html"
73+
test_string="pytest -v --html=report.html --self-contained-html --ignore-glob='*test_SR_IOV_Performance.py'"
7474
for testname in $(echo "${tests[@]}" | tr ' ' '\n' | sort -u); do
7575
test_string="${test_string} ${testname}"
7676
echo "Testing ${testname}"

README.md

+9-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,15 @@ container_volumes: # the volume mapping to use with the container
108108
# example: "-v /sys:/sys -v /dev:/dev -v /lib/modules:/lib/modules"
109109
vlan: # vlan tag used by the vlan tests, default is 10
110110
mtu: # MTU size; if unspecified, the script will derive it
111-
bonding_switch_delay # Expected bonding switch over/back delay in second, default is 1
111+
bonding_switch_delay: # Expected bonding switch over/back delay in seconds, default is 1
112+
# Below required for SR_IOV_Performance
113+
testpmd_img: # testpmd container image
114+
testpmd_port: # testpmd REST port
115+
trafficgen_img: # trafficgen container image
116+
trafficgen_port: # trafficgen REST port
117+
trafficgen_timeout: # trafficgen command timeout (in minutes)
118+
trafficgen_rx_bps_limit: # trafficgen baseline comparison (bps)
119+
log_performance: # boolean, use false to omit performance test details in logs/result files (only pass or fail)
112120
```
113121

114122
A current version of Python is recommended to run the tests. As of writing the minimum version to avoid warnings would be 3.7. However, the tests have been successfully run up to version 3.11, the latest active release as of writing. The same is true of pip, which should be a current version (23.0 as of writing, but this should be upgraded in the following steps).

sriov/common/configtestdata.py

+4
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,7 @@ def __init__(self, settings: Config) -> None:
5555
)
5656
self.ping = {} # track ping test
5757
self.mtu = {} # track mtu change
58+
59+
# track testpmd and trafficgen container IDs from SR_IOV_Performance for cleanup
60+
self.testpmd_id = ""
61+
self.trafficgen_id = ""

sriov/common/utils.py

+39-13
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ def bind_driver(ssh_obj: ShellHandler, pci: str, driver: str, timeout: int = 5)
5656
return True
5757

5858

59-
def bind_driver_with_dpdk(settings: object, ssh_obj: ShellHandler, pci: str,
60-
driver: str, timeout: int = 5) -> bool:
61-
""" Bind the PCI address to the driver using dpdk-devbind.py
59+
def bind_driver_with_dpdk(
60+
settings: object, ssh_obj: ShellHandler, pci: str, driver: str, timeout: int = 5
61+
) -> bool:
62+
"""Bind the PCI address to the driver using dpdk-devbind.py
6263
in the dpdk container
6364
6465
Args:
@@ -78,12 +79,10 @@ def bind_driver_with_dpdk(settings: object, ssh_obj: ShellHandler, pci: str,
7879
dpdk_devbind_cmd = (
7980
f"{settings.config['container_manager']} run -it --rm --privileged "
8081
f"{settings.config['container_volumes']} "
81-
f"{settings.config['dpdk_img']} dpdk-devbind.py -b {driver} {pci}\n")
82+
f"{settings.config['dpdk_img']} dpdk-devbind.py -b {driver} {pci}\n"
83+
)
8284

83-
steps = [
84-
("modprobe {}".format(driver), None),
85-
(dpdk_devbind_cmd, "Error")
86-
]
85+
steps = [("modprobe {}".format(driver), None), (dpdk_devbind_cmd, "Error")]
8786

8887
for step, errorOnStr in steps:
8988
ssh_obj.log_str(step)
@@ -802,7 +801,11 @@ def set_pipefail(ssh_obj: ShellHandler) -> bool:
802801

803802

804803
def execute_and_assert(
805-
ssh_obj: ShellHandler, cmds: list, exit_code: int, timeout: int = 0
804+
ssh_obj: ShellHandler,
805+
cmds: list,
806+
exit_code: int,
807+
timeout: int = 0,
808+
cmd_timeout: int = 5,
806809
) -> Tuple[list, list]:
807810
"""Execute the list of commands, assert exit code, and return stdouts and stderrs
808811
@@ -811,6 +814,7 @@ def execute_and_assert(
811814
cmds (list): list of str commands to run
812815
exit_code (int): the code to assert
813816
timeout (int): optional timeout between cmds (default 0)
817+
cmd_timeout (int): optional timeout to wait for commands to complete (default 5)
814818
815819
Returns:
816820
outs (list): list of lists of str stdout lines
@@ -820,7 +824,7 @@ def execute_and_assert(
820824
errs = []
821825
for cmd in cmds:
822826
ssh_obj.log_str(cmd)
823-
code, out, err = ssh_obj.execute(cmd)
827+
code, out, err = ssh_obj.execute(cmd, cmd_timeout)
824828
outs.append(out)
825829
errs.append(err)
826830
assert code == exit_code, "\nstdout:" + str(outs) + "\nstderr:" + str(errs)
@@ -851,7 +855,7 @@ def execute_until_timeout(
851855
return True
852856
count -= 1
853857
time.sleep(1)
854-
print("\nstdout:" + str(out) + "\nstderr:" + str(err))
858+
print("\nstdout:" + str(out) + "\nstderr:" + str(err) + "\ncode:" + str(code))
855859
return False
856860

857861

@@ -897,14 +901,13 @@ def get_isolated_cpus(ssh_obj: ShellHandler) -> list:
897901
898902
Args:
899903
ssh_obj (ShellHandler): ssh connection obj
900-
type (str): type of hugepage, 1G or 2M
901904
902905
Returns:
903906
list: The list of isolated CPUs
904907
"""
905908
cmd = ["cat /sys/devices/system/cpu/isolated"]
906909
outs, errs = execute_and_assert(ssh_obj, cmd, 0)
907-
isolated = outs[0][0]
910+
isolated = outs[0][0].strip()
908911
isolated_cores = isolated.split(",")
909912
isolated_list = []
910913
for core in isolated_cores:
@@ -918,6 +921,29 @@ def get_isolated_cpus(ssh_obj: ShellHandler) -> list:
918921
return isolated_list
919922

920923

924+
def get_isolated_cpus_numa(ssh_obj: ShellHandler, numa: int) -> list:
    """Return a list of the isolated CPUs belonging to a NUMA node

    Args:
        ssh_obj (ShellHandler): ssh connection obj
        numa (int): the numa node

    Returns:
        list: The isolated CPUs belonging to numa, sorted in ascending order
    """
    isolated = set(get_isolated_cpus(ssh_obj))

    cmd = [f"lscpu | grep 'NUMA node{numa}'"]
    outs, _ = execute_and_assert(ssh_obj, cmd, 0)
    # The matched line looks like "NUMA node0 CPU(s):   0-15,32-47"; keep only
    # the CPU list that follows the colon.
    cpu_field = outs[0][0].split(":")[1].strip()

    numa_cpus = set()
    for token in cpu_field.split(","):
        token = token.strip()
        if not token:
            # A node without CPUs yields an empty list; nothing to add.
            continue
        # lscpu compresses consecutive CPUs into "first-last" ranges, so a
        # token is either a single CPU id or an inclusive range.
        first, _, last = token.partition("-")
        numa_cpus.update(range(int(first), int(last or first) + 1))

    # Sort so callers that slice the first N CPUs get a deterministic result.
    return sorted(isolated & numa_cpus)
945+
946+
921947
def page_in_kb(type: str) -> str:
922948
"""convert "1G" or "2M" to page size in KB
923949
+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Test Case Name: SR-IOV.Performance
2+
3+
### Objective(s): An RFC2544 performance test to evaluate a system's (relative) performance when running DPDK workloads. This relies on a trafficgen client and containers built from [netgauge](https://github.com/redhat-eets/netgauge).
4+
5+
### Test procedure
6+
7+
* Reset the VFs
8+
```
9+
echo 0 > /sys/class/net/$PF/device/sriov_numvfs
10+
```
11+
12+
* Ensure the reset succeeds (or check no VF exists under the ```$PF``` by ensuring ```sriov_numvfs``` is 0)
13+
14+
* Ensure the trafficgen server ports are on the same numa node, repeat with the two dut server ports
15+
```
16+
cat /sys/bus/pci/devices/<pci_address>/numa_node
17+
```
18+
19+
* Ensure the trafficgen server has 2 1GB hugepages, repeat with the dut server
20+
21+
* On the trafficgen, get 7 isolated CPUs from the numa node associated with the trafficgen ports, repeat on the dut server with 3 isolated CPUs
22+
23+
* On the dut server, create 1 VF on each PF, setting spoof checking off and trust mode on, and bind to vfio-pci
24+
```
25+
echo 1 > /sys/class/net/$PF/device/sriov_numvfs
26+
ip link set $PF vf 0 spoof off
27+
ip link set $PF vf 0 trust on
28+
echo $VF_PCI > /sys/bus/pci/devices/$VF_PCI/driver/unbind
29+
echo vfio-pci > /sys/bus/pci/devices/$VF_PCI/driver_override
30+
echo $VF_PCI > /sys/bus/pci/drivers/vfio-pci/bind
31+
```
32+
33+
* On the dut server, start the prebuilt testpmd container
34+
```
35+
podman run -d --rm --privileged -p $PORT:$PORT -v /dev/hugepages:/dev/hugepages -v /sys/bus/pci/devices:/sys/bus/pci/devices -v /lib/firmware:/lib/firmware --cpuset-cpus $CPUs $testpmd_container --pci $VF1 --pci $VF2 --http-port $PORT --auto
36+
```
37+
38+
* On the dut server, ensure that testpmd has started using the REST API
39+
```
40+
curl localhost:$PORT/testpmd/status
41+
```
42+
43+
* On the trafficgen, bind the 2 trafficgen PF ports to vfio-pci
44+
```
45+
echo $PF_PCI > /sys/bus/pci/devices/$PF_PCI/driver/unbind
46+
echo vfio-pci > /sys/bus/pci/devices/$PF_PCI/driver_override
47+
echo $PF_PCI > /sys/bus/pci/drivers/vfio-pci/bind
48+
```
49+
50+
* On the trafficgen, start the trafficgen container
51+
```
52+
podman run -d --rm --privileged -p $PORT:$PORT -v /dev:/dev -v /sys:/sys -v /lib/modules:/lib/modules --cpuset-cpus $CPUs -e pci_list=$PF1,$PF2 --ip=$IP $trafficgen_container
53+
```
54+
55+
* On the trafficgen, start the client (once to stabilize, once to collect results)
56+
```
57+
python3 /tmp/client.py status --server-addr $IP --server-port $PORT
58+
python3 /tmp/client.py start --server-addr $IP --server-port $PORT --timeout 60
59+
python3 /tmp/client.py stop --server-addr $IP --server-port $PORT
60+
python3 /tmp/client.py auto --server-addr $IP --server-port $PORT
61+
```
62+
63+
* Compare the results bps to the baseline value
64+
65+
### Clean up
66+
* Kill containers on dut and trafficgen
67+
68+
* Reset PF driver on trafficgen
69+
70+
* Remove VFs on dut
71+
```
72+
echo 0 > /sys/class/net/$PF/device/sriov_numvfs
73+
```

0 commit comments

Comments
 (0)