Skip to content

Commit

Permalink
Fixing index & type errors in prime_offload_tester.py (Bugfix) (#1588)
Browse files (browse the repository at this point in the history)
* fix: make annotations compatible with py 3.5

* fix: non-null assertions, comments

* fix: root check

* fix: only the first letter of the command ('g') was used in the offload-to-specific-GPU test

* style: fewer brackets

* fix: update unit tests to reflect the changes
  • Loading branch information
tomli380576 authored Nov 22, 2024
1 parent 78e251d commit a0a37e3
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 19 deletions.
40 changes: 26 additions & 14 deletions providers/base/bin/prime_offload_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import sys
import re
import os
import typing as T
from shlex import split as sh_split


class PrimeOffloader:
Expand All @@ -48,7 +50,7 @@ class PrimeOffloader:

def find_file_containing_string(
self, search_directory: str, filename_pattern: str, search_string: str
) -> str:
) -> T.Optional[str]:
"""
Search for a file matching a specific pattern
that contains a given string.
Expand Down Expand Up @@ -89,6 +91,7 @@ def find_card_id(self, pci_bdf: str) -> str:
card_path = self.find_file_containing_string(
"/sys/kernel/debug/dri", "name", pci_bdf
)
assert card_path, "Couldn't find a card named: {}".format(pci_bdf)
return card_path.split("/")[5]
except IndexError as e:
raise SystemExit("return value format error {}".format(repr(e)))
Expand Down Expand Up @@ -139,7 +142,7 @@ def get_clients(self, card_id: str) -> str:
return ""

def check_offload(
self, cmd: list, card_id: str, card_name: str, timeout: int
self, cmd: T.List[str], card_id: str, card_name: str, timeout: int
):
"""
Used to check if the provided command is executed on a specific GPU.
Expand Down Expand Up @@ -168,7 +171,7 @@ def check_offload(
self.logger.info(" Device Name:[{}]".format(card_name))
return
self.logger.info("Checking fail:")
self.logger.info(" Couldn't find process [{}]".format(cmd))
self.logger.info(" Couldn't find process {}".format(cmd))
self.check_result = True

def _find_bdf(self, card_id: str):
Expand All @@ -182,7 +185,7 @@ def _find_bdf(self, card_id: str):
data_in_name = f.read()
return data_in_name.split()[1].split("=")[1]

def find_offload(self, cmd: str, timeout: int):
def find_offload(self, cmd_str: str, timeout: int):
"""
Find the card that the command is running on.
This script looks for the card on which a specific command is running.
Expand All @@ -200,7 +203,7 @@ def find_offload(self, cmd: str, timeout: int):

deadline = time.time() + timeout

cmd = cmd.split()
cmd = sh_split(cmd_str)

while time.time() < deadline:
time.sleep(delay)
Expand All @@ -209,15 +212,16 @@ def find_offload(self, cmd: str, timeout: int):
card_path = self.find_file_containing_string(
directory, "clients", cmd[0]
)
if directory in card_path:

if card_path and directory in card_path:
try:
# The graphic will be shown such as 0 and 128
# at the same time. Therefore, pick up the first one
first_card = card_path.splitlines()[0]
card_id = first_card.split("/")[5]
bdf = self._find_bdf(card_id)
self.logger.info("Process is running on:")
self.logger.info(" process:[{}]".format(cmd))
self.logger.info(" process:[{}]".format(cmd[0]))
self.logger.info(
" Card ID:[{}]".format(self.find_card_id(bdf))
)
Expand All @@ -229,8 +233,9 @@ def find_offload(self, cmd: str, timeout: int):
self.logger.info(
"Finding card information failed {}".format(repr(e))
)

self.logger.info("Checking fail:")
self.logger.info(" Couldn't find process [{}]".format(cmd))
self.logger.info(" Couldn't find process {}".format(cmd))
self.check_result = True

def check_nv_offload_env(self):
Expand Down Expand Up @@ -262,7 +267,7 @@ def check_nv_offload_env(self):
"No prime-select, it should be ok to run prime offload"
)

def cmd_runner(self, cmd: list, env: dict = None):
def cmd_runner(self, cmd: T.List[str], env: T.Optional[T.Dict] = None):
"""
use to execute command and piping the output to the screen.
Expand All @@ -283,7 +288,8 @@ def cmd_runner(self, cmd: list, env: dict = None):

# redirect command output real time
while runner.poll() is None:
line = runner.stdout.readline().strip()
# when stdout=subprocess.PIPE, stdout is not None
line = runner.stdout.readline().strip() # type: ignore
self.logger.info(line)
except subprocess.CalledProcessError as e:
raise SystemExit("run command failed {}".format(repr(e)))
Expand Down Expand Up @@ -313,7 +319,9 @@ def cmd_finder(self, cmd: str, timeout: int):
if self.check_result:
raise SystemExit("Couldn't find process running on GPU")

def cmd_checker(self, cmd: str, pci_bdf: str, driver: str, timeout: int):
def cmd_checker(
self, cmd_str: str, pci_bdf: str, driver: str, timeout: int
):
"""
run offload command and check it runs on correct GPU
Expand All @@ -331,7 +339,7 @@ def cmd_checker(self, cmd: str, pci_bdf: str, driver: str, timeout: int):
# run offload command in other process
dri_pci_bdf_format = re.sub("[:.]", "_", pci_bdf)

if "timeout" in cmd:
if "timeout" in cmd_str:
raise SystemExit("Put timeout in command isn't allowed")

env = os.environ.copy()
Expand All @@ -349,19 +357,22 @@ def cmd_checker(self, cmd: str, pci_bdf: str, driver: str, timeout: int):
# if nv driver under nvidia mode, prime/reverse prime couldn't work.
self.check_nv_offload_env()

cmd = sh_split(cmd_str)
# use other thread to check offload is correctly or not
check_thread = threading.Thread(
target=self.check_offload, args=(cmd, card_id, card_name, timeout)
)
check_thread.start()
try:
run_with_timeout(self.cmd_runner, timeout, cmd.split(), env)
run_with_timeout(self.cmd_runner, timeout, cmd, env)
except TimeoutError:
self.logger.info("Test finished")
check_thread.join()

if self.check_result:
raise SystemExit("offload to specific GPU failed")
raise SystemExit(
"offload to specific GPU: {} failed".format(pci_bdf)
)

def parse_args(self, args=sys.argv[1:]):
"""
Expand Down Expand Up @@ -428,4 +439,5 @@ def main(self):


if __name__ == "__main__":
assert os.getuid() == 0, "This test must be run as root"
PrimeOffloader().main()
10 changes: 5 additions & 5 deletions providers/base/tests/test_prime_offload_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ def test_pci_name_digital_error_format_check(self, mock_cmd):
def test_empty_string_id_not_found(self, mock_cmd):
po = PrimeOffloader()
# empty string
mock_cmd.return_value = ""
with self.assertRaises(SystemExit):
mock_cmd.return_value = None
with self.assertRaises(AssertionError):
po.find_card_id("0000:00:00.0")
mock_cmd.assert_called_with(
"/sys/kernel/debug/dri",
Expand Down Expand Up @@ -578,7 +578,7 @@ def test_non_nv_driver_check(self):
os.environ.copy = MagicMock(return_value={})
po.cmd_checker("glxgears", "0000:00:00.0", "xxx", 0)
# check check_offload function get correct args
po.check_offload.assert_called_with("glxgears", "0", "Intel", 0)
po.check_offload.assert_called_with(["glxgears"], "0", "Intel", 0)

@patch("prime_offload_tester.run_with_timeout", MagicMock())
def test_nv_driver_check(self):
Expand All @@ -591,7 +591,7 @@ def test_nv_driver_check(self):
os.environ.copy = MagicMock(return_value={})
po.cmd_checker("glxgears", "0000:00:00.0", "nvidia", 1)
# check check_offload function get correct args
po.check_offload.assert_called_with("glxgears", "0", "NV", 1)
po.check_offload.assert_called_with(["glxgears"], "0", "NV", 1)

@patch("prime_offload_tester.run_with_timeout")
@patch("threading.Thread")
Expand All @@ -608,7 +608,7 @@ def test_not_found(self, mock_thread, mock_run_timeout):
po.cmd_checker("glxgears", "0000:00:00.0", "nvidia", 1)
# check check_offload function get correct args
mock_thread.assert_called_with(
target=po.check_offload, args=("glxgears", "0", "NV", 1)
target=po.check_offload, args=(["glxgears"], "0", "NV", 1)
)


Expand Down

0 comments on commit a0a37e3

Please sign in to comment.