From 52caaef6d01a94962576e9510d982f12c1de20c1 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Sat, 24 Aug 2024 14:54:31 -0700 Subject: [PATCH] =?UTF-8?q?Revert=20"GH-120754:=20Add=20a=20strace=20helpe?= =?UTF-8?q?r=20and=20test=20set=20of=20syscalls=20for=20o=E2=80=A6=20(#123?= =?UTF-8?q?303)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert "GH-120754: Add a strace helper and test set of syscalls for open().read() (#121143)" This reverts commit e38d0afe3548b856ccf0b05c01ed3eefc69cb3e7. --- Lib/test/support/strace_helper.py | 166 ------------------------------ Lib/test/test_fileio.py | 93 +---------------- Lib/test/test_subprocess.py | 53 ++++++---- Misc/ACKS | 1 - 4 files changed, 33 insertions(+), 280 deletions(-) delete mode 100644 Lib/test/support/strace_helper.py diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py deleted file mode 100644 index b881670b89751d..00000000000000 --- a/Lib/test/support/strace_helper.py +++ /dev/null @@ -1,166 +0,0 @@ -import re -import sys -import textwrap -import unittest -from dataclasses import dataclass -from functools import cache -from test import support -from test.support.script_helper import run_python_until_end - -_strace_binary = "/usr/bin/strace" -_syscall_regex = re.compile( - r"(?P[^(]*)\((?P[^)]*)\)\s*[=]\s*(?P.+)") -_returncode_regex = re.compile( - br"\+\+\+ exited with (?P\d+) \+\+\+") - - -@dataclass -class StraceEvent: - syscall: str - args: list[str] - returncode: str - - -@dataclass -class StraceResult: - strace_returncode: int - python_returncode: int - - """The event messages generated by strace. This is very similar to the - stderr strace produces with returncode marker section removed.""" - event_bytes: bytes - stdout: bytes - stderr: bytes - - def events(self): - """Parse event_bytes data into system calls for easier processing. - - This assumes the program under inspection doesn't print any non-utf8 - strings which would mix into the strace output.""" - decoded_events = self.event_bytes.decode('utf-8') - matches = [ - _syscall_regex.match(event) - for event in decoded_events.splitlines() - ] - return [ - StraceEvent(match["syscall"], - [arg.strip() for arg in (match["args"].split(","))], - match["returncode"]) for match in matches if match - ] - - def sections(self): - """Find all "MARK " writes and use them to make groups of events. - - This is useful to avoid variable / overhead events, like those at - interpreter startup or when opening a file so a test can verify just - the small case under study.""" - current_section = "__startup" - sections = {current_section: []} - for event in self.events(): - if event.syscall == 'write' and len( - event.args) > 2 and event.args[1].startswith("\"MARK "): - # Found a new section, don't include the write in the section - # but all events until next mark should be in that section - current_section = event.args[1].split( - " ", 1)[1].removesuffix('\\n"') - if current_section not in sections: - sections[current_section] = list() - else: - sections[current_section].append(event) - - return sections - - -@support.requires_subprocess() -def strace_python(code, strace_flags, check=True): - """Run strace and return the trace. - - Sets strace_returncode and python_returncode to `-1` on error.""" - res = None - - def _make_error(reason, details): - return StraceResult( - strace_returncode=-1, - python_returncode=-1, - event_bytes=f"error({reason},details={details}) = -1".encode('utf-8'), - stdout=res.out if res else "", - stderr=res.err if res else "") - - # Run strace, and get out the raw text - try: - res, cmd_line = run_python_until_end( - "-c", - textwrap.dedent(code), - __run_using_command=[_strace_binary] + strace_flags) - except OSError as err: - return _make_error("Caught OSError", err) - - if check and res.rc: - res.fail(cmd_line) - - # Get out program returncode - stripped = res.err.strip() - output = stripped.rsplit(b"\n", 1) - if len(output) != 2: - return _make_error("Expected strace events and exit code line", - stripped[-50:]) - - returncode_match = _returncode_regex.match(output[1]) - if not returncode_match: - return _make_error("Expected to find returncode in last line.", - output[1][:50]) - - python_returncode = int(returncode_match["returncode"]) - if check and python_returncode: - res.fail(cmd_line) - - return StraceResult(strace_returncode=res.rc, - python_returncode=python_returncode, - event_bytes=output[0], - stdout=res.out, - stderr=res.err) - - -def _get_events(code, strace_flags, prelude, cleanup): - # NOTE: The flush is currently required to prevent the prints from getting - # buffered and done all at once at exit - prelude = textwrap.dedent(prelude) - code = textwrap.dedent(code) - cleanup = textwrap.dedent(cleanup) - to_run = f""" -print("MARK prelude", flush=True) -{prelude} -print("MARK code", flush=True) -{code} -print("MARK cleanup", flush=True) -{cleanup} -print("MARK __shutdown", flush=True) - """ - trace = strace_python(to_run, strace_flags) - all_sections = trace.sections() - return all_sections['code'] - - -def get_syscalls(code, strace_flags, prelude="", cleanup=""): - """Get the syscalls which a given chunk of python code generates""" - events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) - return [ev.syscall for ev in events] - - -# Moderately expensive (spawns a subprocess), so share results when possible. -@cache -def _can_strace(): - res = strace_python("import sys; sys.exit(0)", [], check=False) - assert res.events(), "Should have parsed multiple calls" - - return res.strace_returncode == 0 and res.python_returncode == 0 - - -def requires_strace(): - if sys.platform != "linux": - return unittest.skip("Linux only, requires strace.") - - return unittest.skipUnless(_can_strace(), "Requires working strace") - - -__all__ = ["requires_strace", "strace_python", "StraceEvent", "StraceResult"] diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index bded9c8b8d3004..0611d1749f41c1 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -11,7 +11,7 @@ from test.support import ( cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi, - infinite_recursion, strace_helper + infinite_recursion, ) from test.support.os_helper import ( TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd, @@ -24,9 +24,6 @@ import _pyio # Python implementation of io -_strace_flags=["--trace=%file,%desc"] - - class AutoFileTests: # file tests for which a test file is automatically set up @@ -362,94 +359,6 @@ def testErrnoOnClosedReadinto(self, f): a = array('b', b'x'*10) f.readinto(a) - @strace_helper.requires_strace() - def test_syscalls_read(self): - """Check that the set of system calls produced by the I/O stack is what - is expected for various read cases. - - It's expected as bits of the I/O implementation change, this will need - to change. The goal is to catch changes that unintentionally add - additional systemcalls (ex. additional calls have been looked at in - bpo-21679 and gh-120754). - """ - self.f.write(b"Hello, World!") - self.f.close() - - - def check_readall(name, code, prelude="", cleanup=""): - with self.subTest(name=name): - syscalls = strace_helper.get_syscalls(code, _strace_flags, - prelude=prelude, - cleanup=cleanup) - - # There are a number of related syscalls used to implement - # behaviors in a libc (ex. fstat, newfstatat, open, openat). - # Allow any that use the same substring. - def count_similarname(name): - return len([sc for sc in syscalls if name in sc]) - - # Should open and close the file exactly once - self.assertEqual(count_similarname('open'), 1) - self.assertEqual(count_similarname('close'), 1) - - # Should only have one fstat (bpo-21679, gh-120754) - self.assertEqual(count_similarname('fstat'), 1) - - - # "open, read, close" file using different common patterns. - check_readall( - "open builtin with default options", - f""" - f = open('{TESTFN}') - f.read() - f.close() - """ - ) - - check_readall( - "open in binary mode", - f""" - f = open('{TESTFN}', 'rb') - f.read() - f.close() - """ - ) - - check_readall( - "open in text mode", - f""" - f = open('{TESTFN}', 'rt') - f.read() - f.close() - """ - ) - - check_readall( - "pathlib read_bytes", - "p.read_bytes()", - prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" - - ) - - check_readall( - "pathlib read_text", - "p.read_text()", - prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" - ) - - # Focus on just `read()`. - calls = strace_helper.get_syscalls( - prelude=f"f = open('{TESTFN}')", - code="f.read()", - cleanup="f.close()", - strace_flags=_strace_flags - ) - # One to read all the bytes - # One to read the EOF and get a size 0 return. - self.assertEqual(calls.count("read"), 2) - - - class CAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _io.FileIO modulename = '_io' diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index e45701dfe033a6..f065b9c9bb1c2c 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -4,7 +4,6 @@ from test.support import check_sanitizer from test.support import import_helper from test.support import os_helper -from test.support import strace_helper from test.support import warnings_helper from test.support.script_helper import assert_python_ok import subprocess @@ -3416,7 +3415,7 @@ def __del__(self): @unittest.skipIf(not sysconfig.get_config_var("HAVE_VFORK"), "vfork() not enabled by configure.") - @strace_helper.requires_strace() + @unittest.skipIf(sys.platform != "linux", "Linux only, requires strace.") @mock.patch("subprocess._USE_POSIX_SPAWN", new=False) def test_vfork_used_when_expected(self): # This is a performance regression test to ensure we default to using @@ -3424,25 +3423,36 @@ def test_vfork_used_when_expected(self): # Technically this test could pass when posix_spawn is used as well # because libc tends to implement that internally using vfork. But # that'd just be testing a libc+kernel implementation detail. - - # Are intersted in the system calls: - # clone,clone2,clone3,fork,vfork,exit,exit_group - # Unfortunately using `--trace` with that list to strace fails because - # not all are supported on all platforms (ex. clone2 is ia64 only...) - # So instead use `%process` which is recommended by strace, and contains - # the above. + strace_binary = "/usr/bin/strace" + # The only system calls we are interested in. + strace_filter = "--trace=clone,clone2,clone3,fork,vfork,exit,exit_group" true_binary = "/bin/true" - strace_args = ["--trace=%process"] + strace_command = [strace_binary, strace_filter] + + try: + does_strace_work_process = subprocess.run( + strace_command + [true_binary], + stderr=subprocess.PIPE, + stdout=subprocess.DEVNULL, + ) + rc = does_strace_work_process.returncode + stderr = does_strace_work_process.stderr + except OSError: + rc = -1 + stderr = "" + if rc or (b"+++ exited with 0 +++" not in stderr): + self.skipTest("strace not found or not working as expected.") with self.subTest(name="default_is_vfork"): - vfork_result = strace_helper.strace_python( - f"""\ - import subprocess - subprocess.check_call([{true_binary!r}])""", - strace_args + vfork_result = assert_python_ok( + "-c", + textwrap.dedent(f"""\ + import subprocess + subprocess.check_call([{true_binary!r}])"""), + __run_using_command=strace_command, ) # Match both vfork() and clone(..., flags=...|CLONE_VFORK|...) - self.assertRegex(vfork_result.event_bytes, br"(?i)vfork") + self.assertRegex(vfork_result.err, br"(?i)vfork") # Do NOT check that fork() or other clones did not happen. # If the OS denys the vfork it'll fallback to plain fork(). @@ -3455,8 +3465,9 @@ def test_vfork_used_when_expected(self): ("setgroups", "", "extra_groups=[]", True), ): with self.subTest(name=sub_name): - non_vfork_result = strace_helper.strace_python( - f"""\ + non_vfork_result = assert_python_ok( + "-c", + textwrap.dedent(f"""\ import subprocess {preamble} try: @@ -3464,11 +3475,11 @@ def test_vfork_used_when_expected(self): [{true_binary!r}], **dict({sp_kwarg})) except PermissionError: if not {expect_permission_error}: - raise""", - strace_args + raise"""), + __run_using_command=strace_command, ) # Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...). - self.assertNotRegex(non_vfork_result.event_bytes, br"(?i)vfork") + self.assertNotRegex(non_vfork_result.err, br"(?i)vfork") @unittest.skipUnless(mswindows, "Windows specific tests") diff --git a/Misc/ACKS b/Misc/ACKS index aced869333f2b6..b031eb7c11f73f 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1164,7 +1164,6 @@ Grzegorz Makarewicz David Malcolm Greg Malcolm William Mallard -Cody Maloney Ken Manheimer Vladimir Marangozov Colin Marc