From 4b6cb0004f4f2dc5f2728e9e89dec535318c9d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=C2=A0Miros=C5=82aw?= Date: Thu, 24 Aug 2023 17:54:02 +0200 Subject: [PATCH] zdtm: inhfd: handle child hanging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's kill the child when the test is hanging. Due to PEP 475 the SIGALRM handler needs to throw an exception to be able to interrupt wait(). To improve debugging, close the fd in the child after reading it and detect that in the parent to show whether the child hung part way. Signed-off-by: Michał Mirosław --- test/zdtm.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index c6e852dc1a3..8b395ab7ae6 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -40,6 +40,7 @@ def alarm(*args): print("==== ALARM ====") + raise InterruptedError("ALARM") def traceit(f, e, a): @@ -710,11 +711,9 @@ def start(self): # regular files, so we loop. data = b'' while not data: - # In python 2.7, peer_file.read() doesn't call the read - # system call if it's read file to the end once. The - # next seek allows to workaround this problem. - data = os.read(peer_file.fileno(), 16) + data = os.read(peer_file.fileno(), len(msg) + 16) time.sleep(0.1) + peer_file.close() except Exception as e: print("Unable to read a peer file: %s" % e) sys.exit(1) @@ -745,7 +744,16 @@ def stop(self): my_file.write(msg) my_file.flush() i += 1 - pid, status = os.waitpid(self.__peer_pid, 0) + signal.alarm(10) + try: + pid, status = os.waitpid(self.__peer_pid, 0) + except InterruptedError: + fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid)) + self.kill() + pid, status = os.waitpid(self.__peer_pid, 0) + fds = self.__fds.difference(fds) + if fds: + print("before SIGKILL, child managed to close fds: " + str(fds)) with open(self.__name + ".out") as output: print(output.read()) self.__peer_pid = 0