Skip to content

Commit

Permalink
zdtm: inhfd: handle child hanging
Browse files Browse the repository at this point in the history
Let's kill the child when the test is hanging.  Due to PEP 475, the
SIGALRM handler needs to raise an exception to be able to interrupt
wait().  To improve debugging, close the fd in the child after reading
it, and detect that in the parent to show whether the child hung
partway through.

Signed-off-by: Michał Mirosław <[email protected]>
  • Loading branch information
osctobe committed Aug 29, 2023
1 parent 6d0e785 commit 4b6cb00
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions test/zdtm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

def alarm(*args):
    """Report a timeout and interrupt whatever call is currently blocked.

    Intended for use as a SIGALRM handler; the arguments the signal
    machinery passes to a handler are accepted and ignored.

    Raises:
        InterruptedError: always, with the message "ALARM".
    """
    print("==== ALARM ====")
    # Since PEP 475, a system call interrupted by a signal is retried
    # automatically unless the handler raises, so raising here is what
    # lets a blocked wait be abandoned.
    raise InterruptedError("ALARM")


def traceit(f, e, a):
Expand Down Expand Up @@ -710,11 +711,9 @@ def start(self):
# regular files, so we loop.
data = b''
while not data:
# In python 2.7, peer_file.read() doesn't call the read
# system call if it's read file to the end once. The
# next seek allows to workaround this problem.
data = os.read(peer_file.fileno(), 16)
data = os.read(peer_file.fileno(), len(msg) + 16)
time.sleep(0.1)
peer_file.close()
except Exception as e:
print("Unable to read a peer file: %s" % e)
sys.exit(1)
Expand Down Expand Up @@ -745,7 +744,16 @@ def stop(self):
my_file.write(msg)
my_file.flush()
i += 1
pid, status = os.waitpid(self.__peer_pid, 0)
signal.alarm(10)
try:
pid, status = os.waitpid(self.__peer_pid, 0)
except InterruptedError:
fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid))
self.kill()
pid, status = os.waitpid(self.__peer_pid, 0)
fds = self.__fds.difference(fds)
if fds:
print("before SIGKILL, child managed to close fds: " + str(fds))
with open(self.__name + ".out") as output:
print(output.read())
self.__peer_pid = 0
Expand Down

0 comments on commit 4b6cb00

Please sign in to comment.