From 45f7bbce213322b15e468cbc57fa1bedd997b5ee Mon Sep 17 00:00:00 2001 From: Elias Kahl Date: Fri, 28 Apr 2023 11:56:00 +0200 Subject: [PATCH 1/5] Fix #74 --- src/shellingham/posix/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/shellingham/posix/__init__.py b/src/shellingham/posix/__init__.py index b81bf08..3dfde8c 100644 --- a/src/shellingham/posix/__init__.py +++ b/src/shellingham/posix/__init__.py @@ -71,6 +71,11 @@ def _get_shell(cmd, *args): if cmd.startswith("-"): # Login shell! Let's use this. return _get_login_shell(cmd) name = os.path.basename(cmd).lower() + if name == "rosetta" or name.contains("qemu-"): + # Running (probably in docker) with rosetta or qemu, first arg is real command + cmd = args[0] + args = args[1:] + name = os.path.basename(cmd).lower() if name in SHELL_NAMES: # Command looks like a shell. return (name, cmd) shell = _get_interpreter_shell(name, args) From 7bf9e76accdf6e9e92f8e27f9c9341e47ea8489d Mon Sep 17 00:00:00 2001 From: Elias Kahl Date: Sat, 13 May 2023 21:43:06 +0000 Subject: [PATCH 2/5] refactor proc tree iter and ps parsing Fixes #55 Tries to also address #21, #35 --- src/shellingham/posix/__init__.py | 28 ++++++---------- src/shellingham/posix/proc.py | 39 +++++++++++------------ src/shellingham/posix/ps.py | 53 +++++++++++++++++++++---------- 3 files changed, 65 insertions(+), 55 deletions(-) diff --git a/src/shellingham/posix/__init__.py b/src/shellingham/posix/__init__.py index 3dfde8c..15f3fb5 100644 --- a/src/shellingham/posix/__init__.py +++ b/src/shellingham/posix/__init__.py @@ -4,8 +4,12 @@ from .._core import SHELL_NAMES, ShellDetectionFailure from . import proc, ps +QEMU_BIN_REGEX = re.compile( + r"qemu-(alpha|armeb|arm|m68k|cris|i386|x86_64|microblaze|mips|mipsel|mips64|mips64el|mipsn32|mipsn32el|nios2|ppc64|ppc|sh4eb|sh4|sparc|sparc32plus|sparc64)" +) -def _get_process_mapping(): + +def _get_process_parents(pid, max_depth=10): """Select a way to obtain process information from the system. * `/proc` is used if supported. @@ -13,25 +17,13 @@ def _get_process_mapping(): """ for impl in (proc, ps): try: - mapping = impl.get_process_mapping() + mapping = impl.get_process_parents(pid, max_depth) except EnvironmentError: continue return mapping raise ShellDetectionFailure("compatible proc fs or ps utility is required") -def _iter_process_args(mapping, pid, max_depth): - """Traverse up the tree and yield each process's argument list.""" - for _ in range(max_depth): - try: - proc = mapping[pid] - except KeyError: # We've reached the root process. Give up. - break - if proc.args: # Presumably the process should always have a name? - yield proc.args - pid = proc.ppid # Go up one level. - - def _get_login_shell(proc_cmd): """Form shell information from SHELL environ if possible.""" login_shell = os.environ.get("SHELL", "") @@ -71,8 +63,8 @@ def _get_shell(cmd, *args): if cmd.startswith("-"): # Login shell! Let's use this. return _get_login_shell(cmd) name = os.path.basename(cmd).lower() - if name == "rosetta" or name.contains("qemu-"): - # Running (probably in docker) with rosetta or qemu, first arg is real command + if name == "rosetta" or QEMU_BIN_REGEX.fullmatch(name): + # Running (probably in docker) with rosetta or qemu, first arg is actual command cmd = args[0] args = args[1:] name = os.path.basename(cmd).lower() @@ -87,8 +79,8 @@ def _get_shell(cmd, *args): def get_shell(pid=None, max_depth=10): """Get the shell that the supplied pid or os.getpid() is running in.""" pid = str(pid or os.getpid()) - mapping = _get_process_mapping() - for proc_args in _iter_process_args(mapping, pid, max_depth): + processes = _get_process_parents(pid, max_depth) + for proc_args, _, _ in processes: shell = _get_shell(*proc_args) if shell: return shell diff --git a/src/shellingham/posix/proc.py b/src/shellingham/posix/proc.py index 4405731..05f7d31 100644 --- a/src/shellingham/posix/proc.py +++ b/src/shellingham/posix/proc.py @@ -9,11 +9,9 @@ # NetBSD: https://man.netbsd.org/NetBSD-9.3-STABLE/mount_procfs.8 # DragonFlyBSD: https://www.dragonflybsd.org/cgi/web-man?command=procfs BSD_STAT_PPID = 2 -BSD_STAT_TTY = 5 # See https://docs.kernel.org/filesystems/proc.html LINUX_STAT_PPID = 3 -LINUX_STAT_TTY = 6 STAT_PATTERN = re.compile(r"\(.+\)|\S+") @@ -41,14 +39,14 @@ def _use_bsd_stat_format(): return False -def _get_stat(pid, name): +def _get_ppid(pid, name): path = os.path.join("/proc", str(pid), name) with io.open(path, encoding="ascii", errors="replace") as f: parts = STAT_PATTERN.findall(f.read()) # We only care about TTY and PPID -- both are numbers. if _use_bsd_stat_format(): - return parts[BSD_STAT_TTY], parts[BSD_STAT_PPID] - return parts[LINUX_STAT_TTY], parts[LINUX_STAT_PPID] + return parts[BSD_STAT_PPID] + return parts[LINUX_STAT_PPID] def _get_cmdline(pid): @@ -66,21 +64,22 @@ class ProcFormatError(EnvironmentError): pass -def get_process_mapping(): +def get_process_parents(pid, max_depth=10): """Try to look up the process tree via the /proc interface.""" stat_name = detect_proc() - self_tty = _get_stat(os.getpid(), stat_name)[0] - processes = {} - for pid in os.listdir("/proc"): - if not pid.isdigit(): - continue - try: - tty, ppid = _get_stat(pid, stat_name) - if tty != self_tty: - continue - args = _get_cmdline(pid) - processes[pid] = Process(args=args, pid=pid, ppid=ppid) - except IOError: - # Process has disappeared - just ignore it. - continue + processes = [] + + depth = 0 + while depth < max_depth: + depth += 1 + ppid = _get_ppid(pid, stat_name) + args = _get_cmdline(pid) + processes.append(Process(args=args, pid=pid, ppid=ppid)) + + if ppid == "0": + break + + pid = ppid + + return processes diff --git a/src/shellingham/posix/ps.py b/src/shellingham/posix/ps.py index 3de6d25..a249b57 100644 --- a/src/shellingham/posix/ps.py +++ b/src/shellingham/posix/ps.py @@ -8,11 +8,9 @@ class PsNotAvailable(EnvironmentError): pass - -def get_process_mapping(): - """Try to look up the process tree via the output of `ps`.""" +def _get_stats(pid): try: - cmd = ["ps", "-ww", "-o", "pid=", "-o", "ppid=", "-o", "args="] + cmd = ["ps", "wwl", "-P", pid] output = subprocess.check_output(cmd) except OSError as e: # Python 2-compatible FileNotFoundError. if e.errno != errno.ENOENT: @@ -27,17 +25,38 @@ def get_process_mapping(): if not isinstance(output, str): encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() output = output.decode(encoding) - processes = {} - for line in output.split("\n"): - try: - pid, ppid, args = line.strip().split(None, 2) - # XXX: This is not right, but we are really out of options. - # ps does not offer a sane way to decode the argument display, - # and this is "Good Enough" for obtaining shell names. Hopefully - # people don't name their shell with a space, or have something - # like "/usr/bin/xonsh is uber". (sarugaku/shellingham#14) - args = tuple(a.strip() for a in args.split(" ")) - except ValueError: - continue - processes[pid] = Process(args=args, pid=pid, ppid=ppid) + + print(output) + + header, row = output.split("\n")[:2] + header = header.split() + row = row.split() + + pid_index = header.index("PID") + ppid_index = header.index("PPID") + + try: + cmd_index = header.index("COMMAND") + except ValueError: + # https://github.com/sarugaku/shellingham/pull/23#issuecomment-474005491 + cmd_index = header.index("CMD") + + + return row[cmd_index:], row[pid_index], row[ppid_index] + + + + +def get_process_parents(pid, max_depth=10): + """Try to look up the process tree via the output of `ps`.""" + processes = [] + + depth = 0 + while pid != "0" and depth < max_depth: + depth += 1 + cmd, pid, ppid = _get_stats(pid) + processes.append(Process(args=cmd, pid=pid, ppid=ppid)) + + pid = ppid + return processes From 71a3b726ec7ed0655b62df124a70557d312d52ab Mon Sep 17 00:00:00 2001 From: Elias Kahl Date: Tue, 16 May 2023 11:25:39 +0200 Subject: [PATCH 3/5] improve qemu regex readability --- src/shellingham/posix/__init__.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/shellingham/posix/__init__.py b/src/shellingham/posix/__init__.py index 15f3fb5..4b21519 100644 --- a/src/shellingham/posix/__init__.py +++ b/src/shellingham/posix/__init__.py @@ -4,8 +4,33 @@ from .._core import SHELL_NAMES, ShellDetectionFailure from . import proc, ps +# Based on QEMU docs: https://www.qemu.org/docs/master/user/main.html QEMU_BIN_REGEX = re.compile( - r"qemu-(alpha|armeb|arm|m68k|cris|i386|x86_64|microblaze|mips|mipsel|mips64|mips64el|mipsn32|mipsn32el|nios2|ppc64|ppc|sh4eb|sh4|sparc|sparc32plus|sparc64)" + r"""qemu- + (alpha + |armeb + |arm + |m68k + |cris + |i386 + |x86_64 + |microblaze + |mips + |mipsel + |mips64 + |mips64el + |mipsn32 + |mipsn32el + |nios2 + |ppc64 + |ppc + |sh4eb + |sh4 + |sparc + |sparc32plus + |sparc64 + )""", + re.VERBOSE, ) From 6b3410912f8bdc6ee10aee2cb8c50fb2568e6820 Mon Sep 17 00:00:00 2001 From: Elias Kahl Date: Tue, 16 May 2023 11:30:08 +0200 Subject: [PATCH 4/5] revert ps call to version before #7bf9e76 --- src/shellingham/posix/ps.py | 56 +++++++++++++++---------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/src/shellingham/posix/ps.py b/src/shellingham/posix/ps.py index a249b57..d46ff77 100644 --- a/src/shellingham/posix/ps.py +++ b/src/shellingham/posix/ps.py @@ -8,9 +8,11 @@ class PsNotAvailable(EnvironmentError): pass -def _get_stats(pid): + +def get_process_parents(pid, max_depth=10): + """Try to look up the process tree via the output of `ps`.""" try: - cmd = ["ps", "wwl", "-P", pid] + cmd = ["ps", "-ww", "-o", "pid=", "-o", "ppid=", "-o", "args="] output = subprocess.check_output(cmd) except OSError as e: # Python 2-compatible FileNotFoundError. if e.errno != errno.ENOENT: @@ -26,37 +28,25 @@ def _get_stats(pid): encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() output = output.decode(encoding) - print(output) - - header, row = output.split("\n")[:2] - header = header.split() - row = row.split() - - pid_index = header.index("PID") - ppid_index = header.index("PPID") - - try: - cmd_index = header.index("COMMAND") - except ValueError: - # https://github.com/sarugaku/shellingham/pull/23#issuecomment-474005491 - cmd_index = header.index("CMD") - - - return row[cmd_index:], row[pid_index], row[ppid_index] - - - - -def get_process_parents(pid, max_depth=10): - """Try to look up the process tree via the output of `ps`.""" - processes = [] - + processes_mapping = {} + for line in output.split("\n"): + try: + pid, ppid, args = line.strip().split(None, 2) + # XXX: This is not right, but we are really out of options. + # ps does not offer a sane way to decode the argument display, + # and this is "Good Enough" for obtaining shell names. Hopefully + # people don't name their shell with a space, or have something + # like "/usr/bin/xonsh is uber". (sarugaku/shellingham#14) + args = tuple(a.strip() for a in args.split(" ")) + except ValueError: + continue + processes_mapping[pid] = Process(args=args, pid=pid, ppid=ppid) + + parent_processes = [] depth = 0 - while pid != "0" and depth < max_depth: + while pid in processes_mapping and depth < max_depth: + parent_processes.append(processes_mapping[pid]) + pid = processes_mapping[pid].ppid depth += 1 - cmd, pid, ppid = _get_stats(pid) - processes.append(Process(args=cmd, pid=pid, ppid=ppid)) - - pid = ppid - return processes + return parent_processes From 82b66850019e78840b56ca18fb008b79acf70743 Mon Sep 17 00:00:00 2001 From: Tzu-ping Chung Date: Thu, 18 May 2023 10:36:11 +0800 Subject: [PATCH 5/5] Use iterator to inspect parents --- src/shellingham/posix/__init__.py | 12 ++++++------ src/shellingham/posix/proc.py | 15 +++------------ src/shellingham/posix/ps.py | 19 +++++++++---------- 3 files changed, 18 insertions(+), 28 deletions(-) diff --git a/src/shellingham/posix/__init__.py b/src/shellingham/posix/__init__.py index 4b21519..5bd2070 100644 --- a/src/shellingham/posix/__init__.py +++ b/src/shellingham/posix/__init__.py @@ -34,7 +34,7 @@ ) -def _get_process_parents(pid, max_depth=10): +def _iter_process_parents(pid, max_depth=10): """Select a way to obtain process information from the system. * `/proc` is used if supported. @@ -42,10 +42,10 @@ def _get_process_parents(pid, max_depth=10): """ for impl in (proc, ps): try: - mapping = impl.get_process_parents(pid, max_depth) + iterator = impl.iter_process_parents(pid, max_depth) except EnvironmentError: continue - return mapping + return iterator raise ShellDetectionFailure("compatible proc fs or ps utility is required") @@ -89,7 +89,8 @@ def _get_shell(cmd, *args): return _get_login_shell(cmd) name = os.path.basename(cmd).lower() if name == "rosetta" or QEMU_BIN_REGEX.fullmatch(name): - # Running (probably in docker) with rosetta or qemu, first arg is actual command + # If the current process is Rosetta or QEMU, this likely is a + # containerized process. Parse out the actual command instead. cmd = args[0] args = args[1:] name = os.path.basename(cmd).lower() @@ -104,8 +105,7 @@ def _get_shell(cmd, *args): def get_shell(pid=None, max_depth=10): """Get the shell that the supplied pid or os.getpid() is running in.""" pid = str(pid or os.getpid()) - processes = _get_process_parents(pid, max_depth) - for proc_args, _, _ in processes: + for proc_args, _, _ in _iter_process_parents(pid, max_depth): shell = _get_shell(*proc_args) if shell: return shell diff --git a/src/shellingham/posix/proc.py b/src/shellingham/posix/proc.py index 05f7d31..14160f9 100644 --- a/src/shellingham/posix/proc.py +++ b/src/shellingham/posix/proc.py @@ -64,22 +64,13 @@ class ProcFormatError(EnvironmentError): pass -def get_process_parents(pid, max_depth=10): +def iter_process_parents(pid, max_depth=10): """Try to look up the process tree via the /proc interface.""" stat_name = detect_proc() - processes = [] - - depth = 0 - while depth < max_depth: - depth += 1 + for _ in range(max_depth): ppid = _get_ppid(pid, stat_name) args = _get_cmdline(pid) - processes.append(Process(args=args, pid=pid, ppid=ppid)) - + yield Process(args=args, pid=pid, ppid=ppid) if ppid == "0": break - pid = ppid - - - return processes diff --git a/src/shellingham/posix/ps.py b/src/shellingham/posix/ps.py index d46ff77..d2715cf 100644 --- a/src/shellingham/posix/ps.py +++ b/src/shellingham/posix/ps.py @@ -9,7 +9,7 @@ class PsNotAvailable(EnvironmentError): pass -def get_process_parents(pid, max_depth=10): +def iter_process_parents(pid, max_depth=10): """Try to look up the process tree via the output of `ps`.""" try: cmd = ["ps", "-ww", "-o", "pid=", "-o", "ppid=", "-o", "args="] @@ -22,7 +22,7 @@ def get_process_parents(pid, max_depth=10): # `ps` can return 1 if the process list is completely empty. # (sarugaku/shellingham#15) if not e.output.strip(): - return {} + return raise if not isinstance(output, str): encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() @@ -42,11 +42,10 @@ def get_process_parents(pid, max_depth=10): continue processes_mapping[pid] = Process(args=args, pid=pid, ppid=ppid) - parent_processes = [] - depth = 0 - while pid in processes_mapping and depth < max_depth: - parent_processes.append(processes_mapping[pid]) - pid = processes_mapping[pid].ppid - depth += 1 - - return parent_processes + for _ in range(max_depth): + try: + process = processes_mapping[pid] + except KeyError: + return + yield process + pid = process.ppid