#!/usr/bin/env python3
# Copyright (c) Mysten Labs, Inc.
# SPDX-License-Identifier: Apache-2.0
#
# Provenance: scripts/threadstall_analyzer.py as added by patch
# a5fed68c851376640120f5fbf92110fd56e1aaae ("Add threadstall analyzer script").

"""
This script is used to analyze threadstalls. It will print out the most common
stack patterns and the most frequent interesting functions.

Usage: threadstall_analyzer.py <dump_dir>
"""

import os
import re
import sys
from collections import Counter, defaultdict
from typing import List, Dict, Set


class ThreadDumpAnalyzer:
    """Parse thread-dump files and summarise the frames located under ``crates/``."""

    def __init__(self):
        # All patterns that should be skipped.
        # NOTE: these patterns are compiled into ``self.skip_regex`` below, but
        # ``is_interesting`` does not consult that regex; frame filtering is
        # currently driven purely by the ``crates/`` prefix check.
        self.skip_patterns = [
            # Runtime and standard library
            r'tokio::runtime.*',
            r'tokio::task.*',
            r'std::.*',  # Filter all std:: namespace
            r'core::.*',
            r'core::ops::function.*',
            r'<.*>::{closure}',
            r'alloc::.*',

            # System and threading
            r'pthread::.*',
            r'__pthread_.*',
            r'_pthread_.*',
            r'parking_lot::.*',
            r'syscall\.S.*',
            r'/sysdeps/unix',
            r'nptl/.*',  # Filter all nptl/ directory
            r'sys::unix',  # Filter sys::unix anywhere in the line
            r'\.llvm\.',  # Filter .llvm. anywhere in the line
            r'^\?\?',  # Filter lines starting with ??

            # Common I/O operations
            r'recv\.c',
            r'send\.c',
            r'write\.c',
            r'read\.c',

            # Thread operations
            r'./nptl/pthread_create\.c:442',
            r'./nptl/futex-internal\.c:57',
        ]
        self.skip_regex = re.compile('|'.join(self.skip_patterns))

    def is_interesting(self, line: str) -> bool:
        """Return True when *line* starts with ``crates/``.

        Only frames whose location starts with ``crates/`` are considered
        interesting; everything else is collapsed by ``compress_stack``.
        """
        return line.startswith('crates/')

    def parse_dump_file(self, file_path: str) -> List[List[str]]:
        """Split one dump file into per-thread stacks.

        A line starting with ``Thread`` begins a new stack. Within a stack,
        every non-empty line that does not start with ``---`` and contains
        ``' at '`` contributes one frame: the text after the last ``' at '``.

        Args:
            file_path: Path of the dump file to read.

        Returns:
            One list of frame strings per thread, in file order. Threads
            without any recognised frame are omitted.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        stacks = []
        current_stack = []

        # Dumps are debugger output and may contain bytes that are not valid
        # UTF-8; replace them instead of aborting the whole analysis.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                line = line.strip()
                if line.startswith('Thread'):
                    # A new thread header closes the stack collected so far.
                    if current_stack:
                        stacks.append(current_stack)
                    current_stack = []
                elif line and not line.startswith('---'):
                    if ' at ' in line:
                        func = line.split(' at ')[-1].strip()
                        current_stack.append(func)

        # Flush the final thread, which has no following header.
        if current_stack:
            stacks.append(current_stack)

        return stacks

    def compress_stack(self, stack: List[str]) -> List[str]:
        """Compress stack to only interesting parts while maintaining context.

        Each run of consecutive uninteresting frames is replaced by a single
        ``"..."`` marker. A trailing marker is dropped; a leading one is kept.

        Args:
            stack: Frames of one thread as returned by ``parse_dump_file``.

        Returns:
            The compressed frame list; empty when no frame is interesting.
        """
        compressed = []
        last_was_skipped = False

        for frame in stack:
            if self.is_interesting(frame):
                compressed.append(frame)
                last_was_skipped = False
            elif not last_was_skipped:
                compressed.append("...")  # Add ellipsis to show skipped frames
                last_was_skipped = True

        # Remove trailing ellipsis
        if compressed and compressed[-1] == "...":
            compressed.pop()

        return compressed

    def analyze_dumps(self, dump_dir: str) -> Dict:
        """Analyze every regular file directly inside *dump_dir*.

        Args:
            dump_dir: Directory containing the dump files.

        Returns:
            A dict with three keys:
              * ``'individual_dumps'``: list of
                ``{'file': name, 'compressed_stacks': [...]}`` in sorted
                file-name order;
              * ``'common_patterns'``: mapping of ``' -> '``-joined compressed
                stack to the number of times it occurred;
              * ``'frequent_functions'``: ``Counter`` of interesting frames.

        Raises:
            OSError: If the directory cannot be listed or a file cannot be read.
        """
        all_dumps_analysis = {
            'individual_dumps': [],
            'common_patterns': defaultdict(int),
            'frequent_functions': Counter()
        }

        # Skip sub-directories and other non-regular entries: handing them to
        # open() would raise and abort the whole run.
        dump_files = sorted(
            name for name in os.listdir(dump_dir)
            if os.path.isfile(os.path.join(dump_dir, name))
        )

        for dump_file in dump_files:
            file_path = os.path.join(dump_dir, dump_file)
            stacks = self.parse_dump_file(file_path)

            dump_analysis = {
                'file': dump_file,
                'compressed_stacks': []
            }

            for stack in stacks:
                compressed = self.compress_stack(stack)
                if compressed:
                    dump_analysis['compressed_stacks'].append(compressed)

                    # Track individual interesting functions
                    for frame in compressed:
                        if frame != "...":
                            all_dumps_analysis['frequent_functions'][frame] += 1

                    # Track stack patterns
                    stack_signature = ' -> '.join(compressed)
                    all_dumps_analysis['common_patterns'][stack_signature] += 1

            all_dumps_analysis['individual_dumps'].append(dump_analysis)

        return all_dumps_analysis

    def print_analysis(self, analysis: Dict):
        """Print the result of ``analyze_dumps`` to stdout.

        Prints every compressed stack per file, then the 10 most common stack
        patterns, then the 20 most frequent interesting frames.

        Args:
            analysis: Dict in the shape returned by ``analyze_dumps``.
        """
        print("=== Individual Dump Analysis ===")
        for dump in analysis['individual_dumps']:
            print(f"\nFile: {dump['file']}")
            print("Compressed stacks:")

            for stack_num, stack in enumerate(dump['compressed_stacks'], 1):
                print(f"\nStack {stack_num}:")
                for frame in stack:
                    print(f"  {frame}")

        print("\n=== Most Common Stack Patterns ===")
        ranked_patterns = sorted(
            analysis['common_patterns'].items(),
            key=lambda x: x[1],
            reverse=True,
        )
        for pattern, count in ranked_patterns[:10]:
            print(f"\nOccurred {count} times:")
            # Signatures were built with ' -> ' as the frame separator.
            for frame in pattern.split(' -> '):
                print(f"  {frame}")

        print("\n=== Most Frequent Functions ===")
        for func, count in analysis['frequent_functions'].most_common(20):
            print(f"{func}: {count} occurrences")


def main():
    """Command-line entry point: analyze the dump directory given as argv[1]."""
    if len(sys.argv) != 2:
        print("Usage: threadstall_analyzer.py <dump_dir>", file=sys.stderr)
        sys.exit(1)

    dump_dir = sys.argv[1]
    if not os.path.isdir(dump_dir):
        print(f"Error: '{dump_dir}' is not a directory", file=sys.stderr)
        sys.exit(1)

    analyzer = ThreadDumpAnalyzer()
    analysis = analyzer.analyze_dumps(dump_dir)
    analyzer.print_analysis(analysis)


if __name__ == "__main__":
    main()