diff --git a/.github/workflows/slow-tests-issue.yml b/.github/workflows/slow-tests-issue.yml new file mode 100644 index 0000000000..643853f617 --- /dev/null +++ b/.github/workflows/slow-tests-issue.yml @@ -0,0 +1,31 @@ +# Taken from https://github.com/pymc-labs/pymc-marketing/tree/main/.github/workflows/slow-tests-issue.yml +# See the scripts in the `scripts/slowest_tests` directory for more information +--- +name: Slow Tests Issue Body + +on: + workflow_dispatch: + schedule: + - cron: '0 */6 * * *' + +permissions: + issues: write + +jobs: + update-comment: + runs-on: ubuntu-latest + steps: + - name: Install ZSH + run: sudo apt-get update && sudo apt-get install -y zsh + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Trigger the script + working-directory: scripts/slowest_tests + shell: zsh {0} + run: source update-slowest-times-issue.sh + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/scripts/slowest_tests/extract-slow-tests.py b/scripts/slowest_tests/extract-slow-tests.py new file mode 100644 index 0000000000..3a06e4a68b --- /dev/null +++ b/scripts/slowest_tests/extract-slow-tests.py @@ -0,0 +1,80 @@ +"""This script parses the GitHub action log for test times. + +Taken from https://github.com/pymc-labs/pymc-marketing/tree/main/scripts/slowest_tests/extract-slow-tests.py + +""" + +import re +import sys +from pathlib import Path + + +start_pattern = re.compile(r"==== slow") +separator_pattern = re.compile(r"====") +time_pattern = re.compile(r"(\d+\.\d+)s ") + + +def extract_lines(lines: list[str]) -> list[str]: + times = [] + + in_section = False + for line in lines: + detect_start = start_pattern.search(line) + detect_end = separator_pattern.search(line) + + if detect_start: + in_section = True + + if in_section: + times.append(line) + + if not detect_start and in_section and detect_end: + break + + return times + + +def trim_up_to_match(pattern, string: str) -> str: + match = pattern.search(string) + if not match: + return "" + + return string[match.start() :] + + +def trim(pattern, lines: list[str]) -> list[str]: + return [trim_up_to_match(pattern, line) for line in lines] + + +def strip_ansi(text: str) -> str: + ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + return ansi_escape.sub("", text) + + +def format_times(times: list[str]) -> list[str]: + return ( + trim(separator_pattern, times[:1]) + + trim(time_pattern, times[1:-1]) + + [strip_ansi(line) for line in trim(separator_pattern, times[-1:])] + ) + + +def read_lines_from_stdin(): + return sys.stdin.read().splitlines() + + +def read_from_file(file: Path): + """For testing purposes.""" + return file.read_text().splitlines() + + +def main(read_lines): + lines = read_lines() + times = extract_lines(lines) + parsed_times = format_times(times) + print("\n".join(parsed_times)) + + +if __name__ == "__main__": + read_lines = read_lines_from_stdin + main(read_lines) diff --git a/scripts/slowest_tests/update-slowest-times-issue.sh b/scripts/slowest_tests/update-slowest-times-issue.sh new file mode 100644 index 0000000000..b1c0c15789 --- /dev/null +++ b/scripts/slowest_tests/update-slowest-times-issue.sh @@ -0,0 +1,113 @@ +#!/bin/zsh + +DRY_RUN=false + +owner=pymc-devs +repo=pytensor +issue_number=1124 +title="Speed up test times :rocket:" +workflow=Tests +latest_id=$(gh run list --workflow $workflow --status success --limit 1 --json databaseId --jq '.[0].databaseId') +jobs=$(gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq '.jobs | map({name: .name, run_id: .run_id, id: .id, started_at: .started_at, completed_at: .completed_at})') + +# Skip 3.10, float32, and Benchmark tests +function skip_job() { + name=$1 + if [[ $name == *"py3.10"* ]]; then + return 0 + fi + + if [[ $name == *"float32 1"* ]]; then + return 0 + fi + + if [[ $name == *"Benchmark"* ]]; then + return 0 + fi + + return 1 +} + +# Remove common prefix from the name +function remove_prefix() { + name=$1 + echo $name | sed -e 's/^ubuntu-latest test py3.12 : fast-compile 0 : float32 0 : //' +} + +function human_readable_time() { + started_at=$1 + completed_at=$2 + + start_seconds=$(date -d "$started_at" +%s) + end_seconds=$(date -d "$completed_at" +%s) + + seconds=$(($end_seconds - $start_seconds)) + + if [ $seconds -lt 60 ]; then + echo "$seconds seconds" + else + echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')" + fi +} + +all_times="" +echo "$jobs" | jq -c '.[]' | while read -r job; do + id=$(echo $job | jq -r '.id') + name=$(echo $job | jq -r '.name') + run_id=$(echo $job | jq -r '.run_id') + started_at=$(echo $job | jq -r '.started_at') + completed_at=$(echo $job | jq -r '.completed_at') + + if skip_job $name; then + echo "Skipping $name" + continue + fi + + echo "Processing job: $name (ID: $id, Run ID: $run_id)" + times=$(gh run view --job $id --log | python extract-slow-tests.py) + + if [ -z "$times" ]; then + # Some of the jobs are non-test jobs, so we skip them + echo "No tests found for '$name', skipping" + continue + fi + + echo $times + + human_readable=$(human_readable_time $started_at $completed_at) + name=$(remove_prefix $name) + + top="
($human_readable) $name\n\n\n\`\`\`" + bottom="\`\`\`\n\n
" + + formatted_times="$top\n$times\n$bottom" + + if [ -n "$all_times" ]; then + all_times="$all_times\n$formatted_times" + else + all_times="$formatted_times" + fi +done + +run_date=$(date +"%Y-%m-%d") +body=$(cat << EOF +If you are motivated to help speed up some tests, we would appreciate it! + +Here are some of the slowest test times: + +$all_times + +You can find more information on how to contribute [here](https://pytensor.readthedocs.io/en/latest/dev_start_guide.html) + +Automatically generated by [GitHub Action](https://github.com/pymc-devs/pytensor/blob/main/.github/workflows/slow-tests-issue.yml) +Latest run date: $run_date +EOF +) + +if [ "$DRY_RUN" = true ]; then + echo "Dry run, not updating issue" + echo $body + exit +fi +echo $body | gh issue edit $issue_number --body-file - --title "$title" +echo "Updated issue $issue_number with all times"