Skip to content

Check Python Notebooks #41

Check Python Notebooks

Check Python Notebooks #41

# Lints and executes the Python notebooks of each course directory.
# One matrix job is created per (os, directory) pair.
name: Check Python Notebooks
on:
  pull_request:
    branches:
      - main
  schedule:
    # Runs once a week on Sunday at midnight (GitHub evaluates cron in UTC)
    - cron: '0 0 * * 0'
# A newer run for the same ref cancels the one still in progress.
concurrency:
  group: test-python-notebooks-${{ github.ref }}
  cancel-in-progress: true
jobs:
  test-python-notebooks:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Removed: {windows-latest, macos-latest} as they were taking too
        # long. Consider adding back in the future
        os:
          - ubuntu-latest
        directory:
          - PM1
          - PM2
          - PM3
          - PM4
          - PM5
          - CM1
          - CM2
          - CM3
          - AC1
          - AC2
          - T
    steps:
# Check out the repository. Skipped only for CM3 on Windows/macOS runners.
# Pull requests need just two commits (HEAD and its parent) for the diff in
# the next step; every other event fetches the full history.
- name: Checkout repository
  if: >-
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  # v4 replaces v3, which targets the deprecated Node 16 runtime.
  uses: actions/checkout@v4
  with:
    fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}
# Pull requests: list the files that differ between HEAD and its first parent
# and keep only notebooks, Python scripts and python-notebooks.yml.
- name: Find changed notebooks in PR
  if: github.event_name == 'pull_request'
  id: find_notebooks_pr
  run: |
    # An earlier variant fetched the PR base and head refs and diffed
    # origin/<base>...origin/<head>; the parent-commit diff below is used
    # instead.
    git diff --name-only -r HEAD^1 HEAD > changed_files.txt
    # When nothing matches, write a placeholder line so that
    # changed_notebooks.txt always exists for the following steps.
    if ! grep -E '\.ipynb$|\.py$|\.github/workflows/python-notebooks.yml$' changed_files.txt > changed_notebooks.txt; then
      echo "No notebooks changed" > changed_notebooks.txt
    fi
# Push events: list the files that differ between the commits before and
# after the push, filtered the same way as for pull requests.
# NOTE(review): no `push` trigger is visible in this workflow's `on:` block,
# so this step looks unreachable as written - confirm.
- name: Find changed notebooks in Push
  if: github.event_name == 'push'
  id: find_notebooks_push
  run: |
    git diff --name-only ${{ github.event.before }} ${{ github.event.after }} > changed_files.txt
    # When nothing matches, write a placeholder line so that
    # changed_notebooks.txt always exists for the following steps.
    if ! grep -E '\.ipynb$|\.py$|\.github/workflows/python-notebooks.yml$' changed_files.txt > changed_notebooks.txt; then
      echo "No notebooks changed" > changed_notebooks.txt
    fi
# Decide whether this matrix directory is affected by the change set and
# export the answer as env.notebooks_changed for all later steps.
- name: Check if any notebooks changed in PR or Push
  if: github.event_name == 'push' || github.event_name == 'pull_request'
  id: check_notebooks
  run: |
    cat changed_notebooks.txt
    # Affected when a notebook directly inside the directory, any .py file
    # below it, or the python-notebooks.yml workflow file was changed.
    changed=false
    if grep -q -E '^${{ matrix.directory }}/[^/]+\.ipynb$|^${{ matrix.directory }}/.*\.py$|\.github/workflows/python-notebooks.yml$' changed_notebooks.txt; then
      changed=true
    else
      echo "No Python notebooks or python scripts changed in folder ${{ matrix.directory }} in this PR."
    fi
    echo "notebooks_changed=$changed" >> "$GITHUB_ENV"
# Any event other than push / pull_request (e.g. the weekly schedule) skips
# the change detection and marks every directory as changed.
- name: Set notebooks changed to true for schedule (or other ways of triggering the job)
  if: github.event_name != 'push' && github.event_name != 'pull_request'
  id: set_check_notebooks_true
  run: |
    # The weekly scheduled run covers all folders so that breakage caused
    # by changes in dependencies is detected.
    echo "notebooks_changed=true" >> "$GITHUB_ENV"
# Install git with the package manager that belongs to the runner's OS.
- name: Install git
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  run: |
    case "$RUNNER_OS" in
      Linux)
        sudo apt-get update
        sudo apt-get install -y git
        ;;
      macOS)
        brew install git
        ;;
      Windows)
        # Retry once after 30 seconds if the first choco attempt fails.
        choco install git -y --no-progress || powershell -Command "Start-Sleep -Seconds 30; choco install git -y --no-progress"
        ;;
    esac
  shell: bash
# CM3 on Ubuntu only: install the Graphviz development package.
- name: Install libgraphviz-dev if folder is CM3
  if: >-
    env.notebooks_changed == 'true' &&
    matrix.directory == 'CM3' &&
    matrix.os == 'ubuntu-latest'
  run: |
    sudo apt-get install -y libgraphviz-dev
  shell: bash
# CM3 on Ubuntu only: refresh the package index and install base R.
- name: Install R if folder is CM3
  if: >-
    env.notebooks_changed == 'true' &&
    matrix.directory == 'CM3' &&
    matrix.os == 'ubuntu-latest'
  run: |
    sudo apt-get update && sudo apt-get install -y r-base
  shell: bash
# Provide the Python interpreter used to lint and run the notebooks.
- name: Set up Python
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  # v5 replaces v2, which targets the retired Node 12 runtime.
  uses: actions/setup-python@v5
  with:
    # Quoted so YAML does not read the version as the float 3.1.
    python-version: '3.10'
# Upgrade pip, install the repository requirements when the file exists,
# then install the tooling used by the lint / convert / execute steps.
- name: Install dependencies
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  run: |
    python -m pip install --upgrade pip
    if [ -f requirements.txt ]; then
      pip install -r requirements.txt
    fi
    pip install jupyter nbconvert flake8 flake8-nb ipython nbstripout
  shell: bash
# Remove stored cell outputs from every notebook directly inside the matrix
# directory before linting and conversion.
- name: Strip outputs from .ipynb files
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  run: |
    for notebook in ${{ matrix.directory }}/*.ipynb; do
      # An unmatched glob stays literal; skip it instead of passing a
      # non-existent path to nbstripout (same guard as the convert step).
      [ -e "$notebook" ] || continue
      nbstripout "$notebook"
    done
  # Pinned explicitly, like the other script steps, so the loop does not
  # depend on the runner's default shell.
  shell: bash
# Lint each notebook with flake8-nb, writing one log per notebook to
# linting_logs/. The number of notebooks with findings is exported as
# env.flake8_errors; the step itself does not fail.
- name: Run Flake8 linting on notebooks
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  id: lint
  run: |
    mkdir -p linting_logs
    flake8_errors=0
    for notebook in ${{ matrix.directory }}/*.ipynb; do
      # An unmatched glob stays literal; without this guard flake8-nb would
      # fail on the non-existent path and a bogus error would be counted.
      [ -e "$notebook" ] || continue
      # python-dosearch.ipynb is explicitly excluded from linting.
      [ "$(basename "$notebook")" = "python-dosearch.ipynb" ] && continue
      flake8-nb --config=.flake8 "$notebook" > "linting_logs/$(basename "$notebook" .ipynb)_linting.txt" 2>&1 || flake8_errors=$((flake8_errors+1))
    done
    echo "flake8_errors=$flake8_errors" >> "$GITHUB_ENV"
  shell: bash
# Export every notebook of the matrix directory as a script into
# converted_scripts/.
- name: Convert Jupyter notebooks to Python scripts
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  id: convert
  run: |
    mkdir -p converted_scripts
    for notebook in ${{ matrix.directory }}/*.ipynb; do
      # Skip the literal pattern left behind by an unmatched glob.
      if [ ! -e "$notebook" ]; then
        continue
      fi
      # This notebook is explicitly excluded from conversion, and therefore
      # from execution as well.
      nb_name="$(basename "$notebook")"
      if [ "$nb_name" = "DoubleML_and_Feature_Engineering_with_BERT.ipynb" ]; then
        continue
      fi
      jupyter nbconvert --to script "$notebook" --output-dir converted_scripts
    done
  shell: bash
# Execute each converted script with IPython, capturing stdout and stderr in
# logs/<script>.txt. The number of failing scripts is exported as
# env.script_errors; the step itself does not fail.
- name: Run converted Python scripts with IPython
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  id: execute
  run: |
    mkdir -p logs
    failed=0
    for script in converted_scripts/*.py; do
      # Skip the literal pattern left behind by an unmatched glob.
      if [ ! -e "$script" ]; then
        continue
      fi
      echo "Running $script"
      log_file="logs/$(basename "$script" .py).txt"
      if ! ipython "$script" > "$log_file" 2>&1; then
        failed=$((failed + 1))
      fi
    done
    echo "script_errors=$failed" >> "$GITHUB_ENV"
  shell: bash
# Collect error- and warning-looking lines from the execution and linting
# logs into a single report and print it to the job log.
- name: Aggregate and report errors and warnings
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  run: |
    echo "Aggregating errors and warnings..."
    report=logs/errors_and_warnings.txt
    pattern="Traceback|Error:|Exception:|ModuleNotFoundError|FutureWarning|TypeError"
    # grep exits non-zero when nothing matches; that must not fail the step.
    grep -E "$pattern" logs/*.txt linting_logs/*.txt > "$report" || true
    echo "Errors and Warnings:"
    cat "$report"
  shell: bash
# Publish the flake8-nb logs as a build artifact.
- name: Upload linting logs
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  # v4 replaces v2, which GitHub has retired. v4 requires artifact names to
  # be unique within a run; directory + os makes each matrix job's name
  # distinct.
  uses: actions/upload-artifact@v4
  with:
    name: linting-logs-${{ matrix.directory }}-${{ matrix.os }}
    path: linting_logs
# Publish the script execution logs (including the aggregated report written
# to logs/) as a build artifact.
- name: Upload execution logs
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  # v4 replaces v2, which GitHub has retired. v4 requires artifact names to
  # be unique within a run; directory + os makes each matrix job's name
  # distinct.
  uses: actions/upload-artifact@v4
  with:
    name: execution-logs-${{ matrix.directory }}-${{ matrix.os }}
    path: logs
# Scheduled runs only: mail a failure notice when linting or execution
# recorded at least one error for this directory.
- name: Send failure mail
  if: >-
    github.event_name == 'schedule' &&
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')) &&
    (env.flake8_errors != '0' || env.script_errors != '0')
  uses: dawidd6/action-send-mail@v3
  with:
    # Mail server address (required unless connection_url is used)
    server_address: smtp.gmail.com
    # Mail server port (the action's default is 25)
    server_port: 465
    # Use TLS for the connection (already the default on port 465)
    secure: true
    # Mail server credentials, taken from repository secrets
    username: ${{secrets.MAIL_USERNAME}}
    password: ${{secrets.MAIL_PASSWORD}}
    # Mail subject (required)
    subject: FAILURE Python (${{matrix.directory}}) test Github Action job failed!
    # Recipients (required)
    # NOTE(review): the address looks obfuscated in this copy of the file -
    # restore the real recipient and quote the value.
    to: [email protected]
    # Sender full name; the address part may be omitted
    # NOTE(review): the address looks obfuscated here as well - confirm.
    from: GA-MetricsML-Notebooks <[email protected]>
    # Plain-text body (optional)
    body: Python notebook tests of directory ${{matrix.directory}} in Git repo ${{github.repository}} failed!!
    # NOTE(review): certificate validation is switched off although
    # credentials are sent to the server - confirm this is still needed.
    ignore_cert: true
# Fail the job when linting or execution recorded at least one error.
- name: Check for errors
  if: >-
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')) &&
    (env.flake8_errors != '0' || env.script_errors != '0')
  run: |
    exit 1
# Scheduled runs only: mail a success notice. The condition contains no
# status function, so this step is skipped once an earlier step (such as
# "Check for errors") has failed.
- name: Send success mail
  if: >-
    github.event_name == 'schedule' &&
    env.notebooks_changed == 'true' &&
    !(matrix.directory == 'CM3' &&
    (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))
  uses: dawidd6/action-send-mail@v3
  with:
    # Mail server address (required unless connection_url is used)
    server_address: smtp.gmail.com
    # Mail server port (the action's default is 25)
    server_port: 465
    # Use TLS for the connection (already the default on port 465)
    secure: true
    # Mail server credentials, taken from repository secrets
    username: ${{secrets.MAIL_USERNAME}}
    password: ${{secrets.MAIL_PASSWORD}}
    # Mail subject (required)
    subject: SUCCESS Python (${{matrix.directory}}) test Github Action job succeeded!
    # Recipients (required)
    # NOTE(review): the address looks obfuscated in this copy of the file -
    # restore the real recipient and quote the value.
    to: [email protected]
    # Sender full name; the address part may be omitted
    # NOTE(review): the address looks obfuscated here as well - confirm.
    from: GA-MetricsML-Notebooks <[email protected]>
    # Plain-text body (optional); spelling of "succeeded" corrected
    body: Python notebook tests of directory ${{matrix.directory}} in Git repo ${{github.repository}} succeeded.
    # NOTE(review): certificate validation is switched off although
    # credentials are sent to the server - confirm this is still needed.
    ignore_cert: true
# - name: Check out the branch for pull request
# if: "(github.event_name == 'pull_request') && (env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
# run: |
# git fetch --all
# git checkout ${{ github.event.pull_request.head.ref }}
# - name: Check if there are any changes (e.g. stripped outputs)
# if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
# id: verify_diff
# run: |
# git pull
# git diff --quiet ${{ matrix.directory }}/*.ipynb || echo "changed=true" >> $GITHUB_OUTPUT
# - name: Commit and push stripped .ipynb files
# if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))) && (steps.verify_diff.outputs.changed == 'true')"
# run: |
# git config --global user.name 'github-actions[bot]'
# git config --global user.email 'github-actions[bot]@users.noreply.github.com'
# git pull
# git add ${{ matrix.directory }}/*.ipynb
# git commit -m 'Strip outputs from .ipynb files in ${{ matrix.directory }} [skip ci]'
# git push --force-with-lease
# env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}