# Check Python Notebooks #57
# Viewer notice (not part of the workflow): this file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters (link in the original viewer).
# Lints and executes the Jupyter notebooks of each directory in the matrix.
#
# Triggers:
#   - pull requests targeting `main`: a directory is only tested when one of its
#     notebooks/scripts (or the workflow file itself) changed;
#   - weekly schedule: every directory is tested and the result is mailed.
name: Check Python Notebooks

on:
  pull_request:
    branches:
      - main
  schedule:
    - cron: '0 0 * * 0'  # Runs once a week on Sunday at midnight

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: test-python-notebooks-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-python-notebooks:
    runs-on: ${{ matrix.os }}
    strategy:
      # Directories are independent of each other: do not cancel the remaining
      # matrix jobs (and lose their logs / mails) when one directory fails.
      fail-fast: false
      matrix:
        # Removed: {windows-latest, macos-latest} as they were taking too long. Consider adding back in the future
        os: [ubuntu-latest]
        directory: [PM1, PM2, PM3, PM4, PM5, CM1, CM2, CM3, AC1, AC2, T]
    steps:
      - name: Checkout repository
        if: "! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))"
        uses: actions/checkout@v4
        with:
          # Depth 2 on pull requests so that HEAD^1 (used by the diff step
          # below) is available; full history for every other event.
          fetch-depth: ${{ github.event_name == 'pull_request' && 2 || 0 }}

      - name: Find changed notebooks in PR
        if: github.event_name == 'pull_request'
        id: find_notebooks_pr
        # Alternative kept for reference. It lives in YAML comments (not inside
        # the script) so that the branch-name expressions are never expanded
        # into the shell script:
        #   git fetch origin ${{ github.event.pull_request.base.ref }} ${{ github.event.pull_request.head.ref }}
        #   git diff --name-only origin/${{ github.event.pull_request.base.ref }}...origin/${{ github.event.pull_request.head.ref }} > changed_files.txt
        run: |
          git diff --name-only -r HEAD^1 HEAD > changed_files.txt
          grep -E '\.ipynb$|\.py$|\.github/workflows/python-notebooks.yml$' changed_files.txt > changed_notebooks.txt || echo "No notebooks changed" > changed_notebooks.txt

      # NOTE(review): `push` is not listed under `on:` above, so this step only
      # runs if that trigger is added.
      - name: Find changed notebooks in Push
        if: github.event_name == 'push'
        id: find_notebooks_push
        run: |
          git diff --name-only ${{ github.event.before }} ${{ github.event.after }} > changed_files.txt
          grep -E '\.ipynb$|\.py$|\.github/workflows/python-notebooks.yml$' changed_files.txt > changed_notebooks.txt || echo "No notebooks changed" > changed_notebooks.txt

      # Exports notebooks_changed=true/false for the current matrix directory;
      # every later step is gated on that variable.
      - name: Check if any notebooks changed in PR or Push
        if: (github.event_name == 'push') || (github.event_name == 'pull_request')
        id: check_notebooks
        run: |
          cat changed_notebooks.txt
          if grep -q -E '^${{ matrix.directory }}/[^/]+\.ipynb$|^${{ matrix.directory }}/.*\.py$|\.github/workflows/python-notebooks.yml$' changed_notebooks.txt; then
            echo "notebooks_changed=true" >> "$GITHUB_ENV"
          else
            echo "notebooks_changed=false" >> "$GITHUB_ENV"
            echo "No Python notebooks or python scripts changed in folder ${{ matrix.directory }} in this PR."
          fi

      - name: Set notebooks changed to true for schedule (or other ways of triggering the job)
        if: "! ((github.event_name == 'push') || (github.event_name == 'pull_request'))"
        id: set_check_notebooks_true
        run: |
          # we run all folders if it is the weekly scheduled run to
          # check if something broke due to changes in dependencies
          echo "notebooks_changed=true" >> "$GITHUB_ENV"

      - name: Install git
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        run: |
          if [ "$RUNNER_OS" == "Linux" ]; then
            sudo apt-get update
            sudo apt-get install -y git
          elif [ "$RUNNER_OS" == "macOS" ]; then
            brew install git
          elif [ "$RUNNER_OS" == "Windows" ]; then
            choco install git -y --no-progress || powershell -Command "Start-Sleep -Seconds 30; choco install git -y --no-progress"
          fi
        shell: bash

      - name: Install libgraphviz-dev if folder is CM3
        if: "(env.notebooks_changed == 'true') && (matrix.directory == 'CM3' && matrix.os == 'ubuntu-latest')"
        run: sudo apt-get install -y libgraphviz-dev
        shell: bash

      - name: Install R if folder is CM3
        if: "(env.notebooks_changed == 'true') && (matrix.directory == 'CM3' && matrix.os == 'ubuntu-latest')"
        run: |
          sudo apt-get update
          sudo apt-get install -y r-base
        shell: bash

      - name: Set up Python
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'  # Use Python 3.10 (quoted so it is not read as the float 3.1)

      - name: Install dependencies
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          pip install jupyter nbconvert flake8 flake8-nb ipython nbstripout
        shell: bash

      - name: Strip outputs from .ipynb files
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        run: |
          for notebook in ${{ matrix.directory }}/*.ipynb; do
            # Skip the literal pattern when the directory holds no notebooks.
            [ -e "$notebook" ] || continue
            nbstripout "$notebook"
          done

      # Lints every notebook; the number of notebooks with findings is exported
      # as flake8_errors instead of failing the step, so execution still runs.
      - name: Run Flake8 linting on notebooks
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        id: lint
        run: |
          mkdir -p linting_logs
          flake8_errors=0
          for notebook in ${{ matrix.directory }}/*.ipynb; do
            # Skip the literal pattern when the directory holds no notebooks,
            # otherwise it would be counted as a lint failure.
            [ -e "$notebook" ] || continue
            [ "$(basename "$notebook")" = "python-dosearch.ipynb" ] && continue
            flake8-nb --config=.flake8 "$notebook" > "linting_logs/$(basename "$notebook" .ipynb)_linting.txt" 2>&1 || flake8_errors=$((flake8_errors+1))
          done
          echo "flake8_errors=$flake8_errors" >> "$GITHUB_ENV"
        shell: bash

      - name: Convert Jupyter notebooks to Python scripts
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        id: convert
        run: |
          mkdir -p converted_scripts
          for notebook in ${{ matrix.directory }}/*.ipynb; do
            [ -e "$notebook" ] || continue
            [ "$(basename "$notebook")" = "DoubleML_and_Feature_Engineering_with_BERT.ipynb" ] && continue
            jupyter nbconvert --to script "$notebook" --output-dir converted_scripts
          done
        shell: bash

      # Runs every converted script; the number of failing scripts is exported
      # as script_errors instead of failing the step, so logs are still uploaded.
      - name: Run converted Python scripts with IPython
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        id: execute
        run: |
          mkdir -p logs
          script_errors=0
          for script in converted_scripts/*.py; do
            [ -e "$script" ] || continue
            echo "Running $script"
            ipython "$script" > "logs/$(basename "$script" .py).txt" 2>&1 || script_errors=$((script_errors+1))
          done
          echo "script_errors=$script_errors" >> "$GITHUB_ENV"
        shell: bash

      - name: Aggregate and report errors and warnings
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        run: |
          echo "Aggregating errors and warnings..."
          # `|| true`: grep exits non-zero when nothing matches, which is the good case here.
          grep -E "Traceback|Error:|Exception:|ModuleNotFoundError|FutureWarning|TypeError" logs/*.txt linting_logs/*.txt > logs/errors_and_warnings.txt || true
          echo "Errors and Warnings:"
          cat logs/errors_and_warnings.txt
        shell: bash

      - name: Upload linting logs
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        uses: actions/upload-artifact@v4
        with:
          name: linting-logs-${{ matrix.directory }}-${{ matrix.os }}
          path: linting_logs

      - name: Upload execution logs
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
        uses: actions/upload-artifact@v4
        with:
          name: execution-logs-${{ matrix.directory }}-${{ matrix.os }}
          path: logs

      # Must stay before "Check for errors": that step fails the job, after
      # which steps without an explicit status check are skipped.
      - name: Send failure mail
        if: "((github.event_name == 'schedule') && (env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')) && (env.flake8_errors != '0' || env.script_errors != '0')))"
        uses: dawidd6/action-send-mail@v3
        with:
          # Required mail server address if not connection_url:
          server_address: smtp.gmail.com
          # Server port, default 25:
          server_port: 465
          # Optional whether this connection use TLS (default is true if server_port is 465)
          secure: true
          # Optional (recommended) mail server username:
          username: ${{secrets.MAIL_USERNAME}}
          # Optional (recommended) mail server password:
          password: ${{secrets.MAIL_PASSWORD}}
          # Required mail subject:
          subject: FAILURE Python (${{matrix.directory}}) test Github Action job failed!
          # Required recipients' addresses:
          # NOTE(review): the address was redacted in this copy of the file; restore the real recipient.
          # Quoted because a value starting with `[` is otherwise parsed as a YAML flow sequence.
          to: '[email protected]'
          # Required sender full name (address can be skipped):
          # NOTE(review): the address was redacted in this copy of the file; restore the real sender.
          from: 'GA-MetricsML-Notebooks <[email protected]>'
          # Optional plain body:
          body: Python notebook tests of directory ${{matrix.directory}} in Git repo ${{github.repository}} failed!!
          # NOTE(review): presumably skips TLS certificate validation on the connection
          # that carries the credentials above; confirm this is really required.
          ignore_cert: true

      - name: Check for errors
        if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')) && (env.flake8_errors != '0' || env.script_errors != '0'))"
        run: exit 1

      - name: Send success mail
        if: "((github.event_name == 'schedule') && (env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))))"
        uses: dawidd6/action-send-mail@v3
        with:
          # Required mail server address if not connection_url:
          server_address: smtp.gmail.com
          # Server port, default 25:
          server_port: 465
          # Optional whether this connection use TLS (default is true if server_port is 465)
          secure: true
          # Optional (recommended) mail server username:
          username: ${{secrets.MAIL_USERNAME}}
          # Optional (recommended) mail server password:
          password: ${{secrets.MAIL_PASSWORD}}
          # Required mail subject:
          subject: SUCCESS Python (${{matrix.directory}}) test Github Action job succeeded!
          # Required recipients' addresses:
          # NOTE(review): the address was redacted in this copy of the file; restore the real recipient.
          # Quoted because a value starting with `[` is otherwise parsed as a YAML flow sequence.
          to: '[email protected]'
          # Required sender full name (address can be skipped):
          # NOTE(review): the address was redacted in this copy of the file; restore the real sender.
          from: 'GA-MetricsML-Notebooks <[email protected]>'
          # Optional plain body:
          body: Python notebook tests of directory ${{matrix.directory}} in Git repo ${{github.repository}} succeeded.
          # NOTE(review): presumably skips TLS certificate validation on the connection
          # that carries the credentials above; confirm this is really required.
          ignore_cert: true

      # Disabled: committing the stripped notebooks back to the branch.
      # - name: Check out the branch for pull request
      #   if: "(github.event_name == 'pull_request') && (env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
      #   run: |
      #     git fetch --all
      #     git checkout ${{ github.event.pull_request.head.ref }}
      # - name: Check if there are any changes (e.g. stripped outputs)
      #   if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest')))"
      #   id: verify_diff
      #   run: |
      #     git pull
      #     git diff --quiet ${{ matrix.directory }}/*.ipynb || echo "changed=true" >> $GITHUB_OUTPUT
      # - name: Commit and push stripped .ipynb files
      #   if: "(env.notebooks_changed == 'true') && (! (matrix.directory == 'CM3' && (matrix.os == 'windows-latest' || matrix.os == 'macos-latest'))) && (steps.verify_diff.outputs.changed == 'true')"
      #   run: |
      #     git config --global user.name 'github-actions[bot]'
      #     git config --global user.email 'github-actions[bot]@users.noreply.github.com'
      #     git pull
      #     git add ${{ matrix.directory }}/*.ipynb
      #     git commit -m 'Strip outputs from .ipynb files in ${{ matrix.directory }} [skip ci]'
      #     git push --force-with-lease
      #   env:
      #     GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}