Merge pull request #87 from aeturrell/build-book-action
GitHub Actions to test, build, and release versions of the book
aeturrell authored Jan 2, 2025
2 parents 8c743bb + dfb103c commit 1f73fd6
Showing 9 changed files with 272 additions and 102 deletions.
111 changes: 111 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,111 @@
name: release

permissions:
contents: write
pages: write

on:
push:
branches:
- main

jobs:
release:
name: release
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v4
with:
fetch-depth: 2

- name: install mamba
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
init-shell: >-
bash
cache-environment: true
post-cleanup: 'all'

- name: set timezone
run: |
TZ="Europe/London" &&
sudo ln -snf /usr/share/zoneinfo/$TZ /etc/localtime
- name: install linux deps
run: |
sudo apt-get -y install openssl graphviz nano texlive graphviz-dev unzip
- name: install special fonts
run: |
mkdir -p /usr/share/fonts/truetype/ &&
wget https://www.wfonts.com/download/data/2015/10/08/varta/varta.zip &&
unzip varta.zip &&
install -m644 *.ttf /usr/share/fonts/truetype/ &&
rm *.ttf
rm varta.zip
- name: install text models
run: |
micromamba run -n codeforecon python3 -m spacy download en_core_web_sm &&
micromamba run -n codeforecon python3 -m nltk.downloader all
- name: special fix for todoify
run: |
sed -i '90 s/^/#/' /home/runner/micromamba/envs/codeforecon/lib/python3.10/site-packages/mdit_py_plugins/tasklists/__init__.py
# Issue with pymc needing np<2.0 and skimpy needing >2.0, so pip install of latter causes upgrade to np
- name: special fix for numpy
run: |
micromamba run -n codeforecon pip install --force-reinstall -v "numpy==1.26"
- name: Monkey patch for binsreg # sets np.math = math
run: |
sed -i '/^import\|^from.*import/ { :a; n; /^import\|^from.*import/! { x; s/.*//; x; ba; }; }; a\import math\nimport numpy as np\nnp.math = math' ~/micromamba/envs/codeforecon/lib/python3.10/site-packages/binsreg/funs.py
- name: git config # Needed as git config --get user.name is used in one example
run: |
git config user.name "$(git log -n 1 --pretty=format:%an)" &&
git config user.email "$(git log -n 1 --pretty=format:%ae)"
- name: build the book
run: |
micromamba run -n codeforecon jupyter-book build . --verbose
- name: Check if there is a parent commit
id: check-parent-commit
run: |
echo "sha=$(git rev-parse --verify --quiet HEAD^)" >> "$GITHUB_OUTPUT"
- name: Detect and tag new version
id: check-version
if: steps.check-parent-commit.outputs.sha
uses: salsify/[email protected]
with:
version-command: |
micromamba run -n codeforecon toml get --toml-path=pyproject.toml project.version
- name: Bump version for developmental release
if: "! steps.check-version.outputs.tag"
run: |
micromamba run -n codeforecon python version_bumper.py &&
version=$(micromamba run -n codeforecon toml get --toml-path=pyproject.toml project.version) &&
micromamba run -n codeforecon toml set --toml-path=pyproject.toml project.version $version.dev.$(date +%s)
- name: Publish
if: steps.check-version.outputs.tag
run: ghp-import -n -p -f _build/html

- name: Publish the release notes
uses: release-drafter/[email protected]
with:
publish: ${{ steps.check-version.outputs.tag != '' }}
tag: ${{ steps.check-version.outputs.tag }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Success
if: steps.check-version.outputs.tag
run: |
echo "Success in releasing a new version of Coding for Economists!"
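An aside on the `Monkey patch for binsreg` step above: the long `sed` one-liner is hard to parse, but its net effect is simply to append a small shim after the import block of binsreg's `funs.py`. A Python sketch of that shim (this shows the effect of the patch; the workflow applies it with `sed` at build time):

```python
# Sketch of the shim that the sed step injects into binsreg/funs.py.
# Newer NumPy releases deprecate and then remove the old `np.math` alias,
# but binsreg still calls `np.math.*`, so the alias is restored manually
# by pointing it at the standard-library math module.
import math

import numpy as np

np.math = math  # binsreg's np.math.* calls now resolve to stdlib math
```

After this runs, a call such as `np.math.sqrt(16.0)` forwards to `math.sqrt`, which is all binsreg needs.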
81 changes: 81 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,81 @@
name: tests

on:
- pull_request
- push

jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.10.6'
- uses: pre-commit/[email protected]

build:
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v4
with:
fetch-depth: 2

- name: install mamba
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
init-shell: >-
bash
cache-environment: true
post-cleanup: 'all'

- name: set timezone
run: |
TZ="Europe/London" &&
sudo ln -snf /usr/share/zoneinfo/$TZ /etc/localtime
- name: install linux deps
run: |
sudo apt-get -y install openssl graphviz nano texlive graphviz-dev unzip
- name: install special fonts
run: |
mkdir -p /usr/share/fonts/truetype/ &&
wget https://www.wfonts.com/download/data/2015/10/08/varta/varta.zip &&
unzip varta.zip &&
install -m644 *.ttf /usr/share/fonts/truetype/ &&
rm *.ttf
rm varta.zip
- name: install text models
run: |
micromamba run -n codeforecon python3 -m spacy download en_core_web_sm &&
micromamba run -n codeforecon python3 -m nltk.downloader all
- name: special fix for todoify
run: |
sed -i '90 s/^/#/' /home/runner/micromamba/envs/codeforecon/lib/python3.10/site-packages/mdit_py_plugins/tasklists/__init__.py
# Issue with pymc needing np<2.0 and skimpy needing >2.0, so pip install of latter causes upgrade to np
- name: special fix for numpy
run: |
micromamba run -n codeforecon pip install --force-reinstall -v "numpy==1.26"
- name: Monkey patch for binsreg # sets np.math = math
run: |
sed -i '/^import\|^from.*import/ { :a; n; /^import\|^from.*import/! { x; s/.*//; x; ba; }; }; a\import math\nimport numpy as np\nnp.math = math' ~/micromamba/envs/codeforecon/lib/python3.10/site-packages/binsreg/funs.py
- name: git config # Needed as git config --get user.name is used in one example
run: |
git config user.name "$(git log -n 1 --pretty=format:%an)" &&
git config user.email "$(git log -n 1 --pretty=format:%ae)"
- name: build the book
run: |
micromamba run -n codeforecon jupyter-book build . --verbose
- name: success
run: |
echo "Success in building book without errors!"
26 changes: 19 additions & 7 deletions README.md
@@ -6,6 +6,8 @@ To read or use the book, head to the [*Coding for Economists* website](https://a

The rest of this readme is intended to help those who are contributing to the book, rather than readers.

Note that there is a custom, non-compliant `pyproject.toml` file included in this project. Its main purpose is to provide the version of the book, and the version of Python.

## Dev

These instructions are only for developers working on the book.
@@ -67,7 +69,7 @@ There is a bug that arises during builds due to the many dependencies of the book

### Building the Book

To build the book using **Jupyter Book** use
To build the book locally (using **Jupyter Book**), the command is:

```bash
jupyter-book build .
```

@@ -79,14 +81,12 @@ Note that, due to package conflicts, several pages may not compile when taking t

### Uploading Built Files

Only upload built files based on a successful commit or merge to the main branch. See [here](https://jupyterbook.org/publish/gh-pages.html) for how to upload revised HTML files, but the key command is
You should not need to upload built files manually as there are GitHub Actions that auto build and auto publish when there's a new release. Sometimes it is useful to republish locally, however. See [here](https://jupyterbook.org/publish/gh-pages.html) for how to upload revised HTML files, but the key command is

```bash
ghp-import -n -p -f _build/html
```

Typically, only maintainers will need to upload built files.

### Pre-commit

To perform the pre-commit checks, use
@@ -99,9 +99,10 @@ Pre-commit is currently configured to:

- check for large added files
- strip outputs from notebooks
- apply the [black](https://black.readthedocs.io/en/stable/) code formatter to .py and .ipynb scripts
- apply the Ruff code formatter to .py and .ipynb scripts
- sort imports

If **black-nb** finds a pre-commit error that is difficult to diagnose, a tip is to convert it to a regular script to find the problem, using, for example,
If there's a pre-commit error that is difficult to diagnose, a tip is to convert it to a regular script to find the problem, using, for example,

```bash
jupytext --to py data-intro.ipynb
```

@@ -142,4 +143,15 @@
## Creating a release
Head over to GitHub, and go to the releases page. Create a tag with the new version number, eg `v1.0.3`. Use the generate release notes button. Then publish. The Zenodo repository and version badge on the intro page will update automatically. The releases and the uploaded website should be consistent.
There's a GitHub Action set up to help with releases (a release is a new version of the book). These are the steps:
- Open a new branch with the new version name
- Change the version in `pyproject.toml`
- Commit the change with a new version label as the commit message (checking the tests pass)
- Head to GitHub and (create a pull request and) merge into main (assuming tests etc pass)
- A new release should be automatically drafted based on that most recent merge commit, and it should use the new version as the tag
- The new release should automatically be published on GitHub
- A new version of the book should automatically be uploaded
- The Zenodo repository and version badge on the intro page will update automatically
This process should mean that the releases and the uploaded website (on GitHub Pages) are consistent.
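The developmental-release bump that `release.yml` performs (the `toml get` / `toml set` pair plus `version_bumper.py`) can be sketched in plain Python. The `dev_version` helper below is illustrative only, an assumption about the shape of the logic rather than the repository's actual `version_bumper.py`:

```python
import re
import time


def dev_version(pyproject_text: str) -> str:
    """Append a `.dev.<unix-time>` suffix to the [project] version found in
    pyproject.toml text, mirroring the toml-cli commands in release.yml."""
    match = re.search(r'^version\s*=\s*"([^"]+)"', pyproject_text, flags=re.MULTILINE)
    if match is None:
        raise ValueError("no version field found in pyproject.toml text")
    return f"{match.group(1)}.dev.{int(time.time())}"


toml_text = '[project]\nname = "codeforecon"\nversion = "1.0.3"\n'
print(dev_version(toml_text))  # e.g. 1.0.3.dev.1735800000
```

The Unix-timestamp suffix means every push to `main` without a version change still gets a unique developmental version, so `check-version` can distinguish real releases (new tags) from routine builds.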
3 changes: 3 additions & 0 deletions _config.yml
@@ -13,6 +13,7 @@ execute:
execute_notebooks: cache
allow_errors: false
timeout: 600
nb_output_stderr: show

# Do not build what does not need to be built
only_build_toc_files: true
@@ -54,3 +55,5 @@ sphinx:
- https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js
bibtex_reference_style: author_year
suppress_warnings: ["mystnb.unknown_mime_type"]
nb_execution_show_tb: true
nb_execution_raise_on_error: true # Make build fail any content errors (don't want to publish if errors)
1 change: 1 addition & 0 deletions auto-research-outputs.md
@@ -15,6 +15,7 @@ kernelspec:
(auto-research-outputs)=
# Automating Research Outputs


In this chapter, you'll learn how to automate the inclusion of figures and tables in LaTeX-derived research outputs including PDFs and slides, plus how to convert those outputs to Microsoft Word documents and more. Much of what you'll see in this chapter applies to a wide range of coding languages.

This chapter has some similarities with another chapter, on {ref}`quarto`. But this chapter puts the LaTeX typesetting language front and centre, because it's the *de facto* standard for preparing *research outputs* (most journals have a LaTeX template for submission, for example), and it gives you full control over every aspect of how your outputs look. However, if you don't already know LaTeX, there is a steep-ish learning curve and—if you're just looking to create some automated reports using code and text rather than write pre-prints, working papers, journal articles, or academic-talk style slide decks—the chapter on {ref}`quarto` is going to be a better and easier fit for you.
94 changes: 0 additions & 94 deletions data-advanced.ipynb
@@ -395,100 +395,6 @@
"preprocessed_df"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Great Expectations\n",
"\n",
"[**Great Expectations**](https://greatexpectations.io/) is another data validation library. It's really geared toward production (and can integrate well with some other production tools). I wouldn't necessarily recommend it if you're doing a research project, but it's got some amazing features that earned it a place in this chapter--namely that it doesn't just validate data, but it documents and profiles it too.\n",
"\n",
"Their philosophy is that 'Expectations', the checks or assertions about data, are unit tests for your data (don't worry if you're not familiar with unit tests; they are checks for your code!). Once you have run your expectations, the library can create data documentation and data quality reports from them. The data documentation is quite an amazing feature, providing a navigable HTML report of your dataset.\n",
"\n",
"Using **Great Expectations** is a bit different from **pandera** as it replaces your dataframe with a **Great Expectations** `PandasDataset` that looks and feels just like a regular pandas dataframe but has extra methods related to data validation. To convert a regular pandas dataframe, you wrap it with `ge.from_pandas()`. \n",
"\n",
"Let's look at some examples:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import great_expectations as ge\n",
"\n",
"df = ge.from_pandas(\n",
" pd.DataFrame(\n",
" {\n",
" \"column1\": [1, 4, 0, 10, 9],\n",
" \"column2\": [-1.3, -1.4, -2.9, -10.1, -20.4],\n",
" \"column3\": [\"value_1\", \"value_2\", \"value_3\", \"value_2\", \"value_1\"],\n",
" }\n",
" )\n",
")\n",
"df.head()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's run some expectations:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.expect_column_values_to_be_of_type(\"column1\", \"int\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.expect_column_proportion_of_unique_values_to_be_between(\"column3\", 3, 5)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that the first expectation passed, while the second failed but did *not* throw an error... we were just told in JSON.\n",
"\n",
"Let's also see a less stringent expectation:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.expect_column_values_to_be_between(\n",
" column=\"column2\",\n",
" min_value=-15,\n",
" max_value=0,\n",
" mostly=0.80,\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Having a bunch of JSON thrown at you is hardly a pleasant experience, and running a few commands solely in a notebook (which is how this was generated) is not really the way that **Great Expectations** is intended to be used. Instead, it is designed to be used via several commands in the terminal that set up folders and files that will track the expectations you create and produce nice HTML reports on your data and whether it passed your tests or not. There is a [full tutorial](https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started.html) available on the website."
]
},
{
"attachments": {},
"cell_type": "markdown",
2 changes: 1 addition & 1 deletion environment.yml
@@ -36,7 +36,6 @@ dependencies:
- plotnine
- altair
- cerberus
- great-expectations>=0.17.14
- waterfallcharts
- matplotlib-venn
- pywaffle
@@ -79,3 +78,4 @@ dependencies:
- pyfixest>=0.17.0
- watermark
- ydata_profiling
- toml-cli
