From fc59a109dd5e43c291f52fdda05aa4e38c5f111d Mon Sep 17 00:00:00 2001
From: Martin Bernstorff <martinbernstorff@gmail.com>
Date: Fri, 21 Jun 2024 11:16:47 +0200
Subject: [PATCH] ci: cleanup (#140)

- **update 13 files and delete 9 files**
- **update 8 files, copy 1 file and create 2 files**
- **test: update test_markdown.py**
- **update Dockerfile and main.py**
- **update 11 files and delete 3 files**
- **update docker_smoketest.yml, .gitignore, Makefile and __main__.py**
- **update 5 files**
- **chore: update 1 file and delete 1 file**
---
 .copier-answers.yml                           |   4 +-
 .dockerignore                                 | 162 +++++++++++++++++
 .env.sample                                   |   8 +-
 .github/Dockerfile.dev                        |   7 +-
 .github/workflows/docker_release.yml          |   2 +-
 .github/workflows/docker_smoketest.yml        |   8 +-
 .gitignore                                    |   7 +
 Dockerfile                                    |  15 +-
 Makefile                                      |  19 +-
 README.md                                     |  48 ++---
 compose.sample.yml                            |  11 ++
 lefthook.yml                                  |   6 +-
 memorymarker/__main__.py                      | 141 +++++++--------
 memorymarker/cli/document_selector.py         |  16 --
 memorymarker/document_providers/base.py       |   4 +-
 .../document_providers/hydrator/main.py       | 107 -----------
 .../hydrator/test_hydrator.py                 |  34 ----
 memorymarker/document_providers/omnivore.py   |  46 ++++-
 .../document_providers/omnivore_document.py   |  44 -----
 .../hydrator => persister}/__init__.py        |   0
 .../__snapshots__/test_markdown.ambr          |   0
 .../markdown.py                               |   0
 .../test_markdown.py                          |   2 +-
 .../__init__.py                               |   0
 memorymarker/question_generator/chunker.py    |  28 +++
 .../example_repo.py}                          |   0
 .../question_generator/flows/question_flow.py |   8 +-
 memorymarker/question_generator/main.py       | 168 ------------------
 .../question_generator/pipeline_runner.py     |  38 ----
 .../question_generator/steps/qa_extractor.py  |   2 +-
 .../question_generator/steps/qa_generation.py |   2 +-
 .../steps/question_wikilinker.py              |   2 +-
 .../question_generator/steps/reasoning.py     |   2 +-
 pyproject.toml                                |   8 +-
 pyrightconfig.json                            |   3 +-
 pytest.ini                                    |   3 +
 requirements-dev.lock                         |   8 +-
 requirements.lock                             |   6 +-
 src/memorymarker/__init__.py                  |  38 ++++
 39 files changed, 435 insertions(+), 572 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 compose.sample.yml
 delete mode 100644 memorymarker/cli/document_selector.py
 delete mode 100644 memorymarker/document_providers/hydrator/main.py
 delete mode 100644 memorymarker/document_providers/hydrator/test_hydrator.py
 delete mode 100644 memorymarker/document_providers/omnivore_document.py
 rename memorymarker/{document_providers/hydrator => persister}/__init__.py (100%)
 rename memorymarker/{persist_questions => persister}/__snapshots__/test_markdown.ambr (100%)
 rename memorymarker/{persist_questions => persister}/markdown.py (100%)
 rename memorymarker/{persist_questions => persister}/test_markdown.py (97%)
 rename memorymarker/{persist_questions => question_generator}/__init__.py (100%)
 create mode 100644 memorymarker/question_generator/chunker.py
 rename memorymarker/question_generator/{example_repo_airtable.py => evaluation/example_repo.py} (100%)
 delete mode 100644 memorymarker/question_generator/main.py
 delete mode 100644 memorymarker/question_generator/pipeline_runner.py
 create mode 100644 pytest.ini
 create mode 100644 src/memorymarker/__init__.py

diff --git a/.copier-answers.yml b/.copier-answers.yml
index c4c61c5..158af1c 100644
--- a/.copier-answers.yml
+++ b/.copier-answers.yml
@@ -1,5 +1,5 @@
 # Changes here will be overwritten by Copier; NEVER EDIT MANUALLY
-_commit: ae075e2
+_commit: ac0611c
 _src_path: https://github.com/MartinBernstorff/nimble-python-template
 email: martinbernstorff@gmail.com
 full_name: Martin Bernstorff
@@ -7,6 +7,6 @@ github_username: MartinBernstorff
 package_name: memorymarker
 project_name: memorymarker
 project_slug: memorymarker
-python_version: '3.12'
+python_version: '3.11'
 release_docker_image: true
 release_package: true
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..7dbf7e8
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,162 @@
+# macOS
+.DS_Store
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+*.whl
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+.ruff_cache
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# Tests
+.testmondata*
\ No newline at end of file
diff --git a/.env.sample b/.env.sample
index dc75ca4..8d17342 100644
--- a/.env.sample
+++ b/.env.sample
@@ -1,4 +1,4 @@
-OMNIVORE_API_KEY=""
-OPENAI_API_KEY=""
-ANTHROPIC_API_KEY=""
-AIRTABLE_PAT=""
\ No newline at end of file
+OMNIVORE_API_KEY=
+OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
+MAX_N=
\ No newline at end of file
diff --git a/.github/Dockerfile.dev b/.github/Dockerfile.dev
index 271eca6..bd80cf9 100644
--- a/.github/Dockerfile.dev
+++ b/.github/Dockerfile.dev
@@ -1,4 +1,4 @@
-FROM python:3.12
+FROM python:3.11
 
 # Set the working directory to /app
 WORKDIR /app
@@ -7,16 +7,15 @@ ENV RYE_HOME="/opt/rye"
 ENV PATH="$RYE_HOME/shims:$PATH"
 ENV RYE_INSTALL_OPTION="--yes"
 ENV RYE_TOOLCHAIN="/usr/local/bin/python"
-ENV RYE_VERSION=0.26.0
+ENV RYE_VERSION=0.33.0
 
-RUN curl -sSf https://rye-up.com/get > /tmp/get-rye.sh
+RUN curl -sSf https://rye.astral.sh/get > /tmp/get-rye.sh
 RUN bash /tmp/get-rye.sh
 RUN rm /tmp/get-rye.sh
 RUN echo 'source "$HOME/.rye/env"' >> ~/.bashrc
 
 RUN rye config --set-bool behavior.use-uv=true
 RUN rye config --set-bool behavior.global-python=true
-RUN rye config --set default.dependency-operator="~="
 
 COPY Makefile ./
 COPY pyproject.toml ./
diff --git a/.github/workflows/docker_release.yml b/.github/workflows/docker_release.yml
index a49ebaa..81fe292 100644
--- a/.github/workflows/docker_release.yml
+++ b/.github/workflows/docker_release.yml
@@ -47,4 +47,4 @@ jobs:
           context: .
           push: true
           platforms: linux/amd64,linux/arm64
-          tags: ${{steps.meta.outputs.tags }}
\ No newline at end of file
+          tags: ${{ steps.meta.outputs.tags }}
\ No newline at end of file
diff --git a/.github/workflows/docker_smoketest.yml b/.github/workflows/docker_smoketest.yml
index 778ca79..897a470 100644
--- a/.github/workflows/docker_smoketest.yml
+++ b/.github/workflows/docker_smoketest.yml
@@ -10,7 +10,13 @@ jobs:
       - name: Checkout (GitHub)
         uses: actions/checkout@v4
 
+      - name: Get environment variables
+        run: |
+          echo "OMNIVORE_API_KEY=${{ secrets.OMNIVORE_API_KEY }}" >> .env
+          echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" >> .env
+          echo "ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }}" >> .env
+
       - name: Run integration test
         shell: bash
         run: |
-          docker build -t memorymarker -f Dockerfile .
\ No newline at end of file
+          make docker-smoketest
diff --git a/.gitignore b/.gitignore
index 135d907..793da31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 # macOS
 .DS_Store
 
+# IDEs
+.vscode
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -164,3 +167,7 @@ cython_debug/
 # Cache
 omnivore_cache/
 profile.html
+
+*.smoketest*
+compose.yml
+smoketest_output
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index c4bc639..ca12b4f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.12
+FROM python:3.11
 
 # Set the working directory to /app
 WORKDIR /app
@@ -7,16 +7,19 @@ ENV RYE_HOME="/opt/rye"
 ENV PATH="$RYE_HOME/shims:$PATH"
 ENV RYE_INSTALL_OPTION="--yes"
 ENV RYE_TOOLCHAIN="/usr/local/bin/python"
-ENV RYE_VERSION=0.26.0
+ENV RYE_VERSION=0.33.0
 
-RUN curl -sSf https://rye-up.com/get > /tmp/get-rye.sh
+RUN curl -sSf https://rye.astral.sh/get > /tmp/get-rye.sh
 RUN bash /tmp/get-rye.sh
 RUN rm /tmp/get-rye.sh
 RUN echo 'source "$HOME/.rye/env"' >> ~/.bashrc
 
 RUN rye config --set-bool behavior.use-uv=true
 RUN rye config --set-bool behavior.global-python=true
-RUN rye config --set default.dependency-operator="~="
 
-COPY . /app
-RUN make quicksync
+COPY pyproject.toml requirements.lock requirements-dev.lock ./
+RUN rye sync --no-lock
+
+COPY . /app/
+RUN rye sync --no-lock
+ENTRYPOINT ["python", "-m", "memorymarker"]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index d487867..962c5a2 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ quicksync:
 	rye sync --no-lock
 
 test:
-	@rye run pytest --cov=$(SRC_PATH) $(SRC_PATH) --cov-report xml:.coverage.xml --cov-report lcov:.coverage.lcov --testmon
+	rye test
 
 test-with-coverage: 
 	@echo "––– Testing –––"
@@ -24,9 +24,7 @@ test-with-coverage:
 lint: ## Format code
 	@echo "––– Linting –––"
 	@rye run ruff format .
-	@rye run ruff . --fix --unsafe-fixes \
-		--extend-select F401 \
-		--extend-select F841
+	@rye run ruff . --fix --unsafe-fixes
 	@echo "✅✅✅ Lint ✅✅✅"
 
 types: ## Type-check code
@@ -46,12 +44,19 @@ docker_ci: ## Run all checks in docker
 	docker build -t memorymarker_ci -f .github/Dockerfile.dev .
 	docker run --env-file .env memorymarker_ci make validate_ci
 
-pr: ## Submit a PR
-	@lumberman sync --squash --automerge
-
 #########################
 # End template makefile #
 #########################
 
+docker-smoketest:
+	cp compose.sample.yml compose.smoketest.yml
+	perl -pi -e 's#YOUR_OUTPUT_DIR#./smoketest_output#' compose.smoketest.yml
+
+	cp .env .env.smoketest
+	echo "MAX_N=1" >> .env.smoketest
+
+	docker build . -t ghcr.io/martinbernstorff/memorymarker:latest
+	docker compose -f compose.smoketest.yml --env-file .env.smoketest up
+
 update-snapshots:
 	@rye run pytest --snapshot-update
diff --git a/README.md b/README.md
index 512039a..157a1b1 100644
--- a/README.md
+++ b/README.md
@@ -2,61 +2,49 @@
 
 # memorymarker
 
-[![PyPI](https://img.shields.io/pypi/v/memorymarker.svg)][pypi status]
-[![Python Version](https://img.shields.io/pypi/pyversions/memorymarker)][pypi status]
-[![documentation](https://github.com/martinbernstorff/memorymarker/actions/workflows/documentation.yml/badge.svg)][documentation]
-[![Tests](https://github.com/martinbernstorff/memorymarker/actions/workflows/tests.yml/badge.svg)][tests]
-[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
-
-[pypi status]: https://pypi.org/project/memorymarker/
-[documentation]: https://martinbernstorff.github.io/memorymarker/
-[tests]: https://github.com/martinbernstorff/memorymarker/actions?workflow=Tests
-[black]: https://github.com/psf/black
+<!-- start short-description -->
 
+Highlighting does not aid memory. Questions do. But they take time. MemoryMarker turns your highlights into questions, so you can maintain traction at speed.
 
-<!-- start short-description -->
+Specifically, it takes highlights from [Omnivore](https://www.omnivore.app/) and turns them into markdown questions.
 
-TODO: Figure out github actions and add description
+To supercharge this, you can even ingest these questions into [Anki](https://apps.ankiweb.net/) using [Memium](https://github.com/MartinBernstorff/Memium).
 
 <!-- end short-description -->
 
-## Installation
+## Setup
 
-You can install `memorymarker` via [pip] from [PyPI]:
-
-```bash
-pip install memorymarker
-```
+A Docker image for MemoryMarker is continuously built and pushed to [ghcr.io/martinbernstorff/memorymarker](https://github.com/martinbernstorff/memorymarker/pkgs/container/memorymarker).
 
-[pip]: https://pip.pypa.io/en/stable/installing/
-[PyPI]: https://pypi.org/project/memorymarker/
+1. Install [Docker](https://docs.docker.com/get-docker/) or [Orbstack](https://orbstack.dev/)
 
-## Usage
+2. Copy `.env.sample` to `.env` and fill in the API keys
 
-TODO: Add minimal usage example
+3. Run the container:
 
-To see more examples, see the [documentation].
+```bash
+docker compose up
+```
 
 # 📖 Documentation
 
-| Documentation         |                                                          |
-| --------------------- | -------------------------------------------------------- |
+| Documentation          |                                                          |
+| ---------------------- | -------------------------------------------------------- |
 | 🔧 **[Installation]**  | Installation instructions on how to install this package |
 | 📖 **[Documentation]** | A minimal and developing documentation                   |
 | 👩‍💻 **[Tutorials]**     | Tutorials for using this package                         |
 | 🎛️ **[API Reference]** | API reference for this package                           |
 | 📚 **[FAQ]**           | Frequently asked questions                               |
 
-
 # 💬 Where to ask questions
 
-| Type                           |                        |
-| ------------------------------ | ---------------------- |
+| Type                            |                        |
+| ------------------------------- | ---------------------- |
 | 📚 **FAQ**                      | [FAQ]                  |
 | 🚨 **Bug Reports**              | [GitHub Issue Tracker] |
 | 🎁 **Feature Requests & Ideas** | [GitHub Issue Tracker] |
 | 👩‍💻 **Usage Questions**          | [GitHub Discussions]   |
-| 🗯 **General Discussion**       | [GitHub Discussions]   |
+| 🗯 **General Discussion**        | [GitHub Discussions]   |
 
 [Documentation]: https://martinbernstorff.github.io/memorymarker/index.html
 [Installation]: https://martinbernstorff.github.io/memorymarker/installation.html
@@ -65,5 +53,3 @@ To see more examples, see the [documentation].
 [FAQ]: https://martinbernstorff.github.io/memorymarker/faq.html
 [github issue tracker]: https://github.com/martinbernstorff/memorymarker/issues
 [github discussions]: https://github.com/martinbernstorff/memorymarker/discussions
-
-
diff --git a/compose.sample.yml b/compose.sample.yml
new file mode 100644
index 0000000..e507ac2
--- /dev/null
+++ b/compose.sample.yml
@@ -0,0 +1,11 @@
+services:
+  memorymarker:
+    image: ghcr.io/martinbernstorff/memorymarker:latest
+    container_name: memorymarker
+    volumes:
+      - YOUR_OUTPUT_DIR:/output
+    environment:
+      - OMNIVORE_API_KEY=${OMNIVORE_API_KEY}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
+      - MAX_N=${MAX_N}
diff --git a/lefthook.yml b/lefthook.yml
index 902d225..414fcd3 100644
--- a/lefthook.yml
+++ b/lefthook.yml
@@ -3,13 +3,9 @@
 
 pre-commit:
   commands:
-    format:
-      glob: "*.{py}"
-      run: rye run ruff format {staged_files}
-      stage_fixed: true
     lint:
       glob: "*.{py}"
-      run: rye run ruff --fix --extend-select F401 --extend-select F841 --extend-select B007 {staged_files}
+      run: make lint
       stage_fixed: true
 
 post-checkout:
diff --git a/memorymarker/__main__.py b/memorymarker/__main__.py
index 5148bfc..f1cbe02 100644
--- a/memorymarker/__main__.py
+++ b/memorymarker/__main__.py
@@ -2,10 +2,8 @@
 import datetime as dt
 import logging
 import os
-import time
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Callable
 
 import coloredlogs
 import pytz
@@ -13,9 +11,9 @@
 from dotenv import load_dotenv
 from iterpy.iter import Iter
 
-from memorymarker.cli.document_selector import select_documents
 from memorymarker.document_providers.omnivore import Omnivore
-from memorymarker.persist_questions.markdown import highlight_group_to_file
+from memorymarker.persister.markdown import highlight_group_to_file
+from memorymarker.question_generator.chunker import chunk_highlights
 from memorymarker.question_generator.completers.anthropic_completer import (
     AnthropicCompleter,
 )
@@ -24,14 +22,11 @@
     OpenAIModelCompleter,
 )
 from memorymarker.question_generator.flows.question_flow import QuestionFlow
-from memorymarker.question_generator.main import chunk_highlights
 from memorymarker.question_generator.qa_responses import QAResponses
-from memorymarker.question_generator.steps.qa_extractor import QuestionExtractionStep
-from memorymarker.question_generator.steps.qa_generation import QuestionGenerationStep
-from memorymarker.question_generator.steps.question_wikilinker import (
-    QuestionWikilinkerStep,
-)
-from memorymarker.question_generator.steps.reasoning import ReasoningStep
+from memorymarker.question_generator.steps.qa_extractor import QuestionExtractor
+from memorymarker.question_generator.steps.qa_generation import QuestionGenerator
+from memorymarker.question_generator.steps.question_wikilinker import QuestionWikilinker
+from memorymarker.question_generator.steps.reasoning import Reasoning
 
 app = typer.Typer(no_args_is_help=True)
 
@@ -43,7 +38,9 @@ def get_api_key_from_env(env_var: str) -> str | None:
 
 
 @dataclass(frozen=True)
-class TimestampHandler:
+class TimestampRepository:
+    """Writes and gets a timestamp for syncing."""
+
     filepath: Path
 
     def update_timestamp(self) -> None:
@@ -52,54 +49,61 @@ def update_timestamp(self) -> None:
 
         self.filepath.write_text(dt.datetime.now(pytz.UTC).isoformat())
 
-    def get_timestamp(self) -> dt.datetime | None:
+    def get_timestamp(self) -> dt.datetime:
+        """Returns the last run timestamp or a value far in the past if it doesn't exist."""
         try:
             return dt.datetime.fromisoformat(self.filepath.read_text())
         except FileNotFoundError:
-            return None
-
-
-def sleep_and_run(sleep_time: int, run_func: Callable[[], None]) -> None:
-    time.sleep(sleep_time)
-    run_func()
+            logging.info(
+                "No last run timestamp found, generating questions for all highlights"
+            )
+            return dt.datetime(1970, 1, 1, tzinfo=pytz.UTC)
 
 
 @app.command()  # type: ignore
 def typer_cli(
-    omnivore_api_key: str = typer.Option(
-        None, help="Omnivore API key", envvar="OMNIVORE_API_KEY"
+    omnivore_api_key: str = typer.Argument(
+        help="Omnivore API key", envvar="OMNIVORE_API_KEY"
     ),
-    openai_api_key: str = typer.Option(
-        None, help="Anthropic API key", envvar="OPENAI_API_KEY"
+    openai_api_key: str = typer.Argument(
+        help="OpenAI API key", envvar="OPENAI_API_KEY"
     ),
-    anthropic_api_key: str = typer.Option(
-        None, help="Anthropic API key", envvar="ANTHROPIC_API_KEY"
+    anthropic_api_key: str = typer.Argument(
+        help="Anthropic API key", envvar="ANTHROPIC_API_KEY"
     ),
     output_dir: Path = typer.Argument(  # noqa: B008 # type: ignore
-        Path("questions"),
+        Path("/output"),
         help="Directory to save the generated questions to",
         file_okay=False,
         dir_okay=True,
         writable=True,
     ),
-    run_every: int = typer.Option(
-        None, help="How often to run the script in seconds", envvar="RUN_EVERY"
-    ),
     max_n: int = typer.Argument(
-        1, help="Maximum number of questions to generate from highlights"
+        help="Maximum number of questions in total", envvar="MAX_N"
     ),
     only_new: bool = typer.Option(
         True, help="Only generate questions from highlights since last run"
     ),
-    select: bool = typer.Option(
-        False, help="Prompt to select which documents to generate questions from"
+    log_level: str = typer.Option(
+        "INFO",
+        help="Log level",
+        case_sensitive=False,
+        show_default=True,
+        envvar="LOG_LEVEL",
     ),
 ) -> None:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+        datefmt="%Y/%m/%d %H:%M:%S",
+        filename="main.log",
+    )
+    coloredlogs.install(level=log_level)  # type: ignore
+
     output_dir.mkdir(exist_ok=True, parents=True)
-    last_run_timestamper = TimestampHandler(output_dir / ".memorymarker")
-    last_run_timestamp = last_run_timestamper.get_timestamp()
 
     logging.info(f"MemoryMarker version {version('memorymarker')}")
+
     logging.info("Fetching documents")
     documents = (
         Omnivore(omnivore_api_key)
@@ -107,54 +111,51 @@ def typer_cli(
         .filter(lambda _: len(_.highlights) > 0)
     )
 
-    if select:
-        documents = select_documents(documents)
-
-    logging.info("Processing to highlights")
+    # Extract highlights from documents
     highlights = documents.map(lambda _: _.get_highlights()).flatten()
+    last_run_timestamper = TimestampRepository(output_dir / ".memorymarker")
 
     if only_new:
-        if not last_run_timestamp:
-            logging.info(
-                "No last run timestamp found, generating questions for all highlights"
-            )
-            last_run_timestamp = dt.datetime(1970, 1, 1, tzinfo=pytz.UTC)
-
+        last_run_timestamp = last_run_timestamper.get_timestamp()
         logging.info(
             f"Last run at UTC {last_run_timestamp.strftime('%Y-%m-%d %H:%M:%S')}"
         )
         highlights = highlights.filter(lambda _: _.updated_at > last_run_timestamp)
 
-        if highlights.count() == 0:
-            logging.info("No new highlights since last run")
-            if not run_every:
-                return
-
+    if highlights.count() == 0:
+        logging.info("No new highlights since last run")
+        return
     logging.info(f"Received {highlights.count()} new highlights")
+    logging.info(
+        f"max_n is set to {max_n}, so processing {min(max_n, highlights.count())} highlights"
+    )
 
+    # Chunk highlights for better reasoning and fewer duplicate questions
     logging.info("Generating questions from highlights...")
-    base_completer = AnthropicCompleter(
-        api_key=anthropic_api_key, model="claude-3-opus-20240229"
-    )
     chunked_highlights = (
         highlights.groupby(lambda _: _.source_document.title)
         .map(lambda _: chunk_highlights(_, 5))
         .flatten()
     )
+
+    # Generate questions
+    base_completer = AnthropicCompleter(
+        api_key=anthropic_api_key, model="claude-3-opus-20240229"
+    )
     questions = asyncio.run(
         QuestionFlow(
-            _name="simplified_reasoning",
+            name="simplified_reasoning",
             steps=(
-                ReasoningStep(completer=base_completer),
-                QuestionGenerationStep(completer=base_completer, n_questions=(1, 5)),
-                QuestionExtractionStep(
+                Reasoning(completer=base_completer),
+                QuestionGenerator(completer=base_completer, n_questions=(1, 5)),
+                QuestionExtractor(
                     completer=OpenAIModelCompleter(
                         api_key=openai_api_key,
                         model="gpt-3.5-turbo",
                         response_model=QAResponses,  # type: ignore
                     )
                 ),
-                QuestionWikilinkerStep(
+                QuestionWikilinker(
                     completer=OpenAICompleter(
                         api_key=os.getenv("OPENAI_API_KEY", "No OPENAI_API"),
                         model="gpt-4-turbo-preview",
@@ -164,36 +165,14 @@ def typer_cli(
         )(chunked_highlights[0:max_n])
     )
 
+    # Write to disk
     logging.info("Writing questions to markdown...")
-
-    highlight_groups = Iter(questions[0:max_n]).groupby(
-        lambda _: _.source_document.title
-    )
-    for group in highlight_groups:
+    for group in questions.groupby(lambda _: _.source_document.title):
         highlight_group_to_file(output_dir, group)
 
     last_run_timestamper.update_timestamp()
-    if run_every:
-        logging.info(f"Running every {run_every} seconds")
-        time.sleep(run_every)
-        logging.info("Running again")
-        typer_cli(
-            omnivore_api_key=omnivore_api_key,
-            output_dir=output_dir,
-            run_every=run_every,
-            max_n=max_n,
-            only_new=only_new,
-            select=select,
-        )
 
 
 if __name__ == "__main__":
     load_dotenv()
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-        datefmt="%Y/&m/%d %H:%M:%S",
-        filename="main.log",
-    )
-    coloredlogs.install(level="DEBUG")  # type: ignore
     app()
diff --git a/memorymarker/cli/document_selector.py b/memorymarker/cli/document_selector.py
deleted file mode 100644
index 08c3e19..0000000
--- a/memorymarker/cli/document_selector.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from typing import TYPE_CHECKING
-
-import questionary
-
-if TYPE_CHECKING:
-    from iterpy.iter import Iter
-
-    from ..document_providers.omnivore_document import OmnivoreDocument
-
-
-def select_documents(docs: "Iter[OmnivoreDocument]") -> "Iter[OmnivoreDocument]":
-    doc_titles = docs.map(lambda d: d.title).to_list()
-    selected_doc_names = questionary.checkbox(
-        message="Select documents", choices=doc_titles
-    ).ask()
-    return docs.filter(lambda d: d.title in selected_doc_names)
diff --git a/memorymarker/document_providers/base.py b/memorymarker/document_providers/base.py
index 06a7a68..163e929 100644
--- a/memorymarker/document_providers/base.py
+++ b/memorymarker/document_providers/base.py
@@ -9,11 +9,13 @@
 
     from memorymarker.question_generator.reasoned_highlight import Highlights
 
-    from .omnivore_document import OmnivoreDocument
+    from .omnivore import OmnivoreDocument
 
 
 @dataclass(frozen=True)
 class OrphanHighlight:
+    """Highlight without a source document"""
+
     highlight: str
     uri: str
     title: str
diff --git a/memorymarker/document_providers/hydrator/main.py b/memorymarker/document_providers/hydrator/main.py
deleted file mode 100644
index 2109abc..0000000
--- a/memorymarker/document_providers/hydrator/main.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import logging
-import re
-from typing import TYPE_CHECKING, Callable, Sequence
-from urllib.request import urlopen
-
-import requests
-from bs4 import BeautifulSoup, NavigableString, Tag
-from joblib import Memory
-
-from memorymarker.question_generator.reasoned_highlight import Highlights
-
-if TYPE_CHECKING:
-    from memorymarker.document_providers.base import OrphanHighlight
-
-memory = Memory(".soup_download_cache", verbose=0)
-
-
-@memory.cache()  # type: ignore
-def download_soup_from_url(url: str) -> BeautifulSoup:
-    # Send HTTP request to URL and save the response from server in a response object called r
-    r = requests.get(url)
-
-    # Create a BeautifulSoup object and specify the parser
-    soup = BeautifulSoup(r.text, "html.parser")
-    return soup
-
-
-class ContextParser:
-    @staticmethod
-    def get_highlight_context(
-        soup: BeautifulSoup,
-        highlight: str,
-        n_chars_before: int = 100,
-        n_chars_after: int = 100,
-    ) -> str:
-        highlight_selection = soup.find(text=re.compile(highlight))
-
-        if highlight_selection is None:
-            logging.info(f"Could not find highlight {highlight} in {soup.title}")
-            return ""
-
-        highlight_container: Tag = highlight_selection.parent.parent  # type: ignore
-
-        context_strings: list[str] = []
-
-        for child in highlight_container.descendants:
-            if isinstance(child, NavigableString):
-                context_strings.append(str(child))
-
-        context = " ".join(context_strings)
-
-        context = ContextParser._select_context_slice(
-            highlight=highlight,
-            n_chars_before=n_chars_before,
-            n_chars_after=n_chars_after,
-            context=context,
-        )
-        return context
-
-    @staticmethod
-    def _select_context_slice(
-        highlight: str, n_chars_before: int, n_chars_after: int, context: str
-    ) -> str:
-        highlight_index = context.find(highlight)
-        context_start_index = max(0, highlight_index - n_chars_before)
-        context_end_index = min(
-            len(context), highlight_index + len(highlight) + n_chars_after
-        )
-
-        return context[context_start_index:context_end_index]
-
-
-class HighlightHydrator:
-    def __init__(self, soup_downloader: Callable[[str], BeautifulSoup]) -> None:
-        self.soup_downloader = soup_downloader
-
-    def hydrate_highlights(
-        self, highlights: Sequence["OrphanHighlight"]
-    ) -> Sequence[Highlights | None]:
-        hydrated_highlights: list[Highlights | None] = []
-        for highlight in highlights:
-            try:
-                page = urlopen(highlight.uri)
-            except Exception:
-                logging.info(f"Could not open {highlight.uri}")
-                hydrated_highlights.append(None)
-                continue
-
-            soup = self.soup_downloader(page)
-            context = ContextParser.get_highlight_context(
-                soup=soup, highlight=highlight.highlight
-            )
-            hydrated_highlights.append(
-                Highlights(
-                    highlighted_text=highlight.highlight,
-                    prefix=context[:100],
-                    suffix=context[-100:],
-                )  # type: ignore
-            )
-
-        return hydrated_highlights
-
-
-if __name__ == "__main__":
-    result = download_soup_from_url(
-        "https://www.gutenberg.org/files/2701/2701-h/2701-h.htm"
-    )
diff --git a/memorymarker/document_providers/hydrator/test_hydrator.py b/memorymarker/document_providers/hydrator/test_hydrator.py
deleted file mode 100644
index bb35592..0000000
--- a/memorymarker/document_providers/hydrator/test_hydrator.py
+++ /dev/null
@@ -1,34 +0,0 @@
-from bs4 import BeautifulSoup
-
-from memorymarker.document_providers.hydrator.main import ContextParser
-
-
-def test_context_parser():
-    input_soup = BeautifulSoup(
-        """
-    <html>
-        <body>
-            <p>Some text</p>
-            <p>Some more text</p>
-            <p>Even more text</p>
-        </body>
-    </html>
-    """,
-        "html.parser",
-    )
-
-    expected_output = "\n Some text \n Some more text \n Even more text \n"
-    context = ContextParser.get_highlight_context(soup=input_soup, highlight="more")
-    assert context == expected_output
-
-
-def test_context_slicing():
-    highlight = "highlight"
-    context = "54321highlight12345"
-
-    assert (
-        ContextParser._select_context_slice(  # type: ignore
-            highlight=highlight, n_chars_before=1, n_chars_after=1, context=context
-        )
-        == "1highlight1"
-    )
diff --git a/memorymarker/document_providers/omnivore.py b/memorymarker/document_providers/omnivore.py
index 91df260..775d437 100644
--- a/memorymarker/document_providers/omnivore.py
+++ b/memorymarker/document_providers/omnivore.py
@@ -1,14 +1,62 @@
+import os
 from dataclasses import dataclass
-from typing import Mapping
+from typing import Any, Mapping, Sequence
 
 from iterpy.iter import Iter
 from omnivoreql import OmnivoreQL
+from pydantic import BaseModel
 
-from memorymarker.document_providers.omnivore_document import OmnivoreDocument
+from memorymarker.question_generator.reasoned_highlight import (
+    Highlights,
+    SourceDocument,
+)
 
 from .base import DocumentProvider
 
 
+def _empty_string_if_none(value: str | None) -> str:
+    return value or ""
+
+
+class OmnivoreDocument(BaseModel):
+    """An Omnivore article and the raw highlight payloads attached to it."""
+
+    title: str
+    uri: str
+    slug: str
+    highlights: Sequence[Mapping[str, Any]]
+
+    def _parse_highlight(self, highlight: Mapping[str, Any]) -> Highlights | None:
+        """Build a `Highlights` from one raw highlight payload.
+
+        Returns None when the payload has no `quote` (missing or null). A
+        missing or null `prefix`/`suffix` is stored as an empty string.
+        """
+        if highlight.get("quote") is None:
+            return None
+
+        return Highlights(
+            source_document=SourceDocument(
+                title=self.title,
+                uri=f"https://omnivore.app/me/{self.slug}#{highlight['id']}",
+            ),
+            pipeline_name="",
+            reasoning_prompt="",
+            reasoning="",
+            qa_string="",
+            question_answer_pairs=[],
+            highlighted_text=highlight["quote"],
+            prefix=_empty_string_if_none(highlight.get("prefix")),
+            suffix=_empty_string_if_none(highlight.get("suffix")),
+            updated_at=highlight["updatedAt"],  # type: ignore # Will be recast on init.
+        )
+
+    def get_highlights(self) -> Iter[Highlights]:
+        """Parse every raw highlight, dropping those without a quote."""
+        highlights = Iter(self.highlights).map(self._parse_highlight)
+        return highlights.filter(lambda _: _ is not None)  # type: ignore
+
+
 @dataclass
 class Omnivore(DocumentProvider):
     api_key: str
@@ -26,7 +74,7 @@ def _parse_doc(self, document: Mapping[str, str]) -> OmnivoreDocument:
 
     def get_documents(self) -> Iter[OmnivoreDocument]:
         documents = (
-            Iter(self.client.get_articles(limit=1000)["search"]["edges"])
+            Iter(self.client.get_articles(limit=100)["search"]["edges"])
             .map(lambda a: a["node"])
             .map(self._parse_doc)
             .flatten()
diff --git a/memorymarker/document_providers/omnivore_document.py b/memorymarker/document_providers/omnivore_document.py
deleted file mode 100644
index 7646040..0000000
--- a/memorymarker/document_providers/omnivore_document.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from typing import Any, Mapping, Sequence
-
-from iterpy.iter import Iter
-from pydantic import BaseModel
-
-from memorymarker.question_generator.reasoned_highlight import (
-    Highlights,
-    SourceDocument,
-)
-
-
-def empty_string_if_none(value: str | None) -> str:
-    return value or ""
-
-
-class OmnivoreDocument(BaseModel):
-    title: str
-    uri: str
-    slug: str
-    highlights: Sequence[Mapping[str, Any]]
-
-    def _parse_highlight(self, highlight: Mapping[str, str]) -> Highlights | None:
-        if "quote" not in highlight or highlight["quote"] is None:  # type: ignore
-            return None
-
-        return Highlights(
-            source_document=SourceDocument(
-                title=self.title,
-                uri=f"https://omnivore.app/me/{self.slug}#{highlight["id"]}",
-            ),
-            pipeline_name="",
-            reasoning_prompt="",
-            reasoning="",
-            qa_string="",
-            question_answer_pairs=[],
-            highlighted_text=highlight["quote"],
-            prefix=empty_string_if_none(highlight["prefix"]),
-            suffix=empty_string_if_none(highlight["suffix"]),
-            updated_at=highlight["updatedAt"],  # type: ignore # Will be recast on init.
-        )
-
-    def get_highlights(self) -> Iter[Highlights]:
-        highlights = Iter(self.highlights).map(self._parse_highlight)
-        return highlights.filter(lambda _: _ is not None)  # type: ignore
diff --git a/memorymarker/document_providers/hydrator/__init__.py b/memorymarker/persister/__init__.py
similarity index 100%
rename from memorymarker/document_providers/hydrator/__init__.py
rename to memorymarker/persister/__init__.py
diff --git a/memorymarker/persist_questions/__snapshots__/test_markdown.ambr b/memorymarker/persister/__snapshots__/test_markdown.ambr
similarity index 100%
rename from memorymarker/persist_questions/__snapshots__/test_markdown.ambr
rename to memorymarker/persister/__snapshots__/test_markdown.ambr
diff --git a/memorymarker/persist_questions/markdown.py b/memorymarker/persister/markdown.py
similarity index 100%
rename from memorymarker/persist_questions/markdown.py
rename to memorymarker/persister/markdown.py
diff --git a/memorymarker/persist_questions/test_markdown.py b/memorymarker/persister/test_markdown.py
similarity index 97%
rename from memorymarker/persist_questions/test_markdown.py
rename to memorymarker/persister/test_markdown.py
index 5a9c6f3..46c7dee 100644
--- a/memorymarker/persist_questions/test_markdown.py
+++ b/memorymarker/persister/test_markdown.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-import memorymarker.persist_questions.markdown as markdown
+import memorymarker.persister.markdown as markdown
 from memorymarker.question_generator.qa_responses import QAPrompt
 from memorymarker.question_generator.reasoned_highlight import (
     Highlights,
diff --git a/memorymarker/persist_questions/__init__.py b/memorymarker/question_generator/__init__.py
similarity index 100%
rename from memorymarker/persist_questions/__init__.py
rename to memorymarker/question_generator/__init__.py
diff --git a/memorymarker/question_generator/chunker.py b/memorymarker/question_generator/chunker.py
new file mode 100644
index 0000000..42a8ccf
--- /dev/null
+++ b/memorymarker/question_generator/chunker.py
@@ -0,0 +1,45 @@
+from typing import TYPE_CHECKING, Sequence
+
+from joblib import Memory
+
+if TYPE_CHECKING:
+    from memorymarker.question_generator.reasoned_highlight import Highlights
+
+omnivore_cache = Memory(".cache/omnivore")
+
+
+def chunk_highlights(
+    group: tuple[str, Sequence["Highlights"]], chunk_size: int
+) -> Sequence["Highlights"]:
+    """Merge one group's highlights into chunks of at most `chunk_size`.
+
+    `group` is a `(key, highlights)` pair; only the highlights are used. Each
+    chunk is collapsed into a single highlight: the chunk's last highlight is
+    reused (mutated in place) with its text replaced by the chunk's quoted
+    prefix/highlight/suffix blocks joined by `---`, and its prefix and suffix
+    emptied.
+
+    Raises:
+        ValueError: If `chunk_size` is smaller than 1.
+    """
+    if chunk_size < 1:
+        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")
+
+    highlights = group[1]
+    chunks: list["Highlights"] = []
+
+    # Step by chunk_size (not a fixed 5) so consecutive chunks neither overlap
+    # nor skip highlights when a different chunk size is requested.
+    for start in range(0, len(highlights), chunk_size):
+        subset = highlights[start : start + chunk_size]
+        combined_text = "\n---\n".join(
+            f"> {_.prefix}<HIGHLIGHT>{_.highlighted_text}</HIGHLIGHT>{_.suffix}"
+            for _ in subset
+        )
+        merged = subset[-1]
+        merged.highlighted_text = combined_text
+        merged.prefix = ""
+        merged.suffix = ""
+        chunks.append(merged)
+
+    return chunks
diff --git a/memorymarker/question_generator/example_repo_airtable.py b/memorymarker/question_generator/evaluation/example_repo.py
similarity index 100%
rename from memorymarker/question_generator/example_repo_airtable.py
rename to memorymarker/question_generator/evaluation/example_repo.py
diff --git a/memorymarker/question_generator/flows/question_flow.py b/memorymarker/question_generator/flows/question_flow.py
index f72d44f..4b5da4d 100644
--- a/memorymarker/question_generator/flows/question_flow.py
+++ b/memorymarker/question_generator/flows/question_flow.py
@@ -13,7 +13,7 @@
 
 @dataclass(frozen=True)
 class QuestionFlow:
-    _name: str
+    name: str
     steps: tuple["FlowStep"]
 
     async def _process_item(self, highlight: "Highlights") -> "Highlights":
@@ -21,7 +21,7 @@ async def _process_item(self, highlight: "Highlights") -> "Highlights":
         async with sem:
             for step in self.steps:
                 result = await step(highlight)
-        result.pipeline_name = self.name
+        result.pipeline_name = self.identity
         return result
 
     async def __call__(self, highlights: Iter["Highlights"]) -> Iter["Highlights"]:
@@ -32,6 +32,6 @@ async def __call__(self, highlights: Iter["Highlights"]) -> Iter["Highlights"]:
         return Iter(results)
 
     @property
-    def name(self) -> str:
+    def identity(self) -> str:
         step_identites = "_".join(step.identity() for step in self.steps)
-        return f"{self._name}_{step_identites}"
+        return f"{self.name}_{step_identites}"
diff --git a/memorymarker/question_generator/main.py b/memorymarker/question_generator/main.py
deleted file mode 100644
index 10e5725..0000000
--- a/memorymarker/question_generator/main.py
+++ /dev/null
@@ -1,168 +0,0 @@
-import asyncio
-import logging
-import os
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Sequence
-
-import coloredlogs
-from iterpy.iter import Iter
-from joblib import Memory
-
-from memorymarker.document_providers.omnivore import Omnivore
-from memorymarker.question_generator.completers.anthropic_completer import (
-    AnthropicCompleter,
-)
-from memorymarker.question_generator.completers.openai_completer import (
-    OpenAICompleter,
-    OpenAIModelCompleter,
-)
-from memorymarker.question_generator.example_repo_airtable import (
-    AirtableExampleRepo,
-    PipelineHighlightIdentity,
-    update_repository,
-)
-from memorymarker.question_generator.flows.question_flow import QuestionFlow
-from memorymarker.question_generator.pipeline_runner import run_pipelines
-from memorymarker.question_generator.qa_responses import QAResponses
-from memorymarker.question_generator.steps.qa_extractor import QuestionExtractionStep
-from memorymarker.question_generator.steps.qa_generation import QuestionGenerationStep
-from memorymarker.question_generator.steps.question_wikilinker import (
-    QuestionWikilinkerStep,
-)
-from memorymarker.question_generator.steps.reasoning import ReasoningStep
-
-if TYPE_CHECKING:
-    from memorymarker.question_generator.reasoned_highlight import Highlights
-
-omnivore_cache = Memory(".cache/omnivore")
-
-
-@dataclass(frozen=True)
-class HighlightWithPipeline(PipelineHighlightIdentity):
-    highlight: "Highlights"
-    pipeline: QuestionFlow
-
-    def identity(self) -> int:
-        return self.pipeline_highlight_id(
-            self.pipeline.name, self.highlight.highlighted_text
-        )
-
-
-def _generate_highlight_pipeline_pairs(
-    selected_highlights: Iter["Highlights"], pipelines: Sequence[QuestionFlow]
-) -> Iter[HighlightWithPipeline]:
-    return Iter(
-        [
-            HighlightWithPipeline(highlight=highlight, pipeline=pipeline)
-            for pipeline in pipelines
-            for highlight in selected_highlights.to_list()
-        ]
-    )
-
-
-@omnivore_cache.cache()  # type: ignore
-def _select_highlights_from_omnivore() -> Iter["Highlights"]:
-    highlights = (
-        Omnivore(
-            api_key=os.getenv("OMNIVORE_API_KEY", "No OMNIVORE_API_KEY in environment")
-        )
-        .get_documents()
-        .map(lambda _: _.get_highlights().to_list())
-        .flatten()
-    )
-
-    return highlights
-
-
-def chunk_highlights(
-    group: tuple[str, Sequence["Highlights"]], chunk_size: int
-) -> Sequence["Highlights"]:
-    groups: Sequence["Highlights"] = []
-
-    for i in range(0, len(group[1]), 5):
-        subset: Sequence["Highlights"] = group[1][i : i + chunk_size]
-        combined_text = "\n---\n".join(
-            f"> {_.prefix}<HIGHLIGHT>{_.highlighted_text}</HIGHLIGHT>{_.suffix}"
-            for _ in subset
-        )
-        new_highlight = subset[-1]
-        new_highlight.highlighted_text = combined_text
-        new_highlight.prefix = ""
-        new_highlight.suffix = ""
-        groups.append(new_highlight)
-
-    return groups
-
-
-async def main():
-    repository = AirtableExampleRepo()
-    # content_filter = {
-    #     "drenge og mænd ikke har nogen værdi",
-    #     "The quality of a model",
-    #     "Dependency injection is not effective if",
-    #     "The essence of writing code then is to internalize the problem domain",
-    #     "stack is a data structure that contains a collection of elements where you can add and delete elements from just one end ",
-    #     "A semaphore manages an internal counter",
-    # }
-    document_titles = {"Singly Linked List"}
-    input_highlights = _select_highlights_from_omnivore()
-    selected_highlights = input_highlights.filter(
-        lambda _: any(title in _.source_document.title for title in document_titles)
-    )
-
-    grouped_highlights = (
-        selected_highlights.groupby(lambda _: _.source_document.title)
-        .map(lambda group: chunk_highlights(group=group, chunk_size=5))
-        .flatten()
-    )
-
-    old_example_hashes = (
-        Iter(repository.get_existing_examples()).map(lambda _: _.__hash__()).to_list()
-    )
-
-    base_completer = AnthropicCompleter(
-        api_key=os.getenv("ANTHROPIC_API_KEY", None), model="claude-3-opus-20240229"
-    )
-    # base_completer = OpenAICompleter(
-    #     api_key=os.getenv("OPENAI_API_KEY", None), model="gpt-4-turbo-preview"
-    # )
-    new_highlights = _generate_highlight_pipeline_pairs(
-        grouped_highlights,
-        [
-            QuestionFlow(
-                _name="chunked_reasoning_with_wikilinks",
-                steps=(
-                    ReasoningStep(completer=base_completer),
-                    QuestionGenerationStep(
-                        completer=base_completer, n_questions=(1, 5)
-                    ),
-                    QuestionExtractionStep(
-                        completer=OpenAIModelCompleter(
-                            api_key=os.getenv("OPENAI_API_KEY", "No OPENAI_API"),
-                            model="gpt-3.5-turbo",
-                            response_model=QAResponses,  # type: ignore
-                        )
-                    ),
-                    QuestionWikilinkerStep(
-                        completer=OpenAICompleter(
-                            api_key=os.getenv("OPENAI_API_KEY", "No OPENAI_API"),
-                            model="gpt-4-turbo-preview",
-                        )
-                    ),
-                ),
-            )
-        ],
-    ).filter(lambda pair: pair.identity() not in old_example_hashes)
-
-    new_responses = await run_pipelines(new_highlights)
-    update_repository(new_responses, repository=repository)
-
-
-if __name__ == "__main__":
-    coloredlogs.install(  # type: ignore
-        level=logging.INFO,
-        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-        datefmt="%Y/%m/%d %H:%M:%S",
-        file_name="tester.log",
-    )
-    asyncio.run(main())
diff --git a/memorymarker/question_generator/pipeline_runner.py b/memorymarker/question_generator/pipeline_runner.py
deleted file mode 100644
index 7d1d274..0000000
--- a/memorymarker/question_generator/pipeline_runner.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import asyncio
-from typing import TYPE_CHECKING, Mapping, Sequence
-
-from iterpy.iter import Iter
-
-if TYPE_CHECKING:
-    from memorymarker.question_generator.flows.question_flow import QuestionFlow
-    from memorymarker.question_generator.main import HighlightWithPipeline
-    from memorymarker.question_generator.reasoned_highlight import Highlights
-
-
-async def run_pipeline(
-    pipeline_name: str,
-    pipelinename2pipeline: Mapping[str, "QuestionFlow"],
-    highlights: Sequence["Highlights"],
-) -> Iter["Highlights"]:
-    pipeline = pipelinename2pipeline[pipeline_name]
-    prompts = await pipeline(Iter(highlights))
-    return prompts
-
-
-async def run_pipelines(pairs: Iter["HighlightWithPipeline"]) -> Iter["Highlights"]:
-    pipelinename2pipeline = {pair.pipeline.name: pair.pipeline for pair in pairs}
-    pipelines_with_highlights = pairs.groupby(lambda _: _.pipeline.name)
-
-    examples = await asyncio.gather(
-        *[
-            run_pipeline(
-                pipeline_name=pipeline_name,
-                pipelinename2pipeline=pipelinename2pipeline,
-                highlights=[pair.highlight],
-            )
-            for pipeline_name, pairs_instance in pipelines_with_highlights
-            for pair in pairs_instance
-        ]
-    )
-
-    return Iter(examples).flatten()
diff --git a/memorymarker/question_generator/steps/qa_extractor.py b/memorymarker/question_generator/steps/qa_extractor.py
index e1e208e..19288d2 100644
--- a/memorymarker/question_generator/steps/qa_extractor.py
+++ b/memorymarker/question_generator/steps/qa_extractor.py
@@ -12,7 +12,7 @@
 
 
 @dataclass(frozen=True)
-class QuestionExtractionStep(FlowStep):
+class QuestionExtractor(FlowStep):
     completer: "ModelCompleter"
 
     def identity(self) -> str:
diff --git a/memorymarker/question_generator/steps/qa_generation.py b/memorymarker/question_generator/steps/qa_generation.py
index bc6d2fa..f7bc56c 100644
--- a/memorymarker/question_generator/steps/qa_generation.py
+++ b/memorymarker/question_generator/steps/qa_generation.py
@@ -9,7 +9,7 @@
 
 
 @dataclass(frozen=True)
-class QuestionGenerationStep(FlowStep):
+class QuestionGenerator(FlowStep):
     completer: "Completer"
     n_questions: tuple[int, int]
     prompt = """You are generating interesting questions. The questions should:
diff --git a/memorymarker/question_generator/steps/question_wikilinker.py b/memorymarker/question_generator/steps/question_wikilinker.py
index d53bb95..9a91898 100644
--- a/memorymarker/question_generator/steps/question_wikilinker.py
+++ b/memorymarker/question_generator/steps/question_wikilinker.py
@@ -11,7 +11,7 @@
 
 
 @dataclass(frozen=True)
-class QuestionWikilinkerStep(FlowStep):
+class QuestionWikilinker(FlowStep):
     completer: "Completer"
     prompt = """In the following, identify the important, domain-specific terms. Then, capitalise them, and surround them with wikilinks. There can be more than one important term. Identify terms as you would in a wikipedia article.
 
diff --git a/memorymarker/question_generator/steps/reasoning.py b/memorymarker/question_generator/steps/reasoning.py
index d43353d..23192b2 100644
--- a/memorymarker/question_generator/steps/reasoning.py
+++ b/memorymarker/question_generator/steps/reasoning.py
@@ -9,7 +9,7 @@
 
 
 @dataclass(frozen=True)
-class ReasoningStep(FlowStep):
+class Reasoning(FlowStep):
     completer: "Completer"
     prompt = """Document title: {document_title}
 
diff --git a/pyproject.toml b/pyproject.toml
index 9e9f488..6944c6d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 version = "0.22.1"
-requires-python = ">=3.12"
+requires-python = ">=3.11"
 name = "memorymarker"
 description = "memorymarker"
 dependencies = [
@@ -8,7 +8,7 @@ dependencies = [
     "instructor>=0.6.2",
     "iterpy>=1.6.0",
     "joblib>=1.3.2",
-    "omnivoreql>=0.2.1",
+    "omnivoreql>=0.3.3",
     "openai>=1.13.3",
     "pydantic>=2.6.2",
     "python-dotenv>=1.0.1",
@@ -19,7 +19,7 @@ dependencies = [
     "anthropic>=0.21.3",
 ]
 authors = [{ name = "Martin Bernstorff", email = "martinbernstorff@gmail.com" }]
-classifiers = ["Programming Language :: Python :: 3.12"]
+classifiers = ["Programming Language :: Python :: 3.11"]
 
 [project.license]
 file = "LICENSE"
@@ -31,7 +31,7 @@ content-type = "text/markdown"
 [tool]
 rye = { dev-dependencies = [
     "diff-cover==8.0.3",
-    "pyright==1.1.350",
+    "pyright>=1.1.368",
     "pytest>=7.4.0",
     "pytest-cov==4.1.0",
     "pytest-xdist==3.5.0",
diff --git a/pyrightconfig.json b/pyrightconfig.json
index 77723b3..1e66e30 100644
--- a/pyrightconfig.json
+++ b/pyrightconfig.json
@@ -7,7 +7,8 @@
     "reportMissingTypeStubs": false,
     "reportMissingParameterType": false, // Covered by ruff ANN
     "reportUnknownParameterType": false, // Covered by ruff ANN
-    "reportPrivateUsage": false, // Covered by Ruff PLC
+    "reportUnusedExpression": false, // Covered by ruff B018
+    "reportPrivateUsage": false, // Covered by ruff PLC2701
     "reportUntypedFunctionDecorator": false,
     "reportUnusedImport": "none", // Covered by ruff
     "typeCheckingMode": "strict"
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..83a8caf
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+python_files = *.py
+python_functions = test_* _should_*
\ No newline at end of file
diff --git a/requirements-dev.lock b/requirements-dev.lock
index d5aad81..badd755 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -18,6 +18,7 @@ anthropic==0.21.3
     # via memorymarker
 anyio==3.7.1
     # via anthropic
+    # via gql
     # via httpx
     # via openai
 attrs==23.2.0
@@ -56,7 +57,7 @@ frozenlist==1.4.1
     # via aiosignal
 fsspec==2024.3.1
     # via huggingface-hub
-gql==3.4.1
+gql==3.5.0
     # via omnivoreql
 graphql-core==3.2.3
     # via gql
@@ -99,7 +100,7 @@ multidict==6.0.5
     # via yarl
 nodeenv==1.8.0
     # via pyright
-omnivoreql==0.2.1
+omnivoreql==0.3.3
     # via memorymarker
 openai==1.13.3
     # via instructor
@@ -126,7 +127,7 @@ pygments==2.17.2
     # via diff-cover
     # via rich
 pyinstrument==4.6.2
-pyright==1.1.350
+pyright==1.1.368
 pytest==7.4.4
     # via pytest-asyncio
     # via pytest-cov
@@ -143,6 +144,7 @@ pytest-testmon==2.1.0
 pytest-xdist==3.5.0
 python-dotenv==1.0.1
     # via memorymarker
+    # via omnivoreql
     # via pytest-dotenv
 pytz==2024.1
     # via memorymarker
diff --git a/requirements.lock b/requirements.lock
index 3f4a979..7d1df40 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -18,6 +18,7 @@ anthropic==0.21.3
     # via memorymarker
 anyio==3.7.1
     # via anthropic
+    # via gql
     # via httpx
     # via openai
 attrs==23.2.0
@@ -48,7 +49,7 @@ frozenlist==1.4.1
     # via aiosignal
 fsspec==2024.3.1
     # via huggingface-hub
-gql==3.4.1
+gql==3.5.0
     # via omnivoreql
 graphql-core==3.2.3
     # via gql
@@ -81,7 +82,7 @@ mdurl==0.1.2
 multidict==6.0.5
     # via aiohttp
     # via yarl
-omnivoreql==0.2.1
+omnivoreql==0.3.3
     # via memorymarker
 openai==1.13.3
     # via instructor
@@ -101,6 +102,7 @@ pygments==2.17.2
     # via rich
 python-dotenv==1.0.1
     # via memorymarker
+    # via omnivoreql
 pytz==2024.1
     # via memorymarker
 pyyaml==6.0.1
diff --git a/src/memorymarker/__init__.py b/src/memorymarker/__init__.py
new file mode 100644
index 0000000..40b6cf6
--- /dev/null
+++ b/src/memorymarker/__init__.py
@@ -0,0 +1,38 @@
+# ############################
+# ## NOTES ON IMPORT FORMAT ##
+# ############################
+#
+# From https://github.com/dagster-io/dagster/blob/master/python_modules/dagster/dagster/__init__.py
+#
+# This file defines your package's public API. Imports need to be structured/formatted so as to ensure
+# that the broadest possible set of static analyzers understand your_package's public API as intended.
+# The below guidelines ensure this is the case.
+#
+# (1) All imports in this module intended to define exported symbols should be of the form `from
+# your_package.foo import X as X`. This is because imported symbols are not by default considered public
+# by static analyzers. The redundant alias form `import X as X` overwrites the private imported `X`
+# with a public `X` bound to the same value. It is also possible to expose `X` as public by listing
+# it inside `__all__`, but the redundant alias form is preferred here due to easier maintainability.
+
+# (2) All imports should target the module in which a symbol is actually defined, rather than a
+# container module where it is imported. This rule also derives from the default private status of
+# imported symbols. So long as there is a private import somewhere in the import chain leading from
+# an import to its definition, some linters will be triggered (e.g. pyright). For example, the
+# following results in a linter error when using your_package as a third-party library:
+
+#     ### your_package/foo/bar.py
+#     BAR = "BAR"
+#
+#     ### your_package/foo/__init__.py
+#     from .bar import BAR  # BAR is imported so it is not part of your_package.foo public interface
+#     FOO = "FOO"
+#
+#     ### your_package/__init__.py
+#     from .foo import FOO, BAR  # importing BAR is importing a private symbol from your_package.foo
+#     __all__ = ["FOO", "BAR"]
+#
+#     ### some_user_code.py
+#     # from your_package import BAR  # linter error even though `BAR` is in `your_package.__all__`!
+#
+# We could get around this by always remembering to use the `from .foo import X as X` form in
+# containers, but it is simpler to just import directly from the defining module.