Add testing example

autoblocksai · Feb 22, 2024 · 444faa8 · 444faa8
1 parent f6d5e1a
commit 444faa8
Show file tree

Hide file tree

Showing 21 changed files with 1,336 additions and 0 deletions.
diff --git a/.github/workflows/autoblocks-testing.yml b/.github/workflows/autoblocks-testing.yml
@@ -0,0 +1,40 @@
+name: Autoblocks Testing
+
+on:
+  push: # Run on every push.
+  schedule: # Run every day at ~7:17am PST (the cron expression below is in UTC).
+    - cron: '17 15 * * *'
+
+jobs:
+  py:
+    runs-on: ubuntu-latest
+
+    # Every `run` step executes with bash from inside the example project.
+    defaults:
+      run:
+        shell: bash
+        working-directory: Python/testing-sdk
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup python
+        uses: actions/setup-python@v5
+        with:
+          # `python-version` takes a version spec; `python-version-file`
+          # expects a path to a file that contains the version.
+          python-version: '3.11'
+
+      - name: Install poetry
+        run: curl -sSL https://install.python-poetry.org | python3 -
+
+      - name: Check pyproject.toml & poetry.lock are in sync
+        # `poetry lock --check` is deprecated and no longer available in
+        # Poetry 2.x; `poetry check --lock` is its replacement.
+        run: poetry check --lock
+
+      - name: Install dependencies
+        run: poetry install
+
+      - name: Run Autoblocks tests
+        run: npx autoblocks testing exec -- poetry run start
+        env:
+          # Add your OpenAI API key & Autoblocks API key to the repository secrets.
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          AUTOBLOCKS_API_KEY: ${{ secrets.AUTOBLOCKS_API_KEY }}
diff --git a/Python/testing-sdk/README.md b/Python/testing-sdk/README.md
@@ -0,0 +1,117 @@
+<p align="center">
+  <img src="https://app.autoblocks.ai/images/logo.png" width="150px">
+</p>
+<p align="center">
+  📚
+  <a href="https://docs.autoblocks.ai/">Documentation</a>
+  &nbsp;
+  •
+  &nbsp;
+  🖥️
+  <a href="https://app.autoblocks.ai/">Application</a>
+  &nbsp;
+  •
+  &nbsp;
+  ☎️
+  <a href="https://cal.frontapp.com/autoblocks/adam/autoblocks-engineering/">Meet with Autoblocks Engineering</a>
+</p>
+
+## Setup
+
+### Install [`poetry`](https://python-poetry.org/)
+
+```bash
+curl -sSL https://install.python-poetry.org | python3 -
+```
+
+### Install [`npm`](https://docs.npmjs.com/about-npm)
+
+> **_NOTE:_** You might already have this installed. Check with `npm -v`.
+
+If you don't have `node` or `npm` installed, we recommend you use `nvm` to do so:
+
+#### Install [`nvm`](https://github.com/nvm-sh/nvm)
+
+```bash
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
+```
+
+#### Install `node` and `npm`
+
+```bash
+nvm install node
+```
+
+#### Set the default version when starting a new shell
+
+```bash
+nvm alias default node
+```
+
+### Install dependencies
+
+```bash
+poetry install
+```
+
+## Run Autoblocks tests
+
+### Set your Autoblocks API key
+
+Retrieve your **local testing API key** from the [settings page](https://app.autoblocks.ai/settings/api-keys) and set it as an environment variable:
+
+```bash
+export AUTOBLOCKS_API_KEY=...
+```
+
+### Set your OpenAI API key
+
+```bash
+export OPENAI_API_KEY=...
+```
+
+### Run the tests
+
+```bash
+npx autoblocks testing exec -m "my first run" -- poetry run start
+```
+
+You should see something like:
+
+<img width="1668" alt="Screenshot 2024-02-22 at 1 23 50 PM" src="https://github.com/autoblocksai/autoblocks-examples/assets/7498009/ded25aa8-1439-432d-86a6-7254b27b970b">
+
+You can click on the links next to each test name to dig into more details.
+You can also find all of your tests on the testing homepage in the [Autoblocks application](https://app.autoblocks.ai/testing/local).
+
+## GitHub Actions setup
+
+A starter workflow was added in [`.github/workflows/autoblocks-testing.yml`](../../.github/workflows/autoblocks-testing.yml).
+This workflow runs the tests on every push to the repository and also
+on a daily schedule.
+
+To use this workflow, `AUTOBLOCKS_API_KEY` and `OPENAI_API_KEY` must be set as repository secrets.
+You can add these by going to **Settings** > **Secrets and variables** > **Actions** > **New repository secret**.
+
+![repository-secrets](./media/repository-secrets.png)
+
+## Repo structure
+
+```
+my_project/
+  run.py <-- imports all test suites from test_suites/ and runs them
+  evaluators/ <-- all common evaluators are implemented here
+    some_shared_evaluator1.py
+    some_shared_evaluator2.py
+  tasks/ <-- all "tasks" are implemented here
+    task1.py
+    task2.py
+  test_suites/ <-- tests for each task
+    task1/
+      __init__.py <-- implements the runner for task1
+      evaluators.py  <-- evaluators used only for task1
+      test_cases.py <-- contains test cases for task1
+    task2/
+      __init__.py <-- implements the runner for task2
+      evaluators.py  <-- evaluators used only for task2
+      test_cases.py <-- contains test cases for task2
+```
diff --git a/Python/testing-sdk/my_project/__init__.py b/Python/testing-sdk/my_project/__init__.py
diff --git a/Python/testing-sdk/my_project/evaluators/__init__.py b/Python/testing-sdk/my_project/evaluators/__init__.py
diff --git a/Python/testing-sdk/my_project/evaluators/has_substrings.py b/Python/testing-sdk/my_project/evaluators/has_substrings.py
@@ -0,0 +1,61 @@
+import abc
+from typing import Any
+from typing import List
+from typing import Optional
+
+from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestCase
+from autoblocks.testing.models import Evaluation
+from autoblocks.testing.models import Threshold
+
+
+class BaseHasSubstrings(BaseEvaluator, abc.ABC):
+    id = "has-substrings"
+
+    """
+    Can be overriden by subclassing and changing this threshold. For example:
+
+    class MyEvaluator(HasSubstrings):
+         threshold = None
+
+    run_test_suite(
+        ...
+        evaluators=[
+            MyEvaluator()
+        ],
+        ...
+    )
+    """
+    threshold: Optional[Threshold] = Threshold(gte=1)
+
+    @abc.abstractmethod
+    def expected_substrings(self, test_case: BaseTestCase) -> List[str]:
+        """
+        Required to be implemented by the subclass.
+
+        In most cases this will just return the field from the test case that contains the expected substrings,
+        but since it's a method, it can be used to calculate the expected substrings in a more complex way
+        if appropriate.
+
+        For example:
+
+        class MyEvaluator(HasSubstrings):
+            def expected_substrings(self, test_case: MyTestCase) -> List[str]:
+                return test_case.expected_substrings
+        """
+        ...
+
+    def output_as_str(self, output: Any) -> str:
+        """
+        Can be overriden by the subclass to change how the output is converted to a string.
+        """
+        return str(output)
+
+    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
+        expected_substrings = self.expected_substrings(test_case)
+        output_as_str = self.output_as_str(output)
+
+        for substring in expected_substrings:
+            if substring not in output_as_str:
+                return Evaluation(score=0, threshold=self.threshold)
+        return Evaluation(score=1, threshold=self.threshold)
diff --git a/Python/testing-sdk/my_project/evaluators/is_valid_json.py b/Python/testing-sdk/my_project/evaluators/is_valid_json.py
@@ -0,0 +1,47 @@
+import json
+from typing import Any
+from typing import Optional
+
+from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestCase
+from autoblocks.testing.models import Evaluation
+from autoblocks.testing.models import Threshold
+
+
+class IsValidJson(BaseEvaluator):
+    id = "is-valid-json"
+
+    """
+    Can be overriden by subclassing and changing this threshold. For example:
+
+    class MyEvaluator(IsValidJson):
+         threshold = None
+
+    run_test_suite(
+        ...
+        evaluators=[
+            MyEvaluator()
+        ],
+        ...
+    )
+    """
+    threshold: Optional[Threshold] = Threshold(gte=1)
+
+    def output_as_str(self, output: Any) -> str:
+        """
+        Can be overriden by the subclass to change how the output is converted to a string.
+
+        For example:
+
+        class MyEvaluator(IsValidJson):
+            def output_as_str(self, output: SomeCustomOutputType) -> str:
+                return output.as_json()
+        """
+        return str(output)
+
+    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
+        try:
+            json.loads(self.output_as_str(output))
+            return Evaluation(score=1, threshold=self.threshold)
+        except json.JSONDecodeError:
+            return Evaluation(score=0, threshold=self.threshold)
diff --git a/Python/testing-sdk/my_project/run.py b/Python/testing-sdk/my_project/run.py
@@ -0,0 +1,11 @@
+from my_project.test_suites import flashcard_generator
+from my_project.test_suites import study_guide_outline
+
+
+def run():
+    # Autoblocks handles running these tests asynchronously behind the scenes
+    # in a dedicated event loop, so no need to attempt to add any concurrency
+    # here or use asyncio.run() or similar. Just simply call each test suite's
+    # run() function and Autoblocks will handle the rest.
+    flashcard_generator.run()
+    study_guide_outline.run()
diff --git a/Python/testing-sdk/my_project/tasks/__init__.py b/Python/testing-sdk/my_project/tasks/__init__.py