-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Nicole White
committed
Feb 22, 2024
1 parent
f6d5e1a
commit 444faa8
Showing
21 changed files
with
1,336 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Runs the Autoblocks test suites for the Python testing-sdk example
# on every push and once a day on a schedule.
name: Autoblocks Testing

on:
  push: # Run on every push.
  schedule: # Run every day at ~7:17am PST.
    - cron: '17 15 * * *'

jobs:
  py:
    runs-on: ubuntu-latest

    defaults:
      run:
        shell: bash
        working-directory: Python/testing-sdk

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup python
        uses: actions/setup-python@v5
        with:
          # `python-version` takes a version spec. `python-version-file`
          # expects a path to a file (e.g. .python-version) and fails when
          # given a bare version string.
          python-version: '3.11'

      - name: Install poetry
        run: curl -sSL https://install.python-poetry.org | python3 -

      - name: Check pyproject.toml & poetry.lock are in sync
        # `poetry lock --check` is deprecated and removed in Poetry 2.x;
        # `poetry check --lock` is its replacement.
        run: poetry check --lock

      - name: Install dependencies
        run: poetry install

      - name: Run Autoblocks tests
        run: npx autoblocks testing exec -- poetry run start
        env:
          # Add your OpenAI API key & Autoblocks API key to the repository secrets.
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          AUTOBLOCKS_API_KEY: ${{ secrets.AUTOBLOCKS_API_KEY }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
<p align="center"> | ||
<img src="https://app.autoblocks.ai/images/logo.png" width="150px"> | ||
</p> | ||
<p align="center"> | ||
📚 | ||
<a href="https://docs.autoblocks.ai/">Documentation</a> | ||
| ||
• | ||
| ||
🖥️ | ||
<a href="https://app.autoblocks.ai/">Application</a> | ||
| ||
• | ||
| ||
☎️ | ||
<a href="https://cal.frontapp.com/autoblocks/adam/autoblocks-engineering/">Meet with Autoblocks Engineering</a> | ||
</p> | ||
|
||
## Setup | ||
|
||
### Install [`poetry`](https://python-poetry.org/) | ||
|
||
```bash | ||
curl -sSL https://install.python-poetry.org | python3 - | ||
``` | ||
|
||
### Install [`npm`](https://docs.npmjs.com/about-npm) | ||
|
||
> **_NOTE:_** You might already have this installed. Check with `npm -v`. | ||
If you don't have `node` or `npm` installed, we recommend you use `nvm` to do so: | ||
|
||
#### Install [`nvm`](https://github.com/nvm-sh/nvm) | ||
|
||
```bash | ||
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash | ||
``` | ||
|
||
#### Install `node` and `npm` | ||
|
||
```bash | ||
nvm install node | ||
``` | ||
|
||
#### Set the default version when starting a new shell | ||
|
||
```bash | ||
nvm alias default node | ||
``` | ||
|
||
### Install dependencies | ||
|
||
``` | ||
poetry install | ||
``` | ||
|
||
## Run Autoblocks tests | ||
|
||
### Set your Autoblocks API key | ||
|
||
Retrieve your **local testing API key** from the [settings page](https://app.autoblocks.ai/settings/api-keys) and set it as an environment variable: | ||
|
||
```bash | ||
export AUTOBLOCKS_API_KEY=... | ||
``` | ||
|
||
### Set your OpenAI API key | ||
|
||
```bash | ||
export OPENAI_API_KEY=... | ||
``` | ||
|
||
### Run the tests | ||
|
||
```bash | ||
npx autoblocks testing exec -m "my first run" -- poetry run start | ||
``` | ||
|
||
You should see something like: | ||
|
||
<img width="1668" alt="Screenshot 2024-02-22 at 1 23 50 PM" src="https://github.com/autoblocksai/autoblocks-examples/assets/7498009/ded25aa8-1439-432d-86a6-7254b27b970b"> | ||
|
||
You can click on the links next to each test name to dig into more details. | ||
You can also find all of your tests on the testing homepage in the [Autoblocks application](https://app.autoblocks.ai/testing/local). | ||
|
||
## GitHub Actions setup | ||
|
||
A starter workflow was added in [`.github/workflows/autoblocks-testing.yml`](./.github/workflows/autoblocks-testing.yml). | ||
This workflow runs the tests on every push to the repository and also | ||
on a daily schedule. | ||
|
||
To use this workflow, `AUTOBLOCKS_API_KEY` and `OPENAI_API_KEY` must be set as repository secrets. | ||
You can add these by going to **Settings** > **Secrets and variables** > **Actions** > **New repository secret** | ||
|
||
![repository-secrets](./media/repository-secrets.png) | ||
|
||
## Repo structure | ||
|
||
``` | ||
my_project/ | ||
run.py <-- imports all test suites from test_suites/ and runs them | ||
evaluators/ <-- all common evaluators are implemented here | ||
some_shared_evaluator1.py | ||
some_shared_evaluator2.py | ||
tasks/ <-- all "tasks" are implemented here | ||
task1.py | ||
task2.py | ||
test_suites/ <-- tests for each task | ||
task1/ | ||
__init__.py <-- implements the runner for task1 | ||
evaluators.py <-- evaluators used only for task1 | ||
test_cases.py <-- contains test cases for task1 | ||
task2/ | ||
__init__.py <-- implements the runner for task2 | ||
evaluators.py <-- evaluators used only for task2 | ||
test_cases.py <-- contains test cases for task2 | ||
``` |
Empty file.
Empty file.
61 changes: 61 additions & 0 deletions
61
Python/testing-sdk/my_project/evaluators/has_substrings.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import abc | ||
from typing import Any | ||
from typing import List | ||
from typing import Optional | ||
|
||
from autoblocks.testing.models import BaseEvaluator | ||
from autoblocks.testing.models import BaseTestCase | ||
from autoblocks.testing.models import Evaluation | ||
from autoblocks.testing.models import Threshold | ||
|
||
|
||
class BaseHasSubstrings(BaseEvaluator, abc.ABC):
    """
    Abstract evaluator that checks the output contains every expected substring.

    The evaluation scores 1 when all expected substrings are found in the
    stringified output and 0 as soon as one is missing.

    Subclasses must implement `expected_substrings` and may override
    `output_as_str` and `threshold`.
    """

    id = "has-substrings"

    # Can be overridden by subclassing and changing this threshold. For example:
    #
    #     class MyEvaluator(BaseHasSubstrings):
    #         threshold = None
    #
    #     run_test_suite(
    #         ...
    #         evaluators=[
    #             MyEvaluator()
    #         ],
    #         ...
    #     )
    threshold: Optional[Threshold] = Threshold(gte=1)

    @abc.abstractmethod
    def expected_substrings(self, test_case: BaseTestCase) -> List[str]:
        """
        Return the substrings that must all appear in the output.

        Required to be implemented by the subclass. In most cases this just
        returns the field of the test case that holds the expected
        substrings, but because it is a method it can also compute them in
        a more complex way if appropriate.

        For example:

            class MyEvaluator(BaseHasSubstrings):
                def expected_substrings(self, test_case: MyTestCase) -> List[str]:
                    return test_case.expected_substrings
        """
        ...

    def output_as_str(self, output: Any) -> str:
        """
        Convert the output to the string that is searched for substrings.

        Can be overridden by the subclass to change the conversion.
        """
        return str(output)

    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
        """
        Score 1 if every expected substring occurs in the output, else 0.

        The class-level `threshold` is attached to the returned evaluation.
        """
        needles = self.expected_substrings(test_case)
        haystack = self.output_as_str(output)

        # all() short-circuits on the first missing substring.
        found_all = all(needle in haystack for needle in needles)
        return Evaluation(score=1 if found_all else 0, threshold=self.threshold)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import json | ||
from typing import Any | ||
from typing import Optional | ||
|
||
from autoblocks.testing.models import BaseEvaluator | ||
from autoblocks.testing.models import BaseTestCase | ||
from autoblocks.testing.models import Evaluation | ||
from autoblocks.testing.models import Threshold | ||
|
||
|
||
class IsValidJson(BaseEvaluator):
    """
    Evaluator that checks whether the stringified output parses as JSON.

    The evaluation scores 1 when `json.loads` accepts the string and 0 when
    it raises `json.JSONDecodeError`.
    """

    id = "is-valid-json"

    # Can be overridden by subclassing and changing this threshold. For example:
    #
    #     class MyEvaluator(IsValidJson):
    #         threshold = None
    #
    #     run_test_suite(
    #         ...
    #         evaluators=[
    #             MyEvaluator()
    #         ],
    #         ...
    #     )
    threshold: Optional[Threshold] = Threshold(gte=1)

    def output_as_str(self, output: Any) -> str:
        """
        Convert the output to the string that is parsed as JSON.

        Can be overridden by the subclass to change the conversion.
        For example:

            class MyEvaluator(IsValidJson):
                def output_as_str(self, output: SomeCustomOutputType) -> str:
                    return output.as_json()
        """
        return str(output)

    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
        """
        Score 1 if the output string is parseable JSON, else 0.

        The class-level `threshold` is attached to the returned evaluation.
        """
        try:
            # The conversion stays inside the try so a decode error raised by
            # an overridden output_as_str is also treated as invalid JSON.
            json.loads(self.output_as_str(output))
        except json.JSONDecodeError:
            score = 0
        else:
            score = 1
        return Evaluation(score=score, threshold=self.threshold)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from my_project.test_suites import flashcard_generator | ||
from my_project.test_suites import study_guide_outline | ||
|
||
|
||
def run():
    """Kick off every test suite of the project, one after the other."""
    # Each suite's run() is simply called in turn. Autoblocks executes the
    # tests asynchronously behind the scenes in a dedicated event loop, so
    # there is no need to add concurrency here or to use asyncio.run() or
    # similar.
    for suite in (flashcard_generator, study_guide_outline):
        suite.run()
Empty file.
Oops, something went wrong.