-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Nicole White
authored
Oct 30, 2023
1 parent
cb05a8a
commit d980b3f
Showing
13 changed files
with
1,217 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,10 @@ | ||
const crypto = require('crypto'); | ||
const OpenAI = require('openai'); | ||
const { AutoblocksTracer } = require('@autoblocks/client'); | ||
const { run } = require('./run'); | ||
|
||
const openai = new OpenAI({ | ||
apiKey: process.env.OPENAI_API_KEY, | ||
}); | ||
// Simulate running the `run` function in a production setting, | ||
// i.e. without passing in a traceId from a test case (one will | ||
// be auto-generated). | ||
async function main() { | ||
await run({ input: 'How do I sign up?' }); | ||
} | ||
|
||
const tracer = new AutoblocksTracer(process.env.AUTOBLOCKS_INGESTION_KEY, { | ||
properties: { | ||
provider: 'openai', | ||
}, | ||
}); | ||
|
||
const run = async ({ input, traceId }) => { | ||
// Set the traceId to the one given, or fall back to a random UUID. | ||
// When we call this function from the test suite we will pass in a | ||
// traceId so that it is stable across replay runs, but in production | ||
// we'll only pass in an input, like run({ input }), so that we generate | ||
// a random traceId while in production. | ||
tracer.setTraceId(traceId || crypto.randomUUID()); | ||
|
||
const request = { | ||
model: 'gpt-3.5-turbo', | ||
messages: [ | ||
{ | ||
role: 'system', | ||
content: | ||
'You are a helpful assistant. ' + | ||
'You answer questions about a software product named Acme. ' + | ||
'Your answers should be in a friendly tone and include a bulleted or numbered list where appopriate. ' + | ||
'You should also include a link to the relevant page in the Acme documentation.', | ||
}, | ||
{ | ||
role: 'user', | ||
content: input, | ||
}, | ||
], | ||
temperature: 0.3, | ||
}; | ||
|
||
await tracer.sendEvent('ai.request', { | ||
properties: request, | ||
}); | ||
|
||
try { | ||
const now = Date.now(); | ||
const response = await openai.chat.completions.create(request); | ||
await tracer.sendEvent('ai.response', { | ||
properties: { | ||
response, | ||
latencyMs: Date.now() - now, | ||
}, | ||
}); | ||
return response.choices[0].message.content; | ||
} catch (error) { | ||
await tracer.sendEvent('ai.error', { | ||
properties: { | ||
error, | ||
}, | ||
}); | ||
throw error; | ||
} | ||
}; | ||
|
||
module.exports = { run }; | ||
main(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
const crypto = require('crypto'); | ||
const OpenAI = require('openai'); | ||
const { AutoblocksTracer } = require('@autoblocks/client'); | ||
|
||
const openai = new OpenAI({ | ||
apiKey: process.env.OPENAI_API_KEY, | ||
}); | ||
|
||
const tracer = new AutoblocksTracer(process.env.AUTOBLOCKS_INGESTION_KEY, { | ||
properties: { | ||
provider: 'openai', | ||
}, | ||
}); | ||
|
||
const run = async ({ input, traceId }) => { | ||
// Set the traceId to the one given, or fall back to a random UUID. | ||
// When we call this function from the test suite we will pass in a | ||
// traceId so that it is stable across replay runs, but in production | ||
// we'll only pass in an input, like run({ input }), so that we generate | ||
// a random traceId while in production. | ||
tracer.setTraceId(traceId || crypto.randomUUID()); | ||
|
||
const request = { | ||
model: 'gpt-3.5-turbo', | ||
messages: [ | ||
{ | ||
role: 'system', | ||
content: | ||
'You are a helpful assistant. ' + | ||
'You answer questions about a software product named Acme. ' + | ||
'Your answers should be in a friendly tone and include a bulleted or numbered list where appopriate. ' + | ||
'You should also include a link to the relevant page in the Acme documentation.', | ||
}, | ||
{ | ||
role: 'user', | ||
content: input, | ||
}, | ||
], | ||
temperature: 0.3, | ||
}; | ||
|
||
await tracer.sendEvent('ai.request', { | ||
properties: request, | ||
}); | ||
|
||
try { | ||
const now = Date.now(); | ||
const response = await openai.chat.completions.create(request); | ||
await tracer.sendEvent('ai.response', { | ||
properties: { | ||
response, | ||
latencyMs: Date.now() - now, | ||
}, | ||
}); | ||
return response.choices[0].message.content; | ||
} catch (error) { | ||
await tracer.sendEvent('ai.error', { | ||
properties: { | ||
error, | ||
}, | ||
}); | ||
throw error; | ||
} | ||
}; | ||
|
||
module.exports = { run }; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
const { run } = require('../src/index'); | ||
const { run } = require('../src/run'); | ||
|
||
jest.setTimeout(60000); | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
<!-- banner start --> | ||
<p align="center"> | ||
<img src="https://app.autoblocks.ai/images/logo.png" width="300px"> | ||
</p> | ||
|
||
<p align="center"> | ||
📚 | ||
<a href="https://docs.autoblocks.ai/">Documentation</a> | ||
| ||
• | ||
| ||
🖥️ | ||
<a href="https://app.autoblocks.ai/">Application</a> | ||
| ||
• | ||
| ||
🏠 | ||
<a href="https://www.autoblocks.ai/">Home</a> | ||
</p> | ||
<!-- banner end --> | ||
|
||
<!-- getting started start --> | ||
|
||
## Getting started | ||
|
||
- Sign up for an Autoblocks account at https://app.autoblocks.ai | ||
- Grab your Autoblocks ingestion key from https://app.autoblocks.ai/settings/api-keys | ||
- Grab your OpenAI API key from https://platform.openai.com/account/api-keys | ||
- Create a file named `.env` in this folder and include the following environment variables: | ||
|
||
``` | ||
OPENAI_API_KEY=<your-api-key> | ||
AUTOBLOCKS_INGESTION_KEY=<your-ingestion-key> | ||
``` | ||
|
||
<!-- getting started end --> | ||
|
||
## Replays | ||
|
||
This project shows how you can run Autoblocks Replays via your [pytest](https://docs.pytest.org/en/7.4.x/) test suite. Follow the steps below to get started. | ||
|
||
### 1. Use your replay key | ||
|
||
Replace the value for `AUTOBLOCKS_INGESTION_KEY` in the `.env` file with your replay key. Your replay key is in the same place as your | ||
ingestion key: https://app.autoblocks.ai/settings/api-keys | ||
|
||
> **_NOTE:_** This means you need to make very few code changes to your production code to get started with Autoblocks Replays. You simply need to swap out an environment variable. | ||
### 2. Run the tests | ||
|
||
First install the dependencies: | ||
|
||
``` | ||
poetry install | ||
``` | ||
|
||
Then run the test suite with Autoblocks Replays enabled: | ||
|
||
``` | ||
poetry run pytest --autoblocks | ||
``` | ||
|
||
Within the test suite, you should see a link printed to the console that will take you to the replay in the Autoblocks UI: | ||
|
||
``` | ||
➜ poetry run pytest --autoblocks | ||
=========================================== test session starts ============================================ | ||
platform darwin -- Python 3.11.4, pytest-7.4.3, pluggy-1.3.0 | ||
rootdir: /Users/nicole/autoblocks/autoblocks-examples/Python/pytest-replays | ||
plugins: anyio-4.0.0, autoblocksai-0.0.11 | ||
collected 3 items | ||
test_main.py ... [100%] | ||
======================================== Autoblocks Replay Results ========================================= | ||
View your replay: https://app.autoblocks.ai/replays/local/run/nicole-pytest-20231030-122752 | ||
============================================================================================================ | ||
============================================ 3 passed in 16.65s ============================================ | ||
``` | ||
|
||
Run the tests a few times so that you generate multiple replays (your first replay won't have any baseline to compare against!). | ||
|
||
### 3. View the replays in the Autoblocks UI | ||
|
||
The link will take you to the replay UI where you can see at-a-glance differences between the replay runs over the three test cases. There are four main columns: | ||
|
||
- **Message**: The name of the Autoblocks event sent | ||
- a gray icon indicates no changes | ||
- a yellow icon indicates changes | ||
- a red icon indicates the event was there before but not now | ||
- a green icon indicates the event was not there before but is now | ||
- **Changes**: The number of word changes between the event properties of the replay run and the baseline run | ||
- **Difference Scores**: For properties that we've detected to be LLM outputs, this column will show you a difference score between the value from the baseline run and the current run | ||
- **Evals**: The results of your [Autoblocks Evaluators](https://docs.autoblocks.ai/features/evaluators) | ||
|
||
In one of my runs, I could see that the difference score was pretty high for the `"What is your refund policy?"` test case: | ||
|
||
![replay-summary](https://github.com/autoblocksai/autoblocks-examples/assets/7498009/cb99858a-8f94-4bd9-b8b4-893e32097642) | ||
|
||
Clicking into **View Differences**, I could see that the response now included an apology about not being able to answer questions about refunds, even though it did previously: | ||
|
||
![replay-differences](https://github.com/autoblocksai/autoblocks-examples/assets/7498009/53b33ed5-fe8e-44cf-ac07-c2f315ecb4b9) | ||
|
||
This kind of snapshot / stability testing is important to run over LLM outputs on every pull request so that you can catch regressions before they go to production. | ||
|
||
### 4. Run the replays in GitHub Actions | ||
|
||
See the [Autoblocks Replays GitHub Action](/.github/workflows/autoblocks-replays.yml) workflow; this workflow runs replays on every pull request and also on a schedule. The results of these replays will be under the GitHub tab on the [replays](https://app.autoblocks.ai/replays) page. |
Oops, something went wrong.
d980b3f
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
autoblocks-examples – ./
autoblocks-examples-git-main-autoblocks.vercel.app
chatbot-example.autoblocks.ai
autoblocks-examples-autoblocks.vercel.app