diff --git a/.github/workflows/autoblocks-testing.yml b/.github/workflows/autoblocks-testing.yml index b508b711..74b65485 100644 --- a/.github/workflows/autoblocks-testing.yml +++ b/.github/workflows/autoblocks-testing.yml @@ -37,3 +37,29 @@ jobs: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} AUTOBLOCKS_API_KEY: ${{ secrets.AUTOBLOCKS_API_KEY }} + + js: + runs-on: ubuntu-latest + + defaults: + run: + shell: bash + working-directory: JavaScript/testing-sdk + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install dependencies + run: npm ci + + - name: Run Autoblocks tests + run: npx autoblocks testing exec -- npm run start + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + AUTOBLOCKS_API_KEY: ${{ secrets.AUTOBLOCKS_API_KEY }} diff --git a/JavaScript/testing-sdk/README.md b/JavaScript/testing-sdk/README.md new file mode 100644 index 00000000..92d343e6 --- /dev/null +++ b/JavaScript/testing-sdk/README.md @@ -0,0 +1,93 @@ + +

+ +

+ +

+ 📚 + Documentation +   + • +   + 🖥️ + Application +   + • +   + 🏠 + Home +

+ + +## Setup + +### Install Autoblocks CLI + +See [Autoblocks CLI documentation](https://docs.autoblocks.ai/cli/setup) + +### Install dependencies + +``` +npm install +``` + +## Run Autoblocks tests + +### Set your Autoblocks API key + +Retrieve your **local testing API key** from the [settings page](https://app.autoblocks.ai/settings/api-keys) and set it as an environment variable: + +```bash +export AUTOBLOCKS_API_KEY=... +``` + +### Set your OpenAI API key + +```bash +export OPENAI_API_KEY=... +``` + +### Run the tests + +```bash +npx autoblocks testing exec -m "my first run" -- npm run start +``` + +You should see something like: + +Screenshot 2024-03-01 at 5 53 27 PM + +You can click on the links next to each test name to dig into more details. +You can also find all of your tests on the testing homepage in the [Autoblocks application](https://app.autoblocks.ai/testing/local). + +## GitHub Actions setup + +A starter workflow was added in [`.github/workflows/autoblocks-testing.yml`](../../.github/workflows/autoblocks-testing.yml). +This workflow runs the tests on every push to the repository and also +on a daily schedule. 
+ +## Repo structure + +``` +src/ + run.ts <-- imports all tests from test-suites/ and runs them + evaluators/ <-- all common evaluators are implemented here + some-shared-evaluator1.ts + some-shared-evaluator2.ts + tasks/ <-- all "tasks" are implemented here + task1.ts + task2.ts + test-suites/ <-- tests for each task + task1/ + index.ts <-- implements the runner for task1 + evaluators.ts <-- evaluators used only for task1 + test-cases.ts <-- contains test cases for task1 + task2/ + index.ts <-- implements the runner for task2 + evaluators.ts <-- evaluators used only for task2 + test-cases.ts <-- contains test cases for task2 +``` + +## Further Reading + +- [Autoblocks Testing documentation](https://docs.autoblocks.ai/testing/sdks) diff --git a/JavaScript/testing-sdk/package-lock.json b/JavaScript/testing-sdk/package-lock.json new file mode 100644 index 00000000..1d231e9f --- /dev/null +++ b/JavaScript/testing-sdk/package-lock.json @@ -0,0 +1,864 @@ +{ + "name": "testing-sdk", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "testing-sdk", + "version": "0.0.0", + "license": "MIT", + "dependencies": { + "@autoblocks/client": "^0.0.33", + "dotenv-cli": "^7.3.0", + "openai": "^4.6.0", + "tsx": "^4.1.3", + "typescript": "^5.2.2", + "zod": "^3.22.4" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@autoblocks/client": { + "version": "0.0.33", + "resolved": "https://registry.npmjs.org/@autoblocks/client/-/client-0.0.33.tgz", + "integrity": "sha512-uivJ5XiFAWlK80n7IRTwRoY5oeIHIyy/9KbKCU8aLt4o2s/A0bgYsAgfqIjhJ2/ahiStwapAFnnt1tBfyibPpQ==", + "dependencies": { + "zod": "^3.21.4" + }, + "bin": { + "prompts": "bin/prompts-cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz", + "integrity": 
"sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==", + "cpu": [ + "ppc64" + ], + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz", + "integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==", + "cpu": [ + "arm" + ], + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz", + "integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz", + "integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz", + "integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz", + "integrity": 
"sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz", + "integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz", + "integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz", + "integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==", + "cpu": [ + "arm" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz", + "integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz", + "integrity": 
"sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==", + "cpu": [ + "ia32" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz", + "integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==", + "cpu": [ + "loong64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz", + "integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==", + "cpu": [ + "mips64el" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz", + "integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==", + "cpu": [ + "ppc64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz", + "integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==", + "cpu": [ + "riscv64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz", + "integrity": 
"sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==", + "cpu": [ + "s390x" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz", + "integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz", + "integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz", + "integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz", + "integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz", + "integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==", + "cpu": [ 
+ "arm64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz", + "integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==", + "cpu": [ + "ia32" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz", + "integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@types/node": { + "version": "18.19.21", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.21.tgz", + "integrity": "sha512-2Q2NeB6BmiTFQi4DHBzncSoq/cJMLDdhPaAoJFnFCyD9a8VPZRf7a1GAwp1Edb7ROaZc5Jz/tnZyL6EsWMRaqw==", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/node-fetch": { + "version": "2.6.11", + "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.11.tgz", + "integrity": "sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==", + "dependencies": { + "@types/node": "*", + "form-data": "^4.0.0" + } + }, + "node_modules/abort-controller": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", + "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", + "dependencies": { + "event-target-shim": "^5.0.0" + }, + "engines": { + "node": ">=6.5" + } + }, + "node_modules/agentkeepalive": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz", + 
"integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==", + "dependencies": { + "humanize-ms": "^1.2.1" + }, + "engines": { + "node": ">= 8.0.0" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/base-64": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", + "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" + }, + "node_modules/charenc": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", + "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==", + "engines": { + "node": "*" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/crypt": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", + "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==", + "engines": { + "node": "*" + } + }, + 
"node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/digest-fetch": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", + "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", + "dependencies": { + "base-64": "^0.1.0", + "md5": "^2.3.0" + } + }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, + "node_modules/dotenv-cli": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/dotenv-cli/-/dotenv-cli-7.3.0.tgz", + "integrity": "sha512-314CA4TyK34YEJ6ntBf80eUY+t1XaFLyem1k9P0sX1gn30qThZ5qZr/ZwE318gEnzyYP9yj9HJk6SqwE0upkfw==", + "dependencies": { + "cross-spawn": "^7.0.3", + "dotenv": "^16.3.0", + "dotenv-expand": "^10.0.0", + "minimist": "^1.2.6" + }, + "bin": { + "dotenv": "cli.js" + } + }, + "node_modules/dotenv-expand": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/dotenv-expand/-/dotenv-expand-10.0.0.tgz", + "integrity": "sha512-GopVGCpVS1UKH75VKHGuQFqS1Gusej0z4FyQkPdwjil2gNIv+LNsqBlboOzpJFZKVT95GkCyWJbBSdFEFUWI2A==", + "engines": { + "node": ">=12" + } + }, + "node_modules/esbuild": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz", + "integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==", + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": 
">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.19.12", + "@esbuild/android-arm": "0.19.12", + "@esbuild/android-arm64": "0.19.12", + "@esbuild/android-x64": "0.19.12", + "@esbuild/darwin-arm64": "0.19.12", + "@esbuild/darwin-x64": "0.19.12", + "@esbuild/freebsd-arm64": "0.19.12", + "@esbuild/freebsd-x64": "0.19.12", + "@esbuild/linux-arm": "0.19.12", + "@esbuild/linux-arm64": "0.19.12", + "@esbuild/linux-ia32": "0.19.12", + "@esbuild/linux-loong64": "0.19.12", + "@esbuild/linux-mips64el": "0.19.12", + "@esbuild/linux-ppc64": "0.19.12", + "@esbuild/linux-riscv64": "0.19.12", + "@esbuild/linux-s390x": "0.19.12", + "@esbuild/linux-x64": "0.19.12", + "@esbuild/netbsd-x64": "0.19.12", + "@esbuild/openbsd-x64": "0.19.12", + "@esbuild/sunos-x64": "0.19.12", + "@esbuild/win32-arm64": "0.19.12", + "@esbuild/win32-ia32": "0.19.12", + "@esbuild/win32-x64": "0.19.12" + } + }, + "node_modules/event-target-shim": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", + "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", + "engines": { + "node": ">=6" + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data-encoder": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", + "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" + }, + "node_modules/formdata-node": { + "version": "4.4.1", + "resolved": 
"https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, + "node_modules/formdata-node/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { + "version": "4.7.2", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.2.tgz", + "integrity": "sha512-wuMsz4leaj5hbGgg4IvDU0bqJagpftG5l5cXIAvo8uZrqn0NJqwtfupTN00VnkQJPcIRrxYrm1Ue24btpCha2A==", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/humanize-ms": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", + "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", + "dependencies": { + "ms": "^2.0.0" + } + }, + "node_modules/is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" + 
}, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==" + }, + "node_modules/md5": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", + "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", + "dependencies": { + "charenc": "0.0.2", + "crypt": "0.0.2", + "is-buffer": "~1.1.6" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node_modules/node-domexception": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", + "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", + "funding": [ + { + 
"type": "github", + "url": "https://github.com/sponsors/jimmywarting" + }, + { + "type": "github", + "url": "https://paypal.me/jimmywarting" + } + ], + "engines": { + "node": ">=10.5.0" + } + }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/openai": { + "version": "4.28.4", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.28.4.tgz", + "integrity": "sha512-RNIwx4MT/F0zyizGcwS+bXKLzJ8QE9IOyigDG/ttnwB220d58bYjYFp0qjvGwEFBO6+pvFVIDABZPGDl46RFsg==", + "dependencies": { + "@types/node": "^18.11.18", + "@types/node-fetch": "^2.6.4", + "abort-controller": "^3.0.0", + "agentkeepalive": "^4.2.1", + "digest-fetch": "^1.3.0", + "form-data-encoder": "1.7.2", + "formdata-node": "^4.3.2", + "node-fetch": "^2.6.7", + "web-streams-polyfill": "^3.2.1" + }, + "bin": { + "openai": "bin/cli" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "engines": { + "node": ">=8" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "engines": { + "node": ">=8" + } + }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/tsx": { + "version": "4.7.1", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.7.1.tgz", + "integrity": "sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==", + "dependencies": { + "esbuild": "~0.19.10", + "get-tsconfig": "^4.7.2" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/typescript": { + "version": "5.3.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.3.3.tgz", + "integrity": "sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, + "node_modules/web-streams-polyfill": { + "version": "3.3.3", + "resolved": 
"https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz", + "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==", + "engines": { + "node": ">= 8" + } + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/zod": { + "version": "3.22.4", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz", + "integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/JavaScript/testing-sdk/package.json b/JavaScript/testing-sdk/package.json new file mode 100644 index 00000000..09af6166 --- /dev/null +++ b/JavaScript/testing-sdk/package.json @@ -0,0 +1,22 @@ +{ + "name": "testing-sdk", + "description": "Using the Autoblocks Testing SDK with the Prompt SDK for experimentation, regression testing, and rapid prototyping.", + "version": "0.0.0", + "private": true, + "engines": { + "node": ">=18" + }, + "type": "module", + "scripts": { + 
"start": "dotenv -e .env -- tsx ./src/run.ts" + }, + "license": "MIT", + "dependencies": { + "@autoblocks/client": "^0.0.33", + "dotenv-cli": "^7.3.0", + "openai": "^4.6.0", + "tsx": "^4.1.3", + "typescript": "^5.2.2", + "zod": "^3.22.4" + } +} diff --git a/JavaScript/testing-sdk/src/evaluators/has-substrings.ts b/JavaScript/testing-sdk/src/evaluators/has-substrings.ts new file mode 100644 index 00000000..6cda254b --- /dev/null +++ b/JavaScript/testing-sdk/src/evaluators/has-substrings.ts @@ -0,0 +1,32 @@ +import { BaseTestEvaluator, type Threshold } from '@autoblocks/client/testing'; + +export abstract class BaseHasSubstrings extends BaseTestEvaluator { + id = 'has-substrings'; + + /** + * Subclasses should implement this method to return the expected substrings + * for the given test case and output. + * + * Both the test case and output are made available to the subclass to + * accommodate scenarios where the expected substrings are derived from: + * + * - The test case + * - The output + * - Both + */ + abstract expectedSubstrings(args: { testCase: T; output: O }): string[]; + + /** + * Subclasses should implement this method to convert the output to a string. + */ + abstract outputAsString(output: O): string; + + evaluateTestCase(args: { testCase: T; output: O }) { + const expectedSubstrings = this.expectedSubstrings(args); + const outputAsStr = this.outputAsString(args.output); + const score = expectedSubstrings.every((s) => outputAsStr.includes(s)) + ? 
1 + : 0; + return { score, threshold: { gte: 1 } }; + } +} diff --git a/JavaScript/testing-sdk/src/run.ts b/JavaScript/testing-sdk/src/run.ts new file mode 100644 index 00000000..8460339c --- /dev/null +++ b/JavaScript/testing-sdk/src/run.ts @@ -0,0 +1,6 @@ +import * as flashcardGenerator from './test-suites/flashcard-generator'; +import * as studyGuideOutline from './test-suites/study-guide-outline'; + +(async () => { + await Promise.all([flashcardGenerator.run(), studyGuideOutline.run()]); +})(); diff --git a/JavaScript/testing-sdk/src/tasks/flashcard-generator.ts b/JavaScript/testing-sdk/src/tasks/flashcard-generator.ts new file mode 100644 index 00000000..8cb38472 --- /dev/null +++ b/JavaScript/testing-sdk/src/tasks/flashcard-generator.ts @@ -0,0 +1,113 @@ +import { z } from 'zod'; + +import OpenAI from 'openai'; + +const openai = new OpenAI(); + +const zFlashcardSchema = z.object({ + front: z.string(), + back: z.string(), +}); + +export type Flashcard = z.infer; + +const systemPrompt = `Given a user's notes, generate flashcards that will allow the user to study those notes. + +Your first task is to identify the facts or key points in the notes. +Then, create a flashcard for each fact or key point. +The front of the flashcard should be a question, and the back of the flashcard should be the answer to that question. +Each flashcard should be supported by content from the notes. +Ignore the tone of the notes and always make the flashcards in a professional tone. +Ignore any subjective commentary in the notes and only focus on the facts or key points. +Return the results as JSON in the below format: + +\`\`\` +{ + "cards": [ + { + "front": "What is the capital of France?", + "back": "Paris" + }, + { + "front": "Who painted the Mona Lisa?", + "back": "Leonardo da Vinci" + } + ] +} +\`\`\` + +Only return JSON in your response, nothing else. Do not include the backticks. + +Example: + +Notes: + +''' +Am. 
History Notes 🇺🇸 +Beginnings & Stuff +Columbus 1492, "found" America but actually not the first. +Native Americans were here first, tons of diff cultures. +Colonies & Things +13 Colonies cuz Brits wanted $ and land. +Taxation w/o Representation = Colonists mad at British taxes, no say in gov. +Boston Tea Party = Tea in the harbor, major protest. +Revolution Time +Declaration of Independence, 1776, basically "we're breaking up with you, Britain". +George Washington = First pres, war hero. +Moving West +Manifest Destiny = Idea that the US was supposed to own all land coast to coast. +Louisiana Purchase, 1803, Thomas Jefferson bought a ton of land from France. +''' + +Flashcards: + +{ + "cards": [ + { + "front": "Who was the first president of the United States?", + "back": "George Washington" + }, + { + "front": "What was the idea that the US was supposed to own all land coast to coast?", + "back": "Manifest Destiny" + }, + { + "front": "What was the year of the Louisiana Purchase?", + "back": "1803" + } + ] +} +`; + +function makeUserPrompt(notes: string): string { + return `Notes: + +''' +${notes} +''' + +Flashcards:`; +} + +export async function genFlashcardsFromNotes( + notes: string, +): Promise { + const response = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-1106', + temperature: 0.0, + response_format: { type: 'json_object' }, + messages: [ + { + role: 'system', + content: systemPrompt, + }, + { + role: 'user', + content: makeUserPrompt(notes), + }, + ], + }); + const rawContent = response.choices[0].message.content.trim(); + const parsedContent = JSON.parse(rawContent); + return z.array(zFlashcardSchema).parse(parsedContent.cards); +} diff --git a/JavaScript/testing-sdk/src/tasks/study-guide-outline.ts b/JavaScript/testing-sdk/src/tasks/study-guide-outline.ts new file mode 100644 index 00000000..d2695030 --- /dev/null +++ b/JavaScript/testing-sdk/src/tasks/study-guide-outline.ts @@ -0,0 +1,32 @@ +import OpenAI from 'openai'; + +const 
openai = new OpenAI(); + +const systemPrompt = `Generate a study guide outline for a given topic. +It should be a bulleted list with just the title of each category. +The top level bullets should be stars: * +The second level bullets should be dashes: - +The second level dashes should have two spaces before them. +The study guide should be no more than two levels deep. +There should be between five and ten top-level categories.`; + +export async function genStudyGuideOutline(topic: string): Promise { + const resp = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-1106', + temperature: 0.5, + max_tokens: 1_000, + n: 1, + messages: [ + { + role: 'system', + content: systemPrompt, + }, + { + role: 'user', + content: `Topic: ${topic}`, + }, + ], + }); + + return resp.choices[0].message.content.trim(); +} diff --git a/JavaScript/testing-sdk/src/test-suites/flashcard-generator/evaluators.ts b/JavaScript/testing-sdk/src/test-suites/flashcard-generator/evaluators.ts new file mode 100644 index 00000000..411ad7db --- /dev/null +++ b/JavaScript/testing-sdk/src/test-suites/flashcard-generator/evaluators.ts @@ -0,0 +1,152 @@ +import type { Flashcard } from '../../tasks/flashcard-generator'; +import type { TestCase } from './test-cases'; +import { BaseTestEvaluator, Evaluation } from '@autoblocks/client/testing'; +import OpenAI from 'openai'; + +const openai = new OpenAI(); + +export class IsProfessionalTone extends BaseTestEvaluator< + TestCase, + Flashcard[] +> { + id = 'is-professional-tone'; + + prompt = `Please evaluate the provided text for its professionalism in the context of formal communication. +Consider the following criteria in your assessment: + +Language Use: Formality, clarity, and precision of language without slang or casual expressions. +Sentence Structure: Logical and well-formed sentence construction without run-ons or fragments. +Tone and Style: Respectful, objective, and appropriately formal tone without bias or excessive emotionality. 
+Grammar and Punctuation: Correct grammar, punctuation, and capitalization. +Based on these criteria, provide a binary response where: + +0 indicates the text does not maintain a professional tone. +1 indicates the text maintains a professional tone. +No further explanation or summary is required; just provide the number that represents your assessment.`; + + async scoreFlashcard(flashcard: Flashcard): Promise { + const content = `${flashcard.front}\n${flashcard.back}`; + + const response = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-1106', + temperature: 0.0, + n: 1, + max_tokens: 1, + messages: [ + { + role: 'system', + content: this.prompt, + }, + { + role: 'user', + content: content, + }, + ], + }); + + const rawContent = response.choices[0].message.content.trim(); + + if (rawContent === '0') { + return 0; + } else if (rawContent === '1') { + return 1; + } + + throw new Error(`Unexpected response: ${rawContent}`); + } + + async evaluateTestCase(args: { + testCase: TestCase; + output: Flashcard[]; + }): Promise { + // Score each flashcard asynchronously + const scores = await Promise.all( + args.output.map((flashcard) => this.scoreFlashcard(flashcard)), + ); + + if (!scores.length) { + throw new Error('No scores were returned'); + } + + // Return the average score as the evaluation score + return { score: scores.reduce((a, b) => a + b, 0) / scores.length }; + } +} + +export class IsSupportedByNotes extends BaseTestEvaluator< + TestCase, + Flashcard[] +> { + id = 'is-supported-by-notes'; + + prompt = `Given some notes by a student and a flashcard in the form of a question and answer, evaluate whether the flashcard's question and answer are supported by the notes. +It's possible the question and answer aren't in the notes verbatim. +If the notes provide enough context or information to support the question and answer, consider that sufficient support. 
+Based on these criteria, provide a binary response where: +0 indicates the flashcard's question and answer are not supported by the notes. +1 indicates the flashcard's question and answer are supported by the notes. +No further explanation or summary is required; just provide the number that represents your assessment.`; + + async scoreFlashcard(args: { + testCase: TestCase; + flashcard: Flashcard; + }): Promise { + const content = `Notes: + + ''' + ${args.testCase.notes} + ''' + + Flashcard: + + Question: ${args.flashcard.front} + Answer: ${args.flashcard.back} + `; + + const response = await openai.chat.completions.create({ + model: 'gpt-3.5-turbo-1106', + temperature: 0.0, + n: 1, + max_tokens: 1, + messages: [ + { + role: 'system', + content: this.prompt, + }, + { + role: 'user', + content: content, + }, + ], + }); + + const rawContent = response.choices[0].message.content.trim(); + + if (rawContent === '0') { + return 0; + } else if (rawContent === '1') { + return 1; + } + + throw new Error(`Unexpected response: ${rawContent}`); + } + + async evaluateTestCase(args: { + testCase: TestCase; + output: Flashcard[]; + }): Promise { + // Score each flashcard asynchronously + const scores = await Promise.all( + args.output.map((flashcard) => + this.scoreFlashcard({ testCase: args.testCase, flashcard }), + ), + ); + + if (!scores.length) { + throw new Error('No scores were returned'); + } + + // Return the average score as the evaluation score + return { score: scores.reduce((a, b) => a + b, 0) / scores.length }; + } +} diff --git a/JavaScript/testing-sdk/src/test-suites/flashcard-generator/index.ts b/JavaScript/testing-sdk/src/test-suites/flashcard-generator/index.ts new file mode 100644 index 00000000..f2797ab1 --- /dev/null +++ b/JavaScript/testing-sdk/src/test-suites/flashcard-generator/index.ts @@ -0,0 +1,20 @@ +import { runTestSuite } from '@autoblocks/client/testing'; +import { genTestCases, type TestCase } from './test-cases'; +import { IsProfessionalTone, 
IsSupportedByNotes } from './evaluators'; +import { + genFlashcardsFromNotes, + type Flashcard, +} from '../../tasks/flashcard-generator'; + +export async function run() { + await runTestSuite({ + id: 'flashcard-generator', + testCases: genTestCases(), + testCaseHash: ['notes'], + evaluators: [new IsProfessionalTone(), new IsSupportedByNotes()], + fn: (args: { testCase: TestCase }) => + genFlashcardsFromNotes(args.testCase.notes), + maxTestCaseConcurrency: 5, + maxEvaluatorConcurrency: 1, + }); +} diff --git a/JavaScript/testing-sdk/src/test-suites/flashcard-generator/test-cases.ts b/JavaScript/testing-sdk/src/test-suites/flashcard-generator/test-cases.ts new file mode 100644 index 00000000..8454064b --- /dev/null +++ b/JavaScript/testing-sdk/src/test-suites/flashcard-generator/test-cases.ts @@ -0,0 +1,126 @@ +export interface TestCase { + notes: string; +} + +export function genTestCases(): TestCase[] { + return [ + { + notes: `Bio 101 Notes +Cells n stuff +Cells are like, the smallest thingies that are alive. +Some old dude named Hooke found them in 1665 by looking at cork. +2 kinds: Prokaryotic (no nucleus, think bacteria) & Eukaryotic (has a nucleus, like us and plants). +Cell Theory (important!!) +Everything alive = made of cells. +Cells = life's basic unit. +New cells come from old ones. +Parts of a Cell (the bits and pieces) +Cell Membrane: kinda like a bouncer, decides what gets in and out. +Nucleus: boss of the cell, has all the DNA. +Mitochondria: power station, makes energy. +Ribosomes: tiny factories for making proteins. +ER stuff: +Rough ER has ribosomes, makes proteins. +Smooth ER is like, no ribosomes, makes fats. +Golgi Thingy: packages proteins. +Lysosomes: trash disposals for cells. +Plants have extra stuff: +Chloroplasts for catching sunlight. +Cell Wall for extra toughness. +Membrane and Moving Stuff +Phospholipid bilayer = fancy term for the cell membrane structure. +It's picky about what it lets in/out. 
+Doing Things (Cellular Processes) +Photosynthesis: Only in plants, turns sunlight to food. +Breathing in Cells (Respiration): Turning food & O2 into energy. +Cell Division: Mitosis (for growing and fixing) & Meiosis (making baby cells). +DNA & Genes +DNA = double helix thing, basically the recipe book for making you. +Genes = specific recipes for traits like eye color. +Evolution (Darwin's big idea) +Survival of the fittest. +Animals change over time to become better at surviving. +Random Notes: +Need to remember: Cell wall = plants only. +Mitochondria and chloroplasts have their own DNA?? Check this. +DNA to protein = transcription and translation (need to clarify). +Why does rough ER look bumpy under a microscope? Oh, because of ribosomes. +Evolution examples for exam?`, + }, + { + notes: `Eng Lit Notes +Random Stuff on Books & Authors +Shakespeare (Big Deal) + +Wrote plays and sonnets. +Old English (hard to read lol). +Famous stuff: "Romeo & Juliet", "Hamlet", "Macbeth". +Themes: love, power, betrayal, the supernatural. +Chaucer's "Canterbury Tales" + +Super old stories, like medieval road trip. +Different people telling tales, some funny, some serious. +Middle English (even harder to read). +American Lit Bits + +Mark Twain: "Huckleberry Finn" = kid on a raft, talks about racism, freedom. +F. Scott Fitzgerald: "The Great Gatsby", 1920s jazz age, American Dream is kinda questioned. +Poetry Stuff +Poems = lots of feelings in few words. +Rhyme, rhythm, metaphors. +Emily Dickinson: Weird punctuation, lots of dashes, wrote about death and nature. +Robert Frost: "The Road Not Taken", about choices and life paths. +Modern Stuff (Kinda) +"To Kill a Mockingbird" by Harper Lee: Racism, growing up, the South. +"1984" by George Orwell: Creepy government watching everyone. +"The Catcher in the Rye" by J.D. Salinger: Teen angst, rebellion. +Themes & Symbols +Symbols: Stuff in books that stands for other stuff. Like, a road in a poem might not just be a road. 
+Themes: Big ideas in a story. Freedom, identity, conflict, etc. +Notes to Self: +Shakespeare invented a ton of words, look up some. +Need examples of irony from "The Great Gatsby". +What the heck is iambic pentameter again? +Look up what "postmodernism" means. +Remember to find quotes for essay on "Mockingbird". +Random Thoughts: +Why do all old books have to be tragic? +Need to watch some Shakespeare adaptations to get it better. +Symbols in "The Great Gatsby"? Green light = dream?? +Is every old poem about death or what?`, + }, + { + notes: `Early Stuff +Stonehenge: Big rocks in a circle, super old, no one knows why they did it. +Romans: Came, saw, conquered. Left a bunch of baths and walls (Hadrian's Wall). +Medieval Mayhem +1066: Normans (French guys) invade, William the Conqueror becomes king. +Magna Carta (1215): King John forced to sign it, basically "Kings can't do whatever they want." +Wars & Plagues +100 Years War: England vs. France, forever fighting. +Black Death: Wipes out like half the population. Seriously bad. +Tudor Drama +Henry VIII: Marries a bunch of women, starts his own church (Church of England) because the Pope won't let him divorce. +Elizabeth I: Virgin Queen, beats the Spanish Armada, arts and theatre flourish (Shakespeare time). +Civil War & The Commonwealth +1642-1651: Civil War, Charles I loses his head, literally. +Oliver Cromwell: Becomes "Lord Protector", basically a dictator but not called a king. +Restoration to Revolution +1660: Monarchy's back with Charles II. +1688: Glorious Revolution, William of Orange takes over, more power to Parliament. +Industrial Revolution +18th-19th Century: Everything changes, factories everywhere, British Empire expands big time. +20th Century Stuff +WWI & WWII: Major world wars, lots of impact. +Decolonization: Empire shrinks, countries gain independence. +Modern Bits +EU & Brexit: Joining and leaving the European Union. +Monarchs: From Elizabeth II to Charles III, royal family drama continues. 
+Random Thoughts: +Why so many Henrys and Edwards? +Need to remember dates for exams (ugh). +The industrial revolution = coal, steam, and smog. +How did Britain end up ruling so much of the world?`, + }, + ]; +} diff --git a/JavaScript/testing-sdk/src/test-suites/study-guide-outline/evaluators.ts b/JavaScript/testing-sdk/src/test-suites/study-guide-outline/evaluators.ts new file mode 100644 index 00000000..7f31e0ca --- /dev/null +++ b/JavaScript/testing-sdk/src/test-suites/study-guide-outline/evaluators.ts @@ -0,0 +1,74 @@ +import { BaseHasSubstrings } from '../../evaluators/has-substrings'; +import { BaseTestEvaluator } from '@autoblocks/client/testing'; +import type { TestCase } from './test-cases'; + +export class Formatting extends BaseTestEvaluator { + id = 'formatting'; + + /** + * Every line should either be blank or start with "* " or " - " + */ + score(output: string): number { + for (const line of output.split('\n')) { + const conditions: boolean[] = [ + line.trim() === '', + line.startsWith('* '), + line.startsWith(' - '), + ]; + if (!conditions.some((c) => c)) { + return 0; + } + } + return 1; + } + + evaluateTestCase(args: { testCase: TestCase; output: string }) { + return { + score: this.score(args.output), + threshold: { + gte: 1, + }, + }; + } +} + +export class NumCategories extends BaseTestEvaluator { + id = 'num-categories'; + + minCategories = 5; + maxCategories = 10; + + score(output: string): number { + const numCategories = output + .split('\n') + .filter((l) => l.startsWith('* ')).length; + if ( + numCategories >= this.minCategories && + numCategories <= this.maxCategories + ) { + return 1; + } + return 0; + } + + evaluateTestCase(args: { testCase: TestCase; output: string }) { + return { + score: this.score(args.output), + threshold: { + gte: 1, + }, + }; + } +} + +export class HasSubstrings extends BaseHasSubstrings { + id = 'has-substrings'; + + expectedSubstrings(args: { testCase: TestCase; output: string }): string[] { + return 
args.testCase.expectedSubstrings; + } + + outputAsString(output: string): string { + return output; + } +} diff --git a/JavaScript/testing-sdk/src/test-suites/study-guide-outline/index.ts b/JavaScript/testing-sdk/src/test-suites/study-guide-outline/index.ts new file mode 100644 index 00000000..fc9769fe --- /dev/null +++ b/JavaScript/testing-sdk/src/test-suites/study-guide-outline/index.ts @@ -0,0 +1,17 @@ +import { runTestSuite } from '@autoblocks/client/testing'; +import { genTestCases, type TestCase } from './test-cases'; +import { Formatting, NumCategories, HasSubstrings } from './evaluators'; +import { genStudyGuideOutline } from '../../tasks/study-guide-outline'; + +export async function run() { + await runTestSuite({ + id: 'study-guide-outline', + testCases: genTestCases(), + testCaseHash: ['topic'], + evaluators: [new Formatting(), new NumCategories(), new HasSubstrings()], + fn: (args: { testCase: TestCase }) => + genStudyGuideOutline(args.testCase.topic), + maxTestCaseConcurrency: 5, + maxEvaluatorConcurrency: 2, + }); +} diff --git a/JavaScript/testing-sdk/src/test-suites/study-guide-outline/test-cases.ts b/JavaScript/testing-sdk/src/test-suites/study-guide-outline/test-cases.ts new file mode 100644 index 00000000..b263d72e --- /dev/null +++ b/JavaScript/testing-sdk/src/test-suites/study-guide-outline/test-cases.ts @@ -0,0 +1,29 @@ +export interface TestCase { + topic: string; + expectedSubstrings: string[]; +} + +export function genTestCases(): TestCase[] { + return [ + { + topic: 'Introduction to Organic Chemistry', + expectedSubstrings: ['Functional Groups'], + }, + { + topic: 'Fundamentals of Calculus', + expectedSubstrings: ['Derivatives', 'Differentiation'], + }, + { + topic: 'World History: Ancient Civilizations', + expectedSubstrings: ['Mesopotamia', 'Egypt'], + }, + { + topic: 'Basics of Programming in Python', + expectedSubstrings: ['Syntax', 'Variables', 'Functions'], + }, + { + topic: 'Principles of Economics', + expectedSubstrings: 
['Microeconomics', 'Macroeconomics'], + }, + ]; +} diff --git a/JavaScript/testing-sdk/tsconfig.json b/JavaScript/testing-sdk/tsconfig.json new file mode 100644 index 00000000..d5e42c74 --- /dev/null +++ b/JavaScript/testing-sdk/tsconfig.json @@ -0,0 +1,8 @@ +{ + "compilerOptions": { + "moduleResolution": "node", + "module": "esnext", + "target": "esnext" + }, + "include": ["src/**/*"] +} diff --git a/Python/testing-sdk-with-prompt-sdk/README.md b/Python/testing-sdk-with-prompt-sdk/README.md index 1b55a023..799edcb0 100644 --- a/Python/testing-sdk-with-prompt-sdk/README.md +++ b/Python/testing-sdk-with-prompt-sdk/README.md @@ -298,7 +298,7 @@ on a daily schedule. ``` my_project/ - run.py <-- imports all tests from tests/ and runs them + run.py <-- imports all tests from test_suites/ and runs them evaluators/ <-- all common evaluators are implemented here some_shared_evaluator1.py some_shared_evaluator2.py diff --git a/Python/testing-sdk/README.md b/Python/testing-sdk/README.md index 9edd936b..d7116392 100644 --- a/Python/testing-sdk/README.md +++ b/Python/testing-sdk/README.md @@ -76,7 +76,7 @@ on a daily schedule. 
``` my_project/ - run.py <-- imports all tests from tests/ and runs them + run.py <-- imports all tests from test_suites/ and runs them evaluators/ <-- all common evaluators are implemented here some_shared_evaluator1.py some_shared_evaluator2.py diff --git a/README.md b/README.md index 049267c6..a4d0a436 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ | [prompt-sdk-headless](/JavaScript/prompt-sdk-headless) | Typesafe and backwards-compatability-ensured headless prompt management | | [prompt-sdk-local](/JavaScript/prompt-sdk-local) | Automated prompt versioning and typesafe prompt building with the local prompt SDK | | [spans](/JavaScript/spans) | Establish parent / child relationships between your events with the `spanId` and `parentSpanId` properties | +| [testing-sdk](/JavaScript/testing-sdk) | Using the Autoblocks Testing SDK for experimentation, regression testing, and rapid prototyping. |