# AutoGPTs Nightly Benchmark #66
# NOTE: this file was flagged as containing bidirectional Unicode text that may
# be interpreted or compiled differently than it appears. To review, open the
# file in an editor that reveals hidden Unicode characters (see the hosting
# platform's documentation on bidirectional Unicode characters).
name: AutoGPTs Nightly Benchmark

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Scheduled run: minute 0 of hour 2, every day (GitHub evaluates cron in UTC).
  schedule:
    - cron: '0 2 * * *'
jobs:
  benchmark:
    # Write access to repository contents; a later step pushes to a branch.
    permissions:
      contents: write
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [autogpt]
      # Do not cancel the remaining matrix entries when one of them fails.
      fail-fast: false
    timeout-minutes: 120
    env:
      # Kept as a quoted string so YAML does not read it as the float 3.1.
      min-python-version: '3.10'
      # Branch the generated reports are pushed to.
      REPORTS_BRANCH: data/benchmark-reports
      # Per-agent reports directory (relative path, one folder per matrix entry).
      REPORTS_FOLDER: ${{ format('benchmark/reports/{0}', matrix.agent-name) }}
    steps:
- name: Checkout repository | |
uses: actions/checkout@v4 | |
with: | |
fetch-depth: 0 | |
submodules: true | |
- name: Set up Python ${{ env.min-python-version }} | |
uses: actions/setup-python@v5 | |
with: | |
python-version: ${{ env.min-python-version }} | |
- name: Install Poetry | |
run: curl -sSL https://install.python-poetry.org | python - | |
- name: Prepare reports folder | |
run: mkdir -p ${{ env.REPORTS_FOLDER }} | |
- run: poetry -C benchmark install | |
- name: Benchmark ${{ matrix.agent-name }} | |
run: | | |
./run agent start ${{ matrix.agent-name }} | |
cd ${{ matrix.agent-name }} | |
set +e # Do not quit on non-zero exit codes | |
poetry run agbenchmark run -N 3 \ | |
--test=ReadFile \ | |
--test=BasicRetrieval --test=RevenueRetrieval2 \ | |
--test=CombineCsv --test=LabelCsv --test=AnswerQuestionCombineCsv \ | |
--test=UrlShortener --test=TicTacToe --test=Battleship \ | |
--test=WebArenaTask_0 --test=WebArenaTask_21 --test=WebArenaTask_124 \ | |
--test=WebArenaTask_134 --test=WebArenaTask_163 | |
# Convert exit code 1 (some challenges failed) to exit code 0 | |
if [ $? -eq 0 ] || [ $? -eq 1 ]; then | |
exit 0 | |
else | |
exit $? | |
fi | |
env: | |
AGENT_NAME: ${{ matrix.agent-name }} | |
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | |
REQUESTS_CA_BUNDLE: /etc/ssl/certs/ca-certificates.crt | |
REPORTS_FOLDER: ${{ format('../../{0}', env.REPORTS_FOLDER) }} # account for changed workdir | |
TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci | |
TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }} | |
- name: Push reports to data branch | |
run: | | |
# BODGE: Remove success_rate.json and regression_tests.json to avoid conflicts on checkout | |
rm ${{ env.REPORTS_FOLDER }}/*.json | |
# Find folder with newest (untracked) report in it | |
report_subfolder=$(find ${{ env.REPORTS_FOLDER }} -type f -name 'report.json' \ | |
| xargs -I {} dirname {} \ | |
| xargs -I {} git ls-files --others --exclude-standard {} \ | |
| xargs -I {} dirname {} \ | |
| sort -u) | |
json_report_file="$report_subfolder/report.json" | |
# Convert JSON report to Markdown | |
markdown_report_file="$report_subfolder/report.md" | |
poetry -C benchmark run benchmark/reports/format.py "$json_report_file" > "$markdown_report_file" | |
cat "$markdown_report_file" >> $GITHUB_STEP_SUMMARY | |
git config --global user.name 'GitHub Actions' | |
git config --global user.email '[email protected]' | |
git fetch origin ${{ env.REPORTS_BRANCH }}:${{ env.REPORTS_BRANCH }} \ | |
&& git checkout ${{ env.REPORTS_BRANCH }} \ | |
|| git checkout --orphan ${{ env.REPORTS_BRANCH }} | |
git reset --hard | |
git add ${{ env.REPORTS_FOLDER }} | |
git commit -m "Benchmark report for ${{ matrix.agent-name }} @ $(date +'%Y-%m-%d')" \ | |
&& git push origin ${{ env.REPORTS_BRANCH }} |