Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add benchmarks API-Bank, APIBench, Nexus #1136

Merged
merged 48 commits into from
Dec 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
713a2d2
add benchmark gorilla, nexus
HHHHHejia Oct 30, 2024
9398ffb
add apibank
HHHHHejia Nov 1, 2024
f8ba0d7
Merge branch 'master' into benchmark_hejia
harryeqs Nov 19, 2024
5352cea
Merge branch 'master' into benchmark_hejia
harryeqs Nov 24, 2024
f6d3436
Merge branch 'master' into benchmark_hejia
harryeqs Nov 25, 2024
638dc57
Convert gorilla and nexusraven to regular directories
HHHHHejia Nov 26, 2024
679225e
Merge branch 'master' into benchmark_hejia
harryeqs Dec 2, 2024
1c113d1
refactor: Constructed NexusBenchmark following BaseBenchmark from GAI…
harryeqs Dec 4, 2024
ca1e490
Merge branch 'master' into benchmark_hejia
harryeqs Dec 4, 2024
b5a7d0b
Merge branch 'master' into benchmark_hejia
harryeqs Dec 5, 2024
9083048
refactor: Refactored the integration of APIBench (Gorilla) benchmark
harryeqs Dec 6, 2024
1b151e0
Merge branch 'master' into benchmark_hejia
harryeqs Dec 6, 2024
14ee80a
refactor: Modified code structure
harryeqs Dec 7, 2024
42b7407
refactor: Integrated APIBank
harryeqs Dec 8, 2024
f908681
refactor: Change directory name for smoother merge
harryeqs Dec 8, 2024
9a77c78
Merge branch 'master' into benchmark_hejia
harryeqs Dec 8, 2024
0d71313
docs: Update docs and put into benchmarks directory
harryeqs Dec 9, 2024
7745c37
docs: Included examples
harryeqs Dec 9, 2024
8ad922a
update poetry lock
harryeqs Dec 9, 2024
acd676c
fix: Fix tree_sitter_import issue
harryeqs Dec 9, 2024
1511711
Merge branch 'master' into benchmark_hejia
harryeqs Dec 9, 2024
5a26acf
update poetry lock
harryeqs Dec 9, 2024
68c4e30
update poetry lock
harryeqs Dec 9, 2024
7c427ce
clean code for upcoming refactoring
harryeqs Dec 10, 2024
4730c16
Merge branch 'master' into benchmark_hejia
harryeqs Dec 10, 2024
bc057ab
fix tests
harryeqs Dec 11, 2024
0cce601
Merge branch 'benchmark_hejia' of https://github.com/HHHHHejia/camel …
harryeqs Dec 11, 2024
b757d8d
updated download method for APIBank benchmark
harryeqs Dec 11, 2024
03221e1
updated APIBench download and eval_ast
harryeqs Dec 11, 2024
2f52186
Merge branch 'master' into benchmark_hejia
harryeqs Dec 11, 2024
0e942ef
updated docstrings
harryeqs Dec 12, 2024
35f2afa
Merge branch 'master' into benchmark_hejia
harryeqs Dec 12, 2024
ad777cf
updated docstrings
harryeqs Dec 12, 2024
ded55bf
Merge branch 'master' into benchmark_hejia
harryeqs Dec 14, 2024
83805f0
update type annotations
harryeqs Dec 14, 2024
f534efe
clean code and update docstrings
harryeqs Dec 15, 2024
1eab6e3
Merge branch 'master' into benchmark_hejia
harryeqs Dec 15, 2024
34e33cf
Merge branch 'master' into benchmark_hejia
harryeqs Dec 16, 2024
1d280a9
Merge branch 'master' into benchmark_hejia
harryeqs Dec 17, 2024
41fe3ed
Merge branch 'master' into benchmark_hejia
harryeqs Dec 18, 2024
9a0dba3
update pyproject.toml and poetry.lock
harryeqs Dec 18, 2024
a367501
Merge branch 'master' into benchmark_hejia
harryeqs Dec 19, 2024
a5127f9
improve structure and add unit tests
harryeqs Dec 23, 2024
511fbe0
Merge branch 'master' into benchmark_hejia
harryeqs Dec 23, 2024
f3da00c
resolve conflicts
harryeqs Dec 23, 2024
8907b94
update poetry.lock
harryeqs Dec 23, 2024
c38f619
Merge branch 'master' into benchmark_hejia
harryeqs Dec 27, 2024
d2b7e9d
Merge branch 'master' into benchmark_hejia
Wendong-Fan Dec 29, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update type annotations
  • Loading branch information
harryeqs committed Dec 14, 2024
commit 83805f0ef4b0b2ea2154e4ce6db857799fa223ab
14 changes: 12 additions & 2 deletions camel/benchmarks/apibank.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,21 @@ def download_github_subdirectory(
sys.path.insert(0, self.data_dir)
logger.info("Download completed.")

def load(self, level):
def load(self, force_download: bool = False):
r"""Load the APIBank Benchmark dataset.

Args:
level: Level to run benchmark on.
force_download (bool, optional): Whether to
force download the data.
"""

level = self._level
harryeqs marked this conversation as resolved.
Show resolved Hide resolved

if force_download:
logger.info("Force downloading data.")
self.download()

if level == "level-1":
file_path = Path("api_bank/lv1-lv2-samples/level-1-given-desc")
elif level == 'level-2':
Expand Down Expand Up @@ -197,7 +206,8 @@ def run( # type: ignore[override, return]
rougel_scores = []

logger.info(f"Running APIBench benchmark on {level}.")
self.load(level)
self._level = level
self.load()
datas = self._data

# Shuffle and subset data if necessary
Expand Down
72 changes: 48 additions & 24 deletions camel/benchmarks/apibench.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import random
import requests
from pathlib import Path
from typing import Any, Dict, Literal, Optional

from tqdm import tqdm
Expand Down Expand Up @@ -53,7 +54,7 @@

# This function is migrated from the original repo:
# https://github.com/ShishirPatil/gorilla
def encode_question(question, dataset_name):
def encode_question(question: str, dataset_name: str) -> str:
r"""Encode multiple prompt instructions into a single string."""

if dataset_name == "torchhub":
Expand Down Expand Up @@ -204,38 +205,60 @@ def download_github_subdirectory(

download_github_subdirectory(repo, subdir)

def load(self, dataset_name):
def load(self, force_download: bool = False):
r"""Load the APIBench Benchmark dataset.

Args:
dataset_name: Name of the dataset to be loaded.
force_download (bool, optional): Whether to
force download the data.
"""
dataset_name = self._dataset_name

if force_download:
logger.info("Force downloading data.")
self.download()

def load_json_lines(file_path: Path) -> list:
    r"""Load a JSON Lines file into a list of decoded objects.

    Each non-blank line of the file is parsed as one JSON document.
    Blank or whitespace-only lines are skipped so that a trailing
    empty line does not abort the load.

    Args:
        file_path (Path): Path of the JSON Lines file to read.

    Returns:
        list: The decoded JSON values, in file order.

    Raises:
        FileNotFoundError: If :obj:`file_path` does not exist.
        ValueError: If a line cannot be decoded as JSON.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform's default
        # encoding when reading it.
        with open(file_path, "r", encoding="utf-8") as f:
            return [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {file_path}") from e
    except json.JSONDecodeError as e:
        # Keep the original decode error attached as the cause so the
        # failing position remains available in the traceback.
        raise ValueError(
            f"Error decoding JSON in file {file_path}: {e}"
        ) from e

dataset_path = self.data_dir / dataset_name
if not dataset_path.exists():
raise FileNotFoundError(
f"Dataset directory does not exist: {dataset_path}"
)

for label in ['api', 'eval', 'questions']:
file_name = dataset_mapping[dataset_name][label]
if label == 'questions':
file_path = self.data_dir / dataset_name / file_name
questions = []
with open(file_path, "r") as f:
for line in f:
questions.append(json.loads(line))
self._data[label] = questions
if label == 'api':
file_path = self.data_dir / file_name
api_database = []
with open(file_path, "r") as f:
for line in f:
api_database.append(json.loads(line))
self._data[label] = api_database
elif label == 'eval':
file_path = self.data_dir / file_name
data = []
with open(file_path, "r") as f:
for line in f:
data.append(json.loads(line)['api_data'])
file_path = (
dataset_path / file_name
if label == 'questions'
else self.data_dir / file_name
)

# Load data based on label type
if label in ['api', 'questions', 'eval']:
data = load_json_lines(file_path)

if label == 'eval':
# Extract 'api_data' specifically for eval label
data = [item['api_data'] for item in data]

self._data[label] = data
else:
raise ValueError(f"Unknown label: {label}")

ast_database = []
for data in api_database:
for data in self._data['api']:
ast_tree = ast_parse(data['api_call'])
ast_database.append(ast_tree)
self._data['ast'] = ast_database
Expand Down Expand Up @@ -263,7 +286,8 @@ def run( # type: ignore[override]
raise ValueError(f"Invalid value for dataset: {dataset}.")

logger.info(f"Running APIBench benchmark on {dataset}.")
self.load(dataset)
self._dataset_name = dataset
self.load()
datas = self._data['questions']

# Shuffle and subset data if necessary
Expand Down
Loading
Loading