Skip to content

Commit

Permalink
[WIP] feat: update to use azure api (#340)
Browse files Browse the repository at this point in the history
* feat: update to use azure api

* feat: update mathvista with azure model as judge

* feat: update mathverse

* Update llava-in-the-wild.yaml

* lint: update
  • Loading branch information
Luodian authored Oct 27, 2024
1 parent f5f59c8 commit bc2899c
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 13 deletions.
21 changes: 14 additions & 7 deletions lmms_eval/api/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,7 @@ def _download_from_youtube(path):
accelerator = Accelerator()
if accelerator.is_main_process:
dataset_kwargs.pop("From_YouTube")
assert "load_from_disk" not in dataset_kwargs, "load_from_disk must not be True when From_YouTube is True"
self.all_dataset = datasets.load_dataset(
path=self.DATASET_PATH,
name=self.DATASET_NAME,
Expand Down Expand Up @@ -1033,13 +1034,19 @@ def concat_tar_parts(tar_parts, output_tar):
if "create_link" in dataset_kwargs:
dataset_kwargs.pop("create_link")

self.dataset = datasets.load_dataset(
path=self.DATASET_PATH,
name=self.DATASET_NAME,
download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
download_config=download_config,
**dataset_kwargs if dataset_kwargs is not None else {},
)
# Optional offline path: when dataset_kwargs carries a truthy "load_from_disk",
# DATASET_PATH is read as a local directory that was produced beforehand with
# `datasets.load_dataset(...).save_to_disk(...)` rather than as a Hub id.
# The flag is always removed from dataset_kwargs (even when falsy) so it is
# never forwarded to `datasets.load_dataset` as an unexpected extra keyword.
use_local_copy = bool(dataset_kwargs) and dataset_kwargs.pop("load_from_disk", False)
if use_local_copy:
    # `datasets.load_from_disk` takes the directory as `dataset_path`; it has
    # no `path` or `name` parameters, so DATASET_NAME is not applied here.
    self.dataset = datasets.load_from_disk(dataset_path=self.DATASET_PATH)
else:
    self.dataset = datasets.load_dataset(
        path=self.DATASET_PATH,
        name=self.DATASET_NAME,
        download_mode=datasets.DownloadMode.REUSE_DATASET_IF_EXISTS,
        download_config=download_config,
        **dataset_kwargs if dataset_kwargs is not None else {},
    )

if self.config.process_docs is not None:
for split in self.dataset:
Expand Down
3 changes: 3 additions & 0 deletions lmms_eval/tasks/llava-in-the-wild/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def get_eval(content: str, max_tokens: int, retries: int = 5):
"max_tokens": max_tokens,
}

if API_TYPE == "azure":
payload.pop("model")

for attempt in range(retries):
try:
response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
Expand Down
20 changes: 18 additions & 2 deletions lmms_eval/tasks/mathverse/mathverse_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,27 @@


class MathVerseEvaluator:
API_URL = "https://api.openai.com/v1/chat/completions"
API_TYPE = os.getenv("API_TYPE", "openai")

def __init__(self, api_key, gpt_model="gpt-3.5-turbo"):
if API_TYPE == "openai":
API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
headers = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}
elif API_TYPE == "azure":
API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
headers = {
"api-key": API_KEY,
"Content-Type": "application/json",
}

def __init__(self, api_key, gpt_model="gpt-3.5-turbo", quick_extract=False):
self.api_key = api_key
self.gpt_model = gpt_model
self.quick_extract = quick_extract

def _post_request(self, payload):
headers = {
Expand Down
21 changes: 20 additions & 1 deletion lmms_eval/tasks/mathvista/mathvista_evals.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import re
import time

Expand Down Expand Up @@ -145,7 +146,22 @@ def is_valid_triangle(a, b, perimeter):


class MathVistaEvaluator:
API_URL = "https://api.openai.com/v1/chat/completions"
API_TYPE = os.getenv("API_TYPE", "openai")

if API_TYPE == "openai":
API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
headers = {
"Authorization": f"Bearer {API_KEY}",
"Content-Type": "application/json",
}
elif API_TYPE == "azure":
API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
headers = {
"api-key": API_KEY,
"Content-Type": "application/json",
}

def __init__(self, api_key, gpt_model="gpt-3.5-turbo", quick_extract=False):
self.api_key = api_key
Expand All @@ -167,6 +183,9 @@ def get_chat_response(self, prompt, temperature=0, max_tokens=256, n=1, patience
]
payload = {"model": self.gpt_model, "messages": messages, "temperature": temperature, "max_tokens": max_tokens, "n": n}

if self.API_TYPE == "azure":
payload.pop("model")

while patience > 0:
patience -= 1
try:
Expand Down
19 changes: 18 additions & 1 deletion lmms_eval/tasks/mathvista/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,24 @@

config = yaml.safe_load("".join(safe_data))

mathvista_evaluator = MathVistaEvaluator(api_key=os.getenv("OPENAI_API_KEY", "YOUR_API_KEY"), gpt_model=config["metadata"]["gpt_eval_model_name"])

# Judge backend selection for MathVista, driven by environment variables:
#   API_TYPE="openai" (default) -> OPENAI_API_URL / OPENAI_API_KEY, Bearer auth
#   API_TYPE="azure"            -> AZURE_ENDPOINT / AZURE_API_KEY, "api-key" auth
# Each branch defines API_URL, API_KEY and headers.
API_TYPE = os.getenv("API_TYPE", "openai")
if API_TYPE == "openai":
    API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
    API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
elif API_TYPE == "azure":
    API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
    API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
    headers = {
        "api-key": API_KEY,
        "Content-Type": "application/json",
    }
else:
    # Without this branch API_KEY stays undefined and the module fails a few
    # lines later with a NameError that does not mention API_TYPE at all.
    raise ValueError(f"Unsupported API_TYPE {API_TYPE!r}; expected 'openai' or 'azure'")

mathvista_evaluator = MathVistaEvaluator(api_key=API_KEY, gpt_model=config["metadata"]["gpt_eval_model_name"])


def mathvista_doc_to_visual(doc):
Expand Down
22 changes: 20 additions & 2 deletions lmms_eval/tasks/mmvet/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,23 @@

config = yaml.safe_load("".join(safe_data))

API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
# Judge backend for MM-Vet scoring, selected by the API_TYPE environment
# variable ("openai" when unset).
API_TYPE = os.getenv("API_TYPE", "openai")

# Only "azure" and "openai" are recognised; any other value leaves API_URL,
# API_KEY and headers undefined.
if API_TYPE == "azure":
    API_URL = os.getenv("AZURE_ENDPOINT", "https://api.cognitive.microsoft.com/sts/v1.0/issueToken")
    API_KEY = os.getenv("AZURE_API_KEY", "YOUR_API_KEY")
    # Azure branch sends the key in an "api-key" header.
    headers = {"api-key": API_KEY, "Content-Type": "application/json"}
elif API_TYPE == "openai":
    API_URL = os.getenv("OPENAI_API_URL", "https://api.openai.com/v1/chat/completions")
    API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_API_KEY")
    # OpenAI branch sends the key as a Bearer token.
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}

GPT_EVAL_MODEL_NAME = config["metadata"]["gpt_eval_model_name"]
MM_VET_PROMPT = """Compare the ground truth and prediction from AI models, to give a correctness score for the prediction. <AND> in the ground truth means it is totally right only when all elements in the ground truth are present in the prediction, and <OR> means it is totally right when any one element in the ground truth is present in the prediction. The correctness score is 0.0 (totally wrong), 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, or 1.0 (totally right). Just complete the last space of the correctness score.
gpt_query_prompt | Ground truth | Prediction | Correctness
Expand Down Expand Up @@ -50,6 +65,9 @@ def get_chat_response(prompt, model=GPT_EVAL_MODEL_NAME, temperature=0.0, max_to
"max_tokens": max_tokens,
}

if API_TYPE == "azure":
payload.pop("model")

while patience > 0:
patience -= 1
try:
Expand Down

0 comments on commit bc2899c

Please sign in to comment.