[Fix] Model name None in Task manager, mix eval model specific kwargs, claude retrying fix (#278)

* Fix task manager model name None issue

* Change model-specific args in mix_evals to lmms_eval_specific_kwargs

* json dump indent 4

* lint

* Fix error where Claude always retried 5 times
kcz358 authored Sep 25, 2024
1 parent 65d7db4 commit dfbddf8
Showing 3 changed files with 28 additions and 25 deletions.
4 changes: 1 addition & 3 deletions lmms_eval/__main__.py
@@ -365,7 +365,7 @@ def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
 
     if args.include_path is not None:
         eval_logger.info(f"Including path: {args.include_path}")
-    task_manager = TaskManager(args.verbosity, include_path=args.include_path)
+    task_manager = TaskManager(args.verbosity, include_path=args.include_path, model_name=args.model)
 
     # update the evaluation tracker args with the output path and the HF token
     if args.output_path:
@@ -392,8 +392,6 @@ def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
     if args.include_path is not None:
         eval_logger.info(f"Including path: {args.include_path}")
 
-    task_manager = TaskManager(args.verbosity, include_path=args.include_path)
-
     if "push_samples_to_hub" in evaluation_tracker_args and not args.log_samples:
         eval_logger.warning("Pushing samples to the Hub requires --log_samples to be set. Samples will not be pushed to the Hub.")
 
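Both hunks touch the same call: the first forwards the CLI's --model value into TaskManager, and the second deletes a later duplicate construction that rebuilt the manager without it, which is how the model name ended up None. A minimal sketch of the symptom, using a stub class (the real TaskManager lives in lmms_eval, and the model_name=None default here is an assumption inferred from the commit title):

```python
# Stub standing in for lmms_eval's TaskManager; the model_name=None
# default is an assumption, not copied from the library.
class TaskManager:
    def __init__(self, verbosity, include_path=None, model_name=None):
        self.model_name = model_name

# Before the patch: model_name was never forwarded, so it stayed None.
tm_old = TaskManager("INFO", include_path=None)
assert tm_old.model_name is None

# After the patch: the value of --model reaches the task manager.
tm_new = TaskManager("INFO", include_path=None, model_name="claude")
assert tm_new.model_name == "claude"
```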
7 changes: 6 additions & 1 deletion lmms_eval/models/claude.py
@@ -232,8 +232,10 @@ def generate_until(self, requests) -> List[str]:
                 gen_kwargs["num_beams"] = 1
 
             for attempt in range(5):
+                retry_flag = True
                 try:
                     message = client.messages.create(model=self.model_version, max_tokens=gen_kwargs["max_new_tokens"], system=self.system_prompt, temperature=gen_kwargs["temperature"], top_p=gen_kwargs["top_p"], messages=messages)
+                    retry_flag = False
                 except Exception as e:
                     eval_logger.info(f"Attempt {attempt + 1} failed with error: {str(e)}")
                     if attempt < 5 - 1:  # If we have retries left, sleep and then continue to next attempt
@@ -243,6 +245,9 @@ def generate_until(self, requests) -> List[str]:
                         res.append("")
                         pbar.update(1)
                         continue
+                if not retry_flag:
+                    break
+                eval_logger.info("Retrying...")
 
             response_text = message.content[0].text
             res.append(message.content[0].text)
@@ -254,7 +259,7 @@ def generate_until(self, requests) -> List[str]:
                 doc_uuid = f"{task}___{split}___{doc_id}"
                 self.response_cache[doc_uuid] = response_text
                 with open(self.response_persistent_file, "w") as f:
-                    json.dump(self.response_cache, f)
+                    json.dump(self.response_cache, f, indent=4)
 
         pbar.close()

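Isolated from the diff, the retry logic now works as sketched below. The key change is the exit on success: the old loop had no break, so even a successful messages.create call was followed by another attempt until all five were spent. flaky_call and the constants are stand-ins, not lmms-eval or Anthropic APIs:

```python
import time

NUM_RETRIES = 5
NUM_SECONDS_TO_SLEEP = 1  # stand-in value


def flaky_call(attempt):
    # Fails twice, then succeeds, to exercise both paths of the loop.
    if attempt < 2:
        raise RuntimeError("transient failure")
    return "ok"


result = ""
for attempt in range(NUM_RETRIES):
    retry_flag = True
    try:
        result = flaky_call(attempt)
        retry_flag = False  # success: remember that no retry is needed
    except Exception as e:
        print(f"Attempt {attempt + 1} failed with error: {e}")
        if attempt < NUM_RETRIES - 1:  # retries left: back off, then loop
            time.sleep(NUM_SECONDS_TO_SLEEP)
    if not retry_flag:
        break  # the fix: leave the loop as soon as a call succeeds
    print("Retrying...")

print(result)  # "ok" on the third attempt
```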
42 changes: 21 additions & 21 deletions lmms_eval/tasks/mix_evals/utils.py
@@ -130,15 +130,15 @@ def mix_evals_video2text_doc_to_visual(doc):
 
 
 # This is the place where you format your question
-def mix_evals_video2text_doc_to_text(doc, model_specific_prompt_kwargs=None):
-    if model_specific_prompt_kwargs is None:
-        model_specific_prompt_kwargs = {}
+def mix_evals_video2text_doc_to_text(doc, lmms_eval_specific_kwargs=None):
+    if lmms_eval_specific_kwargs is None:
+        lmms_eval_specific_kwargs = {}
     pre_prompt = ""
     post_prompt = ""
-    if "pre_prompt" in model_specific_prompt_kwargs:
-        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
-    if "post_prompt" in model_specific_prompt_kwargs:
-        post_prompt = model_specific_prompt_kwargs["post_prompt"]
+    if "pre_prompt" in lmms_eval_specific_kwargs:
+        pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    if "post_prompt" in lmms_eval_specific_kwargs:
+        post_prompt = lmms_eval_specific_kwargs["post_prompt"]
 
     user_prompt = doc["prompt"]
 
@@ -166,15 +166,15 @@ def mix_evals_video2text_doc_to_text(doc, model_specific_prompt_kwargs=None):
 """
 
 
-def mix_evals_video2text_doc_to_text_open_convs(doc, model_specific_prompt_kwargs=None):
-    if model_specific_prompt_kwargs is None:
-        model_specific_prompt_kwargs = {}
+def mix_evals_video2text_doc_to_text_open_convs(doc, lmms_eval_specific_kwargs=None):
+    if lmms_eval_specific_kwargs is None:
+        lmms_eval_specific_kwargs = {}
     pre_prompt = ""
     post_prompt = ""
-    if "pre_prompt" in model_specific_prompt_kwargs:
-        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
-    if "post_prompt" in model_specific_prompt_kwargs:
-        post_prompt = model_specific_prompt_kwargs["post_prompt"]
+    if "pre_prompt" in lmms_eval_specific_kwargs:
+        pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    if "post_prompt" in lmms_eval_specific_kwargs:
+        post_prompt = lmms_eval_specific_kwargs["post_prompt"]
 
     filtered_first_turn = re.sub(r"<video_[0-9]+>", "", doc["first_turn_user_prompt"])
     return OPEN_CONVS_PROMPT.format(
@@ -192,15 +192,15 @@ def mix_evals_video2text_doc_to_text_open_convs(doc, model_specific_prompt_kwargs=None):
 """
 
 
-def mix_evals_video2text_doc_to_text_open_2nd_convs(doc, model_specific_prompt_kwargs=None):
-    if model_specific_prompt_kwargs is None:
-        model_specific_prompt_kwargs = {}
+def mix_evals_video2text_doc_to_text_open_2nd_convs(doc, lmms_eval_specific_kwargs=None):
+    if lmms_eval_specific_kwargs is None:
+        lmms_eval_specific_kwargs = {}
     pre_prompt = ""
     post_prompt = ""
-    if "pre_prompt" in model_specific_prompt_kwargs:
-        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
-    if "post_prompt" in model_specific_prompt_kwargs:
-        post_prompt = model_specific_prompt_kwargs["post_prompt"]
+    if "pre_prompt" in lmms_eval_specific_kwargs:
+        pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    if "post_prompt" in lmms_eval_specific_kwargs:
+        post_prompt = lmms_eval_specific_kwargs["post_prompt"]
 
     return MODEL_CONVS_PROMPT.format(
         PRE=pre_prompt,
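All three functions change only the name of the hook argument, from model_specific_prompt_kwargs to lmms_eval_specific_kwargs; the pre-/post-prompt lookup itself is untouched. A condensed sketch of that contract (doc contents and prompt strings are illustrative, not taken from the MixEvals dataset):

```python
def doc_to_text(doc, lmms_eval_specific_kwargs=None):
    if lmms_eval_specific_kwargs is None:
        lmms_eval_specific_kwargs = {}
    # dict.get collapses the explicit "in" checks used in the diff above
    pre_prompt = lmms_eval_specific_kwargs.get("pre_prompt", "")
    post_prompt = lmms_eval_specific_kwargs.get("post_prompt", "")
    return f"{pre_prompt}{doc['prompt']}{post_prompt}"


print(doc_to_text({"prompt": "What happens in the video?"},
                  {"post_prompt": "\nAnswer briefly."}))
```

Since only the parameter name changes, any caller or task config still passing the old keyword would need the matching rename, which is presumably why all three functions move in one commit.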
