diff --git a/pipeline/benchmarks/datasets/magnifierbench.py b/pipeline/benchmarks/datasets/magnifierbench.py index 10663d8a..a0c4ed97 100644 --- a/pipeline/benchmarks/datasets/magnifierbench.py +++ b/pipeline/benchmarks/datasets/magnifierbench.py @@ -145,7 +145,7 @@ def _evaluate(self, model): model_response = model.generate(question, data["images"][0]) - pred_ans = self.parse_pred_ans(model_response) + pred_ans = self.parse_pred_ans(model_response, question) freeform_question = (question.split("?")[0] + "?").replace(self.prompt, "").strip() options = question.split("?")[1]