diff --git a/pipeline/benchmarks/datasets/magnifierbench.py b/pipeline/benchmarks/datasets/magnifierbench.py
index 10663d8a..a0c4ed97 100644
--- a/pipeline/benchmarks/datasets/magnifierbench.py
+++ b/pipeline/benchmarks/datasets/magnifierbench.py
@@ -145,7 +145,7 @@ def _evaluate(self, model):
 
             model_response = model.generate(question, data["images"][0])
 
-            pred_ans = self.parse_pred_ans(model_response)
+            pred_ans = self.parse_pred_ans(model_response, question)
 
             freeform_question = (question.split("?")[0] + "?").replace(self.prompt, "").strip()
             options = question.split("?")[1]