From be454e570402c8d47168acb0b0268c6aae614f82 Mon Sep 17 00:00:00 2001
From: Nicole White
Date: Fri, 23 Feb 2024 11:33:37 -0500
Subject: [PATCH] Update evaluators (#106)

---
 .../my_project/evaluators/has_substrings.py            |  6 +++---
 .../testing-sdk/my_project/evaluators/is_valid_json.py |  6 +++---
 .../test_suites/flashcard_generator/evaluators.py      | 10 +++++-----
 .../test_suites/study_guide_outline/evaluators.py      | 10 +++++-----
 Python/testing-sdk/poetry.lock                         |  8 ++++----
 Python/testing-sdk/pyproject.toml                      |  2 +-
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/Python/testing-sdk/my_project/evaluators/has_substrings.py b/Python/testing-sdk/my_project/evaluators/has_substrings.py
index 6738e554..ef658d32 100644
--- a/Python/testing-sdk/my_project/evaluators/has_substrings.py
+++ b/Python/testing-sdk/my_project/evaluators/has_substrings.py
@@ -3,13 +3,13 @@
 from typing import List
 from typing import Optional
 
-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import BaseTestCase
 from autoblocks.testing.models import Evaluation
 from autoblocks.testing.models import Threshold
 
 
-class BaseHasSubstrings(BaseEvaluator, abc.ABC):
+class BaseHasSubstrings(BaseTestEvaluator, abc.ABC):
     id = "has-substrings"
 
     """
@@ -51,7 +51,7 @@ def output_as_str(self, output: Any) -> str:
         """
         return str(output)
 
-    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
+    def evaluate_test_case(self, test_case: BaseTestCase, output: Any) -> Evaluation:
         expected_substrings = self.expected_substrings(test_case)
         output_as_str = self.output_as_str(output)
 
diff --git a/Python/testing-sdk/my_project/evaluators/is_valid_json.py b/Python/testing-sdk/my_project/evaluators/is_valid_json.py
index 2bf82a31..1b9bee08 100644
--- a/Python/testing-sdk/my_project/evaluators/is_valid_json.py
+++ b/Python/testing-sdk/my_project/evaluators/is_valid_json.py
@@ -2,13 +2,13 @@
 from typing import Any
 from typing import Optional
 
-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import BaseTestCase
 from autoblocks.testing.models import Evaluation
 from autoblocks.testing.models import Threshold
 
 
-class IsValidJson(BaseEvaluator):
+class IsValidJson(BaseTestEvaluator):
     id = "is-valid-json"
 
     """
@@ -39,7 +39,7 @@ def output_as_str(self, output: SomeCustomOutputType) -> str:
         """
         return str(output)
 
-    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
+    def evaluate_test_case(self, test_case: BaseTestCase, output: Any) -> Evaluation:
         try:
             json.loads(self.output_as_str(output))
             return Evaluation(score=1, threshold=self.threshold)
diff --git a/Python/testing-sdk/my_project/test_suites/flashcard_generator/evaluators.py b/Python/testing-sdk/my_project/test_suites/flashcard_generator/evaluators.py
index 257a8b9e..c45655bf 100644
--- a/Python/testing-sdk/my_project/test_suites/flashcard_generator/evaluators.py
+++ b/Python/testing-sdk/my_project/test_suites/flashcard_generator/evaluators.py
@@ -1,7 +1,7 @@
 import asyncio
 from typing import List
 
-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import Evaluation
 from openai import AsyncOpenAI
 
@@ -11,7 +11,7 @@
 openai_client = AsyncOpenAI()
 
 
-class IsProfessionalTone(BaseEvaluator):
+class IsProfessionalTone(BaseTestEvaluator):
     id = "is-professional-tone"
 
     prompt = """Please evaluate the provided text for its professionalism in the context of formal communication.
@@ -55,7 +55,7 @@ async def score_flashcard(self, flashcard: Flashcard) -> int:
 
         raise ValueError(f"Unexpected response: {raw_content}")
 
-    async def evaluate(
+    async def evaluate_test_case(
         self, test_case: TestCase, output: List[Flashcard]
     ) -> Evaluation:
         # Score each flashcard asynchronously
@@ -69,7 +69,7 @@ async def evaluate(
         return Evaluation(score=sum(scores) / len(scores))
 
 
-class IsSupportedByNotes(BaseEvaluator):
+class IsSupportedByNotes(BaseTestEvaluator):
     id = "is-supported-by-notes"
 
     prompt = """Given some notes by a student and a flashcard in the form of a question and answer, evaluate whether the flashcard's question and answer are supported by the notes.
@@ -117,7 +117,7 @@ async def score_flashcard(self, test_case: TestCase, flashcard: Flashcard) -> in
 
         raise ValueError(f"Unexpected response: {raw_content}")
 
-    async def evaluate(
+    async def evaluate_test_case(
         self, test_case: TestCase, output: List[Flashcard]
     ) -> Evaluation:
         """
diff --git a/Python/testing-sdk/my_project/test_suites/study_guide_outline/evaluators.py b/Python/testing-sdk/my_project/test_suites/study_guide_outline/evaluators.py
index 16111f3c..81ab58ab 100644
--- a/Python/testing-sdk/my_project/test_suites/study_guide_outline/evaluators.py
+++ b/Python/testing-sdk/my_project/test_suites/study_guide_outline/evaluators.py
@@ -1,6 +1,6 @@
 from typing import List
 
-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import Evaluation
 from autoblocks.testing.models import Threshold
 
@@ -8,7 +8,7 @@
 from my_project.test_suites.study_guide_outline.test_cases import TestCase
 
 
-class Formatting(BaseEvaluator):
+class Formatting(BaseTestEvaluator):
     id = "formatting"
 
     @staticmethod
@@ -21,11 +21,11 @@ def score(output: str) -> int:
             return 0
         return 1
 
-    def evaluate(self, test_case: TestCase, output: str) -> Evaluation:
+    def evaluate_test_case(self, test_case: TestCase, output: str) -> Evaluation:
         return Evaluation(score=self.score(output), threshold=Threshold(gte=1))
 
 
-class NumCategories(BaseEvaluator):
+class NumCategories(BaseTestEvaluator):
     id = "num-categories"
 
     min_categories: int = 5
@@ -34,7 +34,7 @@ def score(self, output: str) -> int:
         return int(self.min_categories <= output.count("* ") <= self.max_categories)
 
 
-    def evaluate(self, test_case: TestCase, output: str) -> Evaluation:
+    def evaluate_test_case(self, test_case: TestCase, output: str) -> Evaluation:
         return Evaluation(score=self.score(output), threshold=Threshold(gte=1))
 
 
diff --git a/Python/testing-sdk/poetry.lock b/Python/testing-sdk/poetry.lock
index 94032874..4e8d2a74 100644
--- a/Python/testing-sdk/poetry.lock
+++ b/Python/testing-sdk/poetry.lock
@@ -33,13 +33,13 @@ trio = ["trio (>=0.23)"]
 
 [[package]]
 name = "autoblocksai"
-version = "0.0.27"
+version = "0.0.28"
 description = "Python client for Autoblocks"
 optional = false
 python-versions = ">=3.8.1,<4.0.0"
 files = [
-    {file = "autoblocksai-0.0.27-py3-none-any.whl", hash = "sha256:6fb8976d957503d9ff757c7f224f4fd27ef14b0a69e5456afaa4a513bd5523db"},
-    {file = "autoblocksai-0.0.27.tar.gz", hash = "sha256:d31fe964e5a5105d10913a30664a2b356073a7483addf1e23df3902683ddb78e"},
+    {file = "autoblocksai-0.0.28-py3-none-any.whl", hash = "sha256:93c4f91e4bdb3ca2bde1ef7b4b49f42bae9d60f32c679de8d1465fd9aa35ce20"},
+    {file = "autoblocksai-0.0.28.tar.gz", hash = "sha256:e711213b6aba28fdd7ed3846fa3dd91a3f6f880fc276ffacb427ec963ff4a2ac"},
 ]
 
 [package.dependencies]
@@ -448,4 +448,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "4925a91a5164a9e0abfb898b49ef93fdd270d5868d8002f5255df93dcceab0f4"
+content-hash = "d3ba99cd223e5f63e2a4071381e3077e60b7f95cbb7f9fa1ddcb9624094112c4"
diff --git a/Python/testing-sdk/pyproject.toml b/Python/testing-sdk/pyproject.toml
index 4aa409de..827f1cad 100644
--- a/Python/testing-sdk/pyproject.toml
+++ b/Python/testing-sdk/pyproject.toml
@@ -10,7 +10,7 @@ packages = [{include = "my_project"}]
 
 [tool.poetry.dependencies]
 python = "^3.11"
-autoblocksai = ">=0.0.27"
+autoblocksai = ">=0.0.28"
 openai = "^1.0.0"
 
 [tool.poetry.scripts]