Update evaluators (#106)
Nicole White authored Feb 23, 2024
1 parent 931ee12 commit be454e5
Showing 6 changed files with 21 additions and 21 deletions.
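
In short, this commit migrates the example project to the renamed evaluator API in the Autoblocks testing SDK: BaseEvaluator becomes BaseTestEvaluator, evaluate becomes evaluate_test_case, and the dependency floor moves to autoblocksai >= 0.0.28. For orientation, here is a minimal sketch of an evaluator in the new shape, mirroring the pattern in the diffs below; the TestCase dataclass and its expected_output field are hypothetical stand-ins, not code from this repository.

from dataclasses import dataclass

from autoblocks.testing.models import BaseTestCase
from autoblocks.testing.models import BaseTestEvaluator
from autoblocks.testing.models import Evaluation
from autoblocks.testing.models import Threshold


@dataclass
class TestCase(BaseTestCase):
    # Hypothetical test case for illustration only.
    input: str
    expected_output: str

    def hash(self) -> str:
        # Uniquely identifies this test case within its suite.
        return self.input


class ExactMatch(BaseTestEvaluator):
    id = "exact-match"

    def evaluate_test_case(self, test_case: TestCase, output: str) -> Evaluation:
        # Score 1 when the output matches the expected value exactly;
        # Threshold(gte=1) marks anything less as a failure.
        score = int(output == test_case.expected_output)
        return Evaluation(score=score, threshold=Threshold(gte=1))
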
Python/testing-sdk/my_project/evaluators/has_substrings.py (3 additions, 3 deletions)
@@ -3,13 +3,13 @@
 from typing import List
 from typing import Optional

-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import BaseTestCase
 from autoblocks.testing.models import Evaluation
 from autoblocks.testing.models import Threshold


-class BaseHasSubstrings(BaseEvaluator, abc.ABC):
+class BaseHasSubstrings(BaseTestEvaluator, abc.ABC):
     id = "has-substrings"

     """
@@ -51,7 +51,7 @@ def output_as_str(self, output: Any) -> str:
         """
         return str(output)

-    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
+    def evaluate_test_case(self, test_case: BaseTestCase, output: Any) -> Evaluation:
         expected_substrings = self.expected_substrings(test_case)
         output_as_str = self.output_as_str(output)

Python/testing-sdk/my_project/evaluators/is_valid_json.py (3 additions, 3 deletions)
@@ -2,13 +2,13 @@
 from typing import Any
 from typing import Optional

-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import BaseTestCase
 from autoblocks.testing.models import Evaluation
 from autoblocks.testing.models import Threshold


-class IsValidJson(BaseEvaluator):
+class IsValidJson(BaseTestEvaluator):
     id = "is-valid-json"

     """
@@ -39,7 +39,7 @@ def output_as_str(self, output: SomeCustomOutputType) -> str:
         """
         return str(output)

-    def evaluate(self, test_case: BaseTestCase, output: Any) -> Evaluation:
+    def evaluate_test_case(self, test_case: BaseTestCase, output: Any) -> Evaluation:
         try:
             json.loads(self.output_as_str(output))
             return Evaluation(score=1, threshold=self.threshold)

@@ -1,7 +1,7 @@
 import asyncio
 from typing import List

-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import Evaluation
 from openai import AsyncOpenAI

@@ -11,7 +11,7 @@
 openai_client = AsyncOpenAI()


-class IsProfessionalTone(BaseEvaluator):
+class IsProfessionalTone(BaseTestEvaluator):
     id = "is-professional-tone"

     prompt = """Please evaluate the provided text for its professionalism in the context of formal communication.
@@ -55,7 +55,7 @@ async def score_flashcard(self, flashcard: Flashcard) -> int:

         raise ValueError(f"Unexpected response: {raw_content}")

-    async def evaluate(
+    async def evaluate_test_case(
         self, test_case: TestCase, output: List[Flashcard]
     ) -> Evaluation:
         # Score each flashcard asynchronously
@@ -69,7 +69,7 @@ async def evaluate(
         return Evaluation(score=sum(scores) / len(scores))


-class IsSupportedByNotes(BaseEvaluator):
+class IsSupportedByNotes(BaseTestEvaluator):
     id = "is-supported-by-notes"

     prompt = """Given some notes by a student and a flashcard in the form of a question and answer, evaluate whether the flashcard's question and answer are supported by the notes.
@@ -117,7 +117,7 @@ async def score_flashcard(self, test_case: TestCase, flashcard: Flashcard) -> int:

         raise ValueError(f"Unexpected response: {raw_content}")

-    async def evaluate(
+    async def evaluate_test_case(
         self, test_case: TestCase, output: List[Flashcard]
     ) -> Evaluation:
         """
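
Note that evaluate_test_case may be declared async, as the two LLM-judge evaluators above do: each flashcard is scored concurrently via asyncio, and the scores are averaged into a single Evaluation.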

@@ -1,14 +1,14 @@
 from typing import List

-from autoblocks.testing.models import BaseEvaluator
+from autoblocks.testing.models import BaseTestEvaluator
 from autoblocks.testing.models import Evaluation
 from autoblocks.testing.models import Threshold

 from my_project.evaluators.has_substrings import BaseHasSubstrings
 from my_project.test_suites.study_guide_outline.test_cases import TestCase


-class Formatting(BaseEvaluator):
+class Formatting(BaseTestEvaluator):
     id = "formatting"

     @staticmethod
@@ -21,11 +21,11 @@ def score(output: str) -> int:
             return 0
         return 1

-    def evaluate(self, test_case: TestCase, output: str) -> Evaluation:
+    def evaluate_test_case(self, test_case: TestCase, output: str) -> Evaluation:
         return Evaluation(score=self.score(output), threshold=Threshold(gte=1))


-class NumCategories(BaseEvaluator):
+class NumCategories(BaseTestEvaluator):
     id = "num-categories"

     min_categories: int = 5
@@ -34,7 +34,7 @@ class NumCategories(BaseEvaluator):
     def score(self, output: str) -> int:
         return int(self.min_categories <= output.count("* ") <= self.max_categories)

-    def evaluate(self, test_case: TestCase, output: str) -> Evaluation:
+    def evaluate_test_case(self, test_case: TestCase, output: str) -> Evaluation:
         return Evaluation(score=self.score(output), threshold=Threshold(gte=1))

Python/testing-sdk/poetry.lock (4 additions, 4 deletions)

Some generated files are not rendered by default.

Python/testing-sdk/pyproject.toml (1 addition, 1 deletion)
@@ -10,7 +10,7 @@ packages = [{include = "my_project"}]

 [tool.poetry.dependencies]
 python = "^3.11"
-autoblocksai = ">=0.0.27"
+autoblocksai = ">=0.0.28"
 openai = "^1.0.0"

 [tool.poetry.scripts]
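
The floor moves from 0.0.27 to 0.0.28, presumably the first autoblocksai release that ships the renamed BaseTestEvaluator / evaluate_test_case interface used throughout this commit.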
