diff --git a/benchmarks/sql/bench.py b/benchmarks/sql/bench.py index a86c6ee4..0257b52d 100644 --- a/benchmarks/sql/bench.py +++ b/benchmarks/sql/bench.py @@ -8,10 +8,13 @@ from bench.evaluator import Evaluator from bench.loaders import CollectionDataLoader, IQLViewDataLoader, SQLViewDataLoader from bench.metrics import ( + AggregationAccuracy, ExecutionAccuracy, FilteringAccuracy, FilteringPrecision, FilteringRecall, + IQLAggregationCorrectness, + IQLAggregationParseability, IQLFiltersAccuracy, IQLFiltersCorrectness, IQLFiltersParseability, @@ -57,9 +60,12 @@ class EvaluationType(Enum): EVALUATION_METRICS = { EvaluationType.IQL.value: MetricSet( + AggregationAccuracy, FilteringAccuracy, FilteringPrecision, FilteringRecall, + IQLAggregationParseability, + IQLAggregationCorrectness, IQLFiltersAccuracy, IQLFiltersPrecision, IQLFiltersRecall, @@ -72,9 +78,12 @@ class EvaluationType(Enum): ExecutionAccuracy, ), EvaluationType.E2E.value: MetricSet( + AggregationAccuracy, FilteringAccuracy, FilteringPrecision, FilteringRecall, + IQLAggregationParseability, + IQLAggregationCorrectness, IQLFiltersAccuracy, IQLFiltersPrecision, IQLFiltersRecall, diff --git a/benchmarks/sql/bench/metrics/__init__.py b/benchmarks/sql/bench/metrics/__init__.py index f0edc124..ccd32b1c 100644 --- a/benchmarks/sql/bench/metrics/__init__.py +++ b/benchmarks/sql/bench/metrics/__init__.py @@ -1,8 +1,11 @@ from .base import Metric, MetricSet from .iql import ( + AggregationAccuracy, FilteringAccuracy, FilteringPrecision, FilteringRecall, + IQLAggregationCorrectness, + IQLAggregationParseability, IQLFiltersAccuracy, IQLFiltersCorrectness, IQLFiltersParseability, @@ -15,14 +18,17 @@ __all__ = [ "Metric", "MetricSet", + "AggregationAccuracy", "FilteringAccuracy", "FilteringPrecision", "FilteringRecall", + "IQLAggregationParseability", "IQLFiltersAccuracy", "IQLFiltersPrecision", "IQLFiltersRecall", "IQLFiltersParseability", "IQLFiltersCorrectness", + "IQLAggregationCorrectness", "SQLExactMatch", "ViewSelectionAccuracy", "ViewSelectionPrecision", diff --git a/benchmarks/sql/bench/metrics/iql.py b/benchmarks/sql/bench/metrics/iql.py index 07cf90c9..5cee9c1a 100644 --- a/benchmarks/sql/bench/metrics/iql.py +++ b/benchmarks/sql/bench/metrics/iql.py @@ -1,30 +1,49 @@ +from abc import ABC from typing import Any, Dict, List from ..pipelines import EvaluationResult from .base import Metric -class FilteringAccuracy(Metric): +class AssessingAccuracy(Metric, ABC): """ - Filtering accuracy is proportion of correct decisions (to filter or not) out of all decisions made. + Assessing accuracy is proportion of correct decisions out of all decisions made. """ + prefix: str + iql: str + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the filtering accuracy. + Computes the assessing accuracy. Args: results: List of evaluation results. Returns: - Filtering accuracy. + Assessing accuracy. """ - results = [result for result in results if result.reference.iql and result.prediction.iql] + results = [ + result + for result in results + if result.reference.iql + and result.prediction.iql + and result.reference.view_name + and result.prediction.view_name + and getattr(result.reference.iql, self.iql).generated + and getattr(result.prediction.iql, self.iql).generated + ] return { - "DM/FLT/ACC": ( + f"DM/{self.prefix}/ACC": ( sum( - isinstance(result.prediction.iql.filters.source, type(result.reference.iql.filters.source)) - and result.prediction.iql.filters.unsupported == result.reference.iql.filters.unsupported + ( + getattr(result.reference.iql, self.iql).source is not None + or getattr(result.reference.iql, self.iql).unsupported + ) + == ( + getattr(result.prediction.iql, self.iql).source is not None + or getattr(result.prediction.iql, self.iql).unsupported + ) for result in results ) / len(results) @@ -34,6 +53,24 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: } +class FilteringAccuracy(AssessingAccuracy): + """ + Filtering accuracy is proportion of correct decisions (to filter or not) out of all decisions made. + """ + + prefix: str = "FLT" + iql: str = "filters" + + +class AggregationAccuracy(AssessingAccuracy): + """ + Aggregation accuracy is proportion of correct decisions (to aggregate or not) out of all decisions made. + """ + + prefix: str = "AGG" + iql: str = "aggregation" + + class FilteringPrecision(Metric): """ Filtering precision is proportion of correct decisions to filter out of all decisions to filter. @@ -222,11 +259,14 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: } -class IQLFiltersParseability(Metric): +class IQLParseability(Metric, ABC): """ IQL filters parseability is proportion of syntactically correct (parseable) IQLs out of all generated IQLs. """ + prefix: str + iql: str + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ Computes the IQL filters parseability. @@ -241,46 +281,90 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: result for result in results if (result.reference.iql and result.prediction.iql) - and (result.reference.iql.filters and result.prediction.iql.filters) - and (result.reference.iql.filters.source and result.prediction.iql.filters.source) + and (getattr(result.reference.iql, self.iql) and getattr(result.prediction.iql, self.iql)) + and (getattr(result.reference.iql, self.iql).source and getattr(result.prediction.iql, self.iql).source) ] return { - "IQL/FLT/PARSEABILITY": ( - sum(result.prediction.iql.filters.valid for result in results) / len(results) if results else None + f"IQL/{self.prefix}/PARSEABILITY": ( + sum(getattr(result.prediction.iql, self.iql).valid for result in results) / len(results) + if results + else None ) } -class IQLFiltersCorrectness(Metric): +class IQLFiltersParseability(IQLParseability): """ - IQL filters correctness is proportion of IQLs that produce correct results out of all parseable IQLs. + IQL filters parseability is proportion of syntactically correct (parseable) IQLs out of all generated IQLs. """ + prefix: str = "FLT" + iql: str = "filters" + + +class IQLAggregationParseability(IQLParseability): + """ + IQL aggregation parseability is proportion of syntactically correct (parseable) IQLs out of all generated IQLs. + """ + + prefix: str = "AGG" + iql: str = "aggregation" + + +class IQLCorrectness(Metric, ABC): + """ + IQL correctness is proportion of IQLs that produce correct results out of all parseable IQLs. + """ + + prefix: str + iql: str + def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: """ - Computes the IQL filters correctness. + Computes the IQL correctness. Args: results: List of evaluation results. Returns: - IQL filters correctness. + IQL correctness. """ results = [ result for result in results if (result.reference.iql and result.prediction.iql) and ( - result.reference.iql.filters.source - and result.prediction.iql.filters.source - and result.prediction.iql.filters.valid + getattr(result.reference.iql, self.iql).source + and getattr(result.prediction.iql, self.iql).source + and getattr(result.prediction.iql, self.iql).valid ) ] return { - "IQL/FLT/CORRECTNESS": ( - sum(result.prediction.iql.filters.source == result.reference.iql.filters.source for result in results) + f"IQL/{self.prefix}/CORRECTNESS": ( + sum( + getattr(result.prediction.iql, self.iql).source == getattr(result.reference.iql, self.iql).source + for result in results + ) / len(results) if results else None ) } + + +class IQLFiltersCorrectness(IQLCorrectness): + """ + IQL filters correctness is proportion of IQLs that produce correct results out of all parseable IQLs. + """ + + prefix: str = "FLT" + iql: str = "filters" + + +class IQLAggregationCorrectness(IQLCorrectness): + """ + IQL aggregation correctness is proportion of IQLs that produce correct results out of all parseable IQLs. + """ + + prefix: str = "AGG" + iql: str = "aggregation" diff --git a/benchmarks/sql/bench/metrics/sql.py b/benchmarks/sql/bench/metrics/sql.py index 0b5899e7..296acac3 100644 --- a/benchmarks/sql/bench/metrics/sql.py +++ b/benchmarks/sql/bench/metrics/sql.py @@ -25,6 +25,7 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: The exact match ratio. """ + results = [result for result in results if result.reference.sql and result.prediction.sql] return { "SQL/EM": ( sum(result.prediction.sql == result.reference.sql for result in results) / len(results) @@ -95,6 +96,7 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]: Returns: Execution accuracy score and valid efficiency score. """ + results = [result for result in results if result.reference.sql and result.prediction.sql] accurate_results = [result for result in results if self._execution_accuracy(result)] return { "EX": len(accurate_results) / len(results) if results else None, @@ -121,9 +123,6 @@ def _execution_accuracy(self, result: EvaluationResult) -> bool: Returns: True if the execution results are identical, False otherwise. """ - if result.prediction.sql is None: - return False - try: ref_results = self._execute_query(result.reference.sql, result.db_id) pred_results = self._execute_query(result.prediction.sql, result.db_id) @@ -138,6 +137,10 @@ def _execution_accuracy(self, result: EvaluationResult) -> bool: if reference.shape[0] != prediction.shape[0]: return False + # If both dataframes have only one column, compare the values directly + if reference.shape[1] == prediction.shape[1] == 1: + return reference.iloc[:, 0].equals(prediction.iloc[:, 0]) + # Returned view may have the same columns, or more columns than the ground truth if not reference.columns.isin(prediction.columns).all(): return False diff --git a/benchmarks/sql/bench/views/structured/superhero.py b/benchmarks/sql/bench/views/structured/superhero.py index 2a6a75a0..d6500ea3 100644 --- a/benchmarks/sql/bench/views/structured/superhero.py +++ b/benchmarks/sql/bench/views/structured/superhero.py @@ -1,9 +1,9 @@ -# pylint: disable=missing-docstring, missing-return-doc, missing-param-doc, singleton-comparison, consider-using-in, too-many-ancestors, too-many-public-methods +# pylint: disable=attribute-defined-outside-init, missing-docstring, missing-return-doc, missing-param-doc, singleton-comparison, consider-using-in, too-many-ancestors, too-many-public-methods # flake8: noqa from typing import Literal -from sqlalchemy import ColumnElement, Engine, Select, func, select +from sqlalchemy import ColumnElement, Engine, Float, Select, case, cast, func, select from sqlalchemy.ext.declarative import DeferredReflection from sqlalchemy.orm import aliased, declarative_base @@ -76,12 +76,6 @@ class SuperheroFilterMixin: def filter_by_superhero_id(self, superhero_id: int) -> ColumnElement: """ Filters the view by the superhero id. - - Args: - superhero_id: The id of the superhero. - - Returns: - The filter condition. """ return Superhero.id == superhero_id @@ -89,12 +83,6 @@ def filter_by_superhero_id(self, superhero_id: int) -> ColumnElement: def filter_by_superhero_name(self, superhero_name: str) -> ColumnElement: """ Filters the view by the superhero nick or handle. - - Args: - superhero_name: The abstract nick or handle of the superhero. - - Returns: - The filter condition. """ return Superhero.superhero_name == superhero_name @@ -102,9 +90,6 @@ def filter_by_superhero_name(self, superhero_name: str) -> ColumnElement: def filter_by_missing_superhero_full_name(self) -> ColumnElement: """ Filters the view by the missing full name of the superhero. - - Returns: - The filter condition. """ return Superhero.full_name == None @@ -112,12 +97,6 @@ def filter_by_missing_superhero_full_name(self) -> ColumnElement: def filter_by_superhero_full_name(self, superhero_full_name: str) -> ColumnElement: """ Filters the view by the full name of the superhero. - - Args: - superhero_full_name: The human name of the superhero. - - Returns: - The filter condition. """ return Superhero.full_name == superhero_full_name @@ -125,12 +104,6 @@ def filter_by_superhero_full_name(self, superhero_full_name: str) -> ColumnEleme def filter_by_superhero_first_name(self, superhero_first_name: str) -> ColumnElement: """ Filters the view by the simmilar full name of the superhero. - - Args: - superhero_first_name: The first name of the superhero. - - Returns: - The filter condition. """ return Superhero.full_name.like(f"{superhero_first_name}%") @@ -138,12 +111,6 @@ def filter_by_superhero_first_name(self, superhero_first_name: str) -> ColumnEle def filter_by_height_cm(self, height_cm: float) -> ColumnElement: """ Filters the view by the height of the superhero. - - Args: - height_cm: The height of the superhero. - - Returns: - The filter condition. """ return Superhero.height_cm == height_cm @@ -151,12 +118,6 @@ def filter_by_height_cm(self, height_cm: float) -> ColumnElement: def filter_by_height_cm_less_than(self, height_cm: float) -> ColumnElement: """ Filters the view by the height of the superhero. - - Args: - height_cm: The height of the superhero. - - Returns: - The filter condition. """ return Superhero.height_cm < height_cm @@ -164,12 +125,6 @@ def filter_by_height_cm_less_than(self, height_cm: float) -> ColumnElement: def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: """ Filters the view by the height of the superhero. - - Args: - height_cm: The height of the superhero. - - Returns: - The filter condition. """ return Superhero.height_cm > height_cm @@ -177,13 +132,6 @@ def filter_by_height_cm_greater_than(self, height_cm: float) -> ColumnElement: def filter_by_height_cm_between(self, begin_height_cm: float, end_height_cm: float) -> ColumnElement: """ Filters the view by the height of the superhero. - - Args: - begin_height_cm: The begin height of the superhero. - end_height_cm: The end height of the superhero. - - Returns: - The filter condition. """ return Superhero.height_cm.between(begin_height_cm, end_height_cm) @@ -191,9 +139,6 @@ def filter_by_height_cm_between(self, begin_height_cm: float, end_height_cm: flo def filter_by_the_tallest(self) -> ColumnElement: """ Filter the view by the tallest superhero. - - Returns: - The filter condition. """ return Superhero.height_cm == select(func.max(Superhero.height_cm)).scalar_subquery() @@ -201,9 +146,6 @@ def filter_by_the_tallest(self) -> ColumnElement: def filter_by_missing_weight(self) -> ColumnElement: """ Filters the view by the missing weight of the superhero. - - Returns: - The filter condition. """ return Superhero.weight_kg == 0 or Superhero.weight_kg == None @@ -211,12 +153,6 @@ def filter_by_missing_weight(self) -> ColumnElement: def filter_by_weight_kg(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. - - Args: - weight_kg: The weight of the superhero. - - Returns: - The filter condition. """ return Superhero.weight_kg == weight_kg @@ -224,12 +160,6 @@ def filter_by_weight_kg(self, weight_kg: int) -> ColumnElement: def filter_by_weight_kg_greater_than(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. - - Args: - weight_kg: The weight of the superhero. - - Returns: - The filter condition. """ return Superhero.weight_kg > weight_kg @@ -237,12 +167,6 @@ def filter_by_weight_kg_greater_than(self, weight_kg: int) -> ColumnElement: def filter_by_weight_kg_less_than(self, weight_kg: int) -> ColumnElement: """ Filters the view by the weight of the superhero. - - Args: - weight_kg: The weight of the superhero. - - Returns: - The filter condition. """ return Superhero.weight_kg < weight_kg @@ -250,12 +174,6 @@ def filter_by_weight_kg_less_than(self, weight_kg: int) -> ColumnElement: def filter_by_weight_greater_than_percentage_of_average(self, average_percentage: int) -> ColumnElement: """ Filters the view by the weight greater than the percentage of average of superheroes. - - Args: - average_percentage: The percentage of the average weight. - - Returns: - The filter condition. """ return Superhero.weight_kg * 100 > select(func.avg(Superhero.weight_kg)).scalar_subquery() * average_percentage @@ -263,9 +181,6 @@ def filter_by_weight_greater_than_percentage_of_average(self, average_percentage def filter_by_the_heaviest(self) -> ColumnElement: """ Filters the view by the heaviest superhero. - - Returns: - The filter condition. """ return Superhero.weight_kg == select(func.max(Superhero.weight_kg)).scalar_subquery() @@ -273,34 +188,41 @@ def filter_by_the_heaviest(self) -> ColumnElement: def filter_by_missing_publisher(self) -> ColumnElement: """ Filters the view by the missing publisher of the superhero. - - Returns: - The filter condition. """ return Superhero.publisher_id == None +class SuperheroAggregationMixin: + """ + Mixin for aggregating the view by the superhero attributes. + """ + + @view_aggregation() + def count_superheroes(self) -> Select: + """ + Counts the number of superheros. + """ + return self.select.with_only_columns(func.count(Superhero.id).label("count_superheroes")).group_by(Superhero.id) + + @view_aggregation() + def average_height(self) -> Select: + """ + Averages the height of the superheros. + """ + return self.select.with_only_columns(func.avg(Superhero.height_cm).label("average_height")).group_by( + Superhero.id + ) + + class SuperheroColourFilterMixin: """ Mixin for filtering the view by the superhero colour attributes. """ - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - self.eye_colour = aliased(Colour) - self.hair_colour = aliased(Colour) - self.skin_colour = aliased(Colour) - @view_filter() def filter_by_eye_colour(self, eye_colour: str) -> ColumnElement: """ Filters the view by the superhero eye colour. - - Args: - eye_colour: The eye colour of the superhero. - - Returns: - The filter condition. """ return self.eye_colour.colour == eye_colour @@ -308,12 +230,6 @@ def filter_by_eye_colour(self, eye_colour: str) -> ColumnElement: def filter_by_hair_colour(self, hair_colour: str) -> ColumnElement: """ Filters the view by the superhero hair colour. - - Args: - hair_colour: The hair colour of the superhero. - - Returns: - The filter condition. """ return self.hair_colour.colour == hair_colour @@ -321,12 +237,6 @@ def filter_by_hair_colour(self, hair_colour: str) -> ColumnElement: def filter_by_skin_colour(self, skin_colour: str) -> ColumnElement: """ Filters the view by the superhero skin colour. - - Args: - skin_colour: The skin colour of the superhero. - - Returns: - The filter condition. """ return self.skin_colour.colour == skin_colour @@ -334,9 +244,6 @@ def filter_by_skin_colour(self, skin_colour: str) -> ColumnElement: def filter_by_same_hair_and_eye_colour(self) -> ColumnElement: """ Filters the view by the superhero with the same hair and eye colour. - - Returns: - The filter condition. """ return self.eye_colour.colour == self.hair_colour.colour @@ -344,13 +251,29 @@ def filter_by_same_hair_and_eye_colour(self) -> ColumnElement: def filter_by_same_hair_and_skin_colour(self) -> ColumnElement: """ Filters the view by the superhero with the same hair and skin colour. - - Returns: - The filter condition. """ return self.hair_colour.colour == self.skin_colour.colour +class SuperheroColourAggregationMixin: + """ + Mixin for aggregating the view by the superhero colour attributes. + """ + + @view_aggregation() + def percentage_of_eye_colour(self, eye_colour: str) -> Select: + """ + Calculates the percentage of objects with eye colour. + """ + return self.select.with_only_columns( + ( + cast(func.count(case((self.eye_colour.colour == eye_colour, Superhero.id), else_=None)), Float) + * 100 + / func.count(Superhero.id) + ).label(f"percentage_of_{eye_colour.lower()}") + ) + + class PublisherFilterMixin: """ Mixin for filtering the view by the publisher attributes. @@ -360,14 +283,27 @@ class PublisherFilterMixin: def filter_by_publisher_name(self, publisher_name: str) -> ColumnElement: """ Filters the view by the publisher name. + """ + return Publisher.publisher_name == publisher_name - Args: - publisher_name: The name of the publisher. - Returns: - The filter condition. +class PublisherAggregationMixin: + """ + Mixin for aggregating the view by the publisher attributes. + """ + + @view_aggregation() + def percentage_of_publisher(self, publisher_name: str) -> Select: """ - return Publisher.publisher_name == publisher_name + Calculates the percentage of objects with publisher. + """ + return self.select.with_only_columns( + ( + cast(func.count(case((Publisher.publisher_name == publisher_name, Superhero.id), else_=None)), Float) + * 100 + / func.count(Superhero.id) + ).label(f"percentage_of_{publisher_name.lower()}") + ) class AlignmentFilterMixin: @@ -379,14 +315,27 @@ class AlignmentFilterMixin: def filter_by_alignment(self, alignment: Literal["Good", "Bad", "Neutral", "N/A"]) -> ColumnElement: """ Filters the view by the superhero alignment. + """ + return Alignment.alignment == alignment - Args: - alignment: The alignment of the superhero. - Returns: - The filter condition. +class AlignmentAggregationMixin: + """ + Mixin for aggregating the view by the alignment. + """ + + @view_aggregation() + def percentage_of_alignment(self, alignment: Literal["Good", "Bad", "Neutral", "N/A"]) -> Select: """ - return Alignment.alignment == alignment + Calculates the percentage of objects with alignment. + """ + return self.select.with_only_columns( + ( + cast(func.count(case((Alignment.alignment == alignment, Superhero.id), else_=None)), Float) + * 100 + / func.count(Superhero.id) + ).label(f"percentage_of_{alignment.lower()}") + ) class GenderFilterMixin: @@ -398,59 +347,53 @@ class GenderFilterMixin: def filter_by_gender(self, gender: Literal["Male", "Female", "N/A"]) -> ColumnElement: """ Filters the view by the object gender. - - Args: - gender: The gender of the object. - - Returns: - The filter condition. """ return Gender.gender == gender -class RaceFilterMixin: +class GenderAggregationMixin: """ - Mixin for filtering the view by the race. + Mixin for aggregating the view by the gender. """ - @view_filter() - def filter_by_race(self, race: str) -> ColumnElement: + @view_aggregation() + def percentage_of_gender(self, gender: Literal["Male", "Female", "N/A"]) -> Select: """ - Filters the view by the object race. - - Args: - race: The race of the object. - - Returns: - The filter condition. + Calculates the percentage of objects with gender. """ - return Race.race == race + return self.select.with_only_columns( + ( + cast(func.count(case((Gender.gender == gender, Superhero.id), else_=None)), Float) + * 100 + / func.count(Superhero.id) + ).label(f"percentage_of_{gender.lower()}") + ) -class SuperheroAggregationMixin: +class RaceFilterMixin: """ - Mixin for aggregating the view by the superhero attributes. + Mixin for filtering the view by the race. """ - @view_aggregation() - def count_superheroes(self) -> Select: + @view_filter() + def filter_by_race(self, race: str) -> ColumnElement: """ - Counts the number of superheros. - - Returns: - The superheros count. + Filters the view by the object race. """ - return self.select.with_only_columns(func.count(Superhero.id).label("count_superheroes")).group_by(Superhero.id) + return Race.race == race class SuperheroView( DBInitMixin, SqlAlchemyBaseView, - SuperheroFilterMixin, SuperheroAggregationMixin, + SuperheroFilterMixin, SuperheroColourFilterMixin, + AlignmentAggregationMixin, AlignmentFilterMixin, + GenderAggregationMixin, GenderFilterMixin, + PublisherAggregationMixin, PublisherFilterMixin, RaceFilterMixin, ): @@ -466,6 +409,10 @@ def get_select(self) -> Select: Returns: The select object. """ + self.eye_colour = aliased(Colour) + self.hair_colour = aliased(Colour) + self.skin_colour = aliased(Colour) + return ( select( Superhero.id,