Skip to content

Commit

Permalink
improve metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
micpst committed Sep 2, 2024
1 parent 21b7502 commit e7646ad
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 169 deletions.
9 changes: 9 additions & 0 deletions benchmarks/sql/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
from bench.evaluator import Evaluator
from bench.loaders import CollectionDataLoader, IQLViewDataLoader, SQLViewDataLoader
from bench.metrics import (
AggregationAccuracy,
ExecutionAccuracy,
FilteringAccuracy,
FilteringPrecision,
FilteringRecall,
IQLAggregationCorrectness,
IQLAggregationParseability,
IQLFiltersAccuracy,
IQLFiltersCorrectness,
IQLFiltersParseability,
Expand Down Expand Up @@ -57,9 +60,12 @@ class EvaluationType(Enum):

EVALUATION_METRICS = {
EvaluationType.IQL.value: MetricSet(
AggregationAccuracy,
FilteringAccuracy,
FilteringPrecision,
FilteringRecall,
IQLAggregationParseability,
IQLAggregationCorrectness,
IQLFiltersAccuracy,
IQLFiltersPrecision,
IQLFiltersRecall,
Expand All @@ -72,9 +78,12 @@ class EvaluationType(Enum):
ExecutionAccuracy,
),
EvaluationType.E2E.value: MetricSet(
AggregationAccuracy,
FilteringAccuracy,
FilteringPrecision,
FilteringRecall,
IQLAggregationParseability,
IQLAggregationCorrectness,
IQLFiltersAccuracy,
IQLFiltersPrecision,
IQLFiltersRecall,
Expand Down
2 changes: 2 additions & 0 deletions benchmarks/sql/bench/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .base import Metric, MetricSet
from .iql import (
AggregationAccuracy,
FilteringAccuracy,
FilteringPrecision,
FilteringRecall,
Expand All @@ -17,6 +18,7 @@
__all__ = [
"Metric",
"MetricSet",
"AggregationAccuracy",
"FilteringAccuracy",
"FilteringPrecision",
"FilteringRecall",
Expand Down
45 changes: 36 additions & 9 deletions benchmarks/sql/bench/metrics/iql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,23 @@
from .base import Metric


class FilteringAccuracy(Metric):
class AssessingAccuracy(Metric, ABC):
"""
Filtering accuracy is proportion of correct decisions (to filter or not) out of all decisions made.
Assessing accuracy is proportion of correct decisions out of all decisions made.
"""

prefix: str
iql: str

def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]:
"""
Computes the filtering accuracy.
Computes the assessing accuracy.
Args:
results: List of evaluation results.
Returns:
Filtering accuracy.
Assessing accuracy.
"""
results = [
result
Expand All @@ -27,14 +30,20 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]:
and result.prediction.iql
and result.reference.view_name
and result.prediction.view_name
and result.reference.iql.filters.generated
and result.prediction.iql.filters.generated
and getattr(result.reference.iql, self.iql).generated
and getattr(result.prediction.iql, self.iql).generated
]
return {
"DM/FLT/ACC": (
f"DM/{self.prefix}/ACC": (
sum(
(result.reference.iql.filters.source is not None or result.reference.iql.filters.unsupported)
== (result.prediction.iql.filters.source is not None or result.prediction.iql.filters.unsupported)
(
getattr(result.reference.iql, self.iql).source is not None
or getattr(result.reference.iql, self.iql).unsupported
)
== (
getattr(result.prediction.iql, self.iql).source is not None
or getattr(result.prediction.iql, self.iql).unsupported
)
for result in results
)
/ len(results)
Expand All @@ -44,6 +53,24 @@ def compute(self, results: List[EvaluationResult]) -> Dict[str, Any]:
}


class FilteringAccuracy(AssessingAccuracy):
    """
    Measures the fraction of filtering decisions (whether to filter or not)
    that agree with the reference, out of all decisions made.
    """

    # Which IQL component of the result to inspect and the metric-key prefix.
    iql: str = "filters"
    prefix: str = "FLT"


class AggregationAccuracy(AssessingAccuracy):
    """
    Measures the fraction of aggregation decisions (whether to aggregate or not)
    that agree with the reference, out of all decisions made.
    """

    # Which IQL component of the result to inspect and the metric-key prefix.
    iql: str = "aggregation"
    prefix: str = "AGG"


class FilteringPrecision(Metric):
"""
Filtering precision is proportion of correct decisions to filter out of all decisions to filter.
Expand Down
Loading

0 comments on commit e7646ad

Please sign in to comment.