
feat: request contextualisation - core functionality #65

Open · wants to merge 57 commits into base: main

Commits (57)
21506e6
context logic subpackage; type-hint context extraction
Jun 21, 2024
a87e8e2
reworked type hint info extraction; extended functionality to also re…
ds-jakub-cierocki Jun 24, 2024
3ad4ecd
hidden args handling enabled
ds-jakub-cierocki Jun 24, 2024
b0cc0ae
improved type hints parsing and compatibility using package
ds-jakub-cierocki Jun 28, 2024
4ff5f62
dedicated exceptions for context-related operations
ds-jakub-cierocki Jun 28, 2024
c479c50
useful classmethods for context-related operations
ds-jakub-cierocki Jun 28, 2024
e3bb127
make whole context utils module protected; added IQL parsing helper; …
ds-jakub-cierocki Jun 28, 2024
de72c7c
parsing type hints _extract_params_and_context() no longer excludes B…
ds-jakub-cierocki Jun 28, 2024
d3958c0
adjusted the existing code to be aware of contexts (prompts yet untouc…
ds-jakub-cierocki Jun 28, 2024
be338bf
adjusted _type_validators.validate_arg_type() to handle typing.Union[]
ds-jakub-cierocki Jul 2, 2024
78f1535
context._utils._does_arg_allow_context() fix
ds-jakub-cierocki Jul 2, 2024
308e2e1
context record is now based on pydantic.BaseModel rather than datacla…
ds-jakub-cierocki Jul 2, 2024
73741d9
type hint lifting
ds-jakub-cierocki Jul 2, 2024
902f5ff
IQL generating LLM prompt passes BaseCallerContext() as filter argume…
ds-jakub-cierocki Jul 2, 2024
6309070
comments cleanup
ds-jakub-cierocki Jul 2, 2024
d523bf7
type hint fixes
ds-jakub-cierocki Jul 3, 2024
efe212f
Merge branch 'main' (which includes a large refactor by Michal) into …
ds-jakub-cierocki Jul 3, 2024
9ba89e5
post-merge fixes + minor refactor
ds-jakub-cierocki Jul 3, 2024
5fd802f
added missing docstrings; fixed type hints; fixed issues detected by …
ds-jakub-cierocki Jul 4, 2024
09bac55
reworked parse_param_type() function to increase performance, general…
ds-jakub-cierocki Jul 4, 2024
d42a369
fix: removed duplicated line from the prompt template
ds-jakub-cierocki Jul 4, 2024
c0b0522
adjusted existing unit tests to work with new contextualization logic
ds-jakub-cierocki Jul 4, 2024
9b2e131
linter-recommended fixes
ds-jakub-cierocki Jul 4, 2024
2d0ef4b
contextualization mechanism - dedicated unit tests
ds-jakub-cierocki Jul 5, 2024
6466f61
cleaned up overengineered code remaining from the previous iteration…
ds-jakub-cierocki Jul 5, 2024
637f7fa
replaced pydantic.BaseModel by dataclasses.dataclass, pydantic no lon…
ds-jakub-cierocki Jul 8, 2024
f867e25
BaseCallerContext: dataclass w.o. fields -> interface (abstract class…
ds-jakub-cierocki Jul 8, 2024
3423033
LLM now pastes Context() instead of BaseCallerContext() to indicate t…
ds-jakub-cierocki Jul 8, 2024
0d8cd1e
docstring typo fixes; more precise return type hint
ds-jakub-cierocki Jul 9, 2024
c97ba15
renamed Context() -> AskerContext(); added more detailed exa…
ds-jakub-cierocki Jul 9, 2024
1294a9c
type hint parsing changes: SomeCustomContext -> AskerContext; Union[a…
ds-jakub-cierocki Jul 9, 2024
999759b
refactor: collection.results.[ViewExecutionResult, ExecutionResult]."…
ds-jakub-cierocki Jul 12, 2024
2e1005a
param type parsing: correctly handling builtins types with args (e.g.…
ds-jakub-cierocki Jul 12, 2024
820066d
type hint fix: explicitly marked BaseCallerContext.alias as typing.Cla…
ds-jakub-cierocki Jul 12, 2024
25fbfa6
docs + benchmarks adjusted to meet new naming [ExecutionResult, ViewE…
ds-jakub-cierocki Jul 15, 2024
a154577
redesigned context-not-available error to follow the same principles …
ds-jakub-cierocki Jul 15, 2024
623effd
EXPERIMENTAL: reworked context injection such it is handled immediate…
ds-jakub-cierocki Jul 15, 2024
afacf5b
additional unit tests for the new contextualization mechanism
ds-jakub-cierocki Jul 19, 2024
dd8b339
context benchmark script and data
ds-jakub-cierocki Jul 22, 2024
6bb0816
refactored main prompt (too long lines), missing end-of-line characters
ds-jakub-cierocki Jul 22, 2024
f388f92
better error handling
ds-jakub-cierocki Jul 22, 2024
fbecc51
context benchmark dataset fix
ds-jakub-cierocki Jul 23, 2024
5d4ff64
added polars-based accuracy summary to the benchmark
ds-jakub-cierocki Jul 23, 2024
e7e8826
adjusted prompt to reduce hallucinations: nested filter/context calls …
ds-jakub-cierocki Jul 23, 2024
f8bf64e
merged main (inc. new benchmarks + large refactor) -> jc/issue-54-req…
ds-jakub-cierocki Aug 7, 2024
c1c871b
merge main
micpst Sep 23, 2024
8eefd9b
fix linters
micpst Sep 23, 2024
c28091f
fix tests
micpst Sep 23, 2024
69a8d58
fix tests
micpst Sep 23, 2024
d6c8fc6
fix tests
micpst Sep 23, 2024
d7026d4
rm old benchmarks
micpst Sep 23, 2024
e8271ac
some renames and stuff
micpst Sep 23, 2024
bdcc7b3
fix benchmarks
micpst Sep 23, 2024
71f53be
merge main
micpst Sep 25, 2024
c82e579
rm chroma file
micpst Sep 25, 2024
f5a40cb
add contexts to benchmarks + fix types
micpst Sep 30, 2024
fab9d3f
small refactor
micpst Oct 1, 2024
3 changes: 2 additions & 1 deletion benchmarks/sql/bench.py
@@ -28,6 +28,7 @@
)
from bench.pipelines import CollectionEvaluationPipeline, IQLViewEvaluationPipeline, SQLViewEvaluationPipeline
from bench.utils import save
+from hydra.core.hydra_config import HydraConfig
from neptune.utils import stringify_unsupported
from omegaconf import DictConfig

@@ -120,7 +121,7 @@ async def bench(config: DictConfig) -> None:

log.info("Evaluation finished. Saving results...")

-    output_dir = Path(hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)
+    output_dir = Path(HydraConfig.get().runtime.output_dir)
metrics_file = output_dir / "metrics.json"
results_file = output_dir / "results.json"

10 changes: 10 additions & 0 deletions benchmarks/sql/bench/contexts/__init__.py
@@ -0,0 +1,10 @@
from typing import Dict, Type

from dbally.context import Context

from .superhero import SuperheroContext, UserContext

CONTEXTS_REGISTRY: Dict[str, Type[Context]] = {
UserContext.__name__: UserContext,
SuperheroContext.__name__: SuperheroContext,
}
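The registry above lets benchmark configs reference contexts by class name. A minimal, self-contained sketch of the same lookup pattern, using a stand-in `Context` base class rather than the actual `dbally.context.Context`:

```python
from dataclasses import dataclass
from typing import Dict, Type


@dataclass
class Context:
    """Stand-in for dbally.context.Context."""


@dataclass
class UserContext(Context):
    name: str = "John Doe"


# Keyed by __name__ so config strings and class names cannot drift apart.
CONTEXTS_REGISTRY: Dict[str, Type[Context]] = {
    UserContext.__name__: UserContext,
}

# A config lists context names; the registry turns them into instances.
contexts = [CONTEXTS_REGISTRY[name]() for name in ["UserContext"]]
print(contexts[0].name)  # -> John Doe
```

Keying on `__name__` means renaming a context class automatically changes the string a config must use, so stale names fail fast with a `KeyError` instead of silently instantiating the wrong context.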
21 changes: 21 additions & 0 deletions benchmarks/sql/bench/contexts/superhero.py
@@ -0,0 +1,21 @@
from dataclasses import dataclass

from dbally.context import Context


@dataclass
class UserContext(Context):
"""
Current user data.
"""

name: str = "John Doe"


@dataclass
class SuperheroContext(Context):
"""
Current user favourite superhero data.
"""

name: str = "Batman"
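These dataclasses get matched to view-filter parameters through their type hints, which is the core of the PR's contextualisation mechanism. A rough, self-contained sketch of how such type-hint-based injection can work; the names below are simplified stand-ins, not db-ally's actual implementation:

```python
import inspect
from dataclasses import dataclass
from typing import Any, Callable, Dict, List


@dataclass
class Context:
    """Stand-in base class; the real code uses dbally.context.Context."""


@dataclass
class UserContext(Context):
    name: str = "John Doe"


def filter_by_user(user: UserContext) -> str:
    """A hypothetical view filter whose parameter is annotated with a context type."""
    return f"name = '{user.name}'"


def inject_contexts(func: Callable, contexts: List[Context]) -> Dict[str, Any]:
    """Bind each parameter annotated with a Context subclass to a matching instance."""
    kwargs = {}
    for name, param in inspect.signature(func).parameters.items():
        ann = param.annotation
        if isinstance(ann, type) and issubclass(ann, Context):
            for ctx in contexts:
                if isinstance(ctx, ann):
                    kwargs[name] = ctx
    return kwargs


kwargs = inject_contexts(filter_by_user, [UserContext(name="Alice")])
print(filter_by_user(**kwargs))  # -> name = 'Alice'
```

The point of the pattern is that callers pass a flat list of contexts and each filter declares, via its annotations alone, which one it needs.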
7 changes: 4 additions & 3 deletions benchmarks/sql/bench/metrics/base.py
@@ -1,6 +1,7 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Type

+from omegaconf import DictConfig
from typing_extensions import Self

from ..pipelines import EvaluationResult
@@ -11,7 +12,7 @@ class Metric(ABC):
Base class for metrics.
"""

-    def __init__(self, config: Optional[Dict] = None) -> None:
+    def __init__(self, config: Optional[DictConfig] = None) -> None:
"""
Initializes the metric.

@@ -38,7 +39,7 @@ class MetricSet:
Represents a set of metrics.
"""

-    def __init__(self, *metrics: List[Type[Metric]]) -> None:
+    def __init__(self, *metrics: Type[Metric]) -> None:
"""
Initializes the metric set.

@@ -48,7 +49,7 @@ def __init__(self, *metrics: List[Type[Metric]]) -> None:
self._metrics = metrics
self.metrics: List[Metric] = []

-    def __call__(self, config: Dict) -> Self:
+    def __call__(self, config: DictConfig) -> Self:
"""
Initializes the metrics.

50 changes: 47 additions & 3 deletions benchmarks/sql/bench/pipelines/base.py
@@ -1,14 +1,24 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
-from typing import Any, Dict, Optional, Union
+from functools import cached_property
+from typing import Any, Dict, Generic, List, Optional, Type, TypeVar, Union

from omegaconf import DictConfig
from sqlalchemy import Engine, create_engine

from dbally.context import Context
from dbally.iql._exceptions import IQLError
from dbally.iql._query import IQLQuery
from dbally.iql_generator.prompt import UnsupportedQueryError
from dbally.llms.base import LLM
from dbally.llms.clients.exceptions import LLMError
from dbally.llms.litellm import LiteLLM
from dbally.llms.local import LocalLLM
from dbally.views.base import BaseView

from ..contexts import CONTEXTS_REGISTRY

ViewT = TypeVar("ViewT", bound=BaseView)


@dataclass
@@ -23,7 +33,7 @@ class IQL:
generated: bool = True

@classmethod
-    def from_query(cls, query: Optional[Union[IQLQuery, Exception]]) -> "IQL":
+    def from_query(cls, query: Optional[Union[IQLQuery, BaseException]]) -> "IQL":
"""
Creates an IQL object from the query.

@@ -81,7 +91,12 @@ class EvaluationPipeline(ABC):
Collection evaluation pipeline.
"""

-    def get_llm(self, config: Dict) -> LLM:
+    def __init__(self, config: DictConfig) -> None:
+        super().__init__()
+        self.config = config
+
+    @staticmethod
+    def _get_llm(config: DictConfig) -> LLM:
"""
Returns the LLM based on the configuration.

@@ -95,6 +110,13 @@ def get_llm(self, config: Dict) -> LLM:
return LocalLLM(config.model_name.split("/", 1)[1])
return LiteLLM(config.model_name)

@cached_property
def dbs(self) -> Dict[str, Engine]:
"""
Returns the database engines based on the configuration.
"""
return {db: create_engine(f"sqlite:///data/{db}.db") for db in self.config.setup.views}

@abstractmethod
async def __call__(self, data: Dict[str, Any]) -> EvaluationResult:
"""
@@ -106,3 +128,25 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult:
Returns:
The evaluation result.
"""


class ViewEvaluationMixin(Generic[ViewT]):
"""
View evaluation mixin.
"""

@cached_property
def contexts(self) -> List[Context]:
"""
Returns the contexts based on the configuration.
"""
return [
CONTEXTS_REGISTRY[context]() for contexts in self.config.setup.contexts.values() for context in contexts
]

@cached_property
@abstractmethod
def views(self) -> Dict[str, Type[ViewT]]:
"""
Returns the view classes mapping based on the configuration
"""
74 changes: 43 additions & 31 deletions benchmarks/sql/bench/pipelines/collection.py
@@ -1,57 +1,68 @@
-from typing import Any, Dict
-from sqlalchemy import create_engine
+from functools import cached_property
+from typing import Any, Dict, Type, Union

import dbally
from dbally.collection.collection import Collection
from dbally.collection.exceptions import NoViewFoundError
from dbally.llms.base import LLM
from dbally.view_selection.llm_view_selector import LLMViewSelector
from dbally.views.exceptions import ViewExecutionError
from dbally.views.freeform.text2sql.view import BaseText2SQLView
from dbally.views.sqlalchemy_base import SqlAlchemyBaseView

from ..views import VIEWS_REGISTRY
-from .base import IQL, EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult
+from .base import IQL, EvaluationPipeline, EvaluationResult, ExecutionResult, IQLResult, ViewEvaluationMixin


-class CollectionEvaluationPipeline(EvaluationPipeline):
+class CollectionEvaluationPipeline(
+    EvaluationPipeline, ViewEvaluationMixin[Union[SqlAlchemyBaseView, BaseText2SQLView]]
+):
"""
Collection evaluation pipeline.
"""

-    def __init__(self, config: Dict) -> None:
-        """
-        Constructs the pipeline for evaluating collection predictions.
-
-        Args:
-            config: The configuration for the pipeline.
-        """
-        self.collection = self.get_collection(config.setup)
+    @cached_property
+    def selector(self) -> LLM:
+        """
+        Returns the selector LLM.
+        """
+        return self._get_llm(self.config.setup.selector_llm)

-    def get_collection(self, config: Dict) -> Collection:
-        """
-        Sets up the collection based on the configuration.
-
-        Args:
-            config: The collection configuration.
-
-        Returns:
-            The collection.
-        """
-        generator_llm = self.get_llm(config.generator_llm)
-        selector_llm = self.get_llm(config.selector_llm)
-        view_selector = LLMViewSelector(selector_llm)
+    @cached_property
+    def generator(self) -> LLM:
+        """
+        Returns the generator LLM.
+        """
+        return self._get_llm(self.config.setup.generator_llm)
+
+    @cached_property
+    def views(self) -> Dict[str, Type[Union[SqlAlchemyBaseView, BaseText2SQLView]]]:
+        """
+        Returns the view classes mapping based on the configuration.
+        """
+        return {
+            db: cls
+            for db, views in self.config.setup.views.items()
+            for view in views
+            if issubclass(cls := VIEWS_REGISTRY[view], (SqlAlchemyBaseView, BaseText2SQLView))
+        }
+
+    @cached_property
+    def collection(self) -> Collection:
+        """
+        Returns the collection used for evaluation.
+        """
+        view_selector = LLMViewSelector(self.selector)

         collection = dbally.create_collection(
-            name=config.name,
-            llm=generator_llm,
+            name=self.config.setup.name,
+            llm=self.generator,
             view_selector=view_selector,
         )
         collection.n_retries = 0

-        for db_name, view_names in config.views.items():
-            db = create_engine(f"sqlite:///data/{db_name}.db")
-            for view_name in view_names:
-                view_cls = VIEWS_REGISTRY[view_name]
-                collection.add(view_cls, lambda: view_cls(db))  # pylint: disable=cell-var-from-loop
+        for db, view in self.views.items():
+            collection.add(view, lambda: view(self.dbs[db]))  # pylint: disable=cell-var-from-loop

return collection
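The `pylint: disable=cell-var-from-loop` comment in the loop above flags Python's late-binding closures: a lambda created in a loop reads the loop variable when it is called, not when it is defined. A standalone illustration of the pitfall and the usual default-argument fix:

```python
# Late binding: every lambda reads v after the loop has finished.
late = [lambda: v for v in ("a", "b", "c")]
print([f() for f in late])  # -> ['c', 'c', 'c']

# Fix: bind the current value as a default argument at definition time.
bound = [lambda v=v: v for v in ("a", "b", "c")]
print([f() for f in bound])  # -> ['a', 'b', 'c']
```

The pipeline code suppresses the warning rather than rebinding, which is only safe if each lambda is consumed before the loop variables change; the default-argument idiom avoids having to reason about that.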

@@ -68,6 +79,7 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult:
try:
result = await self.collection.ask(
question=data["question"],
+                contexts=self.contexts,
dry_run=True,
return_natural_response=False,
)
@@ -85,10 +97,10 @@ async def __call__(self, data: Dict[str, Any]) -> EvaluationResult:
prediction = ExecutionResult(
view_name=result.view_name,
iql=IQLResult(
-                filters=IQL(source=result.context["iql"]["filters"]),
-                aggregation=IQL(source=result.context["iql"]["aggregation"]),
+                filters=IQL(source=result.metadata["iql"]["filters"]),
+                aggregation=IQL(source=result.metadata["iql"]["aggregation"]),
             ),
-            sql=result.context["sql"],
+            sql=result.metadata["sql"],
)

reference = ExecutionResult(