From 41b576fbc33f4bdd55d456abea08a07e2e6b30bd Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Wed, 18 Dec 2024 10:45:20 +0100 Subject: [PATCH 1/4] run from config interface for evaluator --- .../config/pipeline/document_ingestion.yaml | 6 +++- .../config/pipeline/document_search.yaml | 5 +++- .../document_search_optimization.yaml | 2 ++ .../evaluation/document-search/evaluate.py | 16 ++--------- .../src/ragbits/evaluate/evaluator.py | 28 ++++++++++++++++--- .../ragbits/evaluate/pipelines/__init__.py | 15 ++++++++++ .../evaluate/pipelines/document_search.py | 9 +++--- .../src/ragbits/evaluate/utils.py | 2 +- 8 files changed, 58 insertions(+), 25 deletions(-) diff --git a/examples/evaluation/document-search/config/pipeline/document_ingestion.yaml b/examples/evaluation/document-search/config/pipeline/document_ingestion.yaml index edf49af0..32cd12a2 100644 --- a/examples/evaluation/document-search/config/pipeline/document_ingestion.yaml +++ b/examples/evaluation/document-search/config/pipeline/document_ingestion.yaml @@ -2,4 +2,8 @@ defaults: - embedder: litellm - providers: unstructured - vector_store: chroma - - _self_ \ No newline at end of file + - _self_ + +type: ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline +ingest: true +search: false \ No newline at end of file diff --git a/examples/evaluation/document-search/config/pipeline/document_search.yaml b/examples/evaluation/document-search/config/pipeline/document_search.yaml index 8eaffd68..bc379e88 100644 --- a/examples/evaluation/document-search/config/pipeline/document_search.yaml +++ b/examples/evaluation/document-search/config/pipeline/document_search.yaml @@ -4,4 +4,7 @@ defaults: - vector_store: chroma - rephraser: noop - reranker: noop - - _self_ \ No newline at end of file + - _self_ + + +type: ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline diff --git a/examples/evaluation/document-search/config/pipeline/document_search_optimization.yaml b/examples/evaluation/document-search/config/pipeline/document_search_optimization.yaml index 8364c038..34bd0542 100644 --- a/examples/evaluation/document-search/config/pipeline/document_search_optimization.yaml +++ b/examples/evaluation/document-search/config/pipeline/document_search_optimization.yaml @@ -8,3 +8,5 @@ defaults: - _self_ type: ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline +ingest: true + diff --git a/examples/evaluation/document-search/evaluate.py b/examples/evaluation/document-search/evaluate.py index d421ce59..0fec74c7 100644 --- a/examples/evaluation/document-search/evaluate.py +++ b/examples/evaluation/document-search/evaluate.py @@ -31,20 +31,8 @@ async def bench(config: DictConfig) -> None: config: Hydra configuration. """ run = setup_neptune(config) - - log.info("Starting evaluation...") - - dataloader = dataloader_factory(config.data) - pipeline = DocumentSearchPipeline(config.pipeline) - metrics = metric_set_factory(config.metrics) - - evaluator = Evaluator() - results = await evaluator.compute( - pipeline=pipeline, - dataloader=dataloader, - metrics=metrics, - ) - + log.info("Starting the experiment...") + results = await Evaluator.run_experiment_from_config(config=config) output_dir = log_to_file(results) if run: log_to_neptune(run, results, output_dir) diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py index ea1d078a..446dbe10 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py @@ -3,10 +3,14 @@ from dataclasses import asdict from typing import Any +from omegaconf import DictConfig from tqdm.asyncio import tqdm -from ragbits.evaluate.loaders.base import DataLoader -from ragbits.evaluate.metrics.base import MetricSet +from .loaders import dataloader_factory +from .metrics import metric_set_factory +from .loaders.base import DataLoader +from .metrics.base import MetricSet +from .pipelines import pipeline_factory from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult @@ -19,7 +23,7 @@ async def compute( self, pipeline: EvaluationPipeline, dataloader: DataLoader, - metrics: MetricSet, + metrics: MetricSet | None, ) -> dict[str, Any]: """ Compute the evaluation results for the given pipeline and data. @@ -34,7 +38,7 @@ async def compute( """ dataset = await dataloader.load() results, perf_results = await self._call_pipeline(pipeline, dataset) - computed_metrics = self._compute_metrics(metrics, results) + computed_metrics = self._compute_metrics(metrics, results) if metrics else {} processed_results = self._results_processor(results) return { @@ -43,6 +47,22 @@ async def compute( **processed_results, } + @classmethod + async def run_experiment_from_config(cls, config: DictConfig) -> dict[str, Any] | None: + dataloader = dataloader_factory(config.data) + pipeline = pipeline_factory(config.pipeline) + + metric_config = config.get("metrics", None) + metrics = metric_set_factory(metric_config) if metric_config is not None else None + + evaluator = cls() + results = await evaluator.compute( + pipeline=pipeline, + dataloader=dataloader, + metrics=metrics, + ) + return results + async def _call_pipeline( self, pipeline: EvaluationPipeline, diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py index e69de29b..cdfb8e02 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py @@ -0,0 +1,15 @@ +import sys + +from omegaconf import DictConfig + +from ragbits.core.utils.config_handling import import_by_path + +from .base import EvaluationPipeline + +module = sys.modules[__name__] + + +def pipeline_factory(pipeline_config: DictConfig) -> EvaluationPipeline: + pipeline_module = import_by_path(pipeline_config.type, module) + pipeline = pipeline_module(pipeline_config) + return pipeline diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py index 6b15b46f..8e76aaf1 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py @@ -38,7 +38,7 @@ def document_search(self) -> "DocumentSearch": """ return DocumentSearch.from_config(self.config) # type: ignore - async def __call__(self, data: dict) -> DocumentSearchResult: + async def __call__(self, data: dict) -> DocumentSearchResult | None: """ Runs the document search evaluation pipeline. @@ -68,7 +68,7 @@ def __init__(self, config: DictConfig | None = None) -> None: self._ingested = False self._lock = asyncio.Lock() - async def __call__(self, data: dict) -> DocumentSearchResult: + async def __call__(self, data: dict) -> DocumentSearchResult | None: """ Queries a vector store with given data Ingests the corpus to the store if has not been done @@ -78,10 +78,11 @@ async def __call__(self, data: dict) -> DocumentSearchResult: DocumentSearchResult - query result """ async with self._lock: - if not self._ingested: + if not self._ingested and self.config.get("ingest", False): await self._ingest_documents() self._ingested = True - return await super().__call__(data) + if self.config.get("search", True): + return await super().__call__(data) async def _ingest_documents(self) -> None: documents = await tqdm.gather( diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py b/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py index f2ea8fb8..b24883a8 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/utils.py @@ -101,7 +101,7 @@ def setup_neptune(config: DictConfig) -> Run | None: Returns: The Neptune run. """ - if config.neptune.run: + if config.get("neptune", {}).get("run"): run = Run( project=config.neptune.project, tags=[ From 83ed7e7bfc50aef18357b3bf376c0d56edc0eddc Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Wed, 18 Dec 2024 13:56:04 +0100 Subject: [PATCH 2/4] optimize basic --- .../document-search/{ => advanced}/README.md | 0 .../{ => advanced}/config/data/corpus.yaml | 0 .../{ => advanced}/config/data/qa.yaml | 0 .../config/embedder/litellm.yaml | 0 .../config/experiments/chunking-1000.yaml | 0 .../config/experiments/chunking-250.yaml | 0 .../config/experiments/chunking-500.yaml | 0 .../{ => advanced}/config/ingestion.yaml | 0 .../{ => advanced}/config/optimization.yaml | 10 +-- .../pipeline/answer_data_source/corpus.yaml | 0 .../config/pipeline/document_ingestion.yaml | 0 .../config/pipeline/document_search.yaml | 0 .../document_search_optimization.yaml | 0 .../config/pipeline/embedder/litellm.yaml | 0 .../embedder/litellm_opt_template.yaml | 0 .../pipeline/providers/unstructured.yaml | 0 .../providers/unstructured_opt_template.yaml | 0 .../config/pipeline/rephraser/noop.yaml | 0 .../config/pipeline/reranker/noop.yaml | 0 .../config/pipeline/vector_store/chroma.yaml | 0 .../config/providers/unstructured.yaml | 0 .../{ => advanced}/config/rephraser/noop.yaml | 0 .../{ => advanced}/config/reranker/noop.yaml | 0 .../{ => advanced}/config/retrieval.yaml | 0 .../config/vector_store/chroma.yaml | 0 .../{ => advanced}/evaluate.py | 3 - .../document-search/{ => advanced}/ingest.py | 0 .../document-search/advanced/optimize.py | 26 +++++++ .../document-search/basic/evaluate.py | 78 +++++++++++++++++++ .../document-search/basic/ingest.py | 60 ++++++++++++++ .../document-search/basic/optimize.py | 51 ++++++++++++ .../evaluation/document-search/optimize.py | 45 ----------- .../src/ragbits/core/utils/config_handling.py | 1 - .../ragbits/core/vector_stores/__init__.py | 12 ++- .../src/ragbits/document_search/_main.py | 7 +- .../src/ragbits/evaluate/evaluator.py | 9 ++- .../src/ragbits/evaluate/optimizer.py | 39 +++++++++- .../evaluate/pipelines/document_search.py | 6 +- 38 files changed, 285 insertions(+), 62 deletions(-) rename examples/evaluation/document-search/{ => advanced}/README.md (100%) rename examples/evaluation/document-search/{ => advanced}/config/data/corpus.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/data/qa.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/embedder/litellm.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/experiments/chunking-1000.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/experiments/chunking-250.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/experiments/chunking-500.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/ingestion.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/optimization.yaml (70%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/answer_data_source/corpus.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/document_ingestion.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/document_search.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/document_search_optimization.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/embedder/litellm.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/embedder/litellm_opt_template.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/providers/unstructured.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/providers/unstructured_opt_template.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/rephraser/noop.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/reranker/noop.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/pipeline/vector_store/chroma.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/providers/unstructured.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/rephraser/noop.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/reranker/noop.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/retrieval.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/config/vector_store/chroma.yaml (100%) rename examples/evaluation/document-search/{ => advanced}/evaluate.py (88%) rename examples/evaluation/document-search/{ => advanced}/ingest.py (100%) create mode 100644 examples/evaluation/document-search/advanced/optimize.py create mode 100644 examples/evaluation/document-search/basic/evaluate.py create mode 100644 examples/evaluation/document-search/basic/ingest.py create mode 100644 examples/evaluation/document-search/basic/optimize.py delete mode 100644 examples/evaluation/document-search/optimize.py diff --git a/examples/evaluation/document-search/README.md b/examples/evaluation/document-search/advanced/README.md similarity index 100% rename from examples/evaluation/document-search/README.md rename to examples/evaluation/document-search/advanced/README.md diff --git a/examples/evaluation/document-search/config/data/corpus.yaml b/examples/evaluation/document-search/advanced/config/data/corpus.yaml similarity index 100% rename from examples/evaluation/document-search/config/data/corpus.yaml rename to examples/evaluation/document-search/advanced/config/data/corpus.yaml diff --git a/examples/evaluation/document-search/config/data/qa.yaml b/examples/evaluation/document-search/advanced/config/data/qa.yaml similarity index 100% rename from examples/evaluation/document-search/config/data/qa.yaml rename to examples/evaluation/document-search/advanced/config/data/qa.yaml diff --git a/examples/evaluation/document-search/config/embedder/litellm.yaml b/examples/evaluation/document-search/advanced/config/embedder/litellm.yaml similarity index 100% rename from examples/evaluation/document-search/config/embedder/litellm.yaml rename to examples/evaluation/document-search/advanced/config/embedder/litellm.yaml diff --git a/examples/evaluation/document-search/config/experiments/chunking-1000.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml similarity index 100% rename from examples/evaluation/document-search/config/experiments/chunking-1000.yaml rename to examples/evaluation/document-search/advanced/config/experiments/chunking-1000.yaml diff --git a/examples/evaluation/document-search/config/experiments/chunking-250.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml similarity index 100% rename from examples/evaluation/document-search/config/experiments/chunking-250.yaml rename to examples/evaluation/document-search/advanced/config/experiments/chunking-250.yaml diff --git a/examples/evaluation/document-search/config/experiments/chunking-500.yaml b/examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml similarity index 100% rename from examples/evaluation/document-search/config/experiments/chunking-500.yaml rename to examples/evaluation/document-search/advanced/config/experiments/chunking-500.yaml diff --git a/examples/evaluation/document-search/config/ingestion.yaml b/examples/evaluation/document-search/advanced/config/ingestion.yaml similarity index 100% rename from examples/evaluation/document-search/config/ingestion.yaml rename to examples/evaluation/document-search/advanced/config/ingestion.yaml diff --git a/examples/evaluation/document-search/config/optimization.yaml b/examples/evaluation/document-search/advanced/config/optimization.yaml similarity index 70% rename from examples/evaluation/document-search/config/optimization.yaml rename to examples/evaluation/document-search/advanced/config/optimization.yaml index 53ff3424..807676cc 100644 --- a/examples/evaluation/document-search/config/optimization.yaml +++ b/examples/evaluation/document-search/advanced/config/optimization.yaml @@ -19,8 +19,8 @@ metrics: threshold: 0.5 -callbacks: - - type: ragbits.evaluate.callbacks.neptune:NeptuneCallbackConfigurator - args: - callback_type: neptune.integrations.optuna:NeptuneCallback - project: deepsense-ai/ragbits +#callbacks: +# - type: ragbits.evaluate.callbacks.neptune:NeptuneCallbackConfigurator +# args: +# callback_type: neptune.integrations.optuna:NeptuneCallback +# project: deepsense-ai/ragbits diff --git a/examples/evaluation/document-search/config/pipeline/answer_data_source/corpus.yaml b/examples/evaluation/document-search/advanced/config/pipeline/answer_data_source/corpus.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/answer_data_source/corpus.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/answer_data_source/corpus.yaml diff --git a/examples/evaluation/document-search/config/pipeline/document_ingestion.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_ingestion.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/document_ingestion.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/document_ingestion.yaml diff --git a/examples/evaluation/document-search/config/pipeline/document_search.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/document_search.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/document_search.yaml diff --git a/examples/evaluation/document-search/config/pipeline/document_search_optimization.yaml b/examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/document_search_optimization.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/document_search_optimization.yaml diff --git a/examples/evaluation/document-search/config/pipeline/embedder/litellm.yaml b/examples/evaluation/document-search/advanced/config/pipeline/embedder/litellm.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/embedder/litellm.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/embedder/litellm.yaml diff --git a/examples/evaluation/document-search/config/pipeline/embedder/litellm_opt_template.yaml b/examples/evaluation/document-search/advanced/config/pipeline/embedder/litellm_opt_template.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/embedder/litellm_opt_template.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/embedder/litellm_opt_template.yaml diff --git a/examples/evaluation/document-search/config/pipeline/providers/unstructured.yaml b/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/providers/unstructured.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured.yaml diff --git a/examples/evaluation/document-search/config/pipeline/providers/unstructured_opt_template.yaml b/examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_opt_template.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/providers/unstructured_opt_template.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/providers/unstructured_opt_template.yaml diff --git a/examples/evaluation/document-search/config/pipeline/rephraser/noop.yaml b/examples/evaluation/document-search/advanced/config/pipeline/rephraser/noop.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/rephraser/noop.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/rephraser/noop.yaml diff --git a/examples/evaluation/document-search/config/pipeline/reranker/noop.yaml b/examples/evaluation/document-search/advanced/config/pipeline/reranker/noop.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/reranker/noop.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/reranker/noop.yaml diff --git a/examples/evaluation/document-search/config/pipeline/vector_store/chroma.yaml b/examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma.yaml similarity index 100% rename from examples/evaluation/document-search/config/pipeline/vector_store/chroma.yaml rename to examples/evaluation/document-search/advanced/config/pipeline/vector_store/chroma.yaml diff --git a/examples/evaluation/document-search/config/providers/unstructured.yaml b/examples/evaluation/document-search/advanced/config/providers/unstructured.yaml similarity index 100% rename from examples/evaluation/document-search/config/providers/unstructured.yaml rename to examples/evaluation/document-search/advanced/config/providers/unstructured.yaml diff --git a/examples/evaluation/document-search/config/rephraser/noop.yaml b/examples/evaluation/document-search/advanced/config/rephraser/noop.yaml similarity index 100% rename from examples/evaluation/document-search/config/rephraser/noop.yaml rename to examples/evaluation/document-search/advanced/config/rephraser/noop.yaml diff --git a/examples/evaluation/document-search/config/reranker/noop.yaml b/examples/evaluation/document-search/advanced/config/reranker/noop.yaml similarity index 100% rename from examples/evaluation/document-search/config/reranker/noop.yaml rename to examples/evaluation/document-search/advanced/config/reranker/noop.yaml diff --git a/examples/evaluation/document-search/config/retrieval.yaml b/examples/evaluation/document-search/advanced/config/retrieval.yaml similarity index 100% rename from examples/evaluation/document-search/config/retrieval.yaml rename to examples/evaluation/document-search/advanced/config/retrieval.yaml diff --git a/examples/evaluation/document-search/config/vector_store/chroma.yaml b/examples/evaluation/document-search/advanced/config/vector_store/chroma.yaml similarity index 100% rename from examples/evaluation/document-search/config/vector_store/chroma.yaml rename to examples/evaluation/document-search/advanced/config/vector_store/chroma.yaml diff --git a/examples/evaluation/document-search/evaluate.py b/examples/evaluation/document-search/advanced/evaluate.py similarity index 88% rename from examples/evaluation/document-search/evaluate.py rename to examples/evaluation/document-search/advanced/evaluate.py index 0fec74c7..456e08b0 100644 --- a/examples/evaluation/document-search/evaluate.py +++ b/examples/evaluation/document-search/advanced/evaluate.py @@ -13,9 +13,6 @@ from omegaconf import DictConfig from ragbits.evaluate.evaluator import Evaluator -from ragbits.evaluate.loaders import dataloader_factory -from ragbits.evaluate.metrics import metric_set_factory -from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline from ragbits.evaluate.utils import log_to_file, log_to_neptune, setup_neptune logging.getLogger("LiteLLM").setLevel(logging.ERROR) diff --git a/examples/evaluation/document-search/ingest.py b/examples/evaluation/document-search/advanced/ingest.py similarity index 100% rename from examples/evaluation/document-search/ingest.py rename to examples/evaluation/document-search/advanced/ingest.py diff --git a/examples/evaluation/document-search/advanced/optimize.py b/examples/evaluation/document-search/advanced/optimize.py new file mode 100644 index 00000000..4644f331 --- /dev/null +++ b/examples/evaluation/document-search/advanced/optimize.py @@ -0,0 +1,26 @@ +import sys + +import hydra +from omegaconf import DictConfig, OmegaConf + +from ragbits.evaluate.optimizer import Optimizer +from ragbits.evaluate.utils import log_optimization_to_file + +module = sys.modules[__name__] + + +@hydra.main(config_path="config", config_name="optimization", version_base="3.2") +def main(config: DictConfig) -> None: + """ + Function running evaluation for all datasets and evaluation tasks defined in hydra config. + + Args: + config: Hydra configuration. + """ + config = {"optimizer": OmegaConf.create({"direction": "maximize", "n_trials": 10}), "experiment_config": config} + configs_with_scores = Optimizer.run_experiment_from_config(config=config) + log_optimization_to_file(configs_with_scores) + + +if __name__ == "__main__": + main() diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/evaluate.py new file mode 100644 index 00000000..151da795 --- /dev/null +++ b/examples/evaluation/document-search/basic/evaluate.py @@ -0,0 +1,78 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "ragbits-document-search[huggingface]", +# "ragbits-core[chroma]", +# "hydra-core~=1.3.2", +# "unstructured[md]>=0.15.13", +# ] +# /// +import asyncio +import logging +import uuid +from pathlib import Path + +from omegaconf import OmegaConf +from ragbits.evaluate.utils import log_to_file +from ragbits.evaluate.evaluator import Evaluator + +logging.getLogger("LiteLLM").setLevel(logging.ERROR) +logging.getLogger("httpx").setLevel(logging.ERROR) +log = logging.getLogger(__name__) + + +async def evaluate() -> dict: + """ + Basic example of document search evaluation. + + """ + log.info("Ingesting documents...") + + config = OmegaConf.create( + { + "pipeline": { + "type": "ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline", + "ingest": False, + "search": True, + "providers": { + "txt": { + "type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider" + } + }, + }, + "data": { + "type": "ragbits.evaluate.loaders.hf:HFDataLoader", + "options": {"name": "hf-docs-retrieval", "path": "micpst/hf-docs-retrieval", "split": "train"}, + }, + "metrics": [ + { + "type": "ragbits.evaluate.metrics.document_search:DocumentSearchPrecisionRecallF1", + "matching_strategy": "RougeChunkMatch", + "options": {"threshold": 0.5}, + } + ], + "neptune": {"project": "ragbits", "run": False}, + "task": {"name": "default", "type": "document-search"}, + } + ) + + results = await Evaluator.run_experiment_from_config(config=config) + + log.info("Evaluation finished.") + + return results + + +def main() -> None: + """ + Run the evaluation process. + + """ + results = asyncio.run(evaluate()) + out_dir = Path(str(uuid.uuid4())) + out_dir.mkdir() + log_to_file(results, output_dir=out_dir) + + +if __name__ == "__main__": + main() # pylint: disable=no-value-for-parameter diff --git a/examples/evaluation/document-search/basic/ingest.py b/examples/evaluation/document-search/basic/ingest.py new file mode 100644 index 00000000..77dfff5c --- /dev/null +++ b/examples/evaluation/document-search/basic/ingest.py @@ -0,0 +1,60 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "ragbits-document-search[huggingface]", +# "ragbits-core[chroma]", +# "hydra-core~=1.3.2", +# "unstructured[md]>=0.15.13", +# ] +# /// +import asyncio +import logging + +from omegaconf import OmegaConf +from ragbits.evaluate.pipelines import pipeline_factory + +logging.getLogger("LiteLLM").setLevel(logging.ERROR) +logging.getLogger("httpx").setLevel(logging.ERROR) +log = logging.getLogger(__name__) + + +async def ingest() -> None: + """ + Ingest documents into the document search system. + + Args: + config: Hydra configuration. + """ + log.info("Ingesting documents...") + + config = OmegaConf.create( + { + "type": "ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline", + "ingest": True, + "search": False, + "answer_data_source": {"name": "hf-docs", "path": "micpst/hf-docs", "split": "train", "num_docs": 5}, + "providers": { + "txt": {"type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider"} + }, + } + ) + + ingestor = pipeline_factory(config) # type: ignore + + await ingestor() + + log.info("Ingestion finished.") + + +def main() -> None: + """ + Run the ingestion process. + + Args: + config: Hydra configuration. + """ + asyncio.run(ingest()) + + +if __name__ == "__main__": + main() # pylint: disable=no-value-for-parameter diff --git a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/optimize.py new file mode 100644 index 00000000..d835d090 --- /dev/null +++ b/examples/evaluation/document-search/basic/optimize.py @@ -0,0 +1,51 @@ +import sys + +from omegaconf import DictConfig, OmegaConf + +from ragbits.evaluate.optimizer import Optimizer +from ragbits.evaluate.utils import log_optimization_to_file + +module = sys.modules[__name__] + + +def main() -> None: + """ + Function running evaluation for all datasets and evaluation tasks defined in config. + """ + + config = OmegaConf.create( + { + "pipeline": { + "type": "ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline", + "ingest": True, + "search": True, + "embedder": { + "model": "text-embedding-3-small", + "options": {"dimensions": {"optimize": True, "range": [32, 512]}, "encoding_format": float}, + "providers": { + "txt": { + "type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider" + } + }, + }, + }, + "data": { + "type": "ragbits.evaluate.loaders.hf:HFDataLoader", + "options": {"name": "hf-docs-retrieval", "path": "micpst/hf-docs-retrieval", "split": "train"}, + }, + "metrics": [ + { + "type": "ragbits.evaluate.metrics.document_search:DocumentSearchPrecisionRecallF1", + "matching_strategy": "RougeChunkMatch", + "options": {"threshold": 0.5}, + } + ], + } + ) + config = {"optimizer": OmegaConf.create({"direction": "maximize", "n_trials": 10}), "experiment_config": config} + configs_with_scores = Optimizer.run_experiment_from_config(config=config) + log_optimization_to_file(configs_with_scores) + + +if __name__ == "__main__": + main() diff --git a/examples/evaluation/document-search/optimize.py b/examples/evaluation/document-search/optimize.py deleted file mode 100644 index 521e4e41..00000000 --- a/examples/evaluation/document-search/optimize.py +++ /dev/null @@ -1,45 +0,0 @@ -import sys - -import hydra -from omegaconf import DictConfig, OmegaConf - -from ragbits.core.utils.config_handling import import_by_path -from ragbits.evaluate.loaders import dataloader_factory -from ragbits.evaluate.metrics import metric_set_factory -from ragbits.evaluate.optimizer import Optimizer -from ragbits.evaluate.utils import log_optimization_to_file - -module = sys.modules[__name__] - - -@hydra.main(config_path="config", config_name="optimization", version_base="3.2") -def main(config: DictConfig) -> None: - """ - Function running evaluation for all datasets and evaluation tasks defined in hydra config. - - Args: - config: Hydra configuration. - """ - dataloader = dataloader_factory(config.data) - pipeline_class = import_by_path(config.pipeline.type, module) - metrics = metric_set_factory(config.metrics) - callback_configurators = None - if getattr(config, "callbacks", None): - callback_configurators = [ - import_by_path(callback_cfg.type, module)(callback_cfg.args) for callback_cfg in config.callbacks - ] - - optimization_cfg = OmegaConf.create({"direction": "maximize", "n_trials": 10}) - optimizer = Optimizer(cfg=optimization_cfg) - configs_with_scores = optimizer.optimize( - pipeline_class=pipeline_class, - config_with_params=config.pipeline, - metrics=metrics, - dataloader=dataloader, - callback_configurators=callback_configurators, - ) - log_optimization_to_file(configs_with_scores) - - -if __name__ == "__main__": - main() diff --git a/packages/ragbits-core/src/ragbits/core/utils/config_handling.py b/packages/ragbits-core/src/ragbits/core/utils/config_handling.py index 861ffff4..7226b7b4 100644 --- a/packages/ragbits-core/src/ragbits/core/utils/config_handling.py +++ b/packages/ragbits-core/src/ragbits/core/utils/config_handling.py @@ -86,7 +86,6 @@ def subclass_from_config(cls, config: ObjectContructionConfig) -> Self: subclass = import_by_path(config.type, cls.default_module) if not issubclass(subclass, cls): raise InvalidConfigError(f"{subclass} is not a subclass of {cls}") - return subclass.from_config(config.config) @classmethod diff --git a/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py index 7a85def1..e27b6690 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py @@ -1,4 +1,14 @@ from ragbits.core.vector_stores.base import VectorStore, VectorStoreEntry, VectorStoreOptions, WhereQuery from ragbits.core.vector_stores.in_memory import InMemoryVectorStore +from ragbits.core.vector_stores.chroma import ChromaVectorStore +from ragbits.core.vector_stores.qdrant import QdrantVectorStore -__all__ = ["InMemoryVectorStore", "VectorStore", "VectorStoreEntry", "VectorStoreOptions", "WhereQuery"] +__all__ = [ + "InMemoryVectorStore", + "VectorStore", + "VectorStoreEntry", + "VectorStoreOptions", + "WhereQuery", + "ChromaVectorStore", + "QdrantVectorStore", +] diff --git a/packages/ragbits-document-search/src/ragbits/document_search/_main.py b/packages/ragbits-document-search/src/ragbits/document_search/_main.py index 5db3f289..afe2eaa5 100644 --- a/packages/ragbits-document-search/src/ragbits/document_search/_main.py +++ b/packages/ragbits-document-search/src/ragbits/document_search/_main.py @@ -39,8 +39,11 @@ class DocumentSearchConfig(BaseModel): Schema for for the dict taken by DocumentSearch.from_config method. """ - embedder: ObjectContructionConfig - vector_store: ObjectContructionConfig + embedder: ObjectContructionConfig = ObjectContructionConfig(type="LiteLLMEmbeddings") + vector_store: ObjectContructionConfig = ObjectContructionConfig( + type="ChromaVectorStore", + config={"client": {"type": "PersistentClient"}, "index_name": "default"}, + ) rephraser: ObjectContructionConfig = ObjectContructionConfig(type="NoopQueryRephraser") reranker: ObjectContructionConfig = ObjectContructionConfig(type="NoopReranker") processing_strategy: ObjectContructionConfig = ObjectContructionConfig(type="SequentialProcessing") diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py index 446dbe10..4f7e5db0 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py @@ -48,7 +48,14 @@ async def compute( } @classmethod - async def run_experiment_from_config(cls, config: DictConfig) -> dict[str, Any] | None: + async def run_experiment_from_config(cls, config: DictConfig) -> dict[str, Any]: + """ + Runs the evaluation experiment basing on configuration + Args: + config: DictConfig - soe config + Returns: + dictionary of metrics with scores + """ dataloader = dataloader_factory(config.data) pipeline = pipeline_factory(config.pipeline) diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py b/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py index 80a552b1..f730f8c8 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py @@ -1,11 +1,15 @@ import asyncio +import sys import warnings from copy import deepcopy from typing import Any import optuna -from omegaconf import DictConfig, ListConfig +from omegaconf import DictConfig, ListConfig, OmegaConf +from ragbits.core.utils.config_handling import import_by_path +from .loaders import dataloader_factory +from .metrics import metric_set_factory from .callbacks.base import CallbackConfigurator from .evaluator import Evaluator from .loaders.base import DataLoader @@ -13,6 +17,9 @@ from .pipelines.base import EvaluationPipeline +module = sys.modules[__name__] + + class Optimizer: """ Class for optimization @@ -26,6 +33,36 @@ def __init__(self, cfg: DictConfig): # TODO check how optuna handles parallelism. discuss if we want to have parallel studies self._choices_cache: dict[str, list[Any]] = {} + @classmethod + def run_experiment_from_config(cls, config: dict[str, DictConfig]) -> list[tuple[DictConfig, float, dict[str, float]]]: + """ + Runs the optimization experiment configured with config object + Args: + config: dict + Returns: + list of configs with scores + """ + optimizer_config = config["optimizer"] + config = config["experiment_config"] + dataloader = dataloader_factory(config.data) + pipeline_class = import_by_path(config.pipeline.type, module) + metrics = metric_set_factory(config.metrics) + callback_configurators = None + if getattr(config, "callbacks", None): + callback_configurators = [ + import_by_path(callback_cfg.type, module)(callback_cfg.args) for callback_cfg in config.callbacks + ] + + optimizer = cls(cfg=optimizer_config) + configs_with_scores = optimizer.optimize( + pipeline_class=pipeline_class, + config_with_params=config.pipeline, + metrics=metrics, + dataloader=dataloader, + callback_configurators=callback_configurators, + ) + return configs_with_scores + def optimize( self, pipeline_class: type[EvaluationPipeline], diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py index 8e76aaf1..6ce1c89a 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py @@ -38,7 +38,7 @@ def document_search(self) -> "DocumentSearch": """ return DocumentSearch.from_config(self.config) # type: ignore - async def __call__(self, data: dict) -> DocumentSearchResult | None: + async def __call__(self, data: dict | None) -> DocumentSearchResult | None: """ Runs the document search evaluation pipeline. @@ -64,11 +64,11 @@ class DocumentSearchWithIngestionPipeline(DocumentSearchPipeline): def __init__(self, config: DictConfig | None = None) -> None: super().__init__(config) - self.config.vector_store.config.index_name = str(uuid.uuid4()) + # self.config.vector_store.config.index_name = str(uuid.uuid4()) self._ingested = False self._lock = asyncio.Lock() - async def __call__(self, data: dict) -> DocumentSearchResult | None: + async def __call__(self, data: dict | None = None) -> DocumentSearchResult | None: """ Queries a vector store with given data Ingests the corpus to the store if has not been done From 070548adff8d0b285e417cdec0ce904b459945b0 Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Wed, 18 Dec 2024 14:42:44 +0100 Subject: [PATCH 3/4] fix ruff --- .../document-search/basic/evaluate.py | 3 ++- .../document-search/basic/ingest.py | 1 + .../document-search/basic/optimize.py | 26 +++++++++++++------ .../ragbits/core/vector_stores/__init__.py | 6 ++--- .../src/ragbits/evaluate/evaluator.py | 5 ++-- .../src/ragbits/evaluate/optimizer.py | 12 +++++---- .../ragbits/evaluate/pipelines/__init__.py | 7 +++++ .../evaluate/pipelines/document_search.py | 2 ++ 8 files changed, 43 insertions(+), 19 deletions(-) diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/evaluate.py index 151da795..5601b414 100644 --- a/examples/evaluation/document-search/basic/evaluate.py +++ b/examples/evaluation/document-search/basic/evaluate.py @@ -13,8 +13,9 @@ from pathlib import Path from omegaconf import OmegaConf -from ragbits.evaluate.utils import log_to_file + from ragbits.evaluate.evaluator import Evaluator +from ragbits.evaluate.utils import log_to_file logging.getLogger("LiteLLM").setLevel(logging.ERROR) logging.getLogger("httpx").setLevel(logging.ERROR) diff --git a/examples/evaluation/document-search/basic/ingest.py b/examples/evaluation/document-search/basic/ingest.py index 77dfff5c..46927f34 100644 --- a/examples/evaluation/document-search/basic/ingest.py +++ b/examples/evaluation/document-search/basic/ingest.py @@ -11,6 +11,7 @@ import logging from omegaconf import OmegaConf + from ragbits.evaluate.pipelines import pipeline_factory logging.getLogger("LiteLLM").setLevel(logging.ERROR) diff --git a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/optimize.py index d835d090..75035344 100644 --- a/examples/evaluation/document-search/basic/optimize.py +++ b/examples/evaluation/document-search/basic/optimize.py @@ -1,6 +1,6 @@ import sys -from omegaconf import DictConfig, OmegaConf +from omegaconf import OmegaConf from ragbits.evaluate.optimizer import Optimizer from ragbits.evaluate.utils import log_optimization_to_file @@ -12,20 +12,30 @@ def main() -> None: """ Function running evaluation for all datasets and evaluation tasks defined in config. """ - config = OmegaConf.create( { "pipeline": { "type": "ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline", "ingest": True, "search": True, + "answer_data_source": { + "name": "hf-docs", + "path": "micpst/hf-docs", + "split": "train", + "num_docs": 5, + }, + "providers": { + "txt": { + "type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider" + } + }, "embedder": { - "model": "text-embedding-3-small", - "options": {"dimensions": {"optimize": True, "range": [32, 512]}, "encoding_format": float}, - "providers": { - "txt": { - "type": "ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider" - } + "type": "ragbits.core.embeddings.litellm:LiteLLMEmbeddings", + "config": { + "model": "text-embedding-3-small", + "options": { + "dimensions": {"optimize": True, "range": [32, 512]}, + }, }, }, }, diff --git a/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py b/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py index e27b6690..426e690a 100644 --- a/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py +++ b/packages/ragbits-core/src/ragbits/core/vector_stores/__init__.py @@ -1,14 +1,14 @@ from ragbits.core.vector_stores.base import VectorStore, VectorStoreEntry, VectorStoreOptions, WhereQuery -from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.core.vector_stores.chroma import ChromaVectorStore +from ragbits.core.vector_stores.in_memory import InMemoryVectorStore from ragbits.core.vector_stores.qdrant import QdrantVectorStore __all__ = [ + "ChromaVectorStore", "InMemoryVectorStore", + "QdrantVectorStore", "VectorStore", "VectorStoreEntry", "VectorStoreOptions", "WhereQuery", - "ChromaVectorStore", - "QdrantVectorStore", ] diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py index 4f7e5db0..79b5d60a 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py @@ -6,12 +6,13 @@ from omegaconf import DictConfig from tqdm.asyncio import tqdm +from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult + from .loaders import dataloader_factory -from .metrics import metric_set_factory from .loaders.base import DataLoader +from .metrics import metric_set_factory from .metrics.base import MetricSet from .pipelines import pipeline_factory -from ragbits.evaluate.pipelines.base import EvaluationPipeline, EvaluationResult class Evaluator: diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py b/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py index f730f8c8..35491dfe 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py @@ -5,18 +5,18 @@ from typing import Any import optuna -from omegaconf import DictConfig, ListConfig, OmegaConf +from omegaconf import DictConfig, ListConfig + from ragbits.core.utils.config_handling import import_by_path -from .loaders import dataloader_factory -from .metrics import metric_set_factory from .callbacks.base import CallbackConfigurator from .evaluator import Evaluator +from .loaders import dataloader_factory from .loaders.base import DataLoader +from .metrics import metric_set_factory from .metrics.base import MetricSet from .pipelines.base import EvaluationPipeline - module = sys.modules[__name__] @@ -34,7 +34,9 @@ def __init__(self, cfg: DictConfig): self._choices_cache: dict[str, list[Any]] = {} @classmethod - def run_experiment_from_config(cls, config: dict[str, DictConfig]) -> list[tuple[DictConfig, float, dict[str, float]]]: + def run_experiment_from_config( + cls, config: dict[str, DictConfig] + ) -> list[tuple[DictConfig, float, dict[str, float]]]: """ Runs the optimization experiment configured with config object Args: diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py index cdfb8e02..b7f26a4f 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/__init__.py @@ -10,6 +10,13 @@ def pipeline_factory(pipeline_config: DictConfig) -> EvaluationPipeline: + """ + Factory of evaluation pipelines + Args: + pipeline_config: DictConfig + Returns: + instance of evaluation pipeline + """ pipeline_module = import_by_path(pipeline_config.type, module) pipeline = pipeline_module(pipeline_config) return pipeline diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py index 6ce1c89a..b772ae89 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py @@ -85,6 +85,8 @@ async def __call__(self, data: dict | None = None) -> DocumentSearchResult | Non return await super().__call__(data) async def _ingest_documents(self) -> None: + if self.config.get("search", False) and self.config.get("ingest", False): + self.document_search.vector_store._index_name = str(uuid.uuid4()) documents = await tqdm.gather( *[ DocumentMeta.from_source( From 5cd3f319027ea17fac3137481c52d4a611afa561 Mon Sep 17 00:00:00 2001 From: kdziedzic68 Date: Wed, 18 Dec 2024 14:57:36 +0100 Subject: [PATCH 4/4] simplify examples --- .../document-search/advanced/optimize.py | 4 ++-- .../basic/{evaluate.py => basic_evaluate.py} | 0 .../basic/{ingest.py => basic_ingest.py} | 0 .../basic/{optimize.py => basic_optimize.py} | 4 ++-- .../src/ragbits/evaluate/optimizer.py | 14 +++++------ .../src/ragbits/evaluate/pipelines/base.py | 2 +- .../evaluate/pipelines/document_search.py | 23 +++++++++---------- 7 files changed, 23 insertions(+), 24 deletions(-) rename examples/evaluation/document-search/basic/{evaluate.py => basic_evaluate.py} (100%) rename examples/evaluation/document-search/basic/{ingest.py => basic_ingest.py} (100%) rename examples/evaluation/document-search/basic/{optimize.py => basic_optimize.py} (93%) diff --git a/examples/evaluation/document-search/advanced/optimize.py b/examples/evaluation/document-search/advanced/optimize.py index 4644f331..24a93aab 100644 --- a/examples/evaluation/document-search/advanced/optimize.py +++ b/examples/evaluation/document-search/advanced/optimize.py @@ -17,8 +17,8 @@ def main(config: DictConfig) -> None: Args: config: Hydra configuration. """ - config = {"optimizer": OmegaConf.create({"direction": "maximize", "n_trials": 10}), "experiment_config": config} - configs_with_scores = Optimizer.run_experiment_from_config(config=config) + exp_config = {"optimizer": OmegaConf.create({"direction": "maximize", "n_trials": 10}), "experiment_config": config} + configs_with_scores = Optimizer.run_experiment_from_config(config=exp_config) log_optimization_to_file(configs_with_scores) diff --git a/examples/evaluation/document-search/basic/evaluate.py b/examples/evaluation/document-search/basic/basic_evaluate.py similarity index 100% rename from examples/evaluation/document-search/basic/evaluate.py rename to examples/evaluation/document-search/basic/basic_evaluate.py diff --git a/examples/evaluation/document-search/basic/ingest.py b/examples/evaluation/document-search/basic/basic_ingest.py similarity index 100% rename from examples/evaluation/document-search/basic/ingest.py rename to examples/evaluation/document-search/basic/basic_ingest.py diff --git a/examples/evaluation/document-search/basic/optimize.py b/examples/evaluation/document-search/basic/basic_optimize.py similarity index 93% rename from examples/evaluation/document-search/basic/optimize.py rename to examples/evaluation/document-search/basic/basic_optimize.py index 75035344..a36abd11 100644 --- a/examples/evaluation/document-search/basic/optimize.py +++ b/examples/evaluation/document-search/basic/basic_optimize.py @@ -52,8 +52,8 @@ def main() -> None: ], } ) - config = {"optimizer": OmegaConf.create({"direction": "maximize", "n_trials": 10}), "experiment_config": config} - configs_with_scores = Optimizer.run_experiment_from_config(config=config) + exp_config = {"optimizer": OmegaConf.create({"direction": "maximize", "n_trials": 10}), "experiment_config": config} + configs_with_scores = Optimizer.run_experiment_from_config(config=exp_config) log_optimization_to_file(configs_with_scores) diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py b/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py index 35491dfe..45801f02 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py @@ -45,20 +45,20 @@ def run_experiment_from_config( list of configs with scores """ optimizer_config = config["optimizer"] - config = config["experiment_config"] - dataloader = dataloader_factory(config.data) - pipeline_class = import_by_path(config.pipeline.type, module) - metrics = metric_set_factory(config.metrics) + exp_config = config["experiment_config"] + dataloader = dataloader_factory(exp_config.data) + pipeline_class = import_by_path(exp_config.pipeline.type, module) + metrics = metric_set_factory(exp_config.metrics) callback_configurators = None - if getattr(config, "callbacks", None): + if getattr(exp_config, "callbacks", None): callback_configurators = [ - import_by_path(callback_cfg.type, module)(callback_cfg.args) for callback_cfg in config.callbacks + import_by_path(callback_cfg.type, module)(callback_cfg.args) for callback_cfg in exp_config.callbacks ] optimizer = cls(cfg=optimizer_config) configs_with_scores = optimizer.optimize( pipeline_class=pipeline_class, - config_with_params=config.pipeline, + config_with_params=exp_config.pipeline, metrics=metrics, dataloader=dataloader, callback_configurators=callback_configurators, diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py index 9f08a862..2fe0401e 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/base.py @@ -28,7 +28,7 @@ def __init__(self, config: DictConfig | None = None) -> None: self.config = config or DictConfig({}) @abstractmethod - async def __call__(self, data: dict[str, Any]) -> EvaluationResult: + async def __call__(self, data: dict[str, Any] | None = None) -> EvaluationResult | None: """ Runs the evaluation pipeline. diff --git a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py index b772ae89..c62364fb 100644 --- a/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py +++ b/packages/ragbits-evaluate/src/ragbits/evaluate/pipelines/document_search.py @@ -1,5 +1,4 @@ import asyncio -import uuid from dataclasses import dataclass from functools import cached_property @@ -38,7 +37,7 @@ def document_search(self) -> "DocumentSearch": """ return DocumentSearch.from_config(self.config) # type: ignore - async def __call__(self, data: dict | None) -> DocumentSearchResult | None: + async def __call__(self, data: dict | None = None) -> DocumentSearchResult | None: """ Runs the document search evaluation pipeline. @@ -48,13 +47,15 @@ async def __call__(self, data: dict | None) -> DocumentSearchResult | None: Returns: The evaluation result. """ - elements = await self.document_search.search(data["question"]) - predicted_passages = [element.text_representation or "" for element in elements] - return DocumentSearchResult( - question=data["question"], - reference_passages=data["passages"], - predicted_passages=predicted_passages, - ) + if data is not None: + elements = await self.document_search.search(data["question"]) + predicted_passages = [element.text_representation or "" for element in elements] + return DocumentSearchResult( + question=data["question"], + reference_passages=data["passages"], + predicted_passages=predicted_passages, + ) + return None class DocumentSearchWithIngestionPipeline(DocumentSearchPipeline): @@ -68,7 +69,7 @@ def __init__(self, config: DictConfig | None = None) -> None: self._ingested = False self._lock = asyncio.Lock() - async def __call__(self, data: dict | None = None) -> DocumentSearchResult | None: + async def __call__(self, data: dict | None = None) -> DocumentSearchResult | None: # type: ignore """ Queries a vector store with given data Ingests the corpus to the store if has not been done @@ -85,8 +86,6 @@ async def __call__(self, data: dict | None = None) -> DocumentSearchResult | Non return await super().__call__(data) async def _ingest_documents(self) -> None: - if self.config.get("search", False) and self.config.get("ingest", False): - self.document_search.vector_store._index_name = str(uuid.uuid4()) documents = await tqdm.gather( *[ DocumentMeta.from_source(