deepsense-ai · micpst · Oct 23, 2024 · Oct 13, 2024 · Oct 13, 2024 · Oct 14, 2024
diff --git a/examples/evaluation/document-search/README.md b/examples/evaluation/document-search/README.md
@@ -0,0 +1,35 @@
+# Document Search Evaluation
+
+## Ingest
+
+```sh
+uv run ingest.py
+```
+
+```sh
+uv run ingest.py +experiments=chunking-250
+```
+
+```sh
+uv run ingest.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
+```
+
+## Evaluate
+
+```sh
+uv run evaluate.py
+```
+
+```sh
+uv run evaluate.py +experiments=chunking-250
+```
+
+```sh
+uv run evaluate.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
+```
+
+### Log to Neptune
+
+```sh
+uv run evaluate.py neptune.run=True
+```
diff --git a/examples/evaluation/document-search/config/data/corpus.yaml b/examples/evaluation/document-search/config/data/corpus.yaml
@@ -0,0 +1,3 @@
+path: "micpst/hf-docs"
+split: "train"
+num_docs: 5
diff --git a/examples/evaluation/document-search/config/data/qa.yaml b/examples/evaluation/document-search/config/data/qa.yaml
@@ -0,0 +1,3 @@
+path: "micpst/hf-docs-retrieval"
+split: "train"
+
diff --git a/examples/evaluation/document-search/config/embedder/litellm.yaml b/examples/evaluation/document-search/config/embedder/litellm.yaml
@@ -0,0 +1,6 @@
+type: LiteLLMEmbeddings
+config:
+  model: "text-embedding-3-small"
+  options:
+    dimensions: 768
+    encoding_format: float
diff --git a/examples/evaluation/document-search/config/experiments/chunking-1000.yaml b/examples/evaluation/document-search/config/experiments/chunking-1000.yaml
@@ -0,0 +1,20 @@
+# @package _global_
+
+task:
+  name: chunking-1000
+
+# used only for ingestion
+providers:
+  txt:
+    config:
+      chunking_kwargs:
+        max_characters: 1000
+  md:
+    config:
+      chunking_kwargs:
+        max_characters: 1000
+
+# used for both ingestion and evaluation
+vector_store:
+  config:
+    index_name: chunk-1000
diff --git a/examples/evaluation/document-search/config/experiments/chunking-250.yaml b/examples/evaluation/document-search/config/experiments/chunking-250.yaml
@@ -0,0 +1,20 @@
+# @package _global_
+
+task:
+  name: chunking-250
+
+# used only for ingestion
+providers:
+  txt:
+    config:
+      chunking_kwargs:
+        max_characters: 250
+  md:
+    config:
+      chunking_kwargs:
+        max_characters: 250
+
+# used for both ingestion and evaluation
+vector_store:
+  config:
+    index_name: chunk-250
diff --git a/examples/evaluation/document-search/config/experiments/chunking-500.yaml b/examples/evaluation/document-search/config/experiments/chunking-500.yaml
@@ -0,0 +1,20 @@
+# @package _global_
+
+task:
+  name: chunking-500
+
+# used only for ingestion
+providers:
+  txt:
+    config:
+      chunking_kwargs:
+        max_characters: 500
+  md:
+    config:
+      chunking_kwargs:
+        max_characters: 500
+
+# used for both ingestion and evaluation
+vector_store:
+  config:
+    index_name: chunk-500
diff --git a/examples/evaluation/document-search/config/ingestion.yaml b/examples/evaluation/document-search/config/ingestion.yaml
@@ -0,0 +1,6 @@
+defaults:
+  - data: corpus
+  - embedder: litellm
+  - providers: unstructured
+  - vector_store: chroma
+  - _self_
diff --git a/examples/evaluation/document-search/config/providers/unstructured.yaml b/examples/evaluation/document-search/config/providers/unstructured.yaml
@@ -0,0 +1,25 @@
+txt:
+  type: UnstructuredProvider
+  config:
+    use_api: false
+    partition_kwargs:
+      strategy: hi_res
+    chunking_kwargs:
+      include_orig_elements: true
+      max_characters: 1000
+      new_after_n_chars: 1000
+      overlap: 0
+      overlap_all: 0
+
+md:
+  type: UnstructuredProvider
+  config:
+    use_api: false
+    partition_kwargs:
+      strategy: hi_res
+    chunking_kwargs:
+      include_orig_elements: true
+      max_characters: 1000
+      new_after_n_chars: 1000
+      overlap: 0
+      overlap_all: 0
diff --git a/examples/evaluation/document-search/config/rephraser/noop.yaml b/examples/evaluation/document-search/config/rephraser/noop.yaml
@@ -0,0 +1 @@
+type: NoopQueryRephraser
diff --git a/examples/evaluation/document-search/config/reranker/noop.yaml b/examples/evaluation/document-search/config/reranker/noop.yaml
@@ -0,0 +1 @@
+type: NoopReranker
diff --git a/examples/evaluation/document-search/config/retrieval.yaml b/examples/evaluation/document-search/config/retrieval.yaml
@@ -0,0 +1,26 @@
+defaults:
+  - data: qa
+  - embedder: litellm
+  - providers: unstructured
+  - vector_store: chroma
+  - rephraser: noop
+  - reranker: noop
+  - _self_
+
+task:
+  name: default
+  type: document-search
+
+metrics:
+  DocumentSearchPrecisionRecallF1:
+    matching_strategy: RougeChunkMatch
+    options:
+      threshold: 0.5
+  DocumentSearchRankedRetrievalMetrics:
+    matching_strategy: RougeChunkMatch
+    options:
+      threshold: 0.5
+
+neptune:
+  project: ragbits
+  run: False
diff --git a/examples/evaluation/document-search/config/vector_store/chroma.yaml b/examples/evaluation/document-search/config/vector_store/chroma.yaml
@@ -0,0 +1,9 @@
+type: ChromaDBStore
+config:
+  chroma_client:
+    type: PersistentClient
+    config:
+      path: chroma
+  embedding_function:
+    type: ragbits.core.embeddings.litellm:LiteLLMEmbeddings
+  index_name: default
diff --git a/examples/evaluation/document-search/evaluate.py b/examples/evaluation/document-search/evaluate.py
@@ -0,0 +1,66 @@
+import asyncio
+import logging
+from pathlib import Path
+
+import hydra
+from hydra.core.hydra_config import HydraConfig
+from omegaconf import DictConfig
+
+from ragbits.evaluate.evaluator import Evaluator
+from ragbits.evaluate.loaders import HuggingFaceDataLoader
+from ragbits.evaluate.metrics import DocumentSearchPrecisionRecallF1, DocumentSearchRankedRetrievalMetrics, MetricSet
+from ragbits.evaluate.pipelines import DocumentSearchPipeline
+from ragbits.evaluate.utils import log_to_file, log_to_neptune
+
+logging.getLogger("LiteLLM").setLevel(logging.ERROR)  # only ERROR and above from the "LiteLLM" logger
+logging.getLogger("httpx").setLevel(logging.ERROR)  # only ERROR and above from the "httpx" logger
+log = logging.getLogger(__name__)  # module-level logger used by the functions below
+
+
+async def bench(config: DictConfig) -> None:
+    """
+    Run the document search evaluation described by the Hydra config and store its results.
+
+    Results are always written to Hydra's run output directory; Neptune logging is
+    performed only when `neptune.run` is truthy.
+
+    Args:
+        config: Hydra configuration.
+    """
+    log.info("Starting evaluation...")
+
+    # Evaluation inputs: data loader, pipeline under test, and the configured metric set.
+    qa_loader = HuggingFaceDataLoader(config.data)
+    search_pipeline = DocumentSearchPipeline(config)
+    metric_factory = MetricSet(DocumentSearchPrecisionRecallF1, DocumentSearchRankedRetrievalMetrics)
+    metric_set = metric_factory(config.metrics)
+
+    results = await Evaluator().compute(pipeline=search_pipeline, dataloader=qa_loader, metrics=metric_set)
+
+    log.info("Evaluation finished. Saving results...")
+
+    # Hydra's runtime output directory for this run.
+    run_dir = Path(HydraConfig.get().runtime.output_dir)
+    log_to_file(results, run_dir)
+
+    neptune_enabled = config.neptune.run
+    if neptune_enabled:
+        # Optional Neptune logging, toggled with `neptune.run`.
+        log_to_neptune(config, results, run_dir)
+
+    log.info("Evaluation results saved under directory: %s", run_dir)
+
+
+@hydra.main(config_path="config", config_name="retrieval", version_base="1.3")
+def main(config: DictConfig) -> None:
+    """
+    Script entry point: run the evaluation for the config composed by Hydra.
+
+    Args:
+        config: Hydra configuration (defaults follow Hydra 1.3 via `version_base`).
+    """
+    asyncio.run(bench(config))
+
+
+if __name__ == "__main__":
+    main()  # pylint: disable=no-value-for-parameter
diff --git a/examples/evaluation/document-search/ingest.py b/examples/evaluation/document-search/ingest.py
@@ -0,0 +1,59 @@
+import asyncio
+import logging
+
+import hydra
+from omegaconf import DictConfig
+from tqdm.asyncio import tqdm
+
+from ragbits.document_search._main import DocumentSearch
+from ragbits.document_search.documents.document import DocumentMeta
+from ragbits.document_search.documents.sources import HuggingFaceSource
+
+logging.getLogger("LiteLLM").setLevel(logging.ERROR)  # only ERROR and above from the "LiteLLM" logger
+logging.getLogger("httpx").setLevel(logging.ERROR)  # only ERROR and above from the "httpx" logger
+log = logging.getLogger(__name__)  # module-level logger used by the functions below
+
+
+async def ingest(config: DictConfig) -> None:
+    """
+    Download the configured corpus rows and ingest them into document search.
+
+    One document is requested for each row index in `range(config.data.num_docs)`,
+    using `config.data.path` and `config.data.split` to build its `HuggingFaceSource`.
+
+    Args:
+        config: Hydra configuration.
+    """
+    log.info("Ingesting documents...")
+
+    search = DocumentSearch.from_config(config)  # type: ignore
+
+    # One `DocumentMeta.from_source` awaitable per row; tqdm.gather awaits them with a progress bar.
+    row_indices = range(config.data.num_docs)
+    download_tasks = [
+        DocumentMeta.from_source(
+            HuggingFaceSource(path=config.data.path, split=config.data.split, row=row)
+        )
+        for row in row_indices
+    ]
+    documents = await tqdm.gather(*download_tasks, desc="Download")
+
+    # Ingest everything that was gathered above.
+    await search.ingest(documents)
+
+    log.info("Ingestion finished.")
+
+
+@hydra.main(config_path="config", config_name="ingestion", version_base="1.3")
+def main(config: DictConfig) -> None:
+    """
+    Script entry point: run the ingestion for the config composed by Hydra.
+
+    Args:
+        config: Hydra configuration (defaults follow Hydra 1.3 via `version_base`).
+    """
+    asyncio.run(ingest(config))
+
+
+if __name__ == "__main__":
+    main()  # pylint: disable=no-value-for-parameter
diff --git a/packages/ragbits-core/pyproject.toml b/packages/ragbits-core/pyproject.toml
@@ -47,7 +47,7 @@ litellm = [
 local = [
     "torch~=2.2.1",
     "transformers~=4.44.2",
-    "numpy~=1.24.0"
+    "numpy~=1.26.0"
 ]
 lab = [
     "gradio~=4.44.0",

diff --git a/packages/ragbits-document-search/pyproject.toml b/packages/ragbits-document-search/pyproject.toml
@@ -31,7 +31,6 @@ classifiers = [
     "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 dependencies = [
-    "numpy~=1.24.0",
     "unstructured>=0.15.13",
     "unstructured-client>=0.26.0",
     "ragbits-core==0.1.0",

diff --git a/packages/ragbits-evaluate/README.md b/packages/ragbits-evaluate/README.md
@@ -0,0 +1 @@
+# Ragbits Evaluate
diff --git a/packages/ragbits-evaluate/pyproject.toml b/packages/ragbits-evaluate/pyproject.toml
@@ -0,0 +1,63 @@
+[project]
+name = "ragbits-evaluate"
+version = "0.1.0"
+description = "Building blocks for rapid development of GenAI applications"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+authors = [
+    { name = "deepsense.ai", email = "[email protected]"}
+]
+keywords = [
+    "Retrieval Augmented Generation",
+    "RAG",
+    "Large Language Models",
+    "LLMs",
+    "Generative AI",
+    "GenAI",
+    "Evaluation"
+]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Natural Language :: English",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+    "hydra-core~=1.3.2",
+    "neptune~=1.12.0",
+]
+
+[project.optional-dependencies]
+relari = [
+    "continuous-eval~=0.3.12",
+]
+
+[tool.uv]
+dev-dependencies = [
+    "pre-commit~=3.8.0",
+    "pytest~=8.3.3",
+    "pytest-cov~=5.0.0",
+    "pytest-asyncio~=0.24.0",
+    "pip-licenses>=4.0.0,<5.0.0"
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.metadata]
+allow-direct-references = true
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/ragbits"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		path: "micpst/hf-docs-retrieval"
		split: "train"