Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(eval): add evaluation pipeline for document search #91

Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions examples/evaluation/document-search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Document Search Evaluation

## Ingest

```sh
uv run ingest.py
```

```sh
uv run ingest.py +experiments=chunking-250
```

```sh
uv run ingest.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
```

## Evaluate

```sh
uv run evaluate.py
```

```sh
uv run evaluate.py +experiments=chunking-250
```

```sh
uv run evaluate.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
```

### Log to Neptune

```sh
uv run evaluate.py neptune.run=True
```
3 changes: 3 additions & 0 deletions examples/evaluation/document-search/config/data/corpus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path: "micpst/hf-docs"
split: "train"
num_docs: 5
3 changes: 3 additions & 0 deletions examples/evaluation/document-search/config/data/qa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path: "micpst/hf-docs-retrieval"
split: "train"

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
type: LiteLLMEmbeddings
config:
model: "text-embedding-3-small"
options:
dimensions: 768
encoding_format: float
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-1000

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 1000
md:
config:
chunking_kwargs:
max_characters: 1000

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-1000
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-250

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 250
md:
config:
chunking_kwargs:
max_characters: 250

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-250
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-500

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 500
md:
config:
chunking_kwargs:
max_characters: 500

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-500
6 changes: 6 additions & 0 deletions examples/evaluation/document-search/config/ingestion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
defaults:
- data: corpus
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
txt:
type: UnstructuredProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0

md:
type: UnstructuredProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: NoopQueryRephraser
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: NoopReranker
26 changes: 26 additions & 0 deletions examples/evaluation/document-search/config/retrieval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
defaults:
- data: qa
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- rephraser: noop
- reranker: noop
- _self_

task:
name: default
type: document-search

metrics:
DocumentSearchPrecisionRecallF1:
matching_strategy: RougeChunkMatch
options:
threshold: 0.5
DocumentSearchRankedRetrievalMetrics:
matching_strategy: RougeChunkMatch
options:
threshold: 0.5

neptune:
project: ragbits
run: False
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
type: ChromaDBStore
config:
chroma_client:
type: PersistentClient
config:
path: chroma
embedding_function:
type: ragbits.core.embeddings.litellm:LiteLLMEmbeddings
index_name: default
66 changes: 66 additions & 0 deletions examples/evaluation/document-search/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import asyncio
import logging
from pathlib import Path

import hydra
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig

from ragbits.evaluate.evaluator import Evaluator
from ragbits.evaluate.loaders import HuggingFaceDataLoader
from ragbits.evaluate.metrics import DocumentSearchPrecisionRecallF1, DocumentSearchRankedRetrievalMetrics, MetricSet
from ragbits.evaluate.pipelines import DocumentSearchPipeline
from ragbits.evaluate.utils import log_to_file, log_to_neptune

# Module logger for this script's own progress messages.
log = logging.getLogger(__name__)

# Only let ERROR-and-above records through from the chatty third-party loggers.
for _noisy_logger in ("LiteLLM", "httpx"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)


async def bench(config: DictConfig) -> None:
    """
    Run the document search evaluation described by the Hydra configuration.

    Builds the data loader from ``config.data``, the document search pipeline
    from the whole config and the metric set from ``config.metrics``, computes
    the results with the evaluator and writes them to the Hydra run output
    directory. When ``config.neptune.run`` is truthy the results are also
    logged to Neptune.

    Args:
        config: Hydra configuration.
    """
    log.info("Starting evaluation...")

    dataloader = HuggingFaceDataLoader(config.data)
    pipeline = DocumentSearchPipeline(config)
    metrics = MetricSet(
        DocumentSearchPrecisionRecallF1,
        DocumentSearchRankedRetrievalMetrics,
    )(config.metrics)

    evaluator = Evaluator()
    results = await evaluator.compute(
        pipeline=pipeline,
        dataloader=dataloader,
        metrics=metrics,
    )

    log.info("Evaluation finished. Saving results...")

    # Hydra creates a dedicated output directory per run; results are stored there.
    output_dir = Path(HydraConfig.get().runtime.output_dir)
    log_to_file(results, output_dir)

    # Neptune logging is opt-in via the `neptune.run` config flag.
    if config.neptune.run:
        log_to_neptune(config, results, output_dir)

    log.info("Evaluation results saved under directory: %s", output_dir)


@hydra.main(config_path="config", config_name="retrieval", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Hydra entry point that drives the asynchronous evaluation to completion.

    Args:
        config: Hydra configuration.
    """
    evaluation = bench(config)
    asyncio.run(evaluation)


if __name__ == "__main__":
    # Hydra's decorator supplies the `config` argument.
    main()  # pylint: disable=no-value-for-parameter
59 changes: 59 additions & 0 deletions examples/evaluation/document-search/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import asyncio
import logging

import hydra
from omegaconf import DictConfig
from tqdm.asyncio import tqdm

from ragbits.document_search._main import DocumentSearch
from ragbits.document_search.documents.document import DocumentMeta
from ragbits.document_search.documents.sources import HuggingFaceSource

# Module logger for this script's own progress messages.
log = logging.getLogger(__name__)

# Only let ERROR-and-above records through from the chatty third-party loggers.
for _noisy_logger in ("LiteLLM", "httpx"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)


async def ingest(config: DictConfig) -> None:
    """
    Download the configured corpus rows and ingest them into document search.

    Args:
        config: Hydra configuration.
    """
    log.info("Ingesting documents...")

    document_search = DocumentSearch.from_config(config)  # type: ignore

    # One download awaitable per dataset row, rows 0 .. num_docs - 1.
    downloads = []
    for row_index in range(config.data.num_docs):
        source = HuggingFaceSource(
            path=config.data.path,
            split=config.data.split,
            row=row_index,
        )
        downloads.append(DocumentMeta.from_source(source))

    # Await all downloads together, with a progress bar labelled "Download".
    documents = await tqdm.gather(*downloads, desc="Download")

    await document_search.ingest(documents)

    log.info("Ingestion finished.")


@hydra.main(config_path="config", config_name="ingestion", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Hydra entry point that drives the asynchronous ingestion to completion.

    Args:
        config: Hydra configuration.
    """
    ingestion = ingest(config)
    asyncio.run(ingestion)


if __name__ == "__main__":
    # Hydra's decorator supplies the `config` argument.
    main()  # pylint: disable=no-value-for-parameter
2 changes: 1 addition & 1 deletion packages/ragbits-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ litellm = [
local = [
"torch~=2.2.1",
"transformers~=4.44.2",
"numpy~=1.24.0"
"numpy~=1.26.0"
]
lab = [
"gradio~=4.44.0",
Expand Down
1 change: 0 additions & 1 deletion packages/ragbits-document-search/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"numpy~=1.24.0",
"unstructured>=0.15.13",
"unstructured-client>=0.26.0",
"ragbits-core==0.1.0",
Expand Down
1 change: 1 addition & 0 deletions packages/ragbits-evaluate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Ragbits Evaluate
63 changes: 63 additions & 0 deletions packages/ragbits-evaluate/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
[project]
name = "ragbits-evaluate"
version = "0.1.0"
description = "Building blocks for rapid development of GenAI applications"
readme = "README.md"
requires-python = ">=3.10"
license = "MIT"
authors = [
{ name = "deepsense.ai", email = "ragbits@deepsense.ai"}
]
keywords = [
"Retrieval Augmented Generation",
"RAG",
"Large Language Models",
"LLMs",
"Generative AI",
"GenAI",
"Evaluation"
]
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"hydra-core~=1.3.2",
"neptune~=1.12.0",
]

[project.optional-dependencies]
relari = [
"continuous-eval~=0.3.12",
]

[tool.uv]
dev-dependencies = [
"pre-commit~=3.8.0",
"pytest~=8.3.3",
"pytest-cov~=5.0.0",
"pytest-asyncio~=0.24.0",
"pip-licenses>=4.0.0,<5.0.0"
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.metadata]
allow-direct-references = true

[tool.hatch.build.targets.wheel]
packages = ["src/ragbits"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
Loading
Loading