Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(eval): add evaluation pipeline for document search #91

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .libraries-whitelist.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pkg_resources
tiktoken
chardet
chroma-hnswlib
chroma-hnswlib
rouge
35 changes: 35 additions & 0 deletions examples/evaluation/document-search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Document Search Evaluation

## Ingest

```sh
uv run ingest.py
```

```sh
uv run ingest.py +experiments=chunking-250
```

```sh
uv run ingest.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
```

## Evaluate

```sh
uv run evaluate.py
```

```sh
uv run evaluate.py +experiments=chunking-250
```

```sh
uv run evaluate.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
```

### Log to Neptune

```sh
uv run evaluate.py neptune.run=True
```
4 changes: 4 additions & 0 deletions examples/evaluation/document-search/config/data/corpus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "hf-docs"
path: "micpst/hf-docs"
split: "train"
num_docs: 5
3 changes: 3 additions & 0 deletions examples/evaluation/document-search/config/data/qa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name: "hf-docs-retrieval"
path: "micpst/hf-docs-retrieval"
split: "train"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
type: LiteLLMEmbeddings
config:
model: "text-embedding-3-small"
options:
dimensions: 768
encoding_format: float
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-1000

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 1000
md:
config:
chunking_kwargs:
max_characters: 1000

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-1000
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-250

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 250
md:
config:
chunking_kwargs:
max_characters: 250

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-250
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-500

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 500
md:
config:
chunking_kwargs:
max_characters: 500

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-500
6 changes: 6 additions & 0 deletions examples/evaluation/document-search/config/ingestion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
defaults:
- data: corpus
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
txt:
type: UnstructuredDefaultProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0

md:
type: UnstructuredDefaultProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: NoopQueryRephraser
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: NoopReranker
26 changes: 26 additions & 0 deletions examples/evaluation/document-search/config/retrieval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
defaults:
- data: qa
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- rephraser: noop
- reranker: noop
- _self_

task:
name: default
type: document-search

metrics:
DocumentSearchPrecisionRecallF1:
matching_strategy: RougeChunkMatch
options:
threshold: 0.5
DocumentSearchRankedRetrievalMetrics:
matching_strategy: RougeChunkMatch
options:
threshold: 0.5

neptune:
project: ragbits
run: False
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
type: ChromaDBStore
config:
chroma_client:
type: PersistentClient
config:
path: chroma
embedding_function:
type: ragbits.core.embeddings.litellm:LiteLLMEmbeddings
index_name: default
67 changes: 67 additions & 0 deletions examples/evaluation/document-search/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "ragbits-document-search",
# "ragbits-evaluate[relari]",
# "ragbits[litellm,chromadb]",
# ]
# ///
import asyncio
import logging

import hydra
from omegaconf import DictConfig

from ragbits.evaluate.evaluator import Evaluator
from ragbits.evaluate.loaders.hf import HFDataLoader
from ragbits.evaluate.metrics.document_search import document_search_metrics
from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline
from ragbits.evaluate.utils import log_to_file, log_to_neptune, setup_neptune

# Raise the "LiteLLM" and "httpx" loggers to ERROR so only errors from them are emitted.
logging.getLogger("LiteLLM").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
# Module-level logger used by the functions in this script.
log = logging.getLogger(__name__)


async def bench(config: DictConfig) -> None:
    """
    Run the document search evaluation described by the hydra config.

    Builds the data loader, the pipeline and the metrics from the config, awaits
    ``Evaluator.compute`` on them, hands the results to ``log_to_file`` and, when
    ``setup_neptune`` returned a truthy run, also to ``log_to_neptune``.

    Args:
        config: Hydra configuration.
    """
    # Neptune is set up before any work starts; a falsy value disables Neptune logging below.
    run = setup_neptune(config)

    log.info("Starting evaluation...")

    dataloader = HFDataLoader(config.data)
    pipeline = DocumentSearchPipeline(config)
    metrics = document_search_metrics(config.metrics)

    evaluator = Evaluator()
    results = await evaluator.compute(
        pipeline=pipeline,
        dataloader=dataloader,
        metrics=metrics,
    )

    # log_to_file returns the output directory, which is reused for Neptune and the final log line.
    output_dir = log_to_file(results)
    if run:
        log_to_neptune(run, results, output_dir)

    log.info("Evaluation results saved under directory: %s", output_dir)


@hydra.main(config_path="config", config_name="retrieval", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Run the evaluation process.

    Args:
        config: Hydra configuration.
    """
    evaluation = bench(config)
    asyncio.run(evaluation)


if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter
66 changes: 66 additions & 0 deletions examples/evaluation/document-search/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "ragbits-document-search",
# "ragbits[litellm,chromadb]",
# ]
# ///
import asyncio
import logging

import hydra
from omegaconf import DictConfig
from tqdm.asyncio import tqdm

from ragbits.document_search import DocumentSearch
from ragbits.document_search.documents.document import DocumentMeta
from ragbits.document_search.documents.sources import HuggingFaceSource

# Raise the "LiteLLM" and "httpx" loggers to ERROR so only errors from them are emitted.
logging.getLogger("LiteLLM").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
# Module-level logger used by the functions in this script.
log = logging.getLogger(__name__)


async def ingest(config: DictConfig) -> None:
    """
    Download the configured dataset rows and ingest them into document search.

    One ``DocumentMeta`` is requested per row index in ``range(config.data.num_docs)``,
    each from a ``HuggingFaceSource`` built with the configured path and split.

    Args:
        config: Hydra configuration.
    """
    log.info("Ingesting documents...")

    search = DocumentSearch.from_config(config)  # type: ignore

    corpus = config.data
    downloads = [
        DocumentMeta.from_source(
            HuggingFaceSource(path=corpus.path, split=corpus.split, row=row_index),
        )
        for row_index in range(corpus.num_docs)
    ]
    # Await all downloads together; tqdm shows progress under the "Download" label.
    documents = await tqdm.gather(*downloads, desc="Download")

    await search.ingest(documents)

    log.info("Ingestion finished.")


@hydra.main(config_path="config", config_name="ingestion", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Entry point that drives the asynchronous ingestion to completion.

    Args:
        config: Hydra configuration.
    """
    ingestion = ingest(config)
    asyncio.run(ingestion)


if __name__ == "__main__":
    main()  # pylint: disable=no-value-for-parameter
2 changes: 1 addition & 1 deletion packages/ragbits-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ litellm = [
local = [
"torch~=2.2.1",
"transformers~=4.44.2",
"numpy~=1.24.0"
"numpy~=1.26.0"
]
lab = [
"gradio~=4.44.0",
Expand Down
1 change: 0 additions & 1 deletion packages/ragbits-document-search/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"numpy~=1.24.0",
"unstructured>=0.15.13",
"unstructured-client>=0.26.0",
"pdf2image>=1.17.0",
Expand Down
1 change: 1 addition & 0 deletions packages/ragbits-evaluate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Ragbits Evaluate
Loading
Loading