Skip to content

Commit

Permalink
feat(eval): add evaluation pipeline for document search (#91)
Browse files Browse the repository at this point in the history
  • Loading branch information
micpst authored Oct 23, 2024
1 parent defd0b2 commit cf7ea98
Show file tree
Hide file tree
Showing 34 changed files with 1,619 additions and 74 deletions.
3 changes: 2 additions & 1 deletion .libraries-whitelist.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pkg_resources
tiktoken
chardet
chroma-hnswlib
chroma-hnswlib
rouge
35 changes: 35 additions & 0 deletions examples/evaluation/document-search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Document Search Evaluation

## Ingest

```sh
uv run ingest.py
```

```sh
uv run ingest.py +experiments=chunking-250
```

```sh
uv run ingest.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
```

## Evaluate

```sh
uv run evaluate.py
```

```sh
uv run evaluate.py +experiments=chunking-250
```

```sh
uv run evaluate.py --multirun +experiments=chunking-250,chunking-500,chunking-1000
```

### Log to Neptune

```sh
uv run evaluate.py neptune.run=True
```
4 changes: 4 additions & 0 deletions examples/evaluation/document-search/config/data/corpus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "hf-docs"
path: "micpst/hf-docs"
split: "train"
num_docs: 5
3 changes: 3 additions & 0 deletions examples/evaluation/document-search/config/data/qa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name: "hf-docs-retrieval"
path: "micpst/hf-docs-retrieval"
split: "train"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
type: LiteLLMEmbeddings
config:
model: "text-embedding-3-small"
options:
dimensions: 768
encoding_format: float
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-1000

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 1000
md:
config:
chunking_kwargs:
max_characters: 1000

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-1000
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-250

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 250
md:
config:
chunking_kwargs:
max_characters: 250

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-250
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_

task:
name: chunking-500

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 500
md:
config:
chunking_kwargs:
max_characters: 500

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-500
6 changes: 6 additions & 0 deletions examples/evaluation/document-search/config/ingestion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
defaults:
- data: corpus
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
txt:
type: UnstructuredDefaultProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0

md:
type: UnstructuredDefaultProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: NoopQueryRephraser
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
type: NoopReranker
26 changes: 26 additions & 0 deletions examples/evaluation/document-search/config/retrieval.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
defaults:
- data: qa
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- rephraser: noop
- reranker: noop
- _self_

task:
name: default
type: document-search

metrics:
DocumentSearchPrecisionRecallF1:
matching_strategy: RougeChunkMatch
options:
threshold: 0.5
DocumentSearchRankedRetrievalMetrics:
matching_strategy: RougeChunkMatch
options:
threshold: 0.5

neptune:
project: ragbits
run: False
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
type: ChromaDBStore
config:
chroma_client:
type: PersistentClient
config:
path: chroma
embedding_function:
type: ragbits.core.embeddings.litellm:LiteLLMEmbeddings
index_name: default
67 changes: 67 additions & 0 deletions examples/evaluation/document-search/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "ragbits-document-search",
# "ragbits-evaluate[relari]",
# "ragbits[litellm,chromadb]",
# ]
# ///
import asyncio
import logging

import hydra
from omegaconf import DictConfig

from ragbits.evaluate.evaluator import Evaluator
from ragbits.evaluate.loaders.hf import HFDataLoader
from ragbits.evaluate.metrics.document_search import document_search_metrics
from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline
from ragbits.evaluate.utils import log_to_file, log_to_neptune, setup_neptune

# Raise the LiteLLM and httpx loggers' threshold to ERROR so their
# lower-severity records do not clutter the evaluation output.
logging.getLogger("LiteLLM").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
# Module-level logger for this script's own progress messages.
log = logging.getLogger(__name__)


async def bench(config: DictConfig) -> None:
    """
    Evaluate document search using the setup described by the Hydra config.

    The computed results are passed to `log_to_file`; if `setup_neptune`
    returned a run, they are passed to `log_to_neptune` as well.

    Args:
        config: Hydra configuration.
    """
    neptune_run = setup_neptune(config)

    log.info("Starting evaluation...")

    # The three ingredients of the evaluation: data, pipeline under test, metrics.
    qa_loader = HFDataLoader(config.data)
    search_pipeline = DocumentSearchPipeline(config)
    search_metrics = document_search_metrics(config.metrics)

    scores = await Evaluator().compute(
        pipeline=search_pipeline,
        dataloader=qa_loader,
        metrics=search_metrics,
    )

    results_dir = log_to_file(scores)
    if neptune_run:
        log_to_neptune(neptune_run, scores, results_dir)

    log.info("Evaluation results saved under directory: %s", results_dir)


@hydra.main(config_path="config", config_name="retrieval", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Script entry point: drive the asynchronous evaluation to completion.

    Args:
        config: Hydra configuration.
    """
    evaluation = bench(config)
    asyncio.run(evaluation)


if __name__ == "__main__":
    # Called without arguments; `main` is wrapped by the `hydra.main` decorator.
    main()  # pylint: disable=no-value-for-parameter
66 changes: 66 additions & 0 deletions examples/evaluation/document-search/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "ragbits-document-search",
# "ragbits[litellm,chromadb]",
# ]
# ///
import asyncio
import logging

import hydra
from omegaconf import DictConfig
from tqdm.asyncio import tqdm

from ragbits.document_search import DocumentSearch
from ragbits.document_search.documents.document import DocumentMeta
from ragbits.document_search.documents.sources import HuggingFaceSource

# Raise the LiteLLM and httpx loggers' threshold to ERROR so their
# lower-severity records do not clutter the ingestion output.
logging.getLogger("LiteLLM").setLevel(logging.ERROR)
logging.getLogger("httpx").setLevel(logging.ERROR)
# Module-level logger for this script's own progress messages.
log = logging.getLogger(__name__)


async def ingest(config: DictConfig) -> None:
    """
    Load the configured dataset rows and ingest them into document search.

    One `HuggingFaceSource` is created per row index in
    `range(config.data.num_docs)`; the resulting documents are gathered
    with a progress bar and handed to `DocumentSearch.ingest`.

    Args:
        config: Hydra configuration.
    """
    log.info("Ingesting documents...")

    search = DocumentSearch.from_config(config)  # type: ignore

    # Prepare one pending document fetch per dataset row.
    downloads = []
    for row in range(config.data.num_docs):
        source = HuggingFaceSource(
            path=config.data.path,
            split=config.data.split,
            row=row,
        )
        downloads.append(DocumentMeta.from_source(source))

    documents = await tqdm.gather(*downloads, desc="Download")

    await search.ingest(documents)

    log.info("Ingestion finished.")


@hydra.main(config_path="config", config_name="ingestion", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Script entry point: drive the asynchronous ingestion to completion.

    Args:
        config: Hydra configuration.
    """
    ingestion = ingest(config)
    asyncio.run(ingestion)


if __name__ == "__main__":
    # Called without arguments; `main` is wrapped by the `hydra.main` decorator.
    main()  # pylint: disable=no-value-for-parameter
2 changes: 1 addition & 1 deletion packages/ragbits-core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ litellm = [
local = [
"torch~=2.2.1",
"transformers~=4.44.2",
"numpy~=1.24.0"
"numpy~=1.26.0"
]
lab = [
"gradio~=4.44.0",
Expand Down
1 change: 0 additions & 1 deletion packages/ragbits-document-search/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"numpy~=1.24.0",
"unstructured>=0.15.13",
"unstructured-client>=0.26.0",
"pdf2image>=1.17.0",
Expand Down
1 change: 1 addition & 0 deletions packages/ragbits-evaluate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Ragbits Evaluate
Loading

0 comments on commit cf7ea98

Please sign in to comment.