Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: feat(document-search): automatic configuration selection based on evaluation #167

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions examples/evaluation/document-search/config/data/qa.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
name: "hf-docs-retrieval"
path: "micpst/hf-docs-retrieval"
split: "train"
type: ragbits.evaluate.loaders.hf:HFDataLoader
options:
name: "hf-docs-retrieval"
path: "micpst/hf-docs-retrieval"
split: "train"
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
task:
name: chunking-1000

pipeline:
# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 1000
md:
config:
chunking_kwargs:
max_characters: 1000
providers:
txt:
config:
chunking_kwargs:
max_characters: 1000
md:
config:
chunking_kwargs:
max_characters: 1000

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-1000
# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-1000
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@ task:
name: chunking-250

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 250
md:
config:
chunking_kwargs:
max_characters: 250
pipeline:
providers:
txt:
config:
chunking_kwargs:
max_characters: 250
md:
config:
chunking_kwargs:
max_characters: 250

# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-250
# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-250
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@ task:
name: chunking-500

# used only for ingestion
providers:
txt:
config:
chunking_kwargs:
max_characters: 500
md:
config:
chunking_kwargs:
max_characters: 500

pipeline:
providers:
txt:
config:
chunking_kwargs:
max_characters: 500
md:
config:
chunking_kwargs:
max_characters: 500
# used for both ingestion and evaluation
vector_store:
config:
index_name: chunk-500
vector_store:
config:
index_name: chunk-500
4 changes: 1 addition & 3 deletions examples/evaluation/document-search/config/ingestion.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
defaults:
- data: corpus
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- pipeline: document_ingestion
- _self_
24 changes: 24 additions & 0 deletions examples/evaluation/document-search/config/optimization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
defaults:
- pipeline: document_search_optimization
- data: qa
- _self_

task:
name: default
type: document-search

metrics:
- type: ragbits.evaluate.metrics.document_search:DocumentSearchPrecisionRecallF1
matching_strategy: RougeChunkMatch
options:
threshold: 0.5
- type: ragbits.evaluate.metrics.document_search:DocumentSearchRankedRetrievalMetrics
weight: -1.0
matching_strategy: RougeChunkMatch
options:
threshold: 0.5


neptune:
project: ragbits
run: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: "hf-docs"
path: "micpst/hf-docs"
split: "train"
num_docs: 5
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
defaults:
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
defaults:
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- rephraser: noop
- reranker: noop
- _self_
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
defaults:
- embedder: litellm_opt_template
- providers: unstructured_opt_template
- vector_store: chroma
- rephraser: noop
- reranker: noop
- answer_data_source: corpus
- _self_

type: ragbits.evaluate.pipelines.document_search:DocumentSearchWithIngestionPipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
type: ragbits.core.embeddings.litellm:LiteLLMEmbeddings
config:
optimize: true
choices:
- model: "text-embedding-3-small"
options:
dimensions:
optimize: true
range:
- 32
- 512
encoding_format: float
- model: "text-embedding-3-large"
options:
dimensions:
optimize: true
range:
- 512
- 1024
encoding_format: float
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
txt:
type: ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters:
optimize: true
range:
- 500
- 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0

md:
type: ragbits.document_search.ingestion.providers.unstructured:UnstructuredDefaultProvider
config:
use_api: false
partition_kwargs:
strategy: hi_res
chunking_kwargs:
include_orig_elements: true
max_characters: 1000
new_after_n_chars: 1000
overlap: 0
overlap_all: 0
11 changes: 3 additions & 8 deletions examples/evaluation/document-search/config/retrieval.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
defaults:
- data: qa
- embedder: litellm
- providers: unstructured
- vector_store: chroma
- rephraser: noop
- reranker: noop
- _self_
- pipeline: document_search

task:
name: default
type: document-search

metrics:
DocumentSearchPrecisionRecallF1:
- type: ragbits.evaluate.metrics.document_search:DocumentSearchPrecisionRecallF1
matching_strategy: RougeChunkMatch
options:
threshold: 0.5
DocumentSearchRankedRetrievalMetrics:
- type: ragbits.evaluate.metrics.document_search:DocumentSearchRankedRetrievalMetrics
matching_strategy: RougeChunkMatch
options:
threshold: 0.5
Expand Down
10 changes: 5 additions & 5 deletions examples/evaluation/document-search/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from omegaconf import DictConfig

from ragbits.evaluate.evaluator import Evaluator
from ragbits.evaluate.loaders.hf import HFDataLoader
from ragbits.evaluate.metrics.document_search import document_search_metrics
from ragbits.evaluate.loaders import dataloader_factory
from ragbits.evaluate.metrics import metric_set_factory
from ragbits.evaluate.pipelines.document_search import DocumentSearchPipeline
from ragbits.evaluate.utils import log_to_file, log_to_neptune, setup_neptune

Expand All @@ -34,9 +34,9 @@ async def bench(config: DictConfig) -> None:

log.info("Starting evaluation...")

dataloader = HFDataLoader(config.data)
pipeline = DocumentSearchPipeline(config)
metrics = document_search_metrics(config.metrics)
dataloader = dataloader_factory(config.data)
pipeline = DocumentSearchPipeline(config.pipeline)
metrics = metric_set_factory(config.metrics)

evaluator = Evaluator()
results = await evaluator.compute(
Expand Down
2 changes: 1 addition & 1 deletion examples/evaluation/document-search/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async def ingest(config: DictConfig) -> None:
"""
log.info("Ingesting documents...")

document_search = DocumentSearch.from_config(config) # type: ignore
document_search = DocumentSearch.from_config(config.pipeline) # type: ignore

documents = await tqdm.gather(
*[
Expand Down
42 changes: 42 additions & 0 deletions examples/evaluation/document-search/optimize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import sys

import hydra
from omegaconf import DictConfig, OmegaConf

from ragbits.core.utils.config_handling import get_cls_from_config
from ragbits.evaluate.loaders import dataloader_factory
from ragbits.evaluate.metrics import metric_set_factory
from ragbits.evaluate.optimizer import Optimizer
from ragbits.evaluate.utils import log_optimization_to_file

# Handle to this script's own module object; main() passes it to
# get_cls_from_config when resolving the pipeline class from config.pipeline.type.
module = sys.modules[__name__]


@hydra.main(config_path="config", config_name="optimization", version_base="3.2")
def main(config: DictConfig) -> None:
    """
    Run pipeline configuration optimization driven by the Hydra config.

    The data loader, pipeline class and metric set are built from the
    `data`, `pipeline.type` and `metrics` sections of the config, handed to
    an `Optimizer`, and the resulting configurations with their scores are
    written out with `log_optimization_to_file`.

    Args:
        config: Hydra configuration.
    """
    data_source = dataloader_factory(config.data)
    pipeline_cls = get_cls_from_config(config.pipeline.type, module)
    metric_set = metric_set_factory(config.metrics)

    # Optimizer settings are fixed here; only the Neptune project name is
    # taken from the Hydra config.
    optimizer_settings = {
        "direction": "maximize",
        "n_trials": 10,
        "neptune_project": config.neptune.project,
    }
    scored_configs = Optimizer(cfg=OmegaConf.create(optimizer_settings)).optimize(
        pipeline_class=pipeline_cls,
        config_with_params=config.pipeline,
        metrics=metric_set,
        dataloader=data_source,
        log_to_neptune=config.neptune.run,
    )
    log_optimization_to_file(scored_configs)


# Run the optimization only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion packages/ragbits-evaluate/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ classifiers = [
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = ["hydra-core~=1.3.2", "neptune~=1.12.0", "ragbits-core==0.2.0"]
dependencies = ["hydra-core~=1.3.2", "neptune~=1.12.0", "ragbits-core==0.2.0", "optuna==4.0.0"]

[project.optional-dependencies]
relari = [
Expand Down
16 changes: 16 additions & 0 deletions packages/ragbits-evaluate/src/ragbits/evaluate/loaders/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import sys

from omegaconf import DictConfig

from ragbits.core.utils.config_handling import get_cls_from_config

from .base import DataLoader

# `__all__` must contain attribute *names* (strings), not the objects
# themselves; listing the class object makes
# `from ragbits.evaluate.loaders import *` raise TypeError.
# The factory defined in this module is exported alongside the base class.
__all__ = ["DataLoader", "dataloader_factory"]

# Handle to this module object; dataloader_factory passes it to
# get_cls_from_config when resolving the loader class.
module = sys.modules[__name__]


def dataloader_factory(config: DictConfig) -> DataLoader:
    """
    Instantiate the data loader described by a configuration.

    Args:
        config: Configuration with a `type` entry identifying the loader
            class (resolved through `get_cls_from_config` together with this
            module) and an `options` entry passed to the loader's constructor.

    Returns:
        The constructed data loader.
    """
    return get_cls_from_config(config.type, module)(config.options)
19 changes: 19 additions & 0 deletions packages/ragbits-evaluate/src/ragbits/evaluate/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import sys

from omegaconf import DictConfig

from ragbits.core.utils.config_handling import get_cls_from_config

from .base import MetricSet

# `__all__` must contain attribute *names* (strings), not the objects
# themselves; listing the class object makes
# `from ragbits.evaluate.metrics import *` raise TypeError.
# The factory defined in this module is exported alongside the metric set class.
__all__ = ["MetricSet", "metric_set_factory"]

# Handle to this module object; metric_set_factory passes it to
# get_cls_from_config when resolving each metric class.
module = sys.modules[__name__]


def metric_set_factory(cfg: DictConfig) -> MetricSet:
    """
    Build a metric set from a sequence of metric configurations.

    Args:
        cfg: Iterable of metric configurations. Each entry's `type` is
            resolved to a class through `get_cls_from_config` (together with
            this module), and the entry itself is passed to that class's
            constructor.

    Returns:
        A `MetricSet` holding one instantiated metric per entry, in the order
        the entries appear in `cfg`.
    """
    instances = [get_cls_from_config(entry.type, module)(entry) for entry in cfg]
    return MetricSet(*instances)
Loading
Loading