Commit

Merge branch 'main' into hfapigenerator
anakin87 authored Apr 5, 2024
2 parents 9f2c9c0 + 65705a8 commit 8253515
Showing 29 changed files with 1,577 additions and 171 deletions.
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
@@ -28,6 +28,7 @@ env:
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
HF_API_TOKEN: ${{ secrets.HUGGINGFACE_API_KEY }}
PYTHON_VERSION: "3.8"
HATCH_VERSION: "1.9.3"

24 changes: 9 additions & 15 deletions docker/README.md
@@ -1,25 +1,19 @@
<p align="center">
<a href="https://www.deepset.ai/haystack/"><img src="https://raw.githubusercontent.com/deepset-ai/haystack/main/docs/img/haystack_logo_colored.png" alt="Haystack"></a>
<a href="https://haystack.deepset.ai/"><img src="https://raw.githubusercontent.com/deepset-ai/.github/main/haystack-logo-colored.png" alt="Haystack by deepset"></a>
</p>

Haystack is an end-to-end framework that enables you to build powerful and production-ready
pipelines for different search use cases. The Docker image comes with a web service
configured to serve Haystack's `rest_api` to ease pipeline deployments in containerized
environments.
[Haystack](https://github.com/deepset-ai/haystack) is an end-to-end LLM framework that allows you to build applications powered by LLMs, Transformer models, vector search and more. Whether you want to perform retrieval-augmented generation (RAG), document search, question answering or answer generation, Haystack can orchestrate state-of-the-art embedding models and LLMs into pipelines to build end-to-end NLP applications and solve your use case.

To start the Docker container binding the TCP port `8000` locally, run:
```sh
docker run -p 8000:8000 deepset/haystack
```
## Haystack 2.0

If you need the container to access other services available in the host, run:
```sh
docker run -p 8000:8000 --network="host" deepset/haystack
```
For the latest version of Haystack there's only one image available:

- `haystack:base-<version>` contains a working Python environment with Haystack preinstalled. This image is expected to
be derived `FROM`.

## Image Variants
## Haystack 1.x image variants

The Docker image comes in six variants:
The Docker image for Haystack 1.x comes in six variants:
- `haystack:gpu-<version>` contains Haystack dependencies as well as what's needed to run the REST API and UI. It comes with the CUDA runtime and is capable of running on GPUs.
- `haystack:cpu-remote-inference-<version>` is a slimmed down version of the CPU image with the REST API and UI. It is specifically designed for PromptNode inferencing using remotely hosted models, such as Hugging Face Inference, OpenAI, Cohere, Anthropic, and similar.
- `haystack:cpu-<version>` contains Haystack dependencies as well as what's needed to run the REST API and UI. It has no support for GPU so must be run on CPU.
58 changes: 46 additions & 12 deletions haystack/components/converters/html.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union

from boilerpy3 import extractors

@@ -27,6 +27,16 @@ class HTMLToDocument:
```
"""

known_extractors: ClassVar[List[str]] = [
"DefaultExtractor",
"ArticleExtractor",
"ArticleSentencesExtractor",
"LargestContentExtractor",
"CanolaExtractor",
"KeepEverythingExtractor",
"NumWordsRulesExtractor",
]

def __init__(
self,
extractor_type: Literal[
@@ -38,6 +48,7 @@ def __init__(
"KeepEverythingExtractor",
"NumWordsRulesExtractor",
] = "DefaultExtractor",
try_others: bool = True,
):
"""
Create an HTMLToDocument component.
@@ -46,8 +57,10 @@
extractor_type: Name of the extractor class to use. Defaults to `DefaultExtractor`.
For more information on the different types of extractors,
see [boilerpy3 documentation](https://github.com/jmriebold/BoilerPy3?tab=readme-ov-file#extractors).
:param try_others: If `True`, the component will try other extractors if the user chosen extractor fails.
"""
self.extractor_type = extractor_type
self.try_others = try_others

def to_dict(self) -> Dict[str, Any]:
"""
@@ -56,7 +69,7 @@
:returns:
Dictionary with serialized data.
"""
return default_to_dict(self, extractor_type=self.extractor_type)
return default_to_dict(self, extractor_type=self.extractor_type, try_others=self.try_others)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "HTMLToDocument":
@@ -96,28 +109,49 @@ def run(
documents = []
meta_list = normalize_metadata(meta=meta, sources_count=len(sources))

extractor_class = getattr(extractors, self.extractor_type)
extractor = extractor_class(raise_on_failure=False)
# Use all extractor types, ensuring user chosen extractor is first, preserve order, avoid duplicates
extractors_list = (
list(
dict.fromkeys(
[self.extractor_type, *self.known_extractors] # User chosen extractor is always tried first
)
)
if self.try_others
else [self.extractor_type]
)

for source, metadata in zip(sources, meta_list):
try:
bytestream = get_bytestream_from_source(source=source)
except Exception as e:
logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
continue
try:
file_content = bytestream.data.decode("utf-8")
text = extractor.get_content(file_content)
except Exception as conversion_e:

text = None
for extractor_name in extractors_list:
extractor_class = getattr(extractors, extractor_name)
extractor = extractor_class(raise_on_failure=False)
try:
text = extractor.get_content(bytestream.data.decode("utf-8"))
if text:
break
except Exception as conversion_e:
if self.try_others:
logger.warning(
"Failed to extract text using {extractor} from {source}. Trying next extractor. Error: {error}",
extractor=extractor_name,
source=source,
error=conversion_e,
)
if not text:
logger.warning(
"Failed to extract text from {source}. Skipping it. Error: {error}",
f"Failed to extract text from {source} using extractors: {extractors_list}. Skipping it.",
source=source,
error=conversion_e,
extractors_list=extractors_list,
)
continue

merged_metadata = {**bytestream.meta, **metadata}
document = Document(content=text, meta=merged_metadata)
document = Document(content=text, meta={**bytestream.meta, **metadata})
documents.append(document)

return {"documents": documents}
15 changes: 14 additions & 1 deletion haystack/components/evaluators/__init__.py
@@ -1,4 +1,17 @@
from .answer_exact_match import AnswerExactMatchEvaluator
from .document_map import DocumentMAPEvaluator
from .document_mrr import DocumentMRREvaluator
from .document_recall import DocumentRecallEvaluator
from .faithfulness import FaithfulnessEvaluator
from .llm_evaluator import LLMEvaluator
from .sas_evaluator import SASEvaluator

__all__ = ["AnswerExactMatchEvaluator", "LLMEvaluator"]
__all__ = [
"AnswerExactMatchEvaluator",
"DocumentMAPEvaluator",
"DocumentMRREvaluator",
"DocumentRecallEvaluator",
"FaithfulnessEvaluator",
"LLMEvaluator",
"SASEvaluator",
]
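
With the expanded `__all__`, the new evaluators become importable directly from the package; a quick sanity-check sketch:

```python
from haystack.components.evaluators import DocumentMAPEvaluator, DocumentMRREvaluator

# Both statistical evaluators take no constructor arguments.
map_evaluator = DocumentMAPEvaluator()
mrr_evaluator = DocumentMRREvaluator()
print(map_evaluator, mrr_evaluator)
```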
21 changes: 8 additions & 13 deletions haystack/components/evaluators/answer_exact_match.py
@@ -7,17 +7,16 @@
class AnswerExactMatchEvaluator:
"""
Evaluator that checks if the predicted answers match any of the ground truth answers exactly.
The result is a number from 0.0 to 1.0, it represents the proportion of questions where any predicted answer
matched one of the ground truth answers.
Each question can have multiple ground truth answers and multiple predicted answers.
The result is a number from 0.0 to 1.0; it represents the proportion of inputs for which any predicted
answer matched one of the ground truth answers.
There can be multiple ground truth answers and multiple predicted answers as input.
Usage example:
```python
from haystack.components.evaluators import AnswerExactMatchEvaluator
evaluator = AnswerExactMatchEvaluator()
result = evaluator.run(
questions=["What is the capital of Germany?", "What is the capital of France?"],
ground_truth_answers=[["Berlin"], ["Paris"]],
predicted_answers=[["Berlin"], ["Lyon"]],
)
@@ -30,15 +29,11 @@ class AnswerExactMatchEvaluator:
"""

@component.output_types(individual_scores=List[int], score=float)
def run(
self, questions: List[str], ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]
) -> Dict[str, Any]:
def run(self, ground_truth_answers: List[List[str]], predicted_answers: List[List[str]]) -> Dict[str, Any]:
"""
Run the AnswerExactMatchEvaluator on the given inputs.
All lists must have the same length.
`ground_truth_answers` and `predicted_answers` must have the same length.
:param questions:
A list of questions.
:param ground_truth_answers:
A list of expected answers for each question.
:param predicted_answers:
@@ -49,8 +44,8 @@ def run(
- `score` - A number from 0.0 to 1.0 that represents the proportion of questions where any predicted
answer matched one of the ground truth answers.
"""
if not len(questions) == len(ground_truth_answers) == len(predicted_answers):
raise ValueError("The length of questions, ground_truth_answers, and predicted_answers must be the same.")
if not len(ground_truth_answers) == len(predicted_answers):
raise ValueError("The length of ground_truth_answers and predicted_answers must be the same.")

matches = []
for truths, extracted in zip(ground_truth_answers, predicted_answers):
@@ -60,6 +55,6 @@ def run(
matches.append(0)

# The proportion of questions where any predicted answer matched one of the ground truth answers
average = sum(matches) / len(questions)
average = sum(matches) / len(predicted_answers)

return {"individual_scores": matches, "score": average}
84 changes: 84 additions & 0 deletions haystack/components/evaluators/document_map.py
@@ -0,0 +1,84 @@
from typing import Any, Dict, List

from haystack import Document, component


@component
class DocumentMAPEvaluator:
"""
Evaluator that calculates the mean average precision of the retrieved documents, a metric
that measures how high retrieved documents are ranked.
Each question can have multiple ground truth documents and multiple retrieved documents.
`DocumentMAPEvaluator` doesn't normalize its inputs, the `DocumentCleaner` component
should be used to clean and normalize the documents before passing them to this evaluator.
Usage example:
```python
from haystack.components.evaluators import DocumentMAPEvaluator
evaluator = DocumentMAPEvaluator()
result = evaluator.run(
ground_truth_documents=[
[Document(content="France")],
[Document(content="9th century"), Document(content="9th")],
],
retrieved_documents=[
[Document(content="France")],
[Document(content="9th century"), Document(content="10th century"), Document(content="9th")],
],
)
print(result["individual_scores"])
# [1.0, 0.8333333333333333]
print(result["score"])
# 0.9166666666666666
```
"""

@component.output_types(score=float, individual_scores=List[float])
def run(
self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
) -> Dict[str, Any]:
"""
Run the DocumentMAPEvaluator on the given inputs.
All lists must have the same length.
:param ground_truth_documents:
A list of expected documents for each question.
:param retrieved_documents:
A list of retrieved documents for each question.
:returns:
A dictionary with the following outputs:
- `score` - The average of calculated scores.
- `individual_scores` - A list of numbers from 0.0 to 1.0 that represents how high retrieved documents are ranked.
"""
if len(ground_truth_documents) != len(retrieved_documents):
msg = "The length of ground_truth_documents and retrieved_documents must be the same."
raise ValueError(msg)

individual_scores = []

for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
score = 0.0
for ground_document in ground_truth:
if ground_document.content is None:
continue

average_precision = 0.0
relevant_documents = 0

for rank, retrieved_document in enumerate(retrieved):
if retrieved_document.content is None:
continue

if ground_document.content in retrieved_document.content:
relevant_documents += 1
average_precision += relevant_documents / (rank + 1)
if relevant_documents > 0:
score = average_precision / relevant_documents
individual_scores.append(score)

score = sum(individual_scores) / len(retrieved_documents)

return {"score": score, "individual_scores": individual_scores}
79 changes: 79 additions & 0 deletions haystack/components/evaluators/document_mrr.py
@@ -0,0 +1,79 @@
from typing import Any, Dict, List

from haystack import Document, component


@component
class DocumentMRREvaluator:
"""
Evaluator that calculates the mean reciprocal rank of the retrieved documents.
MRR measures how high the first relevant retrieved document is ranked.
Each question can have multiple ground truth documents and multiple retrieved documents.
`DocumentMRREvaluator` doesn't normalize its inputs, the `DocumentCleaner` component
should be used to clean and normalize the documents before passing them to this evaluator.
Usage example:
```python
from haystack.components.evaluators import DocumentMRREvaluator
evaluator = DocumentMRREvaluator()
result = evaluator.run(
ground_truth_documents=[
[Document(content="France")],
[Document(content="9th century"), Document(content="9th")],
],
retrieved_documents=[
[Document(content="France")],
[Document(content="9th century"), Document(content="10th century"), Document(content="9th")],
],
)
print(result["individual_scores"])
# [1.0, 1.0]
print(result["score"])
# 1.0
```
"""

@component.output_types(score=float, individual_scores=List[float])
def run(
self, ground_truth_documents: List[List[Document]], retrieved_documents: List[List[Document]]
) -> Dict[str, Any]:
"""
Run the DocumentMRREvaluator on the given inputs.
`ground_truth_documents` and `retrieved_documents` must have the same length.
:param ground_truth_documents:
A list of expected documents for each question.
:param retrieved_documents:
A list of retrieved documents for each question.
:returns:
A dictionary with the following outputs:
- `score` - The average of calculated scores.
- `individual_scores` - A list of numbers from 0.0 to 1.0 that represents how high the first relevant document is ranked.
"""
if len(ground_truth_documents) != len(retrieved_documents):
msg = "The length of ground_truth_documents and retrieved_documents must be the same."
raise ValueError(msg)

individual_scores = []

for ground_truth, retrieved in zip(ground_truth_documents, retrieved_documents):
score = 0.0
for ground_document in ground_truth:
if ground_document.content is None:
continue

for rank, retrieved_document in enumerate(retrieved):
if retrieved_document.content is None:
continue

if ground_document.content in retrieved_document.content:
score = 1 / (rank + 1)
break
individual_scores.append(score)

score = sum(individual_scores) / len(retrieved_documents)

return {"score": score, "individual_scores": individual_scores}