Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Astra: rename retriever #399

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions integrations/astra/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ or

## Usage

This package includes Astra Document Store and Astra Retriever classes that integrate with Haystack, allowing you to easily perform document retrieval or RAG with Astra, and include those functions in Haystack pipelines.
This package includes Astra Document Store and Astra Embedding Retriever classes that integrate with Haystack, allowing you to easily perform document retrieval or RAG with Astra, and include those functions in Haystack pipelines.

### In order to use the Document Store directly:

Expand Down Expand Up @@ -78,7 +78,7 @@ document_store = AstraDocumentStore(
Then you can call document store methods such as `count_documents`, as shown below:
`document_store.count_documents()`

### Using the Astra Retriever with Haystack Pipelines
### Using the Astra Embedding Retriever with Haystack Pipelines

Create the Document Store object like above, then import and create the Pipeline:

Expand All @@ -87,8 +87,8 @@ from haystack.preview import Pipeline
pipeline = Pipeline()
```

Add your AstraRetriever into the pipeline
`pipeline.add_component(instance=AstraSingleRetriever(document_store=document_store), name="retriever")`
Add your AstraEmbeddingRetriever to the pipeline:
`pipeline.add_component(instance=AstraEmbeddingRetriever(document_store=document_store), name="retriever")`

Add other components and connect them as desired. Then run your pipeline:
`pipeline.run(...)`
4 changes: 2 additions & 2 deletions integrations/astra/examples/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy

from haystack_integrations.components.retrievers.astra import AstraRetriever
from haystack_integrations.components.retrievers.astra import AstraEmbeddingRetriever
from haystack_integrations.document_stores.astra import AstraDocumentStore

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -66,7 +66,7 @@
instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
name="embedder",
)
q.add_component("retriever", AstraRetriever(document_store))
q.add_component("retriever", AstraEmbeddingRetriever(document_store))

q.connect("embedder", "retriever")

Expand Down
4 changes: 2 additions & 2 deletions integrations/astra/examples/pipeline_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy

from haystack_integrations.components.retrievers.astra import AstraRetriever
from haystack_integrations.components.retrievers.astra import AstraEmbeddingRetriever
from haystack_integrations.document_stores.astra import AstraDocumentStore

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -77,7 +77,7 @@
instance=SentenceTransformersTextEmbedder(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2"),
name="embedder",
)
rag_pipeline.add_component(instance=AstraRetriever(document_store=document_store), name="retriever")
rag_pipeline.add_component(instance=AstraEmbeddingRetriever(document_store=document_store), name="retriever")
rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name="prompt_builder")
rag_pipeline.add_component(instance=OpenAIGenerator(api_key=os.environ.get("OPENAI_API_KEY")), name="llm")
rag_pipeline.add_component(instance=AnswerBuilder(), name="answer_builder")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-FileCopyrightText: 2023-present Anant Corporation <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
from .retriever import AstraRetriever
from .retriever import AstraEmbeddingRetriever

__all__ = ["AstraRetriever"]
__all__ = ["AstraEmbeddingRetriever"]
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@


@component
class AstraRetriever:
class AstraEmbeddingRetriever:
"""
A component for retrieving documents from an AstraDocumentStore.
"""

def __init__(self, document_store: AstraDocumentStore, filters: Optional[Dict[str, Any]] = None, top_k: int = 10):
"""
Create an AstraRetriever component. Usually you pass some basic configuration
Create an AstraEmbeddingRetriever component. Usually you pass some basic configuration
parameters to the constructor.

:param filters: A dictionary with filters to narrow down the search space (default is None).
Expand Down Expand Up @@ -59,7 +59,7 @@ def to_dict(self) -> Dict[str, Any]:
)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "AstraRetriever":
def from_dict(cls, data: Dict[str, Any]) -> "AstraEmbeddingRetriever":
document_store = AstraDocumentStore.from_dict(data["init_parameters"]["document_store"])
data["init_parameters"]["document_store"] = document_store
return default_from_dict(cls, data)
10 changes: 5 additions & 5 deletions integrations/astra/tests/test_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from haystack_integrations.components.retrievers.astra import AstraRetriever
from haystack_integrations.components.retrievers.astra import AstraEmbeddingRetriever


@pytest.mark.skipif(
Expand All @@ -14,9 +14,9 @@
@pytest.mark.skipif(os.environ.get("ASTRA_DB_ID", "") == "", reason="ASTRA_DB_ID is not set")
@pytest.mark.integration
def test_retriever_to_json(document_store):
retriever = AstraRetriever(document_store, filters={"foo": "bar"}, top_k=99)
retriever = AstraEmbeddingRetriever(document_store, filters={"foo": "bar"}, top_k=99)
assert retriever.to_dict() == {
"type": "haystack_integrations.components.retrievers.astra.retriever.AstraRetriever",
"type": "haystack_integrations.components.retrievers.astra.retriever.AstraEmbeddingRetriever",
"init_parameters": {
"filters": {"foo": "bar"},
"top_k": 99,
Expand All @@ -43,7 +43,7 @@ def test_retriever_to_json(document_store):
@pytest.mark.integration
def test_retriever_from_json():
data = {
"type": "haystack_integrations.components.retrievers.astra.retriever.AstraRetriever",
"type": "haystack_integrations.components.retrievers.astra.retriever.AstraEmbeddingRetriever",
"init_parameters": {
"filters": {"bar": "baz"},
"top_k": 42,
Expand All @@ -62,6 +62,6 @@ def test_retriever_from_json():
},
},
}
retriever = AstraRetriever.from_dict(data)
retriever = AstraEmbeddingRetriever.from_dict(data)
assert retriever.top_k == 42
assert retriever.filters == {"bar": "baz"}