From c1d6d48b636a2ccecd7e70a7fc28eeeae997bbb2 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Wed, 13 Nov 2024 18:37:36 +0100 Subject: [PATCH] squashing --- README.md | 2 +- .../fastembed/examples/ranker_example.py | 22 ++ integrations/fastembed/pydoc/config.yml | 3 +- integrations/fastembed/pyproject.toml | 6 +- .../components/rankers/fastembed/__init__.py | 3 + .../components/rankers/fastembed/ranker.py | 202 ++++++++++++ .../fastembed/tests/test_fastembed_ranker.py | 292 ++++++++++++++++++ 7 files changed, 527 insertions(+), 3 deletions(-) create mode 100644 integrations/fastembed/examples/ranker_example.py create mode 100644 integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py create mode 100644 integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py create mode 100644 integrations/fastembed/tests/test_fastembed_ranker.py diff --git a/README.md b/README.md index 2b4a83253..af83d045d 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [cohere-haystack](integrations/cohere/) | Embedder, Generator, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/cohere-haystack.svg)](https://pypi.org/project/cohere-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml) | | [deepeval-haystack](integrations/deepeval/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/deepeval-haystack.svg)](https://pypi.org/project/deepeval-haystack) | [![Test / deepeval](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml) | | [elasticsearch-haystack](integrations/elasticsearch/) | Document Store | [![PyPI - 
Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | -| [fastembed-haystack](integrations/fastembed/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/fastembed-haystack.svg)](https://pypi.org/project/fastembed-haystack/) | [![Test / fastembed](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml) | +| [fastembed-haystack](integrations/fastembed/) | Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/fastembed-haystack.svg)](https://pypi.org/project/fastembed-haystack/) | [![Test / fastembed](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml) | | [google-ai-haystack](integrations/google_ai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-ai-haystack.svg)](https://pypi.org/project/google-ai-haystack) | [![Test / google-ai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_ai.yml) | | [google-vertex-haystack](integrations/google_vertex/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/google-vertex-haystack.svg)](https://pypi.org/project/google-vertex-haystack) | [![Test / google-vertex](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/google_vertex.yml) | | 
[instructor-embedders-haystack](integrations/instructor_embedders/) | Embedder | [![PyPI - Version](https://img.shields.io/pypi/v/instructor-embedders-haystack.svg)](https://pypi.org/project/instructor-embedders-haystack) | [![Test / instructor-embedders](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/instructor_embedders.yml) | diff --git a/integrations/fastembed/examples/ranker_example.py b/integrations/fastembed/examples/ranker_example.py new file mode 100644 index 000000000..7a31e4646 --- /dev/null +++ b/integrations/fastembed/examples/ranker_example.py @@ -0,0 +1,22 @@ +from haystack import Document + +from haystack_integrations.components.rankers.fastembed import FastembedRanker + +query = "Who is maintaining Qdrant?" +documents = [ + Document( + content="This is built to be faster and lighter than other embedding libraries e.g. Transformers, Sentence-Transformers, etc." + ), + Document(content="fastembed is supported by and maintained by Qdrant."), +] + +ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-6-v2") +ranker.warm_up() +reranked_documents = ranker.run(query=query, documents=documents)["documents"] + + +print(reranked_documents[0]) + +# Document(id=..., +# content: 'fastembed is supported by and maintained by Qdrant.', +# score: 5.472434997558594..) 
diff --git a/integrations/fastembed/pydoc/config.yml b/integrations/fastembed/pydoc/config.yml index aad50e52c..8ab538cf8 100644 --- a/integrations/fastembed/pydoc/config.yml +++ b/integrations/fastembed/pydoc/config.yml @@ -6,7 +6,8 @@ loaders: "haystack_integrations.components.embedders.fastembed.fastembed_document_embedder", "haystack_integrations.components.embedders.fastembed.fastembed_text_embedder", "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder", - "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder" + "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder", + "haystack_integrations.components.rankers.fastembed.ranker" ] ignore_when_discovered: ["__init__"] processors: diff --git a/integrations/fastembed/pyproject.toml b/integrations/fastembed/pyproject.toml index b9f1f6cfd..abae78d8a 100644 --- a/integrations/fastembed/pyproject.toml +++ b/integrations/fastembed/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.0.1", "fastembed>=0.2.5", "onnxruntime<1.20.0"] +dependencies = ["haystack-ai>=2.0.1", "fastembed>=0.4.2"] [project.urls] Source = "https://github.com/deepset-ai/haystack-core-integrations" @@ -154,6 +154,10 @@ omit = ["*/tests/*", "*/__init__.py"] show_missing = true exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] +[tool.pytest.ini_options] +minversion = "6.0" +markers = ["unit: unit tests", "integration: integration tests"] + [[tool.mypy.overrides]] module = [ "haystack.*", diff --git a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py new file mode 100644 index 000000000..ece5e858b --- /dev/null +++ 
b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/__init__.py @@ -0,0 +1,3 @@ +from .ranker import FastembedRanker + +__all__ = ["FastembedRanker"] diff --git a/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py new file mode 100644 index 000000000..8f077a30c --- /dev/null +++ b/integrations/fastembed/src/haystack_integrations/components/rankers/fastembed/ranker.py @@ -0,0 +1,202 @@ +from typing import Any, Dict, List, Optional + +from haystack import Document, component, default_from_dict, default_to_dict, logging + +from fastembed.rerank.cross_encoder import TextCrossEncoder + +logger = logging.getLogger(__name__) + + +@component +class FastembedRanker: + """ + Ranks Documents based on their similarity to the query using + [Fastembed models](https://qdrant.github.io/fastembed/examples/Supported_Models/). + + Documents are indexed from most to least semantically relevant to the query. + + Usage example: + ```python + from haystack import Document + from haystack_integrations.components.rankers.fastembed import FastembedRanker + + ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-6-v2", top_k=2) + + docs = [Document(content="Paris"), Document(content="Berlin")] + query = "What is the capital of germany?" + output = ranker.run(query=query, documents=docs) + print(output["documents"][0].content) + + # Berlin + ``` + """ + + def __init__( + self, + model_name: str = "Xenova/ms-marco-MiniLM-L-6-v2", + top_k: int = 10, + cache_dir: Optional[str] = None, + threads: Optional[int] = None, + batch_size: int = 64, + parallel: Optional[int] = None, + local_files_only: bool = False, + meta_fields_to_embed: Optional[List[str]] = None, + meta_data_separator: str = "\n", + ): + """ + Creates an instance of the 'FastembedRanker'. + + :param model_name: Fastembed model name. 
Check the list of supported models in the [Fastembed documentation](https://qdrant.github.io/fastembed/examples/Supported_Models/). + :param top_k: The maximum number of documents to return. + :param cache_dir: The path to the cache directory. + Can be set using the `FASTEMBED_CACHE_PATH` env variable. + Defaults to `fastembed_cache` in the system's temp directory. + :param threads: The number of threads single onnxruntime session can use. Defaults to None. + :param batch_size: Number of strings to encode at once. + :param parallel: + If > 1, data-parallel encoding will be used, recommended for offline encoding of large datasets. + If 0, use all available cores. + If None, don't use data-parallel processing, use default onnxruntime threading instead. + :param local_files_only: If `True`, only use the model files in the `cache_dir`. + :param meta_fields_to_embed: List of meta fields that should be concatenated + with the document content for reranking. + :param meta_data_separator: Separator used to concatenate the meta fields + to the Document content. + """ + if top_k <= 0: + msg = f"top_k must be > 0, but got {top_k}" + raise ValueError(msg) + + self.model_name = model_name + self.top_k = top_k + self.cache_dir = cache_dir + self.threads = threads + self.batch_size = batch_size + self.parallel = parallel + self.local_files_only = local_files_only + self.meta_fields_to_embed = meta_fields_to_embed or [] + self.meta_data_separator = meta_data_separator + self._model = None + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. 
+ """ + return default_to_dict( + self, + model_name=self.model_name, + top_k=self.top_k, + cache_dir=self.cache_dir, + threads=self.threads, + batch_size=self.batch_size, + parallel=self.parallel, + local_files_only=self.local_files_only, + meta_fields_to_embed=self.meta_fields_to_embed, + meta_data_separator=self.meta_data_separator, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "FastembedRanker": + """ + Deserializes the component from a dictionary. + + :param data: + The dictionary to deserialize from. + :returns: + The deserialized component. + """ + return default_from_dict(cls, data) + + def warm_up(self): + """ + Initializes the component. + """ + if self._model is None: + self._model = TextCrossEncoder( + model_name=self.model_name, + cache_dir=self.cache_dir, + threads=self.threads, + local_files_only=self.local_files_only, + ) + + def _prepare_fastembed_input_docs(self, documents: List[Document]) -> List[str]: + """ + Prepare the input by concatenating the document text with the metadata fields specified. + :param documents: The list of Document objects. + + :return: A list of strings to be given as input to Fastembed model. + """ + concatenated_input_list = [] + for doc in documents: + meta_values_to_embed = [ + str(doc.meta[key]) for key in self.meta_fields_to_embed if key in doc.meta and doc.meta.get(key) + ] + concatenated_input = self.meta_data_separator.join([*meta_values_to_embed, doc.content or ""]) + concatenated_input_list.append(concatenated_input) + + return concatenated_input_list + + @component.output_types(documents=List[Document]) + def run(self, query: str, documents: List[Document], top_k: Optional[int] = None): + """ + Returns a list of documents ranked by their similarity to the given query, using FastEmbed. + + :param query: + The input query to compare the documents to. + :param documents: + A list of documents to be ranked. + :param top_k: + The maximum number of documents to return. 
+ + :returns: + A dictionary with the following keys: + - `documents`: A list of documents closest to the query, sorted from most similar to least similar. + + :raises ValueError: If `top_k` is not > 0. + """ + if not isinstance(documents, list) or documents and not isinstance(documents[0], Document): + msg = "FastembedRanker expects a list of Documents as input. " + raise TypeError(msg) + if query == "": + msg = "No query provided" + raise ValueError(msg) + + if not documents: + return {"documents": []} + + top_k = top_k or self.top_k + if top_k <= 0: + msg = f"top_k must be > 0, but got {top_k}" + raise ValueError(msg) + + if self._model is None: + msg = "The ranker model has not been loaded. Please call warm_up() before running." + raise RuntimeError(msg) + + fastembed_input_docs = self._prepare_fastembed_input_docs(documents) + + scores = list( + self._model.rerank( + query=query, + documents=fastembed_input_docs, + batch_size=self.batch_size, + parallel=self.parallel, + ) + ) + + # Combine the two lists into a single list of tuples + doc_scores = list(zip(documents, scores)) + + # Sort the list of tuples by the score in descending order + sorted_doc_scores = sorted(doc_scores, key=lambda x: x[1], reverse=True) + + # Get the top_k documents + top_k_documents = [] + for doc, score in sorted_doc_scores[:top_k]: + doc.score = score + top_k_documents.append(doc) + + return {"documents": top_k_documents} diff --git a/integrations/fastembed/tests/test_fastembed_ranker.py b/integrations/fastembed/tests/test_fastembed_ranker.py new file mode 100644 index 000000000..e38229c87 --- /dev/null +++ b/integrations/fastembed/tests/test_fastembed_ranker.py @@ -0,0 +1,292 @@ +from unittest.mock import MagicMock + +import pytest +from haystack import Document, default_from_dict + +from haystack_integrations.components.rankers.fastembed.ranker import ( + FastembedRanker, +) + + +class TestFastembedRanker: + def test_init_default(self): + """ + Test default initialization 
parameters for FastembedRanker. + """ + ranker = FastembedRanker(model_name="BAAI/bge-reranker-base") + assert ranker.model_name == "BAAI/bge-reranker-base" + assert ranker.top_k == 10 + assert ranker.cache_dir is None + assert ranker.threads is None + assert ranker.batch_size == 64 + assert ranker.parallel is None + assert not ranker.local_files_only + assert ranker.meta_fields_to_embed == [] + assert ranker.meta_data_separator == "\n" + + def test_init_with_parameters(self): + """ + Test custom initialization parameters for FastembedRanker. + """ + ranker = FastembedRanker( + model_name="BAAI/bge-reranker-base", + top_k=64, + cache_dir="fake_dir", + threads=2, + batch_size=50, + parallel=1, + local_files_only=True, + meta_fields_to_embed=["test_field"], + meta_data_separator=" | ", + ) + assert ranker.model_name == "BAAI/bge-reranker-base" + assert ranker.top_k == 64 + assert ranker.cache_dir == "fake_dir" + assert ranker.threads == 2 + assert ranker.batch_size == 50 + assert ranker.parallel == 1 + assert ranker.local_files_only + assert ranker.meta_fields_to_embed == ["test_field"] + assert ranker.meta_data_separator == " | " + + def test_init_with_incorrect_input(self): + """ + Test for checking incorrect input format on init + """ + with pytest.raises( + ValueError, + match="top_k must be > 0, but got 0", + ): + FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-12-v2", top_k=0) + + with pytest.raises( + ValueError, + match="top_k must be > 0, but got -3", + ): + FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-12-v2", top_k=-3) + + def test_to_dict(self): + """ + Test serialization of FastembedRanker to a dictionary, using default initialization parameters. 
+ """ + ranker = FastembedRanker(model_name="BAAI/bge-reranker-base") + ranker_dict = ranker.to_dict() + assert ranker_dict == { + "type": "haystack_integrations.components.rankers.fastembed.ranker.FastembedRanker", + "init_parameters": { + "model_name": "BAAI/bge-reranker-base", + "top_k": 10, + "cache_dir": None, + "threads": None, + "batch_size": 64, + "parallel": None, + "local_files_only": False, + "meta_fields_to_embed": [], + "meta_data_separator": "\n", + }, + } + + def test_to_dict_with_custom_init_parameters(self): + """ + Test serialization of FastembedRanker to a dictionary, using custom initialization parameters. + """ + ranker = FastembedRanker( + model_name="BAAI/bge-reranker-base", + cache_dir="fake_dir", + threads=2, + top_k=5, + batch_size=50, + parallel=1, + local_files_only=True, + meta_fields_to_embed=["test_field"], + meta_data_separator=" | ", + ) + ranker_dict = ranker.to_dict() + assert ranker_dict == { + "type": "haystack_integrations.components.rankers.fastembed.ranker.FastembedRanker", + "init_parameters": { + "model_name": "BAAI/bge-reranker-base", + "cache_dir": "fake_dir", + "threads": 2, + "top_k": 5, + "batch_size": 50, + "parallel": 1, + "local_files_only": True, + "meta_fields_to_embed": ["test_field"], + "meta_data_separator": " | ", + }, + } + + def test_from_dict(self): + """ + Test deserialization of FastembedRanker from a dictionary, using default initialization parameters. 
+ """ + ranker_dict = { + "type": "haystack_integrations.components.rankers.fastembed.ranker.FastembedRanker", + "init_parameters": { + "model_name": "BAAI/bge-reranker-base", + "cache_dir": None, + "threads": None, + "top_k": 5, + "batch_size": 50, + "parallel": None, + "local_files_only": False, + "meta_fields_to_embed": [], + "meta_data_separator": "\n", + }, + } + ranker = default_from_dict(FastembedRanker, ranker_dict) + assert ranker.model_name == "BAAI/bge-reranker-base" + assert ranker.cache_dir is None + assert ranker.threads is None + assert ranker.top_k == 5 + assert ranker.batch_size == 50 + assert ranker.parallel is None + assert not ranker.local_files_only + assert ranker.meta_fields_to_embed == [] + assert ranker.meta_data_separator == "\n" + + def test_from_dict_with_custom_init_parameters(self): + """ + Test deserialization of FastembedRanker from a dictionary, using custom initialization parameters. + """ + ranker_dict = { + "type": "haystack_integrations.components.rankers.fastembed.ranker.FastembedRanker", + "init_parameters": { + "model_name": "BAAI/bge-reranker-base", + "cache_dir": "fake_dir", + "threads": 2, + "top_k": 5, + "batch_size": 50, + "parallel": 1, + "local_files_only": True, + "meta_fields_to_embed": ["test_field"], + "meta_data_separator": " | ", + }, + } + ranker = default_from_dict(FastembedRanker, ranker_dict) + assert ranker.model_name == "BAAI/bge-reranker-base" + assert ranker.cache_dir == "fake_dir" + assert ranker.threads == 2 + assert ranker.top_k == 5 + assert ranker.batch_size == 50 + assert ranker.parallel == 1 + assert ranker.local_files_only + assert ranker.meta_fields_to_embed == ["test_field"] + assert ranker.meta_data_separator == " | " + + def test_run_incorrect_input_format(self): + """ + Test for checking incorrect input format. 
+ """ + ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-12-v2") + ranker._model = "mock_model" + + query = "query" + string_input = "text" + list_integers_input = [1, 2, 3] + list_document = [Document("Document 1")] + + with pytest.raises( + TypeError, + match="FastembedRanker expects a list of Documents as input.", + ): + ranker.run(query=query, documents=string_input) + + with pytest.raises( + TypeError, + match="FastembedRanker expects a list of Documents as input.", + ): + ranker.run(query=query, documents=list_integers_input) + + with pytest.raises( + ValueError, + match="No query provided", + ): + ranker.run(query="", documents=list_document) + + with pytest.raises( + ValueError, + match="top_k must be > 0, but got -3", + ): + ranker.run(query=query, documents=list_document, top_k=-3) + + def test_run_no_warmup(self): + """ + Test for checking error when calling without a warmup. + """ + ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-12-v2") + + query = "query" + list_document = [Document("Document 1")] + + with pytest.raises( + RuntimeError, + ): + ranker.run(query=query, documents=list_document) + + def test_run_empty_document_list(self): + """ + Test for no error when sending no documents. + """ + ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-12-v2") + ranker._model = "mock_model" + + query = "query" + list_document = [] + + result = ranker.run(query=query, documents=list_document) + assert len(result["documents"]) == 0 + + def test_embed_metadata(self): + """ + Tests the embedding of metadata fields in document content for ranking. 
+ """ + ranker = FastembedRanker( + model_name="model_name", + meta_fields_to_embed=["meta_field"], + ) + ranker._model = MagicMock() + + documents = [Document(content=f"document-number {i}", meta={"meta_field": f"meta_value {i}"}) for i in range(5)] + query = "test" + ranker.run(query=query, documents=documents) + + ranker._model.rerank.assert_called_once_with( + query=query, + documents=[ + "meta_value 0\ndocument-number 0", + "meta_value 1\ndocument-number 1", + "meta_value 2\ndocument-number 2", + "meta_value 3\ndocument-number 3", + "meta_value 4\ndocument-number 4", + ], + batch_size=64, + parallel=None, + ) + + @pytest.mark.integration + def test_run(self): + ranker = FastembedRanker(model_name="Xenova/ms-marco-MiniLM-L-6-v2", top_k=2) + ranker.warm_up() + + query = "Who is maintaining Qdrant?" + documents = [ + Document( + content="This is built to be faster and lighter than other embedding \ +libraries e.g. Transformers, Sentence-Transformers, etc." + ), + Document(content="This is some random input"), + Document(content="fastembed is supported by and maintained by Qdrant."), + ] + + result = ranker.run(query=query, documents=documents) + + assert len(result["documents"]) == 2 + first_document = result["documents"][0] + second_document = result["documents"][1] + + assert isinstance(first_document, Document) + assert isinstance(second_document, Document) + assert first_document.content == "fastembed is supported by and maintained by Qdrant." + assert first_document.score > second_document.score