diff --git a/docs/pydoc/config/websearch_api.yml b/docs/pydoc/config/websearch_api.yml index 7eae4b95a4..47404f2d32 100644 --- a/docs/pydoc/config/websearch_api.yml +++ b/docs/pydoc/config/websearch_api.yml @@ -1,7 +1,7 @@ loaders: - type: haystack_pydoc_tools.loaders.CustomPythonLoader search_path: [../../../haystack/components/websearch] - modules: ["serper_dev"] + modules: ["serper_dev", "searchapi"] ignore_when_discovered: ["__init__"] processors: - type: filter diff --git a/haystack/components/websearch/searchapi.py b/haystack/components/websearch/searchapi.py index 8a84c9c7b6..fcc6a6be00 100644 --- a/haystack/components/websearch/searchapi.py +++ b/haystack/components/websearch/searchapi.py @@ -1,6 +1,6 @@ import json import logging -from typing import Dict, List, Optional, Any +from typing import Dict, List, Optional, Any, Union import requests @@ -20,9 +20,21 @@ class SearchApiError(ComponentError): @component class SearchApiWebSearch: """ - Search engine using SearchApi API. Given a query, it returns a list of URLs that are the most relevant. + Uses [SearchApi](https://www.searchapi.io/) to search the web for relevant documents. See the [SearchApi website](https://www.searchapi.io/) for more details. + + Usage example: + ```python + from haystack.components.websearch import SearchApiWebSearch + from haystack.utils import Secret + + websearch = SearchApiWebSearch(top_k=10, api_key=Secret.from_token("test-api-key")) + results = websearch.run(query="Who is the boyfriend of Olivia Wilde?") + + assert results["documents"] + assert results["links"] + ``` """ def __init__( @@ -37,8 +49,8 @@ def __init__( :param top_k: Number of documents to return. :param allowed_domains: List of domains to limit the search to. :param search_params: Additional parameters passed to the SearchApi API. - For example, you can set 'num' to 100 to increase the number of search results. - See the [SearchApi website](https://www.searchapi.io/) for more details. + For example, you can set 'num' to 100 to increase the number of search results. + See the [SearchApi website](https://www.searchapi.io/) for more details. """ self.api_key = api_key @@ -51,7 +63,10 @@ def __init__( def to_dict(self) -> Dict[str, Any]: """ - Serialize this component to a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. """ return default_to_dict( self, @@ -64,17 +79,27 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "SearchApiWebSearch": """ - Deserialize this component from a dictionary. + Deserializes the component from a dictionary. + + :param data: + The dictionary to deserialize from. + :returns: + The deserialized component. """ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - @component.output_types(documents=List[Document], links=List[str]) - def run(self, query: str): + @component.output_types(documents=List[Document], links=Union[List[Document], List[str]]) + def run(self, query: str) -> Dict[str, Union[List[Document], List[str]]]: """ - Search the SearchApi API for the given query and return the results as a list of Documents and a list of links. - - :param query: Query string. + Uses [SearchApi](https://www.searchapi.io/) to search the web. + + :param query: Search query. + :returns: A dictionary with the following keys: + - "documents": List of documents returned by the search engine. + - "links": List of links returned by the search engine. + :raises TimeoutError: If the request to the SearchApi API times out. + :raises SearchApiError: If an error occurs while querying the SearchApi API. """ query_prepend = "OR ".join(f"site:{domain} " for domain in self.allowed_domains) if self.allowed_domains else "" @@ -84,8 +109,8 @@ def run(self, query: str): try: response = requests.get(SEARCHAPI_BASE_URL, headers=headers, params=payload, timeout=90) response.raise_for_status() # Will raise an HTTPError for bad responses - except requests.Timeout: - raise TimeoutError(f"Request to {self.__class__.__name__} timed out.") + except requests.Timeout as error: + raise TimeoutError(f"Request to {self.__class__.__name__} timed out.") from error except requests.RequestException as e: raise SearchApiError(f"An error occurred while querying {self.__class__.__name__}. Error: {e}") from e diff --git a/haystack/components/websearch/serper_dev.py b/haystack/components/websearch/serper_dev.py index a358591b14..87ee740655 100644 --- a/haystack/components/websearch/serper_dev.py +++ b/haystack/components/websearch/serper_dev.py @@ -1,6 +1,6 @@ import json import logging -from typing import Dict, List, Optional, Any +from typing import Dict, List, Optional, Any, Union import requests @@ -20,9 +20,21 @@ class SerperDevError(ComponentError): @component class SerperDevWebSearch: """ - Search engine using SerperDev API. Given a query, it returns a list of URLs that are the most relevant. + Uses [Serper](https://serper.dev/) to search the web for relevant documents. See the [Serper Dev website](https://serper.dev/) for more details. + + Usage example: + ```python + from haystack.components.websearch import SerperDevWebSearch + from haystack.utils import Secret + + websearch = SerperDevWebSearch(top_k=10, api_key=Secret.from_token("test-api-key")) + results = websearch.run(query="Who is the boyfriend of Olivia Wilde?") + + assert results["documents"] + assert results["links"] + ``` """ def __init__( @@ -33,12 +45,12 @@ def __init__( search_params: Optional[Dict[str, Any]] = None, ): """ - :param api_key: API key for the SerperDev API. + :param api_key: API key for the Serper API. :param top_k: Number of documents to return. :param allowed_domains: List of domains to limit the search to. - :param search_params: Additional parameters passed to the SerperDev API. - For example, you can set 'num' to 20 to increase the number of search results. - See the [Serper Dev website](https://serper.dev/) for more details. + :param search_params: Additional parameters passed to the Serper API. + For example, you can set 'num' to 20 to increase the number of search results. + See the [Serper website](https://serper.dev/) for more details. """ self.api_key = api_key self.top_k = top_k @@ -50,7 +62,10 @@ def __init__( def to_dict(self) -> Dict[str, Any]: """ - Serialize this component to a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. """ return default_to_dict( self, @@ -63,17 +78,25 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "SerperDevWebSearch": """ - Deserialize this component from a dictionary. + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. """ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) - @component.output_types(documents=List[Document], links=List[str]) - def run(self, query: str): + @component.output_types(documents=List[Document], links=Union[List[Document], List[str]]) + def run(self, query: str) -> Dict[str, Union[List[Document], List[str]]]: """ - Search the SerperDev API for the given query and return the results as a list of Documents and a list of links. - - :param query: Query string. + Use [Serper](https://serper.dev/) to search the web. + + :param query: Search query. + :returns: A dictionary with the following keys: + - "documents": List of documents returned by the search engine. + - "links": List of links returned by the search engine. + :raises SerperDevError: If an error occurs while querying the SerperDev API. + :raises TimeoutError: If the request to the SerperDev API times out. """ query_prepend = "OR ".join(f"site:{domain} " for domain in self.allowed_domains) if self.allowed_domains else "" @@ -85,8 +108,8 @@ def run(self, query: str): try: response = requests.post(SERPERDEV_BASE_URL, headers=headers, data=payload, timeout=30) # type: ignore response.raise_for_status() # Will raise an HTTPError for bad responses - except requests.Timeout: - raise TimeoutError(f"Request to {self.__class__.__name__} timed out.") + except requests.Timeout as error: + raise TimeoutError(f"Request to {self.__class__.__name__} timed out.") from error except requests.RequestException as e: raise SerperDevError(f"An error occurred while querying {self.__class__.__name__}. Error: {e}") from e diff --git a/pyproject.toml b/pyproject.toml index eb06323354..ddb0fc9ae8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ dependencies = [ "pyyaml", "more-itertools", # TextDocumentSplitter "networkx", # Pipeline graphs - "typing_extensions>=3.7", # typing support for Python 3.8 + "typing_extensions>=4.7", # typing support for Python 3.8 "boilerpy3", # Fulltext extraction from HTML pages ] diff --git a/releasenotes/notes/pin-typing-extensions-c4026f59603445b7.yaml b/releasenotes/notes/pin-typing-extensions-c4026f59603445b7.yaml index 81d43c5067..88e3ddeae2 100644 --- a/releasenotes/notes/pin-typing-extensions-c4026f59603445b7.yaml +++ b/releasenotes/notes/pin-typing-extensions-c4026f59603445b7.yaml @@ -1,5 +1,5 @@ --- fixes: - | - Pin the `typing-extensions` package to versions >= 3.7 to avoid + Pin the `typing-extensions` package to versions >= 4.7 to avoid [incompatibilities with the `openai` package](https://community.openai.com/t/error-while-importing-openai-from-open-import-openai/578166/26). diff --git a/test/components/websearch/test_serperdev.py b/test/components/websearch/test_serperdev.py index e737c267ea..7bcb799e99 100644 --- a/test/components/websearch/test_serperdev.py +++ b/test/components/websearch/test_serperdev.py @@ -174,7 +174,7 @@ def test_web_search(self): ws = SerperDevWebSearch(top_k=10) results = ws.run(query="Who is the boyfriend of Olivia Wilde?") documents = results["documents"] - links = results["documents"] + links = results["links"] assert len(documents) == len(links) == 10 assert all(isinstance(doc, Document) for doc in results) assert all(isinstance(link, str) for link in links)