From 655bf68b7a6171d85ad0dda5007537963f7b0203 Mon Sep 17 00:00:00 2001 From: Ayush Jain <125379023+ayushjain-ow@users.noreply.github.com> Date: Mon, 30 Oct 2023 17:20:00 +0530 Subject: [PATCH] fix: Add search_engine_kwargs param to WebRetriever to pass to WebSearch (#5805) * Add search_engine_kwargs param to WebRetriever to pass to WebSearch * add relnote --------- Co-authored-by: Massimiliano Pippi --- haystack/nodes/retriever/web.py | 5 ++++- ...rch_engine_kwargs-to-web-retriever-67fac44ef0039b7f.yaml | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/add-search_engine_kwargs-to-web-retriever-67fac44ef0039b7f.yaml diff --git a/haystack/nodes/retriever/web.py b/haystack/nodes/retriever/web.py index a30dee8135..23f29bc8d7 100644 --- a/haystack/nodes/retriever/web.py +++ b/haystack/nodes/retriever/web.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from datetime import datetime, timedelta from multiprocessing import cpu_count -from typing import Dict, Iterator, List, Optional, Literal, Union, Tuple +from typing import Dict, Iterator, List, Optional, Literal, Union, Tuple, Any from haystack.schema import Document from haystack.document_stores.base import BaseDocumentStore @@ -50,6 +50,7 @@ def __init__( self, api_key: str, search_engine_provider: Union[str, SearchEngine] = "SerperDev", + search_engine_kwargs: Optional[Dict[str, Any]] = None, top_search_results: Optional[int] = 10, top_k: Optional[int] = 5, mode: Literal["snippets", "raw_documents", "preprocessed_documents"] = "snippets", @@ -64,6 +65,7 @@ def __init__( """ :param api_key: API key for the search engine provider. :param search_engine_provider: Name of the search engine provider class. The options are "SerperDev" (default), "SerpAPI", "BingAPI" or "GoogleAPI" + :param search_engine_kwargs: Additional parameters to pass to the search engine provider. :param top_search_results: Number of top search results to be retrieved. :param top_k: Top k documents to be returned by the retriever. :param mode: Whether to return snippets, raw documents, or preprocessed documents. Snippets are the default. @@ -83,6 +85,7 @@ def __init__( top_k=top_search_results, allowed_domains=allowed_domains, search_engine_provider=search_engine_provider, + search_engine_kwargs=search_engine_kwargs, ) self.link_content_fetcher = link_content_fetcher or LinkContentFetcher() self.mode = mode diff --git a/releasenotes/notes/add-search_engine_kwargs-to-web-retriever-67fac44ef0039b7f.yaml b/releasenotes/notes/add-search_engine_kwargs-to-web-retriever-67fac44ef0039b7f.yaml new file mode 100644 index 0000000000..c3dcf8c96a --- /dev/null +++ b/releasenotes/notes/add-search_engine_kwargs-to-web-retriever-67fac44ef0039b7f.yaml @@ -0,0 +1,6 @@ +--- +enhancements: + - | + Add `search_engine_kwargs` param to WebRetriever so it can be propagated + to WebSearch. This is useful, for example, to pass the engine id when + using Google Custom Search.