Skip to content

Commit

Permalink
fix: Add search_engine_kwargs param to WebRetriever to pass to WebSearch (#5805)
Browse files
Browse the repository at this point in the history

* Add search_engine_kwargs param to WebRetriever to pass to WebSearch

* add relnote

---------

Co-authored-by: Massimiliano Pippi <[email protected]>
  • Loading branch information
ayushjain-ow and masci authored Oct 30, 2023
1 parent 1a64fc5 commit 655bf68
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
5 changes: 4 additions & 1 deletion haystack/nodes/retriever/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from multiprocessing import cpu_count
from typing import Dict, Iterator, List, Optional, Literal, Union, Tuple
from typing import Dict, Iterator, List, Optional, Literal, Union, Tuple, Any

from haystack.schema import Document
from haystack.document_stores.base import BaseDocumentStore
Expand Down Expand Up @@ -50,6 +50,7 @@ def __init__(
self,
api_key: str,
search_engine_provider: Union[str, SearchEngine] = "SerperDev",
search_engine_kwargs: Optional[Dict[str, Any]] = None,
top_search_results: Optional[int] = 10,
top_k: Optional[int] = 5,
mode: Literal["snippets", "raw_documents", "preprocessed_documents"] = "snippets",
Expand All @@ -64,6 +65,7 @@ def __init__(
"""
:param api_key: API key for the search engine provider.
:param search_engine_provider: Name of the search engine provider class. The options are "SerperDev" (default), "SerpAPI", "BingAPI" or "GoogleAPI"
:param search_engine_kwargs: Additional parameters to pass to the search engine provider.
:param top_search_results: Number of top search results to be retrieved.
:param top_k: Top k documents to be returned by the retriever.
:param mode: Whether to return snippets, raw documents, or preprocessed documents. Snippets are the default.
Expand All @@ -83,6 +85,7 @@ def __init__(
top_k=top_search_results,
allowed_domains=allowed_domains,
search_engine_provider=search_engine_provider,
search_engine_kwargs=search_engine_kwargs,
)
self.link_content_fetcher = link_content_fetcher or LinkContentFetcher()
self.mode = mode
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
enhancements:
- |
Add the `search_engine_kwargs` parameter to WebRetriever so that it can be
propagated to WebSearch. This is useful, for example, to pass the engine ID
when using Google Custom Search.

0 comments on commit 655bf68

Please sign in to comment.