From 27ed1c152bcae34492b118ab228ea75a34516d23 Mon Sep 17 00:00:00 2001 From: Skyler Grey Date: Tue, 17 Sep 2024 12:39:48 +0000 Subject: [PATCH 1/4] fix(aiohttp): Update parameters to unmangle aiohttp initialization Previously we had to pass the same parameters to the BaseAdapter and the aiohttp ClientSession. This made initializing the ClientSession impossible, as it can't accept the apikey and engine_id parameters which we are required to pass in for the BaseAdapter. --- google_custom_search/adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google_custom_search/adapter.py b/google_custom_search/adapter.py index 9aa3c30..16e7a8e 100644 --- a/google_custom_search/adapter.py +++ b/google_custom_search/adapter.py @@ -89,8 +89,8 @@ def search(self, *args, **kwargs) -> List[Item]: class AiohttpAdapter(BaseAdapter): "This class is aiohttpadapter for async mode." - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, apikey, engine_id, *args, **kwargs): + super().__init__(apikey, engine_id) if not async_mode: raise AsyncError( "This adapter use aiohttp, so please install aiohttp") From fc0243e5df1f0d47b83041810b9bab1aab4e7dd1 Mon Sep 17 00:00:00 2001 From: Skyler Grey Date: Tue, 17 Sep 2024 12:45:18 +0000 Subject: [PATCH 2/4] feat: Allow arbitrary parameters There are a lot more parameters to limit the search than what could previously be specified. 
By extending `_payload_maker` to take kwargs and merge them with the required parameters (which take precedence on conflict), callers can filter results using these additional parameters. --- google_custom_search/adapter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/google_custom_search/adapter.py b/google_custom_search/adapter.py index 16e7a8e..53ed100 100644 --- a/google_custom_search/adapter.py +++ b/google_custom_search/adapter.py @@ -52,9 +52,10 @@ def _from_dict(self, data: dict) -> List[Item]: def _payload_maker( self, query: str, *, safe: bool = False, - filter_: bool = False + filter_: bool = False, + **kwargs ) -> dict: - payload = { + payload = kwargs | { "key": self.apikey, "cx": self.engine_id, "q": query From 002dfbc5e9bd26f2b6877505fc586a95cbcf3c30 Mon Sep 17 00:00:00 2001 From: Skyler Grey Date: Tue, 17 Sep 2024 12:53:20 +0000 Subject: [PATCH 3/4] fix(aiohttp): Prevent double access of .json Previously both the `request` and `search` methods called `await r.json()`, leading to the `search` method calling `.json()` on a dictionary. This caused the following error: `'dict' object has no attribute 'json'` --- google_custom_search/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_custom_search/adapter.py b/google_custom_search/adapter.py index 53ed100..1293d36 100644 --- a/google_custom_search/adapter.py +++ b/google_custom_search/adapter.py @@ -107,4 +107,4 @@ async def search(self, *args, **kwargs) -> List[Item]: r = await self.request( "GET", "/", params=self._payload_maker(*args, **kwargs) ) - return self._from_dict(await r.json()) + return self._from_dict(r) From 78dfc88b39fbc1b9da34a5feae288a3fbfbc9833 Mon Sep 17 00:00:00 2001 From: Skyler Grey Date: Tue, 17 Sep 2024 14:23:13 +0000 Subject: [PATCH 4/4] feat(aiohttp): Add async search generator The async generator (`asearch`) allows iterating through all 100 results which are available from the API. 
It does this by requesting new pages of results when the results are exhausted. --- google_custom_search/adapter.py | 27 +++++++++++++ google_custom_search/search.py | 72 +++++++++++++++++---------------- 2 files changed, 65 insertions(+), 34 deletions(-) diff --git a/google_custom_search/adapter.py b/google_custom_search/adapter.py index 1293d36..3c34c71 100644 --- a/google_custom_search/adapter.py +++ b/google_custom_search/adapter.py @@ -15,6 +15,8 @@ from .errors import AsyncError, ApiNotEnabled from .types import Item +from typing import AsyncGenerator + class BaseAdapter(metaclass=ABCMeta): """This is the base class for adapters. @@ -42,6 +44,9 @@ def request(self, method: str, path: str, *args, **kwargs) -> Any: def search(self, *args, **kwargs) -> List[Item]: ... + async def asearch(self, *_args, **_kwargs) -> AsyncGenerator[Item, None]: + raise NotImplementedError("You can only use 'asearch' on an asynchronous adapter") + def _from_dict(self, data: dict) -> List[Item]: if data.get('error'): raise ApiNotEnabled( @@ -108,3 +113,25 @@ async def search(self, *args, **kwargs) -> List[Item]: "GET", "/", params=self._payload_maker(*args, **kwargs) ) return self._from_dict(r) + + async def asearch(self, *args, **kwargs) -> AsyncGenerator[Item, None]: + limit = kwargs.get("limit", 100) + + if "limit" in kwargs: + del kwargs["limit"] + + while True: + page = await self.search(*args, **kwargs) + + for result in page: + yield result + + kwargs["start"] = kwargs.get("start", 1) + kwargs.get("num", 10) + + if kwargs["start"] + kwargs.get("num", 10) > limit: + kwargs["num"] = limit - kwargs["start"] + 1 # both ends of the range are inclusive + + if kwargs.get("num", 10) <= 0: + return + + diff --git a/google_custom_search/search.py b/google_custom_search/search.py index c699129..6545d29 100644 --- a/google_custom_search/search.py +++ b/google_custom_search/search.py @@ -1,34 +1,38 @@ -# google-custom-seaerch - search - -from typing import List - -from .types import Item 
-from .adapter import BaseAdapter - - -class CustomSearch: - """This is the class used when using Google Custom Search. - - Args: - adapter (BaseAdapter): Insert adapter - """ - APIURL: str = "https://www.googleapis.com/customsearch/v1" - - def __init__(self, adapter: BaseAdapter): - self.adapter = adapter - - def search(self, *args, **kwargs) -> List[Item]: - """This is searched using api. - - Args: - query (str): Search keyword - safe (bool): Using safe mode - filter_ (filter): Use filter mode - - Returns: - List[Item]: return result - - Raises: - ApiNotEnabled: api is not invalid - """ - return self.adapter.search(*args, **kwargs) +# google-custom-seaerch - search + +from typing import List, AsyncGenerator + +from .types import Item +from .adapter import BaseAdapter + + +class CustomSearch: + """This is the class used when using Google Custom Search. + + Args: + adapter (BaseAdapter): Insert adapter + """ + APIURL: str = "https://www.googleapis.com/customsearch/v1" + + def __init__(self, adapter: BaseAdapter): + self.adapter = adapter + + def search(self, *args, **kwargs) -> List[Item]: + """This is searched using api. + + Args: + query (str): Search keyword + safe (bool): Using safe mode + filter_ (filter): Use filter mode + + Returns: + List[Item]: return result + + Raises: + ApiNotEnabled: api is not invalid + """ + return self.adapter.search(*args, **kwargs) + + async def asearch(self, *args, **kwargs) -> AsyncGenerator[Item, None]: + async for item in self.adapter.asearch(*args, **kwargs): + yield item