diff --git a/search_engines/config.py b/search_engines/config.py
index 97f69d1..19a99a4 100644
--- a/search_engines/config.py
+++ b/search_engines/config.py
@@ -18,6 +18,9 @@
 FAKE_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; rv:84.0) Gecko/20100101 Firefox/84.0'
 
 ## Proxy server
+# A comma-separated list is accepted: one entry is picked at random and
+# entries without a scheme default to http://.
+# Example: PROXY = "1.1.1.3:8080,http://1.2.3.4:8080"
 PROXY = None
 
 ## TOR proxy server
diff --git a/search_engines/engine.py b/search_engines/engine.py
index 769306f..f54c40f 100644
--- a/search_engines/engine.py
+++ b/search_engines/engine.py
@@ -165,7 +165,11 @@ def search(self, query, pages=cfg.SEARCH_ENGINE_RESULTS_PAGES):
             try:
                 response = self._get_page(request['url'], request['data'])
                 if not self._is_ok(response):
-                    break
+                    # Abort instead of silently ending pagination; name the
+                    # concrete engine class rather than hard-coding "google".
+                    msg = f"{type(self).__name__} status not ok: {response.http}"
+                    raise RuntimeError(msg)
+
                 tags = BeautifulSoup(response.html, "html.parser")
                 items = self._filter_results(tags)
                 self._collect_results(items)
diff --git a/search_engines/http_client.py b/search_engines/http_client.py
index 61244ba..effe141 100644
--- a/search_engines/http_client.py
+++ b/search_engines/http_client.py
@@ -1,4 +1,5 @@
 import requests
+import random
 from collections import namedtuple
 
 from .config import TIMEOUT, PROXY, USER_AGENT
@@ -45,6 +46,15 @@ def _quote(self, url):
     def _set_proxy(self, proxy):
         '''Returns HTTP or SOCKS proxies dictionary.'''
         if proxy:
+            if "," in proxy:
+                # Comma-separated pool: trim whitespace, skip blank entries,
+                # default scheme-less entries to http://, then pick one.
+                entries = (x.strip() for x in proxy.split(","))
+                candidates = [x if utl.is_url(x) else f"http://{x}" for x in entries if x]
+                if not candidates:
+                    # e.g. "," or ", ": random.choice([]) would raise IndexError.
+                    raise ValueError('Invalid proxy format!')
+                proxy = random.choice(candidates)
             if not utl.is_url(proxy):
                 raise ValueError('Invalid proxy format!')
             proxy = {'http':proxy, 'https':proxy}