From eaace9ae9bf2a5e2ad8d319766aab7d7b5591be2 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Wed, 8 Apr 2020 01:31:39 +0200 Subject: [PATCH] Issue #12: Invalid proxynova row --- proxy_db/providers.py | 14 +++++++++++--- tests/test_providers.py | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/proxy_db/providers.py b/proxy_db/providers.py index 121fbe3..94f6c62 100644 --- a/proxy_db/providers.py +++ b/proxy_db/providers.py @@ -136,12 +136,20 @@ def soup_items(self, soup): def soup_item(self, item): # document.write('12345678190.7'.substr(8) + '7.81.128'); - script = item.find('script').string - port = ''.join(item.find_all('td')[1].stripped_strings) + script = item.find('script') + if script is None: + self.logger.warning('Script tag is no available in item {}'.format(item)) + return None + script = script.string or '' + td_tags = item.find_all('td') + if len(td_tags) < 2: + self.logger.warning('td tag including port is not available in item {}'.format(item)) + return None + port = ''.join(td_tags[1].stripped_strings or '') subs = script.split("'") matchs = re.match('.+substr\((\d+)\).+', script) if matchs is None: - self.logger.warning('Invalid item: {}'.format(item)) + self.logger.warning('Invalid script value for item {}'.format(item)) return None substr = int(matchs.group(1)) start = subs[1][substr:] diff --git a/tests/test_providers.py b/tests/test_providers.py index 0e65d87..70d089d 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -14,7 +14,7 @@ URL = 'https://domain.com/' PROVIDER_HTML = """ Proxy: 12.131.91.51:8888 -Other: 8.10.81.82:7171. +Other: 8.10.81.82:7171. """ PROXY_NOVA_HTML = """ @@ -34,6 +34,30 @@ """ +PROXY_NOVA_INVALID_ROWS_HTML = """ + + Script tag is not available + + + 7070 + + + + Second td is not available + + + + + + Invalid script value + + + + + 7070 + + +""" class TestProviderRequestBase(unittest.TestCase): @@ -126,6 +150,19 @@ def test_find_page_proxies(self): {'proxy': '190.900.48.190:7070'}, ]) + @patch("proxy_db.providers.getLogger") + def test_invalid_rows(self, m): + provider = ProxyNovaCom() + request = Mock() + request.text = PROXY_NOVA_INVALID_ROWS_HTML + self.assertEqual(provider.find_page_proxies(request), []) + self.assertEqual( + m.return_value.warning.call_count, 3, + "Expected 'warning' to have been called 3 times. Called {}".format( + m.return_value.warning.call_count + ) + ) + class TestNoProviderInfiniteLoop(unittest.TestCase): """Test to make sure that it doesn't fall into an infinite loop when