Skip to content

Commit

Permalink
Issue #12: Invalid proxynova row
Browse files Browse the repository at this point in the history
  • Loading branch information
Nekmo committed Apr 7, 2020
1 parent 2eb9208 commit eaace9a
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 4 deletions.
14 changes: 11 additions & 3 deletions proxy_db/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,20 @@ def soup_items(self, soup):

def soup_item(self, item):
# document.write('12345678190.7'.substr(8) + '7.81.128');
script = item.find('script').string
port = ''.join(item.find_all('td')[1].stripped_strings)
script = item.find('script')
if script is None:
self.logger.warning('Script tag is no available in item {}'.format(item))
return None
script = script.string or ''
td_tags = item.find_all('td')
if len(td_tags) < 2:
self.logger.warning('td tag including port is not available in item {}'.format(item))
return None
port = ''.join(td_tags[1].stripped_strings or '')
subs = script.split("'")
matchs = re.match('.+substr\((\d+)\).+', script)
if matchs is None:
self.logger.warning('Invalid item: {}'.format(item))
self.logger.warning('Invalid script value for item {}'.format(item))
return None
substr = int(matchs.group(1))
start = subs[1][substr:]
Expand Down
39 changes: 38 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
URL = 'https://domain.com/'
PROVIDER_HTML = """
Proxy: 12.131.91.51:8888
<b>Other: 8.10.81.82:7171.</b>
<b>Other: 8.10.81.82:7171.</b>
"""
PROXY_NOVA_HTML = """
<tr data-proxy-id="00000000">
Expand All @@ -34,6 +34,30 @@
</td>
</tr>
"""
# Fixture: three malformed ProxyNova table rows, each labelled inline with the
# defect it carries:
#   1. the first <td> is empty, so the row contains no <script> tag;
#   2. the row has only one <td>, so there is no second cell holding the port;
#   3. the <script> body has quoted strings but no ``substr(<n>)`` call.
# NOTE(review): the stray "<" before "</script>" in the second row looks
# accidental -- confirm before relying on it.
PROXY_NOVA_INVALID_ROWS_HTML = """
<tr data-proxy-id="00000000">
Script tag is not available
<td align="left" onclick="javascript:check_proxy(this)"></td>
<td align="left">
<a href="/proxy-server-list/port-7070/" title="Port 7070 proxies">7070</a>
</td>
</tr>
<tr data-proxy-id="00000000">
Second td is not available
<td align="left" onclick="javascript:check_proxy(this)">
<abbr title=""><script>document.write('12345678190.9'.substr(8) + '5.300.123');<</script> </abbr>
</td>
</tr>
<tr data-proxy-id="00000000">
Invalid script value
<td align="left" onclick="javascript:check_proxy(this)">
<abbr title=""><script>'foo' 'bar' 'spam'</script> </abbr>
</td>
<td align="left">
<a href="/proxy-server-list/port-7070/" title="Port 7070 proxies">7070</a>
</td>
</tr>
"""


class TestProviderRequestBase(unittest.TestCase):
Expand Down Expand Up @@ -126,6 +150,19 @@ def test_find_page_proxies(self):
{'proxy': '190.900.48.190:7070'},
])

@patch("proxy_db.providers.getLogger")
def test_invalid_rows(self, m):
    # Feed the provider a fake response whose body holds only malformed rows.
    response = Mock()
    response.text = PROXY_NOVA_INVALID_ROWS_HTML

    # None of the rows may yield a proxy.
    proxies = ProxyNovaCom().find_page_proxies(response)
    self.assertEqual(proxies, [])

    # The patched getLogger() returns one mock logger; exactly three
    # warnings are expected on it, one per row in the fixture.
    warning_calls = m.return_value.warning.call_count
    self.assertEqual(
        warning_calls, 3,
        "Expected 'warning' to have been called 3 times. Called {}".format(
            warning_calls
        )
    )


class TestNoProviderInfiniteLoop(unittest.TestCase):
"""Test to make sure that it doesn't fall into an infinite loop when
Expand Down

0 comments on commit eaace9a

Please sign in to comment.