Skip to content
This repository has been archived by the owner on Sep 5, 2024. It is now read-only.

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelharms committed Jan 18, 2020
1 parent 1036e1b commit 55dee60
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 74 deletions.
38 changes: 4 additions & 34 deletions tests/comcrawl/utils/snapshots/snap_test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,12 +721,8 @@
</body>
</html>'''

snapshots['test_download_multiple_results_single_threaded 1'] = [
{
'charset': 'UTF-8',
'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz',
'html': '''<!DOCTYPE html>
snapshots['test_download_multiple_results_single_threaded 1'] = [{
'charset': 'UTF-8', 'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J', 'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz', 'html': '''<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="/static/__shared/shared.css"/>
Expand Down Expand Up @@ -1438,22 +1434,7 @@
</p>
</body>
</html>''',
'languages': 'eng',
'length': '3404',
'mime': 'text/html',
'mime-detected': 'text/html',
'offset': '68774745',
'status': '200',
'timestamp': '20191207172145',
'url': 'http://index.commoncrawl.org/',
'urlkey': 'org,commoncrawl,index)/'
},
{
'charset': 'UTF-8',
'digest': 'SVH4V5QDUS7SMXSXZYB2XWJSVDWFXUD7',
'filename': 'crawl-data/CC-MAIN-2019-47/segments/1573496667767.6/warc/CC-MAIN-20191114002636-20191114030636-00394.warc.gz',
'html': '''<!DOCTYPE html>
</html>''', 'languages': 'eng', 'length': '3404', 'mime': 'text/html', 'mime-detected': 'text/html', 'offset': '68774745', 'status': '200', 'timestamp': '20191207172145', 'url': 'http://index.commoncrawl.org/', 'urlkey': 'org,commoncrawl,index)/'}, {'charset': 'UTF-8', 'digest': 'SVH4V5QDUS7SMXSXZYB2XWJSVDWFXUD7', 'filename': 'crawl-data/CC-MAIN-2019-47/segments/1573496667767.6/warc/CC-MAIN-20191114002636-20191114030636-00394.warc.gz', 'html': '''<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="/static/__shared/shared.css"/>
Expand Down Expand Up @@ -2155,18 +2136,7 @@
</p>
</body>
</html>''',
'languages': 'eng',
'length': '3391',
'mime': 'text/html',
'mime-detected': 'text/html',
'offset': '82652447',
'status': '200',
'timestamp': '20191114010130',
'url': 'http://index.commoncrawl.org/',
'urlkey': 'org,commoncrawl,index)/'
}
]
</html>''', 'languages': 'eng', 'length': '3391', 'mime': 'text/html', 'mime-detected': 'text/html', 'offset': '82652447', 'status': '200', 'timestamp': '20191114010130', 'url': 'http://index.commoncrawl.org/', 'urlkey': 'org,commoncrawl,index)/'}]

snapshots['test_download_multiple_results_multi_threaded 1'] = [
{
Expand Down
72 changes: 32 additions & 40 deletions tests/comcrawl/utils/snapshots/snap_test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,38 @@

snapshots = Snapshot()

snapshots['test_search_single_index 1'] = [
{
'charset': 'UTF-8',
'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz',
'languages': 'eng',
'length': '3404',
'mime': 'text/html',
'mime-detected': 'text/html',
'offset': '68774745',
'status': '200',
'timestamp': '20191207172145',
'url': 'http://index.commoncrawl.org/',
'urlkey': 'org,commoncrawl,index)/'
},
{
'digest': 'N5ZYIRKMK64RUBECUDYLXWKOWFUUA5W3',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/robotstxt/CC-MAIN-20191207160050-20191207184050-00388.warc.gz',
'length': '533',
'mime': 'text/plain',
'mime-detected': 'text/plain',
'offset': '3187942',
'status': '200',
'timestamp': '20191207172136',
'url': 'https://index.commoncrawl.org/robots.txt',
'urlkey': 'org,commoncrawl,index)/robots.txt'
},
{
'digest': 'N5ZYIRKMK64RUBECUDYLXWKOWFUUA5W3',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/robotstxt/CC-MAIN-20191207160050-20191207184050-00261.warc.gz',
'length': '532',
'mime': 'text/plain',
'mime-detected': 'text/plain',
'offset': '461099',
'status': '200',
'timestamp': '20191207172144',
'url': 'http://index.commoncrawl.org/robots.txt',
'urlkey': 'org,commoncrawl,index)/robots.txt'
}
]
snapshots['test_search_single_index 1'] = [{'charset': 'UTF-8',
'digest': '745JGUNVPWB4L3TWJIGUQRQFTFSREJ5J',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/warc/CC-MAIN-20191207160050-20191207184050-00394.warc.gz',
'languages': 'eng',
'length': '3404',
'mime': 'text/html',
'mime-detected': 'text/html',
'offset': '68774745',
'status': '200',
'timestamp': '20191207172145',
'url': 'http://index.commoncrawl.org/',
'urlkey': 'org,commoncrawl,index)/'},
{'digest': 'N5ZYIRKMK64RUBECUDYLXWKOWFUUA5W3',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/robotstxt/CC-MAIN-20191207160050-20191207184050-00388.warc.gz',
'length': '533',
'mime': 'text/plain',
'mime-detected': 'text/plain',
'offset': '3187942',
'status': '200',
'timestamp': '20191207172136',
'url': 'https://index.commoncrawl.org/robots.txt',
'urlkey': 'org,commoncrawl,index)/robots.txt'},
{'digest': 'N5ZYIRKMK64RUBECUDYLXWKOWFUUA5W3',
'filename': 'crawl-data/CC-MAIN-2019-51/segments/1575540500637.40/robotstxt/CC-MAIN-20191207160050-20191207184050-00261.warc.gz',
'length': '532',
'mime': 'text/plain',
'mime-detected': 'text/plain',
'offset': '461099',
'status': '200',
'timestamp': '20191207172144',
'url': 'http://index.commoncrawl.org/robots.txt',
'urlkey': 'org,commoncrawl,index)/robots.txt'}]

snapshots['test_search_multiple_indexes_single_threaded 1'] = [
{
Expand Down

0 comments on commit 55dee60

Please sign in to comment.