Skip to content

Commit

Permalink
move http response to own column
Browse files: browse the repository at this point in the history
  • Loading branch information
dale-wahl committed Sep 12, 2023
1 parent 6c64ad1 commit 2b9b74a
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions datasources/web_archive_scraper/search_web_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def get_items(self, query):
  "subject": None,
  "body": None,
  "html": None,
+ "http_html": None,
  "detected_404": None,
  "timestamp": None,
  "error": '',
Expand Down Expand Up @@ -256,9 +257,9 @@ def get_items(self, query):
  try:
  http_response = self.request_get_w_error_handling(scraped_page.get('final_url'), timeout=120)
  self.dataset.log('Collected HTTP response: %s' % scraped_page.get('final_url'))
- result['html'] = 'SELENIUM RESPONSE:\n' + str(result['html']) + '\nHTTP RESPONSE:\n' + http_response.text
+ result['http_html'] = http_response.text
  except Exception as e:
- result['html'] = 'SELENIUM RESPONSE:\n' + str(result['html']) + '\nHTTP RESPONSE:\nNone'
+ result['http_html'] = None
  http_error = '\nHTTP ERROR:\n' + str(e)
  result['error'] = 'SELENIUM ERROR:\n' + str(result['error']) + http_error

Expand Down

0 comments on commit 2b9b74a

Please sign in to comment.