Skip to content

Commit

Permalink
TODOs and structural changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
MatthewZMSU committed May 28, 2024
1 parent 4c5e5b0 commit e9f5f73
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ def __is_closing(self, response,
return close_page


class PuppeteerContextRecoveryDownloaderMiddleware: # TODO: change name?
class PuppeteerContextRestoreDownloaderMiddleware:
"""
This middleware allows you to recover puppeteer context.
Expand Down Expand Up @@ -432,15 +432,19 @@ def process_request(request, spider):
request.meta['__request_binding'] = True
return None

def process_response(self, request, response, spider):
def process_response(self, request: Request, response, spider):
puppeteer_request = request.meta.get('puppeteer_request', None)
__request_binding = puppeteer_request.meta.get('__request_binding', False) if puppeteer_request is not None else None
__request_binding = puppeteer_request.meta.get('__request_binding', False) if puppeteer_request is not None else None # TODO: to fix NoneType AttributeError
if isinstance(response, PuppeteerResponse):
if __request_binding:
restoring_request = request.copy()
# TODO: here we need to add a meta key `__original_context_id`
# (or something like this) in order to distinguish when the context
print("HERE 5!!!")
request.dont_filter = True
request.meta['__restore_count'] = 0
self.context_requests[response.context_id] = request
restoring_request.dont_filter = True
restoring_request.meta['__restore_count'] = 0
restoring_request.meta['__context_id'] = response.context_id
self.context_requests[response.context_id] = restoring_request
self.context_counters[response.context_id] = 1
return response
else:
Expand All @@ -466,7 +470,7 @@ def process_response(self, request, response, spider):
else:
# We probably know this sequence
print("HERE 3!!!")
context_id = json.loads(response.text).get('contextId')
context_id = json.loads(response.text).get('contextId') # TODO: to check if context_id is not None!
if context_id in self.context_requests: # TODO: context_id is updating after it restarts!!!
# We know this sequence
if self.context_counters[context_id] <= self.n_recovery:
Expand Down

0 comments on commit e9f5f73

Please sign in to comment.