Skip to content

Commit

Permalink
Naming
Browse files: browse the repository at this point in the history
  • Loading branch information
MatthewZMSU committed May 28, 2024
1 parent 19d4c27 commit ef3af62
Showing 1 changed file with 13 additions and 12 deletions.
25 changes: 13 additions & 12 deletions scrapypuppeteer/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,8 @@ class PuppeteerContextRestoreDownloaderMiddleware:
N_RETRY_RESTORING_SETTING = "N_RETRY_RESTORING"
RESTORING_LENGTH_SETTING = "RESTORING_LENGTH"

def __init__(self, n_recovery: int, n_retry_restoring: int):
self.n_recovery = n_recovery
def __init__(self, restoring_length: int, n_retry_restoring: int):
self.restoring_length = restoring_length
self.n_retry_restoring = n_retry_restoring
self.context_requests = {}
self.context_counters = {}
Expand All @@ -414,15 +414,15 @@ def __init__(self, n_recovery: int, n_retry_restoring: int):
def from_crawler(cls, crawler: Crawler):
restoring_length = crawler.settings.get(cls.RESTORING_LENGTH_SETTING, 1)
if not isinstance(restoring_length, int):
raise TypeError(f"`n_recovery` must be an integer, got {type(restoring_length)}")
raise TypeError(f"`{cls.RESTORING_LENGTH_SETTING}` must be an integer, got {type(restoring_length)}")
elif restoring_length < 1:
raise ValueError("`n_recovery` must be greater than or equal to 1")
raise ValueError(f"`{cls.RESTORING_LENGTH_SETTING}` must be greater than or equal to 1")

n_retry_restoring = crawler.settings.get(cls.N_RETRY_RESTORING_SETTING, 1)
if not isinstance(n_retry_restoring, int):
raise TypeError(f"`n_recovery` must be an integer, got {type(n_retry_restoring)}")
raise TypeError(f"`{cls.N_RETRY_RESTORING_SETTING}` must be an integer, got {type(n_retry_restoring)}")
elif n_retry_restoring < 1:
raise ValueError("`n_recovery` must be greater than or equal to 1")
raise ValueError(f"`{cls.N_RETRY_RESTORING_SETTING}` must be greater than or equal to 1")

return cls(restoring_length, n_retry_restoring)

Expand All @@ -444,7 +444,6 @@ def process_request(request, spider):

def process_response(self, request: Request, response, spider):
puppeteer_request: Union[PuppeteerRequest, None] = request.meta.get('puppeteer_request', None)
# __request_binding = puppeteer_request.meta.get('__request_binding', False) if puppeteer_request is not None else None # TODO: to fix NoneType AttributeError
__request_binding = puppeteer_request and puppeteer_request.meta.get('__request_binding', False)
if isinstance(response, PuppeteerResponse):
if __request_binding:
Expand All @@ -453,11 +452,11 @@ def process_response(self, request: Request, response, spider):
print("HERE 7!!!")
restoring_request = request.copy()
old_context_id = restoring_request.meta['__context_id']
del self.context_requests[old_context_id]
del self.context_counters[old_context_id]
restoring_request.meta['__context_id'] = response.context_id
self.context_requests[response.context_id] = restoring_request
self.context_counters[response.context_id] = 1
del self.context_requests[old_context_id]
del self.context_counters[old_context_id]
return response
else:
# Just first request-response in the sequence
Expand Down Expand Up @@ -494,21 +493,23 @@ def process_response(self, request: Request, response, spider):
context_id = json.loads(response.text).get('contextId')
if context_id in self.context_requests:
# We know this sequence
if self.context_counters[context_id] < self.n_recovery:
if self.context_counters[context_id] <= self.restoring_length:
restoring_request = self.context_requests[context_id]
if restoring_request.meta['__restore_count'] < 3:
if restoring_request.meta['__restore_count'] < self.n_retry_restoring:
# Restoring!
print("HERE 4!!!")
restoring_request.meta['__restore_count'] += 1
print(f"Restoring the request {restoring_request}")
self.context_counters[context_id] = 1
return restoring_request
else:
print("HERE 9!!!")
print(f"`{self.N_RETRY_RESTORING_SETTING}` number is exceeded!")
# No more restoring
return response
else:
print("HERE 8!!!")
print("N_RECOVERY number is exceeded!")
print(f"`{self.RESTORING_LENGTH_SETTING}` number is exceeded!")
# We cannot restore the sequence as it is too long
del self.context_counters[context_id]
del self.context_requests[context_id]
Expand Down

0 comments on commit ef3af62

Please sign in to comment.