From 0602c7334794a91ad68b8baa0b4d126ecbca3917 Mon Sep 17 00:00:00 2001 From: EverVino Date: Fri, 17 May 2024 15:13:08 -0400 Subject: [PATCH 1/2] feat: add exponential backoff --- src/pymedx/api.py | 56 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/src/pymedx/api.py b/src/pymedx/api.py index ca68aa27..ed149e14 100644 --- a/src/pymedx/api.py +++ b/src/pymedx/api.py @@ -1,6 +1,7 @@ """API module for PubMed.""" import datetime import itertools +import time from typing import Any, Dict, Iterable, List, Union, cast @@ -50,6 +51,7 @@ def __init__( # Keep track of the rate limit self._rateLimit: int = 3 + self._maxRetries: int = 10 self._requestsMade: List[datetime.datetime] = [] self.parameters: Dict[str, Union[str, int, List[str]]] # Define the standard / default query parameters @@ -154,6 +156,21 @@ def _exceededRateLimit(self) -> bool: # than the rate limit return len(self._requestsMade) > self._rateLimit + def _wait_to_retry(self, attempt: int) -> None: + """ + Calculate and wait the appropriate amount of time before a retry. + + Parameters. + ---------- + attempt: int + The current attempt number. + """ + backoff_time = min( + 2**attempt, 32 + ) # Exponential backoff, capped at 32 seconds + + time.sleep(backoff_time) + def _get( self, url: str, @@ -180,27 +197,38 @@ def _get( be parsed before returning, otherwise a string is returend """ - # Make sure the rate limit is not exceeded + attempt = 0 + while self._exceededRateLimit(): pass - # Set the response mode + while attempt < self._maxRetries: + try: + # Set the response mode + parameters["retmode"] = output - parameters["retmode"] = output + # Make the request to PubMed + response = requests.get(f"{BASE_URL}{url}", params=parameters) + # Check for any errors + response.raise_for_status() - # Make the request to PubMed - response = requests.get(f"{BASE_URL}{url}", params=parameters) - # Check for any errors - response.raise_for_status() + # Add this request to the list of requests made + self._requestsMade.append(datetime.datetime.now()) - # Add this request to the list of requests made - self._requestsMade.append(datetime.datetime.now()) + # Return the response + if output == "json": + return response.json() + else: + return response.text - # Return the response - if output == "json": - return response.json() - else: - return response.text + except Exception: + self._wait_to_retry(attempt) + attempt += 1 + + raise Exception( + f"Failed to retrieve data from {BASE_URL}{url} " + f"after {self._maxRetries} attempts" + ) def _getArticles( self, article_ids: List[str] From 8db99db04ac3d3d1127381822037131152ee59e3 Mon Sep 17 00:00:00 2001 From: EverVino Date: Fri, 17 May 2024 16:00:28 -0400 Subject: [PATCH 2/2] add jitter --- src/pymedx/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pymedx/api.py b/src/pymedx/api.py index ed149e14..a105667f 100644 --- a/src/pymedx/api.py +++ b/src/pymedx/api.py @@ -1,6 +1,7 @@ """API module for PubMed.""" import datetime import itertools +import random import time from typing import Any, Dict, Iterable, List, Union, cast @@ -169,6 +170,8 @@ def _wait_to_retry(self, attempt: int) -> None: 2**attempt, 32 ) # Exponential backoff, capped at 32 seconds + backoff_time += random.uniform(0, 1) # Add jitter + time.sleep(backoff_time) def _get(