From 8d0590fc9a2a060ab961dcc1459b1ae1c7ea6543 Mon Sep 17 00:00:00 2001 From: Nitish shah Date: Fri, 7 Jun 2024 08:16:56 +0530 Subject: [PATCH 1/2] Update client.py --- paramspider/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paramspider/client.py b/paramspider/client.py index 09db654..c617caf 100644 --- a/paramspider/client.py +++ b/paramspider/client.py @@ -64,4 +64,4 @@ def fetch_url_content(url,proxy): sys.exit() logging.error(f"Failed to fetch URL {url} after {MAX_RETRIES} retries.") - sys.exit() + return None From f627545051e09293ea2d63004a66065241b71f68 Mon Sep 17 00:00:00 2001 From: Nitish shah Date: Fri, 7 Jun 2024 08:17:38 +0530 Subject: [PATCH 2/2] Update main.py --- paramspider/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paramspider/main.py b/paramspider/main.py index 38ff9f5..6e434fd 100644 --- a/paramspider/main.py +++ b/paramspider/main.py @@ -93,6 +93,8 @@ def fetch_and_clean_urls(domain, extensions, stream_output,proxy, placeholder): logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Fetching URLs for {Fore.CYAN + domain + Style.RESET_ALL}") wayback_uri = f"https://web.archive.org/cdx/search/cdx?url={domain}/*&output=txt&collapse=urlkey&fl=original&page=/" response = client.fetch_url_content(wayback_uri,proxy) + if response == None: + return urls = response.text.split() logging.info(f"{Fore.YELLOW}[INFO]{Style.RESET_ALL} Found {Fore.GREEN + str(len(urls)) + Style.RESET_ALL} URLs for {Fore.CYAN + domain + Style.RESET_ALL}") @@ -165,4 +167,4 @@ def main(): fetch_and_clean_urls(domain, extensions, args.stream,args.proxy, args.placeholder) if __name__ == "__main__": - main() \ No newline at end of file + main()