From 955357e7052dc3be85d4fd80bdd51e42b9ab9a02 Mon Sep 17 00:00:00 2001
From: Pascal Rath
Date: Sun, 29 Sep 2024 19:36:55 +0200
Subject: [PATCH] rename safe_url=>safe_url_key

---
 web_crawler/main.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/web_crawler/main.py b/web_crawler/main.py
index b658a80..6f5059a 100644
--- a/web_crawler/main.py
+++ b/web_crawler/main.py
@@ -39,7 +39,7 @@ BASE_DIR = Path(__file__).parent.resolve()
 
 
 
-def safe_url(url: str) -> str:
+def safe_url_key(url: str) -> str:
     url = url.replace('http://', '').replace('https://', '')
     if url.endswith('/'):
         url = url[:-1]
@@ -97,7 +97,7 @@ def run(self):
 
     def _save_results(self):
         print('SAVING INFORMATION')
-        with open(f'{BASE_DIR}/out/results_{safe_url(TARGET)}.json', 'w', encoding='utf-8') as f:
+        with open(f'{BASE_DIR}/out/results_{safe_url_key(TARGET)}.json', 'w', encoding='utf-8') as f:
             f.write(json_dumps(self.results, indent=4))
 
     @staticmethod
@@ -108,7 +108,7 @@ def _init_browser():
         sleep(1)
 
     def download_website(self, url: str):
-        surl = safe_url(url)
+        surl = safe_url_key(url)
         if surl in self.results or url.find(' 0:
         LINK_PART_NO_FOLLOW.append(args.skip)
 
-    CACHE_DIR = f'{BASE_DIR}/cache/{safe_url(TARGET)}'
+    CACHE_DIR = f'{BASE_DIR}/cache/{safe_url_key(TARGET)}'
     MAX_RECURSION = args.recursion_depth
     PAGE_LOAD_WAIT = args.load_time
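
For reference, a sketch of the renamed helper as it reads with this patch applied. Only the scheme-stripping and trailing-slash lines of the body are visible in the first hunk, so the return statement and the usage example below are assumptions, inferred from how safe_url_key() is interpolated into the results and cache paths elsewhere in the diff:

    def safe_url_key(url: str) -> str:
        # Normalize a URL into a key that is safe to embed in file names:
        # drop the scheme so the http:// and https:// variants of a URL
        # collapse to the same key ...
        url = url.replace('http://', '').replace('https://', '')
        # ... and drop a single trailing slash so 'example.com/' and
        # 'example.com' collapse to the same key as well.
        if url.endswith('/'):
            url = url[:-1]
        return url  # assumed: the hunk context cuts off before the return

    # Usage (assumed behavior): safe_url_key('https://example.com/') == 'example.com'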