Skip to content

Commit

Permalink
rename safe_url=>safe_url_key
Browse files — browse the repository at this point in the history
  • Loading branch information
superstes committed Sep 29, 2024
1 parent fa03e3a commit 955357e
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions web_crawler/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
BASE_DIR = Path(__file__).parent.resolve()


def safe_url(url: str) -> str:
def safe_url_key(url: str) -> str:
url = url.replace('http://', '').replace('https://', '')
if url.endswith('/'):
url = url[:-1]
Expand Down Expand Up @@ -97,7 +97,7 @@ def run(self):
def _save_results(self):
print('SAVING INFORMATION')

with open(f'{BASE_DIR}/out/results_{safe_url(TARGET)}.json', 'w', encoding='utf-8') as f:
with open(f'{BASE_DIR}/out/results_{safe_url_key(TARGET)}.json', 'w', encoding='utf-8') as f:
f.write(json_dumps(self.results, indent=4))

@staticmethod
Expand All @@ -108,7 +108,7 @@ def _init_browser() :
sleep(1)

def download_website(self, url: str):
surl = safe_url(url)
surl = safe_url_key(url)
if surl in self.results or url.find('<html') != -1:
return

Expand Down Expand Up @@ -155,7 +155,7 @@ def download_website(self, url: str):
def analyze_website(self, url: str, depth: int = 0):
# pylint: disable=R0912,R0915
domain = url_domain(url)
surl = safe_url(url)
surl = safe_url_key(url)

if surl in self.results or url.find('<html') != -1:
return
Expand Down Expand Up @@ -272,7 +272,7 @@ def analyze_website(self, url: str, depth: int = 0):
elif len(args.skip.strip()) > 0:
LINK_PART_NO_FOLLOW.append(args.skip)

CACHE_DIR = f'{BASE_DIR}/cache/{safe_url(TARGET)}'
CACHE_DIR = f'{BASE_DIR}/cache/{safe_url_key(TARGET)}'
MAX_RECURSION = args.recursion_depth
PAGE_LOAD_WAIT = args.load_time

Expand Down

0 comments on commit 955357e

Please sign in to comment.