From e60bb19dc7de7d97d5c8e47e0b8ee92dac57206e Mon Sep 17 00:00:00 2001
From: Md Hussain Nagaria <34810212+NagariaHussain@users.noreply.github.com>
Date: Thu, 12 Dec 2024 16:53:33 +0530
Subject: [PATCH] fix: ignore hash based links (#304)

---
 .../report/wiki_broken_links/test_broken_link_checker.py | 2 ++
 wiki/wiki/report/wiki_broken_links/wiki_broken_links.py | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py b/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py
index a011e061..9bd3bced 100644
--- a/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py
+++ b/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py
@@ -31,6 +31,8 @@ def internal_to_external_urls(internal_url: str) -> str:
 And [this is a correct relative link]({WORKING_INTERNAL_URL}).
 And [this is an incorrect relative link]({BROKEN_INTERNAL_URL}).
 
+This [hash link](#hash-link) should be ignored.
+
 ![Broken Image]({BROKEN_IMG_URL})
 """
 
diff --git a/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py b/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py
index 2c4daca6..f8dc1833 100644
--- a/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py
+++ b/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py
@@ -84,6 +84,10 @@ def get_broken_links(
 	broken_links = []
 	for el in links:
 		url = el.attrs.get("href") or el.attrs.get("src")
+
+		if is_hash_link(url):
+			continue
+
 		is_relative = is_relative_url(url)
 		relative_url = None
 
@@ -108,6 +112,10 @@ def is_relative_url(url: str) -> bool:
 	return url.startswith("/")
 
 
+def is_hash_link(url: str) -> bool:
+	return url.startswith("#")
+
+
 def is_broken_link(url: str) -> bool:
 	try:
 		status_code = get_request_status_code(url)