From ec29aa404cea9f724fd98994d55eff7f916b98cd Mon Sep 17 00:00:00 2001 From: Hussain Nagaria Date: Wed, 11 Dec 2024 18:33:52 +0530 Subject: [PATCH] feat: filter out broken images --- .../test_broken_link_checker.py | 16 ++++++++++++++-- .../wiki_broken_links/wiki_broken_links.js | 6 ++++++ .../wiki_broken_links/wiki_broken_links.py | 17 ++++++++++------- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py b/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py index 516d93de..8b8c83fc 100644 --- a/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py +++ b/wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py @@ -7,6 +7,7 @@ from wiki.wiki.report.wiki_broken_links.wiki_broken_links import execute, get_broken_links BROKEN_LINK = "https://frappewiki.notavalidtld" +BROKEN_IMG_LINK = "https://img.notavalidtld/failed.jpeg" TEST_MD_WITH_BROKEN_LINK = f""" ## Hello @@ -14,6 +15,8 @@ This is a test for a [broken link]({BROKEN_LINK}). This is a [valid link](https://frappe.io). + +![Broken Image]({BROKEN_IMG_LINK}) """ @@ -33,14 +36,14 @@ def setUp(self): def test_returns_correct_broken_links(self): broken_links = get_broken_links(TEST_MD_WITH_BROKEN_LINK) - self.assertEqual(len(broken_links), 1) + self.assertEqual(len(broken_links), 2) def test_wiki_broken_link_report(self): _, data = execute() self.assertEqual(len(data), 1) self.assertEqual(data[0]["broken_link"], BROKEN_LINK) - def test_wiki_broken_list_report_with_filters(self): + def test_wiki_broken_link_report_with_wiki_space_filter(self): _, data = execute({"wiki_space": self.test_wiki_space.name}) self.assertEqual(len(data), 0) @@ -54,5 +57,14 @@ def test_wiki_broken_list_report_with_filters(self): self.assertEqual(data[0]["wiki_page"], self.test_wiki_page.name) self.assertEqual(data[0]["broken_link"], BROKEN_LINK) + def test_wiki_broken_link_report_with_image_filter(self): + _, data = execute({"check_images": 1}) + self.assertEqual(len(data), 2) + self.assertEqual(data[0]["wiki_page"], self.test_wiki_page.name) + self.assertEqual(data[0]["broken_link"], BROKEN_LINK) + + self.assertEqual(data[1]["wiki_page"], self.test_wiki_page.name) + self.assertEqual(data[1]["broken_link"], BROKEN_IMG_LINK) + def tearDown(self): frappe.db.rollback() diff --git a/wiki/wiki/report/wiki_broken_links/wiki_broken_links.js b/wiki/wiki/report/wiki_broken_links/wiki_broken_links.js index 7702e048..61db8dd1 100644 --- a/wiki/wiki/report/wiki_broken_links/wiki_broken_links.js +++ b/wiki/wiki/report/wiki_broken_links/wiki_broken_links.js @@ -9,5 +9,11 @@ frappe.query_reports["Wiki Broken Links"] = { fieldtype: "Link", options: "Wiki Space", }, + { + fieldname: "check_images", + label: __("Check Images?"), + fieldtype: "Check", + default: 1, + }, ], }; diff --git a/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py b/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py index cbf09e86..3f8f8339 100644 --- a/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py +++ b/wiki/wiki/report/wiki_broken_links/wiki_broken_links.py @@ -50,9 +50,9 @@ def get_data(filters: dict | None = None) -> list[list]: """ data = [] - if not filters: - wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"]) - elif filters.get("wiki_space"): + wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"]) + + if filters and filters.get("wiki_space"): wiki_space = filters.get("wiki_space") wiki_pages = frappe.db.get_all( "Wiki Group Item", @@ -60,22 +60,25 @@ def get_data(filters: dict | None = None) -> list[list]: filters={"parent": wiki_space, "parenttype": "Wiki Space"}, ) + include_images = filters and bool(filters.get("check_images")) for page in wiki_pages: - broken_links_for_page = get_broken_links(page.content) + broken_links_for_page = get_broken_links(page.content, include_images) rows = [{"broken_link": link, "wiki_page": page["name"]} for link in broken_links_for_page] data.extend(rows) return data -def get_broken_links(md_content: str): +def get_broken_links(md_content: str, include_images: bool = True): html = frappe.utils.md_to_html(md_content) soup = BeautifulSoup(html, "html.parser") links = soup.find_all("a") - images = soup.find_all("img") + if include_images: + links += soup.find_all("img") + broken_links = [] - for el in links + images: + for el in links: url = el.attrs.get("href") or el.attrs.get("src") try: response = requests.head(url, verify=False, timeout=5)