Skip to content

Commit

Permalink
feat: filter out broken images
Browse files Browse the repository at this point in the history
  • Loading branch information
NagariaHussain committed Dec 11, 2024
1 parent 3505357 commit ec29aa4
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
16 changes: 14 additions & 2 deletions wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
from wiki.wiki.report.wiki_broken_links.wiki_broken_links import execute, get_broken_links

# Test URLs on a made-up TLD; the tests below expect both to be reported as broken.
BROKEN_LINK = "https://frappewiki.notavalidtld"
BROKEN_IMG_LINK = "https://img.notavalidtld/failed.jpeg"

# Markdown fixture with one broken link, one link labelled valid, and one broken image.
TEST_MD_WITH_BROKEN_LINK = f"""
## Hello
This is a test for a [broken link]({BROKEN_LINK}).
This is a [valid link](https://frappe.io).
![Broken Image]({BROKEN_IMG_LINK})
"""


Expand All @@ -33,14 +36,14 @@ def setUp(self):

# NOTE(review): this is a rendered diff with the +/- markers stripped, so the old
# and the new assertion both appear. The `1` check looks like the removed line and
# the `2` check the added one: the fixture has one broken link plus one broken
# image, and get_broken_links() is called here without disabling image checks.
# Confirm against the actual file.
def test_returns_correct_broken_links(self):
broken_links = get_broken_links(TEST_MD_WITH_BROKEN_LINK)
self.assertEqual(len(broken_links), 1)
self.assertEqual(len(broken_links), 2)

def test_wiki_broken_link_report(self):
    """Run the report with no filters and expect a single row for BROKEN_LINK."""
    _columns, rows = execute()

    self.assertEqual(len(rows), 1)
    self.assertEqual(rows[0]["broken_link"], BROKEN_LINK)

def test_wiki_broken_list_report_with_filters(self):
def test_wiki_broken_link_report_with_wiki_space_filter(self):
_, data = execute({"wiki_space": self.test_wiki_space.name})
self.assertEqual(len(data), 0)

Expand All @@ -54,5 +57,14 @@ def test_wiki_broken_list_report_with_filters(self):
self.assertEqual(data[0]["wiki_page"], self.test_wiki_page.name)
self.assertEqual(data[0]["broken_link"], BROKEN_LINK)

def test_wiki_broken_link_report_with_image_filter(self):
    """Run the report with ``check_images`` enabled.

    Expects exactly two rows, both for the test wiki page: the broken
    hyperlink first, then the broken image.
    """
    _columns, rows = execute({"check_images": 1})
    self.assertEqual(len(rows), 2)

    # Row order matters: the link row is asserted before the image row.
    expected_links = (BROKEN_LINK, BROKEN_IMG_LINK)
    for row, expected_link in zip(rows, expected_links):
        self.assertEqual(row["wiki_page"], self.test_wiki_page.name)
        self.assertEqual(row["broken_link"], expected_link)

def tearDown(self):
    """Roll back the database via ``frappe.db.rollback()`` once the test finishes."""
    frappe.db.rollback()
6 changes: 6 additions & 0 deletions wiki/wiki/report/wiki_broken_links/wiki_broken_links.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ frappe.query_reports["Wiki Broken Links"] = {
fieldtype: "Link",
options: "Wiki Space",
},
{
fieldname: "check_images",
label: __("Check Images?"),
fieldtype: "Check",
default: 1,
},
],
};
17 changes: 10 additions & 7 deletions wiki/wiki/report/wiki_broken_links/wiki_broken_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,32 +50,35 @@ def get_data(filters: dict | None = None) -> list[list]:
"""
data = []

if not filters:
wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"])
elif filters.get("wiki_space"):
wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"])

if filters and filters.get("wiki_space"):
wiki_space = filters.get("wiki_space")
wiki_pages = frappe.db.get_all(
"Wiki Group Item",
fields=["wiki_page as name", "wiki_page.content as content"],
filters={"parent": wiki_space, "parenttype": "Wiki Space"},
)

include_images = filters and bool(filters.get("check_images"))
for page in wiki_pages:
broken_links_for_page = get_broken_links(page.content)
broken_links_for_page = get_broken_links(page.content, include_images)
rows = [{"broken_link": link, "wiki_page": page["name"]} for link in broken_links_for_page]
data.extend(rows)

return data


def get_broken_links(md_content: str):
def get_broken_links(md_content: str, include_images: bool = True):
html = frappe.utils.md_to_html(md_content)
soup = BeautifulSoup(html, "html.parser")

links = soup.find_all("a")
images = soup.find_all("img")
if include_images:
links += soup.find_all("img")

broken_links = []
for el in links + images:
for el in links:
url = el.attrs.get("href") or el.attrs.get("src")
try:
response = requests.head(url, verify=False, timeout=5)
Expand Down

0 comments on commit ec29aa4

Please sign in to comment.