Skip to content

Commit

Permalink
Detect bare links (#13)
Browse files Browse the repository at this point in the history
Issue: #11 

Detect links that are passed without any tags (bare URLs), for example:
```
https://github.com
```
  • Loading branch information
AlexanderDokuchaev authored Jan 11, 2025
1 parent fe5cd91 commit 9844c06
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 5 deletions.
3 changes: 2 additions & 1 deletion .markdownlint.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Default state for all rules
default: true

MD003: false # heading-style
MD013: false # Line length
MD033: false # Inline HTML
MD034: false # no-bare-urls
MD041: false # First line
MD003: false # heading-style
12 changes: 12 additions & 0 deletions md_dead_link_check/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from git import Repo

# Markdown heading: an optional list bullet (-, + or *), then 1-6 '#' characters;
# group 1 captures the heading text, excluding surrounding whitespace and any
# trailing '#' characters.
RE_HEADER = r"^(?:\s*[-+*]\s+|)[#]{1,6}\s*(.*?)\s*[#]*$"
# Bare URL: "http://" or "https://" followed by every character up to the first
# '>', ')', ']', whitespace or double quote. Trailing punctuation such as '.' is
# still part of the match.
RE_URL = r"(http[s]?://[^>)\]\s\"]+)"
# Markdown link or image. Groups: (1) optional '!' image marker, (2) link text,
# (3) link target, which may end with parenthesised parts such as "page_(x)",
# (4) any remaining text before the closing ')', e.g. a title.
RE_LINK = r"([!]{0,1})\[([^\]!]*)\]\(([^()\s]+(?:\([^()\s]*\))*)\s*(.*?)\)"
# Any opening or closing HTML tag.
RE_HTML_TAG = r"</?\w+[^>]*>"
# HTML tag carrying an id= or name= attribute. Group 1 is the quote character,
# group 2 is the attribute value enclosed by that same quote.
RE_HTML_TAG_ID = r"<\w+\s+(?:[^>]*?\s+)?(?:id|name)=([\"'])(.*?)\1"
Expand Down Expand Up @@ -113,16 +114,19 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
line = re.sub(RE_SUB, "", line)

# Detect links
copy_line = line # Used to detect bare links
matches = re.findall(RE_LINK, line)
for img_tag, text, link, title in matches:
links.append(LinkInfo(link, path, line_num))
copy_line = copy_line.replace(link, "")

if matches:
# For case [![text](img_link)](link)
sub_line = re.sub(RE_LINK, "link", line)
matches2 = re.findall(RE_LINK, sub_line)
for img_tag, text, link, title in matches2:
links.append(LinkInfo(link, path, line_num))
copy_line = copy_line.replace(link, "")

# Detect id under a tag <a id="introduction"></a>
matches = re.findall(RE_HTML_TAG_ID, line)
Expand All @@ -133,6 +137,14 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
matches = re.findall(RE_HTML_TAG_HREF, line)
for _, link in matches:
links.append(LinkInfo(link, path, line_num))
copy_line = copy_line.replace(link, "")

# Detect simple urls without any tags
matches = re.findall(RE_URL, copy_line)
for url in matches:
if url.endswith("."):
url = url[:-1]
links.append(LinkInfo(url, path, line_num))
return MarkdownInfo(path=path, fragments=fragments, links=links)


Expand Down
6 changes: 3 additions & 3 deletions tests/test_md_files/a.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ Some text

## links ##

[github](https://github.com/AlexanderDokuchaev)
[github](https://github.com/AlexanderDokuchaev) https://github.com
[b](./b.md) [d.a](b.md) `[A+B](A)`
<a href="./d/a.md" target="_blank">d.a</a><span href="./d/a.md">d.a</span>
<a href="./d/a.md" target="_blank">d.a</a><span href="https://github.com">d.a</span>
[d.a](/tests/test_md_files/d/a.md "tag")

### Header with `quotes` and $math$
Expand Down Expand Up @@ -59,4 +59,4 @@ Some text
<a href="mailto:[email protected]">mail</a>

[ftp](ftp://example.example/example)

https://github.com.
12 changes: 11 additions & 1 deletion tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ def test_process_md_file():
location=Path("tests/test_md_files/a.md"),
line_num=27,
),
LinkInfo(
link="https://github.com",
location=Path("tests/test_md_files/a.md"),
line_num=27,
),
LinkInfo(
link="./b.md",
location=Path("tests/test_md_files/a.md"),
Expand All @@ -88,7 +93,7 @@ def test_process_md_file():
line_num=29,
),
LinkInfo(
link="./d/a.md",
link="https://github.com",
location=Path("tests/test_md_files/a.md"),
line_num=29,
),
Expand Down Expand Up @@ -157,5 +162,10 @@ def test_process_md_file():
location=Path("tests/test_md_files/a.md"),
line_num=61,
),
LinkInfo(
link="https://github.com",
location=Path("tests/test_md_files/a.md"),
line_num=62,
),
]
assert md_info.links == ref_links

0 comments on commit 9844c06

Please sign in to comment.