diff --git a/README.md b/README.md
index daca9c7..cb8f3e7 100644
--- a/README.md
+++ b/README.md
@@ -157,3 +157,66 @@ throttle_groups = 100
throttle_delay = 20
throttle_max_delay = 100
```
+
+## Rate Limiting and Request Throttling
+
+Websites often have limits on how many requests you can make within a certain period.
+If these limits are exceeded, the server will return a 429 Too Many Requests status code.
+
+### Failure Handling
+
+By default, the 429 status code is treated as a warning.
+You can modify this behavior and configure how the tool handles different status codes.
+
+```toml
+catch_response_codes = [404, 410, 429, 500]
+```
+
+### Throttling Mechanism
+
+To prevent your requests from overwhelming a website and potentially getting you blocked, this tool implements
+a throttling mechanism. This mechanism limits the number of requests that can be made in a given period.
+
+You can control the following parameters to fine-tune request throttling:
+
+```toml
+throttle_groups = 40 # default: 100
+throttle_delay = 30 # default: 20
+throttle_max_delay = 240 # default: 100
+```
+
+### Filter Links to Check
+
+By filtering out non-critical links and files, you can stay within rate limits while throttling requests.
+
+#### Exclude Links by Pattern
+
+Exclude specific URLs that match patterns:
+
+```toml
+exclude_links = ["https://github.com/AlexanderDokuchaev/md-dead-link-check/pull/*"]
+```
+
+#### Exclude Specific Files
+
+Prevent specific files (e.g., changelogs) from being checked:
+
+```toml
+exclude_files = ["CHANGELOG.md"]
+```
+
+#### Exclude Parts of Files Using Comments
+
+Ignore sections of files using the special comments `<!-- md-dead-link-check: off -->` and `<!-- md-dead-link-check: on -->`.
+
+```md
+...
+
+<!-- md-dead-link-check: off -->
+
+All links will be ignored in this part of the file.
+
+<!-- md-dead-link-check: on -->
+
+...
+```
diff --git a/md_dead_link_check/__main__.py b/md_dead_link_check/__main__.py
index 855459e..c739e01 100644
--- a/md_dead_link_check/__main__.py
+++ b/md_dead_link_check/__main__.py
@@ -53,7 +53,7 @@ def main() -> int:
files = list(md_data)
status_list = check_all_links(md_data, config, repo_dir, files, files_in_repo)
- err_num = summary(status_list, args.warn, args.all, args.no_color, config)
+ err_num = summary(status_list, args.warn, args.all, args.no_color)
return min(err_num, 1)
diff --git a/md_dead_link_check/helpers.py b/md_dead_link_check/helpers.py
index c1bad3e..5f1d37d 100644
--- a/md_dead_link_check/helpers.py
+++ b/md_dead_link_check/helpers.py
@@ -3,7 +3,6 @@
from pathlib import Path
from typing import List
-from md_dead_link_check.config import Config
from md_dead_link_check.link_checker import Status
from md_dead_link_check.link_checker import StatusInfo
@@ -40,7 +39,7 @@ def disable_colors(self) -> None:
setattr(self, key, "")
-def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_color: bool, config: Config) -> int:
+def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_color: bool) -> int:
"""
Print summary.
Returns 0 if not found any error, otherwise 1.
@@ -72,10 +71,8 @@ def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_colo
f"\n{specs.yellow}WARNING:{specs.clean} "
f"{count_429} link{'s' if count_429 > 1 else ''} returned \"429: Too Many Request\" respond code. "
f"This indicates that one of the servers is being accessed too frequently.\n"
- f"Wait and try again later, or adjust the configuration:\n"
- f"throttle_groups = {max(1, config.throttle_groups // 2)}\n"
- f"throttle_delay = {config.throttle_delay}\n"
- f"throttle_max_delay = {config.throttle_max_delay * 2}\n"
+ f"To more information visit "
+ "https://github.com/AlexanderDokuchaev/md-dead-link-check/#rate-limiting-and-request-throttling"
)
if err_nums:
diff --git a/md_dead_link_check/preprocess.py b/md_dead_link_check/preprocess.py
index f377b44..e043dad 100644
--- a/md_dead_link_check/preprocess.py
+++ b/md_dead_link_check/preprocess.py
@@ -16,6 +16,9 @@
RE_HTML_TAG_HREF = r"<\w+\s+(?:[^>]*?\s+)?href=([\"'])(.*?)\1"
RE_SUB = r"[$`][^`]+?[$`]"
+MD_TAG_DISABLE = ""
+MD_TAG_ENABLE = ""
+
@dataclass
class LinkInfo:
@@ -83,6 +86,7 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
links: List[LinkInfo] = []
with (root_dir / path).open(encoding="utf8") as stream:
in_code_block = ""
+ disable_detection_links = False
for line_num, line in enumerate(stream.readlines(), 1):
striped_line = line.strip()
# Skip code blocks that can be start ``` or ````
@@ -112,6 +116,22 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
# Skip $ and ` tags
line = re.sub(RE_SUB, "", line)
+ # Detect id under a tag
+ matches = re.findall(RE_HTML_TAG_ID, line)
+ for _, id in matches:
+ fragments.append(id.lower())
+
+ if MD_TAG_DISABLE in line:
+ disable_detection_links = True
+ continue
+
+ if MD_TAG_ENABLE in line:
+ disable_detection_links = False
+ continue
+
+ if disable_detection_links:
+ continue
+
# Detect links
copy_line = line # Used to detect bare links
matches = re.findall(RE_LINK, line)
@@ -127,11 +147,6 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
links.append(LinkInfo(link, path, line_num))
copy_line = copy_line.replace(link, "")
- # Detect id under a tag
- matches = re.findall(RE_HTML_TAG_ID, line)
- for _, id in matches:
- fragments.append(id.lower())
-
# Detect links under a tag
matches = re.findall(RE_HTML_TAG_HREF, line)
for _, link in matches:
@@ -144,6 +159,7 @@ def process_md_file(path: Path, root_dir: Path) -> MarkdownInfo:
if url.endswith("."):
url = url[:-1]
links.append(LinkInfo(url, path, line_num))
+
return MarkdownInfo(path=path, fragments=fragments, links=links)
diff --git a/tests/test_md_files/a.md b/tests/test_md_files/a.md
index 3b1f11e..3d46a59 100644
--- a/tests/test_md_files/a.md
+++ b/tests/test_md_files/a.md
@@ -60,3 +60,11 @@ Some text
[ftp](ftp://example.example/example)
https://github.com.
+
+<!-- md-dead-link-check: off -->
+
+https://github.com
+
+<!-- md-dead-link-check: on -->
+
+https://github.com
diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py
index a10010e..33476be 100644
--- a/tests/test_preprocess.py
+++ b/tests/test_preprocess.py
@@ -168,5 +168,10 @@ def test_process_md_file():
location=Path("tests/test_md_files/a.md"),
line_num=62,
),
+ LinkInfo(
+ link="https://github.com",
+ location=Path("tests/test_md_files/a.md"),
+ line_num=70,
+ ),
]
assert md_info.links == ref_links