Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve output #15

Merged
Merged 1 commit on Jan 12, 2025 (branch names were not captured in this page extract)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion md_dead_link_check/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def main() -> int:
files = list(md_data)

status_list = check_all_links(md_data, config, repo_dir, files, files_in_repo)
err_num = summary(status_list, args.warn, args.all, args.no_color)
err_num = summary(status_list, args.warn, args.all, args.no_color, config)

return min(err_num, 1)

Expand Down
8 changes: 8 additions & 0 deletions md_dead_link_check/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,12 @@ def get_config(root_dir: Path, config_path: Optional[Path]) -> Config:
f"Unexpected config key `{key}` in {config_path.name}. "
f"Available keys: [{', '.join(config.__annotations__)}]"
)
if not isinstance(config.timeout, int) or config.timeout < 1:
raise ValueError("`timeout` must be an integer greater than or equal to 1.")
if not isinstance(config.throttle_groups, int) or config.throttle_groups < 1:
raise ValueError("`throttle_groups` must be an integer greater than or equal to 1.")
if not isinstance(config.throttle_delay, int) or config.throttle_delay < 0:
raise ValueError("`throttle_delay` must be a non-negative float or integer.")
if not isinstance(config.throttle_max_delay, int) or config.throttle_max_delay < 0:
raise ValueError("`throttle_max_delay` must be a non-negative integer.")
return config
28 changes: 23 additions & 5 deletions md_dead_link_check/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from pathlib import Path
from typing import List

from md_dead_link_check.config import Config
from md_dead_link_check.link_checker import Status
from md_dead_link_check.link_checker import StatusInfo


Expand Down Expand Up @@ -38,7 +40,7 @@ def disable_colors(self) -> None:
setattr(self, key, "")


def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_color: bool) -> int:
def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_color: bool, config: Config) -> int:
"""
Print summary.
Returns 0 if not found any error, otherwise 1.
Expand All @@ -47,19 +49,35 @@ def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_colo
if no_color:
specs.disable_colors()
err_nums = 0
count_429 = 0

for x in status:
link_msg = (
f"{specs.blue}File:{specs.clean} {x.link_info.get_location()}"
f" {specs.split} {specs.blue}Link:{specs.clean} {x.link_info.link}"
)
if x.err_msg:
print(f"{link_msg} {specs.split} {specs.red}Error{specs.clean}: {x.err_msg}")
if x.msg is not None and "429: too many request" in x.msg.lower():
count_429 += 1

if x.status == Status.ERROR:
print(f"{link_msg} {specs.split} {specs.red}Error{specs.clean}: {x.msg}")
err_nums += 1
elif x.warn_msg and (print_warn or print_all):
print(f"{link_msg} {specs.split} {specs.yellow}Warn{specs.clean}: {x.warn_msg}")
elif x.status == Status.WARNING and (print_warn or print_all):
print(f"{link_msg} {specs.split} {specs.yellow}Warn{specs.clean}: {x.msg}")
elif print_all:
print(f"{link_msg} {specs.split} {specs.green}OK{specs.clean}")

if count_429:
print(
f"\n{specs.yellow}WARNING:{specs.clean} "
f"{count_429} link{'s' if count_429 > 1 else ''} returned \"429: Too Many Request\" respond code. "
f"This indicates that one of the servers is being accessed too frequently.\n"
f"Wait and try again later, or adjust the configuration:\n"
f"throttle_groups = {max(1, config.throttle_groups // 2)}\n"
f"throttle_delay = {config.throttle_delay}\n"
f"throttle_max_delay = {config.throttle_max_delay * 2}\n"
)

if err_nums:
cat_repeat = 0 if no_color else max(min(err_nums // 10, 5), 1)
print(f"{specs.fail}Found {err_nums} dead link{'s' if err_nums >1 else ''}" + specs.cat_fail * cat_repeat)
Expand Down
74 changes: 49 additions & 25 deletions md_dead_link_check/link_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from fnmatch import fnmatch
from pathlib import Path
from typing import Dict, List, Optional
Expand All @@ -27,21 +28,27 @@
IGNORED_PROTOCOLS = ("ftp", "sftp")


class Status(int, Enum):
    """Outcome of a single link check.

    The ``int`` mix-in makes members comparable with ``<``, so results can be
    ordered by severity: ``OK < WARNING < ERROR``.
    """

    OK = 0  # no problem reported for the link
    WARNING = 1  # problem reported, shown only when warnings are requested
    ERROR = 2  # problem reported and counted as a dead link


@dataclass
class StatusInfo:
link_info: LinkInfo
err_msg: Optional[str] = None
warn_msg: Optional[str] = None
status: Status
msg: Optional[str] = None

def __lt__(self, other: StatusInfo) -> bool:
return self.link_info < other.link_info
return self.status < other.status or (self.status == other.status and self.link_info < other.link_info)


@dataclass
class LinkStatus:
link: str
err_msg: Optional[str] = None
warn_msg: Optional[str] = None
status: Status
msg: Optional[str] = None


@dataclass
Expand Down Expand Up @@ -81,22 +88,22 @@ async def process_link(data: LinkWithDelay, session: ClientSession, config: Conf
response.raise_for_status()
except ClientResponseError as e:
if not config.catch_response_codes or e.status in config.catch_response_codes:
return LinkStatus(link, f"{e.status}: {e.message}")
return LinkStatus(link, warn_msg=f"{e.status}: {e.message}")
return LinkStatus(link, Status.ERROR, f"{e.status}: {e.message}")
return LinkStatus(link, Status.WARNING, f"{e.status}: {e.message}")
except asyncio.CancelledError as e:
return LinkStatus(link, str(e))
return LinkStatus(link, Status.ERROR, str(e))
except ClientConnectorError as e:
return LinkStatus(link, str(e))
return LinkStatus(link, Status.ERROR, str(e))
except asyncio.TimeoutError:
if TIMEOUT_RESPONSE_CODE in config.catch_response_codes:
return LinkStatus(link, err_msg=MSG_TIMEOUT)
return LinkStatus(link, warn_msg=MSG_TIMEOUT)
return LinkStatus(link, Status.ERROR, MSG_TIMEOUT)
return LinkStatus(link, Status.WARNING, MSG_TIMEOUT)
except Exception as e:
msg = str(e)
if not msg:
msg = MSG_UNKNOWN_ERROR
return LinkStatus(link, err_msg=msg)
return LinkStatus(link)
return LinkStatus(link, Status.ERROR, msg)
return LinkStatus(link, Status.OK)


async def async_check_links(links: List[LinkWithDelay], config: Config) -> List[LinkStatus]:
Expand All @@ -105,15 +112,32 @@ async def async_check_links(links: List[LinkWithDelay], config: Config) -> List[
return ret


def calculate_delay(counter: int, config: Config) -> int:
    """Return the throttling delay for a request to an already-seen domain.

    Requests are bucketed into groups of ``config.throttle_groups``. Each
    further group adds ``config.throttle_delay`` to the delay, and the result
    is capped at ``config.throttle_max_delay``.

    Args:
        counter: Number of requests to the same domain counted so far.
        config: Settings providing ``throttle_groups``, ``throttle_delay``
            and ``throttle_max_delay``.

    Returns:
        The delay for this request, never larger than
        ``config.throttle_max_delay``.
    """
    # Floor division: every request in the first group gets a zero delay.
    group_index = counter // config.throttle_groups
    return min(group_index * config.throttle_delay, config.throttle_max_delay)


def generate_delays_for_one_domain_links(links: List[str], config: Config) -> List[LinkWithDelay]:
domain_count: Dict[str, int] = defaultdict(int)
domain_requests_counter: Dict[str, int] = defaultdict(int)
ret: List[LinkWithDelay] = []

for link in links:
domain = urlsplit(link).netloc
delay = min(domain_count[domain] // config.throttle_groups * config.throttle_delay, config.throttle_max_delay)
delay = calculate_delay(domain_requests_counter[domain], config)
ret.append(LinkWithDelay(link, delay))
domain_count[domain] += 1
domain_requests_counter[domain] += 1

enabled_throttling = {d: num - 1 for d, num in domain_requests_counter.items() if num - 1 > config.throttle_groups}
if enabled_throttling:
print("Throttling applied to limit request frequency:")
for domain, num_request in enabled_throttling.items():
if num_request > config.throttle_groups:
max_delay = calculate_delay(num_request, config)
print(f" - Domain: {domain}")
print(f" Requests count: {num_request}")
if max_delay == config.throttle_max_delay:
print(f" Maximum delay: {max_delay} seconds (reached throttle_max_delay)")
else:
print(f" Maximum delay: {max_delay} seconds")

return ret

Expand Down Expand Up @@ -150,7 +174,7 @@ def check_web_links(md_data: Dict[str, MarkdownInfo], config: Config, files: Lis

for wl in web_links:
li_status = links_status_dict[wl.link]
ret.append(StatusInfo(wl, err_msg=li_status.err_msg, warn_msg=li_status.warn_msg))
ret.append(StatusInfo(wl, li_status.status, li_status.msg))
return ret


Expand All @@ -173,7 +197,7 @@ def check_path_links(
try:
split_result = urlsplit(md_link.link)
except ValueError:
ret.append(StatusInfo(md_link, MSG_PARSING_ERROR))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PARSING_ERROR))
continue

if split_result.scheme or split_result.netloc:
Expand All @@ -182,7 +206,7 @@ def check_path_links(

if not split_result.path:
if fragment not in md_file_info.fragments:
ret.append(StatusInfo(md_link, MSG_FRAGMENT_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_FRAGMENT_NOT_FOUND))
continue
else:
try:
Expand All @@ -194,28 +218,28 @@ def check_path_links(
abs_path = (md_abs_path.parent / split_result.path).resolve()
rel_path = abs_path.relative_to(root_dir)
except ValueError:
ret.append(StatusInfo(md_link, MSG_PATH_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_FOUND))
continue

if abs_path.as_posix() != abs_path.resolve().as_posix():
ret.append(StatusInfo(md_link, MSG_PATH_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_FOUND))
continue

if rel_path.as_posix() in md_data:
# Markdowns in repository
if fragment and fragment not in md_data[rel_path.as_posix()].fragments:
ret.append(StatusInfo(md_link, MSG_FRAGMENT_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_FRAGMENT_NOT_FOUND))
continue
else:
# Not markdown file
if not any(f.as_posix().startswith(rel_path.as_posix()) for f in files_in_repo):
if rel_path.exists():
ret.append(StatusInfo(md_link, MSG_PATH_NOT_ADDED))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_ADDED))
else:
ret.append(StatusInfo(md_link, MSG_PATH_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_FOUND))
continue

ret.append(StatusInfo(md_link))
ret.append(StatusInfo(md_link, Status.OK))
return ret


Expand Down
Loading