
Commit

improve output
AlexanderDokuchaev committed Jan 12, 2025
1 parent ec3f7db commit 96e9b8e
Showing 5 changed files with 118 additions and 65 deletions.
2 changes: 1 addition & 1 deletion md_dead_link_check/__main__.py
@@ -53,7 +53,7 @@ def main() -> int:
files = list(md_data)

status_list = check_all_links(md_data, config, repo_dir, files, files_in_repo)
err_num = summary(status_list, args.warn, args.all, args.no_color)
err_num = summary(status_list, args.warn, args.all, args.no_color, config)

return min(err_num, 1)

8 changes: 8 additions & 0 deletions md_dead_link_check/config.py
@@ -46,4 +46,12 @@ def get_config(root_dir: Path, config_path: Optional[Path]) -> Config:
f"Unexpected config key `{key}` in {config_path.name}. "
f"Available keys: [{', '.join(config.__annotations__)}]"
)
if not isinstance(config.timeout, int) or config.timeout < 1:
raise ValueError("`timeout` must be an integer greater than or equal to 1.")
if not isinstance(config.throttle_groups, int) or config.throttle_groups < 1:
raise ValueError("`throttle_groups` must be an integer greater than or equal to 1.")
if not isinstance(config.throttle_delay, int) or config.throttle_delay < 0:
raise ValueError("`throttle_delay` must be a non-negative integer.")
if not isinstance(config.throttle_max_delay, int) or config.throttle_max_delay < 0:
raise ValueError("`throttle_max_delay` must be a non-negative integer.")
return config
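
Note: these checks run when the configuration is loaded, so invalid throttle settings now fail fast instead of surfacing mid-run. A minimal sketch of that behaviour, assuming only the get_config signature and field names shown in this hunk (paths and values are illustrative):

from pathlib import Path
from md_dead_link_check.config import get_config

# Load defaults (plus any overrides discovered under root_dir) and validate them.
config = get_config(Path("."), None)
print(config.timeout, config.throttle_groups, config.throttle_delay, config.throttle_max_delay)

# A configuration that set, e.g., timeout = 0 or throttle_delay = -1 would now raise
# ValueError with the matching message from the checks above.
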
27 changes: 22 additions & 5 deletions md_dead_link_check/helpers.py
@@ -3,6 +3,8 @@
from pathlib import Path
from typing import List

from md_dead_link_check.config import Config
from md_dead_link_check.link_checker import Status
from md_dead_link_check.link_checker import StatusInfo


@@ -38,7 +40,7 @@ def disable_colors(self) -> None:
setattr(self, key, "")


def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_color: bool) -> int:
def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_color: bool, config: Config) -> int:
"""
Print summary.
Returns 0 if no errors were found, otherwise 1.
@@ -47,19 +49,34 @@ def summary(status: List[StatusInfo], print_warn: bool, print_all: bool, no_colo
if no_color:
specs.disable_colors()
err_nums = 0
count_429 = 0

for x in status:
link_msg = (
f"{specs.blue}File:{specs.clean} {x.link_info.get_location()}"
f" {specs.split} {specs.blue}Link:{specs.clean} {x.link_info.link}"
)
if x.err_msg:
print(f"{link_msg} {specs.split} {specs.red}Error{specs.clean}: {x.err_msg}")
if x.msg is not None and x.msg.startswith("429"):  # e.g. "429: Too Many Requests"
count_429 += 1

if x.status == Status.ERROR:
print(f"{link_msg} {specs.split} {specs.red}Error{specs.clean}: {x.msg}")
err_nums += 1
elif x.warn_msg and (print_warn or print_all):
print(f"{link_msg} {specs.split} {specs.yellow}Warn{specs.clean}: {x.warn_msg}")
elif x.status == Status.WARNING and (print_warn or print_all):
print(f"{link_msg} {specs.split} {specs.yellow}Warn{specs.clean}: {x.msg}")
elif print_all:
print(f"{link_msg} {specs.split} {specs.green}OK{specs.clean}")

if count_429:
print(
f"{specs.yellow}WARNING:{specs.clean} "
f"Found {count_429} link{'s' if count_429 > 1 else ''} with a \"429: Too Many Requests\" response code.\n"
f"Wait some time and try again, or update the configuration file to increase the delay, for example:\n"
f"throttle_groups = {max(1, config.throttle_groups // 2)}\n"
f"throttle_delay = {max(1, config.throttle_delay // 2)}\n"
f"throttle_max_delay = {config.throttle_max_delay * 4}\n"
)

if err_nums:
cat_repeat = 0 if no_color else max(min(err_nums // 10, 5), 1)
print(f"{specs.fail}Found {err_nums} dead link{'s' if err_nums > 1 else ''}" + specs.cat_fail * cat_repeat)
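
Note: for a rough sense of the hint printed above, here is a worked example with assumed current settings (throttle_groups = 4, throttle_delay = 2, throttle_max_delay = 10); the suggestion halves the first two values (never below 1) and quadruples the cap:

# Worked example mirroring the expressions used in summary() above (values assumed).
throttle_groups, throttle_delay, throttle_max_delay = 4, 2, 10
print(f"throttle_groups = {max(1, throttle_groups // 2)}")   # -> throttle_groups = 2
print(f"throttle_delay = {max(1, throttle_delay // 2)}")     # -> throttle_delay = 1
print(f"throttle_max_delay = {throttle_max_delay * 4}")      # -> throttle_max_delay = 40
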
70 changes: 47 additions & 23 deletions md_dead_link_check/link_checker.py
@@ -3,6 +3,7 @@
import asyncio
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from fnmatch import fnmatch
from pathlib import Path
from typing import Dict, List, Optional
@@ -27,21 +28,27 @@
IGNORED_PROTOCOLS = ("ftp", "sftp")


class Status(int, Enum):
OK = 0
WARNING = 1
ERROR = 2


@dataclass
class StatusInfo:
link_info: LinkInfo
err_msg: Optional[str] = None
warn_msg: Optional[str] = None
status: Status
msg: Optional[str] = None

def __lt__(self, other: StatusInfo) -> bool:
return self.link_info < other.link_info
return self.status < other.status or (self.status == other.status and self.link_info < other.link_info)


@dataclass
class LinkStatus:
link: str
err_msg: Optional[str] = None
warn_msg: Optional[str] = None
status: Status
msg: Optional[str] = None


@dataclass
@@ -81,22 +88,22 @@ async def process_link(data: LinkWithDelay, session: ClientSession, config: Conf
response.raise_for_status()
except ClientResponseError as e:
if not config.catch_response_codes or e.status in config.catch_response_codes:
return LinkStatus(link, f"{e.status}: {e.message}")
return LinkStatus(link, warn_msg=f"{e.status}: {e.message}")
return LinkStatus(link, Status.ERROR, f"{e.status}: {e.message}")
return LinkStatus(link, Status.WARNING, f"{e.status}: {e.message}")
except asyncio.CancelledError as e:
return LinkStatus(link, str(e))
return LinkStatus(link, Status.ERROR, str(e))
except ClientConnectorError as e:
return LinkStatus(link, str(e))
return LinkStatus(link, Status.ERROR, str(e))
except asyncio.TimeoutError:
if TIMEOUT_RESPONSE_CODE in config.catch_response_codes:
return LinkStatus(link, err_msg=MSG_TIMEOUT)
return LinkStatus(link, warn_msg=MSG_TIMEOUT)
return LinkStatus(link, Status.ERROR, MSG_TIMEOUT)
return LinkStatus(link, Status.WARNING, MSG_TIMEOUT)
except Exception as e:
msg = str(e)
if not msg:
msg = MSG_UNKNOWN_ERROR
return LinkStatus(link, err_msg=msg)
return LinkStatus(link)
return LinkStatus(link, Status.ERROR, msg)
return LinkStatus(link, Status.OK)
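
Note: the ClientResponseError branch above decides between a fatal error and a warning via catch_response_codes: an empty list treats every HTTP error status as fatal, while a non-empty list only fails the listed codes and downgrades the rest to warnings. A small standalone sketch of that rule, reusing only names from this hunk (the status codes are arbitrary examples):

from typing import List

def classify(status_code: int, catch_response_codes: List[int]) -> str:
    # Mirrors the condition in process_link: an empty list catches everything.
    if not catch_response_codes or status_code in catch_response_codes:
        return "ERROR"
    return "WARNING"

print(classify(404, []))          # ERROR - empty list means all HTTP errors fail the check
print(classify(500, [404, 410]))  # WARNING - 500 is not among the caught codes
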


async def async_check_links(links: List[LinkWithDelay], config: Config) -> List[LinkStatus]:
@@ -105,16 +112,33 @@ async def async_check_links(links: List[LinkWithDelay], config: Config) -> List[
return ret


def calculate_delay(counter: int, config: Config) -> int:
return min(counter // config.throttle_groups * config.throttle_delay, config.throttle_max_delay)


def generate_delays_for_one_domain_links(links: List[str], config: Config) -> List[LinkWithDelay]:
domain_count: Dict[str, int] = defaultdict(int)
ret: List[LinkWithDelay] = []

for link in links:
domain = urlsplit(link).netloc
delay = min(domain_count[domain] // config.throttle_groups * config.throttle_delay, config.throttle_max_delay)
delay = calculate_delay(domain_count[domain], config)
ret.append(LinkWithDelay(link, delay))
domain_count[domain] += 1

enabled_throttling = {domain for domain, count in domain_count.items() if count > config.throttle_groups}
if enabled_throttling:
print("Throttling applied to limit request frequency:")
for domain, count in domain_count.items():
if count > config.throttle_groups:
max_delay = calculate_delay(domain_count[domain] - 1, config)
print(f" - Domain: {domain}")
print(f" Requests count: {count}")
if max_delay == config.throttle_max_delay:
print(f" Maximum delay: {max_delay} seconds (reached throttle_max_delay)")
else:
print(f" Maximum delay: {max_delay} seconds")

return ret
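
Note: calculate_delay ramps the per-domain delay in steps: every throttle_groups requests to the same domain add throttle_delay seconds, capped at throttle_max_delay. A rough illustration with assumed settings (throttle_groups = 5, throttle_delay = 2, throttle_max_delay = 6): links 1-5 to a domain wait 0 s, links 6-10 wait 2 s, links 11-15 wait 4 s, and everything after that stays at the 6 s cap.

# Same formula as calculate_delay, inlined with the assumed settings for illustration.
for counter in range(18):
    delay = min(counter // 5 * 2, 6)
    print(f"link #{counter + 1} to this domain: delay {delay} s")
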


@@ -150,7 +174,7 @@ def check_web_links(md_data: Dict[str, MarkdownInfo], config: Config, files: Lis

for wl in web_links:
li_status = links_status_dict[wl.link]
ret.append(StatusInfo(wl, err_msg=li_status.err_msg, warn_msg=li_status.warn_msg))
ret.append(StatusInfo(wl, li_status.status, li_status.msg))
return ret


@@ -173,7 +197,7 @@ def check_path_links(
try:
split_result = urlsplit(md_link.link)
except ValueError:
ret.append(StatusInfo(md_link, MSG_PARSING_ERROR))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PARSING_ERROR))
continue

if split_result.scheme or split_result.netloc:
@@ -182,7 +206,7 @@

if not split_result.path:
if fragment not in md_file_info.fragments:
ret.append(StatusInfo(md_link, MSG_FRAGMENT_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_FRAGMENT_NOT_FOUND))
continue
else:
try:
@@ -194,28 +218,28 @@
abs_path = (md_abs_path.parent / split_result.path).resolve()
rel_path = abs_path.relative_to(root_dir)
except ValueError:
ret.append(StatusInfo(md_link, MSG_PATH_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_FOUND))
continue

if abs_path.as_posix() != abs_path.resolve().as_posix():
ret.append(StatusInfo(md_link, MSG_PATH_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_FOUND))
continue

if rel_path.as_posix() in md_data:
# Markdowns in repository
if fragment and fragment not in md_data[rel_path.as_posix()].fragments:
ret.append(StatusInfo(md_link, MSG_FRAGMENT_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_FRAGMENT_NOT_FOUND))
continue
else:
# Not markdown file
if not any(f.as_posix().startswith(rel_path.as_posix()) for f in files_in_repo):
if rel_path.exists():
ret.append(StatusInfo(md_link, MSG_PATH_NOT_ADDED))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_ADDED))
else:
ret.append(StatusInfo(md_link, MSG_PATH_NOT_FOUND))
ret.append(StatusInfo(md_link, Status.ERROR, MSG_PATH_NOT_FOUND))
continue

ret.append(StatusInfo(md_link))
ret.append(StatusInfo(md_link, Status.OK))
return ret


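Note: because Status subclasses int, its members compare numerically, and the new StatusInfo.__lt__ orders entries by status first and by link location second, so a sorted result list groups OK entries before warnings and errors. A self-contained sketch of just the enum behaviour (the class body mirrors the definition above):

from enum import Enum

class Status(int, Enum):
    OK = 0
    WARNING = 1
    ERROR = 2

print(sorted([Status.ERROR, Status.OK, Status.WARNING]))  # [<Status.OK: 0>, <Status.WARNING: 1>, <Status.ERROR: 2>]
print(Status.OK < Status.ERROR)                           # True
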
