Skip to content

Commit

Permalink
Fix various things with broken link detection
Browse files (browse the repository at this point in the history)
  • Loading branch information
rubenwardy committed Jul 5, 2024
1 parent 9bf91f1 commit 211be30
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions app/tasks/pkgtasks.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,13 +15,15 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import datetime
import random
import re
import sys
from time import sleep
from urllib.parse import urlparse
from typing import Optional

import requests
import urllib3
from sqlalchemy import or_, and_

from app.markdown import get_links, render_markdown
Expand Down Expand Up @@ -118,21 +120,23 @@ def _url_exists(url: str) -> str:
return str(e.response.status_code)
except requests.exceptions.ConnectionError:
return "ConnectionError"
except urllib3.exceptions.ReadTimeoutError:
return "timeout"


def _check_for_dead_links(package: Package) -> dict[str, str]:
links: list[Optional[str]] = [
links: set[Optional[str]] = {
package.repo,
package.website,
package.issueTracker,
package.forums_url,
package.video_url,
package.donate_url_actual,
package.translation_url,
]
}

if package.desc:
links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
links.update(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))

print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr)

Expand All @@ -150,7 +154,8 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
if res != "":
bad_urls[link] = res

sleep(0.5)
# Prevent leaking information
sleep(random.uniform(0.4, 0.6))

return bad_urls

Expand All @@ -159,7 +164,7 @@ def _check_package(package: Package) -> Optional[str]:
bad_urls = _check_for_dead_links(package)
if len(bad_urls) > 0:
return ("The following broken links were found on your package:\n\n" +
"\n".join([f"- {link} [{res}]" for link, res in bad_urls.items()]))
"\n".join([f"- <{link}> [{res}]" for link, res in bad_urls.items()]))

return None

Expand Down

0 comments on commit 211be30

Please sign in to comment.