Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PSCE-408 refactor: replaces regex with urllib for repo URL parsing #215

Merged
merged 3 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/trestlebot/test_gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_parse_repository_with_incorrect_name() -> None:
gl = GitLab("fake")
with pytest.raises(
GitProviderException,
match="https://notgitlab.com/owner/repo.git is an invalid repo URL",
match="https://notgitlab.com/owner/repo.git is an invalid Gitlab repo URL",
):
gl.parse_repository("https://notgitlab.com/owner/repo.git")

Expand Down
19 changes: 15 additions & 4 deletions trestlebot/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

"""GitHub related functions for the Trestle Bot."""

from __future__ import annotations

import os
import re
from typing import Optional, Tuple
Expand Down Expand Up @@ -31,7 +33,15 @@ def __init__(self, access_token: str):
session.login(token=access_token)

self._session = session
self.pattern = r"^(?:https?://)?github\.com/([^/]+)/([^/.]+)"

# For repo URL input validation
pattern = r"^(?:https?://)?github\.com/([^/]+)/([^/.]+)"
self._pattern = re.compile(pattern)

@property
def provider_pattern(self) -> re.Pattern[str]:
"""Return the compiled regex used to validate GitHub repository URLs."""
return self._pattern

def parse_repository(self, repo_url: str) -> Tuple[str, str]:
"""
Expand All @@ -43,11 +53,12 @@ def parse_repository(self, repo_url: str) -> Tuple[str, str]:
Returns:
Owner and repo name in a tuple, respectively
"""

match = re.match(self.pattern, repo_url)
match: Optional[re.Match[str]]
stripped_url: str
match, stripped_url = self.match_url(repo_url)

if not match:
raise GitProviderException(f"{repo_url} is an invalid GitHub repo URL")
raise GitProviderException(f"{stripped_url} is an invalid GitHub repo URL")

owner = match.group(1)
repo = match.group(2)
Expand Down
28 changes: 18 additions & 10 deletions trestlebot/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@

"""GitLab related functions for the Trestle Bot."""

from __future__ import annotations

import os
import re
import time
from typing import Tuple
from typing import Optional, Tuple
from urllib.parse import ParseResult, urlparse

import gitlab

Expand All @@ -21,10 +24,16 @@ def __init__(self, api_token: str, server_url: str = "https://gitlab.com"):

self._gitlab_client = gitlab.Gitlab(server_url, private_token=api_token)

stripped_url = re.sub(r"^(https?://)?", "", server_url)
self.pattern = r"^(?:https?://)?{0}(/.+)/([^/.]+)(\.git)?$".format(
re.escape(stripped_url)
)
# For repo URL input validation
parsed_url: ParseResult = urlparse(server_url)
stripped_url = f"{parsed_url.netloc}{parsed_url.path}"
pattern = rf"^(?:https?://)?{re.escape(stripped_url)}(/.+)/([^/.]+)(\.git)?$"
self._pattern = re.compile(pattern)

@property
def provider_pattern(self) -> re.Pattern[str]:
"""Return the compiled regex, built from the server URL, used to validate repository URLs."""
return self._pattern

def parse_repository(self, repo_url: str) -> Tuple[str, str]:
"""
Expand All @@ -37,13 +46,12 @@ def parse_repository(self, repo_url: str) -> Tuple[str, str]:
Owner and project name in a tuple, respectively
"""

# Strip out any basic auth
stripped_url = re.sub(r"https?://.*?@", "https://", repo_url)

match = re.match(self.pattern, stripped_url)
match: Optional[re.Match[str]]
stripped_url: str
match, stripped_url = self.match_url(repo_url)

if not match:
raise GitProviderException(f"{stripped_url} is an invalid repo URL")
raise GitProviderException(f"{stripped_url} is an invalid Gitlab repo URL")

owner = match.group(1)[1:] # Removing the leading slash
repo = match.group(2)
Expand Down
25 changes: 24 additions & 1 deletion trestlebot/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@

"""Base Git Provider class for the Trestle Bot."""

from __future__ import annotations

import re
from abc import ABC, abstractmethod
from typing import Tuple
from typing import Optional, Tuple
from urllib.parse import ParseResult, urlparse


class GitProviderException(Exception):
Expand All @@ -17,6 +21,25 @@ class GitProvider(ABC):
Abstract base class for Git provider types
"""

@property
@abstractmethod
def provider_pattern(self) -> re.Pattern[str]:
"""Return the compiled regex that match_url applies to validate repository URLs."""

def match_url(self, repo_url: str) -> Tuple[Optional[re.Match[str]], str]:
"""Match a repository URL with the pattern"""
parsed_url: ParseResult = urlparse(repo_url)
scheme = parsed_url.scheme
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a nitpick, so feel free to ignore: these assignments could be condensed using assignment expressions to reduce verbosity. For example:

if host := parsed_url.hostname:
    stripped_url = f"{host}{path}"
if scheme := parsed_url.scheme:
    stripped_url = f"{scheme}://{stripped_url}"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @gvauter. I will apply this.

host = parsed_url.hostname
path = parsed_url.path

stripped_url = path
if host:
stripped_url = f"{host}{path}"
if scheme:
stripped_url = f"{scheme}://{stripped_url}"
return self.provider_pattern.match(stripped_url), stripped_url

@abstractmethod
def parse_repository(self, repository_url: str) -> Tuple[str, str]:
"""Parse repository information into namespace and repo, respectively"""
Expand Down
Loading