diff --git a/tests/conftest.py b/tests/conftest.py index eaa02134..f3d93c84 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,12 @@ -'''py.test standard config file.''' +"""py.test standard config file.""" import logging + import pytest @pytest.fixture(autouse=True) def reset_log_level(): + """Automatically reset log level verbosity between tests. Generally want + test output the Unix way: silence is golden.""" logging.getLogger().setLevel(logging.WARN) diff --git a/tests/custom_suffix_test.py b/tests/custom_suffix_test.py index 48ce8bd9..9a862ae3 100644 --- a/tests/custom_suffix_test.py +++ b/tests/custom_suffix_test.py @@ -1,9 +1,9 @@ '''tldextract unit tests with a custom suffix list.''' import os +import tempfile import tldextract -from .helpers import temporary_dir FAKE_SUFFIX_LIST_URL = "file://" + os.path.join( os.path.dirname(os.path.abspath(__file__)), @@ -13,7 +13,7 @@ # pylint: disable=invalid-name extract_using_fake_suffix_list = tldextract.TLDExtract( - cache_dir=temporary_dir(), + cache_dir=tempfile.mkdtemp(), suffix_list_urls=[FAKE_SUFFIX_LIST_URL] ) extract_using_fake_suffix_list_no_cache = tldextract.TLDExtract( @@ -30,7 +30,7 @@ def test_private_extraction(): tld = tldextract.TLDExtract( - cache_dir=temporary_dir(), + cache_dir=tempfile.mkdtemp(), suffix_list_urls=[] ) diff --git a/tests/helpers.py b/tests/helpers.py deleted file mode 100644 index 8993e3ca..00000000 --- a/tests/helpers.py +++ /dev/null @@ -1,10 +0,0 @@ -'''tldextract test helpers.''' - -from subprocess import CalledProcessError, PIPE, Popen -import tempfile - - -def temporary_dir(): - """ Make a writable temporary file and return its absolute path. 
- """ - return tempfile.mkdtemp() diff --git a/tests/main_test.py b/tests/main_test.py index 2f116b16..5e25833b 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- '''Main tldextract unit tests.''' -import sys +import tempfile import pytest import responses @@ -9,13 +9,12 @@ from tldextract.cache import DiskCache from tldextract.suffix_list import SuffixListNotFound from tldextract.tldextract import ExtractResult -from .helpers import temporary_dir # pylint: disable=invalid-name -extract = tldextract.TLDExtract(cache_dir=temporary_dir()) +extract = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp()) extract_no_cache = tldextract.TLDExtract(cache_dir=False) -extract_using_real_local_suffix_list = tldextract.TLDExtract(cache_dir=temporary_dir()) +extract_using_real_local_suffix_list = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp()) extract_using_real_local_suffix_list_no_cache = tldextract.TLDExtract(cache_dir=False) extract_using_fallback_to_snapshot_no_cache = tldextract.TLDExtract( cache_dir=None, @@ -266,6 +265,8 @@ def test_tlds_property(): def test_global_extract(): - assert tldextract.extract("foo.blogspot.com") == ExtractResult(subdomain='foo', domain='blogspot', suffix='com') + assert tldextract.extract("foo.blogspot.com") == ExtractResult( + subdomain="foo", domain="blogspot", suffix="com" + ) assert tldextract.extract("foo.blogspot.com", include_psl_private_domains=True) == \ ExtractResult(subdomain='', domain='foo', suffix='blogspot.com') diff --git a/tldextract/cache.py b/tldextract/cache.py index 66f7d84e..ff013a44 100644 --- a/tldextract/cache.py +++ b/tldextract/cache.py @@ -8,18 +8,10 @@ from filelock import FileLock -try: - FileNotFoundError -except NameError: - - class FileNotFoundError(Exception): - pass - - LOG = logging.getLogger(__name__) -class DiskCache(object): +class DiskCache: """Disk _cache that only works for jsonable values""" def __init__(self, cache_dir, lock_timeout=20): @@ -43,7 +35,9 
@@ def get(self, namespace, key): return json.load(cache_file) except (OSError, ValueError) as exc: LOG.error("error reading TLD cache file %s: %s", cache_filepath, exc) - raise KeyError("namespace: " + namespace + " key: " + repr(key)) + raise KeyError( # pylint: disable=raise-missing-from + "namespace: " + namespace + " key: " + repr(key) + ) def set(self, namespace, key, value): """Set a value in the disk cache""" diff --git a/tldextract/cli.py b/tldextract/cli.py index ae418f65..140b4043 100644 --- a/tldextract/cli.py +++ b/tldextract/cli.py @@ -1,6 +1,7 @@ '''tldextract CLI''' +import argparse import logging import sys @@ -10,8 +11,6 @@ def main(): '''tldextract CLI main command.''' - import argparse - logging.basicConfig() parser = argparse.ArgumentParser( @@ -43,4 +42,4 @@ def main(): return for i in args.input: - print(' '.join(tld_extract(i))) # pylint: disable=superfluous-parens + print(' '.join(tld_extract(i))) diff --git a/tldextract/remote.py b/tldextract/remote.py index f783fdf3..906f2ce8 100644 --- a/tldextract/remote.py +++ b/tldextract/remote.py @@ -2,7 +2,6 @@ import re import socket -import sys from urllib.parse import scheme_chars diff --git a/tldextract/suffix_list.py b/tldextract/suffix_list.py index bec96708..a4371c83 100644 --- a/tldextract/suffix_list.py +++ b/tldextract/suffix_list.py @@ -1,49 +1,52 @@ -'tldextract helpers for testing and fetching remote resources.' +"tldextract helpers for testing and fetching remote resources." 
import logging import pkgutil import re -import sys import requests from requests_file import FileAdapter -LOG = logging.getLogger('tldextract') +LOG = logging.getLogger("tldextract") -PUBLIC_SUFFIX_RE = re.compile(r'^(?P<suffix>[.*!]*\w[\S]*)', re.UNICODE | re.MULTILINE) -PUBLIC_PRIVATE_SUFFIX_SEPARATOR = '// ===BEGIN PRIVATE DOMAINS===' +PUBLIC_SUFFIX_RE = re.compile(r"^(?P<suffix>[.*!]*\w[\S]*)", re.UNICODE | re.MULTILINE) +PUBLIC_PRIVATE_SUFFIX_SEPARATOR = "// ===BEGIN PRIVATE DOMAINS===" class SuffixListNotFound(LookupError): - pass + """A recoverable error while looking up a suffix list. Recoverable because + you can specify backups, or use this library's bundled snapshot.""" def find_first_response(cache, urls, cache_fetch_timeout=None): - """ Decode the first successfully fetched URL, from UTF-8 encoding to + """Decode the first successfully fetched URL, from UTF-8 encoding to Python unicode. """ with requests.Session() as session: - session.mount('file://', FileAdapter()) + session.mount("file://", FileAdapter()) for url in urls: try: - return cache.cached_fetch_url(session=session, url=url, timeout=cache_fetch_timeout) - except requests.exceptions.RequestException: - LOG.exception( - 'Exception reading Public Suffix List url %s', - url + return cache.cached_fetch_url( + session=session, url=url, timeout=cache_fetch_timeout ) + except requests.exceptions.RequestException: + LOG.exception("Exception reading Public Suffix List url %s", url) raise SuffixListNotFound( - 'No Public Suffix List found. Consider using a mirror or constructing ' - 'your TLDExtract with `suffix_list_urls=None`.' + "No Public Suffix List found. Consider using a mirror or constructing " + "your TLDExtract with `suffix_list_urls=None`." 
) def extract_tlds_from_suffix_list(suffix_list_text): - public_text, _, private_text = suffix_list_text.partition(PUBLIC_PRIVATE_SUFFIX_SEPARATOR) + """Parse the raw suffix list text for its different designations of + suffixes.""" + public_text, _, private_text = suffix_list_text.partition( + PUBLIC_PRIVATE_SUFFIX_SEPARATOR + ) - public_tlds = [m.group('suffix') for m in PUBLIC_SUFFIX_RE.finditer(public_text)] - private_tlds = [m.group('suffix') for m in PUBLIC_SUFFIX_RE.finditer(private_text)] + public_tlds = [m.group("suffix") for m in PUBLIC_SUFFIX_RE.finditer(public_text)] + private_tlds = [m.group("suffix") for m in PUBLIC_SUFFIX_RE.finditer(private_text)] return public_tlds, private_tlds @@ -58,7 +61,7 @@ def get_suffix_lists(cache, urls, cache_fetch_timeout, fallback_to_snapshot): "cache_fetch_timeout": cache_fetch_timeout, "fallback_to_snapshot": fallback_to_snapshot, }, - hashed_argnames=["urls", "fallback_to_snapshot"] + hashed_argnames=["urls", "fallback_to_snapshot"], ) @@ -69,9 +72,9 @@ def _get_suffix_lists(cache, urls, cache_fetch_timeout, fallback_to_snapshot): text = find_first_response(cache, urls, cache_fetch_timeout=cache_fetch_timeout) except SuffixListNotFound as exc: if fallback_to_snapshot: - text = pkgutil.get_data('tldextract', '.tld_set_snapshot') + text = pkgutil.get_data("tldextract", ".tld_set_snapshot") if not isinstance(text, str): - text = str(text, 'utf-8') + text = str(text, "utf-8") else: raise exc diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index f113d4c3..8e808995 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -126,15 +126,15 @@ def ipv4(self): return "" -class TLDExtract(object): +class TLDExtract: """A callable for extracting, subdomain, domain, and suffix components from a URL.""" # TODO: Agreed with Pylint: too-many-arguments - def __init__( + def __init__( # pylint: disable=too-many-arguments self, cache_dir=CACHE_DIR, - suffix_list_urls=PUBLIC_SUFFIX_LIST_URLS, # pylint: 
disable=too-many-arguments + suffix_list_urls=PUBLIC_SUFFIX_LIST_URLS, fallback_to_snapshot=True, include_psl_private_domains=False, extra_suffixes=(), @@ -242,6 +242,7 @@ def __call__(self, url, include_psl_private_domains=None): return ExtractResult(subdomain, domain, suffix) def update(self, fetch_now=False): + """Force fetch the latest suffix list definitions.""" self._extractor = None self._cache.clear() if fetch_now: @@ -293,12 +294,14 @@ def _get_tld_extractor(self): @wraps(TLD_EXTRACTOR.__call__) -def extract(url, include_psl_private_domains=False): +def extract( + url, include_psl_private_domains=False +): # pylint: disable=missing-function-docstring return TLD_EXTRACTOR(url, include_psl_private_domains=include_psl_private_domains) @wraps(TLD_EXTRACTOR.update) -def update(*args, **kwargs): +def update(*args, **kwargs): # pylint: disable=missing-function-docstring return TLD_EXTRACTOR.update(*args, **kwargs) @@ -318,6 +321,7 @@ def __init__( self.tlds_excl_private = frozenset(public_tlds + extra_tlds) def tlds(self, include_psl_private_domains=None): + """Get the currently filtered list of suffixes.""" if include_psl_private_domains is None: include_psl_private_domains = self.include_psl_private_domains