From c0fe30c164b6dd7992130a8bccaaad22650324bb Mon Sep 17 00:00:00 2001 From: John Kurkowski Date: Fri, 23 Oct 2020 20:07:51 -0700 Subject: [PATCH 1/2] Remove unused vars --- tests/main_test.py | 2 -- tldextract/tldextract.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 2f116b16..59e8efd8 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- '''Main tldextract unit tests.''' -import sys - import pytest import responses import tldextract diff --git a/tldextract/tldextract.py b/tldextract/tldextract.py index f113d4c3..28e124a9 100644 --- a/tldextract/tldextract.py +++ b/tldextract/tldextract.py @@ -71,8 +71,6 @@ "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat", ) -CACHE = DiskCache(cache_dir=CACHE_DIR) - class ExtractResult(collections.namedtuple("ExtractResult", "subdomain domain suffix")): """namedtuple of a URL's subdomain, domain, and suffix.""" From 34d494f253f43852946110f1a9cd336523bfc96c Mon Sep 17 00:00:00 2001 From: John Kurkowski Date: Fri, 23 Oct 2020 21:45:13 -0700 Subject: [PATCH 2/2] Catch permission error when making cache dir, as well as cache file Fixes #209. * Move makedir side-effect from string building helper function into try block * Duplicate warning from writing a single cache file * Log this at most once per app --- tests/conftest.py | 2 ++ tests/main_test.py | 26 ++++++++++++++++++++++ tldextract/cache.py | 53 ++++++++++++++++++++++++++++++++++----------- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index eaa02134..ea064992 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,8 +2,10 @@ import logging import pytest +import tldextract.cache @pytest.fixture(autouse=True) def reset_log_level(): + tldextract.cache._DID_LOG_UNABLE_TO_CACHE = False logging.getLogger().setLevel(logging.WARN) diff --git a/tests/main_test.py b/tests/main_test.py index 59e8efd8..8d0f8f51 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- '''Main tldextract unit tests.''' +import logging +import os import pytest import responses import tldextract @@ -235,6 +237,30 @@ def test_result_as_dict(): assert result._asdict() == expected_dict +def test_cache_permission(mocker, monkeypatch, tmpdir): + """Emit a warning once that this can't cache the latest PSL.""" + + warning = mocker.patch.object(logging.getLogger("tldextract.cache"), "warning") + + def no_permission_makedirs(*args, **kwargs): + raise PermissionError( + """[Errno 13] Permission denied: '/usr/local/lib/python3.7/site-packages/tldextract/.suffix_cache""" + ) + + monkeypatch.setattr(os, "makedirs", no_permission_makedirs) + + for _ in range(0, 2): + my_extract = tldextract.TLDExtract(cache_dir=tmpdir) + assert_extract( + "http://www.google.com", + ("www.google.com", "www", "google", "com"), + funs=(my_extract,), + ) + + assert warning.call_count == 1 + assert warning.call_args[0][0].startswith("unable to cache") + + @responses.activate # pylint: disable=no-member def test_cache_timeouts(tmpdir): server = 'http://some-server.com' diff --git a/tldextract/cache.py b/tldextract/cache.py index 66f7d84e..957a61f9 100644 --- a/tldextract/cache.py +++ b/tldextract/cache.py @@ -18,6 +18,8 @@ class FileNotFoundError(Exception): LOG = logging.getLogger(__name__) +_DID_LOG_UNABLE_TO_CACHE = False + class DiskCache(object): """Disk _cache that only works for jsonable values""" @@ -52,21 +54,26 @@ def set(self, namespace, key, value): cache_filepath = self._key_to_cachefile_path(namespace, key) try: + _make_dir(cache_filepath) with open(cache_filepath, "w") as cache_file: json.dump(value, cache_file) except OSError as ioe: - LOG.warning( - ( - "unable to cache %s.%s in %s. This could refresh the " - "Public Suffix List over HTTP every app startup. " - "Construct your `TLDExtract` with a writable `cache_dir` or " - "set `cache_dir=False` to silence this warning. %s" - ), - namespace, - key, - cache_filepath, - ioe, - ) + global _DID_LOG_UNABLE_TO_CACHE + if not _DID_LOG_UNABLE_TO_CACHE: + LOG.warning( + ( + "unable to cache %s.%s in %s. This could refresh the " + "Public Suffix List over HTTP every app startup. " + "Construct your `TLDExtract` with a writable `cache_dir` or " + "set `cache_dir=False` to silence this warning. %s" + ), + namespace, + key, + cache_filepath, + ioe, + ) + _DID_LOG_UNABLE_TO_CACHE = True + return None def clear(self): @@ -92,7 +99,6 @@ def _key_to_cachefile_path(self, namespace, key): cache_path = os.path.join(namespace_path, hashed_key + self.file_ext) - _make_dir(cache_path) return cache_path def run_and_cache(self, func, namespace, kwargs, hashed_argnames): @@ -103,6 +109,27 @@ def run_and_cache(self, func, namespace, kwargs, hashed_argnames): key_args = {k: v for k, v in kwargs.items() if k in hashed_argnames} cache_filepath = self._key_to_cachefile_path(namespace, key_args) lock_path = cache_filepath + ".lock" + try: + _make_dir(cache_filepath) + except OSError as ioe: + global _DID_LOG_UNABLE_TO_CACHE + if not _DID_LOG_UNABLE_TO_CACHE: + LOG.warning( + ( + "unable to cache %s.%s in %s. This could refresh the " + "Public Suffix List over HTTP every app startup. " + "Construct your `TLDExtract` with a writable `cache_dir` or " + "set `cache_dir=False` to silence this warning. %s" + ), + namespace, + key_args, + cache_filepath, + ioe, + ) + _DID_LOG_UNABLE_TO_CACHE = True + + return func(**kwargs) + with FileLock(lock_path, timeout=self.lock_timeout): try: result = self.get(namespace=namespace, key=key_args)