Skip to content

Commit

Permalink
Catch permission error when making cache dir, as well as cache file (#211)
Browse files Browse the repository at this point in the history

Stanches #209.

* Move makedir side-effect from string building helper function into try block
* Duplicate warning from writing a single cache file
* Log this at most once per app
* Remove unused vars
  • Loading branch information
john-kurkowski authored Oct 25, 2020
1 parent 993ba76 commit 653abc9
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 15 deletions.
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
import logging

import pytest
import tldextract.cache


@pytest.fixture(autouse=True)
def reset_log_level():
    """Reset logging state before every test.

    Keeps test output the Unix way — silence is golden — and clears the
    warn-once flag so each test observes first-warning behavior on its own.
    """
    # The flag is module-private state; tests reach in deliberately.
    tldextract.cache._DID_LOG_UNABLE_TO_CACHE = False  # pylint: disable=protected-access
    logging.getLogger().setLevel(logging.WARN)
27 changes: 27 additions & 0 deletions tests/main_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
'''Main tldextract unit tests.'''

import logging
import os
import tempfile

import pytest
Expand Down Expand Up @@ -234,6 +236,31 @@ def test_result_as_dict():
assert result._asdict() == expected_dict


def test_cache_permission(mocker, monkeypatch, tmpdir):
    """Emit a warning at most once per app when the cache dir can't be made.

    Simulates an unwritable install location by making ``os.makedirs`` raise
    ``PermissionError``, then extracts twice: extraction must still succeed
    both times, and the "unable to cache" warning must be logged exactly once.
    """

    # Spy on the tldextract.cache logger so we can count warning calls.
    warning = mocker.patch.object(logging.getLogger("tldextract.cache"), "warning")

    def no_permission_makedirs(*args, **kwargs):
        # Mimic the real error seen when the package dir is root-owned.
        raise PermissionError(
            """[Errno 13] Permission denied:
            '/usr/local/lib/python3.7/site-packages/tldextract/.suffix_cache"""
        )

    monkeypatch.setattr(os, "makedirs", no_permission_makedirs)

    # Two iterations prove the warning is deduplicated, not emitted per call.
    for _ in range(2):
        my_extract = tldextract.TLDExtract(cache_dir=tmpdir)
        assert_extract(
            "http://www.google.com",
            ("www.google.com", "www", "google", "com"),
            funs=(my_extract,),
        )

    assert warning.call_count == 1
    assert warning.call_args[0][0].startswith("unable to cache")


@responses.activate
def test_cache_timeouts(tmpdir):
server = 'http://some-server.com'
Expand Down
53 changes: 40 additions & 13 deletions tldextract/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

LOG = logging.getLogger(__name__)

_DID_LOG_UNABLE_TO_CACHE = False


class DiskCache:
"""Disk _cache that only works for jsonable values"""
Expand Down Expand Up @@ -46,21 +48,26 @@ def set(self, namespace, key, value):
cache_filepath = self._key_to_cachefile_path(namespace, key)

try:
_make_dir(cache_filepath)
with open(cache_filepath, "w") as cache_file:
json.dump(value, cache_file)
except OSError as ioe:
LOG.warning(
(
"unable to cache %s.%s in %s. This could refresh the "
"Public Suffix List over HTTP every app startup. "
"Construct your `TLDExtract` with a writable `cache_dir` or "
"set `cache_dir=False` to silence this warning. %s"
),
namespace,
key,
cache_filepath,
ioe,
)
global _DID_LOG_UNABLE_TO_CACHE # pylint: disable=global-statement
if not _DID_LOG_UNABLE_TO_CACHE:
LOG.warning(
(
"unable to cache %s.%s in %s. This could refresh the "
"Public Suffix List over HTTP every app startup. "
"Construct your `TLDExtract` with a writable `cache_dir` or "
"set `cache_dir=False` to silence this warning. %s"
),
namespace,
key,
cache_filepath,
ioe,
)
_DID_LOG_UNABLE_TO_CACHE = True

return None

def clear(self):
Expand All @@ -86,7 +93,6 @@ def _key_to_cachefile_path(self, namespace, key):

cache_path = os.path.join(namespace_path, hashed_key + self.file_ext)

_make_dir(cache_path)
return cache_path

def run_and_cache(self, func, namespace, kwargs, hashed_argnames):
Expand All @@ -97,6 +103,27 @@ def run_and_cache(self, func, namespace, kwargs, hashed_argnames):
key_args = {k: v for k, v in kwargs.items() if k in hashed_argnames}
cache_filepath = self._key_to_cachefile_path(namespace, key_args)
lock_path = cache_filepath + ".lock"
try:
_make_dir(cache_filepath)
except OSError as ioe:
global _DID_LOG_UNABLE_TO_CACHE # pylint: disable=global-statement
if not _DID_LOG_UNABLE_TO_CACHE:
LOG.warning(
(
"unable to cache %s.%s in %s. This could refresh the "
"Public Suffix List over HTTP every app startup. "
"Construct your `TLDExtract` with a writable `cache_dir` or "
"set `cache_dir=False` to silence this warning. %s"
),
namespace,
key_args,
cache_filepath,
ioe,
)
_DID_LOG_UNABLE_TO_CACHE = True

return func(**kwargs)

with FileLock(lock_path, timeout=self.lock_timeout):
try:
result = self.get(namespace=namespace, key=key_args)
Expand Down
2 changes: 0 additions & 2 deletions tldextract/tldextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@
"https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat",
)

CACHE = DiskCache(cache_dir=CACHE_DIR)


class ExtractResult(collections.namedtuple("ExtractResult", "subdomain domain suffix")):
"""namedtuple of a URL's subdomain, domain, and suffix."""
Expand Down

0 comments on commit 653abc9

Please sign in to comment.