diff --git a/ocrdbrowser/_cache.py b/ocrdbrowser/_cache.py
new file mode 100644
index 0000000..25b5163
--- /dev/null
+++ b/ocrdbrowser/_cache.py
@@ -0,0 +1,42 @@
+from datetime import datetime
+import functools
+from pathlib import Path
+from typing import Callable, TypeVar
+
+T = TypeVar("T")
+
+
+def path_cache(fn: Callable[[Path], T]) -> Callable[[Path], T]:
+    """Cache the result of ``fn`` per path until the path's mtime changes.
+
+    The wrapped function is called again whenever ``path.stat().st_mtime``
+    differs from the timestamp stored with the cached value. ``path`` is
+    converted with ``Path(path)``, so string paths are accepted as well.
+
+    NOTE: for a directory, the modification time typically reflects changes
+    to its direct entries only; changes deeper in the tree may not invalidate
+    the cached value.
+
+    Raises whatever ``path.stat()`` raises, e.g. ``FileNotFoundError``.
+    """
+    pathcache: dict[Path, tuple[T, float]] = {}
+
+    @functools.wraps(fn)
+    def wrapper(path: Path) -> T:
+        path = Path(path)
+        # Read the timestamp before calling fn: if the path changes while fn
+        # runs, the stored timestamp predates the change and the next call
+        # recomputes instead of serving a stale value.
+        last_modified = path.stat().st_mtime
+
+        cached = pathcache.get(path)
+        # Any difference invalidates the entry, including a timestamp that
+        # moved backwards.
+        if cached is not None and cached[1] == last_modified:
+            return cached[0]
+
+        value = fn(path)
+        pathcache[path] = value, last_modified
+        return value
+
+    return wrapper
diff --git a/ocrdbrowser/_workspace.py b/ocrdbrowser/_workspace.py
index fac5b3d..b9fd36b 100644
--- a/ocrdbrowser/_workspace.py
+++ b/ocrdbrowser/_workspace.py
@@ -2,18 +2,19 @@ from pathlib import Path
 from typing import List
 
 
-from functools import lru_cache
+
+from ._cache import path_cache
 
 
 def is_valid(workspace: str) -> bool:
     return (Path(workspace) / "mets.xml").exists()
 
 
-@lru_cache(maxsize=1)
-def list_all(path: str) -> List[str]:
+@path_cache
+def list_all(path: Path) -> List[str]:
     # recursively enumerate METS file paths (excluding .backup subdirs)
     return [
         str(workspace.parent)
-        for workspace in Path(path).rglob("mets.xml")
+        for workspace in path.rglob("mets.xml")
         if not workspace.match(".backup/*/mets.xml")
     ]
diff --git a/tests/ocrdbrowser/test_path_cache.py b/tests/ocrdbrowser/test_path_cache.py
new file mode 100644
index 0000000..c142da3
--- /dev/null
+++ b/tests/ocrdbrowser/test_path_cache.py
@@ -0,0 +1,60 @@
+import os
+from pathlib import Path
+from typing import Iterator
+
+import pytest
+
+from ocrdbrowser._cache import path_cache
+
+TMPDIR = Path(__file__).parent / "tmpdir"
+TMPFILE = TMPDIR / "tmp.txt"
+
+
+@pytest.fixture(autouse=True)
+def cleandir() -> Iterator[None]:
+    """Create TMPDIR for each test and remove it, with TMPFILE, afterwards."""
+    TMPDIR.mkdir()
+
+    yield
+
+    TMPFILE.unlink(missing_ok=True)
+    TMPDIR.rmdir()
+
+
+def test__path_cache_decorator__returns_the_same_result_without_calling_func_again() -> (
+    None
+):
+    call_count = 0
+
+    @path_cache
+    def fn(path: str | Path) -> int:
+        nonlocal call_count
+        call_count += 1
+        return call_count
+
+    first = fn(TMPDIR)
+    second = fn(TMPDIR)
+
+    assert call_count == 1
+    assert first == second
+
+
+def test__when_cached_path_changes__calls_func_again() -> None:
+    call_count = 0
+
+    @path_cache
+    def fn(path: str | Path) -> int:
+        nonlocal call_count
+        call_count += 1
+        return call_count
+
+    fn(TMPDIR)
+    TMPFILE.touch()
+    # Timestamp granularity can be coarser than the time between the two
+    # calls, so move the directory's mtime forward explicitly.
+    stat = TMPDIR.stat()
+    os.utime(TMPDIR, (stat.st_atime, stat.st_mtime + 1))
+
+    fn(TMPDIR)
+
+    assert call_count == 2
diff --git a/tests/ocrdbrowser/test_workspace.py b/tests/ocrdbrowser/test_workspace.py
index cd5ff5b..d6f92db 100644
--- a/tests/ocrdbrowser/test_workspace.py
+++ b/tests/ocrdbrowser/test_workspace.py
@@ -18,7 +18,7 @@ def test__a_workspace_without_mets_xml_is_invalid() -> None:
 
 
 def test__list_workspaces__returns_valid_workspaces() -> None:
-    assert set(workspace.list_all(str(WORKSPACES))) == {
+    assert set(workspace.list_all(WORKSPACES)) == {
         str(WORKSPACES / "a_workspace"),
         str(WORKSPACES / "another workspace"),
         str(WORKSPACES / "nested" / "workspace"),
diff --git a/tests/ocrdmonitor/server/test_workspace_endpoint.py b/tests/ocrdmonitor/server/test_workspace_endpoint.py
index 1aa355e..08d2004 100644
--- a/tests/ocrdmonitor/server/test_workspace_endpoint.py
+++ b/tests/ocrdmonitor/server/test_workspace_endpoint.py
@@ -9,11 +9,9 @@ from httpx import Response
 
 
 from tests.ocrdmonitor.server import scraping
-from tests.ocrdmonitor.server.decorators import use_custom_repository
 from tests.ocrdmonitor.server.fixtures.environment import (
     DevEnvironment,
     Fixture,
-    RepositoryInitializer,
 )
 from tests.ocrdmonitor.server.fixtures.settings import WORKSPACE_DIR
 from tests.testdoubles import (
@@ -60,16 +58,14 @@ def test__workspaces__shows_the_workspace_names_starting_from_workspace_root(
     assert set(texts) == {"a_workspace", "another workspace", "nested/workspace"}
 
 
-@use_custom_repository
+@pytest.mark.asyncio
 async def test__browse_workspace__passes_full_workspace_path_to_ocrdbrowser(
-    repository: RepositoryInitializer,
+    repository_fixture: Fixture,
 ) -> None:
     workspace = "a_workspace"
     full_workspace = str(WORKSPACE_DIR / workspace)
     browser = BrowserSpy()
-    fixture = (
-        Fixture().with_repository_type(repository).with_browser_type(lambda: browser)
-    )
+    fixture = repository_fixture.with_browser_type(lambda: browser)
 
     async with fixture as env:
         response = open_workspace(env.app, workspace)