Skip to content

Commit

Permalink
Merge pull request #34 from SvenMarcus/feature/file-cache
Browse files Browse the repository at this point in the history
Cache workspaces based on last write time
  • Loading branch information
markusweigelt authored Aug 25, 2023
2 parents 0b94b2b + 7cfa191 commit f7d1293
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 12 deletions.
30 changes: 30 additions & 0 deletions ocrdbrowser/_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from datetime import datetime
import functools
from pathlib import Path
from typing import Callable, TypeVar

T = TypeVar("T")


def path_cache(fn: Callable[[Path], T]) -> Callable[[Path], T]:
    """Memoize ``fn`` per path, recomputing when the path's mtime changes.

    The wrapped function is called the first time a path is seen and again
    whenever the modification time reported by ``path.stat()`` differs from
    the one recorded alongside the cached value.  Otherwise the cached value
    is returned without calling ``fn``.

    Args:
        fn: Function taking a single ``Path`` whose result should be cached.

    Returns:
        A wrapper with the same signature as ``fn``.

    Raises:
        OSError: Propagated from ``path.stat()`` (e.g. ``FileNotFoundError``
            when the path does not exist).
    """
    # Maps a path to (cached value, mtime in nanoseconds when it was computed).
    pathcache: dict[Path, tuple[T, int]] = {}

    def cache(path: Path, timestamp: int) -> T:
        new_value = fn(path)
        pathcache[path] = new_value, timestamp
        return new_value

    @functools.wraps(fn)
    def wrapper(path: Path) -> T:
        # Read the timestamp *before* computing: if the path is modified while
        # fn runs, the stored (older) timestamp makes the next call recompute
        # instead of silently keeping a stale value.
        last_modified = path.stat().st_mtime_ns

        entry = pathcache.get(path)
        # Any difference invalidates the entry, including an mtime that moved
        # backwards (e.g. a restored or replaced directory).
        if entry is not None and entry[1] == last_modified:
            return entry[0]

        return cache(path, last_modified)

    return wrapper
9 changes: 5 additions & 4 deletions ocrdbrowser/_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,19 @@

from pathlib import Path
from typing import List
from functools import lru_cache

from ._cache import path_cache


def is_valid(workspace: str) -> bool:
    """Report whether ``workspace`` directly contains a ``mets.xml`` entry."""
    mets_file = Path(workspace).joinpath("mets.xml")
    return mets_file.exists()


@lru_cache(maxsize=1)
def list_all(path: str) -> List[str]:
@path_cache
def list_all(path: Path) -> List[str]:
# NOTE(review): this span is rendered diff output whose +/- markers were lost;
# presumably the lru_cache/str signature and the Path(path).rglob clause are
# the removed side and the path_cache/Path lines the added side -- confirm
# against the repository before treating this as runnable code.
#
# Recursively enumerate workspaces below `path`: for every "mets.xml" found by
# rglob, return its parent directory as a string, skipping any file whose path
# matches ".backup/*/mets.xml" (excluding .backup subdirs).
return [
str(workspace.parent)
for workspace in Path(path).rglob("mets.xml")
for workspace in path.rglob("mets.xml")
if not workspace.match(".backup/*/mets.xml")
]
54 changes: 54 additions & 0 deletions tests/ocrdbrowser/test_path_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from pathlib import Path
from typing import Iterator

import pytest

from ocrdbrowser._cache import path_cache

TMPDIR = Path(__file__).parent / "tmpdir"
TMPFILE = TMPDIR / "tmp.txt"


@pytest.fixture(autouse=True)
def cleandir() -> Iterator[None]:
    """Create ``TMPDIR`` before each test and remove it again afterwards.

    Setup tolerates leftovers from an interrupted earlier run so that one
    aborted session does not make every following test error out.
    """
    # exist_ok: a directory left behind by a crashed run must not raise
    # FileExistsError here.
    TMPDIR.mkdir(exist_ok=True)
    # Start each test without a stale TMPFILE from a previous run.
    TMPFILE.unlink(missing_ok=True)

    yield

    TMPFILE.unlink(missing_ok=True)
    TMPDIR.rmdir()


def test__path_cache_decorator__returns_the_same_result_without_calling_func_again() -> (
    None
):
    """A second lookup of an unchanged path is answered from the cache."""
    seen: list[str | Path] = []

    @path_cache
    def fn(path: str | Path) -> int:
        # Record every real invocation; the return value is the running count.
        seen.append(path)
        return len(seen)

    results = [fn(TMPDIR), fn(TMPDIR)]

    assert len(seen) == 1
    assert results[0] == results[1]


def test__when_cached_path_changes__calls_func_again() -> None:
    """Changing the cached directory makes the next call recompute."""
    import os  # local import: only needed to bump the mtime below

    call_count = 0

    @path_cache
    def fn(path: str | Path) -> int:
        nonlocal call_count
        call_count += 1
        return call_count

    fn(TMPDIR)
    TMPFILE.touch()

    # Filesystem timestamps can be coarse, so the touch above may leave the
    # directory's mtime equal to the value recorded by the first call.  Push
    # the mtime forward explicitly so the test does not depend on timing.
    stat = TMPDIR.stat()
    os.utime(TMPDIR, ns=(stat.st_atime_ns, stat.st_mtime_ns + 1_000_000_000))

    fn(TMPDIR)

    assert call_count == 2
2 changes: 1 addition & 1 deletion tests/ocrdbrowser/test_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test__a_workspace_without_mets_xml_is_invalid() -> None:


def test__list_workspaces__returns_valid_workspaces() -> None:
assert set(workspace.list_all(str(WORKSPACES))) == {
assert set(workspace.list_all(WORKSPACES)) == {
str(WORKSPACES / "a_workspace"),
str(WORKSPACES / "another workspace"),
str(WORKSPACES / "nested" / "workspace"),
Expand Down
10 changes: 3 additions & 7 deletions tests/ocrdmonitor/server/test_workspace_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
from httpx import Response

from tests.ocrdmonitor.server import scraping
from tests.ocrdmonitor.server.decorators import use_custom_repository
from tests.ocrdmonitor.server.fixtures.environment import (
DevEnvironment,
Fixture,
RepositoryInitializer,
)
from tests.ocrdmonitor.server.fixtures.settings import WORKSPACE_DIR
from tests.testdoubles import (
Expand Down Expand Up @@ -60,16 +58,14 @@ def test__workspaces__shows_the_workspace_names_starting_from_workspace_root(
assert set(texts) == {"a_workspace", "another workspace", "nested/workspace"}


@use_custom_repository
@pytest.mark.asyncio
async def test__browse_workspace__passes_full_workspace_path_to_ocrdbrowser(
repository: RepositoryInitializer,
repository_fixture: Fixture,
) -> None:
workspace = "a_workspace"
full_workspace = str(WORKSPACE_DIR / workspace)
browser = BrowserSpy()
fixture = (
Fixture().with_repository_type(repository).with_browser_type(lambda: browser)
)
fixture = repository_fixture.with_browser_type(lambda: browser)

async with fixture as env:
response = open_workspace(env.app, workspace)
Expand Down

0 comments on commit f7d1293

Please sign in to comment.