Skip to content

Commit

Permalink
Use a global cache not thread-local one
Browse files Browse the repository at this point in the history
  • Loading branch information
JBorrow committed Feb 19, 2025
1 parent 5c3710c commit 5bbdee1
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 3 deletions.
41 changes: 39 additions & 2 deletions librarian_server/api/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import asyncio
import datetime
from functools import lru_cache
from pathlib import Path
from time import perf_counter
Expand Down Expand Up @@ -38,8 +39,44 @@

router = APIRouter(prefix="/api/v2/validate")

# Maximum age of an entry in VALIDATION_CACHE; older entries are treated as
# missing and recomputed by cached_calculate_checksum_of_local_copy.
VALIDATION_TIMEOUT = datetime.timedelta(hours=8)
# Module-level cache keyed by "{original_checksum}-{instance_id}"; each value
# is a (result, timezone-aware UTC datetime at which it was stored) tuple.
VALIDATION_CACHE = {}


async def cached_calculate_checksum_of_local_copy(
    original_checksum: str,
    original_size: int,
    path_info_function: callable,
    path: Path,
    store_id: int,
    instance_id: int,
):
    """
    Return the checksum information for a local copy, using a global cache.

    Results are stored in the module-level ``VALIDATION_CACHE`` together with
    the UTC time at which they were computed. A stored result is reused until
    it is older than ``VALIDATION_TIMEOUT``; after that it is recomputed.

    The cache key is built from ``original_checksum`` and ``instance_id``
    only; the remaining arguments are forwarded to
    ``calculate_checksum_of_local_copy`` but do not take part in the lookup.

    Parameters
    ----------
    original_checksum
        Checksum recorded for the file; part of the cache key.
    original_size
        Size recorded for the file.
    path_info_function
        Callable forwarded to ``calculate_checksum_of_local_copy``.
    path
        Path of the local copy.
    store_id
        ID of the store holding the copy.
    instance_id
        ID of the instance being validated; part of the cache key.

    Returns
    -------
    Whatever ``calculate_checksum_of_local_copy`` returns for these
    arguments, either freshly computed or taken from the cache.
    """
    key = f"{original_checksum}-{instance_id}"
    now = datetime.datetime.now(datetime.timezone.utc)

    cached = VALIDATION_CACHE.get(key)

    if cached is not None and (now - cached[1]) <= VALIDATION_TIMEOUT:
        log.info(
            f"Using cached result for instance {instance_id}", instance_id=instance_id
        )
        return cached[0]

    # Purge every expired entry, not just the one being refreshed. Without
    # this, keys that are never requested again would stay in the global
    # cache for the lifetime of the process. The key list is materialised
    # first so the dict is not mutated while it is being iterated.
    expired_keys = [
        stored_key
        for stored_key, (_, stored_at) in VALIDATION_CACHE.items()
        if (now - stored_at) > VALIDATION_TIMEOUT
    ]
    for stored_key in expired_keys:
        VALIDATION_CACHE.pop(stored_key, None)

    # NOTE(review): asyncify presumably moves the blocking checksum work off
    # the event loop -- confirm against its import. Two concurrent misses on
    # the same key will both compute; the later store simply wins.
    result = await asyncify(calculate_checksum_of_local_copy)(
        original_checksum=original_checksum,
        original_size=original_size,
        path_info_function=path_info_function,
        path=path,
        store_id=store_id,
        instance_id=instance_id,
    )

    # Timestamp is taken after the computation finishes, so the timeout is
    # measured from when the result became available.
    VALIDATION_CACHE[key] = (result, datetime.datetime.now(datetime.timezone.utc))

    return result


@lru_cache(maxsize=1024)
def calculate_checksum_of_local_copy(
original_checksum: str,
original_size: int,
Expand Down Expand Up @@ -187,7 +224,7 @@ async def validate_file(
if not instance.available:
continue

this_checksum_info = asyncify(calculate_checksum_of_local_copy)(
this_checksum_info = cached_calculate_checksum_of_local_copy(
original_checksum=file.checksum,
original_size=file.size,
path_info_function=instance.store.store_manager.path_info,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ exclude=["*tests*"]
[project]
name="hera_librarian"
requires-python = ">=3.10"
version = "3.1.0"
version = "3.1.1"
dependencies = [
"alembic",
"argon2-cffi",
Expand Down
4 changes: 4 additions & 0 deletions tests/integration_test/test_send_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,10 @@ def test_send_from_existing_file_row(
# Should have _ours_ and _theirs_.
assert len(instance_validations) == 2

# Check again (should use the cache)!
instance_validations = mocked_admin_client.validate_file(file_name=file_name)
assert len(instance_validations) == 2

source_librarians_for_validations = {x.librarian for x in instance_validations}

assert len(source_librarians_for_validations) == 2 # I.e. they are different
Expand Down

0 comments on commit 5bbdee1

Please sign in to comment.