Skip to content

Commit

Permalink
Improve runtime of dir listing and add memoizing instead of files
Browse files Browse the repository at this point in the history
Messured results:
$ python test_before.py
n_projects=412 n_versions=4038 storage='43 GB'
first run:  12.587313868105412
second run:  4.187226295471191e-06

$ python test_after.py
n_projects=412 n_versions=4038 storage='43 GB'
first run:  3.5831107571721077
second run:  5.334615707397461e-06
cash clear run:  3.1085935048758984

tests show small differences in size but this does
not matter in the grand schema of things
  • Loading branch information
fliiiix committed Oct 22, 2024
1 parent f0ce480 commit 6163699
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 33 deletions.
19 changes: 10 additions & 9 deletions docat/docat/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
UPLOAD_FOLDER,
calculate_token,
create_symlink,
directory_size,
extract_archive,
get_all_projects,
get_dir_size,
get_project_details,
get_system_stats,
is_forbidden_project_name,
Expand Down Expand Up @@ -140,9 +140,9 @@ def upload_icon(
with icon_path.open("wb") as buffer:
shutil.copyfileobj(file.file, buffer)

# recalculate size cache
directory_size(project_base_path, recalculate=True)
directory_size(DOCAT_UPLOAD_FOLDER, recalculate=True)
# force cache revalidation
get_system_stats.cache_clear()
get_dir_size.cache_clear()

return ApiResponse(message="Icon successfully uploaded")

Expand Down Expand Up @@ -271,9 +271,9 @@ def upload(
if not (base_path / "index.html").exists():
return ApiResponse(message="Documentation uploaded successfully, but no index.html found at root of archive.")

# recalculate size cache
directory_size(project_base_path, recalculate=True)
directory_size(DOCAT_UPLOAD_FOLDER, recalculate=True)
# force cache revalidation
get_system_stats.cache_clear()
get_dir_size.cache_clear()

return ApiResponse(message="Documentation uploaded successfully")

Expand Down Expand Up @@ -381,8 +381,9 @@ def delete(
response.status_code = status.HTTP_404_NOT_FOUND
return ApiResponse(message=message)

# recalculate size cache
directory_size(DOCAT_UPLOAD_FOLDER, recalculate=True)
# force cache revalidation
get_system_stats.cache_clear()
get_dir_size.cache_clear()

return ApiResponse(message=f"Successfully deleted version '{version}'")

Expand Down
56 changes: 39 additions & 17 deletions docat/docat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import shutil
from datetime import datetime
from functools import cache
from pathlib import Path
from zipfile import ZipFile, ZipInfo

Expand Down Expand Up @@ -161,29 +162,50 @@ def readable_size(bytes: int) -> str:
return str(amount) + size_suffix


def directory_size(path: Path, recalculate: bool = False) -> int:
@cache
def get_dir_size(path: Path) -> int:
"""
Returns the size of a directory and caches it's size unless
recalculate is set to true or the cache is missed
"""
size_info = path / ".size"
if size_info.exists() and not recalculate:
return int(size_info.read_text())

dir_size = sum(file.stat().st_size for file in path.rglob("*") if file.is_file())
size_info.write_text(str(dir_size)) # cache directory size
Calculate the total size of a directory.
return dir_size
Results are cached (memoizing) by path.
"""
total = 0
with os.scandir(path) as it:
for entry in it:
if entry.is_file():
total += entry.stat().st_size
elif entry.is_dir():
total += get_dir_size(entry.path)
return total


@cache
def get_system_stats(upload_folder_path: Path) -> Stats:
"""
Return all docat statistics
Return all docat statistics.
Results are cached (memoizing) by path.
"""

dirs = 0
versions = 0
size = 0
# Note: Not great nesting with the deep nesting
# but it needs to run fast, consider speed when refactoring!
with os.scandir(upload_folder_path) as root:
for f in root:
if f.is_dir():
dirs += 1
with os.scandir(f.path) as project:
for v in project:
if v.is_dir() and not v.is_symlink():
size += get_dir_size(v.path)
versions += 1

return Stats(
n_projects=len([p for p in upload_folder_path.iterdir() if p.is_dir()]),
n_versions=sum(len([p for p in d.iterdir() if p.is_dir() and not p.is_symlink()]) for d in upload_folder_path.glob("*/")),
storage=readable_size(directory_size(upload_folder_path)),
n_projects=dirs,
n_versions=versions,
storage=readable_size(size),
)


Expand Down Expand Up @@ -212,7 +234,7 @@ def get_all_projects(upload_folder_path: Path, include_hidden: bool) -> Projects
name=project_name,
logo=project_has_logo,
versions=details.versions,
storage=readable_size(directory_size(upload_folder_path / project)),
storage=readable_size(get_dir_size(upload_folder_path / project)),
)
)

Expand Down Expand Up @@ -245,7 +267,7 @@ def should_include(name: str) -> bool:

return ProjectDetail(
name=project_name,
storage=readable_size(directory_size(docs_folder)),
storage=readable_size(get_dir_size(docs_folder)),
versions=sorted(
[
ProjectVersion(
Expand Down
4 changes: 2 additions & 2 deletions docat/tests/test_hide_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ def test_hide_and_show_with_tag(_, client_with_claimed_project):
assert project_details_response.status_code == 200
assert project_details_response.json() == {
"name": "some-project",
"storage": "20 bytes",
"storage": "40 bytes",
"versions": [],
}

Expand All @@ -408,6 +408,6 @@ def test_hide_and_show_with_tag(_, client_with_claimed_project):
assert project_details_response.status_code == 200
assert project_details_response.json() == {
"name": "some-project",
"storage": "20 bytes",
"storage": "40 bytes",
"versions": [{"name": "1.0.0", "timestamp": "2000-01-01T01:01:00", "tags": ["latest"], "hidden": False}],
}
8 changes: 4 additions & 4 deletions docat/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
@pytest.mark.parametrize(
("project_config", "n_projects", "n_versions", "storage"),
[
([("some-project", ["1.0.0"])], 1, 1, "22 bytes"),
([("some-project", ["1.0.0", "2.0.0"])], 1, 2, "44 bytes"),
([("some-project", ["1.0.0", "2.0.0"])], 1, 2, "44 bytes"),
([("some-project", ["1.0.0", "2.0.0"]), ("another-project", ["1"])], 2, 3, "66 bytes"),
([("some-project", ["1.0.0"])], 1, 1, "20 bytes"),
([("some-project", ["1.0.0", "2.0.0"])], 1, 2, "40 bytes"),
([("some-project", ["1.0.0", "2.0.0"])], 1, 2, "40 bytes"),
([("some-project", ["1.0.0", "2.0.0"]), ("another-project", ["1"])], 2, 3, "60 bytes"),
],
)
def test_get_stats(_, project_config, n_projects, n_versions, storage, client_with_claimed_project):
Expand Down
2 changes: 1 addition & 1 deletion docat/tests/test_upload_icon.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_get_project_recongizes_icon(_, client_with_claimed_project):
{
"name": "some-project",
"logo": True,
"storage": "105 bytes",
"storage": "103 bytes",
"versions": [{"name": "1.0.0", "timestamp": "2000-01-01T01:01:00", "tags": [], "hidden": False}],
}
]
Expand Down

0 comments on commit 6163699

Please sign in to comment.