feat: Add file size
kaloster committed Jan 17, 2025
1 parent 4fd1241 commit c0f1a23
Showing 48 changed files with 4,947 additions and 2,362 deletions.
8 changes: 4 additions & 4 deletions .infra/rdev/values.yaml
@@ -2,13 +2,13 @@ stack:
services:
apiv2:
image:
tag: sha-8bba5b0
tag: sha-6d40480
initContainers:
# Install cerbos policies where the cerbos sidecar can grab them.
- name: install-cerbos-policies
image:
repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2
tag: sha-8bba5b0
tag: sha-6d40480
command: ["cp", "-r", "./cerbos/", "/var/policies/"]
volumeMounts:
- mountPath: /var/policies
@@ -24,7 +24,7 @@ stack:
- name: run-migrations
image:
repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2
tag: sha-8bba5b0
tag: sha-6d40480
command: ["alembic", "upgrade", "head"]
resources:
limits:
@@ -37,7 +37,7 @@
- name: gen-keypair
image:
repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2
tag: sha-8bba5b0
tag: sha-6d40480
command: ["bash", "./etc/gen_keys.sh", "/var/keys/"]
volumeMounts:
- mountPath: /var/keys


3 changes: 2 additions & 1 deletion apiv2/database/models/annotation_file.py


3 changes: 2 additions & 1 deletion apiv2/database/models/dataset.py


1 change: 1 addition & 0 deletions apiv2/database/models/frame.py


2 changes: 2 additions & 0 deletions apiv2/database/models/tiltseries.py


2 changes: 2 additions & 0 deletions apiv2/database/models/tomogram.py


1 change: 1 addition & 0 deletions apiv2/db_import/importers/annotation.py
@@ -82,6 +82,7 @@ def load_computed_fields(self):
self.model_args["source"] = self.calculate_source()
self.model_args["s3_path"] = self.get_s3_url(self.input_data["path"])
self.model_args["https_path"] = self.get_https_url(self.input_data["path"])
self.model_args["file_size"] = self.get_file_size(self.input_data["path"])


class AnnotationImporter(IntegratedDBImporter):
20 changes: 20 additions & 0 deletions apiv2/db_import/importers/base.py
@@ -47,6 +47,26 @@ def get_s3_url(self, *input_path: tuple[str]) -> str:
input_path = input_path[len(self.config.bucket_name) + 1 :]
return os.path.join(self.config.s3_prefix, input_path)

def get_file_size(self, *input_path: tuple[str]) -> int | None:
    # Sum the sizes of every S3 object under the given path. The path may name a
    # single object (e.g. an .mrc file) or a directory-like prefix (e.g. a .zarr store).
    input_path = os.path.join(*input_path)
    if input_path.startswith(self.config.bucket_name):
        input_path = input_path[len(self.config.bucket_name) + 1 :]

    total_size = 0
    try:
        paginator = self.config.s3_client.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=self.config.bucket_name, Prefix=input_path)
        for page in pages:
            # Pages with no matching keys omit the "Contents" field entirely.
            for obj in page.get("Contents", []):
                total_size += obj["Size"]
        return total_size
    except Exception as e:
        print(f"Error retrieving folder size: {e}")
        return None

def _map_direct_fields(self):
"""Iterate over `self.direct_mapped_fields` and populate model args based on the data we find in the input dict."""
for db_key, _ in self.direct_mapped_fields.items():
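Taken on its own, the new helper boils down to listing every object under a prefix with the list_objects_v2 paginator and adding up the Size fields. A minimal standalone sketch of that idea against a plain boto3 client — the bucket and prefix are illustrative stand-ins borrowed from the test fixtures, not values the importer hard-codes:

import boto3


def s3_prefix_size(bucket: str, prefix: str) -> int:
    """Sum the sizes (in bytes) of every S3 object whose key starts with `prefix`."""
    s3 = boto3.client("s3")
    total = 0
    for page in s3.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix):
        # Pages with no matching keys omit the "Contents" field entirely.
        for obj in page.get("Contents", []):
            total += obj["Size"]
    return total


# A single-object prefix (an .mrc file) yields that object's size; a directory-like
# prefix (a .zarr store) yields the combined size of all of its chunk objects.
print(s3_prefix_size("test-public-bucket", "30001/RUN1/Frames/"))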
19 changes: 19 additions & 0 deletions apiv2/db_import/importers/base_importer.py
@@ -79,6 +79,25 @@ def get_s3_url(self, *input_path: tuple[str]) -> str:
input_path = input_path[len(self.config.bucket_name) + 1 :]
return os.path.join(self.config.s3_prefix, input_path)

def get_file_size(self, *input_path: tuple[str]) -> int | None:
    # Sum the sizes of every S3 object under the given path. The path may name a
    # single object (e.g. an .mrc file) or a directory-like prefix (e.g. a .zarr store).
    input_path = os.path.join(*input_path)
    if input_path.startswith(self.config.bucket_name):
        input_path = input_path[len(self.config.bucket_name) + 1 :]

    total_size = 0
    try:
        paginator = self.config.s3_client.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=self.config.bucket_name, Prefix=input_path)
        for page in pages:
            # Pages with no matching keys omit the "Contents" field entirely.
            for obj in page.get("Contents", []):
                total_size += obj["Size"]
        return total_size
    except Exception as e:
        print(f"Error retrieving folder size: {e}")
        return None

class StaleDeletionDBImporter(BaseDBImporter):
"""
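base_importer.py gains an identical copy of the helper, so its behavior can be pinned down without a live bucket by stubbing the S3 client. A hedged test sketch using botocore's Stubber — the keys, sizes, and test name are invented for illustration and are not part of this commit's test suite:

import boto3
from botocore.stub import Stubber


def sum_prefix_size(client, bucket: str, prefix: str) -> int:
    """The same summation logic as get_file_size, factored out for the test."""
    total = 0
    for page in client.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):
            total += obj["Size"]
    return total


def test_prefix_size_sums_all_listed_objects():
    s3 = boto3.client("s3", region_name="us-west-2")
    stubber = Stubber(s3)
    # One non-truncated page containing two objects; the expected total is their sum.
    stubber.add_response(
        "list_objects_v2",
        {
            "IsTruncated": False,
            "KeyCount": 2,
            "Contents": [
                {"Key": "30001/RUN1/Frames/frame1", "Size": 1000},
                {"Key": "30001/RUN1/Frames/frame2", "Size": 2048},
            ],
        },
        {"Bucket": "test-public-bucket", "Prefix": "30001/RUN1/Frames"},
    )
    with stubber:
        assert sum_prefix_size(s3, "test-public-bucket", "30001/RUN1/Frames") == 3048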
1 change: 1 addition & 0 deletions apiv2/db_import/importers/dataset.py
@@ -64,6 +64,7 @@ def get_computed_fields(self) -> dict[str, Any]:
extra_data = {
"s3_prefix": self.get_s3_url(self.dir_prefix),
"https_prefix": self.get_https_url(self.dir_prefix),
"file_size": self.get_file_size(self.dir_prefix),
"key_photo_url": None,
"key_photo_thumbnail_url": None,
}
1 change: 1 addition & 0 deletions apiv2/db_import/importers/frame.py
@@ -24,6 +24,7 @@ class FrameItem(ItemDBImporter):
def load_computed_fields(self):
self.model_args["s3_frame_path"] = self.get_s3_url(self.input_data["file"])
self.model_args["https_frame_path"] = self.get_https_url(self.input_data["file"])
self.model_args["file_size"] = self.get_file_size(self.input_data["file"])
self.model_args["run_id"] = self.input_data["run"].id
self.model_args["deposition_id"] = self.input_data["deposition"].id

2 changes: 2 additions & 0 deletions apiv2/db_import/importers/tiltseries.py
@@ -79,10 +79,12 @@ def get_computed_fields(self) -> dict[str, Any]:
if mrc_path := self.metadata.get("mrc_file"):
extra_data["s3_mrc_file"] = self.get_s3_url(mrc_path)
extra_data["https_mrc_file"] = self.get_https_url(mrc_path)
extra_data["file_size_mrc"] = self.get_file_size(mrc_path)

if omezarr_path := self.metadata.get("omezarr_dir"):
extra_data["s3_omezarr_dir"] = self.get_s3_url(omezarr_path)
extra_data["https_omezarr_dir"] = self.get_https_url(omezarr_path)
extra_data["file_size_omezarr"] = self.get_file_size(omezarr_path)

if angle_list := self.get_first_match_file_name("*.rawtlt") or self.get_first_match_file_name("*.tlt"):
extra_data["s3_angle_list"] = self.get_s3_url(angle_list)
2 changes: 2 additions & 0 deletions apiv2/db_import/importers/tomogram.py
@@ -65,8 +65,10 @@ def load_computed_fields(self):
"reconstruction_software": self.input_data.get("reconstruction_software") or "Unknown",
"s3_omezarr_dir": self.get_s3_url(self.input_data["omezarr_dir"]),
"https_omezarr_dir": self.get_https_url(self.input_data["omezarr_dir"]),
"file_size_omezarr": self.get_file_size(self.input_data["omezarr_dir"]),
"s3_mrc_file": self.get_s3_url(self.input_data["mrc_file"]),
"https_mrc_file": self.get_https_url(self.input_data["mrc_file"]),
"file_size_mrc": self.get_file_size(self.input_data["mrc_file"]),
# TODO: Add alignment_id once we have an alignment importer.
"alignment_id": self.config.get_alignment_by_path(
self.get_s3_url(self.input_data["alignment_metadata_path"]),
1 change: 1 addition & 0 deletions apiv2/db_import/tests/conftest.py
@@ -127,4 +127,5 @@ def expected_dataset(http_prefix: str) -> dict[str, Any]:
"key_photo_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/snapshot.png",
"key_photo_thumbnail_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/thumbnail.png",
"deposition_id": 300,
"file_size": 1373074.0,
}
3 changes: 3 additions & 0 deletions apiv2/db_import/tests/test_db_annotation_import.py
@@ -61,6 +61,7 @@ def expected_annotation_files(http_prefix: str) -> list[dict[str, Any]]:
"source": "community",
"format": "ndjson",
"is_visualization_default": True,
"file_size": 0,
},
{
"tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1,
@@ -69,6 +70,7 @@ def expected_annotation_files(http_prefix: str) -> list[dict[str, Any]]:
"source": "community",
"format": "mrc",
"is_visualization_default": False,
"file_size": 0,
},
{
"tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1,
@@ -77,6 +79,7 @@ def expected_annotation_files(http_prefix: str) -> list[dict[str, Any]]:
"source": "community",
"format": "zarr",
"is_visualization_default": False,
"file_size": 0,
},
]

4 changes: 4 additions & 0 deletions apiv2/db_import/tests/test_db_tiltseries_import.py
@@ -55,6 +55,8 @@ def expected_tiltseries(http_prefix: str) -> list[dict[str, Any]]:
"size_z": 31,
"size_y": 3838,
"size_x": 3708,
"file_size_omezarr": 0,
"file_size_mrc": 0,
},
{
"acceleration_voltage": 10000,
@@ -82,6 +84,8 @@ def expected_tiltseries(http_prefix: str) -> list[dict[str, Any]]:
"tilt_step": 3,
"tilting_scheme": "min to max tilt",
"total_flux": 12,
"file_size_omezarr": 0,
"file_size_mrc": 0,
},
]

4 changes: 4 additions & 0 deletions apiv2/db_import/tests/test_db_tomo_import.py
@@ -73,8 +73,10 @@ def expected_tomograms_by_run(http_prefix: str) -> dict[str, dict[float, list[di
"tomogram_version": 1.0,
"s3_omezarr_dir": f"s3://test-public-bucket/{run1_vs_path}Tomograms/100/RUN1.zarr",
"https_omezarr_dir": f"{http_prefix}/{run1_vs_path}Tomograms/100/RUN1.zarr",
"file_size_omezarr": 0,
"s3_mrc_file": f"s3://test-public-bucket/{run1_vs_path}Tomograms/100/RUN1.mrc",
"https_mrc_file": f"{http_prefix}/{run1_vs_path}Tomograms/100/RUN1.mrc",
"file_size_mrc": 0,
"scale0_dimensions": "980,1016,500",
"scale1_dimensions": "490,508,250",
"scale2_dimensions": "245,254,125",
@@ -106,8 +108,10 @@ def expected_tomograms_by_run(http_prefix: str) -> dict[str, dict[float, list[di
"tomogram_version": 1.0,
"s3_omezarr_dir": f"s3://test-public-bucket/{run2_vs_path}Tomograms/100/RUN2.zarr",
"https_omezarr_dir": f"{http_prefix}/{run2_vs_path}Tomograms/100/RUN2.zarr",
"file_size_omezarr": 0,
"s3_mrc_file": f"s3://test-public-bucket/{run2_vs_path}Tomograms/100/RUN2.mrc",
"https_mrc_file": f"{http_prefix}/{run2_vs_path}Tomograms/100/RUN2.mrc",
"file_size_mrc": 0,
"scale0_dimensions": "800,800,400",
"scale1_dimensions": "400,400,200",
"scale2_dimensions": "200,200,100",
2 changes: 2 additions & 0 deletions apiv2/db_import/tests/test_frame_import.py
@@ -20,10 +20,12 @@ def expected_frames(http_prefix: str) -> list[dict[str, Any]]:
{
"s3_frame_path": "s3://test-public-bucket/30001/RUN1/Frames/frame1",
"https_frame_path": "https://foo.com/30001/RUN1/Frames/frame1",
"file_size": 0,
},
{
"s3_frame_path": "s3://test-public-bucket/30001/RUN1/Frames/frame2",
"https_frame_path": "https://foo.com/30001/RUN1/Frames/frame2",
"file_size": 0,
},
]

1 change: 1 addition & 0 deletions apiv2/graphql_api/helpers/annotation_file.py


1 change: 1 addition & 0 deletions apiv2/graphql_api/helpers/dataset.py


1 change: 1 addition & 0 deletions apiv2/graphql_api/helpers/frame.py


2 changes: 2 additions & 0 deletions apiv2/graphql_api/helpers/tiltseries.py


2 changes: 2 additions & 0 deletions apiv2/graphql_api/helpers/tomogram.py

