Skip to content

Commit

Permalink
task/DES-2764: add util to populate filemeta table from tapisv2 based…
Browse files Browse the repository at this point in the history
… file metadata (#1259)

* Normalize path to be consistent

* Add command to migrate v2 file metadata

* Bump limit

* Change log type

---------

Co-authored-by: Jake Rosenberg <[email protected]>
  • Loading branch information
nathanfranklin and jarosenb authored May 24, 2024
1 parent 3ea9680 commit 72e2a8d
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 1 deletion.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Populate filemeta table from tapisv2-based file metadata
This module contains a Django management command which populates filemeta table
"""

# pylint: disable=logging-fstring-interpolation
# pylint: disable=no-member

import logging
import json

from django.core.management.base import BaseCommand

from designsafe.apps.api.filemeta.models import FileMetaModel

try:
from designsafe.apps.api.agave import get_service_account_client_v2
except ImportError:
# TODOV3 drop this
from designsafe.apps.api.agave import (
get_service_account_client as get_service_account_client_v2,
)


logger = logging.getLogger(__name__)


def get_all_v2_file_meta():
    """Return all Tapis v2 metadata objects that describe files.

    Pages through ``meta.listMetadata`` on the v2 service-account client,
    ``limit`` records at a time, and stops at the first page whose length
    differs from ``limit``.

    Returns:
        list: every metadata record returned by the query, in the order the
        pages were received.
    """
    service_account_v2 = get_service_account_client_v2()

    # Match entries named "designsafe.file" whose value carries both a
    # system and a path.
    query = json.dumps(
        {
            "name": "designsafe.file",
            "value.system": {"$exists": True},
            "value.path": {"$exists": True},
        }
    )
    limit = 300

    all_results = []
    offset = 0
    while True:
        page = service_account_v2.meta.listMetadata(
            q=query, limit=limit, offset=offset
        )
        # extend() appends in place; rebuilding the list with "+" on every
        # page would copy all previously fetched records each time.
        all_results.extend(page)
        if len(page) != limit:
            break
        offset += limit

    return all_results


def populate_filemeta_table(dry_run, do_not_update_existing):
    """Copy Tapis v2 file metadata into the filemeta table.

    Args:
        dry_run: when true, nothing is written; entries are only fetched
            and counted.
        do_not_update_existing: when true, entries whose system/path is
            already present in the filemeta table are counted and skipped.
    """

    def _already_in_table(entry):
        """Tell whether a filemeta row exists for this entry's system/path."""
        try:
            FileMetaModel.get_by_path_and_system(
                entry["value"]["system"], entry["value"]["path"]
            )
        except FileMetaModel.DoesNotExist:
            return False
        return True

    logger.info(
        f"Updating filemeta table from tapisv2-based metadata."
        f" dry_run={dry_run} do_not_update_existing={do_not_update_existing}"
    )

    v2_file_meta_data = get_all_v2_file_meta()
    logger.info(f"Processing {len(v2_file_meta_data)} tapisv2-based metadata entries")

    already_exists = 0
    updated = 0
    for entry in v2_file_meta_data:
        if do_not_update_existing and _already_in_table(entry):
            already_exists += 1
            continue

        # A dry run stops here: no write and no change to the counter.
        if dry_run:
            continue

        FileMetaModel.create_or_update_file_meta(entry["value"])
        updated += 1

    logger.info(
        "Successfully updated filemeta table from tapisv2-based metadata"
        f"\n {len(v2_file_meta_data)} tapisv2-based metadata entries."
        f"\n {already_exists} entries already existed in filemeta table."
        f"\n {updated} entries were updated/created in filemeta table"
    )


class Command(BaseCommand):
    """Management command that populates the filemeta table from Tapis v2 file metadata."""

    help = "Populate filemeta table from tapisv2-based file metadata."

    def add_arguments(self, parser):
        """Register the two boolean flags this command accepts."""
        flag_help = {
            "--dry-run": (
                "Executes the command in a simulation mode, logging actions "
                "without applying any changes to filemeta table."
            ),
            "--do-not-update-existing": (
                "Allows the command to not update any rows that already exist in the filemeta table"
            ),
        }
        for flag, text in flag_help.items():
            parser.add_argument(flag, action="store_true", help=text)

    def handle(self, *args, **options):
        """Run the population routine with the parsed flag values."""
        populate_filemeta_table(
            options["dry_run"],
            options["do_not_update_existing"],
        )
14 changes: 13 additions & 1 deletion designsafe/apps/api/filemeta/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
from django.utils import timezone


def _get_normalized_path(path) -> str:
""" Return a file path that begins with /"
For example, "file.jpg" becomes "/file.jpg"
"""
if not path.startswith('/'):
path = '/' + path
return path


class FileMetaModel(models.Model):
"""Model for File Meta"""

Expand Down Expand Up @@ -41,7 +51,8 @@ def create_or_update_file_meta(cls, value):
- tuple (instance, created): The FileMetaModel instance and a boolean indicating if it was created (True) or updated (False).
"""
system = value.get("system")
path = value.get("path")
path = _get_normalized_path(value.get("path"))
value["path"] = path

# Use a transaction to ensure atomicity
with transaction.atomic():
Expand All @@ -64,4 +75,5 @@ def get_by_path_and_system(cls, system, path):
Raises:
- DoesNotExist: if file metadata entry not found
"""
path = _get_normalized_path(path)
return cls.objects.get(value__system=system, value__path=path)
25 changes: 25 additions & 0 deletions designsafe/apps/api/filemeta/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,28 @@ def test_create_file_metadata_missing_system_or_path(
content_type="application/json",
)
assert response.status_code == 400


@pytest.mark.django_db
def test_create_using_path_without_starting_slashes_issue_DES_2767(
    filemeta_value_mock,
):
    """Creating with "file.txt" stores the normalized path "/file.txt"."""
    # "file.txt" and "/file.txt" refer to the same file, so the stored
    # value is expected to carry the leading slash.
    filemeta_value_mock["path"] = "file.txt"

    instance, was_created = FileMetaModel.create_or_update_file_meta(
        filemeta_value_mock
    )

    assert was_created
    assert instance.value["path"] == "/file.txt"


@pytest.mark.django_db
def test_get_using_path_with_or_without_starting_slashes_issue_DES_2767(
    filemeta_value_mock,
):
    """Lookups by "file.txt" and "/file.txt" both return the created row."""
    filemeta_value_mock["path"] = "file.txt"
    created_meta, _ = FileMetaModel.create_or_update_file_meta(filemeta_value_mock)

    system = filemeta_value_mock["system"]
    for lookup_path in ("file.txt", "/file.txt"):
        # A missing row raises DoesNotExist and fails the test; the asserts
        # additionally pin which row comes back and what path it stores.
        found = FileMetaModel.get_by_path_and_system(system, lookup_path)
        assert found.pk == created_meta.pk
        assert found.value["path"] == "/file.txt"

0 comments on commit 72e2a8d

Please sign in to comment.