diff --git a/designsafe/apps/api/filemeta/management/__init__.py b/designsafe/apps/api/filemeta/management/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/designsafe/apps/api/filemeta/management/commands/__init__.py b/designsafe/apps/api/filemeta/management/commands/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/designsafe/apps/api/filemeta/management/commands/populate_filemeta_table_from_tavpisv2_metadata.py b/designsafe/apps/api/filemeta/management/commands/populate_filemeta_table_from_tavpisv2_metadata.py
new file mode 100644
index 0000000000..4907a36f58
--- /dev/null
+++ b/designsafe/apps/api/filemeta/management/commands/populate_filemeta_table_from_tavpisv2_metadata.py
@@ -0,0 +1,120 @@
+"""Populate filemeta table from tapisv2-based file metadata
+
+This module contains a Django management command which populates filemeta table
+"""
+
+# pylint: disable=logging-fstring-interpolation
+# pylint: disable=no-member
+
+import logging
+import json
+
+from django.core.management.base import BaseCommand
+
+from designsafe.apps.api.filemeta.models import FileMetaModel
+
+try:
+    from designsafe.apps.api.agave import get_service_account_client_v2
+except ImportError:
+    # TODOV3 drop this
+    from designsafe.apps.api.agave import (
+        get_service_account_client as get_service_account_client_v2,
+    )
+
+
+logger = logging.getLogger(__name__)
+
+
+def get_all_v2_file_meta():
+    """
+    Return all metadata objects for files
+    """
+    service_account_v2 = get_service_account_client_v2()
+
+    # query is checking if we have a system/path in the value
+    # and name "designsafe.file"
+    query = {
+        "name": "designsafe.file",
+        "value.system": {"$exists": True},
+        "value.path": {"$exists": True},
+    }
+
+    all_results = []
+    offset = 0
+    limit = 300
+
+    while True:
+        result = service_account_v2.meta.listMetadata(
+            q=json.dumps(query), limit=limit, offset=offset
+        )
+        all_results.extend(result)
+        offset += limit
+        if len(result) != limit:
+            break
+
+    return all_results
+
+
+def populate_filemeta_table(dry_run, do_not_update_existing):
+    """
+    Update the filemeta table from Tapisv2-based metadata.
+    """
+    logger.info(
+        f"Updating filemeta table from tapisv2-based metadata."
+        f" dry_run={dry_run} do_not_update_existing={do_not_update_existing}"
+    )
+
+    v2_file_meta_data = get_all_v2_file_meta()
+    logger.info(f"Processing {len(v2_file_meta_data)} tapisv2-based metadata entries")
+
+    updated = 0
+    already_exists = 0
+    for meta_data in v2_file_meta_data:
+        if do_not_update_existing:
+            exists = True
+            try:
+                FileMetaModel.get_by_path_and_system(
+                    meta_data["value"]["system"], meta_data["value"]["path"]
+                )
+            except FileMetaModel.DoesNotExist:
+                exists = False
+            if exists:
+                already_exists += 1
+                continue
+
+        if not dry_run:
+            FileMetaModel.create_or_update_file_meta(meta_data["value"])
+            updated += 1
+
+    logger.info(
+        "Successfully updated filemeta table from tapisv2-based metadata"
+        f"\n {len(v2_file_meta_data)} tapisv2-based metadata entries."
+        f"\n {already_exists} entries already existed in filemeta table."
+        f"\n {updated} entries were updated/created in filemeta table"
+    )
+
+
+class Command(BaseCommand):
+    """Command for populating the filemeta table from Tapis v2 file metadata"""
+
+    help = "Populate filemeta table from tapisv2-based file metadata."
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--dry-run",
+            action="store_true",
+            help="Executes the command in a simulation mode, logging actions "
+            "without applying any changes to filemeta table.",
+        )
+
+        parser.add_argument(
+            "--do-not-update-existing",
+            action="store_true",
+            help="Allows the command to not update any rows that already exist in the filemeta table",
+        )
+
+    def handle(self, *args, **options):
+        dry_run = options["dry_run"]
+        do_not_update_existing = options["do_not_update_existing"]
+
+        populate_filemeta_table(dry_run, do_not_update_existing)
diff --git a/designsafe/apps/api/filemeta/models.py b/designsafe/apps/api/filemeta/models.py
index e9a9e41e0e..1f8992bea4 100644
--- a/designsafe/apps/api/filemeta/models.py
+++ b/designsafe/apps/api/filemeta/models.py
@@ -5,6 +5,16 @@
 from django.utils import timezone
 
 
+def _get_normalized_path(path) -> str:
+    """Return a file path that begins with "/".
+
+    For example, "file.jpg" becomes "/file.jpg"
+    """
+    if not path.startswith("/"):
+        path = "/" + path
+    return path
+
+
 class FileMetaModel(models.Model):
     """Model for File Meta"""
 
@@ -41,7 +51,8 @@ def create_or_update_file_meta(cls, value):
         - tuple (instance, created): The FileMetaModel instance and a boolean indicating if it was created (True) or updated (False).
         """
         system = value.get("system")
-        path = value.get("path")
+        path = _get_normalized_path(value.get("path"))
+        value["path"] = path
 
         # Use a transaction to ensure atomicity
         with transaction.atomic():
@@ -64,4 +75,5 @@ def get_by_path_and_system(cls, system, path):
         Raises:
         - DoesNotExist: if file metadata entry not found
         """
+        path = _get_normalized_path(path)
         return cls.objects.get(value__system=system, value__path=path)
diff --git a/designsafe/apps/api/filemeta/tests.py b/designsafe/apps/api/filemeta/tests.py
index b80fedafa8..3ab665b7bc 100644
--- a/designsafe/apps/api/filemeta/tests.py
+++ b/designsafe/apps/api/filemeta/tests.py
@@ -159,3 +159,28 @@ def test_create_file_metadata_missing_system_or_path(
         content_type="application/json",
     )
     assert response.status_code == 400
+
+
+@pytest.mark.django_db
+def test_create_using_path_without_starting_slashes_issue_DES_2767(
+    filemeta_value_mock,
+):
+    # testing that "file.txt" and "/file.txt" are referring to the same
+    # file and that "file.txt" is normalized to "/file.txt"
+    filemeta_value_mock["path"] = "file.txt"
+
+    file_meta, created = FileMetaModel.create_or_update_file_meta(filemeta_value_mock)
+    assert created
+    assert file_meta.value["path"] == "/file.txt"
+
+
+@pytest.mark.django_db
+def test_get_using_path_with_or_without_starting_slashes_issue_DES_2767(
+    filemeta_value_mock,
+):
+    filemeta_value_mock["path"] = "file.txt"
+    FileMetaModel.create_or_update_file_meta(filemeta_value_mock)
+
+    system = filemeta_value_mock["system"]
+    FileMetaModel.get_by_path_and_system(system, "file.txt")
+    FileMetaModel.get_by_path_and_system(system, "/file.txt")