From de7b7876d3edce18af70060e7a2bb06cfbdea8b3 Mon Sep 17 00:00:00 2001
From: Mahfouz Shehu
Date: Wed, 16 Aug 2023 12:06:19 +0100
Subject: [PATCH] Feature/ro crates runs support (#322)

* created model, manager, view, url, renderer, serializer, admin panel for RunExtraAnnotations

* Established support for RO Crates keyed against runs

* Added extra_annotations relationship for runs in test_api_surface
---
Review notes (this section is ignored by `git am`):
 - models.py: the authenticated RunExtraAnnotationQuerySet filter now goes
   through `run__` (was the typo `sun__`) and tests `run__is_private`,
   because RunExtraAnnotation has no `is_private` column of its own.
   `run__study` assumes Run exposes a `study` FK, as RunAdmin.raw_id_fields
   suggests -- confirm against the Run model.
 - views.py: a missing URL kwarg raises KeyError, not ValueError.
 - urls.py / views.py: restored the `<accession>` placeholder.
 - import_extra_run_annotations.py: the accession is parsed from the file
   stem, so names without 'SRR' are skipped with a warning instead of
   aborting the import with IndexError; help/log messages corrected.
 - Hunk line counts and the diffstat are unchanged.

 emgapi/admin.py                               |  32 +++--
 emgapi/fields.py                              |   5 +
 emgapi/migrations/0010_runextraannotation.py  |  35 +++++
 emgapi/models.py                              |  37 +++++
 emgapi/serializers.py                         |  31 ++++
 emgapi/urls.py                                |   7 +
 emgapi/views.py                               |  54 +++++++
 .../import_extra_assembly_annotations.py      |   1 +
 .../commands/import_extra_run_annotations.py  | 136 ++++++++++++++++++
 tests/api/test_api_surface.py                 |   2 +-
 10 files changed, 330 insertions(+), 10 deletions(-)
 create mode 100644 emgapi/migrations/0010_runextraannotation.py
 create mode 100644 emgapianns/management/commands/import_extra_run_annotations.py

diff --git a/emgapi/admin.py b/emgapi/admin.py
index 069ee492d..c17df404e 100644
--- a/emgapi/admin.py
+++ b/emgapi/admin.py
@@ -87,7 +87,7 @@ class StudyAdmin(admin.ModelAdmin, NoRemoveMixin):
         'project_id',
         'study_name',
     )
-    list_filter = ('is_private', )
+    list_filter = ('is_private',)
     raw_id_fields = ('biome',)
 
     def save_model(self, request, obj, form, change):
@@ -127,7 +127,6 @@ class Meta:
 
 @admin.register(emg_models.SuperStudy)
 class SuperStudyAdmin(admin.ModelAdmin):
-
     inlines = [SuperStudyStudiesInline, SuperStudyBiomesInline, SuperStudyGenomeCataloguesInline]
     form = SuperStudyAdminForm
 
@@ -182,6 +181,18 @@ def get_search_results(self, request, queryset, search_term):
         return super().get_search_results(request, queryset, search_term)
 
 
+class RunExtraAnnotationDownloads(admin.TabularInline):
+    model = emg_models.RunExtraAnnotation
+    raw_id_fields = [
+        'run',
+        'parent_id',
+        'group_type',
+        'subdir',
+        'description',
+        'file_format'
+    ]
+    extra = 0
+
 @admin.register(emg_models.Run)
 class RunAdmin(admin.ModelAdmin, AccessionSearch):
     change_list_template = "admin/change_list_filter_sidebar.html"
@@ -209,6 +220,9 @@ class RunAdmin(admin.ModelAdmin, AccessionSearch):
         'sample',
         'study',
     ]
+    inlines = [
+        RunExtraAnnotationDownloads,
+    ]
     filter_property = 'study'
     prefix = 'MGYS'
 
@@ -361,13 +375,13 @@ class AnalysisJobAdmin(admin.ModelAdmin, AccessionSearch, NoRemoveMixin):
     def get_queryset(self, request):
         return emg_models.AnalysisJob.objects_admin.all() \
             .select_related(
-                'pipeline',
-                'analysis_status',
-                'experiment_type',
-                'run',
-                'study',
-                'assembly',
-                'sample')
+            'pipeline',
+            'analysis_status',
+            'experiment_type',
+            'run',
+            'study',
+            'assembly',
+            'sample')
 
 
 @admin.register(emg_models.StudyErrorType)
diff --git a/emgapi/fields.py b/emgapi/fields.py
index 9aeee9f31..c908c12da 100644
--- a/emgapi/fields.py
+++ b/emgapi/fields.py
@@ -49,6 +49,11 @@ def get_url(self, obj, view_name, request, format):
             kwargs = {
                 'accession': obj.assembly.accession
             }
+
+        elif hasattr(obj, 'run'):
+            kwargs = {
+                'accession': obj.run.accession
+            }
 
         kwargs['alias'] = obj.alias
         return reverse(
diff --git a/emgapi/migrations/0010_runextraannotation.py b/emgapi/migrations/0010_runextraannotation.py
new file mode 100644
index 000000000..bdaf0c165
--- /dev/null
+++ b/emgapi/migrations/0010_runextraannotation.py
@@ -0,0 +1,35 @@
+# Generated by Django 3.2.18 on 2023-07-17 13:35
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('emgapi', '0009_genome_annotations_v2_downloads'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='RunExtraAnnotation',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('realname', models.CharField(db_column='REAL_NAME', max_length=255)),
+                ('alias', models.CharField(db_column='ALIAS', max_length=255)),
+                ('file_checksum', models.CharField(blank=True, db_column='CHECKSUM', max_length=255)),
+                ('checksum_algorithm', models.ForeignKey(blank=True, db_column='CHECKSUM_ALGORITHM', null=True, on_delete=django.db.models.deletion.CASCADE, to='emgapi.checksumalgorithm')),
+                ('description', models.ForeignKey(blank=True, db_column='DESCRIPTION_ID', null=True, on_delete=django.db.models.deletion.CASCADE, to='emgapi.downloaddescriptionlabel')),
+                ('file_format', models.ForeignKey(blank=True, db_column='FORMAT_ID', null=True, on_delete=django.db.models.deletion.CASCADE, to='emgapi.fileformat')),
+                ('group_type', models.ForeignKey(blank=True, db_column='GROUP_ID', null=True, on_delete=django.db.models.deletion.CASCADE, to='emgapi.downloadgrouptype')),
+                ('parent_id', models.ForeignKey(blank=True, db_column='PARENT_DOWNLOAD_ID', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='parent', to='emgapi.runextraannotation')),
+                ('run', models.ForeignKey(db_column='RUN_ID', on_delete=django.db.models.deletion.CASCADE, related_name='extra_annotations', to='emgapi.run')),
+                ('subdir', models.ForeignKey(blank=True, db_column='SUBDIR_ID', null=True, on_delete=django.db.models.deletion.CASCADE, to='emgapi.downloadsubdir')),
+            ],
+            options={
+                'db_table': 'RUN_DOWNLOAD',
+                'ordering': ('group_type', 'alias'),
+                'unique_together': {('realname', 'alias', 'run')},
+            },
+        ),
+    ]
diff --git a/emgapi/models.py b/emgapi/models.py
index 98460c1d8..b5337c409 100644
--- a/emgapi/models.py
+++ b/emgapi/models.py
@@ -213,6 +213,11 @@ def available(self, request=None):
                     Q(assembly__is_private=False),
                 ],
             },
+            'RunExtraAnnotationQuerySet': {
+                'all': [
+                    Q(run__is_private=False),
+                ],
+            },
         }
 
         if request is not None and request.user.is_authenticated:
@@ -241,6 +246,10 @@ def available(self, request=None):
                 [Q(assembly__samples__studies__submission_account_id__iexact=_username,
                    is_private=True) |
                  Q(assembly__is_private=False)]
+            _query_filters['RunExtraAnnotationQuerySet']['authenticated'] = \
+                [Q(run__study__submission_account_id__iexact=_username,
+                   run__is_private=True) |
+                 Q(run__is_private=False)]
 
         filters = _query_filters.get(self.__class__.__name__)
 
@@ -700,6 +709,7 @@ class AssemblyExtraAnnotationManager(BaseDownloadManager):
     pass
 
 
+
 class AssemblyExtraAnnotation(BaseDownload):
     assembly = models.ForeignKey(
         'Assembly', db_column='ASSEMBLY_ID', related_name='extra_annotations',
@@ -719,6 +729,33 @@ class Meta:
     def __str__(self):
         return f'AssemblyExtraAnnotation: {self.id} {self.alias}'
 
+class RunExtraAnnotationQuerySet(BaseQuerySet):
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+class RunExtraAnnotationManager(BaseDownloadManager):
+    pass
+
+class RunExtraAnnotation(BaseDownload):
+    run = models.ForeignKey(
+        'Run', db_column='RUN_ID', related_name='extra_annotations',
+        on_delete=models.CASCADE)
+
+    @property
+    def accession(self):
+        return self.run.accession
+
+    objects = RunExtraAnnotationManager(select_related=[])
+
+    class Meta:
+        db_table = 'RUN_DOWNLOAD'
+        unique_together = (('realname', 'alias', 'run'),)
+        ordering = ('group_type', 'alias',)
+
+    def __str__(self):
+        return f'RunExtraAnnotation: {self.id} {self.alias}'
+
 
 class StudyDownloadQuerySet(BaseQuerySet):
     pass
diff --git a/emgapi/serializers.py b/emgapi/serializers.py
index 1e9709e27..604cdabb7 100644
--- a/emgapi/serializers.py
+++ b/emgapi/serializers.py
@@ -504,6 +504,19 @@ def get_pipelines(self, obj):
     def get_analyses(self, obj):
         return None
 
+    extra_annotations = relations.SerializerMethodHyperlinkedRelatedField(
+        many=True,
+        read_only=True,
+        source='get_extra_annotations',
+        model=emg_models.RunExtraAnnotation,
+        related_link_view_name='emgapi_v1:run-extra-annotations-list',
+        related_link_url_kwarg='accession',
+        related_link_lookup_field='accession',
+    )
+
+    def get_extra_annotations(self, obj):
+        return None
+
     class Meta:
         model = emg_models.Run
         exclude = (
@@ -672,6 +685,24 @@ class Meta:
         )
 
 
+class RunExtraAnnotationSerializer(BaseDownloadSerializer):
+    url = emg_fields.DownloadHyperlinkedIdentityField(
+        view_name='emgapi_v1:run-extra-annotations-detail',
+        lookup_field='alias',
+    )
+
+    class Meta:
+        model = emg_models.RunExtraAnnotation
+        fields = (
+            'id',
+            'url',
+            'alias',
+            'file_format',
+            'description',
+            'group_type',
+            'file_checksum'
+        )
+
 class RetrieveAssemblySerializer(AssemblySerializer):
 
     pipelines = emg_relations.HyperlinkedSerializerMethodResourceRelatedField(
diff --git a/emgapi/urls.py b/emgapi/urls.py
index ac0e48d1a..01aa6905c 100644
--- a/emgapi/urls.py
+++ b/emgapi/urls.py
@@ -65,6 +65,13 @@
     basename='assembly-extra-annotations'
 )
 
+router.register(
+    r'runs/(?P<accession>[^/]+)/extra-annotations',
+    views.RunExtraAnnotationViewSet,
+    basename='run-extra-annotations'
+)
+
+
 router.register(
     r'analyses',
     views.AnalysisJobViewSet,
diff --git a/emgapi/views.py b/emgapi/views.py
index ee8e1450c..35c76ef96 100644
--- a/emgapi/views.py
+++ b/emgapi/views.py
@@ -827,6 +827,60 @@ def retrieve(self, request, accession, alias,
             file_path = obj.realname
         return emg_utils.prepare_results_file_download_response(file_path, alias)
 
+class RunExtraAnnotationViewSet(
+    emg_mixins.ListModelMixin,
+    viewsets.GenericViewSet
+    ):
+    serializer_class = emg_serializers.RunExtraAnnotationSerializer
+
+    filter_backends = (
+        filters.OrderingFilter,
+    )
+
+    ordering_fields = (
+        'alias',
+    )
+
+    ordering = ('alias',)
+
+    lookup_field = 'alias'
+    lookup_value_regex = '[^/]+'
+
+    def get_queryset(self):
+        try:
+            accession = self.kwargs['accession']
+        except KeyError:
+            raise Http404()
+        return emg_models.RunExtraAnnotation.objects.available(self.request) \
+            .filter(run__accession=accession)
+
+    def get_object(self):
+        return get_object_or_404(
+            self.get_queryset(), Q(alias=self.kwargs['alias'])
+        )
+
+    def get_serializer_class(self):
+        return super(RunExtraAnnotationViewSet, self) \
+            .get_serializer_class()
+
+    def list(self, request, *args, **kwargs):
+        """
+        Retrieves list of Run Extra Annotation downloads
+        Example:
+        ---
+        `/runs/<accession>/extra-annotations`
+        """
+        return super(RunExtraAnnotationViewSet, self).list(request, *args, **kwargs)
+
+    def retrieve(self, request, accession, alias,
+                 *args, **kwargs):
+        obj = self.get_object()
+        if obj.subdir is not None:
+            file_path = f'{obj.subdir}/{obj.realname}'
+        else:
+            file_path = obj.realname
+        return emg_utils.prepare_results_file_download_response(file_path, alias)
+
 
 class AnalysisJobViewSet(mixins.RetrieveModelMixin,
                          emg_mixins.ListModelMixin,
diff --git a/emgapianns/management/commands/import_extra_assembly_annotations.py b/emgapianns/management/commands/import_extra_assembly_annotations.py
index 556c73433..ca018b376 100644
--- a/emgapianns/management/commands/import_extra_assembly_annotations.py
+++ b/emgapianns/management/commands/import_extra_assembly_annotations.py
@@ -74,6 +74,7 @@ def handle(self, *args, **options):
             logger.info('Looking for RO Crates (.zips')
             for file in Path(self.gffs_dir).glob('**/*.zip'):
                 logger.info(f'Handling RO Crate Zip file {file}')
+                logger.info('this is the FILE NAME ' + file.name)
                 erz = 'ERZ' + file.name.split('ERZ')[1].strip('.zip')
                 try:
                     assembly = emg_models.Assembly.objects.get(accession=erz)
diff --git a/emgapianns/management/commands/import_extra_run_annotations.py b/emgapianns/management/commands/import_extra_run_annotations.py
new file mode 100644
index 000000000..f68ba8203
--- /dev/null
+++ b/emgapianns/management/commands/import_extra_run_annotations.py
@@ -0,0 +1,136 @@
+import logging
+import os
+from pathlib import Path
+
+from django.core.management import BaseCommand
+
+from emgapi import models as emg_models
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = "Imports a directory of GFFs as 'extra run annotations', " \
+           "i.e. annotations from tools that aren't part of the analysis pipelines. " \
+           "GFFs may (preferably) be wrapped into self-describing RO Crates."
+
+    obj_list = list()
+    results_directory = None
+    gffs_dir = None
+    tool = None
+
+    fmt_cache = {}
+    desc_label_cache = {}
+    group_cache = {}
+    subdir_cache = {}
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            'results_directory',
+            action='store',
+            type=str
+        )
+        parser.add_argument(
+            'gffs_directory',
+            action='store',
+            type=str,
+            help='The folder within `results_directory` where the GFF/ROCrate files are, e.g. "crates/"'
+        )
+        parser.add_argument(
+            'tool',
+            action='store',
+            type=str,
+            help='The type of annotation (e.g. rocrate)',
+            choices=['rocrate']
+        )
+
+    def handle(self, *args, **options):
+        logger.info(options)
+
+        self.results_directory = os.path.realpath(options.get('results_directory').strip())
+
+        if not os.path.exists(self.results_directory):
+            raise FileNotFoundError(f'Results dir {self.results_directory} does not exist')
+
+        gffs_directory = options['gffs_directory'].strip()
+        self.gffs_dir = os.path.join(self.results_directory, gffs_directory)
+        if not os.path.exists(self.gffs_dir):
+            raise FileNotFoundError(f'GFFs dir {self.gffs_dir} does not exist')
+
+        if options.get('tool') == 'rocrate':
+            logger.info('Looking for RO Crates (.zips)')
+            for file in Path(self.gffs_dir).glob('**/*.zip'):
+                logger.info(f'Handling RO Crate Zip file {file}')
+                # Parse from the stem: str.strip('.zip') removes characters, not the suffix.
+                logger.info('this is the FILE NAME ' + file.name)
+                srr = 'SRR' + file.stem.partition('SRR')[2]
+                try:
+                    run = emg_models.Run.objects.get(accession=srr)
+                except emg_models.Run.DoesNotExist:
+                    logger.warning(f'No Run found for RO Crate apparent run accession {srr}')
+                    continue
+                logger.info(f'Will upload RO Crate for {srr}')
+                self.upload_rocrate(run, gffs_directory, file.name)
+
+    def upload_rocrate(
+            self,
+            run,
+            subdir,
+            filename,
+    ):
+        description_label = self.desc_label_cache.get('Analysis RO Crate')
+        if not description_label:
+            description_label, created = emg_models.DownloadDescriptionLabel \
+                .objects \
+                .get_or_create(description_label='Analysis RO Crate', defaults={
+                    "description": "Self-describing analysis workflow product packaged as RO Crate"
+                })
+            if created:
+                logger.info(f'Added new download description label {description_label}')
+            self.desc_label_cache[description_label.description_label] = description_label
+
+        fmt = self.fmt_cache.get('RO Crate')
+        if not fmt:
+            fmt, created = emg_models.FileFormat \
+                .objects \
+                .get_or_create(format_name='RO Crate', defaults={
+                    "format_extension": "zip",
+                    "compression": True
+                })
+            if created:
+                logger.info(f'Added new file format {fmt}')
+            self.fmt_cache[fmt.format_name] = fmt
+
+        subdir_obj = self.subdir_cache.get(subdir)
+        if not subdir_obj:
+            subdir_obj, created = emg_models.DownloadSubdir.objects.get_or_create(subdir=subdir)
+            if created:
+                logger.info(f'Added new downloads subdir {subdir_obj}')
+            self.subdir_cache[subdir] = subdir_obj
+
+        group = self.group_cache.get('Analysis RO Crate')
+        if not group:
+            group, created = emg_models.DownloadGroupType.objects.get_or_create(group_type='Analysis RO Crate')
+            if created:
+                logger.info(f'Added new download group type {group}')
+            self.group_cache[group.group_type] = group
+
+        alias = os.path.basename(filename)
+
+        defaults = {
+            'alias': alias,
+            'description': description_label,
+            'file_format': fmt,
+            'group_type': group,
+            'realname': os.path.basename(filename),
+            'subdir': subdir_obj
+        }
+
+        dl, created = emg_models.RunExtraAnnotation.objects.update_or_create(
+            defaults=defaults,
+            run=run,
+            alias=alias,
+        )
+
+        logger.info(f'{"Created" if created else "Updated"} download {dl}')
+        return dl
diff --git a/tests/api/test_api_surface.py b/tests/api/test_api_surface.py
index b645f164d..6d40b1272 100644
--- a/tests/api/test_api_surface.py
+++ b/tests/api/test_api_surface.py
@@ -145,7 +145,7 @@ def test_invalid_view_should_raise_exception(self):
          ['studies', 'samples']),
         ('Run', 'runs', 'emgapi_v1:runs', [],
          ['pipelines', 'analyses', 'experiment-type', 'sample', 'study',
-          'assemblies']),
+          'assemblies', 'extra-annotations']),
         ('Assembly', 'assemblies', 'emgapi_v1:assemblies', [],
          ['pipelines', 'analyses', 'runs', 'samples', 'extra-annotations']),
         ('Sample', 'samples', 'emgapi_v1:samples', [],