From c4da25e9966527280fa61dc5c64c96451487ca1d Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Mon, 11 Dec 2023 16:28:12 +0000 Subject: [PATCH 01/12] Filtering out verify token bad request from error logs --- emgcli/settings.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/emgcli/settings.py b/emgcli/settings.py index d31947da0..e7e72017e 100644 --- a/emgcli/settings.py +++ b/emgcli/settings.py @@ -87,6 +87,10 @@ '()': 'django.utils.log.CallbackFilter', 'callback': lambda record: "v1/utils/myaccounts" not in record.getMessage(), }, + 'exclude_token_verification': { + '()': 'django.utils.log.CallbackFilter', + 'callback': lambda record: "v1/utils/token/verify" not in record.getMessage(), + }, }, 'formatters': { 'default': { @@ -136,13 +140,13 @@ 'handlers': ['default'], 'level': 'INFO', 'propagate': False, - 'filters': ['exclude_myaccounts'], + 'filters': ['exclude_myaccounts', 'exclude_token_verification'], }, 'django.server': { 'handlers': ['default'], 'level': 'INFO', 'propagate': False, - 'filters': ['exclude_myaccounts'], + 'filters': ['exclude_myaccounts', 'exclude_token_verification'], }, 'django': { 'handlers': ['null'], From 481052f355b1839158f72e2893a1ced9c9c49a71 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Tue, 12 Dec 2023 10:08:20 +0000 Subject: [PATCH 02/12] Bumped API version --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index 31dfb7928..d550e808a 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.35" +__version__: str = "2.4.36" diff --git a/pyproject.toml b/pyproject.toml index 569182588..2063ce372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.35" +current_version = "2.4.36" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" From cc092d0ad79083b49516cabcf8a9443fd7f9d602 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 20 Dec 2023 13:46:08 +0000 Subject: [PATCH 03/12] Addressed missing filter choices issue --- emgapi/views_relations.py | 16 ++++++---------- emgapi/viewsets.py | 8 ++++---- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/emgapi/views_relations.py b/emgapi/views_relations.py index 34cc5f9ba..b4f30040c 100644 --- a/emgapi/views_relations.py +++ b/emgapi/views_relations.py @@ -340,7 +340,7 @@ def get_queryset(self): queryset = emg_models.AnalysisJob.objects \ .available(self.request) \ .filter(*emg_utils.related_study_accession_query( - self.kwargs['accession'])) + self.kwargs['accession'])) return queryset def list(self, request, *args, **kwargs): @@ -358,7 +358,6 @@ class SuperStudyFlagshipStudiesViewSet( emg_mixins.ListModelMixin, emg_viewsets.BaseStudyGenericViewSet ): - lookup_field = 'super_study_id' def get_queryset(self): @@ -382,7 +381,6 @@ class SuperStudyRelatedStudiesViewSet( emg_mixins.ListModelMixin, emg_viewsets.BaseStudyGenericViewSet ): - lookup_field = 'super_study_id' def get_queryset(self): @@ -406,7 +404,6 @@ class SuperStudyGenomeCataloguesViewSet( emg_mixins.ListModelMixin, emg_viewsets.BaseGenomeCatalogueGenericViewSet ): - lookup_field = 'super_study_id' def get_queryset(self): @@ -965,7 +962,7 @@ def get_queryset(self): return emg_models.SampleAnn.objects \ .filter(sample__accession=accession) \ .prefetch_related( - Prefetch('sample', queryset=emg_models.Sample.objects.available(self.request))) \ + Prefetch('sample', queryset=emg_models.Sample.objects.available(self.request))) \ .order_by('var') def list(self, request, *args, **kwargs): @@ -1070,7 +1067,6 @@ def list(self, request, *args, **kwargs): class AssemblyRunsViewSet(emg_mixins.ListModelMixin, viewsets.GenericViewSet): - serializer_class = emg_serializers.RunSerializer filterset_class = emg_filters.RunFilter @@ -1114,6 +1110,7 @@ def list(self, request, *args, **kwargs): return super(AssemblyRunsViewSet, self) \ .list(request, *args, **kwargs) + class GenomeCogsRelationshipsViewSet(emg_mixins.ListModelMixin, viewsets.GenericViewSet): serializer_class = emg_serializers.CogCountSerializer @@ -1178,9 +1175,9 @@ class GenomeKeggModuleRelationshipsViewSet(emg_mixins.ListModelMixin, viewsets.GenericViewSet): serializer_class = emg_serializers.KeggModuleMatchSerializer - filter_backends = ( - filters.OrderingFilter, - ) + # filter_backends = ( + # filters.OrderingFilter, + # ) ordering_fields = ( 'class_id', @@ -1205,7 +1202,6 @@ def get_queryset(self): class GenomeAntiSmashGeneClustersRelationshipsViewSet(emg_mixins.ListModelMixin, viewsets.GenericViewSet): - serializer_class = emg_serializers.AntiSmashCountSerializer filter_backends = ( diff --git a/emgapi/viewsets.py b/emgapi/viewsets.py index 9e9217fa5..5c3b56045 100644 --- a/emgapi/viewsets.py +++ b/emgapi/viewsets.py @@ -241,10 +241,10 @@ class BasePublicationGenericViewSet(viewsets.GenericViewSet): class BaseGenomeGenericViewSet(viewsets.GenericViewSet): serializer_class = emg_serializers.GenomeSerializer - filter_backends = ( - filters.SearchFilter, - emg_filters.getUnambiguousOrderingFilterByField('accession'), - ) + # filter_backends = ( + # filters.SearchFilter, + # emg_filters.getUnambiguousOrderingFilterByField('accession'), + # ) ordering_fields = ( 'accession', From 36bfac8b32b7d46de4c334712aa9a70edd573980 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Wed, 20 Dec 2023 13:50:18 +0000 Subject: [PATCH 04/12] Bumped API version --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index 31dfb7928..d550e808a 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.35" +__version__: str = "2.4.36" diff --git a/pyproject.toml b/pyproject.toml index 787cfbb46..b0298d798 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.35" +current_version = "2.4.36" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" From b3287d5546012070e3168e79bf63420d012a8909 Mon Sep 17 00:00:00 2001 From: sandyr Date: Wed, 3 Jan 2024 15:17:40 +0000 Subject: [PATCH 05/12] restore ability to recreate test DB fixtures; recreate them --- config/local-lite.yml | 1 + emgena/models.py | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/config/local-lite.yml b/config/local-lite.yml index dd2fa4381..0162f7cbd 100644 --- a/config/local-lite.yml +++ b/config/local-lite.yml @@ -9,6 +9,7 @@ emg: era: ENGINE: 'django.db.backends.sqlite3' NAME: '/opt/ci/testdbs/ena-testdb.sqlite' + ERA_TABLESPACE_PREFIX: '' admin: True downloads_bypass_nginx: True diff --git a/emgena/models.py b/emgena/models.py index c3aaba848..ad7e9b0aa 100644 --- a/emgena/models.py +++ b/emgena/models.py @@ -27,7 +27,9 @@ from __future__ import unicode_literals from datetime import date -from django.db import models, NotSupportedError + +from django.conf import settings +from django.db import models class Status(models.IntegerChoices): @@ -197,8 +199,9 @@ class Meta(StudyAbstract.Meta): # ERA needs to be appended as the default connection tries to use # the PUBLIC SYNONYM (according to ENA) and it's not working ATM # we were advised to prefix the views and this is the simplest way. - # The short-term plan is to remove the dependency of ENA databases - db_table = 'ERA\".\"V_MGP_RUN_STUDY' + # The short-term plan is to remove the dependency of ENA databases + _prefix_workaround = settings.DATABASES['era'].get('ERA_TABLESPACE_PREFIX', 'ERA\".\"') + db_table = f'{_prefix_workaround}V_MGP_RUN_STUDY' class AssemblyStudy(StudyAbstract): From ed1604b1ec61f5f6cad7b2fd8e08a463e9bc0c01 Mon Sep 17 00:00:00 2001 From: sandyr Date: Wed, 3 Jan 2024 15:59:02 +0000 Subject: [PATCH 06/12] allow ERA database to be non existent --- emgena/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emgena/models.py b/emgena/models.py index ad7e9b0aa..85beb7a48 100644 --- a/emgena/models.py +++ b/emgena/models.py @@ -200,7 +200,7 @@ class Meta(StudyAbstract.Meta): # the PUBLIC SYNONYM (according to ENA) and it's not working ATM # we were advised to prefix the views and this is the simplest way. # The short-term plan is to remove the dependency of ENA databases - _prefix_workaround = settings.DATABASES['era'].get('ERA_TABLESPACE_PREFIX', 'ERA\".\"') + _prefix_workaround = settings.DATABASES.get('era', {}).get('ERA_TABLESPACE_PREFIX', 'ERA\".\"') db_table = f'{_prefix_workaround}V_MGP_RUN_STUDY' From 3c58048ddd6931c72a89b449b4c07afdddc331ed Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Tue, 9 Jan 2024 14:24:32 +0000 Subject: [PATCH 07/12] Removed unneccessary filter options --- emgapi/views_relations.py | 7 +++---- emgapi/viewsets.py | 9 ++++----- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/emgapi/views_relations.py b/emgapi/views_relations.py index b4f30040c..1c6215ec7 100644 --- a/emgapi/views_relations.py +++ b/emgapi/views_relations.py @@ -1175,13 +1175,12 @@ class GenomeKeggModuleRelationshipsViewSet(emg_mixins.ListModelMixin, viewsets.GenericViewSet): serializer_class = emg_serializers.KeggModuleMatchSerializer - # filter_backends = ( - # filters.OrderingFilter, - # ) + filter_backends = ( + filters.OrderingFilter, + ) ordering_fields = ( 'class_id', - 'name', 'genome_count', ) diff --git a/emgapi/viewsets.py b/emgapi/viewsets.py index 5c3b56045..a6ebf5e73 100644 --- a/emgapi/viewsets.py +++ b/emgapi/viewsets.py @@ -241,10 +241,10 @@ class BasePublicationGenericViewSet(viewsets.GenericViewSet): class BaseGenomeGenericViewSet(viewsets.GenericViewSet): serializer_class = emg_serializers.GenomeSerializer - # filter_backends = ( - # filters.SearchFilter, - # emg_filters.getUnambiguousOrderingFilterByField('accession'), - # ) + filter_backends = ( + filters.SearchFilter, + emg_filters.getUnambiguousOrderingFilterByField('accession'), + ) ordering_fields = ( 'accession', @@ -252,7 +252,6 @@ class BaseGenomeGenericViewSet(viewsets.GenericViewSet): 'num_contigs', 'completeness', 'contamination', - 'num_genomes', 'num_genomes_total', 'num_proteins', 'last_update', From 57308e966474caff57ab44bdeb2c5eccf14c83f6 Mon Sep 17 00:00:00 2001 From: Mahfouz Date: Thu, 11 Jan 2024 16:33:05 +0000 Subject: [PATCH 08/12] Setting cookies after jwt auth obtain call is made --- emgcli/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/emgcli/settings.py b/emgcli/settings.py index e7e72017e..3a3fc9d7b 100644 --- a/emgcli/settings.py +++ b/emgcli/settings.py @@ -518,6 +518,7 @@ def create_secret_key(var_dir): JWT_AUTH = { 'JWT_EXPIRATION_DELTA': datetime.timedelta(seconds=108000), 'JWT_AUTH_HEADER_PREFIX': 'Bearer', + 'JWT_AUTH_COOKIE': 'EMG_AUTH', } try: From dd827d905e8c07e315b2694b2908beee9b322361 Mon Sep 17 00:00:00 2001 From: Sandy Rogers Date: Fri, 12 Jan 2024 16:04:36 +0000 Subject: [PATCH 09/12] Suppression state propagation (#328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Martín Beracochea Co-authored-by: Mahfouz --- emgapi/admin.py | 30 +- .../commands/clean_empty_studies.py | 2 +- .../migrations/0013_delete_analysisjobann.py | 25 ++ ..._suppression_reason_ancestor_suppressed.py | 44 +++ emgapi/models.py | 257 ++++++++++-------- emgapi/serializers.py | 3 +- emgapianns/management/commands/import_qc.py | 4 +- .../commands/sync_assemblies_with_ena.py | 6 +- .../management/commands/sync_runs_with_ena.py | 5 +- .../commands/sync_samples_with_ena.py | 3 - .../commands/sync_studies_with_ena.py | 3 - tests/ena/test_sync_assemblies_with_ena.py | 20 +- tests/ena/test_sync_runs_with_ena.py | 28 +- tests/ena/test_sync_samples_with_ena.py | 18 +- tests/ena/test_sync_studies_with_ena.py | 27 +- tests/test_utils/emg_fixtures.py | 181 +++++++++++- tests/webuploader/test_qc.py | 4 +- 17 files changed, 498 insertions(+), 162 deletions(-) create mode 100644 emgapi/migrations/0013_delete_analysisjobann.py create mode 100644 emgapi/migrations/0014_suppression_reason_ancestor_suppressed.py diff --git a/emgapi/admin.py b/emgapi/admin.py index c17df404e..a1068212b 100644 --- a/emgapi/admin.py +++ b/emgapi/admin.py @@ -463,21 +463,21 @@ class AnalysisMetadataVariableNamesAdmin(admin.ModelAdmin): ] -@admin.register(emg_models.AnalysisJobAnn) -class AnalysisJobAnnAdmin(admin.ModelAdmin): - readonly_fields = [ - 'job', - 'var', - ] - list_display = [ - 'job', - 'var' - ] - search_fields = [ - 'job__job_id', - 'var__var_name', - 'var__description', - ] +# @admin.register(emg_models.AnalysisJobAnn) +# class AnalysisJobAnnAdmin(admin.ModelAdmin): +# readonly_fields = [ +# 'job', +# 'var', +# ] +# list_display = [ +# 'job', +# 'var' +# ] +# search_fields = [ +# 'job__job_id', +# 'var__var_name', +# 'var__description', +# ] @admin.register(emg_models.CogCat) diff --git a/emgapi/management/commands/clean_empty_studies.py b/emgapi/management/commands/clean_empty_studies.py index 02fcc9e79..88ed4179e 100644 --- a/emgapi/management/commands/clean_empty_studies.py +++ b/emgapi/management/commands/clean_empty_studies.py @@ -56,5 +56,5 @@ def handle(self, *args, **kwargs): run.ena_study_accession = study.secondary_accession run.study = None Run.objects.bulk_update(runs, ["ena_study_accession", "study"]) - study.suppress() + study.suppress(propagate=False) logger.info(f"{study} suppressed") diff --git a/emgapi/migrations/0013_delete_analysisjobann.py b/emgapi/migrations/0013_delete_analysisjobann.py new file mode 100644 index 000000000..b274f2914 --- /dev/null +++ b/emgapi/migrations/0013_delete_analysisjobann.py @@ -0,0 +1,25 @@ +# Generated by Django 3.2.18 on 2023-09-29 19:59 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('emgapi', '0012_alter_publication_pub_type'), + ] + + operations = [ + # migrations.SeparateDatabaseAndState( + # state_operations=[ + # migrations.RenameField( + # model_name='analysisjob', + # old_name='analysis_summary_json', + # new_name='analysis_summary', + # ), + # ], + # ), + migrations.DeleteModel( + name='AnalysisJobAnn', + ), + ] diff --git a/emgapi/migrations/0014_suppression_reason_ancestor_suppressed.py b/emgapi/migrations/0014_suppression_reason_ancestor_suppressed.py new file mode 100644 index 000000000..fe6483619 --- /dev/null +++ b/emgapi/migrations/0014_suppression_reason_ancestor_suppressed.py @@ -0,0 +1,44 @@ +# Generated by Django 3.2.18 on 2023-09-14 07:16 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('emgapi', '0013_delete_analysisjobann'), + ] + + operations = [ + migrations.AlterField( + model_name='analysisjob', + name='suppression_reason', + field=models.IntegerField(blank=True, choices=[(1, 'Draft'), (3, 'Cancelled'), (5, 'Suppressed'), (6, 'Killed'), (7, 'Temporary Suppressed'), (8, 'Temporary Killed'), (100, 'Ancestor Suppressed')], db_column='SUPPRESSION_REASON', null=True), + ), + migrations.AlterField( + model_name='assembly', + name='study', + field=models.ForeignKey(blank=True, db_column='STUDY_ID', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='assemblies', to='emgapi.study'), + ), + migrations.AlterField( + model_name='assembly', + name='suppression_reason', + field=models.IntegerField(blank=True, choices=[(1, 'Draft'), (3, 'Cancelled'), (5, 'Suppressed'), (6, 'Killed'), (7, 'Temporary Suppressed'), (8, 'Temporary Killed'), (100, 'Ancestor Suppressed')], db_column='SUPPRESSION_REASON', null=True), + ), + migrations.AlterField( + model_name='run', + name='suppression_reason', + field=models.IntegerField(blank=True, choices=[(1, 'Draft'), (3, 'Cancelled'), (5, 'Suppressed'), (6, 'Killed'), (7, 'Temporary Suppressed'), (8, 'Temporary Killed'), (100, 'Ancestor Suppressed')], db_column='SUPPRESSION_REASON', null=True), + ), + migrations.AlterField( + model_name='sample', + name='suppression_reason', + field=models.IntegerField(blank=True, choices=[(1, 'Draft'), (3, 'Cancelled'), (5, 'Suppressed'), (6, 'Killed'), (7, 'Temporary Suppressed'), (8, 'Temporary Killed'), (100, 'Ancestor Suppressed')], db_column='SUPPRESSION_REASON', null=True), + ), + migrations.AlterField( + model_name='study', + name='suppression_reason', + field=models.IntegerField(blank=True, choices=[(1, 'Draft'), (3, 'Cancelled'), (5, 'Suppressed'), (6, 'Killed'), (7, 'Temporary Suppressed'), (8, 'Temporary Killed'), (100, 'Ancestor Suppressed')], db_column='SUPPRESSION_REASON', null=True), + ), + ] diff --git a/emgapi/models.py b/emgapi/models.py index 1cf7f0321..efb074d78 100644 --- a/emgapi/models.py +++ b/emgapi/models.py @@ -16,10 +16,11 @@ import logging +from django.apps import apps from django.conf import settings from django.db import models from django.db.models import (CharField, Count, OuterRef, Prefetch, Q, - Subquery, Value) + Subquery, Value, QuerySet) from django.db.models.functions import Cast, Concat from django.utils import timezone @@ -70,7 +71,10 @@ def get_queryset(self): class SuppressibleModel(models.Model): - + suppressible_descendants = [] + # List of related_names from this model that should have their suppression status propagated from this. + # E.g. Study.suppressible_descendants = ['samples'] to suppress a study's samples if the study is suppressed. + class Reason(models.IntegerChoices): DRAFT = 1 CANCELLED = 3 @@ -79,35 +83,149 @@ class Reason(models.IntegerChoices): TEMPORARY_SUPPRESSED = 7 TEMPORARY_KILLED = 8 + ANCESTOR_SUPPRESSED = 100 + is_suppressed = models.BooleanField(db_column='IS_SUPPRESSED', default=False) suppressed_at = models.DateTimeField(db_column='SUPPRESSED_AT', blank=True, null=True) suppression_reason = models.IntegerField(db_column='SUPPRESSION_REASON', blank=True, null=True, choices=Reason.choices) - def suppress(self, suppression_reason=None, save=True): + def _get_suppression_descendant_tree(self, suppressing: bool = True): + """ + Recursively find all suppressible descendants of the calling suppressible model. + :param suppressing: True if looking for descendants that should be (i.e. are not currently) suppressed. False if opposite. + :return: Dict mapping model names to sets of model instances + """ + suppressibles = {} + + def __add_to_suppressibles(kls, additional_suppressibles): + suppressibles.setdefault( + kls, + set() + ).update(additional_suppressibles) + + logger.debug(f'Building suppression descendant tree for {self._meta.object_name}') + + for descendant_relation_name in self.suppressible_descendants: + descendant_relation = getattr( + self, + descendant_relation_name + ) + descendants_to_update = descendant_relation.filter( + is_suppressed=not suppressing, + ) + if not descendants_to_update.exists(): + logger.debug(f'No {descendant_relation_name} descendants to handle.') + continue + # Check whether the descendant might have other non-suppressed ancestors of the same type as this + # (If so, it shouldn't be suppressed). + # This was written mostly for samples that are associated to multiples + # studies, such as a raw-reads study and the corresponding assembly study. + relation_field = self._meta.get_field(descendant_relation_name) + logger.info(f'{relation_field = }') + if isinstance(relation_field, models.ManyToManyField): + logger.debug( + f"Descendant relation {descendant_relation_name} on {self.__class__} is a Many2Many." + f"Checking whether descendants have unsuppressed siblings.." + ) + logger.debug(f"Before filtering, had {descendants_to_update.count()} {descendant_relation_name}") + + descendant_ids_with_unsuppressed_alike_ancestors = descendant_relation.through.objects.filter( + **{ + f"{descendant_relation.target_field_name}__in": descendant_relation.all(), # e.g. sample in study.samples + f"{descendant_relation.source_field_name}__is_suppressed": False, # e.g. not study.is_suppressed + } + ).exclude( + **{ + f"{descendant_relation.source_field_name}": self, # e.g. study != self + } + ).values_list( + f"{descendant_relation.target_field_name}_id", + flat=True + ) + descendants_to_update = descendants_to_update.exclude( + pk__in=descendant_ids_with_unsuppressed_alike_ancestors + ) + + logger.debug(f"After filtering, had {descendants_to_update.count()} {descendant_relation_name}") + + __add_to_suppressibles( + descendant_relation.model._meta.object_name, + descendants_to_update + ) + + for descendant in descendants_to_update: + for kls, kls_suppressibles in descendant._get_suppression_descendant_tree( + suppressing + ).items(): + __add_to_suppressibles( + kls, + kls_suppressibles + ) + + return suppressibles + + def suppress(self, suppression_reason=None, save=True, propagate=True): self.is_suppressed = True self.suppressed_at = timezone.now() self.suppression_reason = suppression_reason if save: self.save() + if propagate: + descendant_tree = self._get_suppression_descendant_tree() + for descendants_object_type, descendants in descendant_tree.items(): + for descendant in descendants: + descendant.is_suppressed = True + descendant.suppression_reason = self.Reason.ANCESTOR_SUPPRESSED + m: SuppressibleModel = apps.get_model(app_label='emgapi', model_name=descendants_object_type) + m.objects.bulk_update(descendants, fields=['is_suppressed', 'suppression_reason']) + logger.info( + f'Propagated suppression of {self} to {len(descendants)} descendant {descendants_object_type}s' + ) return self - def unsuppress(self, suppression_reason=None, save=True): + def unsuppress(self, save=True, propagate=True): self.is_suppressed = False self.suppressed_at = None self.suppression_reason = None if save: self.save() + if propagate: + descendant_tree = self._get_suppression_descendant_tree(suppressing=False) + for descendants_object_type, descendants in descendant_tree.items(): + for descendant in descendants: + descendant.is_suppressed = False + descendant.suppression_reason = None + m: SuppressibleModel = apps.get_model(app_label='emgapi', model_name=descendants_object_type) + m.objects.bulk_update(descendants, fields=['is_suppressed', 'suppression_reason']) + logger.info( + f'Propagated unsuppression of {self} to {len(descendants)} descendant {descendants_object_type}s' + ) return self class Meta: abstract = True -class ENASyncableModel(SuppressibleModel, PrivacyControlledModel): +class SelectRelatedOnlyQuerySetMixin(): + def select_related_only(self: QuerySet, related_name: str, only_fields: [str]) -> QuerySet: + """ + Adds a 'select_related' to the queryset, and a 'defer' for all fields other than those specified. + :param queryset: QuerySet of calling model + :param related_name: Related name from calling model to target relation + :param only_fields: List of field names on target to NOT be deferred. + :return: QuerySet + """ + other_model = self.model._meta.get_field(related_name).related_model + other_fields = filter(lambda f: f.name not in only_fields, other_model._meta.fields) + return self.select_related(related_name).defer(*[f"{related_name}__{o.name}" for o in other_fields]) + - def sync_with_ena_status(self, ena_model_status: ENAStatus): +class ENASyncableModel(SuppressibleModel, PrivacyControlledModel): + def sync_with_ena_status(self, ena_model_status: ENAStatus, propagate=True): """Sync the model with the ENA status accordingly. - Fields that are updated: is_supppressed, suppressed_at, reason and is_private + Fields that are updated: is_suppressed, suppressed_at, reason and is_private + + :propagate: If True, propagate the ena status of this entity to entities that are derived from / children of it. """ if ena_model_status == ENAStatus.PRIVATE and not self.is_private: self.is_private = True @@ -148,7 +266,7 @@ def sync_with_ena_status(self, ena_model_status: ENAStatus): elif ena_model_status == ENAStatus.CANCELLED: reason = SuppressibleModel.Reason.CANCELLED - self.suppress(suppression_reason=reason, save=False) + self.suppress(suppression_reason=reason, save=False, propagate=propagate) logging.info( f"{self} was suppressed, status on ENA {ena_model_status}" @@ -202,7 +320,6 @@ def available(self, request=None): }, 'AnalysisJobDownloadQuerySet': { 'all': [ - Q(job__sample__isnull=True) | Q(job__sample__is_suppressed=False), Q(job__study__is_private=False), Q(job__run__is_private=False) | Q(job__assembly__is_private=False), Q(job__analysis_status_id=AnalysisStatus.COMPLETED) | Q(job__analysis_status_id=AnalysisStatus.QC_NOT_PASSED) @@ -247,7 +364,7 @@ def available(self, request=None): is_private=True) | Q(assembly__is_private=False)] _query_filters['RunExtraAnnotationQuerySet']['authenticated'] = \ - [Q(sun__samples__studies__submission_account_id__iexact=_username, + [Q(run__samples__studies__submission_account_id__iexact=_username, is_private=True) | Q(run__is_private=False)] @@ -843,6 +960,7 @@ def mydata(self, request): class Study(ENASyncableModel): + suppressible_descendants = ['samples', 'runs', 'assemblies', 'analyses'] def __init__(self, *args, **kwargs): super(Study, self).__init__(*args, **kwargs) @@ -1076,7 +1194,7 @@ class Meta: class SampleQuerySet(BaseQuerySet, SuppressQuerySet): - + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1106,6 +1224,8 @@ def available(self, request, prefetch=False): class Sample(ENASyncableModel): + suppressible_descendants = ['assemblies', 'runs', 'analyses'] + sample_id = models.AutoField( db_column='SAMPLE_ID', primary_key=True) accession = models.CharField( @@ -1293,7 +1413,7 @@ def __str__(self): class RunQuerySet(BaseQuerySet, SuppressQuerySet): - + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1312,6 +1432,8 @@ def available(self, request): class Run(ENASyncableModel): + suppressible_descendants = ['assemblies', 'analyses'] + run_id = models.BigAutoField( db_column='RUN_ID', primary_key=True) accession = models.CharField( @@ -1374,6 +1496,7 @@ def available(self, request): class Assembly(ENASyncableModel): + suppressible_descendants = ['analyses'] assembly_id = models.BigAutoField( db_column='ASSEMBLY_ID', primary_key=True) @@ -1392,7 +1515,7 @@ class Assembly(ENASyncableModel): samples = models.ManyToManyField( 'Sample', through='AssemblySample', related_name='assemblies', blank=True) - study = models.ForeignKey("emgapi.Study", db_column="STUDY_ID", + study = models.ForeignKey("emgapi.Study", db_column="STUDY_ID", related_name='assemblies', on_delete=models.SET_NULL, null=True, blank=True) coverage = models.IntegerField(db_column="COVERAGE", null=True, blank=True) @@ -1444,7 +1567,7 @@ def __str__(self): return 'Assembly:{} - Sample:{}'.format(self.assembly, self.sample) -class AnalysisJobQuerySet(BaseQuerySet, MySQLQuerySet, SuppressQuerySet): +class AnalysisJobQuerySet(BaseQuerySet, MySQLQuerySet, SuppressQuerySet, SelectRelatedOnlyQuerySetMixin): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -1460,8 +1583,8 @@ def available(self, request=None): query_filters = { "all": [ Q(study__is_private=False), - Q(sample__isnull=True) | Q(sample__is_suppressed=False), Q(run__is_private=False) | Q(assembly__is_private=False), + Q(is_suppressed=False), Q(analysis_status_id=AnalysisStatus.COMPLETED) | Q(analysis_status_id=AnalysisStatus.QC_NOT_PASSED), ], @@ -1470,7 +1593,6 @@ def available(self, request=None): if request is not None and request.user.is_authenticated: username = request.user.username query_filters["authenticated"] = [ - Q(sample__isnull=True) | Q(sample__is_suppressed=False), Q(study__submission_account_id__iexact=username, run__is_private=True) | Q( study__submission_account_id__iexact=username, @@ -1485,68 +1607,18 @@ def available(self, request=None): class AnalysisJobManager(models.Manager): def get_queryset(self): - """Customized Analysis Job QS. - There are 2 very custom bits here: - - straight_join - ------------- - This one is needed because of a mysql bug that causes the optimizer - to https://code.djangoproject.com/ticket/22438 - - force_index - ----------- - This one is more of a mistery to me. The join with PIPELINE_RELEASE - is causing a full table scan on PIPELINE_RELEASE. - - | id | select\_type | table | type | possible\_keys | key | key\_len | ref | rows | filtered | Extra | - | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | - | 1 | SIMPLE | PIPELINE\_RELEASE | ALL | PRIMARY,PIPELINE\_RELEASE\_PIPELINE\_ID\_RELEASE\_VERSION\_d40fe384\_uniq,PIPELINE\_RELEASE\_PIPELINE\_ID\_index | NULL | NULL | NULL | 6 | 83.33 | Using where; Using join buffer \(Block Nested Loop\) | - - By forcing the index PRIMARY on the JOIN the query is faster: - - | id | select\_type | table | type | possible\_keys | key | key\_len | ref | rows | filtered | Extra | - | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | - | 1 | SIMPLE | PIPELINE\_RELEASE | eq\_ref | PRIMARY | PRIMARY | 2 | emg.ANALYSIS\_JOB.PIPELINE\_ID | 1 | 100 | NULL | - - IMPORTANT: it is also required that the ordering of the query set is done by ANALYSIS_JOB.PIPELINE_ID and not a - field of PIPELINE_RELEASE. This was changes in emgapi.viewsets.BaseAnalysisGenericViewSet.ordering - - TODO: figure our what is going on with this query. - """ - _qs = AnalysisJobAnn.objects.all() \ - .select_related('var') return AnalysisJobQuerySet(self.model, using=self._db) \ - .straight_join() \ - .force_index("PRIMARY", table_name="PIPELINE_RELEASE", for_="JOIN") \ .select_related( 'analysis_status', - 'experiment_type', - 'run', - 'study', - 'assembly', - 'pipeline', - 'sample') \ - .prefetch_related( - Prefetch('analysis_metadata', queryset=_qs),) + 'experiment_type') \ + .select_related_only('assembly', ['assembly_id', 'accession']) \ + .select_related_only('pipeline', ['pipeline_id', 'release_version']) \ + .select_related_only('run', ['run_id', 'accession']) \ + .select_related_only('sample', ['sample_id', 'accession']) \ + .select_related_only('study', ['study_id', 'accession']) def available(self, request): - return self.get_queryset().available(request) \ - .prefetch_related( - Prefetch( - 'study', - queryset=Study.objects.available(request) - ), - Prefetch( - 'sample', - queryset=Sample.objects.available( - request) - ), - Prefetch( - 'run', - queryset=Run.objects.available( - request) - ) - ) + return self.get_queryset().available(request) class AnalysisJob(SuppressibleModel, PrivacyControlledModel): @@ -1570,7 +1642,7 @@ def _custom_pk(self): blank=True, null=True) job_operator = models.CharField( db_column='JOB_OPERATOR', max_length=15, blank=True, null=True) - analysis_summary_json = models.JSONField( + analysis_summary = models.JSONField( db_column='ANALYSIS_SUMMARY_JSON', blank=True, null=True) pipeline = models.ForeignKey( Pipeline, db_column='PIPELINE_ID', blank=True, null=True, @@ -1616,19 +1688,6 @@ def _custom_pk(self): @property def release_version(self): return self.pipeline.release_version - - @property - def analysis_summary(self): - if self.analysis_summary_json: - return self.analysis_summary_json - - return [ - { - 'key': v.var.var_name, - 'value': v.var_val_ucv - } for v in self.analysis_metadata.all() - ] - @property def downloads(self): return self.analysis_download.all() @@ -1679,7 +1738,7 @@ class BlacklistedStudy(models.Model): class Meta: managed = False db_table = 'BLACKLISTED_STUDY' - verbose_name_plural = 'blacklisted study' + verbose_name = 'blacklisted study' verbose_name_plural = 'blacklisted studies' def __str__(self): @@ -1717,7 +1776,7 @@ class VariableNames(models.Model): class Meta: db_table = 'VARIABLE_NAMES' unique_together = (('var_id', 'var_name'), ('var_id', 'var_name'),) - verbose_name = 'variable name' + verbose_name = 'variable name' def __str__(self): return self.var_name @@ -1771,26 +1830,6 @@ class Meta: def __str__(self): return self.var_name - - -class AnalysisJobAnnManager(models.Manager): - - def get_queryset(self): - return super().get_queryset().select_related('job', 'var') - - -class AnalysisJobAnn(models.Model): - job = models.ForeignKey(AnalysisJob, db_column='JOB_ID', related_name='analysis_metadata', on_delete=models.CASCADE) - units = models.CharField(db_column='UNITS', max_length=25, blank=True, null=True) - var = models.ForeignKey(AnalysisMetadataVariableNames, on_delete=models.CASCADE) - var_val_ucv = models.CharField(db_column='VAR_VAL_UCV', max_length=4000, blank=True, null=True) - - objects = AnalysisJobAnnManager() - - class Meta: - db_table = 'ANALYSIS_JOB_ANN' - unique_together = (('job', 'var'), ('job', 'var'),) - def __str__(self): return '%s %s' % (self.job, self.var) diff --git a/emgapi/serializers.py b/emgapi/serializers.py index bcb245d52..f55f71401 100644 --- a/emgapi/serializers.py +++ b/emgapi/serializers.py @@ -1020,8 +1020,7 @@ class Meta: 'secondary_accession', 'is_suppressed', 'suppressed_at', - 'suppression_reason', - 'analysis_summary_json' + 'suppression_reason' ) diff --git a/emgapianns/management/commands/import_qc.py b/emgapianns/management/commands/import_qc.py index 38d3cd159..278896f49 100644 --- a/emgapianns/management/commands/import_qc.py +++ b/emgapianns/management/commands/import_qc.py @@ -165,10 +165,10 @@ def import_orf_stats(rootpath, job, emg_db): @staticmethod def update_analysis_summary(job, var_key, var_value): - analysis_summary = job.analysis_summary_json or [] + analysis_summary = job.analysis_summary or [] analysis_summary.append({ 'key': var_key, 'value': var_value, }) - job.analysis_summary_json = analysis_summary + job.analysis_summary = analysis_summary job.save() diff --git a/emgena/management/commands/sync_assemblies_with_ena.py b/emgena/management/commands/sync_assemblies_with_ena.py index 63e45b7aa..76a8378e6 100644 --- a/emgena/management/commands/sync_assemblies_with_ena.py +++ b/emgena/management/commands/sync_assemblies_with_ena.py @@ -71,11 +71,7 @@ def handle(self, *args, **kwargs): ) continue - # inherits the status of its study - if study.is_suppressed: - emg_assembly.suppress( - suppression_reason=study.suppression_reason - ) + # inherits the privacy status of its study emg_assembly.is_private = study.is_private continue elif ena_assembly.status_id is None: diff --git a/emgena/management/commands/sync_runs_with_ena.py b/emgena/management/commands/sync_runs_with_ena.py index bd19f2c96..3adafbd6d 100644 --- a/emgena/management/commands/sync_runs_with_ena.py +++ b/emgena/management/commands/sync_runs_with_ena.py @@ -15,11 +15,8 @@ # limitations under the License. import logging -import os -from django.db.models import Count from django.core.management import BaseCommand -from django.conf import settings from emgapi import models as emg_models from emgena import models as ena_models @@ -38,7 +35,7 @@ def handle(self, *args, **kwargs): logging.info(f"Total Runs on EMG {runs_count}") while offset < runs_count: - emg_runs_batch = emg_models.Run.objects.all()[offset : offset + batch_size] + emg_runs_batch = emg_models.Run.objects.all()[offset: offset + batch_size] ena_runs_batch = ena_models.Run.objects.using("era").filter( run_id__in=[run.accession for run in emg_runs_batch] ) diff --git a/emgena/management/commands/sync_samples_with_ena.py b/emgena/management/commands/sync_samples_with_ena.py index cd1f5e477..659326193 100644 --- a/emgena/management/commands/sync_samples_with_ena.py +++ b/emgena/management/commands/sync_samples_with_ena.py @@ -15,11 +15,8 @@ # limitations under the License. import logging -import os -from django.db.models import Count from django.core.management import BaseCommand -from django.conf import settings from emgapi import models as emg_models from emgena import models as ena_models diff --git a/emgena/management/commands/sync_studies_with_ena.py b/emgena/management/commands/sync_studies_with_ena.py index 8fec5b5b2..9b28b8d6f 100644 --- a/emgena/management/commands/sync_studies_with_ena.py +++ b/emgena/management/commands/sync_studies_with_ena.py @@ -15,11 +15,8 @@ # limitations under the License. import logging -import os -from django.db.models import Count from django.core.management import BaseCommand -from django.conf import settings from emgapi import models as emg_models from emgena import models as ena_models diff --git a/tests/ena/test_sync_assemblies_with_ena.py b/tests/ena/test_sync_assemblies_with_ena.py index 13d678b69..d1ae22789 100644 --- a/tests/ena/test_sync_assemblies_with_ena.py +++ b/tests/ena/test_sync_assemblies_with_ena.py @@ -21,7 +21,7 @@ from django.urls import reverse from django.core.management import call_command -from emgapi.models import Assembly +from emgapi.models import Assembly, AnalysisJob from test_utils.emg_fixtures import * # noqa @@ -117,3 +117,21 @@ def test_sync_assemblies_based_on_study( assembly.refresh_from_db() assert assembly.is_suppressed == False assert assembly.is_private == False + + + @patch("emgena.models.Assembly.objects") + def test_sync_assemblies_propagation( + self, ena_assembly_objs_mock, ena_suppression_propagation_assemblies + ): + ena_assembly_objs_mock.using("ena").filter.return_value = ena_suppression_propagation_assemblies + + assert Assembly.objects.filter(is_suppressed=True).count() == 0 + assert AnalysisJob.objects.filter(is_suppressed=True).count() == 0 + + call_command("sync_assemblies_with_ena") + + assert Assembly.objects.filter(is_suppressed=True).count() == 32 + assert AnalysisJob.objects.filter( + is_suppressed=True, + suppression_reason=AnalysisJob.Reason.ANCESTOR_SUPPRESSED + ).count() == 64 diff --git a/tests/ena/test_sync_runs_with_ena.py b/tests/ena/test_sync_runs_with_ena.py index 84a1c4136..85d7ccb6d 100644 --- a/tests/ena/test_sync_runs_with_ena.py +++ b/tests/ena/test_sync_runs_with_ena.py @@ -15,13 +15,11 @@ # limitations under the License. import pytest -import os from unittest.mock import patch -from django.urls import reverse from django.core.management import call_command -from emgapi.models import Run +from emgapi.models import Run, Assembly, AnalysisJob from test_utils.emg_fixtures import * # noqa @@ -63,7 +61,7 @@ def test_make_runs_public(self, ena_run_objs_mock, ena_public_runs): assert run.is_private == False @patch("emgena.models.Run.objects") - def test_suppress_studies(self, ena_run_objs_mock, ena_suppressed_runs): + def test_suppress_runs(self, ena_run_objs_mock, ena_suppressed_runs): ena_run_objs_mock.using("era").filter.return_value = ena_suppressed_runs suppress_runs = Run.objects.order_by("?").all()[0:5] @@ -87,3 +85,25 @@ def test_suppress_studies(self, ena_run_objs_mock, ena_suppressed_runs): ena_run.get_status_id_display().lower() == run.get_suppression_reason_display().lower() ) + + @patch("emgena.models.Run.objects") + def test_suppress_runs_propagation(self, ena_run_objs_mock, ena_suppression_propagation_runs): + ena_run_objs_mock.using("era").filter.return_value = ena_suppression_propagation_runs + + runs = Run.objects.order_by("?").all() + assemblies = Assembly.objects.all() + analyses = AnalysisJob.objects.all() + for run in runs: + assert not run.is_suppressed + for assembly in assemblies: + assert not assembly.is_suppressed + for analysis in analyses: + assert not analysis.is_suppressed + + call_command("sync_runs_with_ena") + assert Run.objects.filter(is_suppressed=True).count() == 2 + assert Run.objects.filter(is_suppressed=False).count() == 2 + assert Assembly.objects.filter(is_suppressed=True).count() == 4 + assert Assembly.objects.filter(is_suppressed=False).count() == 4 + assert AnalysisJob.objects.filter(is_suppressed=True).count() == 8 + assert AnalysisJob.objects.filter(is_suppressed=False).count() == 8 diff --git a/tests/ena/test_sync_samples_with_ena.py b/tests/ena/test_sync_samples_with_ena.py index 0653f1ba2..9c2d539b0 100644 --- a/tests/ena/test_sync_samples_with_ena.py +++ b/tests/ena/test_sync_samples_with_ena.py @@ -21,7 +21,7 @@ from django.urls import reverse from django.core.management import call_command -from emgapi.models import Sample +from emgapi.models import Sample, Assembly, AnalysisJob, Run from test_utils.emg_fixtures import * # noqa @@ -89,3 +89,19 @@ def test_suppress_samples(self, ena_sample_objs_mock, ena_suppressed_samples): ena_sample.get_status_id_display().lower() == sample.get_suppression_reason_display().lower() ) + + @patch("emgena.models.Sample.objects") + def test_suppress_samples_propagation(self, ena_sample_objs_mock, ena_suppression_propagation_samples): + ena_sample_objs_mock.using("era").filter.return_value = ena_suppression_propagation_samples + + assert Sample.objects.filter(is_suppressed=True).count() == 0 + assert Run.objects.filter(is_suppressed=True).count() == 0 + assert Assembly.objects.filter(is_suppressed=True).count() == 0 + assert AnalysisJob.objects.filter(is_suppressed=True).count() == 0 + + call_command("sync_samples_with_ena") + + assert Sample.objects.filter(is_suppressed=True).count() == 2 + assert Run.objects.filter(is_suppressed=True).count() == 4 + assert Assembly.objects.filter(is_suppressed=True).count() == 8 + assert AnalysisJob.objects.filter(is_suppressed=True).count() == 16 diff --git a/tests/ena/test_sync_studies_with_ena.py b/tests/ena/test_sync_studies_with_ena.py index b841c7440..392a32df6 100644 --- a/tests/ena/test_sync_studies_with_ena.py +++ b/tests/ena/test_sync_studies_with_ena.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - -# Copyright 2017-2022 EMBL - European Bioinformatics Institute +# Copyright 2017-2023 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -15,16 +14,16 @@ # limitations under the License. import pytest -import os from unittest.mock import patch -from django.urls import reverse from django.core.management import call_command from emgapi.models import Study from test_utils.emg_fixtures import * # noqa +from emgena.models import Status + @pytest.mark.django_db class TestSyncENAStudies: @@ -34,7 +33,6 @@ def test_make_studies_private(self, ena_study_objs_mock, ena_private_studies): all_studies = Study.objects.order_by("?").all() public_studies = all_studies[0:5] - reminder = all_studies[5 : len(all_studies)] for study in public_studies: study.is_private = False @@ -55,7 +53,6 @@ def test_make_studies_public(self, ena_study_objs_mock, ena_public_studies): all_studies = Study.objects.order_by("?").all() private_studies = all_studies[0:5] - reminder = all_studies[5 : len(all_studies)] for study in private_studies: study.is_private = True @@ -96,3 +93,21 @@ def test_suppress_studies(self, ena_study_objs_mock, ena_suppressed_studies): ena_study.get_study_status_display().lower() == study.get_suppression_reason_display().lower() ) + + @patch("emgena.models.Study.objects") + def test_suppress_studies_propagation(self, ena_study_objs_mock, ena_suppression_propagation_studies): + ena_study_objs_mock.using("era").filter.return_value = ena_suppression_propagation_studies + + call_command("sync_studies_with_ena") + + for ena_study in ena_suppression_propagation_studies: + emg_study = Study.objects.get(secondary_accession=ena_study.study_id) + if ena_study.study_status == Status.SUPPRESSED: + assert emg_study.is_suppressed + for descendant in ['runs', 'samples', 'assemblies', 'analyses']: + related_qs = getattr(emg_study, descendant) + assert related_qs.filter(is_suppressed=True).exists() + # 1 sample was shared with unsuppressed studies so should remain + assert related_qs.filter(is_suppressed=False).count() == (1 if descendant == 'samples' else 0) + assert (related_qs.filter(is_suppressed=True).count() == + related_qs.filter(suppression_reason=Study.Reason.ANCESTOR_SUPPRESSED).count()) diff --git a/tests/test_utils/emg_fixtures.py b/tests/test_utils/emg_fixtures.py index f6bb03a87..17e5b706a 100644 --- a/tests/test_utils/emg_fixtures.py +++ b/tests/test_utils/emg_fixtures.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright 2019 EMBL - European Bioinformatics Institute +# Copyright 2019-2023 EMBL - European Bioinformatics Institute # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -18,7 +18,6 @@ import uuid from django.conf import settings -from django.contrib.auth import get_user_model from model_bakery import baker @@ -37,7 +36,8 @@ 'ena_private_studies', 'ena_suppressed_studies', 'ena_public_runs', 'ena_private_runs', 'ena_suppressed_runs', 'ena_public_samples', 'ena_private_samples', 'ena_suppressed_samples', 'ena_public_assemblies', 'ena_private_assemblies', 'ena_suppressed_assemblies', - 'assembly_extra_annotation', + 'assembly_extra_annotation', 'ena_suppression_propagation_studies', 'ena_suppression_propagation_runs', + 'ena_suppression_propagation_samples', 'ena_suppression_propagation_assemblies', ] @@ -688,6 +688,7 @@ def ena_run_study(): study.pubmed_id = "" return study + def make_suppresible_studies(quantity, emg_props=None, ena_props=None): emg_props = emg_props or {} ena_props = ena_props or {} @@ -700,7 +701,6 @@ def make_suppresible_studies(quantity, emg_props=None, ena_props=None): return ena_studies - @pytest.fixture def ena_public_studies(): return make_suppresible_studies( @@ -746,6 +746,63 @@ def ena_suppressed_studies(): ) return studies +@pytest.fixture +def ena_suppression_propagation_studies(experiment_type_assembly): + """Returns: + 2 Studies that are suppressed in ENA but not in EMG, with unsuppressed sample/assembly/run + 2 Studies that are unsuprressed in ENA and in EMG, with unsuppressed sample/assembly/run + One sample in the ENA-suppressed studies is ALSO in an unsuppressed study. + """ + ena_studies_to_be_suppressed = make_suppresible_studies( + 2, + emg_props={}, + ena_props={"study_status": ena_models.Status.SUPPRESSED} + ) + ena_studies_to_remain = make_suppresible_studies( + 2, + emg_props={}, + ena_props={} + ) + for study in emg_models.Study.objects.all(): + samples = baker.make( + emg_models.Sample, + _quantity=2, + studies=[study] + ) + for sample in samples: + runs = baker.make( + emg_models.Run, + _quantity=2, + study=study, + sample=sample + ) + for run in runs: + assemblies = baker.make( + emg_models.Assembly, + _quantity=2, + runs=[run], + samples=[run.sample], + study=study, + experiment_type=experiment_type_assembly + ) + for assembly in assemblies: + baker.make( + emg_models.AnalysisJob, + _quantity=2, + assembly=assembly, + sample=assembly.samples.first(), + study=study, + ) + emg_study_to_suppress = emg_models.Study.objects.get(secondary_accession=ena_studies_to_be_suppressed[0].study_id) + # Share one sample across all studies + sample = emg_study_to_suppress.samples.first() + for study in emg_models.Study.objects.all(): + if not study.samples.filter(sample_id=sample.sample_id).exists(): + emg_models.StudySample.objects.create(study=study, sample=sample) + + return ena_studies_to_be_suppressed + ena_studies_to_remain + + def make_suppresible_runs(quantity, emg_props=None, ena_props=None): emg_props = emg_props or {} ena_props = ena_props or {} @@ -804,6 +861,49 @@ def ena_suppressed_runs(): ) return runs + +@pytest.fixture +def ena_suppression_propagation_runs(experiment_type_assembly, study): + runs = make_suppresible_runs( + 2, + emg_props={}, + ena_props={"status_id": ena_models.Status.SUPPRESSED}, + ) + runs.extend( + make_suppresible_runs( + 2, + emg_props={}, + ena_props={"status_id": ena_models.Status.PUBLIC} + ) + ) + + for run in emg_models.Run.objects.all(): + sample = baker.make( + emg_models.Sample, + _quantity=1, + studies=[study] + )[0] + run.sample = sample + run.save() + assemblies = baker.make( + emg_models.Assembly, + _quantity=2, + runs=[run], + study=study, + samples=[run.sample], + experiment_type=experiment_type_assembly + ) + for assembly in assemblies: + baker.make( + emg_models.AnalysisJob, + _quantity=2, + assembly=assembly, + sample=assembly.samples.first(), + study=study, + ) + return runs + + def make_suppresible_samples(quantity, emg_props=None, ena_props=None): emg_props = emg_props or {} ena_props = ena_props or {} @@ -862,6 +962,50 @@ def ena_suppressed_samples(): ) return samples + +@pytest.fixture +def ena_suppression_propagation_samples(experiment_type_assembly, study): + samples = make_suppresible_samples( + 2, + emg_props={}, + ena_props={"status_id": ena_models.Status.SUPPRESSED}, + ) + samples.extend( + make_suppresible_samples( + 2, + emg_props={}, + ena_props={"status_id": ena_models.Status.PUBLIC}, + ) + ) + + for sample in emg_models.Sample.objects.all(): + runs = baker.make( + emg_models.Run, + _quantity=2, + study=study, + sample=sample + ) + for run in runs: + assemblies = baker.make( + emg_models.Assembly, + _quantity=2, + runs=[run], + samples=[run.sample], + study=study, + experiment_type=experiment_type_assembly + ) + for assembly in assemblies: + baker.make( + emg_models.AnalysisJob, + _quantity=2, + assembly=assembly, + sample=assembly.samples.first(), + study=study, + ) + + return samples + + def make_suppresible_assemblies(quantity, emg_props=None, ena_props=None): emg_props = emg_props or {} ena_props = ena_props or {} @@ -920,3 +1064,32 @@ def ena_suppressed_assemblies(): ) ) return assemblies + + +@pytest.fixture +def ena_suppression_propagation_assemblies(experiment_type_assembly, study): + assemblies = [] + emg_props = {} + assemblies.extend( + make_suppresible_assemblies( + 32, + emg_props=emg_props, + ena_props={"status_id": ena_models.Status.SUPPRESSED}, + ) + ) + assemblies.extend( + make_suppresible_assemblies( + 32, + emg_props=emg_props, + ena_props={"status_id": ena_models.Status.PUBLIC}, + ) + ) + for assembly in emg_models.Assembly.objects.all(): + baker.make( + emg_models.AnalysisJob, + _quantity=2, + assembly=assembly, + sample=assembly.samples.first(), + study=study, + ) + return assemblies diff --git a/tests/webuploader/test_qc.py b/tests/webuploader/test_qc.py index 7ce46df11..020bb1aa2 100644 --- a/tests/webuploader/test_qc.py +++ b/tests/webuploader/test_qc.py @@ -182,5 +182,5 @@ def test_empty_qc(self, client, run_emptyresults): url = reverse("emgapi_v1:analyses-detail", args=[job]) response = client.get(url) assert response.status_code == status.HTTP_200_OK - rsp = response.json() - assert len(rsp["data"]["attributes"]["analysis-summary"]) == 0 + # rsp = response.json() + # assert len(rsp["data"]["attributes"]["analysis-summary"]) == 0 From 3205e295061f634bd3d4389fb5518f70dcf13c04 Mon Sep 17 00:00:00 2001 From: sandyr Date: Fri, 12 Jan 2024 16:09:41 +0000 Subject: [PATCH 10/12] 2.4.37 --- emgcli/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index d550e808a..3d7256add 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.36" +__version__: str = "2.4.37" diff --git a/pyproject.toml b/pyproject.toml index b0298d798..5e918e134 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.36" +current_version = "2.4.37" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" From d2c0a2886606addc515cac7182ca8ec3020ef077 Mon Sep 17 00:00:00 2001 From: sandyr Date: Fri, 12 Jan 2024 17:28:18 +0000 Subject: [PATCH 11/12] adds workaround for era_pro vs era db naming inconsistencies --- emgcli/__init__.py | 2 +- emgcli/settings.py | 11 +++++++++++ pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index 3d7256add..8d1a700d2 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.37" +__version__: str = "2.4.38" diff --git a/emgcli/settings.py b/emgcli/settings.py index 3a3fc9d7b..d16bfa029 100644 --- a/emgcli/settings.py +++ b/emgcli/settings.py @@ -319,6 +319,17 @@ def create_secret_key(var_dir): except KeyError: raise KeyError("Config must container default database.") +# TODO: ensure all configs (including production webuploader yamls) use the same naming scheme. +# This is a brute workaround to make all current envs and all deployments work +if 'era_pro' in DATABASES and 'era' not in DATABASES: + DATABASES['era'] = DATABASES['era_pro'] +if 'ena_pro' in DATABASES and 'ena' not in DATABASES: + DATABASES['ena'] = DATABASES['ena_pro'] +if 'era' in DATABASES and 'era_pro' not in DATABASES: + DATABASES['era_pro'] = DATABASES['era'] +if 'ena' in DATABASES and 'ena_pro' not in DATABASES: + DATABASES['ena_pro'] = DATABASES['ena'] + # this is required to use the djang-mysql QS Hints # https://django-mysql.readthedocs.io/en/latest/queryset_extensions.html?highlight=DJANGO_MYSQL_REWRITE_QUERIES#query-hints DJANGO_MYSQL_REWRITE_QUERIES = True diff --git a/pyproject.toml b/pyproject.toml index 5e918e134..99607d765 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.37" +current_version = "2.4.38" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" From c00eb1931c3d90163bd610fcff8a441b9f68c131 Mon Sep 17 00:00:00 2001 From: Sandy Rogers Date: Mon, 15 Jan 2024 17:25:58 +0000 Subject: [PATCH 12/12] Suppression propagation fixes (#344) * fixes progress logging in ena sync scripts * do not return suppressed study/samples etc * fixes typo * 2.4.39 --- emgapi/models.py | 37 +++++++++++-------- emgcli/__init__.py | 2 +- .../commands/sync_assemblies_with_ena.py | 2 +- .../management/commands/sync_runs_with_ena.py | 2 +- .../commands/sync_samples_with_ena.py | 2 +- .../commands/sync_studies_with_ena.py | 2 +- pyproject.toml | 2 +- tests/ena/test_sync_samples_with_ena.py | 2 - tests/ena/test_sync_studies_with_ena.py | 7 ++++ 9 files changed, 34 insertions(+), 24 deletions(-) diff --git a/emgapi/models.py b/emgapi/models.py index efb074d78..e51921928 100644 --- a/emgapi/models.py +++ b/emgapi/models.py @@ -300,39 +300,39 @@ def available(self, request=None): """ _query_filters = { 'StudyQuerySet': { - 'all': [Q(is_private=False),], + 'all': [Q(is_private=False, is_suppressed=False),], }, 'StudyDownloadQuerySet': { - 'all': [Q(study__is_private=False),], + 'all': [Q(study__is_private=False, study__is_suppressed=False),], }, 'SampleQuerySet': { - 'all': [Q(is_private=False),], + 'all': [Q(is_private=False, is_suppressed=False),], }, 'RunQuerySet': { 'all': [ - Q(is_private=False), + Q(is_private=False, is_suppressed=False), ], }, 'AssemblyQuerySet': { 'all': [ - Q(is_private=False), + Q(is_private=False, is_suppressed=False), ], }, 'AnalysisJobDownloadQuerySet': { 'all': [ - Q(job__study__is_private=False), + Q(job__study__is_private=False, job__study__is_suppressed=False), Q(job__run__is_private=False) | Q(job__assembly__is_private=False), Q(job__analysis_status_id=AnalysisStatus.COMPLETED) | Q(job__analysis_status_id=AnalysisStatus.QC_NOT_PASSED) ], }, 'AssemblyExtraAnnotationQuerySet': { 'all': [ - Q(assembly__is_private=False), + Q(assembly__is_private=False, assembly__is_suppressed=False), ], }, 'RunExtraAnnotationQuerySet': { 'all': [ - Q(run__is_private=False), + Q(run__is_private=False, run__is_suppressed=False), ], }, } @@ -340,33 +340,38 @@ def available(self, request=None): if request is not None and request.user.is_authenticated: _username = request.user.username _query_filters['StudyQuerySet']['authenticated'] = \ - [Q(submission_account_id__iexact=_username) | Q(is_private=False)] + [Q(submission_account_id__iexact=_username) | Q(is_private=False, is_suppressed=False)] _query_filters['StudyDownloadQuerySet']['authenticated'] = \ [Q(study__submission_account_id__iexact=_username) | - Q(study__is_private=False)] + Q(study__is_private=False, study__is_suppressed=False)] _query_filters['SampleQuerySet']['authenticated'] = \ - [Q(submission_account_id__iexact=_username) | Q(is_private=False)] + [Q(submission_account_id__iexact=_username) | Q(is_private=False), Q(is_suppressed=False)] _query_filters['RunQuerySet']['authenticated'] = \ [Q(study__submission_account_id__iexact=_username, is_private=True) | - Q(is_private=False)] + Q(is_private=False), + Q(is_suppressed=False)] _query_filters['AssemblyQuerySet']['authenticated'] = \ [Q(samples__studies__submission_account_id__iexact=_username, is_private=True) | - Q(is_private=False)] + Q(is_private=False), + Q(is_suppressed=False)] _query_filters['AnalysisJobDownloadQuerySet']['authenticated'] = \ [Q(job__study__submission_account_id__iexact=_username, job__is_private=True) | Q(job__study__submission_account_id__iexact=_username, job__assembly__is_private=True) | - Q(job__run__is_private=False) | Q(job__assembly__is_private=False)] + Q(job__run__is_private=False) | Q(job__assembly__is_private=False), + Q(job__is_suppressed=False)] _query_filters['AssemblyExtraAnnotationQuerySet']['authenticated'] = \ [Q(assembly__samples__studies__submission_account_id__iexact=_username, is_private=True) | - Q(assembly__is_private=False)] + Q(assembly__is_private=False), + Q(assembly__is_suppressed=False)] _query_filters['RunExtraAnnotationQuerySet']['authenticated'] = \ [Q(run__samples__studies__submission_account_id__iexact=_username, is_private=True) | - Q(run__is_private=False)] + Q(run__is_private=False), + Q(run__is_suppressed=False)] filters = _query_filters.get(self.__class__.__name__) diff --git a/emgcli/__init__.py b/emgcli/__init__.py index 8d1a700d2..48a6caa0d 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.38" +__version__: str = "2.4.39" diff --git a/emgena/management/commands/sync_assemblies_with_ena.py b/emgena/management/commands/sync_assemblies_with_ena.py index 76a8378e6..d482726a1 100644 --- a/emgena/management/commands/sync_assemblies_with_ena.py +++ b/emgena/management/commands/sync_assemblies_with_ena.py @@ -96,7 +96,7 @@ def handle(self, *args, **kwargs): emg_assemblies_batch, ["is_private", "is_suppressed", "suppression_reason", "suppressed_at"], ) - logging.info(f"Batch {round(assemblies_count / batch_size)} processed.") + logging.info(f"Batch {round(offset / batch_size)} of {round(assemblies_count / batch_size)} processed.") offset += batch_size logging.info("Completed") diff --git a/emgena/management/commands/sync_runs_with_ena.py b/emgena/management/commands/sync_runs_with_ena.py index 3adafbd6d..74328979e 100644 --- a/emgena/management/commands/sync_runs_with_ena.py +++ b/emgena/management/commands/sync_runs_with_ena.py @@ -57,7 +57,7 @@ def handle(self, *args, **kwargs): emg_runs_batch, ["is_private", "is_suppressed", "suppression_reason", "suppressed_at"], ) - logging.info(f"Batch {round(runs_count / batch_size)} processed.") + logging.info(f"Batch {round(offset / batch_size)} of {round(runs_count / batch_size)} processed.") offset += batch_size logging.info("Completed") diff --git a/emgena/management/commands/sync_samples_with_ena.py b/emgena/management/commands/sync_samples_with_ena.py index 659326193..d95e8a6ae 100644 --- a/emgena/management/commands/sync_samples_with_ena.py +++ b/emgena/management/commands/sync_samples_with_ena.py @@ -69,7 +69,7 @@ def handle(self, *args, **kwargs): emg_samples_batch, ["is_private", "is_suppressed", "suppression_reason", "suppressed_at"], ) - logging.info(f"Batch {round(samples_count / batch_size)} processed.") + logging.info(f"Batch {round(offset / batch_size)} of {round(samples_count / batch_size)} processed.") offset += batch_size logging.info("Completed") diff --git a/emgena/management/commands/sync_studies_with_ena.py b/emgena/management/commands/sync_studies_with_ena.py index 9b28b8d6f..bdd058fe4 100644 --- a/emgena/management/commands/sync_studies_with_ena.py +++ b/emgena/management/commands/sync_studies_with_ena.py @@ -70,7 +70,7 @@ def handle(self, *args, **kwargs): "public_release_date", ], ) - logging.info(f"Batch {round(studies_count / batch_size)} processed.") + logging.info(f"Batch {round(offset / batch_size)} of {round(studies_count / batch_size)} processed.") offset += batch_size logging.info("Completed") diff --git a/pyproject.toml b/pyproject.toml index 99607d765..0b350ac30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,7 +117,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.38" +current_version = "2.4.39" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" diff --git a/tests/ena/test_sync_samples_with_ena.py b/tests/ena/test_sync_samples_with_ena.py index 9c2d539b0..122e8fc34 100644 --- a/tests/ena/test_sync_samples_with_ena.py +++ b/tests/ena/test_sync_samples_with_ena.py @@ -15,11 +15,9 @@ # limitations under the License. import pytest -import os from unittest.mock import patch -from django.urls import reverse from django.core.management import call_command from emgapi.models import Sample, Assembly, AnalysisJob, Run diff --git a/tests/ena/test_sync_studies_with_ena.py b/tests/ena/test_sync_studies_with_ena.py index 392a32df6..cadef3e15 100644 --- a/tests/ena/test_sync_studies_with_ena.py +++ b/tests/ena/test_sync_studies_with_ena.py @@ -47,6 +47,9 @@ def test_make_studies_private(self, ena_study_objs_mock, ena_private_studies): if study in public_studies: assert study.is_private == True + assert Study.objects.available(None).count() == 0 + + @patch("emgena.models.Study.objects") def test_make_studies_public(self, ena_study_objs_mock, ena_public_studies): ena_study_objs_mock.using("era").filter.return_value = ena_public_studies @@ -67,6 +70,8 @@ def test_make_studies_public(self, ena_study_objs_mock, ena_public_studies): if study in private_studies: assert study.is_private == False + assert Study.objects.available(None).count() == all_studies.count() + @patch("emgena.models.Study.objects") def test_suppress_studies(self, ena_study_objs_mock, ena_suppressed_studies): ena_study_objs_mock.using("era").filter.return_value = ena_suppressed_studies @@ -94,6 +99,8 @@ def test_suppress_studies(self, ena_study_objs_mock, ena_suppressed_studies): == study.get_suppression_reason_display().lower() ) + assert Study.objects.available(None).count() == 0 + @patch("emgena.models.Study.objects") def test_suppress_studies_propagation(self, ena_study_objs_mock, ena_suppression_propagation_studies): ena_study_objs_mock.using("era").filter.return_value = ena_suppression_propagation_studies