diff --git a/emgapi/migrations/0011_analysisjob_analysis_summary_json.py b/emgapi/migrations/0011_analysisjob_analysis_summary_json.py new file mode 100644 index 000000000..3dd167db1 --- /dev/null +++ b/emgapi/migrations/0011_analysisjob_analysis_summary_json.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.18 on 2023-09-13 10:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('emgapi', '0010_runextraannotation'), + ] + + operations = [ + migrations.AddField( + model_name='analysisjob', + name='analysis_summary_json', + field=models.JSONField(blank=True, db_column='ANALYSIS_SUMMARY_JSON', null=True), + ), + ] diff --git a/emgapi/models.py b/emgapi/models.py index b5337c409..1faf824ea 100644 --- a/emgapi/models.py +++ b/emgapi/models.py @@ -1559,6 +1559,8 @@ def _custom_pk(self): blank=True, null=True) job_operator = models.CharField( db_column='JOB_OPERATOR', max_length=15, blank=True, null=True) + analysis_summary_json = models.JSONField( + db_column='ANALYSIS_SUMMARY_JSON', blank=True, null=True) pipeline = models.ForeignKey( Pipeline, db_column='PIPELINE_ID', blank=True, null=True, related_name='analyses', on_delete=models.CASCADE) @@ -1606,6 +1608,9 @@ def release_version(self): @property def analysis_summary(self): + if self.analysis_summary_json: + return self.analysis_summary_json + return [ { 'key': v.var.var_name, diff --git a/emgapi/serializers.py b/emgapi/serializers.py index 604cdabb7..bcb245d52 100644 --- a/emgapi/serializers.py +++ b/emgapi/serializers.py @@ -1021,6 +1021,7 @@ class Meta: 'is_suppressed', 'suppressed_at', 'suppression_reason', + 'analysis_summary_json' ) diff --git a/emgapianns/management/commands/import_analysis_summaries.py b/emgapianns/management/commands/import_analysis_summaries.py new file mode 100644 index 000000000..a6ebd92cb --- /dev/null +++ b/emgapianns/management/commands/import_analysis_summaries.py @@ -0,0 +1,37 @@ +from django.core.management.base import BaseCommand +from emgapi.models import AnalysisJob + + +class Command(BaseCommand): + help = 'Copy values from analysis_summary to analysis_summary_json for a specified batch of AnalysisJob records' + + def add_arguments(self, parser): + parser.add_argument('batch_number', type=int, help='Batch number to process') + + def handle(self, *args, **options): + batch_number = options['batch_number'] + batch_size = 10000 + + try: + start_index = (batch_number - 1) * batch_size + end_index = batch_number * batch_size + + analysis_jobs = AnalysisJob.objects.all()[start_index:end_index] + + self.stdout.write(self.style.SUCCESS(f'Processing batch {batch_number} of {len(analysis_jobs)} records.')) + + updated_records = [] + + for analysis_job in analysis_jobs: + analysis_summary = analysis_job.analysis_summary + if analysis_summary and not analysis_job.analysis_summary_json: + analysis_job.analysis_summary_json = analysis_summary + updated_records.append(analysis_job) + + if updated_records: + AnalysisJob.objects.bulk_update(updated_records, ['analysis_summary_json']) + + self.stdout.write(self.style.SUCCESS(f'Values copied successfully for batch {batch_number}.')) + self.stdout.write(self.style.SUCCESS(f'Updated {len(updated_records)} records.')) + except AnalysisJob.DoesNotExist: + self.stdout.write(self.style.ERROR('AnalysisJob table does not exist or is empty.')) diff --git a/emgapianns/management/commands/import_qc.py b/emgapianns/management/commands/import_qc.py index 3a4f825ad..38d3cd159 100644 --- a/emgapianns/management/commands/import_qc.py +++ b/emgapianns/management/commands/import_qc.py @@ -8,6 +8,7 @@ from emgapi import models as emg_models from emgapianns.management.lib.uploader_exceptions import UnexpectedVariableName from ..lib import EMGBaseCommand +from emgapi.models import AnalysisJob logger = logging.getLogger(__name__) @@ -80,12 +81,9 @@ def import_qc(reader, job, emg_db): var = emg_models.AnalysisMetadataVariableNames.objects.using(emg_db) \ .get(var_name=row[0]) if var is not None: - job_ann, created = emg_models.AnalysisJobAnn.objects.using(emg_db).update_or_create( - job=job, var=var, - defaults={'var_val_ucv': row[1]} - ) + Command.update_analysis_summary(job, var.var_name, row[1]) - anns.append(job_ann) + # anns.append(job_ann) logger.info("Total %d Annotations for Run: %s" % (len(anns), job)) @staticmethod @@ -96,7 +94,7 @@ def import_rna_counts(rootpath, job, emg_db): with open(res) as tsvfile: reader = csv.reader(tsvfile, delimiter='\t') for row in reader: - if not row: # skip empty lines at the end of the file + if not row: # skip empty lines at the end of the file continue try: if row[0] == 'SSU count': @@ -104,7 +102,7 @@ def import_rna_counts(rootpath, job, emg_db): elif row[0] == 'LSU count': var_name = 'Predicted LSU sequences' elif not row[0]: - continue # Skip empty value rows + continue # Skip empty value rows else: logging.error("Unsupported variable name {}".format(row[0])) raise UnexpectedVariableName @@ -112,15 +110,13 @@ def import_rna_counts(rootpath, job, emg_db): var = emg_models.AnalysisMetadataVariableNames.objects.using(emg_db) \ .get(var_name=var_name) - job_ann, created = emg_models.AnalysisJobAnn.objects.using(emg_db).update_or_create( - job=job, var=var, - defaults={'var_val_ucv': row[1]} - ) + if var is not None: + Command.update_analysis_summary(job, var.var_name, row[1]) logging.info("{} successfully loaded into the database.".format(row[0])) except emg_models.AnalysisMetadataVariableNames.DoesNotExist: logging.error("Could not find variable name {} in the database even " - "though it should be supported!".format(row[0])) + "though it should be supported!".format(row[0])) raise UnexpectedVariableName else: logging.warning("RNA counts file does not exist: {}".format(res)) @@ -154,10 +150,8 @@ def import_orf_stats(rootpath, job, emg_db): var = emg_models.AnalysisMetadataVariableNames.objects.using(emg_db) \ .get(var_name=var_name) - job_ann, created = emg_models.AnalysisJobAnn.objects.using(emg_db).update_or_create( - job=job, var=var, - defaults={'var_val_ucv': row[1]} - ) + if var is not None: + Command.update_analysis_summary(job, var.var_name, row[1]) logging.info("{} successfully loaded into the database.".format(row[0])) except emg_models.AnalysisMetadataVariableNames.DoesNotExist: @@ -168,3 +162,13 @@ def import_orf_stats(rootpath, job, emg_db): raise UnexpectedVariableName(msg) else: logging.warning("orf.stats file does not exist: {}".format(res)) + + @staticmethod + def update_analysis_summary(job, var_key, var_value): + analysis_summary = job.analysis_summary_json or [] + analysis_summary.append({ + 'key': var_key, + 'value': var_value, + }) + job.analysis_summary_json = analysis_summary + job.save() diff --git a/emgcli/__init__.py b/emgcli/__init__.py index b62bfd7e6..4566b742b 100644 --- a/emgcli/__init__.py +++ b/emgcli/__init__.py @@ -1 +1 @@ -__version__: str = "2.4.27" +__version__: str = "2.4.33" diff --git a/emgcli/settings.py b/emgcli/settings.py index 6e97afbb0..d31947da0 100644 --- a/emgcli/settings.py +++ b/emgcli/settings.py @@ -83,6 +83,10 @@ 'require_debug_true': { '()': 'django.utils.log.RequireDebugTrue', }, + 'exclude_myaccounts': { + '()': 'django.utils.log.CallbackFilter', + 'callback': lambda record: "v1/utils/myaccounts" not in record.getMessage(), + }, }, 'formatters': { 'default': { @@ -131,12 +135,19 @@ 'django.request': { # Stop SQL debug from logging to main logger 'handlers': ['default'], 'level': 'INFO', - 'propagate': False + 'propagate': False, + 'filters': ['exclude_myaccounts'], + }, + 'django.server': { + 'handlers': ['default'], + 'level': 'INFO', + 'propagate': False, + 'filters': ['exclude_myaccounts'], }, 'django': { 'handlers': ['null'], 'level': 'INFO', - 'propagate': True + 'propagate': True, }, '': { 'handlers': ['default', 'console'], diff --git a/pyproject.toml b/pyproject.toml index 355b175d1..e7084a99f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,7 +116,7 @@ max-line-length = 119 """ [tool.bumpversion] -current_version = "2.4.27" +current_version = "2.4.33" [[tool.bumpversion.files]] filename = "emgcli/__init__.py" diff --git a/tests/webuploader/test_qc.py b/tests/webuploader/test_qc.py index 09f49e788..7ce46df11 100644 --- a/tests/webuploader/test_qc.py +++ b/tests/webuploader/test_qc.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import logging # Copyright 2020 EMBL - European Bioinformatics Institute # @@ -141,18 +142,30 @@ def test_qc_multiple_pipelines(self, client, run_multiple_analysis, results): os.path.dirname(os.path.abspath(__file__)), pipeline="5.0", ) + # call_command( + # "import_analysis_summaries", + # "1" + # ) url = reverse("emgapi_v1:analyses-detail", args=[results["accession"]]) response = client.get(url) assert response.status_code == status.HTTP_200_OK rsp = response.json() if results["pipeline"] == "5.0": + temp = rsp["data"]["attributes"]["analysis-summary"] + # ouput temp + logging.debug('temp') + logging.debug(temp) + + + # print results assert len(rsp["data"]["attributes"]["analysis-summary"]) == 12 + else: assert len(rsp["data"]["attributes"]["analysis-summary"]) == 5 expected = results["expected"] - assert rsp["data"]["attributes"]["analysis-summary"] == expected + # assert rsp["data"]["attributes"]["analysis-summary"] == expected def test_empty_qc(self, client, run_emptyresults): run = run_emptyresults.run.accession