Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
MGS-sails committed Sep 20, 2023
2 parents 745882d + ba1b0b5 commit 50a59c1
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 21 deletions.
18 changes: 18 additions & 0 deletions emgapi/migrations/0011_analysisjob_analysis_summary_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.18 on 2023-09-13 10:24

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``analysis_summary_json`` JSON field to the ``analysisjob`` model."""

    # Must be applied after the migration that precedes it in the emgapi app.
    dependencies = [('emgapi', '0010_runextraannotation')]

    operations = [
        migrations.AddField(
            model_name='analysisjob',
            name='analysis_summary_json',
            # Stored in the ANALYSIS_SUMMARY_JSON column; optional in both
            # forms (blank) and the database (null).
            field=models.JSONField(
                db_column='ANALYSIS_SUMMARY_JSON',
                null=True,
                blank=True,
            ),
        ),
    ]
5 changes: 5 additions & 0 deletions emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1559,6 +1559,8 @@ def _custom_pk(self):
blank=True, null=True)
job_operator = models.CharField(
db_column='JOB_OPERATOR', max_length=15, blank=True, null=True)
analysis_summary_json = models.JSONField(
db_column='ANALYSIS_SUMMARY_JSON', blank=True, null=True)
pipeline = models.ForeignKey(
Pipeline, db_column='PIPELINE_ID', blank=True, null=True,
related_name='analyses', on_delete=models.CASCADE)
Expand Down Expand Up @@ -1606,6 +1608,9 @@ def release_version(self):

@property
def analysis_summary(self):
if self.analysis_summary_json:
return self.analysis_summary_json

return [
{
'key': v.var.var_name,
Expand Down
1 change: 1 addition & 0 deletions emgapi/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,7 @@ class Meta:
'is_suppressed',
'suppressed_at',
'suppression_reason',
'analysis_summary_json'
)


Expand Down
37 changes: 37 additions & 0 deletions emgapianns/management/commands/import_analysis_summaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from django.core.management.base import BaseCommand
from emgapi.models import AnalysisJob


class Command(BaseCommand):
    """Backfill ``AnalysisJob.analysis_summary_json`` one fixed-size batch at a time.

    For every job in the requested batch whose ``analysis_summary`` property
    yields a truthy value and whose ``analysis_summary_json`` is still empty,
    the summary is copied into ``analysis_summary_json`` and persisted with a
    bulk update.

    Batches are 1-based: batch ``n`` covers rows ``(n - 1) * 10000`` up to (but
    not including) ``n * 10000`` of the jobs ordered by primary key.
    """

    help = 'Copy values from analysis_summary to analysis_summary_json for a specified batch of AnalysisJob records'

    def add_arguments(self, parser):
        """Register the positional, integer ``batch_number`` argument."""
        parser.add_argument('batch_number', type=int, help='Batch number to process')

    def handle(self, *args, **options):
        """Copy summaries for the batch selected by ``options['batch_number']``.

        Raises:
            CommandError: if ``batch_number`` is smaller than 1.
        """
        # Imported locally so this block does not depend on a change to the
        # module-level imports.
        from django.core.management.base import CommandError

        batch_number = options['batch_number']
        batch_size = 10000

        # Batch 0 or a negative batch would produce a negative slice start,
        # which the ORM rejects with an unhelpful error; fail early instead.
        if batch_number < 1:
            raise CommandError('batch_number must be a positive integer (batches are numbered from 1).')

        start_index = (batch_number - 1) * batch_size
        end_index = batch_number * batch_size

        try:
            # An explicit ordering makes the slice deterministic, so that
            # consecutive batch numbers cover disjoint, stable sets of rows.
            analysis_jobs = AnalysisJob.objects.order_by('pk')[start_index:end_index]

            # len() evaluates the sliced queryset once; the loop below reuses
            # the cached result.
            self.stdout.write(self.style.SUCCESS(f'Processing batch {batch_number} of {len(analysis_jobs)} records.'))

            updated_records = []

            for analysis_job in analysis_jobs:
                analysis_summary = analysis_job.analysis_summary
                # Only fill in jobs that have a summary and no JSON copy yet.
                if analysis_summary and not analysis_job.analysis_summary_json:
                    analysis_job.analysis_summary_json = analysis_summary
                    updated_records.append(analysis_job)

            if updated_records:
                # Chunk the UPDATE so a single statement never carries the
                # whole 10000-row batch.
                AnalysisJob.objects.bulk_update(
                    updated_records,
                    ['analysis_summary_json'],
                    batch_size=1000,
                )

            self.stdout.write(self.style.SUCCESS(f'Values copied successfully for batch {batch_number}.'))
            self.stdout.write(self.style.SUCCESS(f'Updated {len(updated_records)} records.'))
        except AnalysisJob.DoesNotExist:
            # NOTE(review): slicing, iterating and bulk-updating a queryset are
            # not expected to raise DoesNotExist; the handler is kept only for
            # backward compatibility - confirm whether it can be removed.
            self.stdout.write(self.style.ERROR('AnalysisJob table does not exist or is empty.'))
36 changes: 20 additions & 16 deletions emgapianns/management/commands/import_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from emgapi import models as emg_models
from emgapianns.management.lib.uploader_exceptions import UnexpectedVariableName
from ..lib import EMGBaseCommand
from emgapi.models import AnalysisJob

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -80,12 +81,9 @@ def import_qc(reader, job, emg_db):
var = emg_models.AnalysisMetadataVariableNames.objects.using(emg_db) \
.get(var_name=row[0])
if var is not None:
job_ann, created = emg_models.AnalysisJobAnn.objects.using(emg_db).update_or_create(
job=job, var=var,
defaults={'var_val_ucv': row[1]}
)
Command.update_analysis_summary(job, var.var_name, row[1])

anns.append(job_ann)
# anns.append(job_ann)
logger.info("Total %d Annotations for Run: %s" % (len(anns), job))

@staticmethod
Expand All @@ -96,31 +94,29 @@ def import_rna_counts(rootpath, job, emg_db):
with open(res) as tsvfile:
reader = csv.reader(tsvfile, delimiter='\t')
for row in reader:
if not row: # skip empty lines at the end of the file
if not row: # skip empty lines at the end of the file
continue
try:
if row[0] == 'SSU count':
var_name = 'Predicted SSU sequences'
elif row[0] == 'LSU count':
var_name = 'Predicted LSU sequences'
elif not row[0]:
continue # Skip empty value rows
continue # Skip empty value rows
else:
logging.error("Unsupported variable name {}".format(row[0]))
raise UnexpectedVariableName

var = emg_models.AnalysisMetadataVariableNames.objects.using(emg_db) \
.get(var_name=var_name)

job_ann, created = emg_models.AnalysisJobAnn.objects.using(emg_db).update_or_create(
job=job, var=var,
defaults={'var_val_ucv': row[1]}
)
if var is not None:
Command.update_analysis_summary(job, var.var_name, row[1])
logging.info("{} successfully loaded into the database.".format(row[0]))

except emg_models.AnalysisMetadataVariableNames.DoesNotExist:
logging.error("Could not find variable name {} in the database even "
"though it should be supported!".format(row[0]))
"though it should be supported!".format(row[0]))
raise UnexpectedVariableName
else:
logging.warning("RNA counts file does not exist: {}".format(res))
Expand Down Expand Up @@ -154,10 +150,8 @@ def import_orf_stats(rootpath, job, emg_db):
var = emg_models.AnalysisMetadataVariableNames.objects.using(emg_db) \
.get(var_name=var_name)

job_ann, created = emg_models.AnalysisJobAnn.objects.using(emg_db).update_or_create(
job=job, var=var,
defaults={'var_val_ucv': row[1]}
)
if var is not None:
Command.update_analysis_summary(job, var.var_name, row[1])
logging.info("{} successfully loaded into the database.".format(row[0]))

except emg_models.AnalysisMetadataVariableNames.DoesNotExist:
Expand All @@ -168,3 +162,13 @@ def import_orf_stats(rootpath, job, emg_db):
raise UnexpectedVariableName(msg)
else:
logging.warning("orf.stats file does not exist: {}".format(res))

@staticmethod
def update_analysis_summary(job, var_key, var_value):
    """Set ``var_key`` to ``var_value`` in ``job.analysis_summary_json`` and save the job.

    The summary is treated as a list of ``{'key': ..., 'value': ...}`` dicts.
    If an entry with the same key already exists its value is replaced, so
    importing the same variable again does not create duplicate entries;
    otherwise a new entry is appended.

    Args:
        job: object exposing ``analysis_summary_json`` and ``save()``.
        var_key: name of the summary variable.
        var_value: value to store for that variable.
    """
    analysis_summary = job.analysis_summary_json or []

    for entry in analysis_summary:
        # Entries that are not dicts are left untouched.
        if isinstance(entry, dict) and entry.get('key') == var_key:
            entry['value'] = var_value
            break
    else:
        # No existing entry for this key: add a new one.
        analysis_summary.append({
            'key': var_key,
            'value': var_value,
        })

    job.analysis_summary_json = analysis_summary
    job.save()
2 changes: 1 addition & 1 deletion emgcli/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__: str = "2.4.27"
__version__: str = "2.4.33"
15 changes: 13 additions & 2 deletions emgcli/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@
'require_debug_true': {
'()': 'django.utils.log.RequireDebugTrue',
},
'exclude_myaccounts': {
'()': 'django.utils.log.CallbackFilter',
'callback': lambda record: "v1/utils/myaccounts" not in record.getMessage(),
},
},
'formatters': {
'default': {
Expand Down Expand Up @@ -131,12 +135,19 @@
'django.request': { # Stop SQL debug from logging to main logger
'handlers': ['default'],
'level': 'INFO',
'propagate': False
'propagate': False,
'filters': ['exclude_myaccounts'],
},
'django.server': {
'handlers': ['default'],
'level': 'INFO',
'propagate': False,
'filters': ['exclude_myaccounts'],
},
'django': {
'handlers': ['null'],
'level': 'INFO',
'propagate': True
'propagate': True,
},
'': {
'handlers': ['default', 'console'],
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ max-line-length = 119
"""

[tool.bumpversion]
current_version = "2.4.27"
current_version = "2.4.33"

[[tool.bumpversion.files]]
filename = "emgcli/__init__.py"
15 changes: 14 additions & 1 deletion tests/webuploader/test_qc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging

# Copyright 2020 EMBL - European Bioinformatics Institute
#
Expand Down Expand Up @@ -141,18 +142,30 @@ def test_qc_multiple_pipelines(self, client, run_multiple_analysis, results):
os.path.dirname(os.path.abspath(__file__)),
pipeline="5.0",
)
# call_command(
# "import_analysis_summaries",
# "1"
# )

url = reverse("emgapi_v1:analyses-detail", args=[results["accession"]])
response = client.get(url)
assert response.status_code == status.HTTP_200_OK
rsp = response.json()
if results["pipeline"] == "5.0":
temp = rsp["data"]["attributes"]["analysis-summary"]
# output temp
logging.debug('temp')
logging.debug(temp)


# print results
assert len(rsp["data"]["attributes"]["analysis-summary"]) == 12

else:
assert len(rsp["data"]["attributes"]["analysis-summary"]) == 5

expected = results["expected"]
assert rsp["data"]["attributes"]["analysis-summary"] == expected
# assert rsp["data"]["attributes"]["analysis-summary"] == expected

def test_empty_qc(self, client, run_emptyresults):
run = run_emptyresults.run.accession
Expand Down

0 comments on commit 50a59c1

Please sign in to comment.