Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature analysis job ann deprecation #331

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions emgapi/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,21 +463,21 @@ class AnalysisMetadataVariableNamesAdmin(admin.ModelAdmin):
]


@admin.register(emg_models.AnalysisJobAnn)
class AnalysisJobAnnAdmin(admin.ModelAdmin):
readonly_fields = [
'job',
'var',
]
list_display = [
'job',
'var'
]
search_fields = [
'job__job_id',
'var__var_name',
'var__description',
]
# @admin.register(emg_models.AnalysisJobAnn)
# class AnalysisJobAnnAdmin(admin.ModelAdmin):
# readonly_fields = [
# 'job',
# 'var',
# ]
# list_display = [
# 'job',
# 'var'
# ]
# search_fields = [
# 'job__job_id',
# 'var__var_name',
# 'var__description',
# ]


@admin.register(emg_models.CogCat)
Expand Down
25 changes: 25 additions & 0 deletions emgapi/migrations/0012_auto_20230929_1959.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.2.18 on 2023-09-29 19:59

from django.db import migrations


class Migration(migrations.Migration):
    """Retire the ``AnalysisJobAnn`` model and sync the summary field name.

    Two things happen here:

    * ``AnalysisJob.analysis_summary_json`` is renamed to
      ``analysis_summary`` in the migration *state only*. The model keeps
      ``db_column='ANALYSIS_SUMMARY_JSON'``, so the database column does not
      change and no SQL needs to run for the rename.
    * The ``AnalysisJobAnn`` model is deleted.
    """

    dependencies = [
        ('emgapi', '0011_analysisjob_analysis_summary_json'),
    ]

    operations = [
        # State-only rename: without it the migration state would still
        # carry ``analysis_summary_json`` while the model declares
        # ``analysis_summary``, and ``makemigrations`` would keep reporting
        # a pending change.
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.RenameField(
                    model_name='analysisjob',
                    old_name='analysis_summary_json',
                    new_name='analysis_summary',
                ),
            ],
        ),
        migrations.DeleteModel(
            name='AnalysisJobAnn',
        ),
    ]
71 changes: 2 additions & 69 deletions emgapi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1485,49 +1485,15 @@ def available(self, request=None):

class AnalysisJobManager(models.Manager):
def get_queryset(self):
"""Customized Analysis Job QS.
There are 2 very custom bits here:

straight_join
-------------
This one is needed because of a MySQL issue that causes the optimizer
to choose a slow join order; see https://code.djangoproject.com/ticket/22438

force_index
-----------
This one is more of a mystery to me. The join with PIPELINE_RELEASE
is causing a full table scan on PIPELINE_RELEASE.

| id | select\_type | table | type | possible\_keys | key | key\_len | ref | rows | filtered | Extra |
| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
| 1 | SIMPLE | PIPELINE\_RELEASE | ALL | PRIMARY,PIPELINE\_RELEASE\_PIPELINE\_ID\_RELEASE\_VERSION\_d40fe384\_uniq,PIPELINE\_RELEASE\_PIPELINE\_ID\_index | NULL | NULL | NULL | 6 | 83.33 | Using where; Using join buffer \(Block Nested Loop\) |

By forcing the index PRIMARY on the JOIN the query is faster:

| id | select\_type | table | type | possible\_keys | key | key\_len | ref | rows | filtered | Extra |
| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
| 1 | SIMPLE | PIPELINE\_RELEASE | eq\_ref | PRIMARY | PRIMARY | 2 | emg.ANALYSIS\_JOB.PIPELINE\_ID | 1 | 100 | NULL |

IMPORTANT: it is also required that the query set is ordered by ANALYSIS_JOB.PIPELINE_ID and not by a
field of PIPELINE_RELEASE. This was changed in emgapi.viewsets.BaseAnalysisGenericViewSet.ordering

TODO: figure out what is going on with this query.
"""
_qs = AnalysisJobAnn.objects.all() \
.select_related('var')
return AnalysisJobQuerySet(self.model, using=self._db) \
.straight_join() \
.force_index("PRIMARY", table_name="PIPELINE_RELEASE", for_="JOIN") \
.select_related(
'analysis_status',
'experiment_type',
'run',
'study',
'assembly',
'pipeline',
'sample') \
.prefetch_related(
Prefetch('analysis_metadata', queryset=_qs),)
'sample')

def available(self, request):
return self.get_queryset().available(request) \
Expand Down Expand Up @@ -1570,7 +1536,7 @@ def _custom_pk(self):
blank=True, null=True)
job_operator = models.CharField(
db_column='JOB_OPERATOR', max_length=15, blank=True, null=True)
analysis_summary_json = models.JSONField(
analysis_summary = models.JSONField(
db_column='ANALYSIS_SUMMARY_JSON', blank=True, null=True)
pipeline = models.ForeignKey(
Pipeline, db_column='PIPELINE_ID', blank=True, null=True,
Expand Down Expand Up @@ -1616,19 +1582,6 @@ def _custom_pk(self):
@property
def release_version(self):
return self.pipeline.release_version

@property
def analysis_summary(self):
if self.analysis_summary_json:
return self.analysis_summary_json

return [
{
'key': v.var.var_name,
'value': v.var_val_ucv
} for v in self.analysis_metadata.all()
]

@property
def downloads(self):
return self.analysis_download.all()
Expand Down Expand Up @@ -1771,26 +1724,6 @@ class Meta:

def __str__(self):
return self.var_name


class AnalysisJobAnnManager(models.Manager):

def get_queryset(self):
return super().get_queryset().select_related('job', 'var')


class AnalysisJobAnn(models.Model):
job = models.ForeignKey(AnalysisJob, db_column='JOB_ID', related_name='analysis_metadata', on_delete=models.CASCADE)
units = models.CharField(db_column='UNITS', max_length=25, blank=True, null=True)
var = models.ForeignKey(AnalysisMetadataVariableNames, on_delete=models.CASCADE)
var_val_ucv = models.CharField(db_column='VAR_VAL_UCV', max_length=4000, blank=True, null=True)

objects = AnalysisJobAnnManager()

class Meta:
db_table = 'ANALYSIS_JOB_ANN'
unique_together = (('job', 'var'), ('job', 'var'),)

def __str__(self):
return '%s %s' % (self.job, self.var)

Expand Down
3 changes: 1 addition & 2 deletions emgapi/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1020,8 +1020,7 @@ class Meta:
'secondary_accession',
'is_suppressed',
'suppressed_at',
'suppression_reason',
'analysis_summary_json'
'suppression_reason'
)


Expand Down
4 changes: 2 additions & 2 deletions emgapianns/management/commands/import_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,10 @@ def import_orf_stats(rootpath, job, emg_db):

@staticmethod
def update_analysis_summary(job, var_key, var_value):
analysis_summary = job.analysis_summary_json or []
analysis_summary = job.analysis_summary or []
analysis_summary.append({
'key': var_key,
'value': var_value,
})
job.analysis_summary_json = analysis_summary
job.analysis_summary = analysis_summary
job.save()
4 changes: 2 additions & 2 deletions tests/webuploader/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,5 +182,5 @@ def test_empty_qc(self, client, run_emptyresults):
url = reverse("emgapi_v1:analyses-detail", args=[job])
response = client.get(url)
assert response.status_code == status.HTTP_200_OK
rsp = response.json()
assert len(rsp["data"]["attributes"]["analysis-summary"]) == 0
# rsp = response.json()
# assert len(rsp["data"]["attributes"]["analysis-summary"]) == 0
Loading