Skip to content

Commit

Permalink
Merge pull request #233 from kjsanger/fix/find-illumina-flowcells-query
Browse files Browse the repository at this point in the history
 Fix Illumina flowcells query to use the entity_type column
  • Loading branch information
mgcam authored Jan 9, 2024
2 parents eef8337 + 3d7336a commit b5c3180
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 39 deletions.
67 changes: 36 additions & 31 deletions src/npg_irods/illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from typing import Iterator, Optional, Type

from partisan.irods import AVU, Collection, DataObject
from partisan.metadata import AsValueEnum
from sqlalchemy import asc
from sqlalchemy.orm import Session
from structlog import get_logger
Expand Down Expand Up @@ -57,11 +58,17 @@ class TagIndex(Enum):
Rather, it is a bin for reads that cannot be associated with any of the candidate
tags in a pool after sequencing."""

CONTROL_198 = 198

CONTROL_888 = 888
"""Tag index 888 is conventionally used to indicate a control sample e.g. Phi X
that has been added to a pool."""
@unique
class EntityType(AsValueEnum):
    """The type of sequenced material applied to a flowcell. This relates to the
    entity_type column in the MLWH. The values are defined in the MLWH schema
    metadata."""

    # NOTE(review): the per-member meanings are not spelled out here; confirm
    # against the MLWH schema metadata before relying on them.
    LIBRARY = "library"
    LIBRARY_CONTROL = "library_control"
    LIBRARY_INDEXED = "library_indexed"
    LIBRARY_INDEXED_SPIKE = "library_indexed_spike"


@dataclass(order=True)
Expand Down Expand Up @@ -141,7 +148,7 @@ def __repr__(self):
if self.tag_index is not None:
rep[SeqConcept.TAG_INDEX.value] = self.tag_index
if self.subset is not None:
rep[SeqConcept.SUBSET.value] = self.subset
rep[SeqConcept.SUBSET.value] = self.subset.value

return json.dumps(rep, sort_keys=True, separators=(",", ":"))

Expand Down Expand Up @@ -202,25 +209,29 @@ def empty_acl(*args):
return []

if requires_full_metadata(item):
log.info("Requires full metadata", path=item)
log.debug("Requires full metadata", path=item)
sample_fn, study_fn = make_sample_metadata, make_study_metadata
else:
log.info("Requires reduced metadata", path=item)
log.debug("Requires reduced metadata", path=item)
sample_fn, study_fn = make_reduced_sample_metadata, make_reduced_study_metadata

if requires_managed_access(item):
log.info("Requires managed access", path=item)
log.debug("Requires managed access", path=item)
acl_fn = make_sample_acl
else:
log.info("Does not require managed access", path=item)
log.debug("Does not require managed access", path=item)
acl_fn = empty_acl

# Each component may be associated with multiple flowcells
components = find_associated_components(item)
log.debug("Found associated components", path=item, comp=components)

for c in components:
for fc in find_flowcells_by_component(
flowcells = find_flowcells_by_component(
mlwh_session, c, include_controls=include_controls
):
)
log.debug("Found associated flowcells", path=item, flowcells=flowcells, comp=c)
for fc in flowcells:
secondary_metadata.extend(sample_fn(fc.sample))
secondary_metadata.extend(study_fn(fc.study))
acl.extend(acl_fn(fc.sample, fc.study, zone=zone))
Expand Down Expand Up @@ -467,31 +478,25 @@ def find_flowcells_by_component(
sess.query(IseqFlowcell)
.distinct()
.join(IseqFlowcell.iseq_product_metrics)
.filter(IseqProductMetrics.id_run == component.id_run)
.filter(
IseqProductMetrics.id_run == component.id_run,
IseqFlowcell.position == component.position,
)
)

if component.position is not None:
query = query.filter(IseqProductMetrics.position == component.position)
if not include_controls:
query = query.filter(
IseqFlowcell.entity_type.notin_(
[
EntityType.LIBRARY_CONTROL.value,
EntityType.LIBRARY_INDEXED_SPIKE.value,
]
)
)

match component.tag_index:
case TagIndex.CONTROL_198.value | TagIndex.CONTROL_888.value:
if not include_controls:
query = query.filter(
IseqProductMetrics.tag_index.notin_(
[TagIndex.CONTROL_198.value, TagIndex.CONTROL_888.value]
)
)

query = query.filter(IseqProductMetrics.tag_index == component.tag_index)
case TagIndex.BIN.value:
if not include_controls:
query = query.filter(
IseqProductMetrics.tag_index.notin_(
[TagIndex.CONTROL_198.value, TagIndex.CONTROL_888.value]
)
)

query = query.filter(IseqProductMetrics.tag_index.is_not(None))
query = query.filter(IseqProductMetrics.tag_index.isnot(None))
case int():
query = query.filter(IseqProductMetrics.tag_index == component.tag_index)
case None:
Expand Down
61 changes: 53 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
Sample,
Study,
)
from npg_irods.illumina import EntityType
from npg_irods.metadata import illumina, ont
from npg_irods.metadata.common import DataFile, SeqConcept
from npg_irods.metadata.lims import TrackedSample
Expand Down Expand Up @@ -413,34 +414,78 @@ def initialize_mlwh_illumina_synthetic(session: Session):

sample_info = [
# Not multiplexed
{"study": study_a, "sample": sample1, "position": 1, "tag_index": None},
{
"study": study_a,
"sample": sample1,
"position": 1,
"tag_index": None,
"entity_type": EntityType.LIBRARY.value,
},
# Multiplexed, samples from the same study
{"study": study_a, "sample": sample1, "position": 1, "tag_index": 1},
{"study": study_a, "sample": sample2, "position": 1, "tag_index": 2},
{"study": study_a, "sample": sample1, "position": 2, "tag_index": 1},
{"study": study_a, "sample": sample2, "position": 2, "tag_index": 2},
{
"study": study_a,
"sample": sample1,
"position": 1,
"tag_index": 1,
"entity_type": EntityType.LIBRARY_INDEXED.value,
},
{
"study": study_a,
"sample": sample2,
"position": 1,
"tag_index": 2,
"entity_type": EntityType.LIBRARY_INDEXED.value,
},
{
"study": study_a,
"sample": sample1,
"position": 2,
"tag_index": 1,
"entity_type": EntityType.LIBRARY_INDEXED.value,
},
{
"study": study_a,
"sample": sample2,
"position": 2,
"tag_index": 2,
"entity_type": EntityType.LIBRARY_INDEXED.value,
},
# Multiplexed, samples from different studies
{"study": study_a, "sample": sample1, "position": 2, "tag_index": 1},
{"study": study_b, "sample": sample3, "position": 2, "tag_index": 2},
{
"study": study_a,
"sample": sample1,
"position": 2,
"tag_index": 1,
"entity_type": EntityType.LIBRARY_INDEXED.value,
},
{
"study": study_b,
"sample": sample3,
"position": 2,
"tag_index": 2,
"entity_type": EntityType.LIBRARY_INDEXED.value,
},
# Phi X
{
"study": control_study,
"sample": control_sample,
"position": 1,
"tag_index": 888,
"entity_type": EntityType.LIBRARY_INDEXED_SPIKE.value,
},
{
"study": control_study,
"sample": control_sample,
"position": 2,
"tag_index": 888,
"entity_type": EntityType.LIBRARY_INDEXED_SPIKE.value,
},
]

flowcells = [
IseqFlowcell(
entity_id_lims=f"ENTITY_01",
entity_type=f"ENTITY_TYPE_01",
entity_type=info["entity_type"],
id_flowcell_lims=f"FLOWCELL{i}",
id_lims="LIMS_01",
id_pool_lims=f"POOL_01",
Expand Down

0 comments on commit b5c3180

Please sign in to comment.