Skip to content

Commit

Permalink
Added a model representing libraries in a well.
Browse files Browse the repository at this point in the history
  • Loading branch information
mgcam committed Jun 11, 2024
1 parent fbff9a7 commit ed54cab
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 6 deletions.
40 changes: 37 additions & 3 deletions lang_qc/models/pacbio/well.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pydantic.dataclasses import dataclass

from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.models.pacbio.experiment import PacBioExperiment
from lang_qc.models.pacbio.experiment import PacBioExperiment, PacBioLibrary
from lang_qc.models.pacbio.qc_data import QCDataWell
from lang_qc.models.pager import PagedResponse
from lang_qc.models.qc_state import QcState
Expand Down Expand Up @@ -132,9 +132,10 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
"""

# https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
assert mlwh_db_row
if "db_well" not in values.kwargs:
raise ValueError("None db_well value is not allowed.")

mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
column_names = [column.key for column in PacBioRunWellMetrics.__table__.columns]

assigned = dict()
Expand Down Expand Up @@ -175,6 +176,39 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
return assigned


@dataclass(kw_only=True, frozen=True)
class PacBioWellLibraries(PacBioWell):
"""A response model binding together basic PacBio well and LIMS data for
the libraries, which were sequenced in this well.
"""

libraries: list[PacBioLibrary] = Field(
title="A list of `PacBioLibrary` objects",
description="""
A list of `PacBioLibrary` objects. Each member of the list represents
a library, which was sequenced in this well. If the object is created
by supplying the `db_well` attribute via the constructor, the list
is never empty. The list is not sorted.
""",
)

@model_validator(mode="before")
def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:

assigned = super().pre_root(values)
mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
lims_data = mlwh_db_row.get_experiment_info()
if len(lims_data) == 0:
raise ValueError(
f"No LIMS data retrieved for {mlwh_db_row.__repr__()} "
"on account of partially linked or unlinked product data."
)

assigned["libraries"] = [PacBioLibrary(db_library=row) for row in lims_data]

return assigned


class PacBioPagedWells(PagedResponse, extra="forbid"):
"""A response model for paged data about PacBio wells."""

Expand Down
62 changes: 59 additions & 3 deletions tests/test_pb_well_models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import pytest
from npg_id_generation.pac_bio import PacBioEntity
from sqlalchemy.orm import Session

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.wells import WellWh
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.models.pacbio.well import PacBioWellFull, PacBioWellSummary
from lang_qc.models.pacbio.experiment import PacBioLibrary
from lang_qc.models.pacbio.well import (
PacBioWellFull,
PacBioWellLibraries,
PacBioWellSummary,
)
from tests.conftest import compare_dates
from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users

Expand Down Expand Up @@ -116,16 +122,23 @@ def test_create_full_model(
assert pb_well.experiment_tracking is None


def test_create_summary_model(
def test_create_summary_and_library_models(
mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs
):

with pytest.raises(ValueError, match=r"None db_well value is not allowed."):
PacBioWellSummary(plate_number=3)

(well_row, qc_state) = _prepare_data(
mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-92", "A1"
)
pb_well = PacBioWellSummary(db_well=well_row)
_examine_well_model_a1(pb_well, well_row.id_pac_bio_product)
assert pb_well.study_names == ["Tree of Life - ASG"]

pb_well = PacBioWellLibraries(db_well=well_row)
_examine_well_model_a1(pb_well, well_row.id_pac_bio_product)

(well_row, qc_state) = _prepare_data(
mlwhdb_test_session, qcdb_test_session, "TRACTION_RUN_1", "B1"
)
Expand All @@ -140,7 +153,7 @@ def test_create_summary_model(
_examine_well_model_c1(pb_well, well_row.id_pac_bio_product)


def test_create_summary_model_study_info(
def test_create_summary_and_library_models_lims_info(
mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs
):
# Well with two samples, none is linked to LIMS
Expand All @@ -150,6 +163,9 @@ def test_create_summary_model_study_info(
pb_well = PacBioWellSummary(db_well=well_row)
assert pb_well.study_names == []

with pytest.raises(ValueError, match=r"No LIMS data retrieved"):
PacBioWellLibraries(db_well=well_row)

# Fully linked wells with one sample
(well_row, qc_state) = _prepare_data(
mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1162", "C1"
Expand All @@ -163,6 +179,19 @@ def test_create_summary_model_study_info(
pb_well = PacBioWellSummary(db_well=well_row)
assert pb_well.study_names == ["DTOL_Darwin Tree of Life"]

pb_well = PacBioWellLibraries(db_well=well_row)
assert len(pb_well.libraries) == 1
expected_lib = PacBioLibrary(
study_id="5901",
study_name="DTOL_Darwin Tree of Life",
sample_id="9463663",
sample_name="DTOL14290946",
tag_sequence=["CTCAGCATACGAGTAT"],
library_type="Pacbio_HiFi",
pool_name="TRAC-2-7128",
)
assert pb_well.libraries[0] == expected_lib

# A fully linked well with multiple samples, all belonging to the same study
(well_row, qc_state) = _prepare_data(
mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1140", "B1", 1
Expand All @@ -180,6 +209,30 @@ def test_create_summary_model_study_info(
"ToL_Blaxter_ Reference Genomes_ DNA",
]

pb_well = PacBioWellLibraries(db_well=well_row)
assert len(pb_well.libraries) == 4
libs = {lib.sample_id: lib for lib in pb_well.libraries}
expected_lib = PacBioLibrary(
study_id="6771",
study_name="ToL_Blaxter_ Reference Genomes_ DNA",
sample_id="8657549",
sample_name="6771STDY13618009",
tag_sequence=["CTGCGATCACGAGTAT"],
library_type="Pacbio_HiFi",
pool_name="TRAC-2-7676",
)
assert libs["8657549"] == expected_lib
expected_lib = PacBioLibrary(
study_id="5901",
study_name="DTOL_Darwin Tree of Life",
sample_id="9463590",
sample_name="DTOL14291044",
tag_sequence=["TCTGCATCATGAGTAT"],
library_type="Pacbio_HiFi",
pool_name="TRAC-2-7676",
)
assert libs["9463590"] == expected_lib

# A partially linked well with three samples, which belong to two studies.
# The LIMS link for one of the samples is deleted so that two other samples
# belong to the same study.
Expand All @@ -188,3 +241,6 @@ def test_create_summary_model_study_info(
)
pb_well = PacBioWellSummary(db_well=well_row)
assert pb_well.study_names == []

with pytest.raises(ValueError, match=r"No LIMS data retrieved"):
PacBioWellLibraries(db_well=well_row)

0 comments on commit ed54cab

Please sign in to comment.