Skip to content

Commit

Permalink
Merge pull request #231 from mgcam/better_error_propagation
Browse files Browse the repository at this point in the history
Moved QCPoolMetrics model generation to the model itself.
  • Loading branch information
nerdstrike authored Jun 17, 2024
2 parents efff421 + 221fa08 commit 58c53bf
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 80 deletions.
48 changes: 0 additions & 48 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import logging
from datetime import date, datetime, timedelta
from statistics import mean, stdev
from typing import ClassVar, List

from pydantic import BaseModel, ConfigDict, Field
Expand All @@ -34,7 +33,6 @@
)
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
from lang_qc.models.pacbio.qc_data import QCPoolMetrics, SampleDeplexingStats
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellSummary
from lang_qc.models.pager import PagedResponse
from lang_qc.models.qc_flow_status import QcFlowStatusEnum
Expand Down Expand Up @@ -80,52 +78,6 @@ def get_mlwh_well_by_product_id(
)
).scalar_one_or_none()

def get_metrics_by_well_product_id(
    self, id_product: PacBioWellSHA256
) -> QCPoolMetrics | None:
    """
    Builds per-sample deplexing metrics for the well with the given product ID.

    Returns None when no well row is found for `id_product` or when the
    well's `demultiplex_mode` is unset or does not contain "Instrument".

    The coefficient of variance (standard deviation as a percentage of the
    mean of the products' `hifi_num_reads`) is None when any product has no
    read count, when the pool has fewer than two products, or when the mean
    read count is zero, since it is undefined in these cases.

    Raises an Exception if any of the products has no linked LIMS data.
    """
    well = self.get_mlwh_well_by_product_id(id_product)
    if not (well and well.demultiplex_mode and "Instrument" in well.demultiplex_mode):
        return None

    product_metrics = well.pac_bio_product_metrics
    lib_lims_data = [
        product.pac_bio_run
        for product in product_metrics
        if product.pac_bio_run is not None
    ]
    if len(lib_lims_data) != len(product_metrics):
        raise Exception("Partially linked LIMS data or no linked LIMS data")

    hifi_reads = [prod.hifi_num_reads for prod in product_metrics]
    cov: float | None = None
    # statistics.stdev() needs at least two data points, and a zero mean
    # would cause a division by zero; leave the value undefined instead.
    if len(hifi_reads) > 1 and all(reads is not None for reads in hifi_reads):
        average = mean(hifi_reads)
        if average:
            cov = stdev(hifi_reads) / average * 100

    # The length check above guarantees that both lists line up one-to-one.
    sample_stats = [
        SampleDeplexingStats(
            id_product=prod.id_pac_bio_product,
            tag1_name=lims.tag_identifier,
            tag2_name=lims.tag2_identifier,
            deplexing_barcode=prod.barcode4deplexing,
            hifi_read_bases=prod.hifi_read_bases,
            hifi_num_reads=prod.hifi_num_reads,
            hifi_read_length_mean=prod.hifi_read_length_mean,
            hifi_bases_percent=prod.hifi_bases_percent,
            percentage_total_reads=(
                prod.hifi_num_reads / well.hifi_num_reads * 100
                if (well.hifi_num_reads and prod.hifi_num_reads)
                else None
            ),
        )
        for prod, lims in zip(product_metrics, lib_lims_data)
    ]

    return QCPoolMetrics(pool_coeff_of_variance=cov, products=sample_stats)

def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
"""
Get recent not QC-ed completed wells from the mlwh database.
Expand Down
15 changes: 8 additions & 7 deletions lang_qc/endpoints/pacbio_well.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,20 +222,21 @@ def get_seq_metrics(
summary="Get sample (deplexing) metrics for a multiplexed well product by the well ID",
responses={
status.HTTP_404_NOT_FOUND: {"description": "Product not found"},
status.HTTP_409_CONFLICT: {"description": "Missing or incomplete LIMS data"},
status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid product ID"},
},
response_model=QCPoolMetrics,
)
def get_product_metrics(
    id_product: PacBioWellSHA256, mlwhdb_session: Session = Depends(get_mlwh_db)
) -> QCPoolMetrics:
    # Comments rather than a docstring are used here on purpose: FastAPI
    # publishes an endpoint's docstring as its OpenAPI description.
    #
    # Looks up the well row for the product ID and builds the pool metrics
    # from it. NOTE(review): _find_well_product_or_error presumably raises
    # the 404 response advertised by the route when the product is unknown;
    # confirm against its definition.
    mlwh_well = _find_well_product_or_error(id_product, mlwhdb_session)
    try:
        metrics = QCPoolMetrics(db_well=mlwh_well)
    except MissingLimsDataError as err:
        # Missing or partially linked LIMS data is reported as a conflict;
        # chaining keeps the original error visible in server-side tracebacks.
        raise HTTPException(status.HTTP_409_CONFLICT, detail=str(err)) from err

    return metrics


Expand Down
70 changes: 68 additions & 2 deletions lang_qc/models/pacbio/qc_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,14 @@
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>

from pydantic import BaseModel, ConfigDict, Field
from statistics import mean, stdev
from typing import Any

from pydantic import BaseModel, ConfigDict, Field, model_validator
from pydantic.dataclasses import dataclass

from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.util.errors import MissingLimsDataError
from lang_qc.util.type_checksum import PacBioProductSHA256


Expand Down Expand Up @@ -175,11 +180,72 @@ class SampleDeplexingStats(BaseModel):
percentage_total_reads: float | None


class QCPoolMetrics(BaseModel):
# Pool-level deplexing metrics for a well: the coefficient of variance of the
# products' HiFi read counts and a list of per-product statistics. An instance
# is created either from a database row (`db_well` keyword argument) or
# directly from the `pool_coeff_of_variance` and `products` values.
@dataclass(kw_only=True, frozen=True)
class QCPoolMetrics:

    db_well: PacBioRunWellMetrics = Field(init_var=True)
    pool_coeff_of_variance: float | None = Field(
        title="Coefficient of variance for reads in the pool",
        description="Percentage of the standard deviation w.r.t. mean, when pool is more than one",
    )
    products: list[SampleDeplexingStats] = Field(
        title="List of products and their metrics"
    )

    @model_validator(mode="before")
    @classmethod
    def pre_root(cls, values: Any) -> dict[str, Any]:
        """
        Populates this object with the pool metrics computed from a
        database well row that is passed as the `db_well` argument.

        If `db_well` is not given, the keyword arguments are returned
        unchanged so that the object can be created from field values.

        For a well whose `demultiplex_mode` is unset or does not contain
        "Instrument", the coefficient of variance is None and the list of
        products is empty. The coefficient of variance is also None when
        any product has no read count, when the pool has fewer than two
        products, or when the mean read count is zero.

        Raises ValueError if `db_well` is None and MissingLimsDataError
        if any of the products has no linked LIMS data.
        """

        db_well_key_name = "db_well"
        # The raw input is read through its `kwargs` attribute, see
        # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
        if db_well_key_name not in values.kwargs:
            return values.kwargs

        well: PacBioRunWellMetrics = values.kwargs[db_well_key_name]
        if well is None:
            raise ValueError(f"None {db_well_key_name} value is not allowed.")

        cov: float | None = None
        sample_stats: list[SampleDeplexingStats] = []

        if well.demultiplex_mode and "Instrument" in well.demultiplex_mode:
            product_metrics = well.pac_bio_product_metrics
            lib_lims_data = [
                product.pac_bio_run
                for product in product_metrics
                if product.pac_bio_run is not None
            ]
            if len(lib_lims_data) != len(product_metrics):
                raise MissingLimsDataError(
                    "Partially linked LIMS data or no linked LIMS data"
                )

            hifi_reads = [prod.hifi_num_reads for prod in product_metrics]
            # statistics.stdev() needs at least two data points, and a zero
            # mean would cause a division by zero; in these cases the value
            # stays undefined.
            if len(hifi_reads) > 1 and all(
                reads is not None for reads in hifi_reads
            ):
                average = mean(hifi_reads)
                if average:
                    cov = stdev(hifi_reads) / average * 100

            # The length check above guarantees that the products and the
            # LIMS rows line up one-to-one.
            for prod, lims in zip(product_metrics, lib_lims_data):
                sample_stats.append(
                    SampleDeplexingStats(
                        id_product=prod.id_pac_bio_product,
                        tag1_name=lims.tag_identifier,
                        tag2_name=lims.tag2_identifier,
                        deplexing_barcode=prod.barcode4deplexing,
                        hifi_read_bases=prod.hifi_read_bases,
                        hifi_num_reads=prod.hifi_num_reads,
                        hifi_read_length_mean=prod.hifi_read_length_mean,
                        hifi_bases_percent=prod.hifi_bases_percent,
                        percentage_total_reads=(
                            prod.hifi_num_reads / well.hifi_num_reads * 100
                            if (well.hifi_num_reads and prod.hifi_num_reads)
                            else None
                        ),
                    )
                )

        return {"pool_coeff_of_variance": cov, "products": sample_stats}
60 changes: 37 additions & 23 deletions tests/test_pac_bio_qc_data_well.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from npg_id_generation.pac_bio import PacBioEntity

from lang_qc.db.helper.wells import WellWh
from lang_qc.models.pacbio.qc_data import QCDataWell
from lang_qc.models.pacbio.qc_data import QCDataWell, QCPoolMetrics
from lang_qc.util.errors import MissingLimsDataError
from tests.fixtures.sample_data import multiplexed_run, simplex_run


Expand Down Expand Up @@ -103,47 +104,60 @@ def test_creating_qc_data_well(mlwhdb_test_session, mlwhdb_load_runs):


def test_pool_metrics_from_single_sample_well(mlwhdb_test_session, simplex_run):
    """A well with a single sample yields no pool metrics."""

    id_product = PacBioEntity(
        run_name=simplex_run.pac_bio_run_name,
        well_label=simplex_run.well_label,
        plate_number=simplex_run.plate_number,
    ).hash_product_id()
    helper = WellWh(session=mlwhdb_test_session)
    row = helper.get_mlwh_well_by_product_id(id_product)

    metric = QCPoolMetrics(db_well=row)
    assert metric.pool_coeff_of_variance is None
    assert metric.products == []


def test_pool_metrics_from_well(mlwhdb_test_session, multiplexed_run):
    """
    Pool metrics created from a database row and created directly from
    field values must be the same.
    """

    id_product = PacBioEntity(
        run_name="RUN", well_label="B1", plate_number=1
    ).hash_product_id()
    helper = WellWh(session=mlwhdb_test_session)
    row = helper.get_mlwh_well_by_product_id(id_product)
    metrics_via_db = QCPoolMetrics(db_well=row)
    metrics_direct = QCPoolMetrics(
        pool_coeff_of_variance=metrics_via_db.pool_coeff_of_variance,
        products=metrics_via_db.products,
    )

    for metrics in [metrics_via_db, metrics_direct]:
        assert (
            int(metrics.pool_coeff_of_variance) == 47
        ), "Variance between 20 and 10 is ~47%"

        assert metrics.products[0].hifi_read_bases == 100
        assert (
            metrics.products[1].hifi_read_bases == 900
        ), "hifi read base counts are faithfully copied"

        assert (
            int(metrics.products[0].percentage_total_reads) == 33
        ), "10 of 30 reads is 33.3%"
        assert (
            int(metrics.products[1].percentage_total_reads) == 66
        ), "20 of 30 reads is 66.6%"

def test_errors_instantiating_pool_metrics(mlwhdb_test_session):
    """Invalid or incompletely linked database rows are rejected."""

    with pytest.raises(ValueError, match=r"None db_well value is not allowed."):
        QCPoolMetrics(db_well=None)

    id_product = PacBioEntity(
        run_name="TRACTION-RUN-1140", well_label="C1", plate_number=2
    ).hash_product_id()
    helper = WellWh(session=mlwhdb_test_session)
    row = helper.get_mlwh_well_by_product_id(id_product)
    with pytest.raises(
        MissingLimsDataError, match=r"Partially linked LIMS data or no linked LIMS data"
    ):
        QCPoolMetrics(db_well=row)

0 comments on commit 58c53bf

Please sign in to comment.