-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #224 from nerdstrike/render_product_metrics
Back end support for pool metrics
- Loading branch information
Showing
9 changed files
with
464 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright (c) 2022, 2023 Genome Research Ltd. | ||
# Copyright (c) 2022, 2023, 2024 Genome Research Ltd. | ||
# | ||
# Authors: | ||
# Marina Gourtovaia <[email protected]> | ||
|
@@ -21,6 +21,7 @@ | |
|
||
import logging | ||
from datetime import date, datetime, timedelta | ||
from statistics import mean, stdev | ||
from typing import ClassVar, List | ||
|
||
from pydantic import BaseModel, ConfigDict, Field | ||
|
@@ -33,11 +34,13 @@ | |
) | ||
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics | ||
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType | ||
from lang_qc.models.pacbio.qc_data import QCPoolMetrics, SampleDeplexingStats | ||
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellSummary | ||
from lang_qc.models.pager import PagedResponse | ||
from lang_qc.models.qc_flow_status import QcFlowStatusEnum | ||
from lang_qc.models.qc_state import QcState as QcStateModel | ||
from lang_qc.util.errors import EmptyListOfRunNamesError, RunNotFoundError | ||
from lang_qc.util.type_checksum import PacBioWellSHA256 | ||
|
||
""" | ||
This package is using an undocumented feature of Pydantic, type | ||
|
@@ -64,7 +67,7 @@ class WellWh(BaseModel): | |
# The TestClient seems to be keeping these instances alive and changing them. | ||
|
||
def get_mlwh_well_by_product_id( | ||
self, id_product: str | ||
self, id_product: PacBioWellSHA256 | ||
) -> PacBioRunWellMetrics | None: | ||
""" | ||
Returns a well row record from the well metrics table or | ||
|
@@ -77,6 +80,52 @@ def get_mlwh_well_by_product_id( | |
) | ||
).scalar_one_or_none() | ||
|
||
def get_metrics_by_well_product_id(
    self, id_product: PacBioWellSHA256
) -> QCPoolMetrics | None:
    """
    Returns pool-level and per-product deplexing metrics for the well
    with the given product ID, or None if they are not applicable.

    None is returned when no well row is found for `id_product`, or when
    the well's `demultiplex_mode` is unset or does not contain the string
    "Instrument".

    The coefficient of variance is the standard deviation of the products'
    `hifi_num_reads` values divided by their mean, as a percentage. It is
    None when it cannot be computed: the pool has fewer than two products,
    any product has no `hifi_num_reads` value, or the mean is zero.

    Raises an Exception if at least one product of the well has no linked
    `pac_bio_run` (LIMS) record.
    """
    well = self.get_mlwh_well_by_product_id(id_product)
    if not (
        well and well.demultiplex_mode and "Instrument" in well.demultiplex_mode
    ):
        return None

    product_metrics = well.pac_bio_product_metrics
    lib_lims_data = [
        product.pac_bio_run
        for product in product_metrics
        if product.pac_bio_run is not None
    ]
    if len(lib_lims_data) != len(product_metrics):
        raise Exception("Partially linked LIMS data or no linked LIMS data")

    hifi_reads = [prod.hifi_num_reads for prod in product_metrics]
    cov: float | None = None
    # stdev() needs at least two data points and the division needs a
    # non-zero mean; otherwise the coefficient is left undefined (None)
    # rather than letting StatisticsError or ZeroDivisionError escape.
    if len(hifi_reads) > 1 and all(reads is not None for reads in hifi_reads):
        average = mean(hifi_reads)
        if average:
            cov = stdev(hifi_reads) / average * 100

    # The length check above guarantees that every product has a LIMS
    # record, so the two lists are aligned element by element.
    sample_stats = [
        SampleDeplexingStats(
            id_product=prod.id_pac_bio_product,
            tag1_name=lims.tag_identifier,
            tag2_name=lims.tag2_identifier,
            deplexing_barcode=prod.barcode4deplexing,
            hifi_read_bases=prod.hifi_read_bases,
            hifi_num_reads=prod.hifi_num_reads,
            hifi_read_length_mean=prod.hifi_read_length_mean,
            hifi_bases_percent=prod.hifi_bases_percent,
            percentage_total_reads=(
                prod.hifi_num_reads / well.hifi_num_reads * 100
                if (well.hifi_num_reads and prod.hifi_num_reads)
                else None
            ),
        )
        for prod, lims in zip(product_metrics, lib_lims_data)
    ]

    return QCPoolMetrics(pool_coeff_of_variance=cov, products=sample_stats)
|
||
def recent_completed_wells(self) -> List[PacBioRunWellMetrics]: | ||
""" | ||
Get recent not QC-ed completed wells from the mlwh database. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright (c) 2022, 2023 Genome Research Ltd. | ||
# Copyright (c) 2022, 2023, 2024 Genome Research Ltd. | ||
# | ||
# Authors: | ||
# Marina Gourtovaia <[email protected]> | ||
|
@@ -23,6 +23,7 @@ | |
from pydantic import BaseModel, ConfigDict, Field | ||
|
||
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics | ||
from lang_qc.util.type_checksum import PacBioProductSHA256 | ||
|
||
|
||
# Pydantic prohibits us from defining these as @classmethod or @staticmethod | ||
|
@@ -153,3 +154,32 @@ def from_orm(cls, obj: PacBioRunWellMetrics): | |
qc_data[name]["value"] = getattr(obj, name, None) | ||
|
||
return cls.model_validate(qc_data) | ||
|
||
|
||
class SampleDeplexingStats(BaseModel):
    """
    A representation of metrics for one product, some direct from the DB and others inferred
    For a long time tag2_name was null and tag1_name was silently used at both ends of the sequence.
    As a result tag2_name will be None for most data in or before 2024.
    """

    # Identity of the product these metrics belong to.
    id_product: PacBioProductSHA256

    # Tag names and the barcode used for deplexing; each may be None.
    tag1_name: str | None
    tag2_name: str | None
    deplexing_barcode: str | None

    # HiFi read metrics for the product; each may be None.
    hifi_read_bases: int | None
    hifi_num_reads: int | None
    hifi_read_length_mean: float | None
    hifi_bases_percent: float | None

    # Share of reads attributed to this product, as a percentage; may be None.
    percentage_total_reads: float | None
|
||
|
||
class QCPoolMetrics(BaseModel):
    # Pool-wide summary plus the per-product breakdown.

    # Required, but may be None.
    pool_coeff_of_variance: float | None = Field(
        description="Percentage of the standard deviation w.r.t. mean, when pool is more than one",
        title="Coefficient of variance for reads in the pool",
    )
    # One entry per product in the pool.
    products: list[SampleDeplexingStats] = Field(
        title="List of products and their metrics"
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.