Skip to content

Commit

Permalink
Merge pull request #229 from mgcam/well_libs
Browse files Browse the repository at this point in the history
Well libs
  • Loading branch information
nerdstrike authored Jun 12, 2024
2 parents 9d68ec0 + fd3e9ef commit 0b225e7
Show file tree
Hide file tree
Showing 9 changed files with 418 additions and 66 deletions.
36 changes: 33 additions & 3 deletions lang_qc/db/mlwh_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,30 @@
from sqlalchemy.dialects.mysql import SMALLINT as mysqlSMALLINT
from sqlalchemy.dialects.mysql import TINYINT as mysqlTINYINT
from sqlalchemy.dialects.mysql import VARCHAR as mysqlVARCHAR
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy.orm import DeclarativeBase, relationship

Base = declarative_base()

class Base(DeclarativeBase):
    """
    A base class for declarative class definitions for the ml warehouse database.
    """

    def _get_row_description(self, fields: list[str]) -> str:
        """
        Returns a printable representation of the database table row.

        Interprets the strings given in the `fields` argument as column
        (attribute) names. Combines the module-qualified name of the class
        with `name=value` pairs for the given columns, keeping the order of
        `fields`. The columns for which the row has a NULL (None) value are
        omitted from the description.

        Raises AttributeError if a name in `fields` is not an attribute of
        this object.
        """

        # Look the attributes up with the getattr builtin rather than by
        # calling the __getattribute__ dunder method directly.
        pairs = [
            f"{name}={value}"
            for name in fields
            if (value := getattr(self, name)) is not None
        ]
        description = ", ".join(pairs)
        return f"{self.__module__}.{self.__class__.__name__}: {description}"


class Sample(Base):
Expand Down Expand Up @@ -538,7 +559,16 @@ class PacBioRunWellMetrics(Base):
"PacBioProductMetrics", back_populates="pac_bio_run_well_metrics"
)

def get_experiment_info(self):
"""Custom or customised methods are added below"""

def __repr__(self):
    """
    Returns a printable representation of the database row, built by
    `_get_row_description` from the columns listed below.
    """

    described_columns = [
        "pac_bio_run_name",
        "well_label",
        "plate_number",
        "id_pac_bio_product",
    ]
    return self._get_row_description(described_columns)

def get_experiment_info(self) -> list[PacBioRun]:
"""Returns a list of PacBioRun mlwh database rows.
Returns LIMS information about the PacBio experiment
Expand Down
33 changes: 32 additions & 1 deletion lang_qc/endpoints/pacbio_well.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,18 @@
from lang_qc.db.mlwh_connection import get_mlwh_db
from lang_qc.db.qc_connection import get_qc_db
from lang_qc.db.qc_schema import User
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellFull
from lang_qc.models.pacbio.well import (
PacBioPagedWells,
PacBioWellFull,
PacBioWellLibraries,
)
from lang_qc.models.qc_flow_status import QcFlowStatusEnum
from lang_qc.models.qc_state import QcState, QcStateBasic
from lang_qc.util.auth import check_user
from lang_qc.util.errors import (
InconsistentInputError,
InvalidDictValueError,
MissingLimsDataError,
RunNotFoundError,
)
from lang_qc.util.type_checksum import ChecksumSHA256
Expand Down Expand Up @@ -163,6 +168,32 @@ def get_wells_in_run(
return response


@router.get(
    "/wells/{id_product}/libraries",
    summary="Get well summary and LIMS data for all libraries",
    responses={
        status.HTTP_404_NOT_FOUND: {"description": "Well product does not exist"},
        status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid product ID"},
        status.HTTP_409_CONFLICT: {"description": "Missing or incomplete LIMS data"},
    },
    response_model=PacBioWellLibraries,
)
def get_well_lims_info(
    id_product: ChecksumSHA256,
    mlwhdb_session: Session = Depends(get_mlwh_db),
) -> PacBioWellLibraries:
    """
    Returns a PacBioWellLibraries object for the well with the given
    product ID.

    The well row is looked up with `_find_well_product_or_error`
    (NOTE(review): presumably the source of the documented 404 response
    - confirm against that helper). A MissingLimsDataError raised while
    the response object is being built is reported to the client as
    a 409 HTTP error, the error message becoming the detail of the response.
    """

    db_well = _find_well_product_or_error(id_product, mlwhdb_session)
    try:
        return PacBioWellLibraries(db_well=db_well)
    except MissingLimsDataError as err:
        # 409 - Request conflicts with the current state of the server.
        # The same status constant is used in the route declaration above;
        # the original error is chained to keep it in the traceback.
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT, detail=str(err)
        ) from err


@router.get(
"/products/{id_product}/seq_level",
summary="Get full sequencing QC metrics and state for a product",
Expand Down
160 changes: 121 additions & 39 deletions lang_qc/models/pacbio/experiment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 Genome Research Ltd.
# Copyright (c) 2023, 2024 Genome Research Ltd.
#
# Authors:
# Marina Gourtovaia <[email protected]>
Expand All @@ -19,14 +19,96 @@
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

from typing import List
from typing import Any

from pydantic import BaseModel, ConfigDict, Field
from pydantic import Field, model_validator
from pydantic.dataclasses import dataclass

from lang_qc.db.mlwh_schema import PacBioRun


class PacBioExperiment(BaseModel):
@dataclass(kw_only=True, frozen=True)
class PacBioLibrary:
    """
    This model represents LIMS data associated with a PacBio library.
    The fields of the model can be assigned directly via the constructor.
    However, if the `db_library` field, a single row of the PacBioRun table
    class, is set via the constructor, the rest of the fields are populated
    using this database row object, while any other information passed to the
    constructor is disregarded.
    The `db_library` field is not present in the model instance that is
    returned by the constructor.
    """

    db_library: PacBioRun = Field(init_var=True)

    study_id: str = Field(
        title="LIMS-specific study identifier",
    )
    study_name: str = Field(
        title="Study name",
    )
    sample_id: str = Field(
        title="LIMS-specific Sample identifier",
    )
    sample_name: str = Field(
        title="Sample name",
    )
    tag_sequence: list = Field(
        title="Tag sequence",
        description="""
        Tag sequences as a list. An empty list for a non-indexed library.
        """,
    )
    library_type: str | None = Field(
        default=None,
        title="Library type",
    )
    pool_name: str | None = Field(
        default=None,
        title="Pool name",
        description="""
        The pac_bio_library_tube_barcode from TRACTION, AKA pool name
        """,
    )

    @model_validator(mode="before")
    @classmethod
    def pre_root(cls, values: Any) -> dict[str, Any]:
        """
        Populates the fields of this object with information available
        in the LIMS system.

        `values` is the raw constructor input; its keyword arguments are
        read from the `kwargs` attribute. If `db_library` is not among the
        keyword arguments, they are returned unchanged so that the fields
        given directly to the constructor are validated as usual. Otherwise
        a new dictionary of field values is built from the database row and
        every other constructor argument is disregarded.

        Raises ValueError if `db_library` is explicitly set to None.
        """

        # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
        # The kwargs attribute is declared there as optional, so a None
        # value is treated as an empty dictionary of keyword arguments.
        kwargs = values.kwargs or {}
        if "db_library" not in kwargs:
            return kwargs
        db_row: PacBioRun = kwargs["db_library"]
        if db_row is None:
            raise ValueError("None db_library value is not allowed.")

        study = db_row.study
        sample = db_row.sample
        return {
            "study_name": study.name,
            "study_id": study.id_study_lims,
            "sample_name": sample.name,
            "sample_id": sample.id_sample_lims,
            "library_type": db_row.pipeline_id_lims,
            "pool_name": db_row.pac_bio_library_tube_barcode,
            # Only non-empty tags are listed, the first tag before the second.
            "tag_sequence": [
                tag for tag in (db_row.tag_sequence, db_row.tag2_sequence) if tag
            ],
        }


@dataclass(kw_only=True, frozen=True)
class PacBioExperiment:
"""
A response model that contains laboratory tracking information
about the PacBio wells and samples prior to the start of the
Expand All @@ -43,28 +125,30 @@ class PacBioExperiment(BaseModel):
(library).
"""

db_libraries: list[PacBioRun] = Field(init_var=True)

study_id: list = Field(
title="Study identifier",
description="""
Study identifiers as a sorted list of unique strings (to cover
an unlikely case of multiple studies).
""",
)
study_name: str = Field(
study_name: str | None = Field(
default=None,
title="Study name",
description="""
Study name, is not set in case of multiple studies.
""",
)
sample_id: str = Field(
sample_id: str | None = Field(
default=None,
title="Sample identifier",
description="""
Sample identifier, is not set in case of multiple samples.
""",
)
sample_name: str = Field(
sample_name: str | None = Field(
default=None,
title="Sample name",
description="""
Expand Down Expand Up @@ -94,59 +178,57 @@ class PacBioExperiment(BaseModel):
unlikely case of multiple library types.
""",
)
pool_name: str = Field(
pool_name: str | None = Field(
default=None,
title="Pool name",
description="""
The pac_bio_library_tube_barcode from TRACTION, AKA pool name
""",
)
model_config = ConfigDict(from_attributes=True, extra="forbid")

@classmethod
def from_orm(cls, lims_db_rows: List[PacBioRun]):
@model_validator(mode="before")
def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
"""
A factory method, creates an instance of the PacBioLimsData class.
Should be given a non-empty list of PacBioRun table row objects as
an argument.
Populates the fields of this object with information available
in the LIMS system.
Errors if the `db_libraries` attribute is not set via the constructor.
"""

lims_db_rows: list[PacBioRun] = values.kwargs["db_libraries"]
num_samples = len(lims_db_rows)
if num_samples == 0:
raise Exception("Cannot create PacBioLimsData object, no data.")
if any(row is None for row in lims_db_rows):
raise Exception("Cannot create PacBioLimsData object, None row.")
raise ValueError("Empty db_libraries list is not allowed.")

lib_objects = [PacBioLibrary(db_library=row) for row in lims_db_rows]

# Using sets for some data instead of lists because we do not
# want repetitions.
lims_data = {
"num_samples": num_samples,
"study_id": set(),
"library_type": set(),
"tag_sequence": [],
}
study_name = None
for row in lims_db_rows:
lims_data["study_id"].add(row.study.id_study_lims)
lims_data["library_type"].add(row.pipeline_id_lims)
study_name = row.study.name
if pool_name := row.pac_bio_library_tube_barcode:
lims_data["pool_name"] = pool_name
if num_samples == 1:
if tag := row.tag_sequence:
lims_data["tag_sequence"].append(tag)
if tag := row.tag2_sequence:
lims_data["tag_sequence"].append(tag)
lims_data["sample_id"] = row.sample.id_sample_lims
lims_data["sample_name"] = row.sample.name
lims_data["study_name"] = row.study.name

lims_data["study_id"] = {o.study_id for o in lib_objects} # returns a set
lims_data["library_type"] = {
o.library_type if o.library_type is not None else "UNKNOWN"
for o in lib_objects
}

pool_names = {o.pool_name for o in lib_objects}
if len(pool_names) > 1:
raise ValueError("Multiple pool names.")
lims_data["pool_name"] = pool_names.pop()

o = lib_objects[0]
if num_samples == 1:
lims_data["tag_sequence"] = o.tag_sequence
lims_data["sample_id"] = o.sample_id
lims_data["sample_name"] = o.sample_name
lims_data["study_name"] = o.study_name
if len(lims_data["study_id"]) == 1:
lims_data["study_name"] = study_name
lims_data["study_name"] = o.study_name

# Convert sets back to lists and sort so that the list items are
# Convert sets back to lists and sort so that the items are
# in a predictable order.
for key in ("library_type", "study_id"):
lims_data[key] = sorted(lims_data[key])

return cls.model_validate(lims_data)
return lims_data
Loading

0 comments on commit 0b225e7

Please sign in to comment.