From 5c77509fa5be9f2c542a2c561d5eb420b3ccbb4b Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Sat, 8 Jun 2024 23:53:34 +0100
Subject: [PATCH 1/5] Added a model for PB library LIMS data.

Reimplemented the PacBioExperiment class: removed the
from_orm method and replaced it with a pre-init hook.
---
 lang_qc/models/pacbio/experiment.py | 160 +++++++++++++++++++++-------
 lang_qc/models/pacbio/well.py       |   4 +-
 tests/test_pac_bio_experiment.py    |  47 +++++---
 3 files changed, 155 insertions(+), 56 deletions(-)

diff --git a/lang_qc/models/pacbio/experiment.py b/lang_qc/models/pacbio/experiment.py
index 14eeb7c..76bf598 100644
--- a/lang_qc/models/pacbio/experiment.py
+++ b/lang_qc/models/pacbio/experiment.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023 Genome Research Ltd.
+# Copyright (c) 2023, 2024 Genome Research Ltd.
 #
 # Authors:
 #   Marina Gourtovaia <mg8@sanger.ac.uk>
@@ -19,14 +19,96 @@
 # You should have received a copy of the GNU General Public License along with
 # this program. If not, see <http://www.gnu.org/licenses/>.
 
-from typing import List
+from typing import Any
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import Field, model_validator
+from pydantic.dataclasses import dataclass
 
 from lang_qc.db.mlwh_schema import PacBioRun
 
 
-class PacBioExperiment(BaseModel):
+@dataclass(kw_only=True, frozen=True)
+class PacBioLibrary:
+    """
+    This model represents LIMS data associated with a PacBio library.
+
+    The fields of the model can be assigned directly via the constructor.
+    However, if the `db_library` field, a single row of the PacBioRun table
+    class, is set via the constructor, the rest of the fields are populated
+    using this database row object, while any other information passed to the
+    constructor is disregarded.
+
+    The `db_library` field is not present in the model instance that is
+    returned by the constructor.
+    """
+
+    db_library: PacBioRun = Field(init_var=True)
+
+    study_id: str = Field(
+        title="LIMS-specific study identifier",
+    )
+    study_name: str = Field(
+        title="Study name",
+    )
+    sample_id: str = Field(
+        title="LIMS-specific Sample identifier",
+    )
+    sample_name: str = Field(
+        title="Sample name",
+    )
+    tag_sequence: list = Field(
+        title="Tag sequence",
+        description="""
+        Tag sequences as a list. An empty list for a non-indexed library.
+        """,
+    )
+    library_type: str | None = Field(
+        default=None,
+        title="Library type",
+    )
+    pool_name: str | None = Field(
+        default=None,
+        title="Pool name",
+        description="""
+        The pac_bio_library_tube_barcode from TRACTION, AKA pool name
+        """,
+    )
+
+    @model_validator(mode="before")
+    def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
+        """
+        Populates the fields of this object from the `db_library` row if it
+        is given. Without `db_library` the constructor arguments are passed
+        through unchanged. Errors if `db_library` is explicitly set to None.
+        """
+        # `values` is a pydantic_core ArgsKwargs object, hence `.kwargs` below:
+        # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
+        if "db_library" not in values.kwargs:
+            return values.kwargs
+        db_row: PacBioRun = values.kwargs["db_library"]
+        if db_row is None:
+            raise ValueError("None db_library value is not allowed.")
+
+        assigned = dict()
+        study = db_row.study
+        assigned["study_name"] = study.name
+        assigned["study_id"] = study.id_study_lims
+        sample = db_row.sample
+        assigned["sample_name"] = sample.name
+        assigned["sample_id"] = sample.id_sample_lims
+        assigned["library_type"] = db_row.pipeline_id_lims
+        assigned["pool_name"] = db_row.pac_bio_library_tube_barcode
+        assigned["tag_sequence"] = []
+        if tag := db_row.tag_sequence:  # tag2 is ignored unless tag is set
+            assigned["tag_sequence"].append(tag)
+            if tag := db_row.tag2_sequence:
+                assigned["tag_sequence"].append(tag)
+
+        return assigned
+
+
+@dataclass(kw_only=True, frozen=True)
+class PacBioExperiment:
     """
     A response model that contains laboratory tracking information
     about the PacBio wells and samples prior to the start of the
@@ -43,6 +125,8 @@ class PacBioExperiment(BaseModel):
     (library).
     """
 
+    db_libraries: list[PacBioRun] = Field(init_var=True)
+
     study_id: list = Field(
         title="Study identifier",
         description="""
@@ -50,21 +134,21 @@ class PacBioExperiment(BaseModel):
         an unlikely case of multiple studies).
         """,
     )
-    study_name: str = Field(
+    study_name: str | None = Field(
         default=None,
         title="Study name",
         description="""
         Study name, is not set in case of multiple studies.
         """,
     )
-    sample_id: str = Field(
+    sample_id: str | None = Field(
         default=None,
         title="Sample identifier",
         description="""
         Sample identifier, is not set in case of multiple samples.
         """,
     )
-    sample_name: str = Field(
+    sample_name: str | None = Field(
         default=None,
         title="Sample name",
         description="""
@@ -94,59 +178,57 @@ class PacBioExperiment(BaseModel):
         unlikely case of multiple library types.
         """,
     )
-    pool_name: str = Field(
+    pool_name: str | None = Field(
         default=None,
         title="Pool name",
         description="""
         The pac_bio_library_tube_barcode from TRACTION, AKA pool name
         """,
     )
-    model_config = ConfigDict(from_attributes=True, extra="forbid")
 
-    @classmethod
-    def from_orm(cls, lims_db_rows: List[PacBioRun]):
+    @model_validator(mode="before")
+    def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         """
-        A factory method, creates an instance of the PacBioLimsData class.
-        Should be given a non-empty list of PacBioRun table row objects as
-        an argument.
+        Populates the fields of this object with information available
+        in the LIMS system.
+        Errors if the `db_libraries` attribute is not set via the constructor.
         """
 
+        lims_db_rows: list[PacBioRun] = values.kwargs["db_libraries"]
         num_samples = len(lims_db_rows)
         if num_samples == 0:
-            raise Exception("Cannot create PacBioLimsData object, no data.")
-        if any(row is None for row in lims_db_rows):
-            raise Exception("Cannot create PacBioLimsData object, None row.")
+            raise ValueError("Empty db_libraries list is not allowed.")
+
+        lib_objects = [PacBioLibrary(db_library=row) for row in lims_db_rows]
 
-        # Using sets for some data instead of lists because we do not
-        # want repetitions.
         lims_data = {
             "num_samples": num_samples,
-            "study_id": set(),
-            "library_type": set(),
             "tag_sequence": [],
         }
-        study_name = None
-        for row in lims_db_rows:
-            lims_data["study_id"].add(row.study.id_study_lims)
-            lims_data["library_type"].add(row.pipeline_id_lims)
-            study_name = row.study.name
-            if pool_name := row.pac_bio_library_tube_barcode:
-                lims_data["pool_name"] = pool_name
-            if num_samples == 1:
-                if tag := row.tag_sequence:
-                    lims_data["tag_sequence"].append(tag)
-                    if tag := row.tag2_sequence:
-                        lims_data["tag_sequence"].append(tag)
-                lims_data["sample_id"] = row.sample.id_sample_lims
-                lims_data["sample_name"] = row.sample.name
-                lims_data["study_name"] = row.study.name
 
+        lims_data["study_id"] = {o.study_id for o in lib_objects}  # returns a set
+        lims_data["library_type"] = {
+            o.library_type if o.library_type is not None else "UNKNOWN"
+            for o in lib_objects
+        }
+
+        pool_names = {o.pool_name for o in lib_objects}
+        if len(pool_names) > 1:
+            raise ValueError("Multiple pool names.")
+        lims_data["pool_name"] = pool_names.pop()
+
+        o = lib_objects[0]
+        if num_samples == 1:
+            lims_data["tag_sequence"] = o.tag_sequence
+            lims_data["sample_id"] = o.sample_id
+            lims_data["sample_name"] = o.sample_name
+            lims_data["study_name"] = o.study_name
         if len(lims_data["study_id"]) == 1:
-            lims_data["study_name"] = study_name
+            lims_data["study_name"] = o.study_name
 
-        # Convert sets back to lists and sort so that the list items are
+        # Convert sets back to lists and sort so that the items are
         # in a predictable order.
         for key in ("library_type", "study_id"):
             lims_data[key] = sorted(lims_data[key])
 
-        return cls.model_validate(lims_data)
+        return lims_data
diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index d2047a5..d0d34cb 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -223,6 +223,8 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         assigned["metrics"] = QCDataWell.from_orm(mlwh_db_row)
         experiment_info = mlwh_db_row.get_experiment_info()
         if len(experiment_info):
-            assigned["experiment_tracking"] = PacBioExperiment.from_orm(experiment_info)
+            assigned["experiment_tracking"] = PacBioExperiment(
+                db_libraries=experiment_info
+            )
 
         return assigned
diff --git a/tests/test_pac_bio_experiment.py b/tests/test_pac_bio_experiment.py
index ff97954..775693c 100644
--- a/tests/test_pac_bio_experiment.py
+++ b/tests/test_pac_bio_experiment.py
@@ -2,7 +2,19 @@
 from sqlalchemy import select
 
 from lang_qc.db.mlwh_schema import PacBioRun
-from lang_qc.models.pacbio.experiment import PacBioExperiment
+from lang_qc.models.pacbio.experiment import PacBioExperiment, PacBioLibrary
+
+
+def test_creating_library_object(mlwhdb_test_session, mlwhdb_load_runs):
+    """A library object can be created directly, without a database row."""
+    lib = PacBioLibrary(
+        study_id="1",
+        sample_id="1",
+        study_name="st_name",
+        sample_name="sa_name",
+        tag_sequence=[],
+    )
+    assert lib.study_id == "1"
 
 
 def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
@@ -17,7 +29,13 @@ def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
     )
     well_row = mlwhdb_test_session.execute(query).scalars().one()
 
-    lims = PacBioExperiment.from_orm([well_row])
+    with pytest.raises(Exception, match=r"Empty db_libraries list is not allowed."):
+        PacBioExperiment(db_libraries=[])
+
+    with pytest.raises(ValueError, match=r"None db_library value is not allowed."):
+        PacBioExperiment(db_libraries=[well_row, None])
+
+    lims = PacBioExperiment(db_libraries=[well_row])
     assert lims.num_samples == 1
     assert lims.study_id == ["6457"]
     assert lims.study_name == "Tree of Life - ASG"
@@ -34,7 +52,7 @@ def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
     )
     well_row = mlwhdb_test_session.execute(query).scalars().one()
 
-    lims = PacBioExperiment.from_orm([well_row])
+    lims = PacBioExperiment(db_libraries=[well_row])
     assert lims.num_samples == 1
     assert lims.study_id == ["5901"]
     assert lims.study_name == "DTOL_Darwin Tree of Life"
@@ -51,7 +69,7 @@ def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
     )
     well_rows = mlwhdb_test_session.execute(query).scalars().all()
 
-    lims = PacBioExperiment.from_orm(well_rows)
+    lims = PacBioExperiment(db_libraries=well_rows)
     assert lims.num_samples == 40
     assert lims.study_id == ["7069"]
     assert lims.study_name == "Alternative Enzymes 2022 microbial genomes"
@@ -68,7 +86,7 @@ def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
     )
     well_rows = mlwhdb_test_session.execute(query).scalars().all()
 
-    lims = PacBioExperiment.from_orm(well_rows)
+    lims = PacBioExperiment(db_libraries=well_rows)
     assert lims.num_samples == 3
     assert lims.study_id == ["5901", "6457"]
     assert lims.study_name is None
@@ -85,7 +103,14 @@ def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
     )
     well_rows = mlwhdb_test_session.execute(query).scalars().all()
 
-    lims = PacBioExperiment.from_orm(well_rows)
+    with pytest.raises(ValueError, match=r"Multiple pool names."):
+        PacBioExperiment(db_libraries=well_rows)
+
+    for row in well_rows:
+        row.pac_bio_library_tube_barcode = "AXCTYW"
+    mlwhdb_test_session.commit()
+
+    lims = PacBioExperiment(db_libraries=well_rows)
     assert lims.num_samples == 42
     assert lims.study_id == ["6457", "7069"]
     assert lims.study_name is None
@@ -93,13 +118,3 @@ def test_creating_experiment_object(mlwhdb_test_session, mlwhdb_load_runs):
     assert lims.sample_name is None
     assert lims.library_type == ["PacBio_Ultra_Low_Input", "Pacbio_HiFi_mplx"]
     assert lims.tag_sequence == []
-
-    with pytest.raises(
-        Exception, match=r"Cannot create PacBioLimsData object, no data"
-    ):
-        PacBioExperiment.from_orm([])
-
-    with pytest.raises(
-        Exception, match=r"Cannot create PacBioLimsData object, None row"
-    ):
-        PacBioExperiment.from_orm([well_row, None])

From fbff9a74dac11d80972c88ef4b991b60b3d824a3 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Tue, 11 Jun 2024 12:08:13 +0100
Subject: [PATCH 2/5] Created an extendable declarative base class

... for mlwh ORM classes so that common methods can be
implemented.

Customised the __repr__ method for one of the db classes.
---
 lang_qc/db/mlwh_schema.py     | 36 ++++++++++++++++++++++++++++++++---
 tests/test_mlwh_db_classes.py | 24 +++++++++++++++++++++++
 2 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_mlwh_db_classes.py

diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py
index 395916f..c0796b1 100644
--- a/lang_qc/db/mlwh_schema.py
+++ b/lang_qc/db/mlwh_schema.py
@@ -25,9 +25,30 @@
 from sqlalchemy.dialects.mysql import SMALLINT as mysqlSMALLINT
 from sqlalchemy.dialects.mysql import TINYINT as mysqlTINYINT
 from sqlalchemy.dialects.mysql import VARCHAR as mysqlVARCHAR
-from sqlalchemy.orm import declarative_base, relationship
+from sqlalchemy.orm import DeclarativeBase, relationship
 
-Base = declarative_base()
+
+class Base(DeclarativeBase):
+    """
+    A base class for declarative class definitions for the ml warehouse database.
+    """
+
+    def get_row_description(self, fields: list[str]) -> str:
+        """
+        Returns a printable representation of the database table row. Interprets
+        a list of strings given as the `fields` argument as a list of column
+        names. Combines the name of the class, names of the given columns
+        and respective values into a row description. The columns for which
+        the row has a NULL value are omitted from the description.
+        """
+
+        pairs = []
+        for name in fields:
+            value = getattr(self, name)
+            if value is not None:
+                pairs.append(f"{name}={value}")
+        description = ", ".join(pairs)
+        return f"{self.__module__}.{self.__class__.__name__}: {description}"
 
 
 class Sample(Base):
@@ -538,7 +559,16 @@ class PacBioRunWellMetrics(Base):
         "PacBioProductMetrics", back_populates="pac_bio_run_well_metrics"
     )
 
-    def get_experiment_info(self):
+    """Custom or customised methods are added below"""
+
+    def __repr__(self):
+        """Returns a printable representation of the database row"""
+
+        return self.get_row_description(
+            ["pac_bio_run_name", "well_label", "plate_number", "id_pac_bio_product"]
+        )
+
+    def get_experiment_info(self) -> list[PacBioRun]:
         """Returns a list of PacBioRun mlwh database rows.
 
         Returns LIMS information about the PacBio experiment
diff --git a/tests/test_mlwh_db_classes.py b/tests/test_mlwh_db_classes.py
new file mode 100644
index 0000000..be0b89a
--- /dev/null
+++ b/tests/test_mlwh_db_classes.py
@@ -0,0 +1,24 @@
+from sqlalchemy import select
+
+from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
+
+"""Tests for custom and customised ORM methods"""
+
+
+def test_pac_bio_well_metrics_repr(mlwhdb_test_session, mlwhdb_load_runs):
+    id1 = "cf18bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61"
+    id2 = "513c674f489b106c6af716dd0d210826ff03b7648d50888839c3722ca1b10dbf"
+    data = {
+        id1: f"pac_bio_run_name=TRACTION-RUN-92, well_label=A1, id_pac_bio_product={id1}",
+        id2: f"pac_bio_run_name=TRACTION-RUN-1140, well_label=A1, plate_number=2, id_pac_bio_product={id2}",
+    }
+
+    for id in data.keys():
+        query = select(PacBioRunWellMetrics).where(
+            PacBioRunWellMetrics.id_pac_bio_product == id
+        )
+        db_row = mlwhdb_test_session.execute(query).scalar_one()
+        assert (
+            db_row.__repr__()
+            == "lang_qc.db.mlwh_schema.PacBioRunWellMetrics: " + data[id]
+        )

From ed54cabcfbe7c70b4facaf1550397e9d73acf920 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Tue, 11 Jun 2024 13:17:13 +0100
Subject: [PATCH 3/5] Added a model representing libraries in a well.

---
 lang_qc/models/pacbio/well.py | 40 ++++++++++++++++++++--
 tests/test_pb_well_models.py  | 62 +++++++++++++++++++++++++++++++++--
 2 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index d0d34cb..e5bf0d8 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -27,7 +27,7 @@
 from pydantic.dataclasses import dataclass
 
 from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
-from lang_qc.models.pacbio.experiment import PacBioExperiment
+from lang_qc.models.pacbio.experiment import PacBioExperiment, PacBioLibrary
 from lang_qc.models.pacbio.qc_data import QCDataWell
 from lang_qc.models.pager import PagedResponse
 from lang_qc.models.qc_state import QcState
@@ -132,9 +132,10 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         """
 
         # https://github.com/pydantic/pydantic-core/blob/main/python/pydantic_core/_pydantic_core.pyi
-        mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
-        assert mlwh_db_row
+        if values.kwargs.get("db_well") is None:  # absent or explicit None
+            raise ValueError("None db_well value is not allowed.")
 
+        mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
         column_names = [column.key for column in PacBioRunWellMetrics.__table__.columns]
 
         assigned = dict()
@@ -175,6 +176,39 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         return assigned
 
 
+@dataclass(kw_only=True, frozen=True)
+class PacBioWellLibraries(PacBioWell):
+    """A response model binding together basic PacBio well and LIMS data for
+    the libraries, which were sequenced in this well.
+    """
+
+    libraries: list[PacBioLibrary] = Field(
+        title="A list of `PacBioLibrary` objects",
+        description="""
+        A list of `PacBioLibrary` objects. Each member of the list represents
+        a library, which was sequenced in this well. If the object is created
+        by supplying the `db_well` attribute via the constructor, the list
+        is never empty. The list is not sorted.
+        """,
+    )
+
+    @model_validator(mode="before")
+    def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
+
+        assigned = super().pre_root(values)
+        mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
+        lims_data = mlwh_db_row.get_experiment_info()
+        if len(lims_data) == 0:
+            raise ValueError(
+                f"No LIMS data retrieved for {mlwh_db_row.__repr__()} "
+                "on account of partially linked or unlinked product data."
+            )
+
+        assigned["libraries"] = [PacBioLibrary(db_library=row) for row in lims_data]
+
+        return assigned
+
+
 class PacBioPagedWells(PagedResponse, extra="forbid"):
     """A response model for paged data about PacBio wells."""
 
diff --git a/tests/test_pb_well_models.py b/tests/test_pb_well_models.py
index 12d64a4..ed80b75 100644
--- a/tests/test_pb_well_models.py
+++ b/tests/test_pb_well_models.py
@@ -1,10 +1,16 @@
+import pytest
 from npg_id_generation.pac_bio import PacBioEntity
 from sqlalchemy.orm import Session
 
 from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
 from lang_qc.db.helper.wells import WellWh
 from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
-from lang_qc.models.pacbio.well import PacBioWellFull, PacBioWellSummary
+from lang_qc.models.pacbio.experiment import PacBioLibrary
+from lang_qc.models.pacbio.well import (
+    PacBioWellFull,
+    PacBioWellLibraries,
+    PacBioWellSummary,
+)
 from tests.conftest import compare_dates
 from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users
 
@@ -116,9 +122,13 @@ def test_create_full_model(
     assert pb_well.experiment_tracking is None
 
 
-def test_create_summary_model(
+def test_create_summary_and_library_models(
     mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs
 ):
+
+    with pytest.raises(ValueError, match=r"None db_well value is not allowed."):
+        PacBioWellSummary(plate_number=3)
+
     (well_row, qc_state) = _prepare_data(
         mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-92", "A1"
     )
@@ -126,6 +136,9 @@ def test_create_summary_model(
     _examine_well_model_a1(pb_well, well_row.id_pac_bio_product)
     assert pb_well.study_names == ["Tree of Life - ASG"]
 
+    pb_well = PacBioWellLibraries(db_well=well_row)
+    _examine_well_model_a1(pb_well, well_row.id_pac_bio_product)
+
     (well_row, qc_state) = _prepare_data(
         mlwhdb_test_session, qcdb_test_session, "TRACTION_RUN_1", "B1"
     )
@@ -140,7 +153,7 @@ def test_create_summary_model(
     _examine_well_model_c1(pb_well, well_row.id_pac_bio_product)
 
 
-def test_create_summary_model_study_info(
+def test_create_summary_and_library_models_lims_info(
     mlwhdb_test_session, qcdb_test_session, load_data4well_retrieval, mlwhdb_load_runs
 ):
     # Well with two samples, none is linked to LIMS
@@ -150,6 +163,9 @@ def test_create_summary_model_study_info(
     pb_well = PacBioWellSummary(db_well=well_row)
     assert pb_well.study_names == []
 
+    with pytest.raises(ValueError, match=r"No LIMS data retrieved"):
+        PacBioWellLibraries(db_well=well_row)
+
     # Fully linked wells with one sample
     (well_row, qc_state) = _prepare_data(
         mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1162", "C1"
@@ -163,6 +179,19 @@ def test_create_summary_model_study_info(
     pb_well = PacBioWellSummary(db_well=well_row)
     assert pb_well.study_names == ["DTOL_Darwin Tree of Life"]
 
+    pb_well = PacBioWellLibraries(db_well=well_row)
+    assert len(pb_well.libraries) == 1
+    expected_lib = PacBioLibrary(
+        study_id="5901",
+        study_name="DTOL_Darwin Tree of Life",
+        sample_id="9463663",
+        sample_name="DTOL14290946",
+        tag_sequence=["CTCAGCATACGAGTAT"],
+        library_type="Pacbio_HiFi",
+        pool_name="TRAC-2-7128",
+    )
+    assert pb_well.libraries[0] == expected_lib
+
     # A fully linked well with multiple samples, all belonging to the same study
     (well_row, qc_state) = _prepare_data(
         mlwhdb_test_session, qcdb_test_session, "TRACTION-RUN-1140", "B1", 1
@@ -180,6 +209,30 @@ def test_create_summary_model_study_info(
         "ToL_Blaxter_ Reference Genomes_ DNA",
     ]
 
+    pb_well = PacBioWellLibraries(db_well=well_row)
+    assert len(pb_well.libraries) == 4
+    libs = {lib.sample_id: lib for lib in pb_well.libraries}
+    expected_lib = PacBioLibrary(
+        study_id="6771",
+        study_name="ToL_Blaxter_ Reference Genomes_ DNA",
+        sample_id="8657549",
+        sample_name="6771STDY13618009",
+        tag_sequence=["CTGCGATCACGAGTAT"],
+        library_type="Pacbio_HiFi",
+        pool_name="TRAC-2-7676",
+    )
+    assert libs["8657549"] == expected_lib
+    expected_lib = PacBioLibrary(
+        study_id="5901",
+        study_name="DTOL_Darwin Tree of Life",
+        sample_id="9463590",
+        sample_name="DTOL14291044",
+        tag_sequence=["TCTGCATCATGAGTAT"],
+        library_type="Pacbio_HiFi",
+        pool_name="TRAC-2-7676",
+    )
+    assert libs["9463590"] == expected_lib
+
     # A partially linked well with three samples, which belong to two studies.
     # The LIMS link for one of the samples is deleted so that two other samples
     # belong to the same study.
@@ -188,3 +241,6 @@ def test_create_summary_model_study_info(
     )
     pb_well = PacBioWellSummary(db_well=well_row)
     assert pb_well.study_names == []
+
+    with pytest.raises(ValueError, match=r"No LIMS data retrieved"):
+        PacBioWellLibraries(db_well=well_row)

From 00df2edb15ea38c985b6c759677cf700374ef702 Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Tue, 11 Jun 2024 18:59:28 +0100
Subject: [PATCH 4/5] Added an end point for well library data.

---
 lang_qc/endpoints/pacbio_well.py       | 33 +++++++++++-
 lang_qc/models/pacbio/well.py          |  3 +-
 lang_qc/util/errors.py                 |  7 +++
 tests/endpoints/test_well_libraries.py | 70 ++++++++++++++++++++++++++
 tests/test_pb_well_models.py           |  5 +-
 5 files changed, 114 insertions(+), 4 deletions(-)
 create mode 100644 tests/endpoints/test_well_libraries.py

diff --git a/lang_qc/endpoints/pacbio_well.py b/lang_qc/endpoints/pacbio_well.py
index f9d4957..d2a5a61 100644
--- a/lang_qc/endpoints/pacbio_well.py
+++ b/lang_qc/endpoints/pacbio_well.py
@@ -37,13 +37,18 @@
 from lang_qc.db.mlwh_connection import get_mlwh_db
 from lang_qc.db.qc_connection import get_qc_db
 from lang_qc.db.qc_schema import User
-from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWellFull
+from lang_qc.models.pacbio.well import (
+    PacBioPagedWells,
+    PacBioWellFull,
+    PacBioWellLibraries,
+)
 from lang_qc.models.qc_flow_status import QcFlowStatusEnum
 from lang_qc.models.qc_state import QcState, QcStateBasic
 from lang_qc.util.auth import check_user
 from lang_qc.util.errors import (
     InconsistentInputError,
     InvalidDictValueError,
+    MissingLimsDataError,
     RunNotFoundError,
 )
 from lang_qc.util.type_checksum import ChecksumSHA256
@@ -163,6 +168,32 @@ def get_wells_in_run(
     return response
 
 
+@router.get(
+    "/wells/{id_product}/libraries",
+    summary="Get well summary and LIMS data for all libraries",
+    responses={
+        status.HTTP_404_NOT_FOUND: {"description": "Well product does not exist"},
+        status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid product ID"},
+        status.HTTP_409_CONFLICT: {"description": "Missing or incomplete LIMS data"},
+    },
+    response_model=PacBioWellLibraries,
+)
+def get_well_lims_info(
+    id_product: ChecksumSHA256,
+    mlwhdb_session: Session = Depends(get_mlwh_db),
+) -> PacBioWellLibraries:
+    """
+    Returns basic well data together with LIMS data for every library
+    sequenced in the well. Responds with 409 if LIMS data is missing.
+    """
+    db_well = _find_well_product_or_error(id_product, mlwhdb_session)
+    try:
+        return PacBioWellLibraries(db_well=db_well)
+    except MissingLimsDataError as err:
+        # 409 - Request conflicts with the current state of the server.
+        raise HTTPException(status.HTTP_409_CONFLICT, detail=str(err)) from err
+
+
 @router.get(
     "/products/{id_product}/seq_level",
     summary="Get full sequencing QC metrics and state for a product",
diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index e5bf0d8..e809805 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -31,6 +31,7 @@
 from lang_qc.models.pacbio.qc_data import QCDataWell
 from lang_qc.models.pager import PagedResponse
 from lang_qc.models.qc_state import QcState
+from lang_qc.util.errors import MissingLimsDataError
 
 
 def get_field_names(cls):
@@ -199,7 +200,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         mlwh_db_row: PacBioRunWellMetrics = values.kwargs["db_well"]
         lims_data = mlwh_db_row.get_experiment_info()
         if len(lims_data) == 0:
-            raise ValueError(
+            raise MissingLimsDataError(
                 f"No LIMS data retrieved for {mlwh_db_row.__repr__()} "
                 "on account of partially linked or unlinked product data."
             )
diff --git a/lang_qc/util/errors.py b/lang_qc/util/errors.py
index 21dab57..aeac0ef 100644
--- a/lang_qc/util/errors.py
+++ b/lang_qc/util/errors.py
@@ -25,3 +25,10 @@ class EmptyListOfRunNamesError(Exception):
 
 class RunNotFoundError(Exception):
     """Exception to be used when no well metrics data for a run is found."""
+
+
+class MissingLimsDataError(Exception):
+    """
+    Exception to be used when product LIMS data is not available
+    or partially missing. NOTE(review): presumably not a ValueError on purpose.
+    """
diff --git a/tests/endpoints/test_well_libraries.py b/tests/endpoints/test_well_libraries.py
new file mode 100644
index 0000000..d35e476
--- /dev/null
+++ b/tests/endpoints/test_well_libraries.py
@@ -0,0 +1,70 @@
+from fastapi.testclient import TestClient
+from sqlalchemy import select
+
+# from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
+
+
+def test_well_libraries(test_client: TestClient, mlwhdb_load_runs):
+    """Test retrieval of LIMS library data for a well."""
+
+    response = test_client.get("/pacbio/wells/malformed/libraries")
+    assert response.status_code == 422
+
+    id_product = "aaa8bd66e0f0895ea728c1d08103c62d3de8a57a5f879cee45f7b0acc028aa61"
+    response = test_client.get(f"/pacbio/wells/{id_product}/libraries")
+    assert response.status_code == 404
+
+    # Partially linked well
+    id_product = "26928ba6ec2a00c04dd6c7c68008ec9436e3979a384b9f708dc371c99f272e17"
+    response = test_client.get(f"/pacbio/wells/{id_product}/libraries")
+    assert response.status_code == 409
+    assert response.json()["detail"] == "".join(
+        [
+            "No LIMS data retrieved for lang_qc.db.mlwh_schema.PacBioRunWellMetrics:",
+            " pac_bio_run_name=TRACTION-RUN-1140, well_label=C1, plate_number=2,",
+            " id_pac_bio_product=26928ba6ec2a00c04dd6c7c68008ec9436e3979a384b9f708dc371c99f272e17",
+            " on account of partially linked or unlinked product data.",
+        ]
+    )
+
+    # Fully linked well
+    id_product = "513c674f489b106c6af716dd0d210826ff03b7648d50888839c3722ca1b10dbf"
+    response = test_client.get(f"/pacbio/wells/{id_product}/libraries")
+    assert response.status_code == 200
+    expected_response = {
+        "id_product": "513c674f489b106c6af716dd0d210826ff03b7648d50888839c3722ca1b10dbf",
+        "label": "A1",
+        "plate_number": 2,
+        "run_name": "TRACTION-RUN-1140",
+        "run_start_time": "2024-02-23T10:28:12",
+        "run_complete_time": "2024-02-25T20:53:05",
+        "well_start_time": "2024-02-24T14:25:12",
+        "well_complete_time": "2024-02-26T00:27:52",
+        "run_status": "Complete",
+        "well_status": "Complete",
+        "instrument_name": "84093",
+        "instrument_type": "Revio",
+        "qc_state": None,
+        "libraries": [
+            {
+                "study_id": "5901",
+                "study_name": "DTOL_Darwin Tree of Life",
+                "sample_id": "9478726",
+                "sample_name": "DTOL14523243",
+                "tag_sequence": ["ATCTGCACGTGAGTAT"],
+                "library_type": "Pacbio_HiFi",
+                "pool_name": "TRAC-2-7677",
+            },
+            {
+                "study_id": "5901",
+                "study_name": "DTOL_Darwin Tree of Life",
+                "sample_id": "9518398",
+                "sample_name": "DTOL14180244",
+                "tag_sequence": ["ATGTACTAGTGAGTAT"],
+                "library_type": "Pacbio_HiFi",
+                "pool_name": "TRAC-2-7677",
+            },
+        ],
+    }
+
+    assert response.json() == expected_response
diff --git a/tests/test_pb_well_models.py b/tests/test_pb_well_models.py
index ed80b75..ce5560d 100644
--- a/tests/test_pb_well_models.py
+++ b/tests/test_pb_well_models.py
@@ -11,6 +11,7 @@
     PacBioWellLibraries,
     PacBioWellSummary,
 )
+from lang_qc.util.errors import MissingLimsDataError
 from tests.conftest import compare_dates
 from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users
 
@@ -163,7 +164,7 @@ def test_create_summary_and_library_models_lims_info(
     pb_well = PacBioWellSummary(db_well=well_row)
     assert pb_well.study_names == []
 
-    with pytest.raises(ValueError, match=r"No LIMS data retrieved"):
+    with pytest.raises(MissingLimsDataError, match=r"No LIMS data retrieved"):
         PacBioWellLibraries(db_well=well_row)
 
     # Fully linked wells with one sample
@@ -242,5 +243,5 @@ def test_create_summary_and_library_models_lims_info(
     pb_well = PacBioWellSummary(db_well=well_row)
     assert pb_well.study_names == []
 
-    with pytest.raises(ValueError, match=r"No LIMS data retrieved"):
+    with pytest.raises(MissingLimsDataError, match=r"No LIMS data retrieved"):
         PacBioWellLibraries(db_well=well_row)

From fd3e9ef00b758a50ac6a7ab39463fa88f752de2b Mon Sep 17 00:00:00 2001
From: mgcam <mg8@sanger.ac.uk>
Date: Wed, 12 Jun 2024 15:01:54 +0100
Subject: [PATCH 5/5] Dropped direct calls to __repr__()

Also renamed the helper method of the Base class from get_row_description
to _get_row_description to mark it as 'private'.
---
 lang_qc/db/mlwh_schema.py     | 4 ++--
 lang_qc/models/pacbio/well.py | 2 +-
 tests/test_mlwh_db_classes.py | 7 +++----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/lang_qc/db/mlwh_schema.py b/lang_qc/db/mlwh_schema.py
index c0796b1..c1cbff0 100644
--- a/lang_qc/db/mlwh_schema.py
+++ b/lang_qc/db/mlwh_schema.py
@@ -33,7 +33,7 @@ class Base(DeclarativeBase):
     A base class for declarative class definitions for the ml warehouse database.
     """
 
-    def get_row_description(self, fields: list[str]) -> str:
+    def _get_row_description(self, fields: list[str]) -> str:
         """
         Returns a printable representation of the database table row. Interprets
         a list of strings given as the `fields` argument as a list of column
@@ -564,7 +564,7 @@ class PacBioRunWellMetrics(Base):
     def __repr__(self):
         """Returns a printable representation of the database row"""
 
-        return self.get_row_description(
+        return self._get_row_description(
             ["pac_bio_run_name", "well_label", "plate_number", "id_pac_bio_product"]
         )
 
diff --git a/lang_qc/models/pacbio/well.py b/lang_qc/models/pacbio/well.py
index e809805..00926da 100644
--- a/lang_qc/models/pacbio/well.py
+++ b/lang_qc/models/pacbio/well.py
@@ -201,7 +201,7 @@ def pre_root(cls, values: dict[str, Any]) -> dict[str, Any]:
         lims_data = mlwh_db_row.get_experiment_info()
         if len(lims_data) == 0:
             raise MissingLimsDataError(
-                f"No LIMS data retrieved for {mlwh_db_row.__repr__()} "
+                f"No LIMS data retrieved for {str(mlwh_db_row)} "
                 "on account of partially linked or unlinked product data."
             )
 
diff --git a/tests/test_mlwh_db_classes.py b/tests/test_mlwh_db_classes.py
index be0b89a..5b832fe 100644
--- a/tests/test_mlwh_db_classes.py
+++ b/tests/test_mlwh_db_classes.py
@@ -18,7 +18,6 @@ def test_pac_bio_well_metrics_repr(mlwhdb_test_session, mlwhdb_load_runs):
             PacBioRunWellMetrics.id_pac_bio_product == id
         )
         db_row = mlwhdb_test_session.execute(query).scalar_one()
-        assert (
-            db_row.__repr__()
-            == "lang_qc.db.mlwh_schema.PacBioRunWellMetrics: " + data[id]
-        )
+        expected_string = "lang_qc.db.mlwh_schema.PacBioRunWellMetrics: " + data[id]
+        assert db_row.__repr__() == expected_string
+        assert str(db_row) == expected_string