Merge pull request #186 from mgcam/upcoming_tab

Upcoming tab for incomplete runs
wtsi-npg · Oct 3, 2023 · bcc69fd · bcc69fd
2 parents d6bdd2b + 633cc90
commit bcc69fd
Show file tree

Hide file tree

Showing 7 changed files with 146 additions and 16 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## [Unreleased]
 
+### Added
+
+* Back-end code for the 'Upcoming' tab. The 'Upcoming' tab is
+  automatically appended to the collection of the UI tabs for
+  filtering wells.
+
 ### Changed
 
 * Increased the look-back period for the inbox query from 4 weeks to

diff --git a/lang_qc/db/helper/wells.py b/lang_qc/db/helper/wells.py
@@ -27,7 +27,10 @@
 from sqlalchemy import and_, or_, select
 from sqlalchemy.orm import Session
 
-from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
+from lang_qc.db.helper.qc import (
+    get_qc_states_by_id_product_list,
+    qc_state_for_product_exists,
+)
 from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
 from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
 from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell
@@ -42,6 +45,8 @@
 Here this type is used to mark variables that are purely internal to the class.
 """
 
+INBOX_LOOK_BACK_NUM_WEEKS = 12
+
 
 class WellWh(BaseModel):
     """
@@ -54,7 +59,6 @@ class WellWh(BaseModel):
         title="SQLAlchemy Session",
         description="A SQLAlchemy Session for the ml warehouse database",
     )
-    INBOX_LOOK_BACK_NUM_WEEKS: ClassVar = 12
 
     class Config:
         allow_mutation = False
@@ -78,6 +82,8 @@ def get_mlwh_well_by_product_id(
     def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
         """
         Get recent not QC-ed completed wells from the mlwh database.
+        Recent wells are defined as wells that completed within the
+        last 12 weeks.
         """
 
         ######
@@ -92,7 +98,7 @@ def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
         # Using current local time.
         # Generating a date rather than a timestamp here in order to have a consistent
         # earliest date for the look-back period during the QC team's working day.
-        my_date = date.today() - timedelta(weeks=self.INBOX_LOOK_BACK_NUM_WEEKS)
+        my_date = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
         look_back_min_date = datetime(my_date.year, my_date.month, my_date.day)
 
         # Select the wells that have not been QC-ed, but later double-check against
@@ -213,6 +219,8 @@ def create_for_qc_status(
             QcFlowStatusEnum.UNKNOWN,
         ]:
             wells = self._aborted_and_unknown_wells(qc_flow_status)
+        elif qc_flow_status == QcFlowStatusEnum.UPCOMING:
+            wells = self._upcoming_wells()
         else:
             wells = self._get_wells_for_status(qc_flow_status)
 
@@ -322,6 +330,60 @@ def _add_tracking_info(self, wells: List[PacBioWell]):
             else:
                 well.copy_run_tracking_info(db_well)
 
+    def _upcoming_wells(self):
+        """
+        Upcoming wells are recent wells, which do not belong to any other
+        QC flow statuses as defined in QcFlowStatus. Recent wells are defined
+        as wells that belong to runs that started within the last 12 weeks.
+        """
+
+        recent_completed_product_ids = [
+            w.id_pac_bio_product for w in self.recent_completed_wells()
+        ]
+
+        my_date = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
+        look_back_min_date = datetime(my_date.year, my_date.month, my_date.day)
+
+        # If queries for any other filters change, this query should be revised
+        # since we are repeating (but negating) a few conditions that are
+        # associated with some of the statuses (filters).
+
+        query = (
+            select(PacBioRunWellMetrics)
+            .where(PacBioRunWellMetrics.run_start > look_back_min_date)
+            .where(PacBioRunWellMetrics.qc_seq_state.is_(None))
+            .where(
+                PacBioRunWellMetrics.id_pac_bio_product.not_in(
+                    recent_completed_product_ids
+                )
+            )
+            .where(PacBioRunWellMetrics.well_status.not_like("Abort%"))
+            .where(PacBioRunWellMetrics.well_status.not_like("Terminat%"))
+            .where(PacBioRunWellMetrics.well_status.not_like("Fail%"))
+            .where(PacBioRunWellMetrics.well_status.not_like("Error%"))
+            .where(PacBioRunWellMetrics.well_status.not_in(["Unknown", "On hold"]))
+            .order_by(
+                PacBioRunWellMetrics.run_start,
+                PacBioRunWellMetrics.pac_bio_run_name,
+                PacBioRunWellMetrics.plate_number,
+                PacBioRunWellMetrics.well_label,
+            )
+        )
+
+        wells = []
+        for w in self.session.execute(query).scalars().all():
+            if (
+                qc_state_for_product_exists(
+                    session=self.qcdb_session, id_product=w.id_pac_bio_product
+                )
+                is False
+            ):
+                wells.append(w)
+
+        self.total_number_of_items = len(wells)  # Save the number of retrieved wells.
+
+        return self._well_models(self.slice_data(wells), False)
+
     def _recent_inbox_wells(self, recent_wells):
 
         inbox_wells_indexes = []

diff --git a/lang_qc/models/qc_flow_status.py b/lang_qc/models/qc_flow_status.py
@@ -43,8 +43,13 @@ class QcFlowStatus(BaseModel):
 @unique
 class QcFlowStatusEnum(str, Enum):
     """
-    An enumeration of known QC flow states. The order of the statuses is
+    An enumeration of known QC flow statuses. The order of the statuses is
     consistent with the temporal flow of the manual QC process.
+
+    Logically the upcoming status should be at the beginning. In order
+    to keep the order of the tabs consistent with earlier versions and to
+    separate this status from the statuses that are more relevant to the
+    QC process, this status is placed at the end.
     """
 
     INBOX = "inbox"
@@ -53,6 +58,7 @@ class QcFlowStatusEnum(str, Enum):
     QC_COMPLETE = "qc_complete"
     ABORTED = "aborted"
     UNKNOWN = "unknown"
+    UPCOMING = "upcoming"
 
     @classmethod
     def qc_flow_statuses(cls) -> "List[QcFlowStatus]":

diff --git a/tests/endpoints/test_config.py b/tests/endpoints/test_config.py
@@ -15,6 +15,7 @@ def test_get_config(test_client: TestClient, load_dicts_and_users):
             {"label": "QC Complete", "param": "qc_complete"},
             {"label": "Aborted", "param": "aborted"},
             {"label": "Unknown", "param": "unknown"},
+            {"label": "Upcoming", "param": "upcoming"},
         ],
         "qc_states": [
             {"description": "Passed", "only_prelim": False},

diff --git a/tests/endpoints/test_filtered_wells.py b/tests/endpoints/test_filtered_wells.py
@@ -251,6 +251,22 @@ def test_aborted_filter(test_client: TestClient, load_data4well_retrieval):
     _assert_filtered_results(response, [], 10, 100, num_total)
 
 
+def test_upcoming_filter(test_client: TestClient, load_data4well_retrieval):
+    """Test passing `upcoming` filter."""
+
+    expected_data = [
+        {"TRACTION_RUN_12:B1": None},
+        {"TRACTION_RUN_12:C1": None},
+        {"TRACTION_RUN_6:A1": None},
+        {"TRACTION_RUN_6:B1": None},
+    ]
+    num_total = len(expected_data)
+    response = test_client.get(
+        "/pacbio/wells?qc_status=upcoming&page_size=10&page_number=1"
+    )
+    _assert_filtered_results(response, expected_data, 10, 1, num_total)
+
+
 def _assert_filtered_results(
     response, expected_data, page_size, page_number, total_number
 ):

diff --git a/tests/fixtures/well_data.py b/tests/fixtures/well_data.py
@@ -440,6 +440,38 @@
         "Revio",
         1,
     ],
+    [
+        "TRACTION_RUN_6",
+        "A1",
+        "2022-12-12 15:47:25",
+        "2022-12-19 16:43:31",
+        "2022-12-12 15:57:31",
+        "2022-12-14 06:42:33",
+        "Running",
+        "Running",
+        "OnInstrument",
+        None,
+        None,
+        "1234",
+        "Revio",
+        2,
+    ],
+    [
+        "TRACTION_RUN_6",
+        "B1",
+        "2022-12-12 15:47:25",
+        "2022-12-19 16:43:31",
+        "2022-12-13 20:52:47",
+        "2022-12-15 10:37:35",
+        "Running",
+        "Running",
+        "OnInstrument",
+        None,
+        None,
+        "1234",
+        "Revio",
+        2,
+    ],
     [
         "TRACTION_RUN_7",
         "A1",
@@ -866,7 +898,7 @@ def load_data4well_retrieval(
     # We want some wells to be in the inbox. For that their run_complete dates
     # should be within, for example, the last four weeks. Therefore, we need to
     # update the timestamps for these runs.
-    _update_timestamps4inbox()
+    _update_timestamps()
 
     # Transform a list of lists into a list of hashes, which map to db rows.
     mlwh_data4insert = []
@@ -890,7 +922,7 @@ def load_data4well_retrieval(
             "instrument_type": record[12],
             "plate_number": record[13],
         }
-        # Add QC state for one runs.
+        # Add QC state for one run.
         if (data["pac_bio_run_name"] == "TRACTION_RUN_4") and (
             data["well_label"] in ("A1", "B1")
         ):
@@ -956,31 +988,35 @@ def _get_dict_of_dict_rows(qcdb_test_session):
     }
 
 
-def _update_timestamps4inbox():
+def _update_timestamps():
 
     # Designated inbox wells:
     # TRACTION_RUN_3 - A1, B1,
     # TRACTION_RUN_4 - C1, D1,
     # TRACTION_RUN_10 - A1, B1, C1
     # TRACTION_RUN_12 - A1
-
+    #
     # These wells do not have a record in a fixture for the LangQC database,
     # values for their run status, ccs_execution_mode, polymerase_num_reads,
     # hifi_num_reads are set in a way that makes them eligible for the QC
     # inbox. Here we make sure that these wells have recent (ie within 4 weeks)
     # completion dates.
-    # We also update dates for TRACTION_RUN_1, which does have wells in QC.
+
+    # We also update dates for TRACTION_RUN_1, which does have wells in QC,
+    # and TRACTION_RUN_6, which partially fits into the upcoming status.
 
     # Find the earliest date in the set.
-    inbox_runs = [f"TRACTION_RUN_{run}" for run in (1, 3, 4, 10, 12)]
+    runs = [f"TRACTION_RUN_{run}" for run in (1, 3, 4, 6, 10, 12)]
     date_tuples = [
         (record[2], record[3], record[4], record[5])
         for record in MLWH_DATA
-        if record[0] in inbox_runs
+        if record[0] in runs
     ]
     dates = []
     for dt in date_tuples:
-        dates.extend([datetime.strptime(date, DATE_FORMAT) for date in dt])
+        dates.extend(
+            [datetime.strptime(date, DATE_FORMAT) for date in dt if date is not None]
+        )
     old_earliest = min(dates)
     # Find the date 26 days from today.
     new_earliest = date.today() - timedelta(days=26)
@@ -989,9 +1025,10 @@ def _update_timestamps4inbox():
         datetime(new_earliest.year, new_earliest.month, new_earliest.day) - old_earliest
     )
     delta_plus = timedelta(delta.days)
-    # Amend all dates for the inbox data by adding delta.
+    # Amend all dates by adding delta.
     for index, record in enumerate(MLWH_DATA):
-        if record[0] in inbox_runs:
+        if record[0] in runs:
             for i in (2, 3, 4, 5):
-                time = datetime.strptime(record[i], DATE_FORMAT) + delta_plus
-                MLWH_DATA[index][i] = time.strftime(DATE_FORMAT)
+                if record[i] is not None:
+                    time = datetime.strptime(record[i], DATE_FORMAT) + delta_plus
+                    MLWH_DATA[index][i] = time.strftime(DATE_FORMAT)
diff --git a/tests/test_pb_wells_factory.py b/tests/test_pb_wells_factory.py
@@ -174,12 +174,14 @@ def test_paged_retrieval_for_statuses(
         QcFlowStatusEnum.IN_PROGRESS.name: 11,
         QcFlowStatusEnum.ON_HOLD.name: 2,
         QcFlowStatusEnum.QC_COMPLETE.name: 4,
+        QcFlowStatusEnum.UPCOMING.name: 4,
     }
 
     for status in [
         QcFlowStatusEnum.IN_PROGRESS,
         QcFlowStatusEnum.ON_HOLD,
         QcFlowStatusEnum.QC_COMPLETE,
+        QcFlowStatusEnum.UPCOMING,
     ]:
 
         factory = PacBioPagedWellsFactory(