Skip to content

Commit

Permalink
Merge pull request #186 from mgcam/upcoming_tab
Browse files Browse the repository at this point in the history
Upcoming tab for incomplete runs
  • Loading branch information
nerdstrike authored Oct 3, 2023
2 parents d6bdd2b + 633cc90 commit bcc69fd
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 16 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

### Added

* Back-end code for the 'Upcoming' tab. The 'Upcoming' tab is
automatically appended to the collection of the UI tabs for
filtering wells.

### Changed

* Increased the look-back period for the inbox query from 4 weeks to
Expand Down
68 changes: 65 additions & 3 deletions lang_qc/db/helper/wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
from sqlalchemy import and_, or_, select
from sqlalchemy.orm import Session

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.qc import (
get_qc_states_by_id_product_list,
qc_state_for_product_exists,
)
from lang_qc.db.mlwh_schema import PacBioRunWellMetrics
from lang_qc.db.qc_schema import QcState, QcStateDict, QcType
from lang_qc.models.pacbio.well import PacBioPagedWells, PacBioWell
Expand All @@ -42,6 +45,8 @@
Here this type is used to mark variables that are purely internal to the class.
"""

INBOX_LOOK_BACK_NUM_WEEKS = 12


class WellWh(BaseModel):
"""
Expand All @@ -54,7 +59,6 @@ class WellWh(BaseModel):
title="SQLAlchemy Session",
description="A SQLAlchemy Session for the ml warehouse database",
)
INBOX_LOOK_BACK_NUM_WEEKS: ClassVar = 12

class Config:
allow_mutation = False
Expand All @@ -78,6 +82,8 @@ def get_mlwh_well_by_product_id(
def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
"""
Get recent not QC-ed completed wells from the mlwh database.
Recent wells are defined as wells that completed within the
last 12 weeks.
"""

######
Expand All @@ -92,7 +98,7 @@ def recent_completed_wells(self) -> List[PacBioRunWellMetrics]:
# Using current local time.
# Generating a date rather than a timestamp here in order to have a consistent
# earliest date for the look-back period during the QC team's working day.
my_date = date.today() - timedelta(weeks=self.INBOX_LOOK_BACK_NUM_WEEKS)
my_date = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
look_back_min_date = datetime(my_date.year, my_date.month, my_date.day)

# Select the wells that have not been QC-ed, but later double-check against
Expand Down Expand Up @@ -213,6 +219,8 @@ def create_for_qc_status(
QcFlowStatusEnum.UNKNOWN,
]:
wells = self._aborted_and_unknown_wells(qc_flow_status)
elif qc_flow_status == QcFlowStatusEnum.UPCOMING:
wells = self._upcoming_wells()
else:
wells = self._get_wells_for_status(qc_flow_status)

Expand Down Expand Up @@ -322,6 +330,60 @@ def _add_tracking_info(self, wells: List[PacBioWell]):
else:
well.copy_run_tracking_info(db_well)

def _upcoming_wells(self):
    """
    Retrieve the wells for the 'upcoming' QC flow status.

    A well is upcoming when it is recent and does not belong to any other
    QC flow status as defined in QcFlowStatus. Here a recent well is a
    well of a run that started within the last INBOX_LOOK_BACK_NUM_WEEKS
    weeks.

    The number of wells found is saved in the total_number_of_items
    attribute. The wells selected by slice_data() are returned as
    well models.
    """

    # Recent completed wells, which have not been QC-ed, are excluded.
    excluded_product_ids = [
        well.id_pac_bio_product for well in self.recent_completed_wells()
    ]

    # A date rather than a timestamp is used, so the start of the
    # look-back period is midnight of the earliest day.
    earliest_day = date.today() - timedelta(weeks=INBOX_LOOK_BACK_NUM_WEEKS)
    cutoff = datetime.combine(earliest_day, datetime.min.time())

    # If queries for any other filters change, this query should be revised
    # since we are repeating (but negating) a few conditions that are
    # associated with some of the statuses (filters).
    metrics = PacBioRunWellMetrics
    criteria = [
        metrics.run_start > cutoff,
        metrics.qc_seq_state.is_(None),
        metrics.id_pac_bio_product.not_in(excluded_product_ids),
    ]
    criteria.extend(
        metrics.well_status.not_like(pattern)
        for pattern in ("Abort%", "Terminat%", "Fail%", "Error%")
    )
    criteria.append(metrics.well_status.not_in(["Unknown", "On hold"]))

    query = (
        select(metrics)
        .where(*criteria)
        .order_by(
            metrics.run_start,
            metrics.pac_bio_run_name,
            metrics.plate_number,
            metrics.well_label,
        )
    )
    candidates = self.session.execute(query).scalars().all()

    # Keep only the wells for which the QC state lookup returns False.
    upcoming = [
        candidate
        for candidate in candidates
        if qc_state_for_product_exists(
            session=self.qcdb_session,
            id_product=candidate.id_pac_bio_product,
        )
        is False
    ]

    # Save the number of retrieved wells.
    self.total_number_of_items = len(upcoming)

    return self._well_models(self.slice_data(upcoming), False)

def _recent_inbox_wells(self, recent_wells):

inbox_wells_indexes = []
Expand Down
8 changes: 7 additions & 1 deletion lang_qc/models/qc_flow_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,13 @@ class QcFlowStatus(BaseModel):
@unique
class QcFlowStatusEnum(str, Enum):
"""
An enumeration of known QC flow states. The order of the statuses is
An enumeration of known QC flow statuses. The order of the statuses is
consistent with the temporal flow of the manual QC process.
Logically the upcoming status should be at the beginning. In order
to keep the order of the tabs consistent with earlier versions and to
separate this status from the statuses that are more relevant to the
QC process, this status is placed at the end.
"""

INBOX = "inbox"
Expand All @@ -53,6 +58,7 @@ class QcFlowStatusEnum(str, Enum):
QC_COMPLETE = "qc_complete"
ABORTED = "aborted"
UNKNOWN = "unknown"
UPCOMING = "upcoming"

@classmethod
def qc_flow_statuses(cls) -> "List[QcFlowStatus]":
Expand Down
1 change: 1 addition & 0 deletions tests/endpoints/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_get_config(test_client: TestClient, load_dicts_and_users):
{"label": "QC Complete", "param": "qc_complete"},
{"label": "Aborted", "param": "aborted"},
{"label": "Unknown", "param": "unknown"},
{"label": "Upcoming", "param": "upcoming"},
],
"qc_states": [
{"description": "Passed", "only_prelim": False},
Expand Down
16 changes: 16 additions & 0 deletions tests/endpoints/test_filtered_wells.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,22 @@ def test_aborted_filter(test_client: TestClient, load_data4well_retrieval):
_assert_filtered_results(response, [], 10, 100, num_total)


def test_upcoming_filter(test_client: TestClient, load_data4well_retrieval):
    """Check which wells are returned for the `upcoming` filter."""

    # Names of the wells in the order they are expected in the response.
    well_names = (
        "TRACTION_RUN_12:B1",
        "TRACTION_RUN_12:C1",
        "TRACTION_RUN_6:A1",
        "TRACTION_RUN_6:B1",
    )
    expected_data = [{name: None} for name in well_names]

    url = "/pacbio/wells?qc_status=upcoming&page_size=10&page_number=1"
    response = test_client.get(url)

    _assert_filtered_results(response, expected_data, 10, 1, len(expected_data))


def _assert_filtered_results(
response, expected_data, page_size, page_number, total_number
):
Expand Down
61 changes: 49 additions & 12 deletions tests/fixtures/well_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,38 @@
"Revio",
1,
],
[
"TRACTION_RUN_6",
"A1",
"2022-12-12 15:47:25",
"2022-12-19 16:43:31",
"2022-12-12 15:57:31",
"2022-12-14 06:42:33",
"Running",
"Running",
"OnInstrument",
None,
None,
"1234",
"Revio",
2,
],
[
"TRACTION_RUN_6",
"B1",
"2022-12-12 15:47:25",
"2022-12-19 16:43:31",
"2022-12-13 20:52:47",
"2022-12-15 10:37:35",
"Running",
"Running",
"OnInstrument",
None,
None,
"1234",
"Revio",
2,
],
[
"TRACTION_RUN_7",
"A1",
Expand Down Expand Up @@ -866,7 +898,7 @@ def load_data4well_retrieval(
# We want some wells to be in the inbox. For that, their run_complete dates
# should be within, for example, the last four weeks. Therefore, we need to
# update the timestamps for these runs.
_update_timestamps4inbox()
_update_timestamps()

# Transform a list of lists into a list of hashes, which map to db rows.
mlwh_data4insert = []
Expand All @@ -890,7 +922,7 @@ def load_data4well_retrieval(
"instrument_type": record[12],
"plate_number": record[13],
}
# Add QC state for one runs.
# Add QC state for one run.
if (data["pac_bio_run_name"] == "TRACTION_RUN_4") and (
data["well_label"] in ("A1", "B1")
):
Expand Down Expand Up @@ -956,31 +988,35 @@ def _get_dict_of_dict_rows(qcdb_test_session):
}


def _update_timestamps4inbox():
def _update_timestamps():

# Designated inbox wells:
# TRACTION_RUN_3 - A1, B1,
# TRACTION_RUN_4 - C1, D1,
# TRACTION_RUN_10 - A1, B1, C1
# TRACTION_RUN_12 - A1

#
# These wells do not have a record in a fixture for the LangQC database;
# the values for their run status, ccs_execution_mode, polymerase_num_reads
# and hifi_num_reads are set in a way that makes them eligible for the QC
# inbox. Here we make sure that these wells have recent (i.e. within 4 weeks)
# completion dates.
# We also update dates for TRACTION_RUN_1, which does have wells in QC.

# We also update dates for TRACTION_RUN_1, which does have wells in QC,
# and TRACTION_RUN_6, which partially fits into the upcoming status.

# Find the earliest date in the set.
inbox_runs = [f"TRACTION_RUN_{run}" for run in (1, 3, 4, 10, 12)]
runs = [f"TRACTION_RUN_{run}" for run in (1, 3, 4, 6, 10, 12)]
date_tuples = [
(record[2], record[3], record[4], record[5])
for record in MLWH_DATA
if record[0] in inbox_runs
if record[0] in runs
]
dates = []
for dt in date_tuples:
dates.extend([datetime.strptime(date, DATE_FORMAT) for date in dt])
dates.extend(
[datetime.strptime(date, DATE_FORMAT) for date in dt if date is not None]
)
old_earliest = min(dates)
# Find the date 26 days from today.
new_earliest = date.today() - timedelta(days=26)
Expand All @@ -989,9 +1025,10 @@ def _update_timestamps4inbox():
datetime(new_earliest.year, new_earliest.month, new_earliest.day) - old_earliest
)
delta_plus = timedelta(delta.days)
# Amend all dates for the inbox data by adding delta.
# Amend all dates by adding delta.
for index, record in enumerate(MLWH_DATA):
if record[0] in inbox_runs:
if record[0] in runs:
for i in (2, 3, 4, 5):
time = datetime.strptime(record[i], DATE_FORMAT) + delta_plus
MLWH_DATA[index][i] = time.strftime(DATE_FORMAT)
if record[i] is not None:
time = datetime.strptime(record[i], DATE_FORMAT) + delta_plus
MLWH_DATA[index][i] = time.strftime(DATE_FORMAT)
2 changes: 2 additions & 0 deletions tests/test_pb_wells_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,14 @@ def test_paged_retrieval_for_statuses(
QcFlowStatusEnum.IN_PROGRESS.name: 11,
QcFlowStatusEnum.ON_HOLD.name: 2,
QcFlowStatusEnum.QC_COMPLETE.name: 4,
QcFlowStatusEnum.UPCOMING.name: 4,
}

for status in [
QcFlowStatusEnum.IN_PROGRESS,
QcFlowStatusEnum.ON_HOLD,
QcFlowStatusEnum.QC_COMPLETE,
QcFlowStatusEnum.UPCOMING,
]:

factory = PacBioPagedWellsFactory(
Expand Down

0 comments on commit bcc69fd

Please sign in to comment.