Skip to content

Commit

Permalink
Merge pull request #228 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
Release 2.2.0
  • Loading branch information
nerdstrike authored Jun 11, 2024
2 parents e6e9dff + 9d68ec0 commit 828ce4d
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 13 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]

## [2.2.0] - 2024-06-11

### Added

* New endpoint `/products/qc?weeks={n}&seq_level={bool}&final={bool}` added to support potential email notifications when QC states are finalised. It returns recent QC events.

### Fixed

* Warehouse schema updated to match breaking change in pac_bio_product_metrics table

## [2.1.0] - 2024-04-15

### Changed
Expand Down Expand Up @@ -77,7 +87,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
* The client side JavaScript dependency, element-plus, is pinned
to version 2.4.4. In mid-January 2024 this was the highest version
that worked with our code. The version expression we had "^2.3.7"
allowed for fetching the latest available version of this library.
allowed for fetching the latest available version of this library.

## [1.5.0]

Expand Down
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "npg-longue-vue",
"version": "2.1.0",
"version": "2.2.0",
"description": "UI for LangQC",
"author": "Kieron Taylor <[email protected]>",
"license": "GPL-3.0-or-later",
Expand Down
2 changes: 1 addition & 1 deletion lang_qc/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.1.0"
__version__ = "2.2.0"
58 changes: 57 additions & 1 deletion lang_qc/db/helper/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# this program. If not, see <http://www.gnu.org/licenses/>.

from collections import defaultdict
from datetime import datetime
from datetime import date, datetime, timedelta

from sqlalchemy import and_, func, select
from sqlalchemy.exc import NoResultFound
Expand Down Expand Up @@ -105,6 +105,62 @@ def get_qc_states_by_id_product_list(
return dict(response)


def get_qc_states(
    session: Session,
    num_weeks: int,
    sequencing_outcomes_only: bool = False,
    final_only: bool = False,
) -> dict[ChecksumSHA256, list[QcState]]:
    """
    Returns a dictionary where keys are the product IDs, and the values are
    lists of QcState records of any type for the same product.

    Only QC states whose `date_updated` value is later than today's date
    minus `num_weeks` weeks are considered.

    Arguments:
        session - the SQLAlchemy session used to execute the query.
        num_weeks - the size of the look-back time window in weeks,
            must be one or greater.
        sequencing_outcomes_only - if set to True, only sequencing type
            QC states are returned. In this case each list of QcState
            objects is expected to have only one member.
        final_only - if set to True, only final (non-preliminary) QC states
            are returned.

    Raises ValueError if `num_weeks` is less than one.
    """

    if num_weeks < 1:
        raise ValueError("num_weeks should be a positive number")

    query = (
        select(QcStateDb)
        .join(QcStateDb.seq_product)
        .join(QcType)
        .join(QcStateDict)
        .join(User)
        .where(QcStateDb.date_updated > date.today() - timedelta(weeks=num_weeks))
        .options(
            # Load the related rows together with the QC states; they are
            # requested here so that the conversion to the QcState model
            # below does not have to fetch them one at a time.
            selectinload(QcStateDb.seq_product),
            selectinload(QcStateDb.qc_type),
            selectinload(QcStateDb.user),
            selectinload(QcStateDb.qc_state_dict),
        )
    )
    # The optional filters are applied only when the flags are literally True.
    if sequencing_outcomes_only is True:
        query = query.where(QcType.qc_type == SEQUENCING_QC_TYPE)
    if final_only is True:
        query = query.where(QcStateDb.is_preliminary == 0)

    # Group the converted records by product ID, preserving the row order.
    qc_states_by_product = defaultdict(list)
    for row in session.execute(query).scalars().all():
        qc_state = QcState.from_orm(row)
        qc_states_by_product[qc_state.id_product].append(qc_state)

    # Return a plain dict so that callers do not get implicit key creation.
    return dict(qc_states_by_product)


def product_has_qc_state(
session: Session, id_product: ChecksumSHA256, qc_type: str = None
) -> bool:
Expand Down
5 changes: 0 additions & 5 deletions lang_qc/db/mlwh_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,11 +614,6 @@ class PacBioProductMetrics(Base):
nullable=True,
comment="The mean barcode HiFi quality score",
)
hifi_read_quality_mean = Column(
mysqlINTEGER(unsigned=True),
nullable=True,
comment="The mean HiFi base quality",
)
hifi_bases_percent = Column(
mysqlFLOAT(),
nullable=True,
Expand Down
44 changes: 42 additions & 2 deletions lang_qc/endpoints/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,19 @@
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.

from fastapi import APIRouter, Depends
from typing import Annotated

from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from starlette import status

from lang_qc.db.helper.qc import get_qc_states_by_id_product_list
from lang_qc.db.helper.qc import get_qc_states, get_qc_states_by_id_product_list
from lang_qc.db.qc_connection import get_qc_db
from lang_qc.models.qc_state import QcState
from lang_qc.util.type_checksum import ChecksumSHA256

RECENTLY_QCED_NUM_WEEKS = 4

router = APIRouter(
prefix="/products",
tags=["product"],
Expand Down Expand Up @@ -62,3 +66,39 @@ def bulk_qc_fetch(
):

return get_qc_states_by_id_product_list(session=qcdb_session, ids=request_body)


# GET /products/qc - recent QC states grouped by product ID.
@router.get(
    "/qc",
    summary="Returns a dictionary of QC states",
    description="""
The response is a dictionary of lists of QcState models hashed on product IDs.
Multiple QC states for the same product might be returned if the query is not
constrained to a single QC type.
Query parameters constrain the semantics of the response.
`weeks` - number of weeks to look back, defaults to four.
`seq_level` - a boolean option. If `True`, only `sequencing` type QC states
are returned. If `False` (the default), all types of QC states are
returned.
`final` - a boolean option. If `True`, only final QC states are returned.
If `False` (the default), both final and preliminary QC states are
returned.
""",
    responses={
        status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid number of weeks"}
    },
    response_model=dict[ChecksumSHA256, list[QcState]],
)
def qc_fetch(
    # `gt=0` makes the framework reject zero and negative values before
    # this function is called.
    weeks: Annotated[int, Query(gt=0)] = RECENTLY_QCED_NUM_WEEKS,
    seq_level: bool = False,
    final: bool = False,
    qcdb_session: Session = Depends(get_qc_db),
) -> dict[ChecksumSHA256, list[QcState]]:
    """
    Returns QC states updated within the last `weeks` weeks as a dictionary
    of lists of QcState models keyed by product ID.

    The query parameters are passed straight to `get_qc_states`:
    `weeks` as `num_weeks`, `seq_level` as `sequencing_outcomes_only`
    and `final` as `final_only`.
    """
    return get_qc_states(
        session=qcdb_session,
        num_weeks=weeks,
        sequencing_outcomes_only=seq_level,
        final_only=final,
    )
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "npg_langqc"
packages = [
{ include = "lang_qc" },
]
version = "2.1.0"
version = "2.2.0"
description = "FastAPI application for Long Read QC"
authors = ["Adam Blanchet", "Marina Gourtovaia <[email protected]>", "Kieron Taylor <[email protected]>"]
license = "GPL-3.0-or-later"
Expand Down
52 changes: 52 additions & 0 deletions tests/endpoints/test_dump_qc_states.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from datetime import datetime

import pytest
from fastapi.testclient import TestClient

from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users
Expand Down Expand Up @@ -57,3 +60,52 @@ def test_get_qc_by_product_id(test_client: TestClient, load_data4well_retrieval)
assert len(response_data) == 1
assert MISSING_CHECKSUM not in response_data
assert FIRST_GOOD_CHECKSUM in response_data


def test_get_qc(test_client: TestClient, load_data4well_retrieval):
    """Exercise GET /products/qc with different query parameters."""

    def total_states(states_by_product):
        # Number of QC state records across all products in a response.
        return sum(len(states) for states in states_by_product.values())

    # The default look-back window contains no test QC states.
    default_response = test_client.get("/products/qc")
    assert default_response.status_code == 200
    assert len(default_response.json()) == 0

    # A negative number of weeks fails validation.
    invalid_response = test_client.get("/products/qc?weeks=-1")
    assert invalid_response.status_code == 422

    # Earliest test QC states are updated on 2022-02-15
    interval = datetime.today() - datetime(year=2022, month=2, day=15)
    num_weeks = int(interval.days / 7 + 2)
    base_url = f"/products/qc?weeks={num_weeks}"

    # (extra query string, expected number of products, expected number
    # of QC states) for a window that covers all test data.
    expectations = [
        ("", 18, 34),
        ("&final=false&seq_level=no", 18, 34),
        ("&final=true", 4, 8),
    ]
    for extra_query, num_products, num_states in expectations:
        response = test_client.get(base_url + extra_query)
        assert response.status_code == 200
        payload = response.json()
        assert len(payload) == num_products
        assert total_states(payload) == num_states

    # Final sequencing states only: one QC state per product.
    response = test_client.get(base_url + "&final=True&seq_level=yes")
    assert response.status_code == 200
    payload = response.json()
    assert len(payload) == 4
    assert total_states(payload) == 4

    product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585"
    assert product_id in payload
    qc_state = payload[product_id][0]
    assert qc_state["id_product"] == product_id
    assert qc_state["is_preliminary"] is False
    assert qc_state["qc_type"] == "sequencing"
113 changes: 112 additions & 1 deletion tests/test_qc_state_retrieval.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from datetime import datetime, timedelta

import pytest
from sqlalchemy import select

from lang_qc.db.helper.qc import (
get_qc_state_for_product,
get_qc_states,
get_qc_states_by_id_product_list,
product_has_qc_state,
products_have_qc_state,
qc_state_dict,
)
from lang_qc.db.qc_schema import QcState
from lang_qc.models.qc_state import QcState as QcStateModel
from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users

MISSING_CHECKSUM = "A" * 64
Expand All @@ -24,7 +30,7 @@
two_good_ids_list = [FIRST_GOOD_CHECKSUM, SECOND_GOOD_CHECKSUM]


def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval):
def test_bulk_retrieval_by_id(qcdb_test_session, load_data4well_retrieval):

# The test below demonstrates that no run-time type checking of
# product IDs is performed.
Expand Down Expand Up @@ -66,6 +72,111 @@ def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval):
assert MISSING_CHECKSUM not in qc_states


def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval):
    """
    Tests get_qc_states() against the QC states loaded by the fixture:
    argument validation, the look-back window and the optional filters
    for final and sequencing type QC states.
    """

    def count_states(states_by_product):
        # Total number of QcState objects across all products.
        return sum(len(states) for states in states_by_product.values())

    def states_per_product(states_by_product):
        # Maps each product ID to the number of its QcState objects.
        return {
            product_id: len(states)
            for (product_id, states) in states_by_product.items()
        }

    def is_final(db_state):
        return db_state.is_preliminary == 0

    def is_sequencing(db_state):
        return db_state.qc_type.qc_type == "sequencing"

    # No `assert` here: the call is expected to raise, so its return value
    # must not be evaluated for truthiness.
    with pytest.raises(ValueError, match=r"num_weeks should be a positive number"):
        get_qc_states(qcdb_test_session, num_weeks=-1)

    # All QC states in the test database, most recently updated first.
    qc_states = (
        qcdb_test_session.execute(select(QcState).order_by(QcState.date_updated.desc()))
        .scalars()
        .all()
    )
    now = datetime.today()
    max_interval = now - qc_states[-1].date_updated
    max_num_weeks = int(max_interval.days / 7 + 1)
    min_interval = now - qc_states[0].date_updated
    min_num_weeks = int(min_interval.days / 7 - 1)

    assert min_num_weeks > 2
    # Set the look-back number of weeks to the period with no records.
    qc_states_dict = get_qc_states(qcdb_test_session, num_weeks=(min_num_weeks - 1))
    assert len(qc_states_dict) == 0

    # Retrieve all available QC states.
    qc_states_dict = get_qc_states(qcdb_test_session, num_weeks=max_num_weeks)
    # Test total number of QcState objects.
    assert count_states(qc_states_dict) == len(qc_states)
    # Test number of items in the dictionary.
    assert len(qc_states_dict) == len(
        {db_state.id_seq_product for db_state in qc_states}
    )

    # Retrieve all available final QC states.
    qc_states_dict = get_qc_states(
        qcdb_test_session, num_weeks=max_num_weeks, final_only=True
    )
    assert count_states(qc_states_dict) == sum(
        1 for db_state in qc_states if is_final(db_state)
    )
    assert states_per_product(qc_states_dict) == {
        "e47765a207c810c2c281d5847e18c3015f3753b18bd92e8a2bea1219ba3127ea": 2,
        "977089cd272dffa70c808d74159981c0d1363840875452a868a4c5e15f1b2072": 2,
        "dc99ab8cb6762df5c935adaeb1f0c49ff34af96b6fa3ebf9a90443079c389579": 2,
        "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585": 2,
    }

    # Retrieve all available sequencing type QC states.
    qc_states_dict = get_qc_states(
        qcdb_test_session, num_weeks=max_num_weeks, sequencing_outcomes_only=True
    )
    assert len(qc_states_dict) == sum(
        1 for db_state in qc_states if is_sequencing(db_state)
    )

    # Retrieve all available sequencing type final QC states.
    qc_states_dict = get_qc_states(
        qcdb_test_session,
        num_weeks=max_num_weeks,
        final_only=True,
        sequencing_outcomes_only=True,
    )
    assert len(qc_states_dict) == sum(
        1
        for db_state in qc_states
        if (is_final(db_state) and is_sequencing(db_state))
    )
    assert states_per_product(qc_states_dict) == {
        "e47765a207c810c2c281d5847e18c3015f3753b18bd92e8a2bea1219ba3127ea": 1,
        "977089cd272dffa70c808d74159981c0d1363840875452a868a4c5e15f1b2072": 1,
        "dc99ab8cb6762df5c935adaeb1f0c49ff34af96b6fa3ebf9a90443079c389579": 1,
        "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585": 1,
    }

    # Retrieve recent sequencing type final QC states.
    # NOTE(review): the 44 week offset is tied to the dates in the fixture
    # data, presumably so that a single product remains - confirm.
    num_weeks = max_num_weeks - 44
    qc_states_dict = get_qc_states(
        qcdb_test_session,
        num_weeks=num_weeks,
        final_only=True,
        sequencing_outcomes_only=True,
    )
    earliest_time = now - timedelta(weeks=num_weeks)
    assert len(qc_states_dict) == sum(
        1
        for db_state in qc_states
        if (
            db_state.date_updated > earliest_time
            and is_final(db_state)
            and is_sequencing(db_state)
        )
    )
    product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585"
    assert states_per_product(qc_states_dict) == {product_id: 1}
    qc_state = qc_states_dict[product_id][0]
    assert isinstance(qc_state, QcStateModel)
    assert qc_state.id_product == product_id
    assert qc_state.is_preliminary is False
    assert qc_state.qc_type == "sequencing"


def test_product_existence(qcdb_test_session, load_data4well_retrieval):

assert product_has_qc_state(qcdb_test_session, MISSING_CHECKSUM) is False
Expand Down

0 comments on commit 828ce4d

Please sign in to comment.