From 8b1895b5b2c117ae29f48d0de14df4b862cb0915 Mon Sep 17 00:00:00 2001 From: mgcam Date: Thu, 23 May 2024 15:59:41 +0100 Subject: [PATCH 1/2] Added an endpoint for retrieving recent Qc states. --- lang_qc/db/helper/qc.py | 58 ++++++++++++- lang_qc/endpoints/product.py | 44 +++++++++- tests/endpoints/test_dump_qc_states.py | 52 ++++++++++++ tests/test_qc_state_retrieval.py | 113 ++++++++++++++++++++++++- 4 files changed, 263 insertions(+), 4 deletions(-) diff --git a/lang_qc/db/helper/qc.py b/lang_qc/db/helper/qc.py index fd635ce0..4ddd3290 100644 --- a/lang_qc/db/helper/qc.py +++ b/lang_qc/db/helper/qc.py @@ -20,7 +20,7 @@ # this program. If not, see . from collections import defaultdict -from datetime import datetime +from datetime import date, datetime, timedelta from sqlalchemy import and_, func, select from sqlalchemy.exc import NoResultFound @@ -105,6 +105,62 @@ def get_qc_states_by_id_product_list( return dict(response) +def get_qc_states( + session: Session, + num_weeks: int, + sequencing_outcomes_only: bool = False, + final_only: bool = False, +) -> dict[ChecksumSHA256, list[QcState]]: + """ + Returns a dictionary where keys are the product IDs, and the values are + lists of QcState records of any type for the same product. + + The num_weeks argument limits the look-back time window. + + If only sequencing type QC states are required, an optional + argument, sequencing_outcomes_only, should be set to True. + In this case it is guaranteed that the list of QcState objects + has only one member. + + If only final QC states are required, an optional argument final_only + should be set to True. 
+ """ + + if num_weeks < 1: + raise ValueError("num_weeks should be a positive number") + + query = ( + select(QcStateDb) + .join(QcStateDb.seq_product) + .join(QcType) + .join(QcStateDict) + .join(User) + .where(QcStateDb.date_updated > date.today() - timedelta(weeks=num_weeks)) + .options( + selectinload(QcStateDb.seq_product), + selectinload(QcStateDb.qc_type), + selectinload(QcStateDb.user), + selectinload(QcStateDb.qc_state_dict), + ) + ) + if sequencing_outcomes_only is True: + query = query.where(QcType.qc_type == SEQUENCING_QC_TYPE) + if final_only is True: + query = query.where(QcStateDb.is_preliminary == 0) + + qc_states_dict = dict() + for qc_state in [ + QcState.from_orm(row) for row in session.execute(query).scalars().all() + ]: + id = qc_state.id_product + if id in qc_states_dict: + qc_states_dict[id].append(qc_state) + else: + qc_states_dict[id] = [qc_state] + + return qc_states_dict + + def product_has_qc_state( session: Session, id_product: ChecksumSHA256, qc_type: str = None ) -> bool: diff --git a/lang_qc/endpoints/product.py b/lang_qc/endpoints/product.py index d7805552..4abcb5a5 100644 --- a/lang_qc/endpoints/product.py +++ b/lang_qc/endpoints/product.py @@ -18,15 +18,19 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . 
-from fastapi import APIRouter, Depends +from typing import Annotated + +from fastapi import APIRouter, Depends, Query from sqlalchemy.orm import Session from starlette import status -from lang_qc.db.helper.qc import get_qc_states_by_id_product_list +from lang_qc.db.helper.qc import get_qc_states, get_qc_states_by_id_product_list from lang_qc.db.qc_connection import get_qc_db from lang_qc.models.qc_state import QcState from lang_qc.util.type_checksum import ChecksumSHA256 +RECENTLY_QCED_NUM_WEEKS = 4 + router = APIRouter( prefix="/products", tags=["product"], @@ -62,3 +66,39 @@ def bulk_qc_fetch( ): return get_qc_states_by_id_product_list(session=qcdb_session, ids=request_body) + + +@router.get( + "/qc", + summary="Returns a dictionary of QC states", + description=""" + The response is a dictionary of lists of QcState models hashed on product IDs. + Multiple QC states for the same product might be returned if the query is not + constrained to a single QC type. + + Query parameters constrain the semantics of the response. + `weeks` - number of weeks to look back, defaults to four. + `seq_level` - a boolean option. If `True`, only `sequencing` type QC states + are returned. If `False` (the default), all types of QC states are + returned. + `final` - a boolean option. If `True`, only final QC states are returned. + If `False` (the default), both final and preliminary QC states are + returned. 
+ """, + responses={ + status.HTTP_422_UNPROCESSABLE_ENTITY: {"description": "Invalid number of weeks"} + }, + response_model=dict[ChecksumSHA256, list[QcState]], +) +def qc_fetch( + weeks: Annotated[int, Query(gt=0)] = RECENTLY_QCED_NUM_WEEKS, + seq_level: bool = False, + final: bool = False, + qcdb_session: Session = Depends(get_qc_db), +) -> dict[ChecksumSHA256, list[QcState]]: + return get_qc_states( + session=qcdb_session, + num_weeks=weeks, + sequencing_outcomes_only=seq_level, + final_only=final, + ) diff --git a/tests/endpoints/test_dump_qc_states.py b/tests/endpoints/test_dump_qc_states.py index 49247ee0..6fef2d50 100644 --- a/tests/endpoints/test_dump_qc_states.py +++ b/tests/endpoints/test_dump_qc_states.py @@ -1,3 +1,6 @@ +from datetime import datetime + +import pytest from fastapi.testclient import TestClient from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users @@ -57,3 +60,52 @@ def test_get_qc_by_product_id(test_client: TestClient, load_data4well_retrieval) assert len(response_data) == 1 assert MISSING_CHECKSUM not in response_data assert FIRST_GOOD_CHECKSUM in response_data + + +def test_get_qc(test_client: TestClient, load_data4well_retrieval): + + response = test_client.get("/products/qc") + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 0 + + response = test_client.get("/products/qc?weeks=-1") + assert response.status_code == 422 + + # Earliest test QC states are updated on 2022-02-15 + interval = datetime.today() - datetime(year=2022, month=2, day=15) + num_weeks = int(interval.days / 7 + 2) + + response = test_client.get(f"/products/qc?weeks={num_weeks}") + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 18 + assert sum([len(l) for (id, l) in response_data.items()]) == 34 + + response = test_client.get( + f"/products/qc?weeks={num_weeks}&final=false&seq_level=no" + ) + assert response.status_code == 200 + 
response_data = response.json() + assert len(response_data) == 18 + assert sum([len(l) for (id, l) in response_data.items()]) == 34 + + response = test_client.get(f"/products/qc?weeks={num_weeks}&final=true") + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 4 + assert sum([len(l) for (id, l) in response_data.items()]) == 8 + + response = test_client.get( + f"/products/qc?weeks={num_weeks}&final=True&seq_level=yes" + ) + assert response.status_code == 200 + response_data = response.json() + assert len(response_data) == 4 + assert sum([len(l) for (id, l) in response_data.items()]) == 4 + product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585" + assert product_id in response_data + qc_state = response_data[product_id][0] + assert qc_state["id_product"] == product_id + assert qc_state["is_preliminary"] is False + assert qc_state["qc_type"] == "sequencing" diff --git a/tests/test_qc_state_retrieval.py b/tests/test_qc_state_retrieval.py index 6d5813e4..505d80b2 100644 --- a/tests/test_qc_state_retrieval.py +++ b/tests/test_qc_state_retrieval.py @@ -1,12 +1,18 @@ +from datetime import datetime, timedelta + import pytest +from sqlalchemy import select from lang_qc.db.helper.qc import ( get_qc_state_for_product, + get_qc_states, get_qc_states_by_id_product_list, product_has_qc_state, products_have_qc_state, qc_state_dict, ) +from lang_qc.db.qc_schema import QcState +from lang_qc.models.qc_state import QcState as QcStateModel from tests.fixtures.well_data import load_data4well_retrieval, load_dicts_and_users MISSING_CHECKSUM = "A" * 64 @@ -24,7 +30,7 @@ two_good_ids_list = [FIRST_GOOD_CHECKSUM, SECOND_GOOD_CHECKSUM] -def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval): +def test_bulk_retrieval_by_id(qcdb_test_session, load_data4well_retrieval): # The test below demonstrates that no run-time type checking of # product IDs is performed. 
@@ -66,6 +72,111 @@ def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval): assert MISSING_CHECKSUM not in qc_states +def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval): + + with pytest.raises(ValueError, match=r"num_weeks should be a positive number"): + assert get_qc_states(qcdb_test_session, num_weeks=-1) + + qc_states = ( + qcdb_test_session.execute(select(QcState).order_by(QcState.date_updated.desc())) + .scalars() + .all() + ) + now = datetime.today() + max_interval = now - qc_states[-1].date_updated + max_num_weeks = int(max_interval.days / 7 + 1) + min_interval = now - qc_states[0].date_updated + min_num_weeks = int(min_interval.days / 7 - 1) + + assert min_num_weeks > 2 + # Set the look-back number of weeks to teh period with no records. + qc_states_dict = get_qc_states(qcdb_test_session, num_weeks=(min_num_weeks - 1)) + assert len(qc_states_dict) == 0 + + # Retrieve all available QC states. + qc_states_dict = get_qc_states(qcdb_test_session, num_weeks=max_num_weeks) + # Test total number of QcState objects. + assert sum([len(l) for (id, l) in qc_states_dict.items()]) == len(qc_states) + # Test number of items in the dictionary. + assert len(qc_states_dict) == len( + {qc_state.id_seq_product: 1 for qc_state in qc_states} + ) + + # Retrieve all available final QC states. + qc_states_dict = get_qc_states( + qcdb_test_session, num_weeks=max_num_weeks, final_only=True + ) + assert sum([len(l) for (id, l) in qc_states_dict.items()]) == len( + [qc_state for qc_state in qc_states if qc_state.is_preliminary == 0] + ) + assert {id: len(l) for (id, l) in qc_states_dict.items()} == { + "e47765a207c810c2c281d5847e18c3015f3753b18bd92e8a2bea1219ba3127ea": 2, + "977089cd272dffa70c808d74159981c0d1363840875452a868a4c5e15f1b2072": 2, + "dc99ab8cb6762df5c935adaeb1f0c49ff34af96b6fa3ebf9a90443079c389579": 2, + "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585": 2, + } + + # Retrieve all available sequencing type QC states. 
+ qc_states_dict = get_qc_states( + qcdb_test_session, num_weeks=max_num_weeks, sequencing_outcomes_only=True + ) + assert len(qc_states_dict) == len( + [qc_state for qc_state in qc_states if qc_state.qc_type.qc_type == "sequencing"] + ) + + # Retrieve all available sequencing type final QC states. + qc_states_dict = get_qc_states( + qcdb_test_session, + num_weeks=max_num_weeks, + final_only=True, + sequencing_outcomes_only=True, + ) + assert len(qc_states_dict) == len( + [ + qc_state + for qc_state in qc_states + if ( + qc_state.is_preliminary == 0 + and qc_state.qc_type.qc_type == "sequencing" + ) + ] + ) + assert {id: len(l) for (id, l) in qc_states_dict.items()} == { + "e47765a207c810c2c281d5847e18c3015f3753b18bd92e8a2bea1219ba3127ea": 1, + "977089cd272dffa70c808d74159981c0d1363840875452a868a4c5e15f1b2072": 1, + "dc99ab8cb6762df5c935adaeb1f0c49ff34af96b6fa3ebf9a90443079c389579": 1, + "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585": 1, + } + + # Retrieve recent sequencing type final QC states. 
+ num_weeks = max_num_weeks - 44 + qc_states_dict = get_qc_states( + qcdb_test_session, + num_weeks=num_weeks, + final_only=True, + sequencing_outcomes_only=True, + ) + earliest_time = now - timedelta(weeks=num_weeks) + assert len(qc_states_dict) == len( + [ + qc_state + for qc_state in qc_states + if ( + qc_state.date_updated > earliest_time + and qc_state.is_preliminary == 0 + and qc_state.qc_type.qc_type == "sequencing" + ) + ] + ) + product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585" + assert {id: len(l) for (id, l) in qc_states_dict.items()} == {product_id: 1} + qc_state = qc_states_dict[product_id][0] + assert isinstance(qc_state, QcStateModel) + assert qc_state.id_product == product_id + assert qc_state.is_preliminary is False + assert qc_state.qc_type == "sequencing" + + def test_product_existence(qcdb_test_session, load_data4well_retrieval): assert product_has_qc_state(qcdb_test_session, MISSING_CHECKSUM) is False From a469b53b9f41ad4ffab66de7aaa878673de4f405 Mon Sep 17 00:00:00 2001 From: mgcam Date: Fri, 24 May 2024 17:33:31 +0100 Subject: [PATCH 2/2] Use dict. 
values() method where only values are used --- tests/endpoints/test_dump_qc_states.py | 8 ++++---- tests/test_qc_state_retrieval.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/endpoints/test_dump_qc_states.py b/tests/endpoints/test_dump_qc_states.py index 6fef2d50..d544a7b6 100644 --- a/tests/endpoints/test_dump_qc_states.py +++ b/tests/endpoints/test_dump_qc_states.py @@ -80,7 +80,7 @@ def test_get_qc(test_client: TestClient, load_data4well_retrieval): assert response.status_code == 200 response_data = response.json() assert len(response_data) == 18 - assert sum([len(l) for (id, l) in response_data.items()]) == 34 + assert sum([len(l) for l in response_data.values()]) == 34 response = test_client.get( f"/products/qc?weeks={num_weeks}&final=false&seq_level=no" @@ -88,13 +88,13 @@ def test_get_qc(test_client: TestClient, load_data4well_retrieval): assert response.status_code == 200 response_data = response.json() assert len(response_data) == 18 - assert sum([len(l) for (id, l) in response_data.items()]) == 34 + assert sum([len(l) for l in response_data.values()]) == 34 response = test_client.get(f"/products/qc?weeks={num_weeks}&final=true") assert response.status_code == 200 response_data = response.json() assert len(response_data) == 4 - assert sum([len(l) for (id, l) in response_data.items()]) == 8 + assert sum([len(l) for l in response_data.values()]) == 8 response = test_client.get( f"/products/qc?weeks={num_weeks}&final=True&seq_level=yes" @@ -102,7 +102,7 @@ def test_get_qc(test_client: TestClient, load_data4well_retrieval): assert response.status_code == 200 response_data = response.json() assert len(response_data) == 4 - assert sum([len(l) for (id, l) in response_data.items()]) == 4 + assert sum([len(l) for l in response_data.values()]) == 4 product_id = "5e91b9246b30c2df4e9f2a2313ce097e93493b0a822e9d9338e32df5d58db585" assert product_id in response_data qc_state = response_data[product_id][0] diff --git 
a/tests/test_qc_state_retrieval.py b/tests/test_qc_state_retrieval.py index 505d80b2..c572225c 100644 --- a/tests/test_qc_state_retrieval.py +++ b/tests/test_qc_state_retrieval.py @@ -89,14 +89,14 @@ def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval): min_num_weeks = int(min_interval.days / 7 - 1) assert min_num_weeks > 2 - # Set the look-back number of weeks to teh period with no records. + # Set the look-back number of weeks to the period with no records. qc_states_dict = get_qc_states(qcdb_test_session, num_weeks=(min_num_weeks - 1)) assert len(qc_states_dict) == 0 # Retrieve all available QC states. qc_states_dict = get_qc_states(qcdb_test_session, num_weeks=max_num_weeks) # Test total number of QcState objects. - assert sum([len(l) for (id, l) in qc_states_dict.items()]) == len(qc_states) + assert sum([len(l) for l in qc_states_dict.values()]) == len(qc_states) # Test number of items in the dictionary. assert len(qc_states_dict) == len( {qc_state.id_seq_product: 1 for qc_state in qc_states} @@ -106,7 +106,7 @@ def test_bulk_retrieval(qcdb_test_session, load_data4well_retrieval): qc_states_dict = get_qc_states( qcdb_test_session, num_weeks=max_num_weeks, final_only=True ) - assert sum([len(l) for (id, l) in qc_states_dict.items()]) == len( + assert sum([len(l) for l in qc_states_dict.values()]) == len( [qc_state for qc_state in qc_states if qc_state.is_preliminary == 0] ) assert {id: len(l) for (id, l) in qc_states_dict.items()} == {