This repository was archived by the owner on Dec 6, 2024. It is now read-only.

Updated deleted sample logic and fix sample type regex #152

Merged 7 commits on Oct 30, 2023

12 changes: 12 additions & 0 deletions Changelog.md
@@ -4,6 +4,18 @@ Changes in this log refer only to changes that make it to the 'main' branch. and

For changes in deployment, please see the [deployment changelog](deploy/cttso-ica-to-pieriandx-cdk/Changelog.md)


## 2023-10-18

> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])

### Bugfixes

* Update sample type trimming of the `_sample` suffix (https://github.com/umccr/cttso-ica-to-pieriandx/pull/150) (see the sketch below)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/149
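
The precise pattern lives in PR #150's diff; as a rough sketch of the fix's intent, a trailing `_sample` suffix can be stripped case-insensitively from the sample type string. The helper below, including its name, is hypothetical:

```python
import re

def strip_sample_suffix(sample_type: str) -> str:
    # Hypothetical helper: drop a trailing '_sample', case-insensitively,
    # e.g. 'patient_care_sample' -> 'patient_care'
    return re.sub(r"_sample$", "", sample_type, flags=re.IGNORECASE)

assert strip_sample_suffix("patient_care_sample") == "patient_care"
assert strip_sample_suffix("validation") == "validation"
```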

## 2023-10-18

> Author: Alexis Lucattini
11 changes: 11 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/Changelog.md
@@ -8,6 +8,17 @@ are nested under deploy/cttso-ica-to-pieriandx-cdk.
> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])

### Bugfixes

* Don't resubmit deleted samples - also remove deleted samples from the lims sheet (https://github.com/umccr/cttso-ica-to-pieriandx/pull/151) (a sketch of the guard follows below)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/146
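
A minimal sketch of the guard's idea, written here as a pandas anti-join rather than the row-by-row query the lambda code further down this diff uses; all frames and values are made up:

```python
import pandas as pd

# Made-up frames; the column names mirror those used in the lambda code
to_process_df = pd.DataFrame({
    "subject_id": ["SBJ001", "SBJ002"],
    "library_id": ["L001", "L002"],
    "portal_wfr_id": ["wfr.1", "wfr.2"],
})
deleted_df = pd.DataFrame({
    "subject_id": ["SBJ002"],
    "library_id": ["L002"],
    "portal_wfr_id": ["wfr.2"],
})

# Anti-join: keep only (subject, library, workflow-run) triples absent from the deleted set
keys = ["subject_id", "library_id", "portal_wfr_id"]
merge_flags = to_process_df.merge(deleted_df[keys], on=keys, how="left", indicator=True)
to_process_df = to_process_df[(merge_flags["_merge"] == "left_only").to_numpy()]
print(to_process_df)  # only the SBJ001 row survives
```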

## 2023-10-18

> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])

### Enhancements

* Move to project owner / project name mapping logic (https://github.com/umccr/cttso-ica-to-pieriandx/pull/141) (see the sketch below)
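
The mapping details are in PR #141 itself; purely as a labelled guess at the shape of such logic, configuration might be keyed on a (project owner, project name) pair:

```python
# Hypothetical sketch only -- the real mapping logic is in PR #141
from typing import Dict, Tuple

PROJECT_CONFIG: Dict[Tuple[str, str], Dict] = {
    ("UMCCR", "clinical"): {"panel": "main", "is_identified": True},
    ("UMCCR", "research"): {"panel": "subpanel", "is_identified": False},
}

def get_project_config(project_owner: str, project_name: str) -> Dict:
    # Raises KeyError for unmapped owner / name pairs
    return PROJECT_CONFIG[(project_owner, project_name)]
```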
@@ -249,6 +249,24 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
columns=processing_columns
)

# Check none of the processing df libraries are in the list of deleted samples
deleted_lims_df, deleted_lims_excel_row_mapping_number = get_deleted_lims_df()
process_row: pd.Series
already_deleted_list_index = []
for index, process_row in to_process_df.iterrows():
if deleted_lims_df.query(
f"subject_id == '{process_row['subject_id']}' and "
f"library_id == '{process_row['library_id']}' and "
f"portal_wfr_id == '{process_row['portal_wfr_id']}'"
).shape[0] > 0:
already_deleted_list_index.append(index)
logger.warning(f"Already run and deleted this combination {process_row['subject_id']} / {process_row['library_id']} / {process_row['portal_wfr_id']}, not reprocessing")

# Drop the already-deleted rows by index label (preserves row order)
to_process_df = to_process_df.drop(index=already_deleted_list_index)

# Update columns to strip glims_ attributes
new_column_names = [
"panel",
@@ -1861,7 +1879,7 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
return merged_lims_df


def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, excel_row_mapping_number_df: pd.DataFrame) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, excel_row_mapping_number_df: pd.DataFrame, existing_pieriandx_cases: List) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
"""
Cases that have been assigned to ToBeDeleted need to be dropped from row list
and instead attached to a new sheet
Expand Down Expand Up @@ -1928,17 +1946,44 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
A pandas DataFrame with the following columns:
* cttso_lims_index
* excel_row_number
:param existing_pieriandx_cases:
A list of case ids that currently exist in PierianDx
:return:
"""
# Split cttso lims df by query
to_be_deleted_cases_lims = cttso_lims_df.query("pieriandx_assignee == 'ToBeDeleted'")
to_be_deleted_cases_lims = cttso_lims_df.query(
"pieriandx_assignee == 'ToBeDeleted' or "
"( "
" pieriandx_case_id not in @existing_pieriandx_cases and "
" not pieriandx_case_id.isnull()"
")",
engine="python"
)
to_be_deleted_cases_merged_df = merged_df.query("pieriandx_assignee == 'ToBeDeleted'")

if to_be_deleted_cases_lims.shape[0] == 0 and to_be_deleted_cases_merged_df.shape[0] == 0:
# Check cases that are not in pieriandx
if len(existing_pieriandx_cases) == 0:
logger.error("Something seriously wrong has happened! Got an empty list of cases")
raise ValueError
# Cases that have already been deleted are ones in the lims df that cannot be found in pieriandx
cases_already_deleted = list(
set(
cttso_lims_df["pieriandx_case_id"].dropna().tolist()
) -
set(
existing_pieriandx_cases
)
)

if to_be_deleted_cases_lims.shape[0] == 0 and to_be_deleted_cases_merged_df.shape[0] == 0 and len(cases_already_deleted) == 0:
logger.info("Nothing to transfer to delete pile")
return merged_df, cttso_lims_df, excel_row_mapping_number_df

cttso_lims_df_cleaned = cttso_lims_df.query("pieriandx_assignee != 'ToBeDeleted'")
# Remove cases assigned to the ToBeDeleted user or cases that have already been deleted
cttso_lims_df_cleaned = cttso_lims_df.query(
"pieriandx_assignee != 'ToBeDeleted' and "
"pieriandx_case_id not in @cases_already_deleted",
engine="python"
)
clean_cttso_case_ids_list = cttso_lims_df_cleaned["pieriandx_case_id"].tolist()
deleted_case_ids_list = to_be_deleted_cases_lims["pieriandx_case_id"].tolist()

Expand All @@ -1963,6 +2008,12 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
engine="python"
)

# Remove cases from merged df that have already been deleted
merged_df = merged_df.query(
"pieriandx_case_id not in @cases_already_deleted",
engine="python"
)

# Update cttso lims sheet with replacement
append_df_to_cttso_lims(cttso_lims_df_cleaned, replace=True)
# Wait for doc population
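
The queries in this function rely on `pandas.DataFrame.query` with `@`-prefixed references to local Python variables, passing `engine="python"` because the default numexpr engine cannot evaluate parts of these expressions (for example the `.isnull()` method call). A self-contained illustration of the pattern, with made-up case ids:

```python
import pandas as pd

lims_df = pd.DataFrame({
    "pieriandx_case_id": ["100", "101", None],
    "pieriandx_assignee": [None, "ToBeDeleted", None],
})
existing_cases = ["100"]  # made-up ids for illustration

# Rows flagged for deletion, or whose case id no longer exists in PierianDx
to_delete = lims_df.query(
    "pieriandx_assignee == 'ToBeDeleted' or "
    "(pieriandx_case_id not in @existing_cases and not pieriandx_case_id.isnull())",
    engine="python"
)
print(to_delete)  # the '101' row only
```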
@@ -2109,7 +2160,7 @@ def lambda_handler(event, context):

# Clean out to-be-deleted cases
merged_df, cttso_lims_df, excel_row_number_mapping_df = \
drop_to_be_deleted_cases(merged_df, cttso_lims_df, excel_row_number_mapping_df)
drop_to_be_deleted_cases(merged_df, cttso_lims_df, excel_row_number_mapping_df, pieriandx_df["pieriandx_case_id"].tolist())

merged_df, cttso_lims_df, excel_row_number_mapping_df = \
cleanup_duplicate_rows(merged_df, cttso_lims_df, excel_row_number_mapping_df)
@@ -12,6 +12,8 @@
import pandas as pd
import time

from pyriandx.utils import retry_session

from .globals import \
PIERIANDX_CDK_SSM_LIST, \
PIERIANDX_CDK_SSM_PATH, \
@@ -352,6 +354,40 @@ def validate_case_accession_number(subject_id: str, library_id: str, case_access
raise ValueError


def check_case_exists(case_id: str) -> bool:
"""
Check that a case actually exists and has not been deleted
:param case_id: The PierianDx case id to look up
:return: True if the case exists, False otherwise
"""
email, auth_token, institution, base_url = get_pieriandx_env_vars()

pyriandx_client = get_pieriandx_client(
email=email,
auth_token=auth_token,
institution=institution,
base_url=base_url
)

# Bypass _get_api so the raw status code can be inspected
url = pyriandx_client.baseURL + f"/case/{case_id}"
response = retry_session(pyriandx_client.headers).get(url, params=None)

if response.status_code == 200:
return True

if response.status_code == 400:
logger.info(f"Case {case_id} is not found, it may have been deleted")
return False

if response.status_code == 401:
logger.error("Got unauthorized status code 401. Cannot continue with script")
raise ChildProcessError

logger.warning(f"Got status_code {response.status_code}. Assuming case does not exist")
return False

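As illustrative wiring only (not code from this diff), a hypothetical caller might use `check_case_exists` to filter a list of case ids down to those still present in PierianDx:

```python
# Hypothetical usage of check_case_exists (illustration only)
case_ids = ["100001", "100002"]  # made-up PierianDx case ids

live_case_ids = [
    case_id
    for case_id in case_ids
    if check_case_exists(case_id)  # False for cases deleted on the PierianDx side
]
```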

def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
"""
Get pieriandx results for a sample with incomplete results
Expand Down Expand Up @@ -382,7 +418,7 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
iter_count += 1
if iter_count >= MAX_ATTEMPTS_GET_CASES:
logger.error(f"Tried to get all cases {str(MAX_ATTEMPTS_GET_CASES)} times and failed")
raise EnvironmentError
raise ChildProcessError

# Attempt to get cases
response: Dict = pyriandx_client._get_api(endpoint=f"/case/{case_id}")