Skip to content
This repository was archived by the owner on Dec 6, 2024. It is now read-only.

Commit

Permalink
Merge pull request #152 from umccr/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
alexiswl authored Oct 30, 2023
2 parents b34c3ec + a2e147a commit 0dbe35b
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 107 deletions.
12 changes: 12 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ Changes in this log refer only to changes that make it to the 'main' branch. and

For changes in deployment, please see the [deployment changelog](deploy/cttso-ica-to-pieriandx-cdk/Changelog.md)


## 2023-10-18

> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])

### Bugfixes

* Update sample type trimming of _sample suffix (https://github.com/umccr/cttso-ica-to-pieriandx/pull/150)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/149

## 2023-10-18

> Author: Alexis Lucattini
Expand Down
11 changes: 11 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@ are nested under deploy/cttso-ica-to-pieriandx-cdk.
> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])

### Bugfixes

* Don't resubmit deleted samples - also remove deleted samples from the lims sheet (https://github.com/umccr/cttso-ica-to-pieriandx/pull/151)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/146

## 2023-10-18

> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])

### Enhancements

* Move to project owner / project name mapping logic (https://github.com/umccr/cttso-ica-to-pieriandx/pull/141)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,24 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
columns=processing_columns
)

# Check none of the processing df libraries are in the list of deleted lists
deleted_lims_df, deleted_lims_excel_row_mapping_number = get_deleted_lims_df()
process_row: pd.Series
already_deleted_list_index = []
for index, process_row in to_process_df.iterrows():
if not deleted_lims_df.query(
f"subject_id == '{process_row['subject_id']}' and "
f"library_id == '{process_row['library_id']}' and "
f"portal_wfr_id == '{process_row['portal_wfr_id']}'"
).shape[0] == 0:
already_deleted_list_index.append(index)
logger.warning(f"Already run and deleted this combination {process_row['subject_id']} / {process_row['library_id']} / {process_row['portal_wfr_id']}, not reprocessing")

# Delete via index
to_process_df = to_process_df.iloc[list(
set(to_process_df.index.tolist()) - set(already_deleted_list_index)
)]

# Update columns to strip glims_ attributes
new_column_names = [
"panel",
Expand Down Expand Up @@ -1861,7 +1879,7 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
return merged_lims_df


def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, excel_row_mapping_number_df: pd.DataFrame) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, excel_row_mapping_number_df: pd.DataFrame, existing_pieriandx_cases: List) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
"""
Cases that have been assigned to ToBeDeleted need to be dropped from row list
and instead attached to a new sheet
Expand Down Expand Up @@ -1928,17 +1946,44 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
A pandas DataFrame with the following columns:
* cttso_lims_index
* excel_row_number
:param existing_pieriandx_cases:
A list of pieriandx cases we have
:return:
"""
# Split cttso lims df by query
to_be_deleted_cases_lims = cttso_lims_df.query("pieriandx_assignee == 'ToBeDeleted'")
to_be_deleted_cases_lims = cttso_lims_df.query(
"pieriandx_assignee == 'ToBeDeleted' or "
"( "
" pieriandx_case_id not in @existing_pieriandx_cases and "
" not pieriandx_case_id.isnull()"
")",
engine="python"
)
to_be_deleted_cases_merged_df = merged_df.query("pieriandx_assignee == 'ToBeDeleted'")

if to_be_deleted_cases_lims.shape[0] == 0 and to_be_deleted_cases_merged_df.shape[0] == 0:
# Check cases that are not in pieriandx
if len(existing_pieriandx_cases) == 0:
logger.error("Something seriously wrong has happened! Got an empty list of cases")
raise ValueError
# Cases that have already been deleted are ones in the lims df that cannot be found in pieriandx
cases_already_deleted = list(
set(
cttso_lims_df["pieriandx_case_id"].dropna().tolist()
) -
set(
existing_pieriandx_cases
)
)

if to_be_deleted_cases_lims.shape[0] == 0 and to_be_deleted_cases_merged_df.shape[0] == 0 and len(cases_already_deleted) == 0:
logger.info("Nothing to transfer to delete pile")
return merged_df, cttso_lims_df, excel_row_mapping_number_df

cttso_lims_df_cleaned = cttso_lims_df.query("pieriandx_assignee != 'ToBeDeleted'")
# Remove cases assigned to the ToBeDeleted user or cases that have already been deleted
cttso_lims_df_cleaned = cttso_lims_df.query(
"pieriandx_assignee != 'ToBeDeleted' and not "
"pieriandx_case_id in @cases_already_deleted"
)
clean_cttso_case_ids_list = cttso_lims_df_cleaned["pieriandx_case_id"].tolist()
deleted_case_ids_list = to_be_deleted_cases_lims["pieriandx_case_id"].tolist()

Expand All @@ -1963,6 +2008,12 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
engine="python"
)

# Remove cases from merged df that have already been deleted
merged_df = merged_df.query(
"pieriandx_case_id not in @cases_already_deleted",
engine="python"
)

# Update cttso lims sheet with replacement
append_df_to_cttso_lims(cttso_lims_df_cleaned, replace=True)
# Wait for doc population
Expand Down Expand Up @@ -2109,7 +2160,7 @@ def lambda_handler(event, context):

# Clean out to-be-deleted cases
merged_df, cttso_lims_df, excel_row_number_mapping_df = \
drop_to_be_deleted_cases(merged_df, cttso_lims_df, excel_row_number_mapping_df)
drop_to_be_deleted_cases(merged_df, cttso_lims_df, excel_row_number_mapping_df, pieriandx_df["pieriandx_case_id"].tolist())

merged_df, cttso_lims_df, excel_row_number_mapping_df = \
cleanup_duplicate_rows(merged_df, cttso_lims_df, excel_row_number_mapping_df)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import pandas as pd
import time

from pyriandx.utils import retry_session

from .globals import \
PIERIANDX_CDK_SSM_LIST, \
PIERIANDX_CDK_SSM_PATH, \
Expand Down Expand Up @@ -352,6 +354,40 @@ def validate_case_accession_number(subject_id: str, library_id: str, case_access
raise ValueError


def check_case_exists(case_id: str) -> bool:
    """
    Query the PierianDx case endpoint directly to determine whether a case
    still exists (i.e. has not been deleted).
    :param case_id: PierianDx case id to look up
    :return: True when the case endpoint responds 200, False otherwise
    :raises ChildProcessError: on a 401 (unauthorized) response
    """
    # Collect credentials and build a client for this institution
    email, auth_token, institution, base_url = get_pieriandx_env_vars()
    pyriandx_client = get_pieriandx_client(
        email=email,
        auth_token=auth_token,
        institution=institution,
        base_url=base_url
    )

    # Hit the endpoint directly (bypassing _get_api) so the raw
    # status code is available for inspection
    case_url = pyriandx_client.baseURL + f"/case/{case_id}"
    status_code = retry_session(pyriandx_client.headers).get(case_url, params=None).status_code

    if status_code == 200:
        # Case found - it still exists
        return True
    elif status_code == 400:
        # PierianDx answers 400 for an unknown case id
        logger.info(f"Case {case_id} is not found, it may have been deleted")
        return False
    elif status_code == 401:
        # Credentials rejected - nothing downstream can work, abort
        logger.error("Got unauthorized status code 401. Cannot continue with script")
        raise ChildProcessError
    else:
        # Any other status: treat conservatively as not existing
        logger.warning(f"Got status_code {status_code}. Assuming case does not exist")
        return False


def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
"""
Get pieriandx results for a sample with incomplete results
Expand Down Expand Up @@ -382,7 +418,7 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
iter_count += 1
if iter_count >= MAX_ATTEMPTS_GET_CASES:
logger.error(f"Tried to get all cases {str(MAX_ATTEMPTS_GET_CASES)} times and failed")
raise EnvironmentError
raise ChildProcessError

# Attempt to get cases
response: Dict = pyriandx_client._get_api(endpoint=f"/case/{case_id}")
Expand Down
Loading

0 comments on commit 0dbe35b

Please sign in to comment.