From a57c87632aea0e944ce7a75c65edee5465d5471b Mon Sep 17 00:00:00 2001
From: Alexis Lucattini
Date: Tue, 17 Oct 2023 11:37:36 +1100
Subject: [PATCH] Added deleted lims sheet

Filter out deleted rows assigned to the ToBeDeleted user

Resolves #133
---
 .../lambda_code.py                          | 124 ++++++++++++++++++
 .../layers/lambda_utils/gspread_helpers.py  | 119 +++++++++++++++++
 2 files changed, 243 insertions(+)

diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py
index 17afd73..2101aea 100644
--- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py
+++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py
@@ -1597,6 +1597,126 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
     return merged_lims_df
 
 
+def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, excel_row_mapping_number_df: pd.DataFrame) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
+    """
+    Cases that have been assigned to ToBeDeleted need to be dropped from the row list
+    and instead appended to the 'Deleted Cases' sheet
+    :param merged_df:
+      * subject_id
+      * library_id
+      * in_redcap
+      * in_portal
+      * in_glims
+      * in_pieriandx
+      * redcap_sample_type
+      * redcap_is_complete
+      * portal_wfr_id
+      * portal_wfr_end
+      * portal_wfr_status
+      * portal_sequence_run_name
+      * portal_is_failed_run
+      * glims_project_owner
+      * glims_project_name
+      * glims_panel
+      * glims_sample_type
+      * glims_is_identified
+      * glims_default_snomed_term
+      * glims_needs_redcap
+      * pieriandx_case_id
+      * pieriandx_case_accession_number
+      * pieriandx_case_creation_date
+      * pieriandx_assignee
+    :param cttso_lims_df:
+      A pandas DataFrame with the following columns:
+        * subject_id
+        * library_id
+        * in_glims
+        * in_portal
+        * in_redcap
+        * in_pieriandx
+        * glims_project_owner
+        * glims_project_name
+        * glims_panel
+        * glims_sample_type
+        * glims_is_identified
+        * glims_default_snomed_term
+        * glims_needs_redcap
+        * redcap_sample_type
+        * redcap_is_complete
+        * portal_wfr_id
+        * portal_wfr_end
+        * portal_wfr_status
+        * portal_sequence_run_name
+        * portal_is_failed_run
+        * pieriandx_submission_time
+        * pieriandx_case_id
+        * pieriandx_case_accession_number
+        * pieriandx_case_creation_date
+        * pieriandx_case_identified
+        * pieriandx_assignee
+        * pieriandx_panel_type
+        * pieriandx_sample_type
+        * pieriandx_workflow_id
+        * pieriandx_workflow_status
+        * pieriandx_report_status
+        * pieriandx_report_signed_out - currently ignored
+    :param excel_row_mapping_number_df:
+      A pandas DataFrame with the following columns:
+        * cttso_lims_index
+        * excel_row_number
+    :return: (merged_df, cttso_lims_df, excel_row_number_mapping_df) with the ToBeDeleted cases removed
+    """
+    # Split cttso lims df by query
+    to_be_deleted_cases_lims = cttso_lims_df.query("pieriandx_assignee == 'ToBeDeleted'")
+    to_be_deleted_cases_merged_df = merged_df.query("pieriandx_assignee == 'ToBeDeleted'")
+
+    if to_be_deleted_cases_lims.shape[0] == 0 and to_be_deleted_cases_merged_df.shape[0] == 0:
+        logger.info("Nothing to transfer to delete pile")
+        return merged_df, cttso_lims_df, excel_row_mapping_number_df
+
+    cttso_lims_df_cleaned = cttso_lims_df.query("pieriandx_assignee != 'ToBeDeleted'")
+    clean_cttso_case_ids_list = cttso_lims_df_cleaned["pieriandx_case_id"].tolist()
+    deleted_case_ids_list = to_be_deleted_cases_lims["pieriandx_case_id"].tolist()
+
+    # Clean out merged df with existing deleted cases
+    # And any cases we're about to put into the deleted lims as well
+    deleted_lims_df, deleted_lims_excel_row_mapping_number = get_deleted_lims_df()
+    case_ids_to_remove_from_merged_df = list(
+        set(
+            deleted_lims_df["pieriandx_case_id"].tolist() +
+            to_be_deleted_cases_lims["pieriandx_case_id"].tolist()
+        )
+    )
+
+    # If the case id is in both, we need to keep it, and have it reassigned
+    merged_df = merged_df.query(
+        "pieriandx_case_id.isnull() or "
+        "pieriandx_case_id not in @case_ids_to_remove_from_merged_df or "
+        "( "
+        "  pieriandx_case_id in @clean_cttso_case_ids_list and "
+        "  pieriandx_case_id in @deleted_case_ids_list "
+        ")",
+        engine="python"
+    )
+
+    # Update cttso lims sheet with replacement
+    append_df_to_cttso_lims(cttso_lims_df_cleaned, replace=True)
+    # Wait for doc population
+    sleep(10)
+
+    # Collect new values
+    cttso_lims_df: pd.DataFrame
+    excel_row_number_mapping_df: pd.DataFrame
+    cttso_lims_df, excel_row_number_mapping_df = get_cttso_lims()
+
+    # Update deleted sheet - note we only add in the cases that are in the LIMS -
+    # cases in merged_df will need to be updated into LIMS first THEN pulled out of LIMS in the next iteration of this
+    # lambda script
+    add_deleted_cases_to_deleted_sheet(to_be_deleted_cases_lims)
+
+    return merged_df, cttso_lims_df, excel_row_number_mapping_df
+
+
 def lambdas_awake() -> bool:
     """
     Go through the lambdas that are required for this service and make sure that they're all awake
@@ -1720,6 +1840,10 @@ def lambda_handler(event, context):
     excel_row_number_mapping_df: pd.DataFrame
     cttso_lims_df, excel_row_number_mapping_df = get_cttso_lims()
 
+    # Clean out to-be-deleted cases
+    merged_df, cttso_lims_df, excel_row_number_mapping_df = \
+        drop_to_be_deleted_cases(merged_df, cttso_lims_df, excel_row_number_mapping_df)
+
     merged_df, cttso_lims_df, excel_row_number_mapping_df = \
         cleanup_duplicate_rows(merged_df, cttso_lims_df, excel_row_number_mapping_df)
 
diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py
index c629fa5..7ba2cf6 100644
--- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py
+++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py
@@ -270,3 +270,122 @@ def get_cttso_lims() -> (pd.DataFrame, pd.DataFrame):
     )
 
     return cttso_lims_df, excel_row_number_df
+
+
+def get_deleted_lims_df() -> (pd.DataFrame, pd.DataFrame):
+    """
+    Collect the values from the 'Deleted Cases' sheet of the existing GSuite spreadsheet
+    Also maps each row to its excel row number
+    :return: (
+      A pandas DataFrame with the following columns:
+        * subject_id
+        * library_id
+        * in_glims
+        * in_portal
+        * in_redcap
+        * in_pieriandx
+        * glims_project_owner
+        * glims_project_name
+        * glims_panel
+        * glims_sample_type
+        * glims_is_identified
+        * glims_default_snomed_term
+        * glims_needs_redcap
+        * redcap_sample_type
+        * redcap_is_complete
+        * portal_wfr_id
+        * portal_wfr_end
+        * portal_wfr_status
+        * portal_sequence_run_name
+        * portal_is_failed_run
+        * pieriandx_submission_time
+        * pieriandx_case_id
+        * pieriandx_case_accession_number
+        * pieriandx_case_creation_date
+        * pieriandx_case_identified
+        * pieriandx_assignee
+        * pieriandx_panel_type
+        * pieriandx_sample_type
+        * pieriandx_workflow_id
+        * pieriandx_workflow_status
+        * pieriandx_report_status
+        * pieriandx_report_signed_out - currently ignored
+        * date_added_to_deletion_table
+      ,
+      A pandas DataFrame with the following columns:
+        * cttso_lims_index
+        * excel_row_number
+    )
+    """
+    deleted_lims_df: pd.DataFrame = Spread(spread=get_cttso_lims_sheet_id(), sheet="Deleted Cases").sheet_to_df(index=0)
+
+    deleted_lims_df = deleted_lims_df.replace("", pd.NA)
+
+    # Replace booleans
+    deleted_lims_df = deleted_lims_df.replace({
+        "TRUE": True,
+        "FALSE": False
+    })
+
+    excel_row_number_df: pd.DataFrame = pd.DataFrame({"cttso_lims_index": deleted_lims_df.index})
+
+    # Conversion to 1-based index plus single header row
+    excel_row_number_df["excel_row_number"] = excel_row_number_df.index + 2
+
+    return deleted_lims_df, excel_row_number_df
+
+
+def append_rows_to_deleted_lims(to_be_deleted: pd.DataFrame):
+    """
+    Append the given rows to the deleted lims sheet
+    :param to_be_deleted: A pandas DataFrame of rows to append (same columns as the deleted lims sheet)
+    :return:
+    """
+    # Open up the sheet object
+    sheet_obj = Spread(spread=get_cttso_lims_sheet_id(), sheet="Deleted Cases")
+
+    # Perform a proper NA replacement
+    # https://github.com/pandas-dev/pandas/issues/29024#issuecomment-1098052276
+    new_df = to_be_deleted.replace({pd.NaT: None}).replace({'NaT': None}).replace({np.NaN: ""})
+
+    # Get existing sheet
+    existing_sheet = sheet_obj.sheet_to_df(index=0)
+    # Update the sheet object with the list
+    sheet_obj.df_to_sheet(
+        pd.concat(
+            [
+                existing_sheet,
+                new_df,
+                pd.DataFrame(columns=existing_sheet.columns, index=range(1000))
+            ]
+        ),
+        index=False, replace=True, fill_value=""
+    )
+
+
+def add_deleted_cases_to_deleted_sheet(new_cases_to_delete_df: pd.DataFrame):
+    """
+    Add cases assigned to ToBeDeleted to the 'Deleted Cases' sheet, skipping any case ids already present
+    :param new_cases_to_delete_df: A pandas DataFrame of cttso lims rows assigned to ToBeDeleted
+    :return:
+    """
+    deleted_lims_df, excel_row_mapping_number = get_deleted_lims_df()
+
+    # Create list for query
+    existing_deleted_case_ids = deleted_lims_df["pieriandx_case_id"].tolist()
+
+    # Get list of deleted cases
+    new_cases_to_delete_df = new_cases_to_delete_df.query(
+        "pieriandx_case_id not in @existing_deleted_case_ids",
+        engine="python"
+    )
+
+    if new_cases_to_delete_df.shape[0] == 0:
+        return
+
+    new_cases_to_delete_df["date_added_to_deletion_table"] = datetime.utcnow().isoformat(sep="T", timespec="seconds") + "Z"
+
+    append_rows_to_deleted_lims(new_cases_to_delete_df)
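
Review note: the keep/drop rule inside drop_to_be_deleted_cases is easiest to see on toy data. The sketch below is illustrative only - the frame and the three id lists are invented, not the real LIMS schema - and simply re-runs the same pandas query used in the patch to show which rows survive.

# Toy illustration of the drop_to_be_deleted_cases() keep/drop rule.
# Only the query text mirrors the patch; the data is made up.
import pandas as pd

merged_df = pd.DataFrame({
    "subject_id": ["S1", "S2", "S3", "S4"],
    "pieriandx_case_id": ["100", "101", "102", None],
})

# Case ids already on (or about to move to) the deleted sheet
case_ids_to_remove_from_merged_df = ["100", "101"]
# Case ids still present on the cleaned cttso lims sheet
clean_cttso_case_ids_list = ["101", "102"]
# Case ids assigned to ToBeDeleted on the lims sheet
deleted_case_ids_list = ["100", "101"]

kept = merged_df.query(
    "pieriandx_case_id.isnull() or "
    "pieriandx_case_id not in @case_ids_to_remove_from_merged_df or "
    "( "
    "  pieriandx_case_id in @clean_cttso_case_ids_list and "
    "  pieriandx_case_id in @deleted_case_ids_list "
    ")",
    engine="python"
)

# Expected:
#   100 dropped  (deleted, and no longer on the cleaned lims sheet)
#   101 kept     (on both the cleaned lims sheet and the deleted list, so it gets reassigned)
#   102 kept     (never marked for deletion)
#   S4  kept     (no pieriandx case id yet)
print(kept)

As in the patch, engine="python" is what permits the .isnull() method call inside the query string, and the @-prefixed names refer to local lists.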
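
A similar sketch for the dedup step in add_deleted_cases_to_deleted_sheet: only case ids not already on the 'Deleted Cases' sheet are timestamped and handed to append_rows_to_deleted_lims. Sheet access is stubbed out with in-memory frames here (both frames are invented), so the snippet runs without gspread credentials.

# Pure-pandas sketch of the dedup + timestamping done by add_deleted_cases_to_deleted_sheet().
# The two frames stand in for get_deleted_lims_df() and the ToBeDeleted rows from the lims sheet.
from datetime import datetime

import pandas as pd

deleted_lims_df = pd.DataFrame({"pieriandx_case_id": ["100"]})                 # already on 'Deleted Cases'
new_cases_to_delete_df = pd.DataFrame({"pieriandx_case_id": ["100", "101"]})   # newly assigned to ToBeDeleted

existing_deleted_case_ids = deleted_lims_df["pieriandx_case_id"].tolist()

# Keep only case ids not already on the deleted sheet (same query as the patch);
# .copy() just avoids a SettingWithCopyWarning when the timestamp column is added below
new_cases_to_delete_df = new_cases_to_delete_df.query(
    "pieriandx_case_id not in @existing_deleted_case_ids",
    engine="python"
).copy()

if new_cases_to_delete_df.shape[0] > 0:
    # UTC timestamp in the same ISO-8601 'Z' format used by the patch
    new_cases_to_delete_df["date_added_to_deletion_table"] = (
        datetime.utcnow().isoformat(sep="T", timespec="seconds") + "Z"
    )
    # In the patch this frame would now be passed to append_rows_to_deleted_lims()
    print(new_cases_to_delete_df)   # only case id 101 remains

In the patch itself, append_rows_to_deleted_lims then concatenates the existing sheet contents, the new rows and a block of blank padding rows before writing everything back with df_to_sheet(..., replace=True).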