Skip to content
This repository has been archived by the owner on Dec 6, 2024. It is now read-only.

Commit

Permalink
Merge pull request #140 from umccr/enhancement/add-deleted-lims-sheet
Browse files Browse the repository at this point in the history
  • Loading branch information
alexiswl authored Oct 17, 2023
2 parents d794bc2 + a57c876 commit 3fdf7f9
Show file tree
Hide file tree
Showing 2 changed files with 243 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1861,6 +1861,126 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
return merged_lims_df


def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, excel_row_mapping_number_df: pd.DataFrame) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """
    Move cases whose pieriandx_assignee is 'ToBeDeleted' out of the working data.

    LIMS rows flagged 'ToBeDeleted' are removed from the cttso lims sheet (the sheet
    is rewritten without them) and handed to add_deleted_cases_to_deleted_sheet.
    Rows of merged_df whose case id is already in the deleted sheet, or is about to
    be placed there, are filtered out.

    :param merged_df:
      A pandas DataFrame with the following columns:
        * subject_id
        * library_id
        * in_redcap
        * in_portal
        * in_glims
        * in_pieriandx
        * redcap_sample_type
        * redcap_is_complete
        * portal_wfr_id
        * portal_wfr_end
        * portal_wfr_status
        * portal_sequence_run_name
        * portal_is_failed_run
        * glims_project_owner
        * glims_project_name
        * glims_panel
        * glims_sample_type
        * glims_is_identified
        * glims_default_snomed_term
        * glims_needs_redcap
        * pieriandx_case_id
        * pieriandx_case_accession_number
        * pieriandx_case_creation_date
        * pieriandx_assignee
    :param cttso_lims_df:
      A pandas DataFrame with the following columns:
        * subject_id
        * library_id
        * in_glims
        * in_portal
        * in_redcap
        * in_pieriandx
        * glims_project_owner
        * glims_project_name
        * glims_panel
        * glims_sample_type
        * glims_is_identified
        * glims_default_snomed_term
        * glims_needs_redcap
        * redcap_sample_type
        * redcap_is_complete
        * portal_wfr_id
        * portal_wfr_end
        * portal_wfr_status
        * portal_sequence_run_name
        * portal_is_failed_run
        * pieriandx_submission_time
        * pieriandx_case_id
        * pieriandx_case_accession_number
        * pieriandx_case_creation_date
        * pieriandx_case_identified
        * pieriandx_assignee
        * pieriandx_panel_type
        * pieriandx_sample_type
        * pieriandx_workflow_id
        * pieriandx_workflow_status
        * pieriandx_report_status
        * pieriandx_report_signed_out  - currently ignored
    :param excel_row_mapping_number_df:
      A pandas DataFrame with the following columns:
        * cttso_lims_index
        * excel_row_number
    :return: (merged_df, cttso_lims_df, excel row mapping df).
      When nothing is flagged on either side the three inputs are returned untouched;
      otherwise merged_df is the filtered frame and the other two are freshly
      collected through get_cttso_lims() after the sheet has been rewritten.
    """
    # Rows flagged for deletion on the LIMS side and on the merged side
    flagged_lims_rows = cttso_lims_df.query("pieriandx_assignee == 'ToBeDeleted'")
    flagged_merged_rows = merged_df.query("pieriandx_assignee == 'ToBeDeleted'")

    # Guard clause - nothing flagged anywhere, hand everything back as-is
    if flagged_lims_rows.shape[0] + flagged_merged_rows.shape[0] == 0:
        logger.info("Nothing to transfer to delete pile")
        return merged_df, cttso_lims_df, excel_row_mapping_number_df

    retained_lims_rows = cttso_lims_df.query("pieriandx_assignee != 'ToBeDeleted'")

    # NOTE: the next three names are referenced through '@' inside the query string
    # further down, do not rename them without updating that string
    clean_cttso_case_ids_list = retained_lims_rows["pieriandx_case_id"].tolist()
    deleted_case_ids_list = flagged_lims_rows["pieriandx_case_id"].tolist()

    # Case ids already sitting in the deleted sheet, plus the ones about to join them
    previously_deleted_df, _ = get_deleted_lims_df()
    case_ids_to_remove_from_merged_df = list(
        {
            *previously_deleted_df["pieriandx_case_id"].tolist(),
            *deleted_case_ids_list
        }
    )

    # Keep a merged row when it has no case id, when its case id is not up for removal,
    # or when the case id appears in both the retained and the flagged LIMS rows
    # (such a case must be kept so that it can be reassigned)
    merged_df = merged_df.query(
        "pieriandx_case_id.isnull() or "
        "pieriandx_case_id not in @case_ids_to_remove_from_merged_df or "
        "( "
        " pieriandx_case_id in @clean_cttso_case_ids_list and "
        " pieriandx_case_id in @deleted_case_ids_list "
        ")",
        engine="python"
    )

    # Rewrite the cttso lims sheet without the flagged rows
    append_df_to_cttso_lims(retained_lims_rows, replace=True)
    # Give the document time to populate before reading it back
    sleep(10)

    # Read the sheet back in
    cttso_lims_df, excel_row_number_mapping_df = get_cttso_lims()

    # Only the cases that were in the LIMS go to the deleted sheet here -
    # cases that are only in merged_df need to be written into the LIMS first and are then
    # pulled out of the LIMS in the next iteration of this lambda script
    add_deleted_cases_to_deleted_sheet(flagged_lims_rows)

    return merged_df, cttso_lims_df, excel_row_number_mapping_df


def lambdas_awake() -> bool:
"""
Go through the lambdas that are required for this service and make sure that they're all awake
Expand Down Expand Up @@ -1987,6 +2107,10 @@ def lambda_handler(event, context):
excel_row_number_mapping_df: pd.DataFrame
cttso_lims_df, excel_row_number_mapping_df = get_cttso_lims()

# Clean out to-be-deleted cases
merged_df, cttso_lims_df, excel_row_number_mapping_df = \
drop_to_be_deleted_cases(merged_df, cttso_lims_df, excel_row_number_mapping_df)

merged_df, cttso_lims_df, excel_row_number_mapping_df = \
cleanup_duplicate_rows(merged_df, cttso_lims_df, excel_row_number_mapping_df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,122 @@ def get_cttso_lims() -> (pd.DataFrame, pd.DataFrame):
)

return cttso_lims_df, excel_row_number_df


def get_deleted_lims_df() -> (pd.DataFrame, pd.DataFrame):
    """
    Read the "Deleted Cases" sheet of the cttso lims GSuite spreadsheet.

    Empty cells are converted to pd.NA and the strings "TRUE" / "FALSE" are converted
    to booleans. A second frame maps each index value of the sheet frame to its
    excel row number.

    :return: (
      A pandas DataFrame with the following columns:
        * subject_id
        * library_id
        * in_glims
        * in_portal
        * in_redcap
        * in_pieriandx
        * glims_project_owner
        * glims_project_name
        * glims_panel
        * glims_sample_type
        * glims_is_identified
        * glims_default_snomed_term
        * glims_needs_redcap
        * redcap_sample_type
        * redcap_is_complete
        * portal_wfr_id
        * portal_wfr_end
        * portal_wfr_status
        * portal_sequence_run_name
        * portal_is_failed_run
        * pieriandx_submission_time
        * pieriandx_case_id
        * pieriandx_case_accession_number
        * pieriandx_case_creation_date
        * pieriandx_case_identified
        * pieriandx_assignee
        * pieriandx_panel_type
        * pieriandx_sample_type
        * pieriandx_workflow_id
        * pieriandx_workflow_status
        * pieriandx_report_status
        * pieriandx_report_signed_out  - currently ignored
        * date_added_to_deletion_table
      ,
      A pandas DataFrame with the following columns:
        * cttso_lims_index
        * excel_row_number
    )
    """
    deleted_cases_sheet = Spread(spread=get_cttso_lims_sheet_id(), sheet="Deleted Cases")

    # Blank cells become NA first, then the TRUE / FALSE strings become booleans
    deleted_lims_df: pd.DataFrame = (
        deleted_cases_sheet.sheet_to_df(index=0)
        .replace("", pd.NA)
        .replace({"TRUE": True, "FALSE": False})
    )

    # Row number = position in the frame, shifted to a 1-based index plus the single header row
    excel_row_number_df: pd.DataFrame = pd.DataFrame(
        {"cttso_lims_index": deleted_lims_df.index}
    ).assign(
        excel_row_number=lambda frame: frame.index + 2
    )

    return deleted_lims_df, excel_row_number_df


def append_rows_to_deleted_lims(to_be_deleted: pd.DataFrame):
    """
    Append rows to the "Deleted Cases" sheet of the cttso lims spreadsheet.

    The existing sheet content is read, the new rows are concatenated underneath it
    and the whole sheet is written back with replace=True.

    :param to_be_deleted:
      A pandas DataFrame of the rows to add to the deleted lims sheet
      (presumably the cttso lims columns plus date_added_to_deletion_table -
      confirm against the caller)
    :return:
    """
    # Open up the sheet object
    sheet_obj = Spread(spread=get_cttso_lims_sheet_id(), sheet="Deleted Cases")

    # Perform a proper NA replacement
    # https://github.com/pandas-dev/pandas/issues/29024#issuecomment-1098052276
    # np.nan is used rather than the np.NaN alias, which was removed in NumPy 2.0
    new_df = to_be_deleted.replace({pd.NaT: None}).replace({'NaT': None}).replace({np.nan: ""})

    # Get existing sheet
    existing_sheet = sheet_obj.sheet_to_df(index=0)

    # Update the sheet object with the existing rows, the new rows and
    # 1000 empty trailing rows (written out as "" through fill_value)
    sheet_obj.df_to_sheet(
        pd.concat(
            [
                existing_sheet,
                new_df,
                pd.DataFrame(columns=existing_sheet.columns, index=range(1000))
            ]
        ),
        index=False, replace=True, fill_value=""
    )


def add_deleted_cases_to_deleted_sheet(new_cases_to_delete_df: pd.DataFrame):
    """
    Add cases to the deleted lims sheet, skipping case ids that are already there.

    Each row that gets added is stamped with the current UTC time in the
    date_added_to_deletion_table column (format YYYY-MM-DDTHH:MM:SSZ).
    The input frame is not modified.

    :param new_cases_to_delete_df:
      A pandas DataFrame of candidate rows, must contain the column
        * pieriandx_case_id
    :return:
    """
    # Local import - only the 'datetime' class is known to be in scope for this module
    from datetime import timezone

    # Nothing to add, no need to read the deleted sheet at all
    if new_cases_to_delete_df.shape[0] == 0:
        return

    deleted_lims_df, _ = get_deleted_lims_df()

    # Create list for query (referenced through '@' in the query string below)
    existing_deleted_case_ids = deleted_lims_df["pieriandx_case_id"].tolist()

    # Keep only the cases that are not yet in the deleted sheet
    new_cases_to_delete_df = new_cases_to_delete_df.query(
        "pieriandx_case_id not in @existing_deleted_case_ids",
        engine="python"
    )

    if new_cases_to_delete_df.shape[0] == 0:
        return

    # Timezone-aware replacement for the deprecated datetime.utcnow(),
    # tzinfo is dropped again so that the text keeps its 'Z' suffixed format
    date_added = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(sep="T", timespec="seconds") + "Z"

    # assign() builds a new frame, so no column is set on the output of query()
    # (which raises a SettingWithCopyWarning)
    new_cases_to_delete_df = new_cases_to_delete_df.assign(
        date_added_to_deletion_table=date_added
    )

    append_rows_to_deleted_lims(new_cases_to_delete_df)

0 comments on commit 3fdf7f9

Please sign in to comment.