diff --git a/deploy/cttso-ica-to-pieriandx-cdk/Changelog.md b/deploy/cttso-ica-to-pieriandx-cdk/Changelog.md index 7fb1369..732ade4 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/Changelog.md +++ b/deploy/cttso-ica-to-pieriandx-cdk/Changelog.md @@ -3,6 +3,28 @@ Changes in this log refer only to changes that make it to the 'main' branch and are nested under deploy/cttso-ica-to-pieriandx-cdk. +## 2023-11-17 + +> Author: Alexis Lucattini +> Email: [Alexis.Lucattini@umccr.org](mailto:alexis.lucattini@umccr.org) + +### Enhancements + +* Add two pieriandx columns for disease code and names (https://github.com/umccr/cttso-ica-to-pieriandx/pull/167) + * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/171 + +### Bugfixes + +* Refactored validation lambda so that checks are not done before columns are renamed (https://github.com/umccr/cttso-ica-to-pieriandx/pull/168) + * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/166 + +* Don't delete pending cases from cttso lims (https://github.com/umccr/cttso-ica-to-pieriandx/pull/169) + * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/161 + +* Added snomed term to submission df (https://github.com/umccr/cttso-ica-to-pieriandx/pull/170) + * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/165 + + ## 2023-10-18 > Author: Alexis Lucattini diff --git a/deploy/cttso-ica-to-pieriandx-cdk/README.md b/deploy/cttso-ica-to-pieriandx-cdk/README.md index a6880a2..7a0aecf 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/README.md +++ b/deploy/cttso-ica-to-pieriandx-cdk/README.md @@ -64,6 +64,8 @@ new_headers = [ "pieriandx_case_creation_date", "pieriandx_case_identified", "pieriandx_assignee", + "pieriandx_disease_code", + "pieriandx_disease_name", "pieriandx_panel_type", "pieriandx_sample_type", "pieriandx_workflow_id", diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_defaults_and_launch_validation_workflow/lambda_code.py 
b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_defaults_and_launch_validation_workflow/lambda_code.py index baca52b..8cc1312 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_defaults_and_launch_validation_workflow/lambda_code.py +++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_defaults_and_launch_validation_workflow/lambda_code.py @@ -46,7 +46,8 @@ def lambda_handler(event, context): "ica_workflow_run_id": "wfr.123abc", "panel_type": "main", "sample_type": "validation", - "is_identified": False + "is_identified": False | "deidentified", + "disease_name": "Disseminated malignancy of unknown primary" } """ @@ -93,13 +94,16 @@ def lambda_handler(event, context): if (is_identified := event.get("is_identified", None)) is None: is_identified = VALIDATION_DEFAULTS["is_identified"] + # Check disease name + if (disease_name := event.get("disease_name", None)) is None: + disease_name = VALIDATION_DEFAULTS["disease_name"] + # Update sample_df with validation defaults sample_df["sample_type"] = sample_type sample_df["panel_type"] = panel_type sample_df["is_identified"] = is_identified + sample_df["disease_name"] = disease_name sample_df["indication"] = VALIDATION_DEFAULTS["indication"] - sample_df["disease_id"] = VALIDATION_DEFAULTS["disease_id"] - sample_df["disease_name"] = VALIDATION_DEFAULTS["disease_name"] sample_df["requesting_physicians_first_name"] = VALIDATION_DEFAULTS["requesting_physicians_first_name"] sample_df["requesting_physicians_last_name"] = VALIDATION_DEFAULTS["requesting_physicians_last_name"] sample_df["specimen_type"] = VALIDATION_DEFAULTS["specimen_type"] @@ -156,6 +160,22 @@ def lambda_handler(event, context): axis="columns" ) + # Convert times to utc time and strings + for date_column in ["date_received", "date_collected", "date_of_birth"]: + sample_df[date_column] = sample_df[date_column].apply( + lambda x: datetime_obj_to_utc_isoformat(handle_date(x)) + ) + + # 
Assert expected values exist + logger.info("Check we have all of the expected information") + for expected_column in EXPECTED_ATTRIBUTES: + if expected_column not in sample_df.columns.tolist(): + logger.error( + f"Expected column {expected_column} but " + f"did not find it in columns {', '.join(sample_df.columns.tolist())}" + ) + raise ValueError + # For identified - we rename external subject id as the medical record number if all(sample_df["is_identified"]): sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"] @@ -175,22 +195,6 @@ def lambda_handler(event, context): } ) - # Convert times to utc time and strings - for date_column in ["date_received", "date_collected", "date_of_birth"]: - sample_df[date_column] = sample_df[date_column].apply( - lambda x: datetime_obj_to_utc_isoformat(handle_date(x)) - ) - - # Assert expected values exist - logger.info("Check we have all of the expected information") - for expected_column in EXPECTED_ATTRIBUTES: - if expected_column not in sample_df.columns.tolist(): - logger.error( - f"Expected column {expected_column} but " - f"did not find it in columns {', '.join(sample_df.columns.tolist())}" - ) - raise ValueError - # Launch batch lambda function accession_json: Dict = sample_df.to_dict(orient="records")[0] diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_redcap_and_launch_clinical_workflow/lambda_code.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_redcap_and_launch_clinical_workflow/lambda_code.py index 4e0daff..403f983 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_redcap_and_launch_clinical_workflow/lambda_code.py +++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/get_metadata_from_portal_and_redcap_and_launch_clinical_workflow/lambda_code.py @@ -112,7 +112,7 @@ def lambda_handler(event, context): "allow_missing_redcap_entry": false, "panel_type": "main", "sample_type": "patient_care_sample", - "is_identified": true, + 
"is_identified": true | "identified" } """ diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py index 0e7918c..ad15344 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py +++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py @@ -184,6 +184,8 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame: * pieriandx_case_creation_date * pieriandx_assignee * pieriandx_case_identified + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type :return: A pandas dataframe with the following columns @@ -195,6 +197,7 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame: * is_identified * needs_redcap * redcap_is_complete + * default_snomed_term """ # Initialise @@ -206,7 +209,8 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame: "sample_type", "is_identified", "needs_redcap", - "redcap_is_complete" + "redcap_is_complete", + "default_snomed_term" ] # Processing libraries must meet the following criteria @@ -273,6 +277,7 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame: "sample_type", "is_identified", - "needs_redcap" + "needs_redcap", + "default_snomed_term" ] for column_name in new_column_names: @@ -284,14 +289,17 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame: ] -def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_id: str, lambda_arn: str, panel_type: str, sample_type: str, is_identified: bool): +def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_id: str, lambda_arn: str, panel_type: str, sample_type: str, is_identified: Union[bool, str], default_snomed_term: str): """ Submit library to pieriandx + :param 
is_identified: + :param sample_type: :param subject_id: :param library_id: :param workflow_run_id: :param lambda_arn: :param panel_type: + :param default_snomed_term: :return: """ lambda_client: LambdaClient = get_boto3_lambda_client() @@ -302,7 +310,8 @@ def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_i "ica_workflow_run_id": workflow_run_id, "panel_type": panel_type, "sample_type": sample_type, - "is_identified": is_identified + "is_identified": is_identified, + "disease_name": default_snomed_term } logger.info(f"Launching lambda function {lambda_arn} with the following payload {json.dumps(lambda_payload)}") @@ -361,6 +370,7 @@ def submit_libraries_to_pieriandx(processing_df: pd.DataFrame) -> pd.DataFrame: * is_identified * needs_redcap * redcap_is_complete + * default_snomed_term :return: A pandas dataframe with the following columns * subject_id @@ -411,7 +421,8 @@ def submit_libraries_to_pieriandx(processing_df: pd.DataFrame) -> pd.DataFrame: lambda_arn=row.submission_arn, panel_type=row.panel, sample_type=row.sample_type, - is_identified=row.is_identified + is_identified=row.is_identified, + default_snomed_term=row.default_snomed_term ) except ValueError: pass @@ -477,6 +488,8 @@ def append_to_cttso_lims(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, e * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -649,6 +662,8 @@ def get_pieriandx_incomplete_job_df_from_cttso_lims_df(cttso_lims_df: pd.DataFra * pieriandx_case_accession_number * pieriandx_case_creation_date * pieriandx_case_identified + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -681,6 +696,8 @@ def get_pieriandx_incomplete_job_df_from_cttso_lims_df(cttso_lims_df: pd.DataFra * pieriandx_case_accession_number * 
pieriandx_case_creation_date * pieriandx_case_identified + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -805,6 +822,8 @@ def update_pieriandx_job_status_missing_df(pieriandx_job_status_missing_df, merg * pieriandx_case_id * pieriandx_case_accession_number * pieriandx_case_identified + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1120,6 +1139,8 @@ def update_cttso_lims(update_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, exce * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1170,6 +1191,8 @@ def update_cttso_lims(update_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, exce "pieriandx_case_creation_date", "pieriandx_assignee", "pieriandx_case_identified", + "pieriandx_disease_code", + "pieriandx_disease_name", "pieriandx_panel_type", "pieriandx_sample_type", "pieriandx_workflow_id", @@ -1225,6 +1248,8 @@ def get_duplicate_case_ids(lims_df: pd.DataFrame) -> List: * pieriandx_case_accession_number * pieriandx_case_creation_date * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1421,6 +1446,8 @@ def cleanup_duplicate_rows(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1483,6 +1510,8 @@ def cleanup_duplicate_rows(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * 
pieriandx_workflow_id @@ -1659,6 +1688,8 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1690,6 +1721,8 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct * pieriandx_case_accession_number * pieriandx_case_creation_date * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1936,6 +1969,8 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -1955,7 +1990,8 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram "pieriandx_assignee == 'ToBeDeleted' or " "( " " pieriandx_case_id not in @existing_pieriandx_cases and " - " not pieriandx_case_id.isnull()" + " not pieriandx_case_id.isnull() and " + " not pieriandx_case_id == 'pending'" ")", engine="python" ) diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/globals.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/globals.py index 5408023..224d37d 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/globals.py +++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/globals.py @@ -97,7 +97,6 @@ class SampleType(Enum): "sample_type": "validation", "panel_type": PanelType.MAIN, "indication": "NA", - "disease_id": 285645000, "disease_name": "Disseminated malignancy of unknown primary", "is_identified": True, "requesting_physicians_first_name": "Sean", @@ -175,7 +174,7 @@ class SampleType(Enum): EXPECTED_ATTRIBUTES = [ 
"sample_type", - "disease_id", + "disease_name", "indication", "accession_number", "external_specimen_id", diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py index 6cd1fa2..162742a 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py +++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py @@ -222,6 +222,8 @@ def get_cttso_lims() -> (pd.DataFrame, pd.DataFrame): * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id @@ -299,6 +301,8 @@ def get_deleted_lims_df() -> (pd.DataFrame, pd.DataFrame): * pieriandx_case_creation_date * pieriandx_case_identified * pieriandx_assignee + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_sample_type * pieriandx_workflow_id diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/pieriandx_helpers.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/pieriandx_helpers.py index 1d6ae71..6e01185 100644 --- a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/pieriandx_helpers.py +++ b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/pieriandx_helpers.py @@ -397,6 +397,8 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series: * pieriandx_case_id * pieriandx_case_accession_number * pieriandx_case_identified + * pieriandx_disease_code + * pieriandx_disease_name * pieriandx_panel_type * pieriandx_workflow_id * pieriandx_workflow_status @@ -437,6 +439,8 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series: "pieriandx_case_id": case_id, "pieriandx_case_accession_number": response.get("specimens")[0].get("accessionNumber"), "pieriandx_case_identified": 
response.get("identified", False), + "pieriandx_disease_code": response.get("disease").get("code"), + "pieriandx_disease_name": response.get("disease").get("name"), "pieriandx_panel_type": PanelType(response.get("panelName")).name, "pieriandx_sample_type": SampleType(response.get("sampleType")).name, "pieriandx_workflow_id": pd.NA,