Merge pull request #172 from umccr/dev

umccr · Nov 7, 2023 · 5cd12cc · 5cd12cc
2 parents fba4d3f + 54e142a
commit 5cd12cc
Show file tree

Hide file tree

Showing 8 changed files with 98 additions and 27 deletions.
diff --git a/deploy/cttso-ica-to-pieriandx-cdk/Changelog.md b/deploy/cttso-ica-to-pieriandx-cdk/Changelog.md
@@ -3,6 +3,28 @@
 Changes in this log refer only to changes that make it to the 'main' branch and
 are nested under deploy/cttso-ica-to-pieriandx-cdk.  
 
+## 2023-11-17
+
+> Author: Alexis Lucattini
+> Email: [[email protected]](mailto:[email protected])
+
+### Enhancements
+
+* Add two pieriandx columns for disease code and names (https://github.com/umccr/cttso-ica-to-pieriandx/pull/167)
+  * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/171
+
+### Bugfixes
+
+* Refactored validation lambda so that the expected-column checks are done before columns are renamed, not after (https://github.com/umccr/cttso-ica-to-pieriandx/pull/168)
+  * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/166
+
+* Don't delete pending cases from cttso lims (https://github.com/umccr/cttso-ica-to-pieriandx/pull/169)
+  * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/161
+
+* Added snomed term to submission df (https://github.com/umccr/cttso-ica-to-pieriandx/pull/170)
+  * Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/165
+
+
 ## 2023-10-18
 
 > Author: Alexis Lucattini

diff --git a/deploy/cttso-ica-to-pieriandx-cdk/README.md b/deploy/cttso-ica-to-pieriandx-cdk/README.md
@@ -64,6 +64,8 @@ new_headers = [
     "pieriandx_case_creation_date",
     "pieriandx_case_identified",
     "pieriandx_assignee",
+    "pieriandx_disease_code",
+    "pieriandx_disease_name",
     "pieriandx_panel_type",
     "pieriandx_sample_type",
     "pieriandx_workflow_id",

diff --git a/...mbdas/get_metadata_from_portal_and_defaults_and_launch_validation_workflow/lambda_code.py b/...mbdas/get_metadata_from_portal_and_defaults_and_launch_validation_workflow/lambda_code.py
@@ -46,7 +46,8 @@ def lambda_handler(event, context):
         "ica_workflow_run_id": "wfr.123abc",
         "panel_type": "main",
         "sample_type": "validation",
-        "is_identified": False
+        "is_identified": False | "deidentified",
+        "disease_name": "Disseminated malignancy of unknown primary"
     }
     """
 
@@ -93,13 +94,16 @@ def lambda_handler(event, context):
     if (is_identified := event.get("is_identified", None)) is None:
         is_identified = VALIDATION_DEFAULTS["is_identified"]
 
+    # Check disease name
+    if (disease_name := event.get("disease_name", None)) is None:
+        disease_name = VALIDATION_DEFAULTS["disease_name"]
+
     # Update sample_df with validation defaults
     sample_df["sample_type"] = sample_type
     sample_df["panel_type"] = panel_type
     sample_df["is_identified"] = is_identified
+    sample_df["disease_name"] = disease_name
     sample_df["indication"] = VALIDATION_DEFAULTS["indication"]
-    sample_df["disease_id"] = VALIDATION_DEFAULTS["disease_id"]
-    sample_df["disease_name"] = VALIDATION_DEFAULTS["disease_name"]
     sample_df["requesting_physicians_first_name"] = VALIDATION_DEFAULTS["requesting_physicians_first_name"]
     sample_df["requesting_physicians_last_name"] = VALIDATION_DEFAULTS["requesting_physicians_last_name"]
     sample_df["specimen_type"] = VALIDATION_DEFAULTS["specimen_type"]
@@ -156,6 +160,22 @@ def lambda_handler(event, context):
         axis="columns"
     )
 
+    # Convert times to utc time and strings
+    for date_column in ["date_received", "date_collected", "date_of_birth"]:
+        sample_df[date_column] = sample_df[date_column].apply(
+            lambda x: datetime_obj_to_utc_isoformat(handle_date(x))
+        )
+
+    # Assert expected values exist
+    logger.info("Check we have all of the expected information")
+    for expected_column in EXPECTED_ATTRIBUTES:
+        if expected_column not in sample_df.columns.tolist():
+            logger.error(
+                f"Expected column {expected_column} but "
+                f"did not find it in columns {', '.join(sample_df.columns.tolist())}"
+            )
+            raise ValueError
+
     # For identified - we rename external subject id as the medical record number
     if all(sample_df["is_identified"]):
         sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
@@ -175,22 +195,6 @@ def lambda_handler(event, context):
             }
         )
 
-    # Convert times to utc time and strings
-    for date_column in ["date_received", "date_collected", "date_of_birth"]:
-        sample_df[date_column] = sample_df[date_column].apply(
-            lambda x: datetime_obj_to_utc_isoformat(handle_date(x))
-        )
-
-    # Assert expected values exist
-    logger.info("Check we have all of the expected information")
-    for expected_column in EXPECTED_ATTRIBUTES:
-        if expected_column not in sample_df.columns.tolist():
-            logger.error(
-                f"Expected column {expected_column} but "
-                f"did not find it in columns {', '.join(sample_df.columns.tolist())}"
-            )
-            raise ValueError
-
     # Launch batch lambda function
     accession_json: Dict = sample_df.to_dict(orient="records")[0]
 

diff --git a/...k/lambdas/get_metadata_from_portal_and_redcap_and_launch_clinical_workflow/lambda_code.py b/...k/lambdas/get_metadata_from_portal_and_redcap_and_launch_clinical_workflow/lambda_code.py
@@ -112,7 +112,7 @@ def lambda_handler(event, context):
         "allow_missing_redcap_entry": false,
         "panel_type": "main",
         "sample_type": "patient_care_sample",
-        "is_identified": true,
+        "is_identified": true | "identified",
     }
     """
 

diff --git a/...ieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py b/...ieriandx-cdk/lambdas/launch_available_payloads_and_update_cttso_lims_sheet/lambda_code.py
@@ -184,6 +184,8 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
       * pieriandx_case_creation_date
       * pieriandx_assignee
       * pieriandx_case_identified
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
     :return: A pandas dataframe with the following columns
@@ -195,6 +197,7 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
       * is_identified
       * needs_redcap
       * redcap_is_complete
+      * default_snomed_term
     """
 
     # Initialise
@@ -206,7 +209,8 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
         "sample_type",
         "is_identified",
         "needs_redcap",
-        "redcap_is_complete"
+        "redcap_is_complete",
+        "default_snomed_term"
     ]
 
     # Processing libraries must meet the following criteria
@@ -273,6 +277,7 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
       "sample_type",
       "is_identified",
       "needs_redcap"
+      "default_snomed_term"
     ]
 
     for column_name in new_column_names:
@@ -284,14 +289,17 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
     ]
 
 
-def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_id: str, lambda_arn: str, panel_type: str, sample_type: str, is_identified: bool):
+def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_id: str, lambda_arn: str, panel_type: str, sample_type: str, is_identified: Union[bool | str], default_snomed_term: str):
     """
     Submit library to pieriandx
+    :param is_identified:
+    :param sample_type:
     :param subject_id:
     :param library_id:
     :param workflow_run_id:
     :param lambda_arn:
     :param panel_type:
+    :param default_snomed_term:
     :return:
     """
     lambda_client: LambdaClient = get_boto3_lambda_client()
@@ -302,7 +310,8 @@ def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_i
             "ica_workflow_run_id": workflow_run_id,
             "panel_type": panel_type,
             "sample_type": sample_type,
-            "is_identified": is_identified
+            "is_identified": is_identified,
+            "disease_name": default_snomed_term
     }
 
     logger.info(f"Launching lambda function {lambda_arn} with the following payload {json.dumps(lambda_payload)}")
@@ -361,6 +370,7 @@ def submit_libraries_to_pieriandx(processing_df: pd.DataFrame) -> pd.DataFrame:
       * is_identified
       * needs_redcap
       * redcap_is_complete
+      * default_snomed_term
     :return:
       A pandas dataframe with the following columns
       * subject_id
@@ -411,7 +421,8 @@ def submit_libraries_to_pieriandx(processing_df: pd.DataFrame) -> pd.DataFrame:
                 lambda_arn=row.submission_arn,
                 panel_type=row.panel,
                 sample_type=row.sample_type,
-                is_identified=row.is_identified
+                is_identified=row.is_identified,
+                default_snomed_term=row.default_snomed_term
             )
         except ValueError:
             pass
@@ -477,6 +488,8 @@ def append_to_cttso_lims(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, e
       * pieriandx_case_creation_date
       * pieriandx_case_identified
       * pieriandx_assignee
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -649,6 +662,8 @@ def get_pieriandx_incomplete_job_df_from_cttso_lims_df(cttso_lims_df: pd.DataFra
         * pieriandx_case_accession_number
         * pieriandx_case_creation_date
         * pieriandx_case_identified
+        * pieriandx_disease_code
+        * pieriandx_disease_name
         * pieriandx_panel_type
         * pieriandx_sample_type
         * pieriandx_workflow_id
@@ -681,6 +696,8 @@ def get_pieriandx_incomplete_job_df_from_cttso_lims_df(cttso_lims_df: pd.DataFra
         * pieriandx_case_accession_number
         * pieriandx_case_creation_date
         * pieriandx_case_identified
+        * pieriandx_disease_code
+        * pieriandx_disease_name
         * pieriandx_panel_type
         * pieriandx_sample_type
         * pieriandx_workflow_id
@@ -805,6 +822,8 @@ def update_pieriandx_job_status_missing_df(pieriandx_job_status_missing_df, merg
       * pieriandx_case_id
       * pieriandx_case_accession_number
       * pieriandx_case_identified
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -1120,6 +1139,8 @@ def update_cttso_lims(update_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, exce
       * pieriandx_case_creation_date
       * pieriandx_case_identified
       * pieriandx_assignee
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -1170,6 +1191,8 @@ def update_cttso_lims(update_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, exce
             "pieriandx_case_creation_date",
             "pieriandx_assignee",
             "pieriandx_case_identified",
+            "pieriandx_disease_code",
+            "pieriandx_disease_name",
             "pieriandx_panel_type",
             "pieriandx_sample_type",
             "pieriandx_workflow_id",
@@ -1225,6 +1248,8 @@ def get_duplicate_case_ids(lims_df: pd.DataFrame) -> List:
       * pieriandx_case_accession_number
       * pieriandx_case_creation_date
       * pieriandx_assignee
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -1421,6 +1446,8 @@ def cleanup_duplicate_rows(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame,
       * pieriandx_case_creation_date
       * pieriandx_case_identified
       * pieriandx_assignee
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -1483,6 +1510,8 @@ def cleanup_duplicate_rows(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame,
         * pieriandx_case_creation_date
         * pieriandx_case_identified
         * pieriandx_assignee
+        * pieriandx_disease_code
+        * pieriandx_disease_name
         * pieriandx_panel_type
         * pieriandx_sample_type
         * pieriandx_workflow_id
@@ -1659,6 +1688,8 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
       * pieriandx_case_creation_date
       * pieriandx_case_identified
       * pieriandx_assignee
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -1690,6 +1721,8 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
       * pieriandx_case_accession_number
       * pieriandx_case_creation_date
       * pieriandx_assignee
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_sample_type
       * pieriandx_workflow_id
@@ -1936,6 +1969,8 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
         * pieriandx_case_creation_date
         * pieriandx_case_identified
         * pieriandx_assignee
+        * pieriandx_disease_code
+        * pieriandx_disease_name
         * pieriandx_panel_type
         * pieriandx_sample_type
         * pieriandx_workflow_id
@@ -1955,7 +1990,8 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
         "pieriandx_assignee == 'ToBeDeleted' or "
         "( "
         "  pieriandx_case_id not in @existing_pieriandx_cases and "
-        "  not pieriandx_case_id.isnull()"
+        "  not pieriandx_case_id.isnull() and "
+        "  not pieriandx_case_id == 'pending'"
         ")",
         engine="python"
     )

diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/globals.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/globals.py
@@ -97,7 +97,6 @@ class SampleType(Enum):
     "sample_type": "validation",
     "panel_type": PanelType.MAIN,
     "indication": "NA",
-    "disease_id": 285645000,
     "disease_name": "Disseminated malignancy of unknown primary",
     "is_identified": True,
     "requesting_physicians_first_name": "Sean",
@@ -175,7 +174,7 @@ class SampleType(Enum):
 
 EXPECTED_ATTRIBUTES = [
     "sample_type",
-    "disease_id",
+    "disease_name",
     "indication",
     "accession_number",
     "external_specimen_id",

diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/gspread_helpers.py
@@ -222,6 +222,8 @@ def get_cttso_lims() -> (pd.DataFrame, pd.DataFrame):
         * pieriandx_case_creation_date
         * pieriandx_case_identified
         * pieriandx_assignee
+        * pieriandx_disease_code
+        * pieriandx_disease_name
         * pieriandx_panel_type
         * pieriandx_sample_type
         * pieriandx_workflow_id
@@ -299,6 +301,8 @@ def get_deleted_lims_df() -> (pd.DataFrame, pd.DataFrame):
         * pieriandx_case_creation_date
         * pieriandx_case_identified
         * pieriandx_assignee
+        * pieriandx_disease_code
+        * pieriandx_disease_name
         * pieriandx_panel_type
         * pieriandx_sample_type
         * pieriandx_workflow_id

diff --git a/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/pieriandx_helpers.py b/deploy/cttso-ica-to-pieriandx-cdk/lambdas/layers/lambda_utils/pieriandx_helpers.py
@@ -397,6 +397,8 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
       * pieriandx_case_id
       * pieriandx_case_accession_number
       * pieriandx_case_identified
+      * pieriandx_disease_code
+      * pieriandx_disease_name
       * pieriandx_panel_type
       * pieriandx_workflow_id
       * pieriandx_workflow_status
@@ -437,6 +439,8 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
             "pieriandx_case_id": case_id,
             "pieriandx_case_accession_number": response.get("specimens")[0].get("accessionNumber"),
             "pieriandx_case_identified": response.get("identified", False),
+            "pieriandx_disease_code": response.get("disease").get("code"),
+            "pieriandx_disease_name": response.get("disease").get("name"),
             "pieriandx_panel_type": PanelType(response.get("panelName")).name,
             "pieriandx_sample_type": SampleType(response.get("sampleType")).name,
             "pieriandx_workflow_id": pd.NA,