Skip to content
This repository has been archived by the owner on Dec 6, 2024. It is now read-only.

Commit

Permalink
Added project name / owner mapping json
Browse files Browse the repository at this point in the history
* Update the pieriandx case deployment lambdas to accept new metadata
* Collect glims info from limsrow instead
* Update function docs to contain correct columns

* Resolves #131 / #132 / #134 / #135
  • Loading branch information
alexiswl committed Oct 17, 2023
1 parent 0acf166 commit 0e1a9d8
Show file tree
Hide file tree
Showing 13 changed files with 1,121 additions and 262 deletions.
114 changes: 110 additions & 4 deletions deploy/cttso-ica-to-pieriandx-cdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@ new_headers = [
"in_portal",
"in_redcap",
"in_pieriandx",
"glims_is_validation",
"glims_is_research",
"glims_project_owner",
"glims_project_name",
"glims_panel",
"glims_sample_type",
"glims_is_identified",
"glims_default_snomed_term",
"glims_needs_redcap",
"redcap_sample_type",
"redcap_is_complete",
"portal_wfr_id",
Expand All @@ -58,11 +63,12 @@ new_headers = [
"pieriandx_case_accession_number",
"pieriandx_case_creation_date",
"pieriandx_case_identified",
"pieriandx_assignee",
"pieriandx_panel_type",
"pieriandx_sample_type",
"pieriandx_workflow_id",
"pieriandx_workflow_status",
"pieriandx_report_status",
"pieriandx_report_status"
]

headers_df = pd.DataFrame(columns=new_headers)
Expand Down Expand Up @@ -253,7 +259,7 @@ Now change to the deployment directory (the directory this readme is in)
cd deploy/cttso-ica-to-pieriandx-cdk
```

### Wake up lamdas!
### Wake up lambdas!

Before we launch any payloads, let's ensure that the lambda (and any downstream lambdas)
are active.
Expand Down Expand Up @@ -282,6 +288,106 @@ Find the workflow with the subject id and library id of interest in the workflow
Use the Google LIMS page to check if your sample is a validation sample (ProjectName field is either _control_ or _validation_).
Validation samples do not go through the subpanel pipeline, clinical samples go through the subpanel pipeline.

We use the following JSON logic to determine the pathway for each pieriandx sample based on its project owner.

This file can be found in `project-name-to-pieriandx-mapping.json`.

The mapping can be updated with the script `update_project_name_mapping.sh`.

This ssm parameter is NOT part of the cdk stack and MUST be updated using the script above.

```json
[
{
"project_owner": "VCCC",
"project_name": "PO",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term":null
},
{
"project_owner": "Grimmond",
"project_name": "COUMN",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "Tothill",
"project_name": "CUP",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": "Disseminated malignancy of unknown primary"
},
{
"project_owner": "Tothill",
"project_name": "PPGL",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "TJohn",
"project_name": "MESO",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "TJohn",
"project_name": "OCEANiC",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": null
},
{
"project_owner": "*",
"project_name": "SOLACE2",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "SLuen",
"project_name": "IMPARP",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "UMCCR",
"project_name": "Control",
"panel": "main",
"sample_type": "validation",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "UMCCR",
"project_name": "QAP",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "*",
"project_name": "*",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
}
]
```

### Creating the input payloads file

Expand Down
3 changes: 3 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ export const SSM_TOKEN_REFRESH_LAMBDA_FUNCTION_ARN_VALUE: string = "token-refres

// Output things
export const SSM_LAMBDA_FUNCTION_ARN_VALUE: string = "cttso-ica-to-pieriandx-lambda-function"

// Project Owner mapping path
export const SSM_PROJECT_NAME_TO_PIERIANDX_CONFIG_SSM_PATH: string = "cttso-lims-project-name-to-pieriandx-mapping"
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def lambda_handler(event, context):
"library_id": "L1234567",
"case_accession_number": "SBJID_LIBID_123",
"ica_workflow_run_id": "wfr.123abc",
"panel_type": "main"
"panel_type": "main",
"sample_type": "validation",
"is_identified": False
}
"""

Expand Down Expand Up @@ -76,17 +78,30 @@ def lambda_handler(event, context):
f"for subject id '{subject_id}' / library id '{library_id}'")
raise ValueError

# Get panel type (or get default if none)
panel_type: str
if (panel_type := event.get("panel_type", None)) is None:
panel_type = VALIDATION_DEFAULTS["panel_type"]

# Get sample type
sample_type: str
if (sample_type := event.get("sample_type", None)) is None:
sample_type = VALIDATION_DEFAULTS["sample_type"]

# Get is identified
is_identified: str
if (is_identified := event.get("is_identified", None)) is None:
is_identified = VALIDATION_DEFAULTS["is_identified"]

# Update sample_df with validation defaults
sample_df["sample_type"] = VALIDATION_DEFAULTS["sample_type"]
sample_df["sample_type"] = sample_type
sample_df["panel_type"] = panel_type
sample_df["is_identified"] = is_identified
sample_df["indication"] = VALIDATION_DEFAULTS["indication"]
sample_df["disease_id"] = VALIDATION_DEFAULTS["disease_id"]
sample_df["disease_name"] = VALIDATION_DEFAULTS["disease_name"]
sample_df["is_identified"] = VALIDATION_DEFAULTS["is_identified"]
sample_df["requesting_physicians_first_name"] = VALIDATION_DEFAULTS["requesting_physicians_first_name"]
sample_df["requesting_physicians_last_name"] = VALIDATION_DEFAULTS["requesting_physicians_last_name"]
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
sample_df["last_name"] = VALIDATION_DEFAULTS["last_name"]
sample_df["date_of_birth"] = VALIDATION_DEFAULTS["date_of_birth"]
sample_df["specimen_type"] = VALIDATION_DEFAULTS["specimen_type"]
sample_df["date_accessioned"] = VALIDATION_DEFAULTS["date_accessioned"]
sample_df["date_collected"] = VALIDATION_DEFAULTS["date_collected"]
Expand Down Expand Up @@ -135,7 +150,6 @@ def lambda_handler(event, context):
sample_df = sample_df.rename(
columns={
"external_sample_id": "external_specimen_id",
"external_subject_id": "mrn"
}
)

Expand All @@ -148,6 +162,25 @@ def lambda_handler(event, context):
axis="columns"
)

# For identified - we rename external subject id as the medical record number
if all(sample_df["is_identified"]):
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
sample_df["last_name"] = VALIDATION_DEFAULTS["last_name"]
sample_df["date_of_birth"] = VALIDATION_DEFAULTS["date_of_birth"]
sample_df = sample_df.rename(
columns={
"external_subject_id": "mrn"
}
)
# For deidentified - we rename the external subject id as the study subject identifier
else:
sample_df["study_identifier"] = sample_df["project_name"]
sample_df = sample_df.rename(
columns={
"external_subject_id": "study_subject_identifier"
}
)

# Assert expected values exist
logger.info("Check we have all of the expected information")
for expected_column in EXPECTED_ATTRIBUTES:
Expand All @@ -158,10 +191,6 @@ def lambda_handler(event, context):
)
raise ValueError

if (panel_type := event.get("panel_type", None)) is None:
panel_type = VALIDATION_DEFAULTS["panel_type"].name.lower()
sample_df["panel_type"] = panel_type

# Launch batch lambda function
accession_json: Dict = sample_df.to_dict(orient="records")[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def merge_clinical_redcap_and_portal_data(redcap_df: pd.DataFrame, portal_df: pd
Whilst the portal dataframe contains the following columns:
* subject_id
* library_id
* project_name
* external_sample_id
* external_subject_id
:param redcap_df:
Expand Down Expand Up @@ -109,7 +110,9 @@ def lambda_handler(event, context):
"case_accession_number": "SBJID_LIBID_123",
"ica_workflow_run_id": "wfr.123abc",
"allow_missing_redcap_entry": false,
"panel_type": "main"
"panel_type": "main",
"sample_type": "patient_care_sample",
"is_identified": true,
}
"""

Expand Down Expand Up @@ -204,6 +207,17 @@ def lambda_handler(event, context):
if (panel_type := event.get("panel_type", None)) is None:
panel_type = CLINICAL_DEFAULTS["panel_type"].name.lower()

if (sample_type := event.get("sample_type", None)) is None:
sample_type = CLINICAL_DEFAULTS["sample_type"].name.lower()

if (is_identified := event.get("is_identified", None)) is None:
is_identified = CLINICAL_DEFAULTS["is_identified"].name.lower()

# Set panel type (if not null)
merged_df["panel_type"] = panel_type
merged_df["sample_type"] = sample_type
merged_df["is_identified"] = is_identified

# Check length
if merged_df.shape[0] == 0:
logger.error("PierianDx metadata was not 'Complete', exiting")
Expand Down Expand Up @@ -236,7 +250,6 @@ def lambda_handler(event, context):

# Set defaults
merged_df["specimen_type"] = CLINICAL_DEFAULTS["specimen_type"]
merged_df["is_identified"] = CLINICAL_DEFAULTS["is_identified"]
merged_df["indication"] = CLINICAL_DEFAULTS["indication"]
merged_df["hospital_number"] = CLINICAL_DEFAULTS["hospital_number"]
merged_df["accession_number"] = case_accession_number
Expand All @@ -252,8 +265,7 @@ def lambda_handler(event, context):
logger.info("Rename external subject and external sample columns")
merged_df = merged_df.rename(
columns={
"external_sample_id": "external_specimen_id",
"external_subject_id": "mrn"
"external_sample_id": "external_specimen_id"
}
)

Expand All @@ -268,18 +280,32 @@ def lambda_handler(event, context):
raise ValueError

# Step 7a - make up the 'identified' values (date_of_birth / first_name / last_name)
merged_df["date_of_birth"] = datetime_obj_to_utc_isoformat(CLINICAL_DEFAULTS["date_of_birth"])
merged_df["first_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[0],
axis="columns"
)
merged_df["last_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[-1],
axis="columns"
)
# We set all but we only have one row (as asserted in the merge df)
if all(merged_df["is_identified"]):
merged_df["date_of_birth"] = datetime_obj_to_utc_isoformat(CLINICAL_DEFAULTS["date_of_birth"])
merged_df["first_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[0],
axis="columns"
)
merged_df["last_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[-1],
axis="columns"
)
merged_df = merged_df.rename(
columns={
"external_subject_id": "mrn"
}
)
# Step 7b - for deidentified samples, use study_identified and study_subject_identifier
else:
merged_df["study_identifier"] = merged_df["project_name"]
merged_df = merged_df.rename(
columns={
"external_subject_id": "study_subject_identifier"
}
)

# Set panel type
merged_df["panel_type"] = panel_type
# Set is_identified

# Step 7 - Launch batch lambda function
accession_json: Dict = merged_df.to_dict(orient="records")[0]
Expand Down
Loading

0 comments on commit 0e1a9d8

Please sign in to comment.