Skip to content
This repository has been archived by the owner on Dec 6, 2024. It is now read-only.

Commit

Permalink
Added project name / owner mapping json
Browse files Browse the repository at this point in the history
* Update the pieriandx case deployment lambdas to accept new metadata
* Collect glims info from limsrow instead
* Update function docs to contain correct columns

* Resolves #131 / #132 / #134 / #135
  • Loading branch information
alexiswl committed Oct 17, 2023
1 parent 0acf166 commit 0e1a9d8
Show file tree
Hide file tree
Showing 13 changed files with 1,121 additions and 262 deletions.
114 changes: 110 additions & 4 deletions deploy/cttso-ica-to-pieriandx-cdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@ new_headers = [
"in_portal",
"in_redcap",
"in_pieriandx",
"glims_is_validation",
"glims_is_research",
"glims_project_owner",
"glims_project_name",
"glims_panel",
"glims_sample_type",
"glims_is_identified",
"glims_default_snomed_term",
"glims_needs_redcap",
"redcap_sample_type",
"redcap_is_complete",
"portal_wfr_id",
Expand All @@ -58,11 +63,12 @@ new_headers = [
"pieriandx_case_accession_number",
"pieriandx_case_creation_date",
"pieriandx_case_identified",
"pieriandx_assignee",
"pieriandx_panel_type",
"pieriandx_sample_type",
"pieriandx_workflow_id",
"pieriandx_workflow_status",
"pieriandx_report_status",
"pieriandx_report_status"
]

headers_df = pd.DataFrame(columns=new_headers)
Expand Down Expand Up @@ -253,7 +259,7 @@ Now change to the deployment directory (the directory this readme is in)
cd deploy/cttso-ica-to-pieriandx-cdk
```

### Wake up lamdas!
### Wake up lambdas!

Before we launch any payloads, let's ensure that the lambda (and any downstream lambdas)
are active.
Expand Down Expand Up @@ -282,6 +288,106 @@ Find the workflow with the subject id and library id of interest in the workflow
Use the Google LIMS page to check if your sample is a validation sample (ProjectName field is either _control_ or _validation_).
Validation samples do not go through the subpanel pipeline, clinical samples go through the subpanel pipeline.

We use the following JSON logic to determine the pathway for each pieriandx sample based on its project owner.

This file can be found in `project-name-to-pieriandx-mapping.json`.

The mapping can be updated with the script `update_project_name_mapping.sh`.

This ssm parameter is NOT part of the cdk stack and MUST be updated using the script above.

```json
[
{
"project_owner": "VCCC",
"project_name": "PO",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term":null
},
{
"project_owner": "Grimmond",
"project_name": "COUMN",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "Tothill",
"project_name": "CUP",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": "Disseminated malignancy of unknown primary"
},
{
"project_owner": "Tothill",
"project_name": "PPGL",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "TJohn",
"project_name": "MESO",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "TJohn",
"project_name": "OCEANiC",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": null
},
{
"project_owner": "*",
"project_name": "SOLACE2",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "SLuen",
"project_name": "IMPARP",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "UMCCR",
"project_name": "Control",
"panel": "main",
"sample_type": "validation",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "UMCCR",
"project_name": "QAP",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "*",
"project_name": "*",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
}
]
```

### Creating the input payloads file

Expand Down
3 changes: 3 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ export const SSM_TOKEN_REFRESH_LAMBDA_FUNCTION_ARN_VALUE: string = "token-refres

// Output things
export const SSM_LAMBDA_FUNCTION_ARN_VALUE: string = "cttso-ica-to-pieriandx-lambda-function"

// Project Owner mapping path
export const SSM_PROJECT_NAME_TO_PIERIANDX_CONFIG_SSM_PATH: string = "cttso-lims-project-name-to-pieriandx-mapping"
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def lambda_handler(event, context):
"library_id": "L1234567",
"case_accession_number": "SBJID_LIBID_123",
"ica_workflow_run_id": "wfr.123abc",
"panel_type": "main"
"panel_type": "main",
"sample_type": "validation",
"is_identified": False
}
"""

Expand Down Expand Up @@ -76,17 +78,30 @@ def lambda_handler(event, context):
f"for subject id '{subject_id}' / library id '{library_id}'")
raise ValueError

# Get panel type (or get default if none)
panel_type: str
if (panel_type := event.get("panel_type", None)) is None:
panel_type = VALIDATION_DEFAULTS["panel_type"]

# Get sample type
sample_type: str
if (sample_type := event.get("sample_type", None)) is None:
sample_type = VALIDATION_DEFAULTS["sample_type"]

# Get is identified
is_identified: str
if (is_identified := event.get("is_identified", None)) is None:
is_identified = VALIDATION_DEFAULTS["is_identified"]

# Update sample_df with validation defaults
sample_df["sample_type"] = VALIDATION_DEFAULTS["sample_type"]
sample_df["sample_type"] = sample_type
sample_df["panel_type"] = panel_type
sample_df["is_identified"] = is_identified
sample_df["indication"] = VALIDATION_DEFAULTS["indication"]
sample_df["disease_id"] = VALIDATION_DEFAULTS["disease_id"]
sample_df["disease_name"] = VALIDATION_DEFAULTS["disease_name"]
sample_df["is_identified"] = VALIDATION_DEFAULTS["is_identified"]
sample_df["requesting_physicians_first_name"] = VALIDATION_DEFAULTS["requesting_physicians_first_name"]
sample_df["requesting_physicians_last_name"] = VALIDATION_DEFAULTS["requesting_physicians_last_name"]
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
sample_df["last_name"] = VALIDATION_DEFAULTS["last_name"]
sample_df["date_of_birth"] = VALIDATION_DEFAULTS["date_of_birth"]
sample_df["specimen_type"] = VALIDATION_DEFAULTS["specimen_type"]
sample_df["date_accessioned"] = VALIDATION_DEFAULTS["date_accessioned"]
sample_df["date_collected"] = VALIDATION_DEFAULTS["date_collected"]
Expand Down Expand Up @@ -135,7 +150,6 @@ def lambda_handler(event, context):
sample_df = sample_df.rename(
columns={
"external_sample_id": "external_specimen_id",
"external_subject_id": "mrn"
}
)

Expand All @@ -148,6 +162,25 @@ def lambda_handler(event, context):
axis="columns"
)

# For identified - we rename external subject id as the medical record number
if all(sample_df["is_identified"]):
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
sample_df["last_name"] = VALIDATION_DEFAULTS["last_name"]
sample_df["date_of_birth"] = VALIDATION_DEFAULTS["date_of_birth"]
sample_df = sample_df.rename(
columns={
"external_subject_id": "mrn"
}
)
# For deidentified - we rename the external subject id as the study subject identifier
else:
sample_df["study_identifier"] = sample_df["project_name"]
sample_df = sample_df.rename(
columns={
"external_subject_id": "study_subject_identifier"
}
)

# Assert expected values exist
logger.info("Check we have all of the expected information")
for expected_column in EXPECTED_ATTRIBUTES:
Expand All @@ -158,10 +191,6 @@ def lambda_handler(event, context):
)
raise ValueError

if (panel_type := event.get("panel_type", None)) is None:
panel_type = VALIDATION_DEFAULTS["panel_type"].name.lower()
sample_df["panel_type"] = panel_type

# Launch batch lambda function
accession_json: Dict = sample_df.to_dict(orient="records")[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def merge_clinical_redcap_and_portal_data(redcap_df: pd.DataFrame, portal_df: pd
Whilst the portal dataframe contains the following columns:
* subject_id
* library_id
* project_name
* external_sample_id
* external_subject_id
:param redcap_df:
Expand Down Expand Up @@ -109,7 +110,9 @@ def lambda_handler(event, context):
"case_accession_number": "SBJID_LIBID_123",
"ica_workflow_run_id": "wfr.123abc",
"allow_missing_redcap_entry": false,
"panel_type": "main"
"panel_type": "main",
"sample_type": "patient_care_sample",
"is_identified": true,
}
"""

Expand Down Expand Up @@ -204,6 +207,17 @@ def lambda_handler(event, context):
if (panel_type := event.get("panel_type", None)) is None:
panel_type = CLINICAL_DEFAULTS["panel_type"].name.lower()

if (sample_type := event.get("sample_type", None)) is None:
sample_type = CLINICAL_DEFAULTS["sample_type"].name.lower()

if (is_identified := event.get("is_identified", None)) is None:
is_identified = CLINICAL_DEFAULTS["is_identified"].name.lower()

# Set panel type (if not null)
merged_df["panel_type"] = panel_type
merged_df["sample_type"] = sample_type
merged_df["is_identified"] = is_identified

# Check length
if merged_df.shape[0] == 0:
logger.error("PierianDx metadata was not 'Complete', exiting")
Expand Down Expand Up @@ -236,7 +250,6 @@ def lambda_handler(event, context):

# Set defaults
merged_df["specimen_type"] = CLINICAL_DEFAULTS["specimen_type"]
merged_df["is_identified"] = CLINICAL_DEFAULTS["is_identified"]
merged_df["indication"] = CLINICAL_DEFAULTS["indication"]
merged_df["hospital_number"] = CLINICAL_DEFAULTS["hospital_number"]
merged_df["accession_number"] = case_accession_number
Expand All @@ -252,8 +265,7 @@ def lambda_handler(event, context):
logger.info("Rename external subject and external sample columns")
merged_df = merged_df.rename(
columns={
"external_sample_id": "external_specimen_id",
"external_subject_id": "mrn"
"external_sample_id": "external_specimen_id"
}
)

Expand All @@ -268,18 +280,32 @@ def lambda_handler(event, context):
raise ValueError

# Step 7a - make up the 'identified' values (date_of_birth / first_name / last_name)
merged_df["date_of_birth"] = datetime_obj_to_utc_isoformat(CLINICAL_DEFAULTS["date_of_birth"])
merged_df["first_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[0],
axis="columns"
)
merged_df["last_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[-1],
axis="columns"
)
# We set all but we only have one row (as asserted in the merge df)
if all(merged_df["is_identified"]):
merged_df["date_of_birth"] = datetime_obj_to_utc_isoformat(CLINICAL_DEFAULTS["date_of_birth"])
merged_df["first_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[0],
axis="columns"
)
merged_df["last_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[-1],
axis="columns"
)
merged_df = merged_df.rename(
columns={
"external_subject_id": "mrn"
}
)
# Step 7b - for deidentified samples, use study_identified and study_subject_identifier
else:
merged_df["study_identifier"] = merged_df["project_name"]
merged_df = merged_df.rename(
columns={
"external_subject_id": "study_subject_identifier"
}
)

# Set panel type
merged_df["panel_type"] = panel_type
# Set is_identified

# Step 7 - Launch batch lambda function
accession_json: Dict = merged_df.to_dict(orient="records")[0]
Expand Down
Loading

0 comments on commit 0e1a9d8

Please sign in to comment.