Skip to content
This repository has been archived by the owner on Dec 6, 2024. It is now read-only.

Added project name / owner mapping json #141

Merged
merged 1 commit into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 110 additions & 4 deletions deploy/cttso-ica-to-pieriandx-cdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@ new_headers = [
"in_portal",
"in_redcap",
"in_pieriandx",
"glims_is_validation",
"glims_is_research",
"glims_project_owner",
"glims_project_name",
"glims_panel",
"glims_sample_type",
"glims_is_identified",
"glims_default_snomed_term",
"glims_needs_redcap",
"redcap_sample_type",
"redcap_is_complete",
"portal_wfr_id",
Expand All @@ -58,11 +63,12 @@ new_headers = [
"pieriandx_case_accession_number",
"pieriandx_case_creation_date",
"pieriandx_case_identified",
"pieriandx_assignee",
"pieriandx_panel_type",
"pieriandx_sample_type",
"pieriandx_workflow_id",
"pieriandx_workflow_status",
"pieriandx_report_status",
"pieriandx_report_status"
]

headers_df = pd.DataFrame(columns=new_headers)
Expand Down Expand Up @@ -253,7 +259,7 @@ Now change to the deployment directory (the directory this readme is in)
cd deploy/cttso-ica-to-pieriandx-cdk
```

### Wake up lamdas!
### Wake up lambdas!

Before we launch any payloads, let's ensure that the lambda (and any downstream lambdas)
are active.
Expand Down Expand Up @@ -282,6 +288,106 @@ Find the workflow with the subject id and library id of interest in the workflow
Use the Google LIMS page to check if your sample is a validation sample (ProjectName field is either _control_ or _validation_).
Validation samples do not go through the subpanel pipeline, clinical samples go through the subpanel pipeline.

We use the following JSON logic to determine the pathway for each pieriandx sample based on its project owner.

This file can be found in `project-name-to-pieriandx-mapping.json`.

The mapping can be updated with the script `update_project_name_mapping.sh`.

This SSM parameter is NOT part of the CDK stack and MUST be updated using the script above.

```json
[
{
"project_owner": "VCCC",
"project_name": "PO",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term":null
},
{
"project_owner": "Grimmond",
"project_name": "COUMN",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "Tothill",
"project_name": "CUP",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": "Disseminated malignancy of unknown primary"
},
{
"project_owner": "Tothill",
"project_name": "PPGL",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "TJohn",
"project_name": "MESO",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "TJohn",
"project_name": "OCEANiC",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": null
},
{
"project_owner": "*",
"project_name": "SOLACE2",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "SLuen",
"project_name": "IMPARP",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "UMCCR",
"project_name": "Control",
"panel": "main",
"sample_type": "validation",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
},
{
"project_owner": "UMCCR",
"project_name": "QAP",
"panel": "subpanel",
"sample_type": "patient_care_sample",
"is_identified": "identified",
"default_snomed_term": null
},
{
"project_owner": "*",
"project_name": "*",
"panel": "main",
"sample_type": "patient_care_sample",
"is_identified": "deidentified",
"default_snomed_term": "Neoplastic disease"
}
]
```

### Creating the input payloads file

Expand Down
3 changes: 3 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,6 @@ export const SSM_TOKEN_REFRESH_LAMBDA_FUNCTION_ARN_VALUE: string = "token-refres

// Output things
export const SSM_LAMBDA_FUNCTION_ARN_VALUE: string = "cttso-ica-to-pieriandx-lambda-function"

// Project Owner mapping path
export const SSM_PROJECT_NAME_TO_PIERIANDX_CONFIG_SSM_PATH: string = "cttso-lims-project-name-to-pieriandx-mapping"
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def lambda_handler(event, context):
"library_id": "L1234567",
"case_accession_number": "SBJID_LIBID_123",
"ica_workflow_run_id": "wfr.123abc",
"panel_type": "main"
"panel_type": "main",
"sample_type": "validation",
"is_identified": False
}
"""

Expand Down Expand Up @@ -76,17 +78,30 @@ def lambda_handler(event, context):
f"for subject id '{subject_id}' / library id '{library_id}'")
raise ValueError

# Get panel type (or get default if none)
panel_type: str
if (panel_type := event.get("panel_type", None)) is None:
panel_type = VALIDATION_DEFAULTS["panel_type"]

# Get sample type
sample_type: str
if (sample_type := event.get("sample_type", None)) is None:
sample_type = VALIDATION_DEFAULTS["sample_type"]

# Get is identified
is_identified: str
if (is_identified := event.get("is_identified", None)) is None:
is_identified = VALIDATION_DEFAULTS["is_identified"]

# Update sample_df with validation defaults
sample_df["sample_type"] = VALIDATION_DEFAULTS["sample_type"]
sample_df["sample_type"] = sample_type
sample_df["panel_type"] = panel_type
sample_df["is_identified"] = is_identified
sample_df["indication"] = VALIDATION_DEFAULTS["indication"]
sample_df["disease_id"] = VALIDATION_DEFAULTS["disease_id"]
sample_df["disease_name"] = VALIDATION_DEFAULTS["disease_name"]
sample_df["is_identified"] = VALIDATION_DEFAULTS["is_identified"]
sample_df["requesting_physicians_first_name"] = VALIDATION_DEFAULTS["requesting_physicians_first_name"]
sample_df["requesting_physicians_last_name"] = VALIDATION_DEFAULTS["requesting_physicians_last_name"]
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
sample_df["last_name"] = VALIDATION_DEFAULTS["last_name"]
sample_df["date_of_birth"] = VALIDATION_DEFAULTS["date_of_birth"]
sample_df["specimen_type"] = VALIDATION_DEFAULTS["specimen_type"]
sample_df["date_accessioned"] = VALIDATION_DEFAULTS["date_accessioned"]
sample_df["date_collected"] = VALIDATION_DEFAULTS["date_collected"]
Expand Down Expand Up @@ -135,7 +150,6 @@ def lambda_handler(event, context):
sample_df = sample_df.rename(
columns={
"external_sample_id": "external_specimen_id",
"external_subject_id": "mrn"
}
)

Expand All @@ -148,6 +162,25 @@ def lambda_handler(event, context):
axis="columns"
)

# For identified - we rename external subject id as the medical record number
if all(sample_df["is_identified"]):
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
sample_df["last_name"] = VALIDATION_DEFAULTS["last_name"]
sample_df["date_of_birth"] = VALIDATION_DEFAULTS["date_of_birth"]
sample_df = sample_df.rename(
columns={
"external_subject_id": "mrn"
}
)
# For deidentified - we rename the external subject id as the study subject identifier
else:
sample_df["study_identifier"] = sample_df["project_name"]
sample_df = sample_df.rename(
columns={
"external_subject_id": "study_subject_identifier"
}
)

# Assert expected values exist
logger.info("Check we have all of the expected information")
for expected_column in EXPECTED_ATTRIBUTES:
Expand All @@ -158,10 +191,6 @@ def lambda_handler(event, context):
)
raise ValueError

if (panel_type := event.get("panel_type", None)) is None:
panel_type = VALIDATION_DEFAULTS["panel_type"].name.lower()
sample_df["panel_type"] = panel_type

# Launch batch lambda function
accession_json: Dict = sample_df.to_dict(orient="records")[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def merge_clinical_redcap_and_portal_data(redcap_df: pd.DataFrame, portal_df: pd
Whilst the portal dataframe contains the following columns:
* subject_id
* library_id
* project_name
* external_sample_id
* external_subject_id
:param redcap_df:
Expand Down Expand Up @@ -109,7 +110,9 @@ def lambda_handler(event, context):
"case_accession_number": "SBJID_LIBID_123",
"ica_workflow_run_id": "wfr.123abc",
"allow_missing_redcap_entry": false,
"panel_type": "main"
"panel_type": "main",
"sample_type": "patient_care_sample",
"is_identified": true
}
"""

Expand Down Expand Up @@ -204,6 +207,17 @@ def lambda_handler(event, context):
if (panel_type := event.get("panel_type", None)) is None:
panel_type = CLINICAL_DEFAULTS["panel_type"].name.lower()

if (sample_type := event.get("sample_type", None)) is None:
sample_type = CLINICAL_DEFAULTS["sample_type"].name.lower()

if (is_identified := event.get("is_identified", None)) is None:
is_identified = CLINICAL_DEFAULTS["is_identified"].name.lower()

# Set panel type (if not null)
merged_df["panel_type"] = panel_type
merged_df["sample_type"] = sample_type
merged_df["is_identified"] = is_identified

# Check length
if merged_df.shape[0] == 0:
logger.error("PierianDx metadata was not 'Complete', exiting")
Expand Down Expand Up @@ -236,7 +250,6 @@ def lambda_handler(event, context):

# Set defaults
merged_df["specimen_type"] = CLINICAL_DEFAULTS["specimen_type"]
merged_df["is_identified"] = CLINICAL_DEFAULTS["is_identified"]
merged_df["indication"] = CLINICAL_DEFAULTS["indication"]
merged_df["hospital_number"] = CLINICAL_DEFAULTS["hospital_number"]
merged_df["accession_number"] = case_accession_number
Expand All @@ -252,8 +265,7 @@ def lambda_handler(event, context):
logger.info("Rename external subject and external sample columns")
merged_df = merged_df.rename(
columns={
"external_sample_id": "external_specimen_id",
"external_subject_id": "mrn"
"external_sample_id": "external_specimen_id"
}
)

Expand All @@ -268,18 +280,32 @@ def lambda_handler(event, context):
raise ValueError

# Step 7a - make up the 'identified' values (date_of_birth / first_name / last_name)
merged_df["date_of_birth"] = datetime_obj_to_utc_isoformat(CLINICAL_DEFAULTS["date_of_birth"])
merged_df["first_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[0],
axis="columns"
)
merged_df["last_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[-1],
axis="columns"
)
# We set all but we only have one row (as asserted in the merge df)
if all(merged_df["is_identified"]):
merged_df["date_of_birth"] = datetime_obj_to_utc_isoformat(CLINICAL_DEFAULTS["date_of_birth"])
merged_df["first_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[0],
axis="columns"
)
merged_df["last_name"] = merged_df.apply(
lambda x: CLINICAL_DEFAULTS["patient_name"][x.gender.lower()].split(" ")[-1],
axis="columns"
)
merged_df = merged_df.rename(
columns={
"external_subject_id": "mrn"
}
)
# Step 7b - for deidentified samples, use study_identifier and study_subject_identifier
else:
merged_df["study_identifier"] = merged_df["project_name"]
merged_df = merged_df.rename(
columns={
"external_subject_id": "study_subject_identifier"
}
)

# Set panel type
merged_df["panel_type"] = panel_type
# Set is_identified

# Step 7 - Launch batch lambda function
accession_json: Dict = merged_df.to_dict(orient="records")[0]
Expand Down
Loading
Loading