Skip to content
This repository has been archived by the owner on Dec 6, 2024. It is now read-only.

Commit

Permalink
Merge pull request #172 from umccr/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
alexiswl authored Nov 7, 2023
2 parents fba4d3f + 54e142a commit 5cd12cc
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 27 deletions.
22 changes: 22 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,28 @@
Changes in this log refer only to changes that make it to the 'main' branch and
are nested under deploy/cttso-ica-to-pieriandx-cdk.

## 2023-11-17

> Author: Alexis Lucattini
> Email: [[email protected]](mailto:[email protected])
### Enhancements

* Add two pieriandx columns for disease code and names (https://github.com/umccr/cttso-ica-to-pieriandx/pull/167)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/171

### Bugfixes

* Refactored validation lambda so that checks are done before columns are renamed (https://github.com/umccr/cttso-ica-to-pieriandx/pull/168)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/166

* Don't delete pending cases from cttso lims (https://github.com/umccr/cttso-ica-to-pieriandx/pull/169)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/161

* Added snomed term to submission df (https://github.com/umccr/cttso-ica-to-pieriandx/pull/170)
* Resolves https://github.com/umccr/cttso-ica-to-pieriandx/issues/165


## 2023-10-18

> Author: Alexis Lucattini
Expand Down
2 changes: 2 additions & 0 deletions deploy/cttso-ica-to-pieriandx-cdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ new_headers = [
"pieriandx_case_creation_date",
"pieriandx_case_identified",
"pieriandx_assignee",
"pieriandx_disease_code",
"pieriandx_disease_name",
"pieriandx_panel_type",
"pieriandx_sample_type",
"pieriandx_workflow_id",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ def lambda_handler(event, context):
"ica_workflow_run_id": "wfr.123abc",
"panel_type": "main",
"sample_type": "validation",
"is_identified": False
"is_identified": False | "deidentified",
"disease_name": ""Disseminated malignancy of unknown primary""
}
"""

Expand Down Expand Up @@ -93,13 +94,16 @@ def lambda_handler(event, context):
if (is_identified := event.get("is_identified", None)) is None:
is_identified = VALIDATION_DEFAULTS["is_identified"]

# Check disease name
if (disease_name := event.get("disease_name", None)) is None:
disease_name = VALIDATION_DEFAULTS["disease_name"]

# Update sample_df with validation defaults
sample_df["sample_type"] = sample_type
sample_df["panel_type"] = panel_type
sample_df["is_identified"] = is_identified
sample_df["disease_name"] = disease_name
sample_df["indication"] = VALIDATION_DEFAULTS["indication"]
sample_df["disease_id"] = VALIDATION_DEFAULTS["disease_id"]
sample_df["disease_name"] = VALIDATION_DEFAULTS["disease_name"]
sample_df["requesting_physicians_first_name"] = VALIDATION_DEFAULTS["requesting_physicians_first_name"]
sample_df["requesting_physicians_last_name"] = VALIDATION_DEFAULTS["requesting_physicians_last_name"]
sample_df["specimen_type"] = VALIDATION_DEFAULTS["specimen_type"]
Expand Down Expand Up @@ -156,6 +160,22 @@ def lambda_handler(event, context):
axis="columns"
)

# Convert times to utc time and strings
for date_column in ["date_received", "date_collected", "date_of_birth"]:
sample_df[date_column] = sample_df[date_column].apply(
lambda x: datetime_obj_to_utc_isoformat(handle_date(x))
)

# Assert expected values exist
logger.info("Check we have all of the expected information")
for expected_column in EXPECTED_ATTRIBUTES:
if expected_column not in sample_df.columns.tolist():
logger.error(
f"Expected column {expected_column} but "
f"did not find it in columns {', '.join(sample_df.columns.tolist())}"
)
raise ValueError

# For identified - we rename external subject id as the medical record number
if all(sample_df["is_identified"]):
sample_df["first_name"] = VALIDATION_DEFAULTS["first_name"]
Expand All @@ -175,22 +195,6 @@ def lambda_handler(event, context):
}
)

# Convert times to utc time and strings
for date_column in ["date_received", "date_collected", "date_of_birth"]:
sample_df[date_column] = sample_df[date_column].apply(
lambda x: datetime_obj_to_utc_isoformat(handle_date(x))
)

# Assert expected values exist
logger.info("Check we have all of the expected information")
for expected_column in EXPECTED_ATTRIBUTES:
if expected_column not in sample_df.columns.tolist():
logger.error(
f"Expected column {expected_column} but "
f"did not find it in columns {', '.join(sample_df.columns.tolist())}"
)
raise ValueError

# Launch batch lambda function
accession_json: Dict = sample_df.to_dict(orient="records")[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def lambda_handler(event, context):
"allow_missing_redcap_entry": false,
"panel_type": "main",
"sample_type": "patient_care_sample",
"is_identified": true,
"is_identified": true, | is_identified="identified"
}
"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
* pieriandx_case_creation_date
* pieriandx_assignee
* pieriandx_case_identified
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
:return: A pandas dataframe with the following columns
Expand All @@ -195,6 +197,7 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
* is_identified
* needs_redcap
* redcap_is_complete
* default_snomed_term
"""

# Initialise
Expand All @@ -206,7 +209,8 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
"sample_type",
"is_identified",
"needs_redcap",
"redcap_is_complete"
"redcap_is_complete",
"default_snomed_term"
]

# Processing libraries must meet the following criteria
Expand Down Expand Up @@ -273,6 +277,7 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
"sample_type",
"is_identified",
"needs_redcap"
"default_snomed_term"
]

for column_name in new_column_names:
Expand All @@ -284,14 +289,17 @@ def get_libraries_for_processing(merged_df) -> pd.DataFrame:
]


def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_id: str, lambda_arn: str, panel_type: str, sample_type: str, is_identified: bool):
def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_id: str, lambda_arn: str, panel_type: str, sample_type: str, is_identified: Union[bool | str], default_snomed_term: str):
"""
Submit library to pieriandx
:param is_identified:
:param sample_type:
:param subject_id:
:param library_id:
:param workflow_run_id:
:param lambda_arn:
:param panel_type:
:param default_snomed_term:
:return:
"""
lambda_client: LambdaClient = get_boto3_lambda_client()
Expand All @@ -302,7 +310,8 @@ def submit_library_to_pieriandx(subject_id: str, library_id: str, workflow_run_i
"ica_workflow_run_id": workflow_run_id,
"panel_type": panel_type,
"sample_type": sample_type,
"is_identified": is_identified
"is_identified": is_identified,
"disease_name": default_snomed_term
}

logger.info(f"Launching lambda function {lambda_arn} with the following payload {json.dumps(lambda_payload)}")
Expand Down Expand Up @@ -361,6 +370,7 @@ def submit_libraries_to_pieriandx(processing_df: pd.DataFrame) -> pd.DataFrame:
* is_identified
* needs_redcap
* redcap_is_complete
* default_snomed_term
:return:
A pandas dataframe with the following columns
* subject_id
Expand Down Expand Up @@ -411,7 +421,8 @@ def submit_libraries_to_pieriandx(processing_df: pd.DataFrame) -> pd.DataFrame:
lambda_arn=row.submission_arn,
panel_type=row.panel,
sample_type=row.sample_type,
is_identified=row.is_identified
is_identified=row.is_identified,
default_snomed_term=row.default_snomed_term
)
except ValueError:
pass
Expand Down Expand Up @@ -477,6 +488,8 @@ def append_to_cttso_lims(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, e
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -649,6 +662,8 @@ def get_pieriandx_incomplete_job_df_from_cttso_lims_df(cttso_lims_df: pd.DataFra
* pieriandx_case_accession_number
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -681,6 +696,8 @@ def get_pieriandx_incomplete_job_df_from_cttso_lims_df(cttso_lims_df: pd.DataFra
* pieriandx_case_accession_number
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -805,6 +822,8 @@ def update_pieriandx_job_status_missing_df(pieriandx_job_status_missing_df, merg
* pieriandx_case_id
* pieriandx_case_accession_number
* pieriandx_case_identified
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1120,6 +1139,8 @@ def update_cttso_lims(update_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, exce
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1170,6 +1191,8 @@ def update_cttso_lims(update_df: pd.DataFrame, cttso_lims_df: pd.DataFrame, exce
"pieriandx_case_creation_date",
"pieriandx_assignee",
"pieriandx_case_identified",
"pieriandx_disease_code",
"pieriandx_disease_name",
"pieriandx_panel_type",
"pieriandx_sample_type",
"pieriandx_workflow_id",
Expand Down Expand Up @@ -1225,6 +1248,8 @@ def get_duplicate_case_ids(lims_df: pd.DataFrame) -> List:
* pieriandx_case_accession_number
* pieriandx_case_creation_date
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1421,6 +1446,8 @@ def cleanup_duplicate_rows(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame,
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1483,6 +1510,8 @@ def cleanup_duplicate_rows(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFrame,
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1659,6 +1688,8 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1690,6 +1721,8 @@ def bind_pieriandx_case_submission_time_to_merged_df(merged_df: pd.DataFrame, ct
* pieriandx_case_accession_number
* pieriandx_case_creation_date
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -1936,6 +1969,8 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand All @@ -1955,7 +1990,8 @@ def drop_to_be_deleted_cases(merged_df: pd.DataFrame, cttso_lims_df: pd.DataFram
"pieriandx_assignee == 'ToBeDeleted' or "
"( "
" pieriandx_case_id not in @existing_pieriandx_cases and "
" not pieriandx_case_id.isnull()"
" not pieriandx_case_id.isnull() and "
" not pieriandx_case_id == 'pending'"
")",
engine="python"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ class SampleType(Enum):
"sample_type": "validation",
"panel_type": PanelType.MAIN,
"indication": "NA",
"disease_id": 285645000,
"disease_name": "Disseminated malignancy of unknown primary",
"is_identified": True,
"requesting_physicians_first_name": "Sean",
Expand Down Expand Up @@ -175,7 +174,7 @@ class SampleType(Enum):

EXPECTED_ATTRIBUTES = [
"sample_type",
"disease_id",
"disease_name",
"indication",
"accession_number",
"external_specimen_id",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def get_cttso_lims() -> (pd.DataFrame, pd.DataFrame):
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down Expand Up @@ -299,6 +301,8 @@ def get_deleted_lims_df() -> (pd.DataFrame, pd.DataFrame):
* pieriandx_case_creation_date
* pieriandx_case_identified
* pieriandx_assignee
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_sample_type
* pieriandx_workflow_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
* pieriandx_case_id
* pieriandx_case_accession_number
* pieriandx_case_identified
* pieriandx_disease_code
* pieriandx_disease_name
* pieriandx_panel_type
* pieriandx_workflow_id
* pieriandx_workflow_status
Expand Down Expand Up @@ -437,6 +439,8 @@ def get_pieriandx_status_for_missing_sample(case_id: str) -> pd.Series:
"pieriandx_case_id": case_id,
"pieriandx_case_accession_number": response.get("specimens")[0].get("accessionNumber"),
"pieriandx_case_identified": response.get("identified", False),
"pieriandx_disease_code": response.get("disease").get("code"),
"pieriandx_disease_name": response.get("disease").get("name"),
"pieriandx_panel_type": PanelType(response.get("panelName")).name,
"pieriandx_sample_type": SampleType(response.get("sampleType")).name,
"pieriandx_workflow_id": pd.NA,
Expand Down

0 comments on commit 5cd12cc

Please sign in to comment.