From 7b136f99a803a89e36b64340d0e205e02abdba1a Mon Sep 17 00:00:00 2001 From: Susannah Trevino Date: Thu, 13 Feb 2025 13:53:38 -0600 Subject: [PATCH] Skip broken chunks --- oct_converter/dicom/e2e_meta.py | 20 +++++++++++--------- oct_converter/readers/e2e.py | 25 ++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/oct_converter/dicom/e2e_meta.py b/oct_converter/dicom/e2e_meta.py index d3dccbb..9ca29fd 100644 --- a/oct_converter/dicom/e2e_meta.py +++ b/oct_converter/dicom/e2e_meta.py @@ -24,15 +24,17 @@ def e2e_patient_meta(meta: dict) -> PatientMeta: """ patient = PatientMeta() - patient_data = meta.get("patient_data", [{}]) - - patient.first_name = patient_data[0].get("first_name") - patient.last_name = patient_data[0].get("surname") - patient.patient_id = patient_data[0].get("patient_id") - patient.patient_sex = patient_data[0].get("sex") - # TODO patient.patient_dob - # Currently, E2E's patient_dob is incorrect, see - # the E2E reader for more context. + patient_data = meta.get("patient_data") + if patient_data: + # Heidelberg's updated anonymization process wipes + # this section of metadata + patient.first_name = patient_data[0].get("first_name") + patient.last_name = patient_data[0].get("surname") + patient.patient_id = patient_data[0].get("patient_id") + patient.patient_sex = patient_data[0].get("sex") + # TODO patient.patient_dob + # Currently, E2E's patient_dob is incorrect, see + # the E2E reader for more context. return patient diff --git a/oct_converter/readers/e2e.py b/oct_converter/readers/e2e.py index 89b2aa7..e9ad3f5 100644 --- a/oct_converter/readers/e2e.py +++ b/oct_converter/readers/e2e.py @@ -37,6 +37,7 @@ def __init__(self, filepath: str | Path) -> None: self.acquisition_date = None self.birthdate = None self.pixel_spacing = None + self.patient_id = None # get initial directory structure with open(self.filepath, "rb") as f: @@ -129,7 +130,13 @@ def _make_lut(): for start, pos in chunk_stack: f.seek(start + self.byte_skip) raw = f.read(60) - chunk = e2e_binary.chunk_structure.parse(raw) + try: + # Heidelberg's updated anonymization seems to cause problems with + # some chunks. Observed problems include an empty raw and problems + # with undecodable bytes. For now, these chunks are skipped... + chunk = e2e_binary.chunk_structure.parse(raw) + except Exception: + continue if chunk.type == 9: # patient data raw = f.read(127) @@ -358,7 +365,13 @@ def read_fundus_image( for start, pos in chunk_stack: f.seek(start + self.byte_skip) raw = f.read(60) - chunk = e2e_binary.chunk_structure.parse(raw) + try: + # Heidelberg's updated anonymization seems to cause problems with + # some chunks. Observed problems include an empty raw and problems + # with undecodable bytes. For now, these chunks are skipped... + chunk = e2e_binary.chunk_structure.parse(raw) + except Exception: + continue if chunk.type == 9: # patient data raw = f.read(127) @@ -489,7 +502,13 @@ def _convert_to_dict(container): for start, pos in chunk_stack: f.seek(start + self.byte_skip) raw = f.read(60) - chunk = e2e_binary.chunk_structure.parse(raw) + try: + # Heidelberg's updated anonymization seems to cause problems with + # some chunks. Observed problems include an empty raw and problems + # with undecodable bytes. For now, these chunks are skipped... + chunk = e2e_binary.chunk_structure.parse(raw) + except Exception: + continue image_string = "{}_{}_{}".format( chunk.patient_db_id, chunk.study_id, chunk.series_id