From 7b136f99a803a89e36b64340d0e205e02abdba1a Mon Sep 17 00:00:00 2001
From: Susannah Trevino <susannahtrevino@flywheel.io>
Date: Thu, 13 Feb 2025 13:53:38 -0600
Subject: [PATCH] Skip broken chunks

---
 oct_converter/dicom/e2e_meta.py | 20 +++++++++++---------
 oct_converter/readers/e2e.py    | 25 ++++++++++++++++++++++---
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/oct_converter/dicom/e2e_meta.py b/oct_converter/dicom/e2e_meta.py
index d3dccbb..9ca29fd 100644
--- a/oct_converter/dicom/e2e_meta.py
+++ b/oct_converter/dicom/e2e_meta.py
@@ -24,15 +24,17 @@ def e2e_patient_meta(meta: dict) -> PatientMeta:
     """
     patient = PatientMeta()
 
-    patient_data = meta.get("patient_data", [{}])
-
-    patient.first_name = patient_data[0].get("first_name")
-    patient.last_name = patient_data[0].get("surname")
-    patient.patient_id = patient_data[0].get("patient_id")
-    patient.patient_sex = patient_data[0].get("sex")
-    # TODO patient.patient_dob
-    # Currently, E2E's patient_dob is incorrect, see
-    # the E2E reader for more context.
+    patient_data = meta.get("patient_data")
+    if patient_data:
+        # Heidelberg's updated anonymization process can wipe this
+        # section of metadata, so only read it when it is present
+        patient.first_name = patient_data[0].get("first_name")
+        patient.last_name = patient_data[0].get("surname")
+        patient.patient_id = patient_data[0].get("patient_id")
+        patient.patient_sex = patient_data[0].get("sex")
+        # TODO patient.patient_dob
+        # Currently, E2E's patient_dob is incorrect, see
+        # the E2E reader for more context.
 
     return patient
 
diff --git a/oct_converter/readers/e2e.py b/oct_converter/readers/e2e.py
index 89b2aa7..e9ad3f5 100644
--- a/oct_converter/readers/e2e.py
+++ b/oct_converter/readers/e2e.py
@@ -37,6 +37,7 @@ def __init__(self, filepath: str | Path) -> None:
         self.acquisition_date = None
         self.birthdate = None
         self.pixel_spacing = None
+        self.patient_id = None
 
         # get initial directory structure
         with open(self.filepath, "rb") as f:
@@ -129,7 +130,13 @@ def _make_lut():
             for start, pos in chunk_stack:
                 f.seek(start + self.byte_skip)
                 raw = f.read(60)
-                chunk = e2e_binary.chunk_structure.parse(raw)
+                try:
+                    # Heidelberg's updated anonymization seems to break parsing of
+                    # some chunks. Observed failures include an empty `raw` read and
+                    # undecodable bytes. For now, such chunks are skipped.
+                    chunk = e2e_binary.chunk_structure.parse(raw)
+                except Exception:
+                    continue
 
                 if chunk.type == 9:  # patient data
                     raw = f.read(127)
@@ -358,7 +365,13 @@ def read_fundus_image(
             for start, pos in chunk_stack:
                 f.seek(start + self.byte_skip)
                 raw = f.read(60)
-                chunk = e2e_binary.chunk_structure.parse(raw)
+                try:
+                    # Heidelberg's updated anonymization seems to break parsing of
+                    # some chunks. Observed failures include an empty `raw` read and
+                    # undecodable bytes. For now, such chunks are skipped.
+                    chunk = e2e_binary.chunk_structure.parse(raw)
+                except Exception:
+                    continue
 
                 if chunk.type == 9:  # patient data
                     raw = f.read(127)
@@ -489,7 +502,13 @@ def _convert_to_dict(container):
             for start, pos in chunk_stack:
                 f.seek(start + self.byte_skip)
                 raw = f.read(60)
-                chunk = e2e_binary.chunk_structure.parse(raw)
+                try:
+                    # Heidelberg's updated anonymization seems to break parsing of
+                    # some chunks. Observed failures include an empty `raw` read and
+                    # undecodable bytes. For now, such chunks are skipped.
+                    chunk = e2e_binary.chunk_structure.parse(raw)
+                except Exception:
+                    continue
 
                 image_string = "{}_{}_{}".format(
                     chunk.patient_db_id, chunk.study_id, chunk.series_id