Merge pull request #151 from sustrev/main

E2E file read changes
marksgraham · Feb 18, 2025 · ee6381f · ee6381f
2 parents 1de10a1 + fc909dc
commit ee6381f
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 12 deletions.
diff --git a/oct_converter/dicom/e2e_meta.py b/oct_converter/dicom/e2e_meta.py
@@ -24,15 +24,17 @@ def e2e_patient_meta(meta: dict) -> PatientMeta:
     """
     patient = PatientMeta()
 
-    patient_data = meta.get("patient_data", [{}])
-
-    patient.first_name = patient_data[0].get("first_name")
-    patient.last_name = patient_data[0].get("surname")
-    patient.patient_id = patient_data[0].get("patient_id")
-    patient.patient_sex = patient_data[0].get("sex")
-    # TODO patient.patient_dob
-    # Currently, E2E's patient_dob is incorrect, see
-    # the E2E reader for more context.
+    patient_data = meta.get("patient_data")
+    if patient_data:
+        # Heidelberg's updated anonymization process wipes this section of
+        # metadata, so the fields are only read when patient_data is present.
+        patient.first_name = patient_data[0].get("first_name")
+        patient.last_name = patient_data[0].get("surname")
+        patient.patient_id = patient_data[0].get("patient_id")
+        patient.patient_sex = patient_data[0].get("sex")
+        # TODO patient.patient_dob
+        # Currently, E2E's patient_dob is incorrect, see
+        # the E2E reader for more context.
 
     return patient
 

diff --git a/oct_converter/readers/e2e.py b/oct_converter/readers/e2e.py
@@ -37,6 +37,7 @@ def __init__(self, filepath: str | Path) -> None:
         self.acquisition_date = None
         self.birthdate = None
         self.pixel_spacing = None
+        self.patient_id = None
 
         # get initial directory structure
         with open(self.filepath, "rb") as f:
@@ -129,7 +130,13 @@ def _make_lut():
             for start, pos in chunk_stack:
                 f.seek(start + self.byte_skip)
                 raw = f.read(60)
-                chunk = e2e_binary.chunk_structure.parse(raw)
+                try:
+                    # Heidelberg's updated anonymization seems to cause problems with
+                    # some chunks. Observed failures include an empty `raw` buffer and
+                    # undecodable bytes. For now, chunks that fail to parse are skipped.
+                    chunk = e2e_binary.chunk_structure.parse(raw)
+                except Exception:
+                    continue
 
                 if chunk.type == 9:  # patient data
                     raw = f.read(127)
@@ -358,7 +365,13 @@ def read_fundus_image(
             for start, pos in chunk_stack:
                 f.seek(start + self.byte_skip)
                 raw = f.read(60)
-                chunk = e2e_binary.chunk_structure.parse(raw)
+                try:
+                    # Heidelberg's updated anonymization seems to cause problems with
+                    # some chunks. Observed failures include an empty `raw` buffer and
+                    # undecodable bytes. For now, chunks that fail to parse are skipped.
+                    chunk = e2e_binary.chunk_structure.parse(raw)
+                except Exception:
+                    continue
 
                 if chunk.type == 9:  # patient data
                     raw = f.read(127)
@@ -489,7 +502,13 @@ def _convert_to_dict(container):
             for start, pos in chunk_stack:
                 f.seek(start + self.byte_skip)
                 raw = f.read(60)
-                chunk = e2e_binary.chunk_structure.parse(raw)
+                try:
+                    # Heidelberg's updated anonymization seems to cause problems with
+                    # some chunks. Observed failures include an empty `raw` buffer and
+                    # undecodable bytes. For now, chunks that fail to parse are skipped.
+                    chunk = e2e_binary.chunk_structure.parse(raw)
+                except Exception:
+                    continue
 
                 image_string = "{}_{}_{}".format(
                     chunk.patient_db_id, chunk.study_id, chunk.series_id