From abd6e4b6da50a5da37e259e1e5f18dd58f418338 Mon Sep 17 00:00:00 2001
From: Ramon Luis Correa Medero
Date: Sat, 25 Sep 2021 22:29:20 -0700
Subject: [PATCH 01/15] write pixel array as RGB images if
 PhotometricInterpretation lines up

---
 modules/png-extraction/ImageExtractor.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/modules/png-extraction/ImageExtractor.py b/modules/png-extraction/ImageExtractor.py
index c4aaedb..d5f6fc1 100644
--- a/modules/png-extraction/ImageExtractor.py
+++ b/modules/png-extraction/ImageExtractor.py
@@ -205,6 +205,7 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
     pngfile = png_destination+folderName + '/' + hashlib.sha224(imName.encode('utf-8')).hexdigest() + '.png'
     dicom_path = filedata.iloc[i].loc['file']
     image_path = png_destination+folderName+'/' + hashlib.sha224(imName.encode('utf-8')).hexdigest() + '.png'
+    isRGB = filedata.iloc[i].loc['PhotometricInterpretation'] == 'RGB'
     if is16Bit:
         # write the PNG file as a 16-bit greyscale
         image_2d = ds.pixel_array.astype(np.double)
@@ -214,7 +215,10 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
         shape = ds.pixel_array.shape
         image_2d_scaled = np.uint16(image_2d_scaled)
         with open(pngfile , 'wb') as png_file:
-            w = png.Writer(shape[1], shape[0], greyscale=True,bitdepth=16)
+            if isRGB:
+                w = png.Writer(shape[1], shape[0], greyscale=False,bitdepth=16)
+            else:
+                w = png.Writer(shape[1], shape[0], greyscale=True,bitdepth=16)
             w.write(png_file, image_2d_scaled)
     else:
         shape = ds.pixel_array.shape
@@ -226,7 +230,10 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
         image_2d_scaled = np.uint8(image_2d_scaled)
         # Write the PNG file
         with open(pngfile , 'wb') as png_file:
-            w = png.Writer(shape[1], shape[0], greyscale=True)
+            if isRGB:
+                w = png.Writer(shape[1], shape[0], greyscale=False)
+            else:
+                w = png.Writer(shape[1], shape[0], greyscale=True)
             w.write(png_file, image_2d_scaled)
         filemapping = filedata.iloc[i].loc['file'] + ', ' + pngfile + '\n'
 except AttributeError as error:

From bc17bdf1c77ec85d801bc3ec1417fbe319321bc4 Mon Sep 17 00:00:00 2001
From: Ramon Luis Correa Medero
Date: Fri, 8 Oct 2021 15:08:27 -0700
Subject: [PATCH 02/15] Update README

Changed comments to reflect the fact we are working with NIfTI images.
---
 modules/nifti-extraction/README.md | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/modules/nifti-extraction/README.md b/modules/nifti-extraction/README.md
index c9b00c4..a0b5139 100644
--- a/modules/nifti-extraction/README.md
+++ b/modules/nifti-extraction/README.md
@@ -1,6 +1,6 @@
-# The Niffler PNG Extractor
+# The Niffler NIfTI Extractor

-The PNG Extractor converts a set of DICOM images into png images, extract metadata in a privacy-preserving manner.
+The NIfTI Extractor converts a set of DICOM images into NIfTI images, extracting metadata in a privacy-preserving manner.

## Configuring Niffler PNG Extractor

Find the config.json file in the folder and modify accordingly *for each* Niffler deployment.

* *FlattenedToLevel*: Specify how you want your folder tree to be. Default is, "patient" (produces patient/*.png). You may change this value to "study" (patient/study/*.png) or "series" (patient/study/series/*.png). All IDs are de-identified.

-* *is16Bit*: Specifies whether to save extracted image as 16-bit image. By default, this is set to true. Please set it to false to run 8-bit extraction.
* *SendEmail*: Do you want to send an email notification when the extraction completes? The default is true. You may disable this if you do not want to receive an email upon the completion.

* *YourEmail*: Replace "test@test.test" with a valid email if you would like to receive an email notification. If the SendEmail property is disabled, you can leave this as is.

The below two fields can be left unmodified for most executions. The default values would work.

```
$ python3 ImageExtractor.py

# With Nohup
-$ nohup python3 ImageExtractor.py > UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out &
+$ nohup python3 ImageExtractorNifti.py > UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out &

# With Command Line Arguments
-$ nohup python3 ImageExtractor.py --DICOMHome "/opt/data/new-study" --Depth 0 --PrintImages true --SendEmail true > UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out &
+$ nohup python3 ImageExtractorNifti.py --DICOMHome "/opt/data/new-study" --Depth 0 --PrintImages true --SendEmail true > UNIQUE-OUTPUT-FILE-FOR-YOUR-EXTRACTION.out &
```

Check that the extraction is going smoothly with no errors, by,

In the OutputDirectory, there will be several sub folders and directories.

* *extracted-images*: The folder that consists of extracted PNG images

-* *failed-dicom*: The folder that consists of the DICOM images that failed to produce the PNG images upon the execution of the Niffler PNG Extractor. Failed DICOM images are stored in 4 sub-folders named 1, 2, 3, and 4, categorizing according to their failure reason.
-
-## Running the Niffler PNG Extractor with Slurm
-
-There is also an experimental PNG extractor implementation (ImageExtractorSlurm.py) that provides a distributed execution based on Slurm on a cluster.
+* *failed-dicom*: The folder that consists of the DICOM images that failed to produce the NIfTI images upon the execution of the Niffler Extractor. Failed DICOM images are stored in 4 sub-folders named 1, 2, 3, and 4, categorized according to their failure reason.
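
## Verifying an extracted NIfTI file

If an extraction looks incomplete, a quick sanity check is to open one of the produced files. The snippet below is a minimal sketch, assuming the nibabel package is installed; the path is illustrative and depends on your OutputDirectory and the hashed folder names.

```python
import nibabel as nib

# Illustrative path: OutputDirectory/extracted-images/<hashed-ids>/<name>.nii.gz
img = nib.load('extracted-images/example.nii.gz')
print(img.shape)               # voxel grid dimensions
print(img.header.get_zooms())  # voxel spacing
```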
## Troubleshooting

From b8882db194a83a296c3d660de0ee3f955a4587c4 Mon Sep 17 00:00:00 2001
From: Zach Zaiman
Date: Mon, 8 Nov 2021 09:40:23 -0500
Subject: [PATCH 03/15] modified dicom anon to support full path reconstruction
 and mapping

---
 .../dicom-anonymization/DicomAnonymizer.py  |  41 +++---
 .../dicom-anonymization/DicomAnonymizer2.py | 128 ++++++++++++++++++
 2 files changed, 151 insertions(+), 18 deletions(-)
 create mode 100644 modules/dicom-anonymization/DicomAnonymizer2.py

diff --git a/modules/dicom-anonymization/DicomAnonymizer.py b/modules/dicom-anonymization/DicomAnonymizer.py
index b4fdaae..9dce52d 100644
--- a/modules/dicom-anonymization/DicomAnonymizer.py
+++ b/modules/dicom-anonymization/DicomAnonymizer.py
@@ -10,27 +10,32 @@
 import sys
 import pydicom
 import random
+import glob
+import pathlib
 import pickle

-def get_dcm_folders(dcm_root_dir):
-    # get all folders
-    print('getting all dcm folders')
-    # speeding it up by checking files within each folder at the start
-    dcm_flds = []
-    for x in os.walk(dcm_root_dir):
-        folder = x[0]
-        # rejecting some folders that isn't in the accession list (HCC specific)
-        if 'bk' in folder or 'March5' in folder:
-            continue
-        try:
-            # assumes that all files in the study folder is dcm files
-            if 'dcm' in os.listdir(folder)[0]:
-                dcm_flds.append(folder)
-        except:
-            print('no dcm files in folder, skipping')
-            continue
-    return dcm_flds
+# def get_dcm_folders(dcm_root_dir):
+#     # get all folders
+#     print('getting all dcm folders')
+#     # speeding it up by checking files within each folder at the start
+#     dcm_flds = []
+#     for x in os.walk(dcm_root_dir):
+#         folder = x[0]
+#         # rejecting some folders that isn't in the accession list (HCC specific)
+#         if 'bk' in folder or 'March5' in folder:
+#             continue
+#         try:
+#             # assumes that all files in the study folder is dcm files
+#             if 'dcm' in os.listdir(folder)[0]:
+#                 dcm_flds.append(folder)
+#         except:
+#             print('no dcm files in folder, skipping')
+#             continue
+#     return dcm_flds
+def get_dcm_paths(dcm_root_dir):
+    paths = glob.glob(os.path.join(dcm_root_dir, "/**/*.dcm"), recursive=True)
+    return paths

 # randomly anonymizes the input id

diff --git a/modules/dicom-anonymization/DicomAnonymizer2.py b/modules/dicom-anonymization/DicomAnonymizer2.py
new file mode 100644
index 0000000..ac37954
--- /dev/null
+++ b/modules/dicom-anonymization/DicomAnonymizer2.py
@@ -0,0 +1,128 @@
+import os
+import sys
+import pydicom
+import random
+import glob
+import pathlib
+import pickle
+import string
+
+def get_dcm_paths(dcm_root_dir):
+    paths = glob.glob(os.path.join(dcm_root_dir, "**/*.dcm"), recursive=True)
+    return paths
+
+def randomizeID(id):
+    string = str(id)
+    splits = string.split('.')
+    newID = splits[0]
+    i = 0
+    for split in splits:
+        if i == 0:
+            i += 1
+            continue
+        elif len(split) == 1:
+            newID = '.'.join((newID, split))
+            continue
+        num = int(split) + random.randint(0, 9)
+        newID = '.'.join((newID, str(num)))
+
+    return newID
+
+def anonSample(file, idtype, dict):
+    id = file[idtype].value
+    if id in dict.keys():
+        anon_id = dict[id]
+    else:
+        if idtype == 'PatientID':
+            anon_id = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(25))
+        else:
+            anon_id = randomizeID(id)
+        # make sure that the new ID isn't the same as another
+        while anon_id in dict.values():
+            anon_id = randomizeID(id)
+        dict[id] = anon_id
+
+    return anon_id
+
+
+def dcm_anonymize(dcm_files, output_path, stop=None):
+    # creates dictionaries for the IDs for look up later
+    samplePatientIDs = {}
+    sampleStudyInstanceUIDs = {}
+    sampleSeriesInstanceUID = {}
+    sampleSOPInstanceUID = {}
+
+    UIDs = {'PatientID' : samplePatientIDs,
+            'StudyInstanceUID': sampleStudyInstanceUIDs,
+            'SeriesInstanceUID': sampleSeriesInstanceUID,
+            'SOPInstanceUID': sampleSOPInstanceUID}
+
+    # UIDs = pickle.load(open(os.path.join(output_path, "UIDs.pkl"), "rb"))
+
+    skipped = []
+
+    # tags to anonymize
+    anon_tags = ['InstanceCreationDate', 'InstanceCreationTime', 'AccessionNumber', 'StudyDate',
+                 'SeriesDate', 'AcquisitionDate', 'ContentDate', 'StudyTime', 'SeriesTime', 'AcquisitionTime',
+                 'ContentTime', 'InstitutionName', 'InstitutionAddress', 'ReferringPhysicianName',
+                 'PhysiciansOfRecord', 'PerformingPhysicianName', 'OperatorsName', 'PatientName',
+                 'IssuerOfPatientID', 'PatientBirthDate', 'PatientSex', 'OtherPatientIDs', 'PatientAge', 'PatientSize',
+                 'PatientWeight', 'PatientAddress', 'EthnicGroup', 'PregnancyStatus', 'RequestingPhysician',
+                 'PerformedProcedureStepStartDate', 'PerformedProcedureStepStartTime', 'PerformedProcedureStepID']
+
+    # anonymize the files one by one, stopping early if a stop count was given
+    n = 0
+    for file in dcm_files:
+        try:  # skip any file that does not read as a valid DICOM file
+            dcm_file = pydicom.dcmread(file)
+            dcm_file.remove_private_tags()
+            out_path = output_path
+            for UID in UIDs.keys():
+                # get the UID and get the anonymized UID
+                anon_id = anonSample(dcm_file, UID, UIDs[UID])
+                dcm_file[UID].value = anon_id
+                out_path = os.path.join(out_path, anon_id)
+
+            out_path+=".dcm"
+            # save instance UID to rename the filename (so that filename and SOPinstance matches)
+            # for the other tags, make them anonymous
+            for tag in anon_tags:
+                if tag in dcm_file:
+                    if type(dcm_file.data_element(tag).value) == str:
+                        dcm_file.data_element(tag).value = 'N/A'
+                    elif type(dcm_file.data_element(tag).value) == pydicom.uid.UID:
+                        dcm_file.data_element(tag).value = 'N/A'
+                    elif type(dcm_file.data_element(tag).value) == int:
+                        dcm_file.data_element(tag).value = 0
+                    else:
+                        dcm_file.data_element(tag).value = 0.0
+
+            pathlib.Path("/".join(out_path.split("/")[:-1])).mkdir(parents=True, exist_ok=True)
+            dcm_file.save_as(out_path)
+            n += 1
+        except:
+            print('Invalid Dicom Error, skipping')
+            skip_file = pydicom.dcmread(file, force=True)
+            skipped.append((skip_file.AccessionNumber, skip_file.StudyInstanceUID))
+            continue
+        if n == stop or n == len(dcm_files):
+            pickle.dump(UIDs, open(os.path.join(output_path, "UIDs.pkl"), "wb"))
+            pickle.dump(skipped, open(os.path.join(output_path, "skipped.pkl"), "wb"))
+            exit()
+
+    pickle.dump(UIDs, open(os.path.join(output_path, "UIDs.pkl"), "wb"))
+    pickle.dump(skipped, open(os.path.join(output_path, "skipped.pkl"), "wb"))
+
+
+if __name__ == "__main__":
+    data_dir = sys.argv[1]
+    output_dir = sys.argv[2]
+    if len(sys.argv) > 3:
+        # stopping number
+        stop = int(sys.argv[3])
+    else:
+        stop = None
+    print('Extracting DICOM file paths', flush=True)
+    dcm_files = get_dcm_paths(data_dir)
+    print('Starting DICOM Study Anonymization', flush=True)
+    dcm_anonymize(dcm_files, output_dir, stop=stop)

From 65a3194efaa43bb03d27608178e4626c73ed2cc9 Mon Sep 17 00:00:00 2001
From: Zachary Zaiman <65204992+zmz223@users.noreply.github.com>
Date: Mon, 8 Nov 2021 10:08:17 -0500
Subject: [PATCH 04/15] Update DicomAnonymizer.py

---
 .../dicom-anonymization/DicomAnonymizer.py | 40 +++++++++----------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/modules/dicom-anonymization/DicomAnonymizer.py b/modules/dicom-anonymization/DicomAnonymizer.py
index 9dce52d..34b8018 100644
--- a/modules/dicom-anonymization/DicomAnonymizer.py
+++ b/modules/dicom-anonymization/DicomAnonymizer.py
@@ -15,27 +15,25 @@
 import pickle

-# def get_dcm_folders(dcm_root_dir):
-#     # get all folders
-#     print('getting all dcm folders')
-#     # speeding it up by checking files within each folder at the start
-#     dcm_flds = []
-#     for x in os.walk(dcm_root_dir):
-#         folder = x[0]
-#         # rejecting some folders that isn't in the accession list (HCC specific)
-#         if 'bk' in folder or 'March5' in folder:
-#             continue
-#         try:
-#             # assumes that all files in the study folder is dcm files
-#             if 'dcm' in os.listdir(folder)[0]:
-#                 dcm_flds.append(folder)
-#         except:
-#             print('no dcm files in folder, skipping')
-#             continue
-#     return dcm_flds
-def get_dcm_paths(dcm_root_dir):
-    paths = glob.glob(os.path.join(dcm_root_dir, "/**/*.dcm"), recursive=True)
-    return paths
+def get_dcm_folders(dcm_root_dir):
+    # get all folders
+    print('getting all dcm folders')
+    # speeding it up by checking files within each folder at the start
+    dcm_flds = []
+    for x in os.walk(dcm_root_dir):
+        folder = x[0]
+        # rejecting some folders that isn't in the accession list (HCC specific)
+        if 'bk' in folder or 'March5' in folder:
+            continue
+        try:
+            # assumes that all files in the study folder is dcm files
+            if 'dcm' in os.listdir(folder)[0]:
+                dcm_flds.append(folder)
+        except:
+            print('no dcm files in folder, skipping')
+            continue
+    return dcm_flds


 # randomly anonymizes the input id

From 8cc914c637a22eacd8fe25251d1ace50a0e32c14 Mon Sep 17 00:00:00 2001
From: Zachary Zaiman <65204992+zmz223@users.noreply.github.com>
Date: Mon, 8 Nov 2021 10:09:40 -0500
Subject: [PATCH 05/15] Update README.md

---
 modules/dicom-anonymization/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/modules/dicom-anonymization/README.md b/modules/dicom-anonymization/README.md
index e990d49..6f32990 100644
--- a/modules/dicom-anonymization/README.md
+++ b/modules/dicom-anonymization/README.md
@@ -4,3 +4,7 @@ You may convert a DICOM file into an anonymized DICOM file by running
 ```
 python DicomAnonymizer.py
 ```
+To maintain the source directory hierarchy as well as anonymize and map the PatientID field, you can run
+```
+python DicomAnonymizer2.py
+```

From 2a30abe473f4c53c347f5d9b994d0ff476dbdd73 Mon Sep 17 00:00:00 2001
From: Ramon Luis Correa Medero
Date: Tue, 9 Nov 2021 00:33:53 -0700
Subject: [PATCH 06/15] update file naming convention

---
 modules/nifti-extraction/ImageExtractorNifti.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/nifti-extraction/ImageExtractorNifti.py b/modules/nifti-extraction/ImageExtractorNifti.py
index 5ad7575..4b60eec 100644
--- a/modules/nifti-extraction/ImageExtractorNifti.py
+++ b/modules/nifti-extraction/ImageExtractorNifti.py
@@ -203,7 +203,7 @@ def extract_images(filedata, i, nifti_destination, flattened_to_level, failed, i

     os.makedirs(nifti_destination + folderName,exist_ok=True)

-    niftifile = nifti_destination+folderName + '/' + hashlib.sha224(imName.encode('utf-8')).hexdigest() + '.nii.gz'
+    niftifile = nifti_destination+folderName + '/' + ID1 +'_' +ID2 +'_' +ID3 + '.nii.gz'
     dicom2nifti.dicom_series_to_nifti(str(filedata.iloc[i].loc['file']),niftifile)
     filemapping = filedata.iloc[i].loc['file'] + ',' + niftifile + '\n'
 except AttributeError as error:

From 2f6951f1832c8114de8e13d9d1da4be7157b4bc7 Mon Sep 17 00:00:00 2001
From: Ananth Reddy
Date: Mon, 15 Nov 2021 12:46:45 -0500
Subject: [PATCH 07/15] Minor Changes

---
 modules/png-extraction/ImageExtractor.py | 27 ++++++++++++------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/modules/png-extraction/ImageExtractor.py b/modules/png-extraction/ImageExtractor.py
index f739225..4d7aa2e 100644
--- a/modules/png-extraction/ImageExtractor.py
+++ b/modules/png-extraction/ImageExtractor.py
@@ -145,7 +145,7 @@ def extract_headers(f_list_elem):
     c = False
     kv = get_tuples(plan)  # gets tuple for field,val pairs for this file. function defined above
     # dicom images should not have more than 300 dicom tags
-    if len(kv)>500:
+    if len(kv)>300:
         logging.debug(str(len(kv)) + " dicom tags produced by " + ff)
     kv.append(('file', f_list_elem[1]))  # adds my custom field with the original filepath
     kv.append(('has_pix_array',c))  # adds my custom field with if file has image
@@ -184,7 +184,7 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
         except:
             ID2='ALL-STUDIES'
         folderName = hashlib.sha224(ID1.encode('utf-8')).hexdigest() + "/" + \
-                     hashlib.sha224(ID2.encode('utf-8')).hexdigest()
+            hashlib.sha224(ID2.encode('utf-8')).hexdigest()
         # check for existence of the folder tree patient/study/series. Create if it does not exist.
         os.makedirs(png_destination + folderName,exist_ok=True)
     else:
@@ -196,8 +196,8 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
             ID2='ALL-STUDIES'
             ID3='ALL-SERIES'
         folderName = hashlib.sha224(ID1.encode('utf-8')).hexdigest() + "/" + \
-                     hashlib.sha224(ID2.encode('utf-8')).hexdigest() + "/" + \
-                     hashlib.sha224(ID3.encode('utf-8')).hexdigest()
+            hashlib.sha224(ID2.encode('utf-8')).hexdigest() + "/" + \
+            hashlib.sha224(ID3.encode('utf-8')).hexdigest()
         # check for existence of the folder tree patient/study/series. Create if it does not exist.
         os.makedirs(png_destination + folderName,exist_ok=True)
@@ -379,14 +379,15 @@ def execute(pickle_file, dicom_home, output_directory, print_images, print_only_
     total = len(chunk)
     stamp = time.time()
     for i in range(len(filedata)):
-        (fmap,fail_path,err) = extract_images(filedata, i, png_destination, flattened_to_level, failed, is16Bit)
-        if err:
-            count +=1
-            copyfile(fail_path[0],fail_path[1])
-            err_msg = str(count) + ' out of ' + str(len(chunk)) + ' dicom images have failed extraction'
-            logging.error(err_msg)
-        else:
-            fm.write(fmap)
+        if (filedata.iloc[i].loc['file'] is not np.nan):
+            (fmap,fail_path,err) = extract_images(filedata, i, png_destination, flattened_to_level, failed, is16Bit)
+            if err:
+                count +=1
+                copyfile(fail_path[0],fail_path[1])
+                err_msg = str(count) + ' out of ' + str(len(chunk)) + ' dicom images have failed extraction'
+                logging.error(err_msg)
+            else:
+                fm.write(fmap)

     fm.close()
     logging.info('Chunk run time: %s %s', time.time() - t_start, ' seconds!')
@@ -432,7 +433,7 @@ def execute(pickle_file, dicom_home, output_directory, print_images, print_only_
     merged_meta = pd.concat(meta_list,ignore_index=True)
     merged_meta.to_csv('{}/metadata.csv'.format(output_directory),index=False)
     # getting a single mapping file
-    logging.info('Generatign final mapping file')
+    logging.info('Generating final mapping file')
     mappings = glob.glob("{}/maps/*.csv".format(output_directory))
     map_list = list()
     for mapping in mappings:

From 1731af02d0edadffe454a0817bd1e50bcf1ceb95 Mon Sep 17 00:00:00 2001
From: Ananth Reddy
Date: Mon, 15 Nov 2021 13:08:14 -0500
Subject: [PATCH 08/15] Remove Space Changes

---
 modules/png-extraction/ImageExtractor.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/png-extraction/ImageExtractor.py b/modules/png-extraction/ImageExtractor.py
index 4d7aa2e..a93ca3e 100644
--- a/modules/png-extraction/ImageExtractor.py
+++ b/modules/png-extraction/ImageExtractor.py
@@ -184,7 +184,7 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
         except:
             ID2='ALL-STUDIES'
         folderName = hashlib.sha224(ID1.encode('utf-8')).hexdigest() + "/" + \
-            hashlib.sha224(ID2.encode('utf-8')).hexdigest()
+                     hashlib.sha224(ID2.encode('utf-8')).hexdigest()
         # check for existence of the folder tree patient/study/series. Create if it does not exist.
         os.makedirs(png_destination + folderName,exist_ok=True)
     else:
@@ -196,8 +196,8 @@ def extract_images(filedata, i, png_destination, flattened_to_level, failed, is1
             ID2='ALL-STUDIES'
             ID3='ALL-SERIES'
         folderName = hashlib.sha224(ID1.encode('utf-8')).hexdigest() + "/" + \
-            hashlib.sha224(ID2.encode('utf-8')).hexdigest() + "/" + \
-            hashlib.sha224(ID3.encode('utf-8')).hexdigest()
+                     hashlib.sha224(ID2.encode('utf-8')).hexdigest() + "/" + \
+                     hashlib.sha224(ID3.encode('utf-8')).hexdigest()
         # check for existence of the folder tree patient/study/series. Create if it does not exist.
         os.makedirs(png_destination + folderName,exist_ok=True)

From 5c17f02cdf38b3a9d8eb5422d0361b883a182835 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu
Date: Thu, 9 Dec 2021 15:10:02 -0500
Subject: [PATCH 09/15] Update README.md

---
 README.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 68ab644..424886e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,12 @@
 # Niffler: A DICOM Framework for Machine Learning and Processing Pipelines.

-Niffler is an efficient DICOM Framework for machine learning pipelines and processing workflows on metadata. It facilitates efficient transfer of DICOM images on-demand and real-time from PACS to the research environments, to run processing workflows and machine learning pipelines.
+Niffler is a lightweight framework to facilitate executing machine learning pipelines and processing workflows on DICOM images and metadata. Niffler facilitates efficient transfer of DICOM images on-demand and real-time from PACS to the research environments. Niffler is also integrated with the radiology information system (RIS) to get clinical data in real-time. The DICOM images from the PACS and clinical data retrieved from the RIS can be used in conjunction in real-time as well as retrospectively on-demand.
+
+The Niffler framework consists of:
+- On-demand and real-time retrieval and processing of DICOM images from the PACS environment configured to accept requests from a deployment of Niffler.
+- Acquisition and processing of clinical data from a RIS.
+- Supportive utility functions such as DICOM → PNG conversion, DICOM → NifTi conversion, DICOM anonymization, and a workflow module.
+- Sample applications of the Niffler modules.

 Niffler enables receiving DICOM images real-time as a data stream from PACS as well as specific DICOM data based on a series of DICOM C-MOVE queries. The Niffler real-time DICOM receiver extracts the metadata free of PHI as the images arrive, stores the metadata in a Mongo database, and deletes the images nightly. The on-demand extractor reads a CSV file provided by the user (consisting of a list of values for PatientID, AccessionNumber, or other DICOM keywords), and performs a series of DICOM C-MOVE requests to receive them from the PACS, without manually querying them. Niffler also provides additional features such as converting DICOM images into PNG images, and performs additional computations such as computing scanner utilization and finding scanners with misconfigured clocks.
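
For orientation across patches 01 and 07, the core DICOM-to-PNG conversion both of them touch follows the pattern below. This is a minimal sketch assuming pydicom, numpy, and pypng are installed; the paths, the bit-depth switch, and the helper name are illustrative, not the exact Niffler implementation.

```python
import numpy as np
import png
import pydicom

def dicom_to_png(dcm_path, png_path, is16bit=True):
    """Rescale a DICOM pixel array and write it as an 8- or 16-bit PNG."""
    ds = pydicom.dcmread(dcm_path)
    is_rgb = ds.PhotometricInterpretation == 'RGB'
    image = ds.pixel_array.astype(np.double)
    # Stretch intensities to the full range of the target bit depth.
    max_val = 65535.0 if is16bit else 255.0
    scaled = (np.maximum(image, 0) / max(image.max(), 1)) * max_val
    scaled = scaled.astype(np.uint16 if is16bit else np.uint8)
    if is_rgb:
        # pypng expects each RGB row flattened to width * 3 samples.
        height, width = scaled.shape[0], scaled.shape[1]
        rows = scaled.reshape(height, width * 3)
    else:
        height, width = scaled.shape
        rows = scaled
    with open(png_path, 'wb') as png_file:
        writer = png.Writer(width, height, greyscale=not is_rgb,
                            bitdepth=16 if is16bit else 8)
        writer.write(png_file, rows)
```

Setting greyscale=False is what makes pypng expect three samples per pixel, which is why the RGB branch added in patch 01 matters: an RGB pixel array written with greyscale=True produces a distorted image or an error.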
From 11551ed3995a3e074486a1309fac8ca8eca55df8 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Thu, 9 Dec 2021 15:20:06 -0500 Subject: [PATCH 10/15] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 424886e..60d16d0 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Niffler is a lightweight framework to facilitate executing machine learning pipe The Niffler framework consists of: - On-demand and real-time retrieval and processing of DICOM images from the PACS environment configured to accept requests from a deployment of Niffler. -- Acquisition and processing of clinical data from a RIS. +- Acquisition and processing of clinical data from a RIS, to enable real-time analytics (RTA). - Supportive utility functions such as DICOM → PNG conversion, DICOM → NifTi conversion, DICOM anonymization, and a workflow module. - Sample applications of the Niffler modules. From 16820a76ea5e9f9657a95d798cdc75ab66f327c8 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Thu, 9 Dec 2021 17:32:37 -0500 Subject: [PATCH 11/15] Handle the last empty line --- modules/suvpar/Strip.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/suvpar/Strip.py b/modules/suvpar/Strip.py index 32debe5..76d0c38 100644 --- a/modules/suvpar/Strip.py +++ b/modules/suvpar/Strip.py @@ -25,11 +25,13 @@ def initialize(): def strip(): global df - # Consider only MR. Remove modalities such as PR and SR that are present in the original data. - df = df[df.Modality == "MR"] - # Consider only the ImageType that are true. - df = df[df['ImageType'].str.contains("ORIGINAL")] - + try: + # Consider only MR. Remove modalities such as PR and SR that are present in the original data. + df = df[df.Modality == "MR"] + # Consider only the ImageType that are true. + df = df[df['ImageType'].str.contains("ORIGINAL")] + except ValueError: + logging.exception("Empty entry detected") def write(): df.to_csv(output_csv) From 8d98e7d98f040cba19772fa4a19df9bad04c6087 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Thu, 9 Dec 2021 19:19:04 -0500 Subject: [PATCH 12/15] Compute min and max for AcquisitionTime --- modules/suvpar/Strip.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/modules/suvpar/Strip.py b/modules/suvpar/Strip.py index 76d0c38..07e1265 100644 --- a/modules/suvpar/Strip.py +++ b/modules/suvpar/Strip.py @@ -20,18 +20,24 @@ def initialize(): feature_list = text_file.read().split('\n') df = pandas.read_csv(filename, usecols=lambda x: x in feature_list, sep=',') - logging.info(df['ImageType']) def strip(): global df - try: - # Consider only MR. Remove modalities such as PR and SR that are present in the original data. - df = df[df.Modality == "MR"] - # Consider only the ImageType that are true. - df = df[df['ImageType'].str.contains("ORIGINAL")] - except ValueError: - logging.exception("Empty entry detected") + # Drop entries without an ImageType, AcquisitionTime, or an AccessionNumber entry. + df.dropna(subset=["ImageType"], inplace=True) + df.dropna(subset=["AccessionNumber"], inplace=True) + df.dropna(subset=["AcquisitionTime"], inplace=True) + # Consider only MR. Remove modalities such as PR and SR that are present in the original data. + df = df[df['ImageType'].str.contains("ORIGINAL")] + df = df[df.Modality == "MR"] + # Consider only the ImageType that are ORIGINAL. 
+    df['AcquisitionTime'] = pandas.to_datetime(df['AcquisitionTime'], format='%H%M%S')
+    df = df.join(
+        df.groupby('AccessionNumber')['AcquisitionTime'].aggregate(['min', 'max']),
+        on='AccessionNumber')
+    df = df.drop_duplicates('AccessionNumber')


 def write():
     df.to_csv(output_csv)

From 1e952f547e3015de424eb3ea99f72cfb7b986689 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu
Date: Thu, 9 Dec 2021 20:03:34 -0500
Subject: [PATCH 13/15] Merge date into time

---
 modules/suvpar/Strip.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/modules/suvpar/Strip.py b/modules/suvpar/Strip.py
index 07e1265..b7e63f4 100644
--- a/modules/suvpar/Strip.py
+++ b/modules/suvpar/Strip.py
@@ -24,15 +24,20 @@ def initialize():

 def strip():
     global df
-    # Drop entries without an ImageType, AcquisitionTime, or an AccessionNumber entry.
+    # Drop entries without an ImageType, AcquisitionTime, AcquisitionDate, AccessionNumber, or DeviceSerialNumber entry.
     df.dropna(subset=["ImageType"], inplace=True)
     df.dropna(subset=["AccessionNumber"], inplace=True)
     df.dropna(subset=["AcquisitionTime"], inplace=True)
-    # Consider only MR. Remove modalities such as PR and SR that are present in the original data.
+    df.dropna(subset=["AcquisitionDate"], inplace=True)
+    df.dropna(subset=["DeviceSerialNumber"], inplace=True)
+    # Consider only the ImageType that are ORIGINAL.
     df = df[df['ImageType'].str.contains("ORIGINAL")]
+    # Consider only MR. Remove modalities such as PR and SR that are present in the original data.
     df = df[df.Modality == "MR"]
-    # Consider only the ImageType that are ORIGINAL.
-    df['AcquisitionTime'] = pandas.to_datetime(df['AcquisitionTime'], format='%H%M%S')
+    # Ignore milliseconds; zero-pad so dates and early-morning times keep their leading digits.
+    df['AcquisitionTime'] = df['AcquisitionDate'].astype(int).astype(str).str.zfill(8) + \
+        df['AcquisitionTime'].astype(int).astype(str).str.zfill(6)
+    df['AcquisitionTime'] = pandas.to_datetime(df['AcquisitionTime'], format='%Y%m%d%H%M%S')
     df = df.join(
         df.groupby('AccessionNumber')['AcquisitionTime'].aggregate(['min', 'max']),
         on='AccessionNumber')

From cab38916cd1d17dcd14b236dfd14ae8b52f04ed5 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu
Date: Thu, 9 Dec 2021 20:24:33 -0500
Subject: [PATCH 14/15] Update pom.xml

---
 modules/app-layer/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/app-layer/pom.xml b/modules/app-layer/pom.xml
index b08655a..e278d91 100644
--- a/modules/app-layer/pom.xml
+++ b/modules/app-layer/pom.xml
@@ -47,7 +47,7 @@
         <dependency>
             <groupId>org.apache.logging.log4j</groupId>
             <artifactId>log4j-core</artifactId>
-            <version>2.13.2</version>
+            <version>2.15.0</version>
         </dependency>

From 0bc7dbcbf3a826bf053ad18190530a565e00db2c Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu
Date: Thu, 9 Dec 2021 20:27:37 -0500
Subject: [PATCH 15/15] Drop AcquisitionDate column from the final output

---
 modules/suvpar/Strip.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/modules/suvpar/Strip.py b/modules/suvpar/Strip.py
index b7e63f4..50b4383 100644
--- a/modules/suvpar/Strip.py
+++ b/modules/suvpar/Strip.py
@@ -41,7 +41,11 @@ def strip():
     df = df.join(
         df.groupby('AccessionNumber')['AcquisitionTime'].aggregate(['min', 'max']),
         on='AccessionNumber')
+    df.rename(columns={'AcquisitionTime': 'AcquisitionDateTime'}, inplace=True)
+    df.rename(columns={'min': 'MinAcquisitionDateTime'}, inplace=True)
+    df.rename(columns={'max': 'MaxAcquisitionDateTime'}, inplace=True)
     df = df.drop_duplicates('AccessionNumber')
+    df = df.drop(columns=['AcquisitionDate'])
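
Taken together, patches 12 through 15 reduce Strip.py's strip() to one aggregation pattern: build a per-row timestamp, attach the per-accession minimum and maximum, and keep one row per accession. The following standalone sketch reproduces that pattern on toy data (column values are illustrative), including the zero-padding needed so dates and early-morning times parse correctly:

```python
import pandas

# Toy metadata frame standing in for the scraped DICOM headers.
df = pandas.DataFrame({
    'AccessionNumber': ['A1', 'A1', 'A2'],
    'AcquisitionDate': [20211209.0, 20211209.0, 20211210.0],
    'AcquisitionTime': [93010.0, 101530.0, 141500.0],
})
# Merge date and time; zero-padding keeps 09:30:10 as '093010'.
df['AcquisitionTime'] = df['AcquisitionDate'].astype(int).astype(str).str.zfill(8) + \
    df['AcquisitionTime'].astype(int).astype(str).str.zfill(6)
df['AcquisitionTime'] = pandas.to_datetime(df['AcquisitionTime'], format='%Y%m%d%H%M%S')
# Attach the per-accession earliest and latest acquisition timestamps.
df = df.join(
    df.groupby('AccessionNumber')['AcquisitionTime'].aggregate(['min', 'max']),
    on='AccessionNumber')
df.rename(columns={'AcquisitionTime': 'AcquisitionDateTime',
                   'min': 'MinAcquisitionDateTime',
                   'max': 'MaxAcquisitionDateTime'}, inplace=True)
# One row per accession; the raw date column is no longer needed.
df = df.drop_duplicates('AccessionNumber').drop(columns=['AcquisitionDate'])
print(df)
```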