Merge pull request #158 from Emory-HITI/dev
An experimental front-end and tests framework for Niffler.
pradeeban authored Jul 3, 2021
2 parents 55fd6c7 + d11ef4b commit 10bbc99
Showing 36 changed files with 3,244 additions and 399 deletions.
9 changes: 9 additions & 0 deletions .gitignore
@@ -4,3 +4,12 @@ target/
.idea/
**/*.iml
**/*.csv
__pycache__
**/*.pyc
htmlcov
.coverage
coverage.xml
/tests/data/tmp
/tests/data/**/*.dcm
/tests/data/**/*.pickle
!/tests/data/**/no-img.dcm
@@ -1,2 +1,4 @@
DeviceSerialNumber, StudyInstanceUID, PatientID, DurationInMinutes, Number of Series in the Study, Exam Start Time, Exam End Time, StudyDescription
142635, 1.2.345.65454, 123456, 5.433333333, 8, 101744, 102310, StudyDesc
135, 1.3.345.65454, 12365, 5.43333, 2, 34343, 102310, StudyDesc1
1635, 1.4.345.65454, 1256, 5.433333, 3, 434343, 102310, StudyDec2
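
The sample rows are internally consistent: reading Exam Start Time and Exam End Time as HHMMSS timestamps reproduces the DurationInMinutes column. A quick sketch of that check (an editor's illustration, not part of the commit):

```python
from datetime import datetime

# Assumption: Exam Start/End Time are HHMMSS strings, taken from the first row above.
start = datetime.strptime("101744", "%H%M%S")  # 10:17:44
end = datetime.strptime("102310", "%H%M%S")    # 10:23:10
duration_minutes = (end - start).total_seconds() / 60
print(round(duration_minutes, 9))  # 5.433333333, matching DurationInMinutes
```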
300 changes: 157 additions & 143 deletions modules/cold-extraction/ColdDataRetriever.py
@@ -15,109 +15,77 @@

from collections import defaultdict

config = defaultdict(lambda: None)
# Read Default config.json file
with open('config.json', 'r') as f:
    tmp_config = json.load(f)
    config.update(tmp_config)

# CLI Argument Parser
ap = argparse.ArgumentParser()

ap.add_argument("--NifflerSystem", default=config['NifflerSystem'],
help="Path to json file with Niffler System Information.")
ap.add_argument("--StorageFolder",
default=config['StorageFolder'], help="StoreSCP config: Storage Folder. Refer Readme.md")
ap.add_argument("--FilePath", default=config['FilePath'],
help="StoreSCP config: FilePath, Refer configuring config.json in Readme.md.")
ap.add_argument("--CsvFile", default=config['CsvFile'],
help="Path to CSV file for extraction. Refer Readme.md.")
ap.add_argument("--ExtractionType", default=config['ExtractionType'],
help="One of the supported extraction type for Cold Data extraction. Refer Readme.md.")
ap.add_argument("--AccessionIndex", default=config['AccessionIndex'], type=int,
help="Set the CSV column index of AccessionNumber for extractions with Accessions.")
ap.add_argument("--PatientIndex", default=config['PatientIndex'], type=int,
help="Set the CSV column index of EMPI for extractions with EMPI and an accession or EMPI and a date.")
ap.add_argument("--DateIndex", default=config['DateIndex'], type=int,
help="Set the CSV column index of Date(StudyDate, AcquisitionDate) for extractions with EMPI and a date.")
ap.add_argument("--DateType", default=config['DateType'],
help="DateType can range from AcquisitionDate, StudyDate, etc. Refer Readme.md.")
ap.add_argument("--DateFormat", default=config['DateFormat'],
help="DateFormat can range from %Y%m%d, %m/%d/%y, %m-%d-%y, %%m%d%y, etc. Refer Readme.md.")
ap.add_argument("--SendEmail", default=config['SendEmail'], type=bool,
help="Send email when extraction is complete. Default false")
ap.add_argument("--YourEmail", default=config['YourEmail'],
help="A valid email, if send email is enabled.")

args = vars(ap.parse_args())

# Get variables for StoreScp from config.json.
storage_folder = args['StorageFolder']
file_path = args['FilePath']

# Get variables for each on-demand extraction from config.json
csv_file = args['CsvFile']
extraction_type = args['ExtractionType']
accession_index = args['AccessionIndex']
patient_index = args['PatientIndex']
date_index = args['DateIndex']
date_type = args['DateType']
date_format = args['DateFormat']
email = args['YourEmail']
send_email = args['SendEmail']

# Reads the system_json file.
system_json = args['NifflerSystem']

with open(system_json, 'r') as f:
    niffler = json.load(f)

# Get constants from system.json
DCM4CHE_BIN = niffler['DCM4CHEBin']
SRC_AET = niffler['SrcAet']
QUERY_AET = niffler['QueryAet']
DEST_AET = niffler['DestAet']
NIGHTLY_ONLY = niffler['NightlyOnly']
START_HOUR = niffler['StartHour']
END_HOUR = niffler['EndHour']
IS_EXTRACTION_NOT_RUNNING = True
NIFFLER_ID = niffler['NifflerID']
MAX_PROCESSES = niffler['MaxNifflerProcesses']

SEPARATOR = ','

accessions = []
patients = []
dates = []

storescp_processes = 0
niffler_processes = 0

nifflerscp_str = "storescp.*{0}".format(QUERY_AET)
qbniffler_str = 'ColdDataRetriever'

niffler_log = 'niffler' + str(NIFFLER_ID) + '.log'

logging.basicConfig(filename=niffler_log, level=logging.INFO)
logging.getLogger('schedule').setLevel(logging.WARNING)

# Variables to track progress between iterations.
extracted_ones = list()

# By default, assume that this is a fresh extraction.
resume = False

# All extracted files from the csv file are saved in a respective .pickle file.
try:
    with open(csv_file + '.pickle', 'rb') as f:
        extracted_ones = pickle.load(f)
    # Since we have successfully located a pickle file, it indicates that this is a resume.
    resume = True
except:
    logging.info("No existing pickle file found. Therefore, initialized with empty value to track the progress to {0}.pickle.".format(csv_file))

# record the start time
t_start = time.time()
def initialize_config_and_execute(valuesDict):
    global storescp_processes, niffler_processes, nifflerscp_str, qbniffler_str
    global storage_folder, file_path, csv_file, extraction_type, accession_index, patient_index, date_index, date_type, date_format, email, send_email, system_json
    global DCM4CHE_BIN, SRC_AET, QUERY_AET, DEST_AET, NIGHTLY_ONLY, START_HOUR, END_HOUR, IS_EXTRACTION_NOT_RUNNING, NIFFLER_ID, MAX_PROCESSES, SEPARATOR
    global accessions, patients, dates, niffler_log, resume, length

    storage_folder = valuesDict['StorageFolder']
    file_path = valuesDict['FilePath']
    csv_file = valuesDict['CsvFile']
    extraction_type = valuesDict['ExtractionType']
    accession_index = int(valuesDict['AccessionIndex'])
    patient_index = int(valuesDict['PatientIndex'])
    date_index = int(valuesDict['DateIndex'])
    date_type = valuesDict['DateType']
    date_format = valuesDict['DateFormat']
    email = valuesDict['YourEmail']
    send_email = bool(valuesDict['SendEmail'])
    system_json = valuesDict['NifflerSystem']

    # Reads the system_json file.
    with open(system_json, 'r') as f:
        niffler = json.load(f)

    # Get constants from system.json
    DCM4CHE_BIN = niffler['DCM4CHEBin']
    SRC_AET = niffler['SrcAet']
    QUERY_AET = niffler['QueryAet']
    DEST_AET = niffler['DestAet']
    NIGHTLY_ONLY = niffler['NightlyOnly']
    START_HOUR = niffler['StartHour']
    END_HOUR = niffler['EndHour']
    IS_EXTRACTION_NOT_RUNNING = True
    NIFFLER_ID = niffler['NifflerID']
    MAX_PROCESSES = niffler['MaxNifflerProcesses']

    SEPARATOR = ','

    accessions = []
    patients = []
    dates = []

    storescp_processes = 0
    niffler_processes = 0

    nifflerscp_str = "storescp.*{0}".format(QUERY_AET)
    qbniffler_str = 'ColdDataRetriever'

    niffler_log = 'niffler' + str(NIFFLER_ID) + '.log'

    logging.basicConfig(filename=niffler_log, level=logging.INFO)
    logging.getLogger('schedule').setLevel(logging.WARNING)

    # Variables to track progress between iterations.
    global extracted_ones
    extracted_ones = list()

    # By default, assume that this is a fresh extraction.
    resume = False

    # All extracted files from the csv file are saved in a respective .pickle file.
    try:
        with open(csv_file + '.pickle', 'rb') as f:
            extracted_ones = pickle.load(f)
        # Since we have successfully located a pickle file, it indicates that this is a resume.
        resume = True
    except:
        logging.info("No existing pickle file found. Therefore, initialized with empty value to track the progress to {0}.pickle.".format(csv_file))

    # record the start time
    t_start = time.time()
    run_cold_extraction()
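
The `update_pickle` job scheduled further below is collapsed out of this diff. A hypothetical sketch of such a checkpoint writer, assuming it simply dumps `extracted_ones` next to the CSV (the name and body here are an editor's assumption, not the commit's code):

```python
import logging
import pickle

def update_pickle_sketch():
    # Hypothetical: persist the IDs extracted so far (relies on the module
    # globals csv_file and extracted_ones), so that a restarted run resumes
    # from the {csv_file}.pickle checkpoint loaded above.
    with open(csv_file + '.pickle', 'wb') as f:
        pickle.dump(extracted_ones, f)
    logging.debug('Progress checkpointed to {0}.pickle'.format(csv_file))
```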

# Check and kill the StoreScp processes.
def check_kill_process():
@@ -161,33 +129,34 @@ def initialize():
    subprocess.call("{0}/storescp --accept-unknown --directory {1} --filepath {2} -b {3} > storescp.out &".format(DCM4CHE_BIN, storage_folder, file_path, QUERY_AET), shell=True)



with open(csv_file, newline='') as f:
    reader = csv.reader(f)
    next(f)
    for row in reader:
        row = [x.strip() for x in row]
        if (extraction_type == 'empi_date'):
            if not ((row[patient_index] == "") or (row[date_index] == "")):
                patients.append(row[patient_index])
                temp_date = row[date_index]
                dt_stamp = datetime.datetime.strptime(temp_date, date_format)
                date_str = dt_stamp.strftime('%Y%m%d')
                dates.append(date_str)
                length = len(patients)
        elif (extraction_type == 'empi'):
            if not (row[patient_index] == ""):
                patients.append(row[patient_index])
                length = len(patients)
        elif (extraction_type == 'accession'):
            if not (row[accession_index] == ""):
                accessions.append(row[accession_index])
                length = len(accessions)
        elif (extraction_type == 'empi_accession'):
            if not ((row[patient_index] == "") or (row[accession_index] == "")):
                patients.append(row[patient_index])
                accessions.append(row[accession_index])
                length = len(accessions)
def read_csv():
    global length
    with open(csv_file, newline='') as f:
        reader = csv.reader(f)
        next(f)
        for row in reader:
            row = [x.strip() for x in row]
            if (extraction_type == 'empi_date'):
                if not ((row[patient_index] == "") or (row[date_index] == "")):
                    patients.append(row[patient_index])
                    temp_date = row[date_index]
                    dt_stamp = datetime.datetime.strptime(temp_date, date_format)
                    date_str = dt_stamp.strftime('%Y%m%d')
                    dates.append(date_str)
                    length = len(patients)
            elif (extraction_type == 'empi'):
                if not (row[patient_index] == ""):
                    patients.append(row[patient_index])
                    length = len(patients)
            elif (extraction_type == 'accession'):
                if not (row[accession_index] == ""):
                    accessions.append(row[accession_index])
                    length = len(accessions)
            elif (extraction_type == 'empi_accession'):
                if not ((row[patient_index] == "") or (row[accession_index] == "")):
                    patients.append(row[patient_index])
                    accessions.append(row[accession_index])
                    length = len(accessions)
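
For the `empi_date` path in `read_csv`, whatever `DateFormat` the CSV uses is normalized to `%Y%m%d` before querying. A small illustration with a made-up value (an editor's example, not part of the commit):

```python
import datetime

# Illustration: a CSV date of '03/27/21' with DateFormat '%m/%d/%y'
# normalizes to '20210327', the form used in the DICOM query.
dt_stamp = datetime.datetime.strptime('03/27/21', '%m/%d/%y')
print(dt_stamp.strftime('%Y%m%d'))  # 20210327
```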


# Run the retrieval only once, when the extraction script starts, and keep it running in a separate thread.
@@ -202,6 +171,7 @@ def run_retrieval():

# The core DICOM on-demand retrieve process.
def retrieve():
    global length
    # For the cases that have the typical EMPI and Accession values together.
    if (extraction_type == 'empi_accession'):
        # Create our Identifier (query) dataset
@@ -305,16 +275,60 @@ def run_threaded(job_func):
    job_thread = threading.Thread(target=job_func)
    job_thread.start()


# The thread scheduling
schedule.every(1).minutes.do(run_threaded, run_retrieval)
schedule.every(10).minutes.do(run_threaded, update_pickle)

# Keep running in a loop.
while True:
    try:
        schedule.run_pending()
        time.sleep(1)
    except KeyboardInterrupt:
        check_kill_process()
        sys.exit(0)
def run_cold_extraction():
    read_csv()
    # The thread scheduling
    schedule.every(1).minutes.do(run_threaded, run_retrieval)
    schedule.every(10).minutes.do(run_threaded, update_pickle)

    # Keep running in a loop.
    while True:
        try:
            schedule.run_pending()
            time.sleep(1)
        except KeyboardInterrupt:
            check_kill_process()
            logging.shutdown()
            sys.exit(0)

if __name__ == "__main__":
    config = defaultdict(lambda: None)
    # Read Default config.json file
    with open('config.json', 'r') as f:
        tmp_config = json.load(f)
        config.update(tmp_config)

    # CLI Argument Parser
    ap = argparse.ArgumentParser()

    ap.add_argument("--NifflerSystem", default=config['NifflerSystem'],
                    help="Path to json file with Niffler System Information.")
    ap.add_argument("--StorageFolder", default=config['StorageFolder'],
                    help="StoreSCP config: Storage Folder. Refer Readme.md.")
    ap.add_argument("--FilePath", default=config['FilePath'],
                    help="StoreSCP config: FilePath. Refer configuring config.json in Readme.md.")
    ap.add_argument("--CsvFile", default=config['CsvFile'],
                    help="Path to CSV file for extraction. Refer Readme.md.")
    ap.add_argument("--ExtractionType", default=config['ExtractionType'],
                    help="One of the supported extraction types for Cold Data extraction. Refer Readme.md.")
    ap.add_argument("--AccessionIndex", default=config['AccessionIndex'], type=int,
                    help="Set the CSV column index of AccessionNumber for extractions with Accessions.")
    ap.add_argument("--PatientIndex", default=config['PatientIndex'], type=int,
                    help="Set the CSV column index of EMPI for extractions with EMPI and an accession or EMPI and a date.")
    ap.add_argument("--DateIndex", default=config['DateIndex'], type=int,
                    help="Set the CSV column index of Date (StudyDate, AcquisitionDate) for extractions with EMPI and a date.")
    ap.add_argument("--DateType", default=config['DateType'],
                    help="DateType can range from AcquisitionDate, StudyDate, etc. Refer Readme.md.")
    ap.add_argument("--DateFormat", default=config['DateFormat'],
                    help="DateFormat can range from %%Y%%m%%d, %%m/%%d/%%y, %%m-%%d-%%y, %%m%%d%%y, etc. Refer Readme.md.")
    ap.add_argument("--SendEmail", default=config['SendEmail'], type=bool,
                    help="Send email when extraction is complete. Default false.")
    ap.add_argument("--YourEmail", default=config['YourEmail'],
                    help="A valid email, if send email is enabled.")

    args = vars(ap.parse_args())

    if len(args) > 0:
        initialize_config_and_execute(args)
    else:
        initialize_config_and_execute(config)
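
Besides the CLI path above, the new `initialize_config_and_execute(valuesDict)` entry point can also be driven programmatically, e.g. from the frontend module. A sketch with placeholder values (the dictionary keys mirror the CLI flags; the values are an editor's assumptions):

```python
values = {
    'NifflerSystem': 'system.json',
    'StorageFolder': '/opt/data/new-study',    # placeholder path
    'FilePath': '{00100020}/{0020000D}.dcm',   # placeholder StoreSCP pattern
    'CsvFile': 'studies.csv',
    'ExtractionType': 'accession',
    'AccessionIndex': 0,
    'PatientIndex': 1,
    'DateIndex': 2,
    'DateType': 'StudyDate',
    'DateFormat': '%Y%m%d',
    'SendEmail': False,
    'YourEmail': 'you@example.edu',
}
initialize_config_and_execute(values)  # blocks: schedules retrieval and loops
```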
26 changes: 26 additions & 0 deletions modules/frontend/README.md
@@ -0,0 +1,26 @@
# A Frontend Framework for Niffler

Here we have created a frontend module for Niffler for ease of access to its modules.
This module uses **Flask** as the engine that runs the frontend.

**Make sure you have installed Flask from the requirements.txt file.**
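
For orientation, a minimal sketch of what a Flask entry point like `server.py` can look like, including the `--admin` switch described below. The route and names are illustrative assumptions, not the module's actual code:

```python
import argparse
from flask import Flask

app = Flask(__name__)

@app.route('/')
def index():
    # Illustrative landing page listing the Niffler modules.
    return 'Niffler modules'

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--admin', action='store_true',
                        help='Enable admin-level access (user creation).')
    args = parser.parse_args()
    app.config['ADMIN_MODE'] = args.admin  # checked by user-management views
    app.run(port=9000)
```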

## Steps for running Frontend Module

1. Run the server.py file by navigating into the frontend directory and running:
`python server.py` or `python3 server.py` (user-level access)

2. Then navigate to `localhost:9000` to view your Niffler modules.

## Admin level access

1. Run the server.py file by navigating into the frontend directory and running:
`python server.py --admin` or `python3 server.py --admin` (admin-level access)

2. With admin access enabled, only admins can create new users.



*Currently, the PNG extraction frontend is implemented; it takes values from the frontend and passes them to the backend.*

**NOTE: The `__init__.py` file is important, as it makes the frontend directory a package, so its values can also be accessed from other modules.**
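
As an illustration of that packaging, a sibling module could then import from the directory (the imported names are hypothetical):

```python
# Hypothetical: works because modules/frontend ships an __init__.py,
# making the directory importable as the 'frontend' package.
from frontend import server

server.app.run(port=9000)
```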