diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index bdb2a4c..0edd43f 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -15,109 +15,77 @@ from collections import defaultdict
 
 
-config = defaultdict(lambda: None)
-# Read Default config.json file
-with open('config.json', 'r') as f:
-    tmp_config = json.load(f)
-    config.update(tmp_config)
-
-# CLI Argument Parser
-ap = argparse.ArgumentParser()
-
-ap.add_argument("--NifflerSystem", default=config['NifflerSystem'],
-                help="Path to json file with Niffler System Information.")
-ap.add_argument("--StorageFolder",
-                default=config['StorageFolder'], help="StoreSCP config: Storage Folder. Refer Readme.md")
-ap.add_argument("--FilePath", default=config['FilePath'],
-                help="StoreSCP config: FilePath, Refer configuring config.json in Readme.md.")
-ap.add_argument("--CsvFile", default=config['CsvFile'],
-                help="Path to CSV file for extraction. Refer Readme.md.")
-ap.add_argument("--ExtractionType", default=config['ExtractionType'],
-                help="One of the supported extraction type for Cold Data extraction. Refer Readme.md.")
-ap.add_argument("--AccessionIndex", default=config['AccessionIndex'], type=int,
-                help="Set the CSV column index of AccessionNumber for extractions with Accessions.")
-ap.add_argument("--PatientIndex", default=config['PatientIndex'], type=int,
-                help="Set the CSV column index of EMPI for extractions with EMPI and an accession or EMPI and a date.")
-ap.add_argument("--DateIndex", default=config['DateIndex'], type=int,
-                help="Set the CSV column index of Date(StudyDate, AcquisitionDate) for extractions with EMPI and a date.")
-ap.add_argument("--DateType", default=config['DateType'],
-                help="DateType can range from AcquisitionDate, StudyDate, etc. Refer Readme.md.")
-ap.add_argument("--DateFormat", default=config['DateFormat'],
-                help="DateFormat can range from %Y%m%d, %m/%d/%y, %m-%d-%y, %%m%d%y, etc. Refer Readme.md.")
-ap.add_argument("--SendEmail", default=config['SendEmail'], type=bool,
-                help="Send email when extraction is complete. Default false")
-ap.add_argument("--YourEmail", default=config['YourEmail'],
-                help="A valid email, if send email is enabled.")
-
-args = vars(ap.parse_args())
-
-#Get variables for StoreScp from config.json.
-storage_folder = args['StorageFolder']
-file_path = args['FilePath']
-
-# Get variables for the each on-demand extraction from config.json
-csv_file = args['CsvFile']
-extraction_type = args['ExtractionType']
-accession_index = args['AccessionIndex']
-patient_index = args['PatientIndex']
-date_index = args['DateIndex']
-date_type = args['DateType']
-date_format = args['DateFormat']
-email = args['YourEmail']
-send_email = args['SendEmail']
-
-# Reads the system_json file.
-system_json = args['NifflerSystem']
-
-with open(system_json, 'r') as f:
-    niffler = json.load(f)
-
-# Get constants from system.json
-DCM4CHE_BIN = niffler['DCM4CHEBin']
-SRC_AET = niffler['SrcAet']
-QUERY_AET = niffler['QueryAet']
-DEST_AET = niffler['DestAet']
-NIGHTLY_ONLY = niffler['NightlyOnly']
-START_HOUR = niffler['StartHour']
-END_HOUR = niffler['EndHour']
-IS_EXTRACTION_NOT_RUNNING = True
-NIFFLER_ID = niffler['NifflerID']
-MAX_PROCESSES = niffler['MaxNifflerProcesses']
-
-SEPARATOR = ','
-
-accessions = []
-patients = []
-dates = []
-
-storescp_processes = 0
-niffler_processes = 0
-
-nifflerscp_str = "storescp.*{0}".format(QUERY_AET)
-qbniffler_str = 'ColdDataRetriever'
-
-niffler_log = 'niffler' + str(NIFFLER_ID) + '.log'
-
-logging.basicConfig(filename=niffler_log,level=logging.INFO)
-logging.getLogger('schedule').setLevel(logging.WARNING)
-
-# Variables to track progress between iterations.
-extracted_ones = list()
-
-# By default, assume that this is a fresh extraction.
-resume = False
-
-# All extracted files from the csv file are saved in a respective .pickle file.
-try:
-    with open(csv_file +'.pickle', 'rb') as f:
-        extracted_ones = pickle.load(f)
-    # Since we have successfully located a pickle file, it indicates that this is a resume.
-    resume = True
-except:
-    logging.info("No existing pickle file found. Therefore, initialized with empty value to track the progress to {0}.pickle.".format(csv_file))
-
-# record the start time
-t_start = time.time()
+def initialize_Values(valuesDict):
+    global storescp_processes, niffler_processes, nifflerscp_str, qbniffler_str
+    global storage_folder, file_path, csv_file, extraction_type, accession_index, patient_index, date_index, date_type, date_format, email, send_email, system_json
+    global DCM4CHE_BIN, SRC_AET, QUERY_AET, DEST_AET, NIGHTLY_ONLY, START_HOUR, END_HOUR, IS_EXTRACTION_NOT_RUNNING, NIFFLER_ID, MAX_PROCESSES, SEPARATOR
+    global accessions, patients, dates, niffler_log, resume, length
+
+    storage_folder = valuesDict['storage_folder']
+    file_path = valuesDict['file_path']
+    csv_file = valuesDict['CsvFile']
+    extraction_type = valuesDict['extraction_type']
+    accession_index = int(valuesDict['accession_index'])
+    patient_index = int(valuesDict['patient_index'])
+    date_index = int(valuesDict['date_index'])
+    date_type = valuesDict['date_type']
+    date_format = valuesDict['date_format']
+    email = valuesDict['email']
+    send_email = bool(valuesDict['send_email'])
+    system_json = valuesDict['NifflerSystem']
+
+    # Reads the system_json file.
+    with open(system_json, 'r') as f:
+        niffler = json.load(f)
+
+    # Get constants from system.json
+    DCM4CHE_BIN = niffler['DCM4CHEBin']
+    SRC_AET = niffler['SrcAet']
+    QUERY_AET = niffler['QueryAet']
+    DEST_AET = niffler['DestAet']
+    NIGHTLY_ONLY = niffler['NightlyOnly']
+    START_HOUR = niffler['StartHour']
+    END_HOUR = niffler['EndHour']
+    IS_EXTRACTION_NOT_RUNNING = True
+    NIFFLER_ID = niffler['NifflerID']
+    MAX_PROCESSES = niffler['MaxNifflerProcesses']
+
+    SEPARATOR = ','
+
+    accessions = []
+    patients = []
+    dates = []
+
+    storescp_processes = 0
+    niffler_processes = 0
+
+    nifflerscp_str = "storescp.*{0}".format(QUERY_AET)
+    qbniffler_str = 'ColdDataRetriever'
+
+    niffler_log = 'niffler' + str(NIFFLER_ID) + '.log'
+
+    logging.basicConfig(filename=niffler_log,level=logging.INFO)
+    logging.getLogger('schedule').setLevel(logging.WARNING)
+
+    # Variables to track progress between iterations.
+    global extracted_ones
+    extracted_ones = list()
+
+    # By default, assume that this is a fresh extraction.
+    resume = False
+
+    # All extracted files from the csv file are saved in a respective .pickle file.
+    try:
+        with open(csv_file +'.pickle', 'rb') as f:
+            extracted_ones = pickle.load(f)
+        # Since we have successfully located a pickle file, it indicates that this is a resume.
+        resume = True
+    except:
+        logging.info("No existing pickle file found. Therefore, initialized with empty value to track the progress to {0}.pickle.".format(csv_file))
+
+    # record the start time
+    t_start = time.time()
+    run_cold_extraction()
 
 # Check and kill the StoreScp processes.
 def check_kill_process():
@@ -161,33 +129,34 @@ def initialize():
     subprocess.call("{0}/storescp --accept-unknown --directory {1} --filepath {2} -b {3} > storescp.out &".format(DCM4CHE_BIN, storage_folder, file_path, QUERY_AET), shell=True)
 
-
-with open(csv_file, newline='') as f:
-    reader = csv.reader(f)
-    next(f)
-    for row in reader:
-        row = [x.strip() for x in row]
-        if (extraction_type == 'empi_date'):
-            if not ((row[patient_index] == "") or (row[date_index] == "")):
-                patients.append(row[patient_index])
-                temp_date = row[date_index]
-                dt_stamp = datetime.datetime.strptime(temp_date, date_format)
-                date_str = dt_stamp.strftime('%Y%m%d')
-                dates.append(date_str)
-                length = len(patients)
-        elif (extraction_type == 'empi'):
-            if not ((row[patient_index] == "")):
-                patients.append(row[patient_index])
-                length = len(patients)
-        elif (extraction_type == 'accession'):
-            if not ((row[accession_index] == "")):
-                accessions.append(row[accession_index])
-                length = len(accessions)
-        elif (extraction_type == 'empi_accession'):
-            if not ((row[patient_index] == "") or (row[accession_index] == "")):
-                patients.append(row[patient_index])
-                accessions.append(row[accession_index])
-                length = len(accessions)
+def read_csv():
+    global length
+    with open(csv_file, newline='') as f:
+        reader = csv.reader(f)
+        next(f)
+        for row in reader:
+            row = [x.strip() for x in row]
+            if (extraction_type == 'empi_date'):
+                if not ((row[patient_index] == "") or (row[date_index] == "")):
+                    patients.append(row[patient_index])
+                    temp_date = row[date_index]
+                    dt_stamp = datetime.datetime.strptime(temp_date, date_format)
+                    date_str = dt_stamp.strftime('%Y%m%d')
+                    dates.append(date_str)
+                    length = len(patients)
+            elif (extraction_type == 'empi'):
+                if not ((row[patient_index] == "")):
+                    patients.append(row[patient_index])
+                    length = len(patients)
+            elif (extraction_type == 'accession'):
+                if not ((row[accession_index] == "")):
+                    accessions.append(row[accession_index])
+                    length = len(accessions)
+            elif (extraction_type == 'empi_accession'):
+                if not ((row[patient_index] == "") or (row[accession_index] == "")):
+                    patients.append(row[patient_index])
+                    accessions.append(row[accession_index])
+                    length = len(accessions)
 
 
 # Run the retrieval only once, when the extraction script starts, and keep it running in a separate thread.
 
@@ -202,6 +171,7 @@ def run_retrieval():
 
 # The core DICOM on-demand retrieve process.
 def retrieve():
+    global length
     # For the cases that have the typical EMPI and Accession values together.
     if (extraction_type == 'empi_accession'):
         # Create our Identifier (query) dataset
@@ -305,16 +275,129 @@ def run_threaded(job_func):
     job_thread = threading.Thread(target=job_func)
     job_thread.start()
 
-
-# The thread scheduling
-schedule.every(1).minutes.do(run_threaded, run_retrieval)
-schedule.every(10).minutes.do(run_threaded, update_pickle)
-
-# Keep running in a loop.
-while True:
+def run_cold_extraction():
+    read_csv()
+    # The thread scheduling
+    schedule.every(1).minutes.do(run_threaded, run_retrieval)
+    schedule.every(10).minutes.do(run_threaded, update_pickle)
+
+    # Keep running in a loop.
+    while True:
+        try:
+            schedule.run_pending()
+            time.sleep(1)
+        except KeyboardInterrupt:
+            check_kill_process()
+            logging.shutdown()
+            sys.exit(0)
+
+if __name__ == "__main__":
+    global storescp_processes, niffler_processes, nifflerscp_str, qbniffler_str
+    global storage_folder, file_path, csv_file, extraction_type, accession_index, patient_index, date_index, date_type, date_format, email, send_email
+    global DCM4CHE_BIN, SRC_AET, QUERY_AET, DEST_AET, NIGHTLY_ONLY, START_HOUR, END_HOUR, IS_EXTRACTION_NOT_RUNNING, NIFFLER_ID, MAX_PROCESSES, SEPARATOR
+    global accessions, patients, dates, niffler_log, resume, length
+
+    config = defaultdict(lambda: None)
+    # Read Default config.json file
+    with open('config.json', 'r') as f:
+        tmp_config = json.load(f)
+        config.update(tmp_config)
+
+    # CLI Argument Parser
+    ap = argparse.ArgumentParser()
+
+    ap.add_argument("--NifflerSystem", default=config['NifflerSystem'],
+                    help="Path to json file with Niffler System Information.")
+    ap.add_argument("--StorageFolder",
+                    default=config['StorageFolder'], help="StoreSCP config: Storage Folder. Refer Readme.md")
+    ap.add_argument("--FilePath", default=config['FilePath'],
+                    help="StoreSCP config: FilePath, Refer configuring config.json in Readme.md.")
+    ap.add_argument("--CsvFile", default=config['CsvFile'],
+                    help="Path to CSV file for extraction. Refer Readme.md.")
+    ap.add_argument("--ExtractionType", default=config['ExtractionType'],
+                    help="One of the supported extraction types for Cold Data extraction. Refer Readme.md.")
+    ap.add_argument("--AccessionIndex", default=config['AccessionIndex'], type=int,
+                    help="Set the CSV column index of AccessionNumber for extractions with Accessions.")
+    ap.add_argument("--PatientIndex", default=config['PatientIndex'], type=int,
+                    help="Set the CSV column index of EMPI for extractions with EMPI and an accession or EMPI and a date.")
+    ap.add_argument("--DateIndex", default=config['DateIndex'], type=int,
+                    help="Set the CSV column index of Date(StudyDate, AcquisitionDate) for extractions with EMPI and a date.")
+    ap.add_argument("--DateType", default=config['DateType'],
+                    help="DateType can range from AcquisitionDate, StudyDate, etc. Refer Readme.md.")
+    ap.add_argument("--DateFormat", default=config['DateFormat'],
+                    help="DateFormat can range from %Y%m%d, %m/%d/%y, %m-%d-%y, %%m%d%y, etc. Refer Readme.md.")
+    ap.add_argument("--SendEmail", default=config['SendEmail'], type=bool,
+                    help="Send email when extraction is complete. Default false")
+    ap.add_argument("--YourEmail", default=config['YourEmail'],
+                    help="A valid email, if send email is enabled.")
+
+    args = vars(ap.parse_args())
+
+    # Get variables for StoreScp from config.json.
+    storage_folder = args['StorageFolder']
+    file_path = args['FilePath']
+
+    # Get variables for each on-demand extraction from config.json
+    csv_file = args['CsvFile']
+    extraction_type = args['ExtractionType']
+    accession_index = args['AccessionIndex']
+    patient_index = args['PatientIndex']
+    date_index = args['DateIndex']
+    date_type = args['DateType']
+    date_format = args['DateFormat']
+    email = args['YourEmail']
+    send_email = args['SendEmail']
+
+    # Reads the system_json file.
+    system_json = args['NifflerSystem']
+
+    with open(system_json, 'r') as f:
+        niffler = json.load(f)
+
+    # Get constants from system.json
+    DCM4CHE_BIN = niffler['DCM4CHEBin']
+    SRC_AET = niffler['SrcAet']
+    QUERY_AET = niffler['QueryAet']
+    DEST_AET = niffler['DestAet']
+    NIGHTLY_ONLY = niffler['NightlyOnly']
+    START_HOUR = niffler['StartHour']
+    END_HOUR = niffler['EndHour']
+    IS_EXTRACTION_NOT_RUNNING = True
+    NIFFLER_ID = niffler['NifflerID']
+    MAX_PROCESSES = niffler['MaxNifflerProcesses']
+
+    SEPARATOR = ','
+
+    accessions = []
+    patients = []
+    dates = []
+
+    storescp_processes = 0
+    niffler_processes = 0
+
+    nifflerscp_str = "storescp.*{0}".format(QUERY_AET)
+    qbniffler_str = 'ColdDataRetriever'
+
+    niffler_log = 'niffler' + str(NIFFLER_ID) + '.log'
+
+    logging.basicConfig(filename=niffler_log,level=logging.INFO)
+    logging.getLogger('schedule').setLevel(logging.WARNING)
+
+    # Variables to track progress between iterations.
+    extracted_ones = list()
+
+    # By default, assume that this is a fresh extraction.
+    resume = False
+
+    # All extracted files from the csv file are saved in a respective .pickle file.
     try:
-        schedule.run_pending()
-        time.sleep(1)
-    except KeyboardInterrupt:
-        check_kill_process()
-        sys.exit(0)
+        with open(csv_file +'.pickle', 'rb') as f:
+            extracted_ones = pickle.load(f)
+        # Since we have successfully located a pickle file, it indicates that this is a resume.
+        resume = True
+    except:
+        logging.info("No existing pickle file found. Therefore, initialized with empty value to track the progress to {0}.pickle.".format(csv_file))
+
+    # record the start time
+    t_start = time.time()
+    run_cold_extraction()
diff --git a/modules/frontend/server.py b/modules/frontend/server.py
index 134b731..220d58f 100644
--- a/modules/frontend/server.py
+++ b/modules/frontend/server.py
@@ -4,6 +4,7 @@ from flask_sqlalchemy import SQLAlchemy
 from flask_login import UserMixin
 from flask_login import LoginManager, login_user, login_required, current_user, logout_user
+from werkzeug.utils import secure_filename
 
 import warnings
 warnings.filterwarnings("ignore")
 
@@ -12,6 +13,11 @@ from models import User
 
 PEOPLE_FOLDER = os.path.join('static','styles')
+UPLOAD_FOLDER = '../cold-extraction/csv' # Need to change this to a particular server path
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+COLD_UPLOAD_FOLDER = '../cold-extraction/' # Need to change this to a particular server path
+app.config['COLD_UPLOAD_FOLDER'] = COLD_UPLOAD_FOLDER
 
 # app = Flask(__name__)
 login_manager = LoginManager(app)
 
@@ -24,7 +30,7 @@ def load_user(user_id):
 
 @app.route("/", methods=['GET'])
 def index():
-    return render_template('home.html', isAdmin = isAdmin)
+    return render_template('Home.html', isAdmin = isAdmin)
 
 @app.route('/login', methods=['GET','POST'])
 def login():
@@ -43,7 +49,7 @@ def login():
 
         # if the above check passes, then we know the user has the right credentials
         login_user(user, remember=remember)
-        return render_template('home.html')
+        return render_template('Home.html')
     return render_template('login.html', isAdmin = isAdmin)
 
 @app.route('/signup', methods=['GET','POST'])
@@ -74,31 +80,34 @@ def signup():
 @login_required
 def logout():
     logout_user()
-    return render_template('home.html', isAdmin = isAdmin)
-
-# @app.route("/png-extraction", methods = ['GET'])
-# @login_required
-# def PNG_Extraction():
-#     return render_template('pngHome.html')
-
-config_values = {}
+    return render_template('Home.html', isAdmin = isAdmin)
 
 @app.route('/png-extraction', methods=['GET', 'POST'])
 @login_required
 def extract_png():
+    config_values = {}
     if request.method =='POST':
+        depth = request.form['depth']
+        if(depth == '' or len(depth) == 0):
+            depth = '0'
+        chunks = request.form['chunks']
+        if(chunks == '' or len(chunks) == 0):
+            chunks = '1'
+        useProcess = request.form['useProcess']
+        if(useProcess == '' or len(useProcess) == 0):
+            useProcess = '0'
+
         config_values["dcmFolder"] = request.form['DICOMFolder']
         config_values["outputFolder"] = request.form['outputFolder']
-        config_values["depth"] = request.form['depth']
-        config_values["chunks"] = request.form['chunks']
-        config_values["useProcess"] = request.form['useProcess']
+        config_values["depth"] = depth
+        config_values["chunks"] = chunks
+        config_values["useProcess"] = useProcess
         config_values["level"] = request.form['level']
         config_values["16Bit"] = request.form['16Bit']
         config_values["printImages"] = request.form['printImages']
        config_values["headers"] = request.form['headers']
         config_values["sendEmail"] = request.form['sendEmail']
         config_values["email"] = request.form['email']
-
     if(len(config_values) > 0):
         import sys
         sys.path.append("../png-extraction/")
@@ -110,17 +119,74 @@
 @app.route('/cold-extraction', methods=['GET', 'POST'])
 @login_required
 def cold_extraction():
+    logs = []
+    csv_folder = UPLOAD_FOLDER
+    if not os.path.exists(csv_folder):
+        os.makedirs(csv_folder)
+    files_present_in_server = os.listdir(csv_folder)
+
+    cold_extraction_values = {}
     if request.method =='POST':
-        csv_file = request.files['csvFile']
-        if (csv_file):
+        f1 = request.files['csvFile_choose']
+        f2 = request.form['csvFile_name']
+        if(f1):
+            filename = secure_filename(f1.filename)
+            f1.save(os.path.join(app.config['UPLOAD_FOLDER'],filename))
+            cold_extraction_values['CsvFile'] = os.path.join(app.config['UPLOAD_FOLDER'],filename)
+        else:
+            cold_extraction_values['CsvFile'] = os.path.join(app.config['UPLOAD_FOLDER'],f2)
+
+        NifflerSystem = request.form['NifflerSystem']
+        if(NifflerSystem == '' or len(NifflerSystem) == 0):
+            NifflerSystem = 'system.json'
+        file_path = request.form['file_path']
+        if(file_path == '' or len(file_path) == 0):
+            file_path = '{00100020}/{0020000D}/{0020000E}/{00080018}.dcm'
+        accession_index = request.form['AccessionIndex']
+        if(accession_index == '' or len(accession_index) == 0):
+            accession_index = '1'
+        patient_index = request.form['PatientIndex']
+        if(patient_index == '' or len(patient_index) == 0):
+            patient_index = '0'
+        date_index = request.form['DateIndex']
+        if(date_index == '' or len(date_index) == 0):
+            date_index = '1'
+        date_format = request.form['DateFormat']
+        if(date_format == '' or len(date_format) == 0):
+            date_format = '%Y%m%d'
+
+        NifflerSystem_File = COLD_UPLOAD_FOLDER + NifflerSystem
+        checkfile = True
+        try:
+            with open(NifflerSystem_File, 'r') as f:
+                checkfile = True
+        except:
+            err = "Error could not load given " + NifflerSystem + " file !!"
+            logs.append(err)
+            checkfile = False
+
+        if checkfile:
+            cold_extraction_values['NifflerSystem'] = NifflerSystem_File
+            cold_extraction_values['storage_folder'] = request.form['StorageFolder']
+            cold_extraction_values['file_path'] = file_path
+            cold_extraction_values['extraction_type'] = request.form['ExtractionType']
+            cold_extraction_values['accession_index'] = accession_index
+            cold_extraction_values['patient_index'] = patient_index
+            cold_extraction_values['date_index'] = date_index
+            cold_extraction_values['date_type'] = request.form['DateType']
+            cold_extraction_values['date_format'] = date_format
+            cold_extraction_values['send_email'] = request.form['sendEmail']
+            cold_extraction_values['email'] = request.form['email']
+
             import sys
             import io
-
-            stream = io.StringIO(csv_file.stream.read().decode("UTF8"), newline=None)
             sys.path.append("../cold-extraction/")
             import ColdDataRetriever
-            x = ColdDataRetriever.read_csv(stream)
-    return render_template('cold_extraction.html')
+            x = ColdDataRetriever.initialize_Values(cold_extraction_values)
+            return render_template('cold_extraction.html', logs = logs, files_list = files_present_in_server)
+        else:
+            return render_template('cold_extraction.html', logs = logs, files_list = files_present_in_server)
+    return render_template('cold_extraction.html', files_list = files_present_in_server)
 
 #JUST DO IT!!!
 if __name__=="__main__":
-    app.run(port="9000")
\ No newline at end of file
+    app.run(host="0.0.0.0",port="9000")
diff --git a/modules/frontend/templates/cold_extraction.html b/modules/frontend/templates/cold_extraction.html
index aae7df6..dc1e479 100644
--- a/modules/frontend/templates/cold_extraction.html
+++ b/modules/frontend/templates/cold_extraction.html
@@ -35,6 +35,36 @@
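
Review notes on the patch. The code sketches below are illustrative suggestions, not part of the diff; helper names are assumptions unless they appear above.

The CLI still declares `--SendEmail` with `type=bool`, and `initialize_Values()` coerces the form value with `bool(valuesDict['send_email'])`. Both treat any non-empty string, including `"False"` and `"0"`, as `True`, because `bool()` on a string only checks emptiness. A minimal sketch of a safer converter (the name `str2bool` is hypothetical, not an existing Niffler helper):

```python
import argparse

def str2bool(value):
    # Map the usual CLI/HTML-form spellings onto real booleans.
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', '1', 'yes', 'on'):
        return True
    if text in ('false', '0', 'no', 'off', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean, got {0!r}".format(value))

ap = argparse.ArgumentParser()
ap.add_argument("--SendEmail", default=False, type=str2bool,
                help="Send email when extraction is complete. Default false")
```

The same function could replace the `bool(...)` call inside `initialize_Values()`.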
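`initialize_Values()` ends by calling `run_cold_extraction()`, whose `while True:` scheduler loop only exits via `sys.exit()`. When the `/cold-extraction` route calls `ColdDataRetriever.initialize_Values(cold_extraction_values)` inline, the request handler therefore never returns, and the `render_template(...)` call after it is unreachable. (Relatedly, the `global` statements directly under `if __name__ == "__main__":` are no-ops: `global` has no effect at module scope.) One possible fix, sketched under the assumption that the module is importable exactly as the route already imports it, is to push the blocking loop onto a background thread:

```python
import sys
import threading

sys.path.append("../cold-extraction/")  # same path setup the route already performs
import ColdDataRetriever

def start_cold_extraction(cold_extraction_values):
    # Hypothetical helper: run the blocking extraction loop off the request thread
    # so the Flask handler can return a response to the browser immediately.
    worker = threading.Thread(
        target=ColdDataRetriever.initialize_Values,
        args=(cold_extraction_values,),
        daemon=True,  # do not keep the server process alive just for this loop
    )
    worker.start()
    return worker
```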
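Both copies of the resume logic use a bare `except:`, which hides more than the intended "no pickle yet" case; it also swallows `KeyboardInterrupt` and genuine bugs such as a `NameError`. A narrower sketch (the `load_progress` helper is hypothetical, mirroring the patch's semantics):

```python
import logging
import pickle

def load_progress(csv_file):
    # Return (extracted_ones, resume), matching the patch's resume behavior.
    try:
        with open(csv_file + '.pickle', 'rb') as f:
            return pickle.load(f), True
    except FileNotFoundError:
        logging.info("No existing pickle file found. Starting fresh for %s.pickle.", csv_file)
    except (pickle.UnpicklingError, EOFError):
        logging.warning("Could not read %s.pickle; starting fresh.", csv_file)
    return [], False
```

The same applies to the bare `except:` around `open(NifflerSystem_File, 'r')` in server.py, where `except OSError:` would cover the realistic failures.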
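In `cold_extraction()`, the uploaded file goes through `secure_filename()`, but the typed `csvFile_name` field is joined into `UPLOAD_FOLDER` as-is, so a value like `../../system.json` would escape the upload directory. A sketch that sanitizes both branches (the helper name and signature are assumptions):

```python
import os
from werkzeug.utils import secure_filename

def resolve_csv_path(upload_folder, uploaded_file, typed_name):
    # File-picker branch: save under a sanitized name, as the patch already does.
    if uploaded_file and uploaded_file.filename:
        filename = secure_filename(uploaded_file.filename)
        path = os.path.join(upload_folder, filename)
        uploaded_file.save(path)
        return path
    # Typed-name branch: secure_filename() strips directory components here too.
    return os.path.join(upload_folder, secure_filename(typed_name))
```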