From 7cce9a75c53f24cd866dc4655a255451db14e0dc Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 18:31:58 -0400 Subject: [PATCH 1/9] merge the cfind output to a single file --- modules/cold-extraction/ColdDataRetriever.py | 23 +++++++++++++++----- modules/cold-extraction/README.md | 7 +++--- modules/cold-extraction/description.csv.xsl | 8 +++++-- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 4795c17..4726a04 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -1,5 +1,5 @@ import logging -import os +import os, glob import signal import csv import time @@ -13,6 +13,7 @@ import threading import argparse import random +import pandas as pd from collections import defaultdict @@ -141,7 +142,7 @@ def initialize(): logging.info("{0}: StoreScp process for the current Niffler extraction is starting now".format( datetime.datetime.now())) - if not storage_folder == "CFIND-ONLY": + if not filepath == "CFIND-ONLY": subprocess.call("{0}/storescp --accept-unknown --directory {1} --filepath {2} -b {3} > storescp.out &".format( DCM4CHE_BIN, storage_folder, file_path, QUERY_AET), shell=True) @@ -211,22 +212,32 @@ def retrieve(): if number_of_query_attributes > 3 or number_of_query_attributes <= 1: # For the cases that extract entirely based on the PatientID - Patient-level extraction. if first_attr == "PatientID": + temp_folder = storage_folder + "/cfind-temp" for pid in range(0, length): sleep_for_nightly_mode() patient = firsts[pid] if (not resume) or (resume and (patient not in extracted_ones)): - if storage_folder == "CFIND-ONLY": - inc = random.randint(0,1000000) + if filepath == "CFIND-ONLY": + if not os.path.exists(temp_folder): + os.makedirs(temp_folder) + + inc = random.randint(0, 1000000) subprocess.call("{0}/findscu -c {1} -b {2} -M PatientRoot -m PatientID={3} " "-r StudyInstanceUID -r StudyDescription -x description.csv.xsl " - "--out-cat --out-file {4}_{5}.csv --out-dir .".format( - DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, csv_file, inc), shell=True) + "--out-cat --out-file {4}/{5}.csv --out-dir .".format( + DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, temp_folder, inc), shell=True) else: subprocess.call("{0}/movescu -c {1} -b {2} -M PatientRoot -m PatientID={3} --dest {4}".format( DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, DEST_AET), shell=True) extracted_ones.append(patient) + all_files = glob.glob(os.path.join(temp_folder, "*.csv")) + df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) + df_merged = pd.concat(df_from_each_file, ignore_index=True) + df_merged.to_csv(storage_folder + "merged.csv") + shutil.rmtree(temp_folder) + # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level. # "Any" mode. Example: Extractions based on just AccessionNumber of AcquisitionDate. else: diff --git a/modules/cold-extraction/README.md b/modules/cold-extraction/README.md index d46d403..9ba65a2 100644 --- a/modules/cold-extraction/README.md +++ b/modules/cold-extraction/README.md @@ -171,7 +171,7 @@ Niffler experimentally supports a CFIND-ONLY mode. To activate, use the below value, ``` - "StorageFolder": "CFIND-ONLY", + "FilePath": "CFIND-ONLY", ``` As of now, this is only supported for the below mode, C-FIND based entirely on PatientID. ``` @@ -179,9 +179,8 @@ As of now, this is only supported for the below mode, C-FIND based entirely on P "FirstAttr": "PatientID", "FirstIndex": 0, ``` -The output will be a list of CSV files consisting of PatientID, StudyInstanceUI, and StudyDescription. - -The CSV outputs will be in the folder of the original CSV file, with the name of CsvFile appended by random numbers. +The output will be CSV file consisting of PatientID, StudyInstanceUID, AccessionNumber, and StudyDescription +in the StorageFolder. ## Troubleshooting diff --git a/modules/cold-extraction/description.csv.xsl b/modules/cold-extraction/description.csv.xsl index fa0b5ea..986e15b 100644 --- a/modules/cold-extraction/description.csv.xsl +++ b/modules/cold-extraction/description.csv.xsl @@ -40,12 +40,16 @@ + " + + " + , " " , - " - + " + " , " From 9227ce93e406cdad2b6a1b08aa844bfc6c671c24 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 18:44:36 -0400 Subject: [PATCH 2/9] Fix filepath typo --- modules/cold-extraction/ColdDataRetriever.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 4726a04..8846d43 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -142,7 +142,7 @@ def initialize(): logging.info("{0}: StoreScp process for the current Niffler extraction is starting now".format( datetime.datetime.now())) - if not filepath == "CFIND-ONLY": + if not file_path == "CFIND-ONLY": subprocess.call("{0}/storescp --accept-unknown --directory {1} --filepath {2} -b {3} > storescp.out &".format( DCM4CHE_BIN, storage_folder, file_path, QUERY_AET), shell=True) @@ -217,12 +217,12 @@ def retrieve(): sleep_for_nightly_mode() patient = firsts[pid] if (not resume) or (resume and (patient not in extracted_ones)): - if filepath == "CFIND-ONLY": + if file_path == "CFIND-ONLY": if not os.path.exists(temp_folder): os.makedirs(temp_folder) inc = random.randint(0, 1000000) - subprocess.call("{0}/findscu -c {1} -b {2} -M PatientRoot -m PatientID={3} " + subprocess.call("{0}/findscu -c {1} -b {2} -M PatientRoot -m PatientID={3} -r AccessionNumber " "-r StudyInstanceUID -r StudyDescription -x description.csv.xsl " "--out-cat --out-file {4}/{5}.csv --out-dir .".format( DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, temp_folder, inc), shell=True) From 39904729c734cf5fc4ef9123c79397d19a3c2502 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 18:47:50 -0400 Subject: [PATCH 3/9] Fix filepath typo --- modules/cold-extraction/ColdDataRetriever.py | 2 +- modules/cold-extraction/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 8846d43..857883e 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -235,7 +235,7 @@ def retrieve(): all_files = glob.glob(os.path.join(temp_folder, "*.csv")) df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) df_merged = pd.concat(df_from_each_file, ignore_index=True) - df_merged.to_csv(storage_folder + "merged.csv") + df_merged.to_csv(storage_folder + "/cfind-output.csv") shutil.rmtree(temp_folder) # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level. diff --git a/modules/cold-extraction/README.md b/modules/cold-extraction/README.md index 9ba65a2..084ef4f 100644 --- a/modules/cold-extraction/README.md +++ b/modules/cold-extraction/README.md @@ -179,7 +179,7 @@ As of now, this is only supported for the below mode, C-FIND based entirely on P "FirstAttr": "PatientID", "FirstIndex": 0, ``` -The output will be CSV file consisting of PatientID, StudyInstanceUID, AccessionNumber, and StudyDescription +The output will be in cfind-output.csv consisting of PatientID, StudyInstanceUID, AccessionNumber, and StudyDescription in the StorageFolder. ## Troubleshooting From 2150b49dbcd103e7cf9996f2e6c0c1d9e4ef14ab Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 18:56:27 -0400 Subject: [PATCH 4/9] Fix filepath typo --- modules/cold-extraction/ColdDataRetriever.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 857883e..ae30a83 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -232,11 +232,15 @@ def retrieve(): DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, DEST_AET), shell=True) extracted_ones.append(patient) - all_files = glob.glob(os.path.join(temp_folder, "*.csv")) - df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) - df_merged = pd.concat(df_from_each_file, ignore_index=True) - df_merged.to_csv(storage_folder + "/cfind-output.csv") - shutil.rmtree(temp_folder) + if file_path == "CFIND-ONLY": + cwd = os.getcwd() + os.chdir(temp_folder) + all_files = glob.glob('*.csv') + df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) + df_merged = pd.concat(df_from_each_file, ignore_index=True) + df_merged.to_csv(storage_folder + "/cfind-output.csv") + os.chdir(cwd) + shutil.rmtree(temp_folder) # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level. # "Any" mode. Example: Extractions based on just AccessionNumber of AcquisitionDate. From 761d9408760e2239ad7d5c4c7b4528e9c93294a0 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 19:04:06 -0400 Subject: [PATCH 5/9] Fix filepath typo --- modules/cold-extraction/ColdDataRetriever.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index ae30a83..b39bb67 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -212,7 +212,10 @@ def retrieve(): if number_of_query_attributes > 3 or number_of_query_attributes <= 1: # For the cases that extract entirely based on the PatientID - Patient-level extraction. if first_attr == "PatientID": - temp_folder = storage_folder + "/cfind-temp" + temp_folder = "csv/cfind-temp" + if not os.path.exists(temp_folder): + os.makedirs(temp_folder) + for pid in range(0, length): sleep_for_nightly_mode() patient = firsts[pid] @@ -233,13 +236,10 @@ def retrieve(): extracted_ones.append(patient) if file_path == "CFIND-ONLY": - cwd = os.getcwd() - os.chdir(temp_folder) - all_files = glob.glob('*.csv') + all_files = glob.glob(os.path.join(temp_folder, "*.csv")) df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) df_merged = pd.concat(df_from_each_file, ignore_index=True) df_merged.to_csv(storage_folder + "/cfind-output.csv") - os.chdir(cwd) shutil.rmtree(temp_folder) # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level. From 965c493769e5605572470077d082609d655d8a63 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 19:07:50 -0400 Subject: [PATCH 6/9] Fix filepath typo --- modules/cold-extraction/ColdDataRetriever.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index b39bb67..178c1c2 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -238,7 +238,7 @@ def retrieve(): if file_path == "CFIND-ONLY": all_files = glob.glob(os.path.join(temp_folder, "*.csv")) df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) - df_merged = pd.concat(df_from_each_file, ignore_index=True) + df_merged = pd.concat(df_from_each_file, ignore_index=False) df_merged.to_csv(storage_folder + "/cfind-output.csv") shutil.rmtree(temp_folder) From cb2de5ced7a33c2cc3b7edf32eb5ca7645859c8c Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 19:14:03 -0400 Subject: [PATCH 7/9] Fix csv merge --- modules/cold-extraction/ColdDataRetriever.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 178c1c2..9c17c72 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -236,10 +236,14 @@ def retrieve(): extracted_ones.append(patient) if file_path == "CFIND-ONLY": - all_files = glob.glob(os.path.join(temp_folder, "*.csv")) - df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files) - df_merged = pd.concat(df_from_each_file, ignore_index=False) - df_merged.to_csv(storage_folder + "/cfind-output.csv") + cwd = os.getcwd() + extension = 'csv' + all_filenames = [i for i in glob.glob('*.{}'.format(extension))] + # combine all files in the list + combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames]) + # export to csv + combined_csv.to_csv(storage_folder + "/cfind-output.csv", index=False, encoding='utf-8-sig') + os.chdir(cwd) shutil.rmtree(temp_folder) # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level. From ef0c9e0c85d7919344a24afccbb52a504a5190a6 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 19:14:53 -0400 Subject: [PATCH 8/9] Fix csv merge --- modules/cold-extraction/ColdDataRetriever.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 9c17c72..0bdc698 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -238,6 +238,7 @@ def retrieve(): if file_path == "CFIND-ONLY": cwd = os.getcwd() extension = 'csv' + os.chdir(temp_folder) all_filenames = [i for i in glob.glob('*.{}'.format(extension))] # combine all files in the list combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames]) From 6b7c8dff4c8f30ae5b640759ca03d681c4359ac1 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Tue, 20 Jul 2021 19:31:32 -0400 Subject: [PATCH 9/9] add new line for filemerge --- modules/cold-extraction/ColdDataRetriever.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py index 0bdc698..f2a8d4e 100644 --- a/modules/cold-extraction/ColdDataRetriever.py +++ b/modules/cold-extraction/ColdDataRetriever.py @@ -237,13 +237,13 @@ def retrieve(): if file_path == "CFIND-ONLY": cwd = os.getcwd() - extension = 'csv' os.chdir(temp_folder) - all_filenames = [i for i in glob.glob('*.{}'.format(extension))] - # combine all files in the list - combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames]) - # export to csv - combined_csv.to_csv(storage_folder + "/cfind-output.csv", index=False, encoding='utf-8-sig') + all_filenames = [i for i in glob.glob('*.*')] + with open(storage_folder + "/cfind-output.csv", 'w') as outfile: + for fname in all_filenames: + with open(fname) as infile: + for line in infile: + outfile.write(line) os.chdir(cwd) shutil.rmtree(temp_folder)