From 7cce9a75c53f24cd866dc4655a255451db14e0dc Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 18:31:58 -0400
Subject: [PATCH 1/9] merge the cfind output to a single file

---
 modules/cold-extraction/ColdDataRetriever.py | 23 +++++++++++++++-----
 modules/cold-extraction/README.md            |  7 +++---
 modules/cold-extraction/description.csv.xsl  |  8 +++++--
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 4795c17..4726a04 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -1,5 +1,5 @@
 import logging
-import os
+import os, glob
 import signal
 import csv
 import time
@@ -13,6 +13,7 @@
 import threading
 import argparse
 import random
+import pandas as pd
 
 from collections import defaultdict
 
@@ -141,7 +142,7 @@ def initialize():
     logging.info("{0}: StoreScp process for the current Niffler extraction is starting now".format(
         datetime.datetime.now()))
 
-    if not storage_folder == "CFIND-ONLY":
+    if not filepath == "CFIND-ONLY":
         subprocess.call("{0}/storescp --accept-unknown --directory {1} --filepath {2} -b {3} > storescp.out &".format(
             DCM4CHE_BIN, storage_folder, file_path, QUERY_AET), shell=True)
 
@@ -211,22 +212,32 @@ def retrieve():
     if number_of_query_attributes > 3 or number_of_query_attributes <= 1:
         # For the cases that extract entirely based on the PatientID - Patient-level extraction.
         if first_attr == "PatientID":
+            temp_folder = storage_folder + "/cfind-temp"
             for pid in range(0, length):
                 sleep_for_nightly_mode()
                 patient = firsts[pid]
                 if (not resume) or (resume and (patient not in extracted_ones)):
-                    if storage_folder == "CFIND-ONLY":
-                        inc = random.randint(0,1000000)
+                    if filepath == "CFIND-ONLY":
+                        if not os.path.exists(temp_folder):
+                            os.makedirs(temp_folder)
+
+                        inc = random.randint(0, 1000000)
                         subprocess.call("{0}/findscu -c {1} -b {2} -M PatientRoot -m PatientID={3} "
                                         "-r StudyInstanceUID -r StudyDescription -x description.csv.xsl "
-                                        "--out-cat --out-file {4}_{5}.csv --out-dir .".format(
-                            DCM4CHE_BIN, SRC_AET, QUERY_AET,  patient, csv_file, inc), shell=True)
+                                        "--out-cat --out-file {4}/{5}.csv --out-dir .".format(
+                            DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, temp_folder, inc), shell=True)
 
                     else:
                         subprocess.call("{0}/movescu -c {1} -b {2} -M PatientRoot -m PatientID={3} --dest {4}".format(
                             DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, DEST_AET), shell=True)
                     extracted_ones.append(patient)
 
+            all_files = glob.glob(os.path.join(temp_folder, "*.csv"))
+            df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
+            df_merged = pd.concat(df_from_each_file, ignore_index=True)
+            df_merged.to_csv(storage_folder + "merged.csv")
+            shutil.rmtree(temp_folder)
+
         # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level.
         # "Any" mode. Example: Extractions based on just AccessionNumber of AcquisitionDate.
         else:
diff --git a/modules/cold-extraction/README.md b/modules/cold-extraction/README.md
index d46d403..9ba65a2 100644
--- a/modules/cold-extraction/README.md
+++ b/modules/cold-extraction/README.md
@@ -171,7 +171,7 @@ Niffler experimentally supports a CFIND-ONLY mode.
 
 To activate, use the below value,
 ```
-	"StorageFolder": "CFIND-ONLY",
+	"FilePath": "CFIND-ONLY",
 ```
 As of now, this is only supported for the below mode, C-FIND based entirely on PatientID.
 ```
@@ -179,9 +179,8 @@ As of now, this is only supported for the below mode, C-FIND based entirely on P
 	"FirstAttr": "PatientID",
 	"FirstIndex": 0,
 ```
-The output will be a list of CSV files consisting of PatientID, StudyInstanceUI, and StudyDescription.
-
-The CSV outputs will be in the folder of the original CSV file, with the name of CsvFile appended by random numbers.
+The output will be CSV file consisting of PatientID, StudyInstanceUID, AccessionNumber, and StudyDescription 
+in the StorageFolder.
 
 ## Troubleshooting
 
diff --git a/modules/cold-extraction/description.csv.xsl b/modules/cold-extraction/description.csv.xsl
index fa0b5ea..986e15b 100644
--- a/modules/cold-extraction/description.csv.xsl
+++ b/modules/cold-extraction/description.csv.xsl
@@ -40,12 +40,16 @@
   <xsl:output method="text"/>
 
   <xsl:template match="/NativeDicomModel">
+    <xsl:text>"</xsl:text>
+    <xsl:apply-templates select="DicomAttribute[@tag='00100020']"/>
+    <xsl:text>"</xsl:text>
+    <xsl:text>,</xsl:text>
     <xsl:text>"</xsl:text>
     <xsl:apply-templates select="DicomAttribute[@tag='0020000D']"/>
     <xsl:text>"</xsl:text>
     <xsl:text>,</xsl:text>
-    <xsl:text>"</xsl:text>    
-    <xsl:apply-templates select="DicomAttribute[@tag='00100020']"/>
+    <xsl:text>"</xsl:text>
+    <xsl:apply-templates select="DicomAttribute[@tag='00080050']"/>
     <xsl:text>"</xsl:text>
     <xsl:text>,</xsl:text>
     <xsl:text>"</xsl:text>

From 9227ce93e406cdad2b6a1b08aa844bfc6c671c24 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 18:44:36 -0400
Subject: [PATCH 2/9] Fix filepath typo

---
 modules/cold-extraction/ColdDataRetriever.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 4726a04..8846d43 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -142,7 +142,7 @@ def initialize():
     logging.info("{0}: StoreScp process for the current Niffler extraction is starting now".format(
         datetime.datetime.now()))
 
-    if not filepath == "CFIND-ONLY":
+    if not file_path == "CFIND-ONLY":
         subprocess.call("{0}/storescp --accept-unknown --directory {1} --filepath {2} -b {3} > storescp.out &".format(
             DCM4CHE_BIN, storage_folder, file_path, QUERY_AET), shell=True)
 
@@ -217,12 +217,12 @@ def retrieve():
                 sleep_for_nightly_mode()
                 patient = firsts[pid]
                 if (not resume) or (resume and (patient not in extracted_ones)):
-                    if filepath == "CFIND-ONLY":
+                    if file_path == "CFIND-ONLY":
                         if not os.path.exists(temp_folder):
                             os.makedirs(temp_folder)
 
                         inc = random.randint(0, 1000000)
-                        subprocess.call("{0}/findscu -c {1} -b {2} -M PatientRoot -m PatientID={3} "
+                        subprocess.call("{0}/findscu -c {1} -b {2} -M PatientRoot -m PatientID={3} -r AccessionNumber "
                                         "-r StudyInstanceUID -r StudyDescription -x description.csv.xsl "
                                         "--out-cat --out-file {4}/{5}.csv --out-dir .".format(
                             DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, temp_folder, inc), shell=True)

From 39904729c734cf5fc4ef9123c79397d19a3c2502 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 18:47:50 -0400
Subject: [PATCH 3/9] Write merged cfind output to cfind-output.csv

---
 modules/cold-extraction/ColdDataRetriever.py | 2 +-
 modules/cold-extraction/README.md            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 8846d43..857883e 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -235,7 +235,7 @@ def retrieve():
             all_files = glob.glob(os.path.join(temp_folder, "*.csv"))
             df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
             df_merged = pd.concat(df_from_each_file, ignore_index=True)
-            df_merged.to_csv(storage_folder + "merged.csv")
+            df_merged.to_csv(storage_folder + "/cfind-output.csv")
             shutil.rmtree(temp_folder)
 
         # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level.
diff --git a/modules/cold-extraction/README.md b/modules/cold-extraction/README.md
index 9ba65a2..084ef4f 100644
--- a/modules/cold-extraction/README.md
+++ b/modules/cold-extraction/README.md
@@ -179,7 +179,7 @@ As of now, this is only supported for the below mode, C-FIND based entirely on P
 	"FirstAttr": "PatientID",
 	"FirstIndex": 0,
 ```
-The output will be CSV file consisting of PatientID, StudyInstanceUID, AccessionNumber, and StudyDescription 
+The output will be in cfind-output.csv consisting of PatientID, StudyInstanceUID, AccessionNumber, and StudyDescription 
 in the StorageFolder.
 
 ## Troubleshooting

From 2150b49dbcd103e7cf9996f2e6c0c1d9e4ef14ab Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 18:56:27 -0400
Subject: [PATCH 4/9] Merge cfind output only in CFIND-ONLY mode

---
 modules/cold-extraction/ColdDataRetriever.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 857883e..ae30a83 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -232,11 +232,15 @@ def retrieve():
                             DCM4CHE_BIN, SRC_AET, QUERY_AET, patient, DEST_AET), shell=True)
                     extracted_ones.append(patient)
 
-            all_files = glob.glob(os.path.join(temp_folder, "*.csv"))
-            df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
-            df_merged = pd.concat(df_from_each_file, ignore_index=True)
-            df_merged.to_csv(storage_folder + "/cfind-output.csv")
-            shutil.rmtree(temp_folder)
+            if file_path == "CFIND-ONLY":
+                cwd = os.getcwd()
+                os.chdir(temp_folder)
+                all_files = glob.glob('*.csv')
+                df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
+                df_merged = pd.concat(df_from_each_file, ignore_index=True)
+                df_merged.to_csv(storage_folder + "/cfind-output.csv")
+                os.chdir(cwd)
+                shutil.rmtree(temp_folder)
 
         # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level.
         # "Any" mode. Example: Extractions based on just AccessionNumber of AcquisitionDate.

From 761d9408760e2239ad7d5c4c7b4528e9c93294a0 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 19:04:06 -0400
Subject: [PATCH 5/9] Use csv/cfind-temp as the cfind temp folder

---
 modules/cold-extraction/ColdDataRetriever.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index ae30a83..b39bb67 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -212,7 +212,10 @@ def retrieve():
     if number_of_query_attributes > 3 or number_of_query_attributes <= 1:
         # For the cases that extract entirely based on the PatientID - Patient-level extraction.
         if first_attr == "PatientID":
-            temp_folder = storage_folder + "/cfind-temp"
+            temp_folder = "csv/cfind-temp"
+            if not os.path.exists(temp_folder):
+                os.makedirs(temp_folder)
+
             for pid in range(0, length):
                 sleep_for_nightly_mode()
                 patient = firsts[pid]
@@ -233,13 +236,10 @@ def retrieve():
                     extracted_ones.append(patient)
 
             if file_path == "CFIND-ONLY":
-                cwd = os.getcwd()
-                os.chdir(temp_folder)
-                all_files = glob.glob('*.csv')
+                all_files = glob.glob(os.path.join(temp_folder, "*.csv"))
                 df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
                 df_merged = pd.concat(df_from_each_file, ignore_index=True)
                 df_merged.to_csv(storage_folder + "/cfind-output.csv")
-                os.chdir(cwd)
                 shutil.rmtree(temp_folder)
 
         # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level.

From 965c493769e5605572470077d082609d655d8a63 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 19:07:50 -0400
Subject: [PATCH 6/9] Keep per-file index when merging cfind output

---
 modules/cold-extraction/ColdDataRetriever.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index b39bb67..178c1c2 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -238,7 +238,7 @@ def retrieve():
             if file_path == "CFIND-ONLY":
                 all_files = glob.glob(os.path.join(temp_folder, "*.csv"))
                 df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
-                df_merged = pd.concat(df_from_each_file, ignore_index=True)
+                df_merged = pd.concat(df_from_each_file, ignore_index=False)
                 df_merged.to_csv(storage_folder + "/cfind-output.csv")
                 shutil.rmtree(temp_folder)
 

From cb2de5ced7a33c2cc3b7edf32eb5ca7645859c8c Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 19:14:03 -0400
Subject: [PATCH 7/9] Fix csv merge

---
 modules/cold-extraction/ColdDataRetriever.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 178c1c2..9c17c72 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -236,10 +236,14 @@ def retrieve():
                     extracted_ones.append(patient)
 
             if file_path == "CFIND-ONLY":
-                all_files = glob.glob(os.path.join(temp_folder, "*.csv"))
-                df_from_each_file = (pd.read_csv(f, sep=',') for f in all_files)
-                df_merged = pd.concat(df_from_each_file, ignore_index=False)
-                df_merged.to_csv(storage_folder + "/cfind-output.csv")
+                cwd = os.getcwd()
+                extension = 'csv'
+                all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
+                # combine all files in the list
+                combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
+                # export to csv
+                combined_csv.to_csv(storage_folder + "/cfind-output.csv", index=False, encoding='utf-8-sig')
+                os.chdir(cwd)
                 shutil.rmtree(temp_folder)
 
         # For the cases that extract based on a single property other than EMPI/PatientID. Goes to study level.

From ef0c9e0c85d7919344a24afccbb52a504a5190a6 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 19:14:53 -0400
Subject: [PATCH 8/9] Fix csv merge

---
 modules/cold-extraction/ColdDataRetriever.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 9c17c72..0bdc698 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -238,6 +238,7 @@ def retrieve():
             if file_path == "CFIND-ONLY":
                 cwd = os.getcwd()
                 extension = 'csv'
+                os.chdir(temp_folder)
                 all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
                 # combine all files in the list
                 combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])

From 6b7c8dff4c8f30ae5b640759ca03d681c4359ac1 Mon Sep 17 00:00:00 2001
From: Pradeeban Kathiravelu <kk.pradeeban@gmail.com>
Date: Tue, 20 Jul 2021 19:31:32 -0400
Subject: [PATCH 9/9] Merge cfind output by line-by-line concatenation

---
 modules/cold-extraction/ColdDataRetriever.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/cold-extraction/ColdDataRetriever.py b/modules/cold-extraction/ColdDataRetriever.py
index 0bdc698..f2a8d4e 100644
--- a/modules/cold-extraction/ColdDataRetriever.py
+++ b/modules/cold-extraction/ColdDataRetriever.py
@@ -237,13 +237,13 @@ def retrieve():
 
             if file_path == "CFIND-ONLY":
                 cwd = os.getcwd()
-                extension = 'csv'
                 os.chdir(temp_folder)
-                all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
-                # combine all files in the list
-                combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])
-                # export to csv
-                combined_csv.to_csv(storage_folder + "/cfind-output.csv", index=False, encoding='utf-8-sig')
+                all_filenames = [i for i in glob.glob('*.*')]
+                with open(storage_folder + "/cfind-output.csv", 'w') as outfile:
+                    for fname in all_filenames:
+                        with open(fname) as infile:
+                            for line in infile:
+                                outfile.write(line)
                 os.chdir(cwd)
                 shutil.rmtree(temp_folder)