Merge pull request #111 from Emory-HITI/dev
Fix metadata extraction with private tags
pradeeban authored Mar 13, 2021
2 parents bd05e40 + 31ea6a1 commit 9644da5
Showing 3 changed files with 23 additions and 13 deletions.
docs/index.md: 2 changes (1 addition, 1 deletion)
@@ -1,4 +1,4 @@
-# Niffler: A DICOM Framework for Machine Learning Pipelines against Real-Time Radiology Images
+# Niffler: A DICOM Framework for Machine Learning Pipelines and Processing Workflows

Niffler is a research project for DICOM networking, supporting efficient DICOM retrievals and subsequent ML workflows on the images and metadata in a research environment.

modules/meta-extraction/MetadataExtractor.py: 33 changes (21 additions, 12 deletions)
@@ -42,8 +42,8 @@
IS_DCM4CHE_NOT_RUNNING = True
logging.basicConfig(level=logging.INFO)

-FEATURES_FOLDER = "conf/"
-PICKLE_FOLDER = "pickles/"
+FEATURES_FOLDER = os.getcwd() + "/conf/"
+PICKLE_FOLDER = os.getcwd() + "/pickles/"


# Variables to track progress between iterations.
@@ -68,8 +68,6 @@
except:
logging.info("Unable to load a valid pickle file. Initialized with empty value for processed_and_deleted_series")



# Read the txt file which includes the features, then extract them
os.chdir(FEATURES_FOLDER)
txt_files = glob.glob('*.txt')
@@ -84,7 +82,6 @@
feature_files.append(filename)



# Function for getting tuple for field, val pairs for this file
# plan is instance of dicom class, the data for single mammo file
def get_tuples(plan, features, outlist = None, key = ""):
@@ -109,9 +106,23 @@ def get_tuples(plan, features, outlist = None, key = ""):
value1 = tuple(value1)
elif type(value1) is pydicom.uid.UID:
value1 = str(value1)
-outlist.append((key + aa, value1)) # appends name, value pair for this file. these are later concatenated to the dataframe
-except KeyError:
-logging.debug("Key error encountered for %s", aa)
+else:
+if (value1):
+value1 = str(value1)
+else:
+value1 = ""
+if key and aa:
+key_ = key + aa
+elif aa:
+key_ = aa
+elif key:
+key_ = key
+else:
+key_ = ""
+logging.debug("key and aa are empty")
+outlist.append((key_, value1)) # appends name, value pair for this file. these are later concatenated to the dataframe
+except KeyError as e:
+logging.debug("The value is empty, %s", e)
return outlist
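
To make the intent of this hunk concrete, here is a minimal standalone sketch (not part of the diff) of the fallback it introduces: a value that is not one of the explicitly converted pydicom types is stringified (or replaced with an empty string), and the column name falls back to whichever of key and aa is non-empty, so a (name, value) pair is still emitted for elements such as private tags. The helper name to_pair is illustrative only.

import logging

def to_pair(key, aa, value1):
    # Re-statement of the new fallback: stringify anything not already
    # converted, and tolerate an empty key or attribute name.
    value1 = str(value1) if value1 else ""
    if key and aa:
        key_ = key + aa
    elif aa:
        key_ = aa
    elif key:
        key_ = key
    else:
        key_ = ""
        logging.debug("key and aa are empty")
    return (key_, value1)

print(to_pair("", "0x00251011", 42))   # ('0x00251011', '42')
print(to_pair("Series_", "", None))    # ('Series_', '')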


@@ -120,7 +131,6 @@ def get_dict_fields(bigdict, features):
return {x: bigdict[x] for x in features if x in bigdict}



# The core method of extracting metadata
def extract():
os.chdir(STORAGE_FOLDER)
@@ -141,7 +151,6 @@ def extract():
logging.debug('There are no objects found. Waiting for new data to arrive.')



# Extract a list of one instance from each series
def extract_metadata():
global processed_series_but_yet_to_delete
@@ -154,7 +163,6 @@ def extract_metadata():
first_inst_of_series = list()
headerlist = []


if EXTRACTION_RUNNING:
logging.info("Previous Extraction Thread Still Running. Skip this iteration.......................")
else:
@@ -173,7 +181,6 @@

logging.info('Number of series to be processed: %s', len(series))


for series_path in series:
extracted_in_this_iteration += 1
logging.debug('Extracted: %s %s %s %s', str(extracted_in_this_iteration), ' out of ', str(len(series)),
@@ -258,6 +265,8 @@ def clear_storage():
def update_pickle():
global processed_series_but_yet_to_delete
global processed_and_deleted_series

+os.chdir(PICKLE_FOLDER)

# Pickle using the highest protocol available.
with open(PICKLE_FOLDER + 'processed_series_but_yet_to_delete.pickle', 'wb') as f:
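
Related sketch (not part of the diff), assuming the progress trackers are plain Python sets and using the absolute PICKLE_FOLDER defined above: the save/load round trip that update_pickle and the start-up code rely on between iterations. The helper names save_tracker and load_tracker and the example series UID are illustrative.

import os
import pickle

PICKLE_FOLDER = os.getcwd() + "/pickles/"
os.makedirs(PICKLE_FOLDER, exist_ok=True)

def save_tracker(name, data):
    # Pickle using the highest protocol available, as in update_pickle().
    with open(PICKLE_FOLDER + name + '.pickle', 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)

def load_tracker(name):
    # Fall back to an empty set when no valid pickle exists yet, mirroring
    # the try/except around the loads earlier in the module.
    try:
        with open(PICKLE_FOLDER + name + '.pickle', 'rb') as f:
            return pickle.load(f)
    except (OSError, pickle.UnpicklingError, EOFError):
        return set()

save_tracker('processed_series_but_yet_to_delete', {'1.2.840.99999.1'})
print(load_tracker('processed_series_but_yet_to_delete'))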
modules/meta-extraction/conf/featureset1.txt: 1 change (1 addition, 0 deletions)
@@ -47,3 +47,4 @@ ManufacturerModelName
0x00090010
0x0019100B
0x0019105A
+0x00251011
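
The new entry is a private tag written as a hex (group, element) code rather than a DICOM keyword. Below is a minimal sketch, assuming pydicom and an illustrative file path, of how such a code can be resolved against a dataset; the actual lookup in MetadataExtractor.py may differ.

import pydicom
from pydicom.tag import Tag

ds = pydicom.dcmread("/opt/data/example-instance.dcm")  # illustrative path

tag = Tag(0x00251011)  # the private tag added to featureset1.txt
if tag in ds:
    elem = ds[tag]
    # Private elements usually have no dictionary keyword, so fall back to
    # the tag itself and stringify the value when building metadata columns.
    name = elem.keyword or str(tag)
    print(name, str(elem.value))
else:
    print("Tag (0025,1011) not present in this instance")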
