
Commit

filter.py - Allow tags to be specified using a string containing a 0x-prefix hex number (#254)

* filter.py - Allow tags to be specified using a string containing a 0x-prefix hex number
* increment version due to change in filter.py
* Update CHANGELOG.md with URL and new version number

Co-authored-by: howff <howff>
howff authored May 10, 2023
1 parent 38717b8 commit f714359
Showing 24 changed files with 31 additions and 60 deletions.
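
In practice, this change means a deid recipe filter can reference a private tag by its 0x-prefixed hex number instead of a dictionary keyword. A minimal sketch of what such a filter entry might look like (the format header, label, tag number, and value below are illustrative assumptions, not part of this commit):

FORMAT dicom

%filter graylist

LABEL Hypothetical private-tag screen
  contains 0x00090010 SIEMENS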
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
Referenced versions in headers are tagged on Github, in parentheses are for pypi.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- Allow filter tag names to be 0x-prefix hex numbers so private tags can be referenced in recipes [#253](https://github.com/pydicom/deid/issues/253) (0.3.22)
- Fix incorrect coordinate definition for GE CT [#249](https://github.com/pydicom/deid/issues/249)
- Circular import error [#247](https://github.com/pydicom/deid/issues/247) (0.3.21)
- Expand BLANK Action to additional VRs [#241](https://github.com/pydicom/deid/issues/241) (0.3.2)
2 changes: 0 additions & 2 deletions deid/config/__init__.py
@@ -28,7 +28,6 @@ class DeidRecipe:
"""

def __init__(self, deid=None, base=False, default_base="dicom"):

# If deid is None, use the default
if deid is None:
base = True
@@ -49,7 +48,6 @@ def load(self, deid):
"""
deid = get_deid(deid)
if deid is not None:

# Update our list of files
self._files.append(deid)
self.files = list(set(self.files))
11 changes: 0 additions & 11 deletions deid/config/utils.py
@@ -37,12 +37,10 @@ def load_combined_deid(deids):
deid = None

for single_deid in deids:

# If not a tag or path, returns None
next_deid = get_deid(tag=single_deid, exit_on_fail=False, quiet=True, load=True)

if next_deid is not None:

# Formats must match
if found_format is None:
found_format = next_deid["format"]
@@ -57,7 +55,6 @@
if deid is None:
deid = next_deid
else:

# Update filter, appending to end to give first preference
if "filter" in next_deid:
if "filter" not in deid:
@@ -112,7 +109,6 @@ def load_deid(path=None):
section = None

while spec:

# Clean up white trailing/leading space
line = spec.pop(0).strip()

@@ -126,7 +122,6 @@

# A new section?
elif line.startswith("%"):

# Remove any comments
line = line.split("#", 1)[0].strip()

@@ -151,7 +146,6 @@

# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
elif line.upper().startswith(actions):

# Start of a filter group
if line.upper().startswith("LABEL") and section == "filter":
members = parse_filter_group(spec)
@@ -355,7 +349,6 @@ def parse_label(section, config, section_name, members, label=None):
if not member.lower().startswith(filters):
bot.warning("%s filter is not valid, skipping." % member.lower())
else:

# Returns single member with field, values, operator,
# Or if multiple or/and in statement, a list
entry = parse_member(member, operator)
@@ -380,7 +373,6 @@ def parse_member(members, operator=None):
members = [members]

while len(members) > 0:

operator = None
value = None
member = members.pop(0).strip()
@@ -399,7 +391,6 @@
operator = "+"

if operator is not None:

member, rest = member.split(operator, 1)

# The rest is only valid if contains a filter statement
@@ -470,7 +461,6 @@ def add_section(config, section, section_name=None):
bot.exit("%s is not a valid section." % section)

if section not in config:

# If a section is named, we have more one level (dict)
if section_name is not None:
config[section] = OrderedDict()
@@ -531,7 +521,6 @@ def parse_group_action(section, line, config, section_name):

# Values supports FIELD or SPLIT
elif section == "values":

# If we have a third set of arguments
if parts:
value = _remove_comments(parts)
2 changes: 0 additions & 2 deletions deid/dicom/actions/jitter.py
@@ -43,7 +43,6 @@ def jitter_timestamp(field, value):
new_value = original

if original is not None:

# Create default for new value
new_value = None
dcmvr = field.element.VR
@@ -67,7 +66,6 @@
)

else:

# If the field type is not supplied, attempt to parse different formats
for fmtstr in ["%Y%m%d", "%Y%m%d%H%M%S.%f%z", "%Y%m%d%H%M%S.%f"]:
try:
5 changes: 0 additions & 5 deletions deid/dicom/fields.py
@@ -157,7 +157,6 @@ def extract_sequence(sequence, prefix=None):
"""
items = {}
for item in sequence:

# If it's a Dataset, we need to further unwrap it
if isinstance(item, Dataset):
for subitem in item:
@@ -224,7 +223,6 @@ def expand_field_expression(field, dicom, contenders=None):

# Loop through fields, all are strings STOPPED HERE NEED TO ADDRESS EMPTY NAME
for uid, field in contenders.items():

# Apply expander to string for name OR to tag string
if expander.lower() in ["endswith", "startswith", "contains"]:
if field.name_contains(expression):
@@ -270,7 +268,6 @@ def add_element(element, name, uid, is_filemeta):
seen.append(uid)

while datasets:

# Grab the first dataset, usually just the dicom
dataset = datasets.pop(0)

@@ -281,7 +278,6 @@

# Includes private tags, sequences flattened, non-null values
for contender in dataset:

# All items should be data elements, skip based on keyword or tag
if contender.keyword in skip or str(contender.tag) in skip:
continue
@@ -297,7 +293,6 @@

# if it's a sequence, extract with prefix and index
if isinstance(contender.value, Sequence) and expand_sequences is True:

# Add the contender (usually type Dataset) to fields
add_element(contender, name, uid, is_filemeta)

7 changes: 4 additions & 3 deletions deid/dicom/filter.py
@@ -24,11 +24,14 @@ def apply_filter(dicom, field, filter_name, value):
Parameters
==========
dicom: the pydicom.dataset Dataset (pydicom.read_file)
field: the name of the field to apply the filter to
field: the name of the field to apply the filter to,
or the tag number as a string '0xGGGGEEEE'
filter_name: the name of the filter to apply (e.g., contains)
value: the value to set, if filter_name is valid
"""
if "0x" in field:
field = int(field, 0) # 0=decode hex with 0x prefix
filter_name = filter_name.lower().strip()

if filter_name == "contains":
@@ -74,7 +77,6 @@ def equalsBase(self, field, term, ignore_case=True, not_equals=False):
# In this loop we can only switch to True
for contender in contenders:
if contender is not None:

try:
# both converted to string (handles tags)
contender = str(contender)
@@ -187,7 +189,6 @@ def compareBase(self, field, expression, func, ignore_case=True):

for contender in contenders:
if contender is not None:

try:
contender = str(contender)
expression = str(expression)
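
The functional change in apply_filter above is small: a field given as a 0x-prefixed string is converted with int(field, 0) into the integer tag that pydicom understands. A standalone sketch of that conversion (the tag number and value are hypothetical, not code from this commit):

from pydicom.dataset import Dataset

ds = Dataset()
ds.add_new(0x00090010, "LO", "ACME PRIVATE CREATOR")  # hypothetical private element

field = "0x00090010"
if "0x" in field:
    field = int(field, 0)  # base 0 honours the 0x prefix, so field == 0x00090010

# pydicom accepts the integer tag as a dataset key, which is how a filter
# criterion can now reference a private tag that has no dictionary keyword
print(ds[field].value)  # -> "ACME PRIVATE CREATOR"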
3 changes: 0 additions & 3 deletions deid/dicom/groups.py
@@ -23,7 +23,6 @@ def extract_values_list(dicom, actions, fields=None):
fields = get_fields(dicom)

for action in actions:

# Extract some subset of fields based on action
subset = expand_field_expression(
field=action["field"], dicom=dicom, contenders=fields
@@ -40,7 +39,6 @@

# Split action, can optionally have a "by" and/or minlength parameter
elif action["action"] == "SPLIT":

# Default values for split are length 1 and character empty space
bot.debug("Parsing action %s" % action)
split_by = " "
@@ -86,7 +84,6 @@ def extract_fields_list(dicom, actions, fields=None):
fields = get_fields(dicom)

for action in actions:

if action["action"] == "FIELD":
subset.update(
expand_field_expression(
2 changes: 0 additions & 2 deletions deid/dicom/header.py
@@ -56,7 +56,6 @@ def get_identifiers(
def remove_private_identifiers(
dicom_files, save=True, overwrite=False, output_folder=None, force=True
):

"""
Remove private identifiers.
@@ -99,7 +98,6 @@ def replace_identifiers(
remove_private=False,
disable_skip=False,
):

"""
Replace identifiers.
5 changes: 0 additions & 5 deletions deid/dicom/parser.py
@@ -150,7 +150,6 @@ def get_nested_field(self, field, return_parent=False):

# We keep going until we find the desired tag
if tag != desired:

# If the parent has been removed, we can't continue
if tag not in parent:
return None, desired
@@ -159,7 +158,6 @@

# Otherwise it's an index into a sequence
else:

# If the sequence is outside the bounds of the array of items
# within the sequence, we can't continue.
if int(uid) < 0 or int(uid) >= len(parent.value):
@@ -224,7 +222,6 @@ def parse(self, strip_sequences=False, remove_private=False):

# if we loaded a deid recipe
if self.recipe.deid is not None:

# Prepare additional lists of values and lookup fields (index by nested uid)
if self.recipe.has_values_lists():
for group, actions in self.recipe.get_values_lists().items():
@@ -461,7 +458,6 @@ def update_dicom(element, is_filemeta):

# Assume we don't want to add an empty value
if value is not None:

# If provided a field object, create based on keyword or tag identifier
name = field
if isinstance(field, DicomField):
@@ -541,7 +537,6 @@ def _run_action(self, field, action, value=None):

# Remove the field entirely
elif action == "REMOVE":

# If a value is defined, parse it (could be filter)
do_removal = True
if value is not None:
8 changes: 0 additions & 8 deletions deid/dicom/pixels/clean.py
@@ -50,7 +50,6 @@ def __init__(
font=None,
force=True,
):

if output_folder is None:
output_folder = get_temporary_name(prefix="clean")

@@ -89,7 +88,6 @@ def detect(self, dicom_file):
def clean(
self, fix_interpretation: bool = True, pixel_data_attribute: str = "PixelData"
) -> Optional[NDArray]:

if not self.results:
bot.warning(
"Use %s.detect() with a dicom file to find coordinates first." % self
@@ -312,7 +310,6 @@ def clean_pixel_data(
coordinates = []

for item in results["results"]:

# We iterate through coordinates in order specified in file
for coordinate_set in item.get("coordinates", []):

# Each is a list with [value, coordinate]
mask_value, new_coordinates = coordinate_set

if not isinstance(new_coordinates, list):
new_coordinates = [new_coordinates]

for new_coordinate in new_coordinates:

# Case 1: an "all" indicates applying to entire image
if new_coordinate.lower() == "all":

# 2D - Greyscale Image - Shape = (X, Y) OR 3D - RGB Image - Shape = (X, Y, Channel)
if len(original.shape) == 2 or (
len(original.shape) == 3 and dicom.SamplesPerPixel == 3
@@ -375,7 +369,6 @@ def clean_pixel_data(
# Now apply finished mask to the data
# RGB cine clip
if len(original.shape) == 4:

# np.tile does the copying and stacking of masks into the channel dim to produce 3D masks
# transposition to convert tile output (channel, X, Y) into (X, Y, channel)
# see: https://github.com/nquach/anonymize/blob/master/anonymize.py#L154
@@ -390,7 +383,6 @@

# RGB image or Greyscale cine clip
elif len(original.shape) == 3:

# This condition is ambiguous. If the image shape is 3, we may have a single frame RGB image: size (X, Y, channel)
# or a multiframe greyscale image: size (frames, X, Y). Interrogate the SamplesPerPixel field.
if dicom.SamplesPerPixel == 3:
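
As an aside on the np.tile comment retained in the clean.py context above: the mask stacking it describes is plain numpy, sketched below with made-up dimensions (illustrative only, not code from this commit):

import numpy as np

mask = np.ones((512, 512))                   # hypothetical 2D (X, Y) mask
stacked = np.tile(mask, (3, 1, 1))           # tile into the channel dim: (channel, X, Y)
rgb_mask = np.transpose(stacked, (1, 2, 0))  # reorder to (X, Y, channel) to match an RGB frame
print(stacked.shape, rgb_mask.shape)         # (3, 512, 512) (512, 512, 3)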
5 changes: 0 additions & 5 deletions deid/dicom/pixels/detect.py
@@ -66,7 +66,6 @@ def _has_burned_pixels_multi(dicom_files: List[Union[str, FileDataset]], force,


def _has_burned_pixels_single(dicom_file, force: bool, deid):

"""
Determine if a single dicom has burned pixels.
@@ -142,10 +141,8 @@ def _has_burned_pixels_single(dicom_file, force: bool, deid):
group_flags = [] # evaluation for a single line
group_descriptions = []
for group in item["filters"]:

# You cannot pop from the list
for a in range(len(group["action"])):

action = group["action"][a]
field = group["field"][a]
value = ""
@@ -268,14 +265,12 @@ def extract_coordinates(dicom, field):

# Now extract coordinates
for region in regions:

if (
"RegionLocationMinX0" in region
and "RegionLocationMinY0" in region
and "RegionLocationMaxX1" in region
and "RegionLocationMaxY1" in region
):

# https://gist.github.com/vsoch/df6957be12c34e62b21000603f1687e5
# minr, minc, maxr, maxc = coordinate
# self.cleaned[minc:maxc, minr:maxr] = 0 # should fill with black
1 change: 0 additions & 1 deletion deid/dicom/tags.py
@@ -45,7 +45,6 @@ def get_tag(field):
manifest = None

if len(found) > 0:

# (VR, VM, Name, Retired, Keyword)
found = found[0] # shouldn't ever have length > 1
tag = Tag(list(found)[0])
1 change: 0 additions & 1 deletion deid/dicom/validate.py
@@ -26,7 +26,6 @@ def validate_dicoms(dcm_files, force=False):

bot.debug("Checking %s dicom files for validation." % (len(dcm_files)))
for dcm_file in dcm_files:

try:
with open(dcm_file, "rb") as filey:
read_file(filey, force=force)
1 change: 0 additions & 1 deletion deid/logger/message.py
@@ -311,7 +311,6 @@ def get_logging_level():
level = int(os.environ.get("MESSAGELEVEL", DEBUG))

except ValueError:

level = os.environ.get("MESSAGELEVEL", DEBUG)
if level == "CRITICAL":
return FLAG
1 change: 0 additions & 1 deletion deid/main/__init__.py
@@ -163,7 +163,6 @@ def get_parser():


def main():

parser = get_parser()
try:
args = parser.parse_args()
