From f7143599c95e8d4ccee8301b580d5d3e7c05f238 Mon Sep 17 00:00:00 2001 From: howff <3064316+howff@users.noreply.github.com> Date: Wed, 10 May 2023 17:45:01 +0100 Subject: [PATCH] filter.py - Allow tags to be specified using a string containing a 0x-prefix hex number (#254) * filter.py - Allow tags to be specified using a string containing a 0x-prefix hex number * increment version due to change in filter.py * Update CHANGELOG.md with URL and new version number Co-authored-by: howff --- CHANGELOG.md | 1 + deid/config/__init__.py | 2 -- deid/config/utils.py | 11 ----------- deid/dicom/actions/jitter.py | 2 -- deid/dicom/fields.py | 5 ----- deid/dicom/filter.py | 7 ++++--- deid/dicom/groups.py | 3 --- deid/dicom/header.py | 2 -- deid/dicom/parser.py | 5 ----- deid/dicom/pixels/clean.py | 8 -------- deid/dicom/pixels/detect.py | 5 ----- deid/dicom/tags.py | 1 - deid/dicom/validate.py | 1 - deid/logger/message.py | 1 - deid/main/__init__.py | 1 - deid/main/identifiers.py | 1 - deid/tests/resources/filter_tag_number.dicom | 9 +++++++++ deid/tests/test_config.py | 1 - deid/tests/test_deid_recipe.py | 2 -- deid/tests/test_dicom_utils.py | 1 - deid/tests/test_filter_detect.py | 11 +++++++++++ deid/utils/actions.py | 3 --- deid/version.py | 2 +- docs/_docs/user-docs/recipe-filters.md | 6 +++++- 24 files changed, 31 insertions(+), 60 deletions(-) create mode 100644 deid/tests/resources/filter_tag_number.dicom diff --git a/CHANGELOG.md b/CHANGELOG.md index a79d7e1..a93c378 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: Referenced versions in headers are tagged on Github, in parentheses are for pypi. ## [vxx](https://github.com/pydicom/deid/tree/master) (master) +- Allow filter tag names to be 0x-prefix hex numbers so private tags can be referenced in recipes [#253](https://github.com/pydicom/deid/issues/253) (0.3.22) - Fix incorrect coordinate definition for GE CT [#249](https://github.com/pydicom/deid/issues/249) - Circular import error [#247](https://github.com/pydicom/deid/issues/247) (0.3.21) - Expand BLANK Action to additional VRs [#241](https://github.com/pydicom/deid/issues/241) (0.3.2) diff --git a/deid/config/__init__.py b/deid/config/__init__.py index a348d67..53949a1 100644 --- a/deid/config/__init__.py +++ b/deid/config/__init__.py @@ -28,7 +28,6 @@ class DeidRecipe: """ def __init__(self, deid=None, base=False, default_base="dicom"): - # If deid is None, use the default if deid is None: base = True @@ -49,7 +48,6 @@ def load(self, deid): """ deid = get_deid(deid) if deid is not None: - # Update our list of files self._files.append(deid) self.files = list(set(self.files)) diff --git a/deid/config/utils.py b/deid/config/utils.py index 77d07ed..39fc38a 100644 --- a/deid/config/utils.py +++ b/deid/config/utils.py @@ -37,12 +37,10 @@ def load_combined_deid(deids): deid = None for single_deid in deids: - # If not a tag or path, returns None next_deid = get_deid(tag=single_deid, exit_on_fail=False, quiet=True, load=True) if next_deid is not None: - # Formats must match if found_format is None: found_format = next_deid["format"] @@ -57,7 +55,6 @@ def load_combined_deid(deids): if deid is None: deid = next_deid else: - # Update filter, appending to end to give first preference if "filter" in next_deid: if "filter" not in deid: @@ -112,7 +109,6 @@ def load_deid(path=None): section = None while spec: - # Clean up white trailing/leading space line = spec.pop(0).strip() @@ -126,7 +122,6 @@ def load_deid(path=None): # A new section? elif line.startswith("%"): - # Remove any comments line = line.split("#", 1)[0].strip() @@ -151,7 +146,6 @@ def load_deid(path=None): # An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL) elif line.upper().startswith(actions): - # Start of a filter group if line.upper().startswith("LABEL") and section == "filter": members = parse_filter_group(spec) @@ -355,7 +349,6 @@ def parse_label(section, config, section_name, members, label=None): if not member.lower().startswith(filters): bot.warning("%s filter is not valid, skipping." % member.lower()) else: - # Returns single member with field, values, operator, # Or if multiple or/and in statement, a list entry = parse_member(member, operator) @@ -380,7 +373,6 @@ def parse_member(members, operator=None): members = [members] while len(members) > 0: - operator = None value = None member = members.pop(0).strip() @@ -399,7 +391,6 @@ def parse_member(members, operator=None): operator = "+" if operator is not None: - member, rest = member.split(operator, 1) # The rest is only valid if contains a filter statement @@ -470,7 +461,6 @@ def add_section(config, section, section_name=None): bot.exit("%s is not a valid section." % section) if section not in config: - # If a section is named, we have more one level (dict) if section_name is not None: config[section] = OrderedDict() @@ -531,7 +521,6 @@ def parse_group_action(section, line, config, section_name): # Values supports FIELD or SPLIT elif section == "values": - # If we have a third set of arguments if parts: value = _remove_comments(parts) diff --git a/deid/dicom/actions/jitter.py b/deid/dicom/actions/jitter.py index e263dd6..f93ca54 100644 --- a/deid/dicom/actions/jitter.py +++ b/deid/dicom/actions/jitter.py @@ -43,7 +43,6 @@ def jitter_timestamp(field, value): new_value = original if original is not None: - # Create default for new value new_value = None dcmvr = field.element.VR @@ -67,7 +66,6 @@ def jitter_timestamp(field, value): ) else: - # If the field type is not supplied, attempt to parse different formats for fmtstr in ["%Y%m%d", "%Y%m%d%H%M%S.%f%z", "%Y%m%d%H%M%S.%f"]: try: diff --git a/deid/dicom/fields.py b/deid/dicom/fields.py index 0d4abbb..85e7a35 100644 --- a/deid/dicom/fields.py +++ b/deid/dicom/fields.py @@ -157,7 +157,6 @@ def extract_sequence(sequence, prefix=None): """ items = {} for item in sequence: - # If it's a Dataset, we need to further unwrap it if isinstance(item, Dataset): for subitem in item: @@ -224,7 +223,6 @@ def expand_field_expression(field, dicom, contenders=None): # Loop through fields, all are strings STOPPED HERE NEED TO ADDRESS EMPTY NAME for uid, field in contenders.items(): - # Apply expander to string for name OR to tag string if expander.lower() in ["endswith", "startswith", "contains"]: if field.name_contains(expression): @@ -270,7 +268,6 @@ def add_element(element, name, uid, is_filemeta): seen.append(uid) while datasets: - # Grab the first dataset, usually just the dicom dataset = datasets.pop(0) @@ -281,7 +278,6 @@ def add_element(element, name, uid, is_filemeta): # Includes private tags, sequences flattened, non-null values for contender in dataset: - # All items should be data elements, skip based on keyword or tag if contender.keyword in skip or str(contender.tag) in skip: continue @@ -297,7 +293,6 @@ def add_element(element, name, uid, is_filemeta): # if it's a sequence, extract with prefix and index if isinstance(contender.value, Sequence) and expand_sequences is True: - # Add the contender (usually type Dataset) to fields add_element(contender, name, uid, is_filemeta) diff --git a/deid/dicom/filter.py b/deid/dicom/filter.py index 0889b34..f0ad5b1 100644 --- a/deid/dicom/filter.py +++ b/deid/dicom/filter.py @@ -24,11 +24,14 @@ def apply_filter(dicom, field, filter_name, value): Parameters ========== dicom: the pydicom.dataset Dataset (pydicom.read_file) - field: the name of the field to apply the filter to + field: the name of the field to apply the filter to, + or the tag number as a string '0xGGGGEEEE' filer_name: the name of the filter to apply (e.g., contains) value: the value to set, if filter_name is valid """ + if "0x" in field: + field = int(field, 0) # 0=decode hex with 0x prefix filter_name = filter_name.lower().strip() if filter_name == "contains": @@ -74,7 +77,6 @@ def equalsBase(self, field, term, ignore_case=True, not_equals=False): # In this loop we can only switch to True for contender in contenders: if contender is not None: - try: # both converted to string (handles tags) contender = str(contender) @@ -187,7 +189,6 @@ def compareBase(self, field, expression, func, ignore_case=True): for contender in contenders: if contender is not None: - try: contender = str(contender) expression = str(expression) diff --git a/deid/dicom/groups.py b/deid/dicom/groups.py index 491bce9..5fa5e69 100644 --- a/deid/dicom/groups.py +++ b/deid/dicom/groups.py @@ -23,7 +23,6 @@ def extract_values_list(dicom, actions, fields=None): fields = get_fields(dicom) for action in actions: - # Extract some subset of fields based on action subset = expand_field_expression( field=action["field"], dicom=dicom, contenders=fields @@ -40,7 +39,6 @@ def extract_values_list(dicom, actions, fields=None): # Split action, can optionally have a "by" and/or minlength parameter elif action["action"] == "SPLIT": - # Default values for split are length 1 and character empty space bot.debug("Parsing action %s" % action) split_by = " " @@ -86,7 +84,6 @@ def extract_fields_list(dicom, actions, fields=None): fields = get_fields(dicom) for action in actions: - if action["action"] == "FIELD": subset.update( expand_field_expression( diff --git a/deid/dicom/header.py b/deid/dicom/header.py index 99a7907..41e383a 100644 --- a/deid/dicom/header.py +++ b/deid/dicom/header.py @@ -56,7 +56,6 @@ def get_identifiers( def remove_private_identifiers( dicom_files, save=True, overwrite=False, output_folder=None, force=True ): - """ Remove private identifiers. @@ -99,7 +98,6 @@ def replace_identifiers( remove_private=False, disable_skip=False, ): - """ Replace identifiers. diff --git a/deid/dicom/parser.py b/deid/dicom/parser.py index c518da0..52189c9 100644 --- a/deid/dicom/parser.py +++ b/deid/dicom/parser.py @@ -150,7 +150,6 @@ def get_nested_field(self, field, return_parent=False): # We keep going until we find the desired tag if tag != desired: - # If the parent has been removed, we can't continue if tag not in parent: return None, desired @@ -159,7 +158,6 @@ def get_nested_field(self, field, return_parent=False): # Otherwise it's an index into a sequence else: - # If the sequence is outside the bounds of the array of items # within the sequence, we can't continue. if int(uid) < 0 or int(uid) >= len(parent.value): @@ -224,7 +222,6 @@ def parse(self, strip_sequences=False, remove_private=False): # if we loaded a deid recipe if self.recipe.deid is not None: - # Prepare additional lists of values and lookup fields (index by nested uid) if self.recipe.has_values_lists(): for group, actions in self.recipe.get_values_lists().items(): @@ -461,7 +458,6 @@ def update_dicom(element, is_filemeta): # Assume we don't want to add an empty value if value is not None: - # If provided a field object, create based on keyword or tag identifier name = field if isinstance(field, DicomField): @@ -541,7 +537,6 @@ def _run_action(self, field, action, value=None): # Remove the field entirely elif action == "REMOVE": - # If a value is defined, parse it (could be filter) do_removal = True if value is not None: diff --git a/deid/dicom/pixels/clean.py b/deid/dicom/pixels/clean.py index fe1e686..a656237 100644 --- a/deid/dicom/pixels/clean.py +++ b/deid/dicom/pixels/clean.py @@ -50,7 +50,6 @@ def __init__( font=None, force=True, ): - if output_folder is None: output_folder = get_temporary_name(prefix="clean") @@ -89,7 +88,6 @@ def detect(self, dicom_file): def clean( self, fix_interpretation: bool = True, pixel_data_attribute: str = "PixelData" ) -> Optional[NDArray]: - if not self.results: bot.warning( "Use %s.detect() with a dicom file to find coordinates first." % self @@ -312,10 +310,8 @@ def clean_pixel_data( coordinates = [] for item in results["results"]: - # We iterate through coordinates in order specified in file for coordinate_set in item.get("coordinates", []): - # Each is a list with [value, coordinate] mask_value, new_coordinates = coordinate_set @@ -323,10 +319,8 @@ def clean_pixel_data( new_coordinates = [new_coordinates] for new_coordinate in new_coordinates: - # Case 1: an "all" indicates applying to entire image if new_coordinate.lower() == "all": - # 2D - Greyscale Image - Shape = (X, Y) OR 3D - RGB Image - Shape = (X, Y, Channel) if len(original.shape) == 2 or ( len(original.shape) == 3 and dicom.SamplesPerPixel == 3 @@ -375,7 +369,6 @@ def clean_pixel_data( # Now apply finished mask to the data # RGB cine clip if len(original.shape) == 4: - # np.tile does the copying and stacking of masks into the channel dim to produce 3D masks # transposition to convert tile output (channel, X, Y) into (X, Y, channel) # see: https://github.com/nquach/anonymize/blob/master/anonymize.py#L154 @@ -390,7 +383,6 @@ def clean_pixel_data( # RGB image or Greyscale cine clip elif len(original.shape) == 3: - # This condition is ambiguous. If the image shape is 3, we may have a single frame RGB image: size (X, Y, channel) # or a multiframe greyscale image: size (frames, X, Y). Interrogate the SamplesPerPixel field. if dicom.SamplesPerPixel == 3: diff --git a/deid/dicom/pixels/detect.py b/deid/dicom/pixels/detect.py index b4001e3..d4b83c2 100644 --- a/deid/dicom/pixels/detect.py +++ b/deid/dicom/pixels/detect.py @@ -66,7 +66,6 @@ def _has_burned_pixels_multi(dicom_files: List[Union[str, FileDataset]], force, def _has_burned_pixels_single(dicom_file, force: bool, deid): - """ Determine if a single dicom has burned pixels. @@ -142,10 +141,8 @@ def _has_burned_pixels_single(dicom_file, force: bool, deid): group_flags = [] # evaluation for a single line group_descriptions = [] for group in item["filters"]: - # You cannot pop from the list for a in range(len(group["action"])): - action = group["action"][a] field = group["field"][a] value = "" @@ -268,14 +265,12 @@ def extract_coordinates(dicom, field): # Now extract coordinates for region in regions: - if ( "RegionLocationMinX0" in region and "RegionLocationMinY0" in region and "RegionLocationMaxX1" in region and "RegionLocationMaxY1" in region ): - # https://gist.github.com/vsoch/df6957be12c34e62b21000603f1687e5 # minr, minc, maxr, maxc = coordinate # self.cleaned[minc:maxc, minr:maxr] = 0 # should fill with black diff --git a/deid/dicom/tags.py b/deid/dicom/tags.py index 20f874d..4e3d79e 100644 --- a/deid/dicom/tags.py +++ b/deid/dicom/tags.py @@ -45,7 +45,6 @@ def get_tag(field): manifest = None if len(found) > 0: - # (VR, VM, Name, Retired, Keyword found = found[0] # shouldn't ever have length > 1 tag = Tag(list(found)[0]) diff --git a/deid/dicom/validate.py b/deid/dicom/validate.py index a710c97..2e298eb 100644 --- a/deid/dicom/validate.py +++ b/deid/dicom/validate.py @@ -26,7 +26,6 @@ def validate_dicoms(dcm_files, force=False): bot.debug("Checking %s dicom files for validation." % (len(dcm_files))) for dcm_file in dcm_files: - try: with open(dcm_file, "rb") as filey: read_file(filey, force=force) diff --git a/deid/logger/message.py b/deid/logger/message.py index 08d4caa..f7ce695 100644 --- a/deid/logger/message.py +++ b/deid/logger/message.py @@ -311,7 +311,6 @@ def get_logging_level(): level = int(os.environ.get("MESSAGELEVEL", DEBUG)) except ValueError: - level = os.environ.get("MESSAGELEVEL", DEBUG) if level == "CRITICAL": return FLAG diff --git a/deid/main/__init__.py b/deid/main/__init__.py index e7174c5..ac39852 100644 --- a/deid/main/__init__.py +++ b/deid/main/__init__.py @@ -163,7 +163,6 @@ def get_parser(): def main(): - parser = get_parser() try: args = parser.parse_args() diff --git a/deid/main/identifiers.py b/deid/main/identifiers.py index e11674d..9fa00ec 100644 --- a/deid/main/identifiers.py +++ b/deid/main/identifiers.py @@ -16,7 +16,6 @@ def main(args, parser): - # Global output folder output_folder = args.outfolder if output_folder is None: diff --git a/deid/tests/resources/filter_tag_number.dicom b/deid/tests/resources/filter_tag_number.dicom new file mode 100644 index 0000000..1c090aa --- /dev/null +++ b/deid/tests/resources/filter_tag_number.dicom @@ -0,0 +1,9 @@ +FORMAT dicom + +%filter blacklist + +LABEL - To be tested with Cat.dcm. Intended to NOT flag the image. + contains 0x00110003 Agfa + +%header +ADD PatientIdentityRemoved No diff --git a/deid/tests/test_config.py b/deid/tests/test_config.py index 43b7909..51dba8c 100644 --- a/deid/tests/test_config.py +++ b/deid/tests/test_config.py @@ -43,7 +43,6 @@ def test_load_deid(self): config = load_deid(self.tmpdir) def test_find_deid(self): - print("Testing finding deid file, referencing directly.") from deid.config.utils import find_deid diff --git a/deid/tests/test_deid_recipe.py b/deid/tests/test_deid_recipe.py index 9ddaf8f..9a9c43f 100644 --- a/deid/tests/test_deid_recipe.py +++ b/deid/tests/test_deid_recipe.py @@ -26,7 +26,6 @@ def tearDown(self): print("\n######################END########################") def test_load_recipe(self): - print("Case 1: Test loading default DeidRecipe") recipe = DeidRecipe() @@ -42,7 +41,6 @@ def test_load_recipe(self): recipe = DeidRecipe(self.deid) def test_get_functions(self): - recipe = DeidRecipe(self.deid) # Format diff --git a/deid/tests/test_dicom_utils.py b/deid/tests/test_dicom_utils.py index 6eeb66b..2829e3a 100644 --- a/deid/tests/test_dicom_utils.py +++ b/deid/tests/test_dicom_utils.py @@ -64,7 +64,6 @@ def test_get_files_as_list(self): self.assertEqual(found, expected) def test_jitter_timestamp(self): - from deid.dicom.actions import jitter_timestamp from deid.dicom.fields import DicomField from deid.dicom.tags import get_tag diff --git a/deid/tests/test_filter_detect.py b/deid/tests/test_filter_detect.py index a4add77..06a882a 100644 --- a/deid/tests/test_filter_detect.py +++ b/deid/tests/test_filter_detect.py @@ -47,6 +47,17 @@ def test_filter_single_rule_true(self): out = client.detect(dicom_file) self.assertTrue(out["flagged"]) + def test_filter_tag_number(self): + """Test the DicomCleaner.detect to ensure numeric tag numbers can be used""" + from deid.dicom import DicomCleaner + + dicom_file = get_file(self.dataset) + deid = os.path.join(self.deidpath, "filter_tag_number.dicom") + + client = DicomCleaner(output_folder=self.tmpdir, deid=deid) + out = client.detect(dicom_file) + self.assertTrue(out["flagged"]) + def test_filter_single_rule_innerop_false(self): """Test the DicomCleaner.detect to ensure a single rule with an inner operator evaluated to false detects appropriately.""" from deid.dicom import DicomCleaner diff --git a/deid/utils/actions.py b/deid/utils/actions.py index ccc2385..7fb38e8 100644 --- a/deid/utils/actions.py +++ b/deid/utils/actions.py @@ -32,7 +32,6 @@ def parse_value(dicom, value, item=None, field=None, funcs=None): if re.search("(^var:)|(^func:)|(^deid_func:)", value): value_type, value_option = value.split(":", 1) if value_type.lower() == "var": - # If selected variable not provided, skip if value_option not in item: return None @@ -40,7 +39,6 @@ def parse_value(dicom, value, item=None, field=None, funcs=None): # The user wants to use a deid provided function elif value_type.lower() == "deid_func": - # There can be additional key=value pairs try: value_option, extras = value_option.split(" ", 1) @@ -61,7 +59,6 @@ def parse_value(dicom, value, item=None, field=None, funcs=None): # The user is providing a specific function elif value_type.lower() == "func": - if value_option not in item: bot.warning("%s not found in item lookup." % (value_option)) return None diff --git a/deid/version.py b/deid/version.py index dedc23f..64c2e5e 100644 --- a/deid/version.py +++ b/deid/version.py @@ -2,7 +2,7 @@ __copyright__ = "Copyright 2016-2023, Vanessa Sochat" __license__ = "MIT" -__version__ = "0.3.21" +__version__ = "0.3.22" AUTHOR = "Vanessa Sochat" AUTHOR_EMAIL = "vsoch@users.noreply.github.com" NAME = "deid" diff --git a/docs/_docs/user-docs/recipe-filters.md b/docs/_docs/user-docs/recipe-filters.md index 21acc5d..9b41fb4 100644 --- a/docs/_docs/user-docs/recipe-filters.md +++ b/docs/_docs/user-docs/recipe-filters.md @@ -91,7 +91,11 @@ report to the user given that the flag goes off. Each criteria then has the foll ``` ``` -where "value" is optional, depending on the filter. For example: +where "criteria" is a test such as "contains", "notcontains", +"equals", "notequals", "missing", "present" or "empty"; +"field" is the single-word name of a DICOM tag (as known to pydicom), +or the number as 8-digit hex format with 0x prefix; +and "value" is optional, depending on the filter. For example: ``` LABEL Burned In Annotation