
Commit

filter.py - Allow tags to be specified using a string containing a 0x-prefix hex number (#254)

* filter.py - Allow tags to be specified using a string containing a 0x-prefix hex number
* increment version due to change in filter.py
* Update CHANGELOG.md with URL and new version number

Co-authored-by: howff <howff>
howff authored May 10, 2023
1 parent 38717b8 commit f714359
Showing 24 changed files with 31 additions and 60 deletions.
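
In practice, this change means a deid recipe filter can reference a private tag by its 0x-prefixed hex number instead of a dictionary keyword. A minimal sketch of what such a filter entry might look like (the format header, label, tag number, and value below are illustrative assumptions, not part of this commit):

FORMAT dicom

%filter graylist

LABEL Hypothetical private-tag screen
  contains 0x00090010 SIEMENS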
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
Referenced versions in headers are tagged on Github, in parentheses are for pypi.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- Allow filter tag names to be 0x-prefix hex numbers so private tags can be referenced in recipes [#253](https://github.com/pydicom/deid/issues/253) (0.3.22)
- Fix incorrect coordinate definition for GE CT [#249](https://github.com/pydicom/deid/issues/249)
- Circular import error [#247](https://github.com/pydicom/deid/issues/247) (0.3.21)
- Expand BLANK Action to additional VRs [#241](https://github.com/pydicom/deid/issues/241) (0.3.2)
2 changes: 0 additions & 2 deletions deid/config/__init__.py
@@ -28,7 +28,6 @@ class DeidRecipe:
"""

def __init__(self, deid=None, base=False, default_base="dicom"):

# If deid is None, use the default
if deid is None:
base = True
@@ -49,7 +48,6 @@ def load(self, deid):
"""
deid = get_deid(deid)
if deid is not None:

# Update our list of files
self._files.append(deid)
self.files = list(set(self.files))
11 changes: 0 additions & 11 deletions deid/config/utils.py
@@ -37,12 +37,10 @@ def load_combined_deid(deids):
deid = None

for single_deid in deids:

# If not a tag or path, returns None
next_deid = get_deid(tag=single_deid, exit_on_fail=False, quiet=True, load=True)

if next_deid is not None:

# Formats must match
if found_format is None:
found_format = next_deid["format"]
@@ -57,7 +55,6 @@
if deid is None:
deid = next_deid
else:

# Update filter, appending to end to give first preference
if "filter" in next_deid:
if "filter" not in deid:
@@ -112,7 +109,6 @@ def load_deid(path=None):
section = None

while spec:

# Clean up white trailing/leading space
line = spec.pop(0).strip()

@@ -126,7 +122,6 @@

# A new section?
elif line.startswith("%"):

# Remove any comments
line = line.split("#", 1)[0].strip()

@@ -151,7 +146,6 @@

# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
elif line.upper().startswith(actions):

# Start of a filter group
if line.upper().startswith("LABEL") and section == "filter":
members = parse_filter_group(spec)
@@ -355,7 +349,6 @@ def parse_label(section, config, section_name, members, label=None):
if not member.lower().startswith(filters):
bot.warning("%s filter is not valid, skipping." % member.lower())
else:

# Returns single member with field, values, operator,
# Or if multiple or/and in statement, a list
entry = parse_member(member, operator)
@@ -380,7 +373,6 @@ def parse_member(members, operator=None):
members = [members]

while len(members) > 0:

operator = None
value = None
member = members.pop(0).strip()
@@ -399,7 +391,6 @@
operator = "+"

if operator is not None:

member, rest = member.split(operator, 1)

# The rest is only valid if contains a filter statement
@@ -470,7 +461,6 @@ def add_section(config, section, section_name=None):
bot.exit("%s is not a valid section." % section)

if section not in config:

# If a section is named, we have more one level (dict)
if section_name is not None:
config[section] = OrderedDict()
@@ -531,7 +521,6 @@ def parse_group_action(section, line, config, section_name):

# Values supports FIELD or SPLIT
elif section == "values":

# If we have a third set of arguments
if parts:
value = _remove_comments(parts)
2 changes: 0 additions & 2 deletions deid/dicom/actions/jitter.py
@@ -43,7 +43,6 @@ def jitter_timestamp(field, value):
new_value = original

if original is not None:

# Create default for new value
new_value = None
dcmvr = field.element.VR
@@ -67,7 +66,6 @@
)

else:

# If the field type is not supplied, attempt to parse different formats
for fmtstr in ["%Y%m%d", "%Y%m%d%H%M%S.%f%z", "%Y%m%d%H%M%S.%f"]:
try:
5 changes: 0 additions & 5 deletions deid/dicom/fields.py
@@ -157,7 +157,6 @@ def extract_sequence(sequence, prefix=None):
"""
items = {}
for item in sequence:

# If it's a Dataset, we need to further unwrap it
if isinstance(item, Dataset):
for subitem in item:
@@ -224,7 +223,6 @@ def expand_field_expression(field, dicom, contenders=None):

# Loop through fields, all are strings STOPPED HERE NEED TO ADDRESS EMPTY NAME
for uid, field in contenders.items():

# Apply expander to string for name OR to tag string
if expander.lower() in ["endswith", "startswith", "contains"]:
if field.name_contains(expression):
@@ -270,7 +268,6 @@ def add_element(element, name, uid, is_filemeta):
seen.append(uid)

while datasets:

# Grab the first dataset, usually just the dicom
dataset = datasets.pop(0)

@@ -281,7 +278,6 @@

# Includes private tags, sequences flattened, non-null values
for contender in dataset:

# All items should be data elements, skip based on keyword or tag
if contender.keyword in skip or str(contender.tag) in skip:
continue
@@ -297,7 +293,6 @@

# if it's a sequence, extract with prefix and index
if isinstance(contender.value, Sequence) and expand_sequences is True:

# Add the contender (usually type Dataset) to fields
add_element(contender, name, uid, is_filemeta)

7 changes: 4 additions & 3 deletions deid/dicom/filter.py
@@ -24,11 +24,14 @@ def apply_filter(dicom, field, filter_name, value):
Parameters
==========
dicom: the pydicom.dataset Dataset (pydicom.read_file)
field: the name of the field to apply the filter to
field: the name of the field to apply the filter to,
or the tag number as a string '0xGGGGEEEE'
filter_name: the name of the filter to apply (e.g., contains)
value: the value to set, if filter_name is valid
"""
if "0x" in field:
field = int(field, 0) # 0=decode hex with 0x prefix
filter_name = filter_name.lower().strip()

if filter_name == "contains":
@@ -74,7 +77,6 @@ def equalsBase(self, field, term, ignore_case=True, not_equals=False):
# In this loop we can only switch to True
for contender in contenders:
if contender is not None:

try:
# both converted to string (handles tags)
contender = str(contender)
@@ -187,7 +189,6 @@ def compareBase(self, field, expression, func, ignore_case=True):

for contender in contenders:
if contender is not None:

try:
contender = str(contender)
expression = str(expression)
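
The functional change in apply_filter above is small: a field given as a 0x-prefixed string is converted with int(field, 0) into the integer tag that pydicom understands. A standalone sketch of that conversion (the tag number and value are hypothetical, not code from this commit):

from pydicom.dataset import Dataset

ds = Dataset()
ds.add_new(0x00090010, "LO", "ACME PRIVATE CREATOR")  # hypothetical private element

field = "0x00090010"
if "0x" in field:
    field = int(field, 0)  # base 0 honours the 0x prefix, so field == 0x00090010

# pydicom accepts the integer tag as a dataset key, which is how a filter
# criterion can now reference a private tag that has no dictionary keyword
print(ds[field].value)  # -> "ACME PRIVATE CREATOR"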
3 changes: 0 additions & 3 deletions deid/dicom/groups.py
@@ -23,7 +23,6 @@ def extract_values_list(dicom, actions, fields=None):
fields = get_fields(dicom)

for action in actions:

# Extract some subset of fields based on action
subset = expand_field_expression(
field=action["field"], dicom=dicom, contenders=fields
@@ -40,7 +39,6 @@

# Split action, can optionally have a "by" and/or minlength parameter
elif action["action"] == "SPLIT":

# Default values for split are length 1 and character empty space
bot.debug("Parsing action %s" % action)
split_by = " "
@@ -86,7 +84,6 @@ def extract_fields_list(dicom, actions, fields=None):
fields = get_fields(dicom)

for action in actions:

if action["action"] == "FIELD":
subset.update(
expand_field_expression(
2 changes: 0 additions & 2 deletions deid/dicom/header.py
@@ -56,7 +56,6 @@ def get_identifiers(
def remove_private_identifiers(
dicom_files, save=True, overwrite=False, output_folder=None, force=True
):

"""
Remove private identifiers.
@@ -99,7 +98,6 @@ def replace_identifiers(
remove_private=False,
disable_skip=False,
):

"""
Replace identifiers.
5 changes: 0 additions & 5 deletions deid/dicom/parser.py
@@ -150,7 +150,6 @@ def get_nested_field(self, field, return_parent=False):

# We keep going until we find the desired tag
if tag != desired:

# If the parent has been removed, we can't continue
if tag not in parent:
return None, desired
@@ -159,7 +158,6 @@

# Otherwise it's an index into a sequence
else:

# If the sequence is outside the bounds of the array of items
# within the sequence, we can't continue.
if int(uid) < 0 or int(uid) >= len(parent.value):
@@ -224,7 +222,6 @@ def parse(self, strip_sequences=False, remove_private=False):

# if we loaded a deid recipe
if self.recipe.deid is not None:

# Prepare additional lists of values and lookup fields (index by nested uid)
if self.recipe.has_values_lists():
for group, actions in self.recipe.get_values_lists().items():
@@ -461,7 +458,6 @@ def update_dicom(element, is_filemeta):

# Assume we don't want to add an empty value
if value is not None:

# If provided a field object, create based on keyword or tag identifier
name = field
if isinstance(field, DicomField):
@@ -541,7 +537,6 @@ def _run_action(self, field, action, value=None):

# Remove the field entirely
elif action == "REMOVE":

# If a value is defined, parse it (could be filter)
do_removal = True
if value is not None:
8 changes: 0 additions & 8 deletions deid/dicom/pixels/clean.py
@@ -50,7 +50,6 @@ def __init__(
font=None,
force=True,
):

if output_folder is None:
output_folder = get_temporary_name(prefix="clean")

@@ -89,7 +88,6 @@ def detect(self, dicom_file):
def clean(
self, fix_interpretation: bool = True, pixel_data_attribute: str = "PixelData"
) -> Optional[NDArray]:

if not self.results:
bot.warning(
"Use %s.detect() with a dicom file to find coordinates first." % self
@@ -312,7 +310,6 @@ def clean_pixel_data(
coordinates = []

for item in results["results"]:

# We iterate through coordinates in order specified in file
for coordinate_set in item.get("coordinates", []):

# Each is a list with [value, coordinate]
mask_value, new_coordinates = coordinate_set

if not isinstance(new_coordinates, list):
new_coordinates = [new_coordinates]

for new_coordinate in new_coordinates:

# Case 1: an "all" indicates applying to entire image
if new_coordinate.lower() == "all":

# 2D - Greyscale Image - Shape = (X, Y) OR 3D - RGB Image - Shape = (X, Y, Channel)
if len(original.shape) == 2 or (
len(original.shape) == 3 and dicom.SamplesPerPixel == 3
@@ -375,7 +369,6 @@ def clean_pixel_data(
# Now apply finished mask to the data
# RGB cine clip
if len(original.shape) == 4:

# np.tile does the copying and stacking of masks into the channel dim to produce 3D masks
# transposition to convert tile output (channel, X, Y) into (X, Y, channel)
# see: https://github.com/nquach/anonymize/blob/master/anonymize.py#L154
@@ -390,7 +383,6 @@

# RGB image or Greyscale cine clip
elif len(original.shape) == 3:

# This condition is ambiguous. If the image shape is 3, we may have a single frame RGB image: size (X, Y, channel)
# or a multiframe greyscale image: size (frames, X, Y). Interrogate the SamplesPerPixel field.
if dicom.SamplesPerPixel == 3:
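
As an aside on the np.tile comment retained in the clean.py context above: the mask stacking it describes is plain numpy, sketched below with made-up dimensions (illustrative only, not code from this commit):

import numpy as np

mask = np.ones((512, 512))                   # hypothetical 2D (X, Y) mask
stacked = np.tile(mask, (3, 1, 1))           # tile into the channel dim: (channel, X, Y)
rgb_mask = np.transpose(stacked, (1, 2, 0))  # reorder to (X, Y, channel) to match an RGB frame
print(stacked.shape, rgb_mask.shape)         # (3, 512, 512) (512, 512, 3)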
5 changes: 0 additions & 5 deletions deid/dicom/pixels/detect.py
@@ -66,7 +66,6 @@ def _has_burned_pixels_multi(dicom_files: List[Union[str, FileDataset]], force,


def _has_burned_pixels_single(dicom_file, force: bool, deid):

"""
Determine if a single dicom has burned pixels.
@@ -142,10 +141,8 @@ def _has_burned_pixels_single(dicom_file, force: bool, deid):
group_flags = [] # evaluation for a single line
group_descriptions = []
for group in item["filters"]:

# You cannot pop from the list
for a in range(len(group["action"])):

action = group["action"][a]
field = group["field"][a]
value = ""
@@ -268,14 +265,12 @@ def extract_coordinates(dicom, field):

# Now extract coordinates
for region in regions:

if (
"RegionLocationMinX0" in region
and "RegionLocationMinY0" in region
and "RegionLocationMaxX1" in region
and "RegionLocationMaxY1" in region
):

# https://gist.github.com/vsoch/df6957be12c34e62b21000603f1687e5
# minr, minc, maxr, maxc = coordinate
# self.cleaned[minc:maxc, minr:maxr] = 0 # should fill with black
1 change: 0 additions & 1 deletion deid/dicom/tags.py
@@ -45,7 +45,6 @@ def get_tag(field):
manifest = None

if len(found) > 0:

# (VR, VM, Name, Retired, Keyword)
found = found[0] # shouldn't ever have length > 1
tag = Tag(list(found)[0])
1 change: 0 additions & 1 deletion deid/dicom/validate.py
@@ -26,7 +26,6 @@ def validate_dicoms(dcm_files, force=False):

bot.debug("Checking %s dicom files for validation." % (len(dcm_files)))
for dcm_file in dcm_files:

try:
with open(dcm_file, "rb") as filey:
read_file(filey, force=force)
1 change: 0 additions & 1 deletion deid/logger/message.py
@@ -311,7 +311,6 @@ def get_logging_level():
level = int(os.environ.get("MESSAGELEVEL", DEBUG))

except ValueError:

level = os.environ.get("MESSAGELEVEL", DEBUG)
if level == "CRITICAL":
return FLAG
1 change: 0 additions & 1 deletion deid/main/__init__.py
@@ -163,7 +163,6 @@ def get_parser():


def main():

parser = get_parser()
try:
args = parser.parse_args()
