Skip to content

Commit

Permalink
Merge pull request #84 from ArcanaFramework/dicom-metadata
Browse files Browse the repository at this point in the history
Cleans up the reading of DICOM metadata and returns a dictionary rather than a DICOM object
  • Loading branch information
tclose authored Sep 20, 2024
2 parents 3abc44d + 74923c2 commit f5bf5e6
Show file tree
Hide file tree
Showing 21 changed files with 257 additions and 161 deletions.
9 changes: 2 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ repos:
rev: v1.11.2
hooks:
- id: mypy
args:
[
--strict,
--install-types,
--non-interactive,
]
args: [--strict, --install-types, --non-interactive, --no-warn-unused-ignores]
exclude: tests
additional_dependencies: [pytest, attrs, imageio]
additional_dependencies: [pytest, attrs, imageio, pydicom]
2 changes: 1 addition & 1 deletion docs/source/developer/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ the :class:`.WithSeparateHeader` mixin.
ext = ".hdr"
def load(self):
return dict(ln.split(":") for ln in self.contents.splitlines())
return dict(ln.split(":") for ln in self.raw_contents.splitlines())
class MyFormatWithHeader(WithSeparateHeader, File):
ext = ".myh"
Expand Down
46 changes: 31 additions & 15 deletions extras/fileformats/extras/application/medical.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,28 @@
import typing as ty
from pathlib import Path
import pydicom
from typing_extensions import TypeAlias
import pydicom.tag
from fileformats.core import FileSet, extra_implementation
from fileformats.application import Dicom
import medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c
from fileformats.core import SampleFileGenerator

TagListType: TypeAlias = ty.Union[
ty.List[int],
ty.List[str],
ty.List[ty.Tuple[int, int]],
ty.List[pydicom.tag.BaseTag],
]


@extra_implementation(FileSet.read_metadata)
def dicom_read_metadata(
dicom: Dicom,
specific_tags: ty.Optional[ty.Collection[str]] = None,
metadata_keys: ty.Optional[TagListType] = None,
**kwargs: ty.Any,
) -> ty.Mapping[str, ty.Any]:
dcm = pydicom.dcmread(
dicom.fspath,
specific_tags=list(specific_tags if specific_tags is not None else []),
)
[getattr(dcm, a, None) for a in dir(dcm)] # Ensure all keywords are set
metadata = {
e.keyword: e.value
for e in dcm.elements()
if isinstance(e, pydicom.DataElement)
and getattr(e, "keyword", False)
and e.keyword != "PixelData"
}
return metadata
dcm = pydicom.dcmread(dicom.fspath, specific_tags=metadata_keys)
return Dicom.pydicom_to_dict(dcm)


@extra_implementation(FileSet.generate_sample_data)
Expand All @@ -38,3 +35,22 @@ def dicom_generate_sample_data(
out_dir=generator.dest_dir
).iterdir()
)


@extra_implementation(FileSet.load)
def dicom_load(
    dicom: Dicom,
    specific_tags: ty.Optional[TagListType] = None,
    **kwargs: ty.Any,
) -> pydicom.FileDataset:
    """Read the file at ``dicom.fspath`` with ``pydicom.dcmread``.

    Parameters
    ----------
    dicom : Dicom
        the file object whose ``fspath`` is read
    specific_tags : TagListType, optional
        forwarded unchanged to ``pydicom.dcmread`` as ``specific_tags``
    **kwargs
        accepted for signature compatibility; not used by this implementation

    Returns
    -------
    pydicom.FileDataset
        whatever ``pydicom.dcmread`` returns for the file
    """
    dataset = pydicom.dcmread(dicom.fspath, specific_tags=specific_tags)
    return dataset


@extra_implementation(FileSet.save)
def dicom_save(
    dicom: Dicom,
    data: pydicom.FileDataset,
    write_like_original: bool = False,
    **kwargs: ty.Any,
) -> None:
    """Write ``data`` to ``dicom.fspath`` with ``pydicom.dcmwrite``.

    Parameters
    ----------
    dicom : Dicom
        the file object whose ``fspath`` is the destination
    data : pydicom.FileDataset
        the dataset to write
    write_like_original : bool, optional
        forwarded unchanged to ``pydicom.dcmwrite``, by default False
    **kwargs
        accepted for signature compatibility; not used by this implementation
    """
    destination = dicom.fspath
    pydicom.dcmwrite(
        destination,
        data,
        write_like_original=write_like_original,
    )
16 changes: 10 additions & 6 deletions extras/fileformats/extras/application/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import yaml
import pydra.mark
import pydra.engine.specs
from fileformats.core import converter, extra_implementation
from fileformats.core import FileSet, converter, extra_implementation
from fileformats.application import TextSerialization, Json, Yaml
from fileformats.application.serialization import SerializationType

Expand All @@ -27,14 +27,18 @@ def convert_data_serialization(
return output_format.new(output_path, dct)


@extra_implementation(TextSerialization.load)
def yaml_load(yml: Yaml) -> SerializationType:
@extra_implementation(FileSet.load)
def yaml_load(yml: Yaml, **kwargs: ty.Any) -> SerializationType:
with open(yml.fspath) as f:
data = yaml.load(f, Loader=yaml.Loader)
return data # type: ignore[no-any-return]


@extra_implementation(TextSerialization.save)
def yaml_save(yml: Yaml, data: SerializationType) -> None:
@extra_implementation(FileSet.save)
def yaml_save(
yml: Yaml,
data: SerializationType,
**kwargs: ty.Any,
) -> None:
with open(yml.fspath, "w") as f:
yaml.dump(data, f)
yaml.dump(data, f, **kwargs)
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
import pytest
from fileformats.application import Dicom


def test_dicom_metadata():

dicom = Dicom.sample()

assert dicom.metadata["EchoTime"] == "2.07"
assert dicom.metadata["EchoTime"] == 2.07


def test_dicom_metadata_with_specific_tags():
    """Check metadata access when ``metadata_keys`` is limited to "EchoTime".

    The requested key must be readable, and looking up "PatientName" (which
    was not requested) must raise ``KeyError``.
    """
    sample = Dicom.sample()
    restricted = Dicom(sample, metadata_keys=["EchoTime"])

    assert restricted.metadata["EchoTime"] == 2.07
    with pytest.raises(KeyError):
        restricted.metadata["PatientName"]
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,10 @@
from fileformats.application import Json, Yaml


SAMPLE_JSON = """{
"a": "string field",
"alist": [0, 1, 2, 3, 4, 5],
"anesteddict": {
"x": null,
"y": [],
"z": 42.0
}
}"""
SAMPLE_JSON = (
"""{"a": "string field", "alist": [0, 1, 2, 3, 4, 5], """
""""anesteddict": {"x": null, "y": [], "z": 42.0}}"""
)

SAMPLE_YAML = """a: string field
alist:
Expand All @@ -38,7 +33,7 @@ def test_json_to_yaml(work_dir):
f.write(SAMPLE_JSON)
jsn = Json(in_file)
yml = Yaml.convert(jsn)
assert yml.contents == SAMPLE_YAML
assert yml.raw_contents == SAMPLE_YAML


# @pytest.mark.xfail(
Expand All @@ -50,5 +45,5 @@ def test_yaml_to_json(work_dir):
with open(in_file, "w") as f:
f.write(SAMPLE_JSON)
yml = Yaml(in_file)
Json.convert(yml)
assert yml.contents == SAMPLE_JSON
jsn = Json.convert(yml)
assert jsn.raw_contents == SAMPLE_JSON
15 changes: 9 additions & 6 deletions extras/fileformats/extras/image/readwrite.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import imageio
import typing as ty
import numpy # noqa: F401
import typing # noqa: F401
from fileformats.core import extra_implementation
from fileformats.core import FileSet, extra_implementation
from fileformats.image.raster import RasterImage, DataArrayType


@extra_implementation(RasterImage.load)
def read_raster_data(image: RasterImage) -> DataArrayType:
@extra_implementation(FileSet.load)
def read_raster_data(image: RasterImage, **kwargs: ty.Any) -> DataArrayType:
return imageio.imread(image.fspath) # type: ignore


@extra_implementation(RasterImage.save)
def write_raster_data(image: RasterImage, data: DataArrayType) -> None:
imageio.imwrite(image.fspath, data)
@extra_implementation(FileSet.save)
def write_raster_data(
image: RasterImage, data: DataArrayType, **kwargs: ty.Any
) -> None:
imageio.imwrite(image.fspath, data, **kwargs)
58 changes: 58 additions & 0 deletions fileformats/application/medical.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import typing as ty
from fileformats.generic import BinaryFile
from fileformats.core.mixin import WithMagicNumber

if ty.TYPE_CHECKING:
import pydicom


class Dicom(WithMagicNumber, BinaryFile):

Expand All @@ -10,3 +14,57 @@ class Dicom(WithMagicNumber, BinaryFile):
binary = True

alternate_exts = (".dcm",) # dcm is recommended not required

@classmethod
def pydicom_to_dict(
    cls, dcm: "pydicom.Dataset", omit: ty.Collection[str] = ("PixelData",)
) -> ty.Dict[str, ty.Any]:
    """Build a plain dictionary from the elements of a pydicom Dataset.

    Parameters
    ----------
    dcm : pydicom.Dataset
        The dataset whose elements are read.
    omit : Collection[str], optional
        Keys to leave out of the result, by default ("PixelData",)

    Returns
    -------
    Dict[str, Any]
        Element values keyed by the element's keyword, falling back to the
        tag's ``json_key`` when the keyword is missing or empty. Nested
        datasets are converted recursively with the same ``omit``.
    """
    import pydicom.dataset
    import pydicom.multival
    import pydicom.uid
    import pydicom.valuerep

    # Access every attribute once before iterating; otherwise the keywords will
    # not be set on the elements.
    for attr_name in dir(dcm):
        getattr(dcm, attr_name, None)

    result: ty.Dict[str, ty.Any] = {}
    for element in dcm.values():
        # Prefer the keyword; use the tag's JSON key when it is absent or empty
        name = getattr(element, "keyword", None)
        if not name:
            name = element.tag.json_key  # type: ignore[union-attr, attr-defined]
        if name in omit:
            continue
        raw = element.value  # type: ignore[union-attr, attr-defined]
        # The order of these checks is significant and must be kept as is
        if isinstance(raw, pydicom.multival.MultiValue):
            converted: ty.Any = [str(item) for item in raw]
        elif isinstance(raw, pydicom.uid.UID):
            converted = str(raw)
        elif isinstance(raw, bytes):
            converted = raw.decode(errors="ignore")
        elif isinstance(raw, pydicom.dataset.Dataset):
            converted = cls.pydicom_to_dict(raw, omit)
        elif isinstance(raw, pydicom.valuerep.IS):
            converted = int(raw)
        elif isinstance(raw, pydicom.valuerep.DSfloat):
            converted = float(raw)
        else:
            # NOTE: PersonName values are deliberately left unconverted, as it
            # can be handy to access family_name and given_name separately
            converted = raw
        result[name] = converted
    return result
8 changes: 4 additions & 4 deletions fileformats/application/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,16 @@ def generate_yaml_sample_data(


@extra_implementation(FileSet.load)
def load(jsn: Json) -> SerializationType:
def load(jsn: Json, **kwargs: ty.Any) -> SerializationType:
try:
with jsn.open() as f:
dct: ty.Dict[str, ty.Any] = json.load(f)
dct: ty.Dict[str, ty.Any] = json.load(f, **kwargs)
except json.JSONDecodeError as e:
raise FormatMismatchError(f"'{jsn.fspath}' is not a valid JSON file") from e
return dct


@extra_implementation(FileSet.save)
def save(jsn: Json, data: SerializationType) -> None:
def save(jsn: Json, data: SerializationType, **kwargs: ty.Any) -> None:
with jsn.open("w") as f:
json.dump(data, f)
json.dump(data, f, **kwargs)
Loading

0 comments on commit f5bf5e6

Please sign in to comment.