From 9b60cfc3da52e3eb5ab8e89cb7d82b3a2cd0e205 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 7 Aug 2024 12:31:49 +0200 Subject: [PATCH 1/6] Start working on TIFF parsing for JEOL --- src/pynxtools_em/parsers/image_tiff_jeol.py | 231 ++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 src/pynxtools_em/parsers/image_tiff_jeol.py diff --git a/src/pynxtools_em/parsers/image_tiff_jeol.py b/src/pynxtools_em/parsers/image_tiff_jeol.py new file mode 100644 index 0000000..c613dcd --- /dev/null +++ b/src/pynxtools_em/parsers/image_tiff_jeol.py @@ -0,0 +1,231 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Subparser for harmonizing JEOL specific content in TIFF files.""" + +import mmap +from typing import Dict + +import flatdict as fd +import numpy as np +from PIL import Image, ImageSequence +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint + +# from pynxtools_em.configurations.image_tiff_jeol_cfg import JEOL_VARIOUS_DYNAMIC_TO_NX_EM +from pynxtools_em.parsers.image_tiff import TiffParser +from pynxtools_em.utils.string_conversions import string_to_number + + +class JeolTiffParser(TiffParser): + def __init__(self, file_path: str = "", entry_id: int = 1): + super().__init__(file_path) + self.entry_id = entry_id + self.event_id = 1 + self.prfx = None + self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})} + self.supported_version: Dict = {} + self.version: Dict = {} + self.tags: Dict = {} + self.supported = False + self.init_support() + self.check_if_tiff_point_electronic() + + def init_support(self): + """Init supported versions.""" + self.supported_version["tech_partner"] = ["point electronic"] + self.supported_version["schema_name"] = ["DISS"] + self.supported_version["schema_version"] = ["5.15.31.0"] + + def xmpmeta_to_flat_dict(self, meta: fd.FlatDict): + for entry in meta["xmpmeta/RDF/Description"]: + tmp = fd.FlatDict(entry, "/") + for key, obj in tmp.items(): + if isinstance(obj, list): + for dct in obj: + if isinstance(dct, dict): + lst = fd.FlatDict(dct, "/") + for kkey, kobj in lst.items(): + if isinstance(kobj, str) and kobj != "": + if ( + f"{key}/{kkey}" + not in self.tmp["flat_dict_meta"] + ): + self.tmp["flat_dict_meta"][f"{key}/{kkey}"] = ( + string_to_number(kobj) + ) + if isinstance(obj, str) and obj != "": + if key not in self.tmp["flat_dict_meta"]: + self.tmp["flat_dict_meta"][key] = string_to_number(obj) + else: + raise KeyError(f"Duplicated key {key} !") + + def check_if_tiff_point_electronic(self): + """Check if resource behind self.file_path is a TaggedImageFormat file. 
+ + This also loads the metadata first if possible as these contain details + about which software was used to process the image data, e.g. DISS software. + """ + self.supported = 0 # voting-based + with open(self.file_path, "rb", 0) as file: + s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + magic = s.read(4) + if magic == b"II*\x00": # https://en.wikipedia.org/wiki/TIFF + self.supported += 1 + else: + self.supported = False + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) + return + with Image.open(self.file_path, mode="r") as fp: + # either hunt for metadata under tag_v2 key 700 or take advantage of the + # fact that point electronic write xmpmeta/xmptk XMP Core 5.1.2 + meta = fd.FlatDict(fp.getxmp(), "/") + if meta: + if "xmpmeta/xmptk" in meta: + if meta["xmpmeta/xmptk"] == "XMP Core 5.1.2": + # load the metadata + self.tmp["flat_dict_meta"] = fd.FlatDict({}, "/") + self.xmpmeta_to_flat_dict(meta) + + for key, value in self.tmp["flat_dict_meta"].items(): + print(f"{key}____{type(value)}____{value}") + + # check if written about with supported DISS version + prefix = f"{self.supported_version['tech_partner'][0]} {self.supported_version['schema_name'][0]}" + supported_versions = [ + f"{prefix} {val}" + for val in self.supported_version["schema_version"] + ] + print(supported_versions) + if ( + self.tmp["flat_dict_meta"]["CreatorTool"] + in supported_versions + ): + self.supported += 1 # found specific XMP metadata + if self.supported == 2: + self.supported = True + else: + self.supported = False + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) + + def parse_and_normalize(self): + """Perform actual parsing filling cache self.tmp.""" + if self.supported is True: + print(f"Parsing via point electronic DISS-specific metadata...") + # metadata have at this point already been collected into an fd.FlatDict + else: + print( + f"{self.file_path} is 
not a point electronic DISS-specific " + f"TIFF file that this parser can process !" + ) + + def process_into_template(self, template: dict) -> dict: + if self.supported is True: + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) + return template + + def process_event_data_em_data(self, template: dict) -> dict: + """Add respective heavy data.""" + # default display of the image(s) representing the data collected in this event + print( + f"Writing point electronic DISS TIFF image data to the respective NeXus concept instances..." + ) + # read image in-place + image_identifier = 1 + with Image.open(self.file_path, mode="r") as fp: + for img in ImageSequence.Iterator(fp): + nparr = np.array(img) + print( + f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}" + ) + # eventually similar open discussions points as for the TFS TIFF parser + trg = ( + f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" + f"EVENT_DATA_EM[event_data_em{self.event_id}]/" + f"IMAGE_SET[image_set{image_identifier}]/image_twod" + ) + template[f"{trg}/title"] = f"Image" + template[f"{trg}/@signal"] = "real" + dims = ["i", "j"] # i == x (fastest), j == y (fastest) + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = ( + np.uint32(idx) + ) + idx += 1 + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + template[f"{trg}/real"] = {"compress": np.array(fp), "strength": 1} + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d + template[f"{trg}/real/@long_name"] = f"Signal" + + sxy = {"i": 1.0, "j": 1.0} + scan_unit = {"i": "m", "j": "m"} # assuming FEI reports SI units + # we may face the CCD overview camera for the chamber for which there might not be a calibration! 
+ if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and ( + "PixelSizeY" in self.tmp["flat_dict_meta"] + ): + sxy = { + "i": self.tmp["flat_dict_meta"]["PixelSizeX"], + "j": self.tmp["flat_dict_meta"]["PixelSizeY"], + } + else: + print("WARNING: Assuming pixel width and height unit is meter!") + nxy = {"i": np.shape(np.array(fp))[1], "j": np.shape(np.array(fp))[0]} + # TODO::be careful we assume here a very specific coordinate system + # however, these assumptions need to be confirmed by point electronic + # additional points as discussed already in comments to TFS TIFF reader + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] = { + "compress": np.asarray( + np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True) + * sxy[dim], + np.float64, + ), + "strength": 1, + } + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( + f"Coordinate along {dim}-axis ({scan_unit[dim]})" + ) + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" + image_identifier += 1 + return template + + def add_various_dynamic(self, template: dict) -> dict: + identifier = [self.entry_id, self.event_id, 1] + add_specific_metadata_pint( + DISS_VARIOUS_DYNAMIC_TO_NX_EM, + self.tmp["flat_dict_meta"], + identifier, + template, + ) + return template + + def process_event_data_em_metadata(self, template: dict) -> dict: + """Add respective metadata.""" + # contextualization to understand how the image relates to the EM session + print( + f"Mapping some of the point electronic DISS metadata on respective NeXus concepts..." + ) + self.add_various_dynamic(template) + # ... add more as required ... 
+ return template From de02ba0f7b6e7000e787c0cfa34ff4833c32019f Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 7 Aug 2024 14:53:12 +0200 Subject: [PATCH 2/6] Implementing functionality --- src/pynxtools_em/parsers/image_tiff_jeol.py | 140 ++++++++---------- .../parsers/image_tiff_point_electronic.py | 5 +- 2 files changed, 62 insertions(+), 83 deletions(-) diff --git a/src/pynxtools_em/parsers/image_tiff_jeol.py b/src/pynxtools_em/parsers/image_tiff_jeol.py index c613dcd..4787b1b 100644 --- a/src/pynxtools_em/parsers/image_tiff_jeol.py +++ b/src/pynxtools_em/parsers/image_tiff_jeol.py @@ -31,109 +31,89 @@ class JeolTiffParser(TiffParser): - def __init__(self, file_path: str = "", entry_id: int = 1): - super().__init__(file_path) + def __init__( + self, tiff_file_path: str = "", txt_file_path: str = "", entry_id: int = 1 + ): + super().__init__(tiff_file_path) self.entry_id = entry_id self.event_id = 1 + self.txt_file_path = None + if txt_file_path is not None and txt_file_path != "": + self.txt_file_path = txt_file_path self.prfx = None self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})} self.supported_version: Dict = {} self.version: Dict = {} self.tags: Dict = {} self.supported = False - self.init_support() - self.check_if_tiff_point_electronic() - - def init_support(self): - """Init supported versions.""" - self.supported_version["tech_partner"] = ["point electronic"] - self.supported_version["schema_name"] = ["DISS"] - self.supported_version["schema_version"] = ["5.15.31.0"] - - def xmpmeta_to_flat_dict(self, meta: fd.FlatDict): - for entry in meta["xmpmeta/RDF/Description"]: - tmp = fd.FlatDict(entry, "/") - for key, obj in tmp.items(): - if isinstance(obj, list): - for dct in obj: - if isinstance(dct, dict): - lst = fd.FlatDict(dct, "/") - for kkey, kobj in lst.items(): - if isinstance(kobj, str) and kobj != "": - if ( - f"{key}/{kkey}" - not in self.tmp["flat_dict_meta"] - ): - self.tmp["flat_dict_meta"][f"{key}/{kkey}"] = ( - 
string_to_number(kobj) - ) - if isinstance(obj, str) and obj != "": - if key not in self.tmp["flat_dict_meta"]: - self.tmp["flat_dict_meta"][key] = string_to_number(obj) - else: - raise KeyError(f"Duplicated key {key} !") + self.check_if_tiff_jeol() - def check_if_tiff_point_electronic(self): + def check_if_tiff_jeol(self): """Check if resource behind self.file_path is a TaggedImageFormat file. - This also loads the metadata first if possible as these contain details - about which software was used to process the image data, e.g. DISS software. + This loads the metadata with the txt_file_path first to the formatting of that + information can be used to tell JEOL data apart from other data. """ - self.supported = 0 # voting-based + # currently not voting-based algorithm required as used in other parsers + if self.txt_file_path is None: + self.supported = False + print( + f"Parser {self.__class__.__name__} does not work with JEOL metadata text file !" + ) + return with open(self.file_path, "rb", 0) as file: s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) magic = s.read(4) - if magic == b"II*\x00": # https://en.wikipedia.org/wiki/TIFF - self.supported += 1 - else: + if magic != b"II*\x00": # https://en.wikipedia.org/wiki/TIFF self.supported = False print( f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" ) return - with Image.open(self.file_path, mode="r") as fp: - # either hunt for metadata under tag_v2 key 700 or take advantage of the - # fact that point electronic write xmpmeta/xmptk XMP Core 5.1.2 - meta = fd.FlatDict(fp.getxmp(), "/") - if meta: - if "xmpmeta/xmptk" in meta: - if meta["xmpmeta/xmptk"] == "XMP Core 5.1.2": - # load the metadata - self.tmp["flat_dict_meta"] = fd.FlatDict({}, "/") - self.xmpmeta_to_flat_dict(meta) - - for key, value in self.tmp["flat_dict_meta"].items(): - print(f"{key}____{type(value)}____{value}") - - # check if written about with supported DISS version - prefix = 
f"{self.supported_version['tech_partner'][0]} {self.supported_version['schema_name'][0]}" - supported_versions = [ - f"{prefix} {val}" - for val in self.supported_version["schema_version"] - ] - print(supported_versions) - if ( - self.tmp["flat_dict_meta"]["CreatorTool"] - in supported_versions - ): - self.supported += 1 # found specific XMP metadata - if self.supported == 2: - self.supported = True - else: - self.supported = False - print( - f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" - ) + + with open(self.txt_file_path, "r") as txt: + txt = [ + line.strip().lstrip("$") + for line in txt.readlines() + if line.strip() != "" and line.startswith("$") + ] + + self.tmp["flat_dict_meta"] = fd.FlatDict({}, "/") + for line in txt: + tmp = line.split() + if len(tmp) == 1: + print(f"WARNING::{line} is currently ignored !") + elif len(tmp) == 2: + if tmp[0] not in self.tmp["flat_dict_meta"]: + self.tmp["flat_dict_meta"][tmp[0]] = string_to_number(tmp[1]) + else: + raise KeyError(f"Found duplicated key {tmp[0]} !") + else: # len(tmp) > 2: + print(f"WARNING::{line} is currently ignored !") + + # report metadata just for verbose purposes right now + for key, value in self.tmp["flat_dict_meta"].items(): + print(f"{key}______{type(value)}____{value}") + + if ( + self.tmp["flat_dict_meta"]["SEM_DATA_VERSION"] == "1" + and self.tmp["flat_dict_meta"]["CM_LABEL"] == "JEOL" + ): + self.supported = True + else: + self.supported = False + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) def parse_and_normalize(self): """Perform actual parsing filling cache self.tmp.""" if self.supported is True: - print(f"Parsing via point electronic DISS-specific metadata...") + print(f"Parsing via JEOL...") # metadata have at this point already been collected into an fd.FlatDict else: print( - f"{self.file_path} is not a point electronic DISS-specific " - f"TIFF file that this parser can process !" 
+ f"{self.file_path} is not a JEOL-specific TIFF file that this parser can process !" ) def process_into_template(self, template: dict) -> dict: @@ -146,9 +126,10 @@ def process_event_data_em_data(self, template: dict) -> dict: """Add respective heavy data.""" # default display of the image(s) representing the data collected in this event print( - f"Writing point electronic DISS TIFF image data to the respective NeXus concept instances..." + f"Writing JEOL TIFF image data to the respective NeXus concept instances..." ) # read image in-place + #################################################### image_identifier = 1 with Image.open(self.file_path, mode="r") as fp: for img in ImageSequence.Iterator(fp): @@ -156,7 +137,7 @@ def process_event_data_em_data(self, template: dict) -> dict: print( f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}" ) - # eventually similar open discussions points as for the TFS TIFF parser + # eventually similar open discussions points as were raised for tiff_tfs parser trg = ( f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" f"EVENT_DATA_EM[event_data_em{self.event_id}]/" @@ -179,8 +160,7 @@ def process_event_data_em_data(self, template: dict) -> dict: template[f"{trg}/real/@long_name"] = f"Signal" sxy = {"i": 1.0, "j": 1.0} - scan_unit = {"i": "m", "j": "m"} # assuming FEI reports SI units - # we may face the CCD overview camera for the chamber for which there might not be a calibration! 
+ scan_unit = {"i": "m", "j": "m"} if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and ( "PixelSizeY" in self.tmp["flat_dict_meta"] ): diff --git a/src/pynxtools_em/parsers/image_tiff_point_electronic.py b/src/pynxtools_em/parsers/image_tiff_point_electronic.py index 4efa76c..bfeb3f7 100644 --- a/src/pynxtools_em/parsers/image_tiff_point_electronic.py +++ b/src/pynxtools_em/parsers/image_tiff_point_electronic.py @@ -157,7 +157,7 @@ def process_event_data_em_data(self, template: dict) -> dict: print( f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}" ) - # eventually similar open discussions points as for the TFS TIFF parser + # eventually similar open discussions points as were raised for tiff_tfs parser trg = ( f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" f"EVENT_DATA_EM[event_data_em{self.event_id}]/" @@ -180,8 +180,7 @@ def process_event_data_em_data(self, template: dict) -> dict: template[f"{trg}/real/@long_name"] = f"Signal" sxy = {"i": 1.0, "j": 1.0} - scan_unit = {"i": "m", "j": "m"} # assuming FEI reports SI units - # we may face the CCD overview camera for the chamber for which there might not be a calibration! 
+ scan_unit = {"i": "m", "j": "m"} if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and ( "PixelSizeY" in self.tmp["flat_dict_meta"] ): From ea8eed6d26c6aaf2a099385b7d9c3154166fbfce Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 7 Aug 2024 18:06:05 +0200 Subject: [PATCH 3/6] Image parsing for JEOL working --- .vscode/launch.json | 7 +- docs/index.md | 2 +- docs/reference/tiff.md | 4 + docs/reference/{vemd.md => velox.md} | 0 mkdocs.yaml | 2 +- .../image_tiff_jeol_cfg copy.py | 43 +++++++ src/pynxtools_em/parsers/image_tiff_jeol.py | 107 ++++++++++++------ src/pynxtools_em/reader.py | 4 + src/pynxtools_em/utils/io_case_logic.py | 1 + 9 files changed, 128 insertions(+), 42 deletions(-) rename docs/reference/{vemd.md => velox.md} (100%) create mode 100644 src/pynxtools_em/configurations/image_tiff_jeol_cfg copy.py diff --git a/.vscode/launch.json b/.vscode/launch.json index a15ce36..43831d0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -11,9 +11,10 @@ "cwd": "${workspaceFolder}", "program": "../.py3.12.4/bin/dataconverter", "args": ["convert", - "examples/eln_data.yaml", - "examples/em.oasis.specific.yaml", - "../ebic_dm3_goette/documents-export-2024-06-06/EBIC/Defekt1.tif", + // "examples/eln_data.yaml", + // "examples/em.oasis.specific.yaml", + "../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.txt", + "../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.tif", "--reader", "em", "--nxdl", diff --git a/docs/index.md b/docs/index.md index c258b1f..5615d42 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,7 +57,7 @@ for the respective file formats of technology partners of the electron microscop - [How to map pieces of information to NeXus](reference/contextualization.md) - [Tagged Image File Format (TIFF)](reference/tiff.md) - [Portable Network Graphics (PNG)](reference/png.md) -- [Velox EMD](reference/vemd.md) +- [Velox EMD](reference/velox.md) - [EDAX APEX](reference/apex.md) - [Nion Co. 
projects](reference/nion.md) diff --git a/docs/reference/tiff.md b/docs/reference/tiff.md index 3ec5842..9f83d02 100644 --- a/docs/reference/tiff.md +++ b/docs/reference/tiff.md @@ -7,3 +7,7 @@ The pynxtools-em parser and normalizer reads the following content and maps them | --------------- | -------------- | | Reconstructed positions (x, y, z) | :heavy_check_mark: | | Mass-to-charge-state-ratio values (m/q) | :heavy_check_mark: |--> + + + + diff --git a/docs/reference/vemd.md b/docs/reference/velox.md similarity index 100% rename from docs/reference/vemd.md rename to docs/reference/velox.md diff --git a/mkdocs.yaml b/mkdocs.yaml index bdf040d..11c41a7 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -21,7 +21,7 @@ nav: - reference/contextualization.md - reference/tiff.md - reference/png.md - - reference/vemd.md + - reference/velox.md - reference/apex.md - reference/nion.md plugins: diff --git a/src/pynxtools_em/configurations/image_tiff_jeol_cfg copy.py b/src/pynxtools_em/configurations/image_tiff_jeol_cfg copy.py new file mode 100644 index 0000000..12ef7f5 --- /dev/null +++ b/src/pynxtools_em/configurations/image_tiff_jeol_cfg copy.py @@ -0,0 +1,43 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Configuration of the image_tiff_jeol parser.""" + +from pint import UnitRegistry + +ureg = UnitRegistry() + + +JEOL_VARIOUS_DYNAMIC_TO_NX_EM = { + "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]", + "prefix_src": "", + "map_to_f8": [ + ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "Mag"), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", + ureg.centimeter, + "WD/value", + "WD/Unit", + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", + ureg.picovolt, + "HV/value", + "HV/Unit", + ), + ], +} diff --git a/src/pynxtools_em/parsers/image_tiff_jeol.py b/src/pynxtools_em/parsers/image_tiff_jeol.py index 4787b1b..289e386 100644 --- a/src/pynxtools_em/parsers/image_tiff_jeol.py +++ b/src/pynxtools_em/parsers/image_tiff_jeol.py @@ -18,35 +18,51 @@ """Subparser for harmonizing JEOL specific content in TIFF files.""" import mmap -from typing import Dict +from typing import Dict, List import flatdict as fd import numpy as np +import pint from PIL import Image, ImageSequence +from pint import UnitRegistry from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint # from pynxtools_em.configurations.image_tiff_jeol_cfg import JEOL_VARIOUS_DYNAMIC_TO_NX_EM from pynxtools_em.parsers.image_tiff import TiffParser from pynxtools_em.utils.string_conversions import string_to_number +ureg = UnitRegistry() + class JeolTiffParser(TiffParser): - def __init__( - self, tiff_file_path: str = "", txt_file_path: str = "", entry_id: int = 1 - ): - super().__init__(tiff_file_path) - self.entry_id = entry_id - self.event_id = 1 - self.txt_file_path = None - if txt_file_path is not None and txt_file_path != "": - self.txt_file_path = txt_file_path - self.prfx = None - self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})} - self.supported_version: Dict = {} - self.version: Dict = {} - self.tags: Dict = {} - self.supported = False - 
self.check_if_tiff_jeol() + def __init__(self, file_paths: List[str], entry_id: int = 1, verbose=False): + tif_txt = ["", ""] + if ( + len(file_paths) == 2 + and file_paths[0][0 : file_paths[0].rfind(".")] + == file_paths[1][0 : file_paths[0].rfind(".")] + ): + for entry in file_paths: + if entry.lower().endswith((".tif", ".tiff")): + tif_txt[0] = entry + elif entry.lower().endswith((".txt")): + tif_txt[1] = entry + if all(value != "" for value in tif_txt): + super().__init__(tif_txt[0]) + self.entry_id = entry_id + self.event_id = 1 + self.verbose = verbose + self.txt_file_path = tif_txt[1] + self.prfx = None + self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})} + self.supported_version: Dict = {} + self.version: Dict = {} + self.tags: Dict = {} + self.supported = False + self.check_if_tiff_jeol() + else: + print(f"Parser {self.__class__.__name__} needs TIF and TXT file !") + self.supported = False def check_if_tiff_jeol(self): """Check if resource behind self.file_path is a TaggedImageFormat file. @@ -58,7 +74,8 @@ def check_if_tiff_jeol(self): if self.txt_file_path is None: self.supported = False print( - f"Parser {self.__class__.__name__} does not work with JEOL metadata text file !" + f"Parser {self.__class__.__name__} does not work without a JEOL text file with the image metadata !" + f"This file is required to have exactly the same file name as the file with the TIF image data !" 
) return with open(self.file_path, "rb", 0) as file: @@ -70,7 +87,6 @@ def check_if_tiff_jeol(self): f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" ) return - with open(self.txt_file_path, "r") as txt: txt = [ line.strip().lstrip("$") @@ -85,7 +101,19 @@ def check_if_tiff_jeol(self): print(f"WARNING::{line} is currently ignored !") elif len(tmp) == 2: if tmp[0] not in self.tmp["flat_dict_meta"]: - self.tmp["flat_dict_meta"][tmp[0]] = string_to_number(tmp[1]) + # this is not working robustly as the following example fails: + # CM_TITLE 20240227_A1_2m_0_FA3_1 ('invalid decimal literal', (1, 9)) + # try: + # self.tmp["flat_dict_meta"][tmp[0]] = pint.Quantity(tmp[1]) + # except pint.errors.UndefinedUnitError: + # self.tmp["flat_dict_meta"][tmp[0]] = tmp[1] + # as an alternative we currently use a mixture of pint quantities + # and regular numpy / pure Python types, the mapping functor should + # take care of resolving the cases properly + if tmp[0] != "SM_MICRON_MARKER": + self.tmp["flat_dict_meta"][tmp[0]] = tmp[1] + else: + self.tmp["flat_dict_meta"][tmp[0]] = pint.Quantity(tmp[1]) else: raise KeyError(f"Found duplicated key {tmp[0]} !") else: # len(tmp) > 2: @@ -106,20 +134,16 @@ def check_if_tiff_jeol(self): f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" ) - def parse_and_normalize(self): - """Perform actual parsing filling cache self.tmp.""" + def parse(self, template: dict) -> dict: if self.supported is True: print(f"Parsing via JEOL...") # metadata have at this point already been collected into an fd.FlatDict + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) else: print( f"{self.file_path} is not a JEOL-specific TIFF file that this parser can process !" 
) - - def process_into_template(self, template: dict) -> dict: - if self.supported is True: - self.process_event_data_em_metadata(template) - self.process_event_data_em_data(template) return template def process_event_data_em_data(self, template: dict) -> dict: @@ -128,8 +152,6 @@ def process_event_data_em_data(self, template: dict) -> dict: print( f"Writing JEOL TIFF image data to the respective NeXus concept instances..." ) - # read image in-place - #################################################### image_identifier = 1 with Image.open(self.file_path, mode="r") as fp: for img in ImageSequence.Iterator(fp): @@ -161,12 +183,22 @@ def process_event_data_em_data(self, template: dict) -> dict: sxy = {"i": 1.0, "j": 1.0} scan_unit = {"i": "m", "j": "m"} - if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and ( - "PixelSizeY" in self.tmp["flat_dict_meta"] + if ("SM_MICRON_BAR" in self.tmp["flat_dict_meta"]) and ( + "SM_MICRON_MARKER" in self.tmp["flat_dict_meta"] ): + # JEOL-specific conversion for micron bar pixel to physical length + resolution = int(self.tmp["flat_dict_meta"]["SM_MICRON_BAR"]) + physical_length = ( + self.tmp["flat_dict_meta"]["SM_MICRON_MARKER"] + .to(ureg.meter) + .magnitude + ) + # resolution many pixel represent physical_length scanned surface + # assuming square pixel + print(f"resolution {resolution}, L {physical_length}") sxy = { - "i": self.tmp["flat_dict_meta"]["PixelSizeX"], - "j": self.tmp["flat_dict_meta"]["PixelSizeY"], + "i": physical_length / resolution, + "j": physical_length / resolution, } else: print("WARNING: Assuming pixel width and height unit is meter!") @@ -191,21 +223,22 @@ def process_event_data_em_data(self, template: dict) -> dict: return template def add_various_dynamic(self, template: dict) -> dict: + pass + """ identifier = [self.entry_id, self.event_id, 1] add_specific_metadata_pint( - DISS_VARIOUS_DYNAMIC_TO_NX_EM, + JEOL_VARIOUS_DYNAMIC_TO_NX_EM, self.tmp["flat_dict_meta"], identifier, template, ) + """ return 
template def process_event_data_em_metadata(self, template: dict) -> dict: """Add respective metadata.""" # contextualization to understand how the image relates to the EM session - print( - f"Mapping some of the point electronic DISS metadata on respective NeXus concepts..." - ) + print(f"Mapping some of JEOL metadata on respective NeXus concepts...") self.add_various_dynamic(template) # ... add more as required ... return template diff --git a/src/pynxtools_em/reader.py b/src/pynxtools_em/reader.py index e39af63..88186fa 100644 --- a/src/pynxtools_em/reader.py +++ b/src/pynxtools_em/reader.py @@ -26,6 +26,7 @@ from pynxtools_em.concepts.nxs_concepts import NxEmAppDef from pynxtools_em.parsers.convention_reader import NxEmConventionParser +from pynxtools_em.parsers.image_tiff_jeol import JeolTiffParser from pynxtools_em.parsers.nxs_imgs import NxEmImagesParser from pynxtools_em.parsers.nxs_mtex import NxEmNxsMTexParser from pynxtools_em.parsers.nxs_nion import NionProjectParser @@ -118,6 +119,9 @@ def read( # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) # zip_parser.parse(template) + elif len(case.dat) == 2: + jeol = JeolTiffParser(case.dat, entry_id, verbose=False) + jeol.parse(template) nxplt = NxEmDefaultPlotResolver() nxplt.priority_select(template) diff --git a/src/pynxtools_em/utils/io_case_logic.py b/src/pynxtools_em/utils/io_case_logic.py index 67cc0ed..96e2d04 100644 --- a/src/pynxtools_em/utils/io_case_logic.py +++ b/src/pynxtools_em/utils/io_case_logic.py @@ -33,6 +33,7 @@ ".h5oina", ".mtex.h5", ".dream3d", + ".txt", ] # ".dm3", ".dm4"] From c704ab0d0c8ace3f4ddbd48945d5837680c3ec00 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 7 Aug 2024 18:07:40 +0200 Subject: [PATCH 4/6] Filename fix --- .../{image_tiff_jeol_cfg copy.py => image_tiff_jeol_cfg.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/pynxtools_em/configurations/{image_tiff_jeol_cfg copy.py => image_tiff_jeol_cfg.py} (100%) diff --git 
a/src/pynxtools_em/configurations/image_tiff_jeol_cfg copy.py b/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py similarity index 100% rename from src/pynxtools_em/configurations/image_tiff_jeol_cfg copy.py rename to src/pynxtools_em/configurations/image_tiff_jeol_cfg.py From bcec9ea118d3b0418238ff9e3fe12cacd651b4eb Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Wed, 7 Aug 2024 18:25:22 +0200 Subject: [PATCH 5/6] Add a few metadata items, working --- .../configurations/image_tiff_jeol_cfg.py | 22 +++++++++++----- src/pynxtools_em/parsers/image_tiff_jeol.py | 26 ++++++++++++++----- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py b/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py index 12ef7f5..df6c584 100644 --- a/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py +++ b/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py @@ -26,18 +26,28 @@ "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]", "prefix_src": "", "map_to_f8": [ - ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "Mag"), + ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "CM_MAG"), ( "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", ureg.centimeter, - "WD/value", - "WD/Unit", + "SM_WD", + ureg.millimeter, ), ( "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", - ureg.picovolt, - "HV/value", - "HV/Unit", + ureg.volt, + "CM_ACCEL_VOLTAGE", + ureg.kilovolt, ), ], } + + +JEOL_VARIOUS_STATIC_TO_NX_EM = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab", + "prefix_src": "", + "use": [("FABRICATION[fabrication]/vendor", "JEOL")], + "map": [ + ("FABRICATION[fabrication]/model", "CM_INSTRUMENT"), + ], +} diff --git a/src/pynxtools_em/parsers/image_tiff_jeol.py b/src/pynxtools_em/parsers/image_tiff_jeol.py index 289e386..4987cc1 100644 --- a/src/pynxtools_em/parsers/image_tiff_jeol.py +++ 
b/src/pynxtools_em/parsers/image_tiff_jeol.py @@ -26,8 +26,10 @@ from PIL import Image, ImageSequence from pint import UnitRegistry from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint - -# from pynxtools_em.configurations.image_tiff_jeol_cfg import JEOL_VARIOUS_DYNAMIC_TO_NX_EM +from pynxtools_em.configurations.image_tiff_jeol_cfg import ( + JEOL_VARIOUS_DYNAMIC_TO_NX_EM, + JEOL_VARIOUS_STATIC_TO_NX_EM, +) from pynxtools_em.parsers.image_tiff import TiffParser from pynxtools_em.utils.string_conversions import string_to_number @@ -111,7 +113,9 @@ def check_if_tiff_jeol(self): # and regular numpy / pure Python types, the mapping functor should # take care of resolving the cases properly if tmp[0] != "SM_MICRON_MARKER": - self.tmp["flat_dict_meta"][tmp[0]] = tmp[1] + self.tmp["flat_dict_meta"][tmp[0]] = string_to_number( + tmp[1] + ) else: self.tmp["flat_dict_meta"][tmp[0]] = pint.Quantity(tmp[1]) else: @@ -124,7 +128,7 @@ def check_if_tiff_jeol(self): print(f"{key}______{type(value)}____{value}") if ( - self.tmp["flat_dict_meta"]["SEM_DATA_VERSION"] == "1" + self.tmp["flat_dict_meta"]["SEM_DATA_VERSION"] == 1 and self.tmp["flat_dict_meta"]["CM_LABEL"] == "JEOL" ): self.supported = True @@ -223,8 +227,6 @@ def process_event_data_em_data(self, template: dict) -> dict: return template def add_various_dynamic(self, template: dict) -> dict: - pass - """ identifier = [self.entry_id, self.event_id, 1] add_specific_metadata_pint( JEOL_VARIOUS_DYNAMIC_TO_NX_EM, @@ -232,7 +234,16 @@ def add_various_dynamic(self, template: dict) -> dict: identifier, template, ) - """ + return template + + def add_various_static(self, template: dict) -> dict: + identifier = [self.entry_id, self.event_id, 1] + add_specific_metadata_pint( + JEOL_VARIOUS_STATIC_TO_NX_EM, + self.tmp["flat_dict_meta"], + identifier, + template, + ) return template def process_event_data_em_metadata(self, template: dict) -> dict: @@ -240,5 +251,6 @@ def 
process_event_data_em_metadata(self, template: dict) -> dict: # contextualization to understand how the image relates to the EM session print(f"Mapping some of JEOL metadata on respective NeXus concepts...") self.add_various_dynamic(template) + self.add_various_static(template) # ... add more as required ... return template From 9018afe68f549aa6618e84d591a0ca06424eaaf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20K=C3=BChbach?= Date: Mon, 26 Aug 2024 14:14:32 +0200 Subject: [PATCH 6/6] Parser for TESCAN TIFF, fixes #26 (#45) * Initial implementation of working TESCAN parser * Parser for Zeiss TIFF parser, fixes #31 (#46) * Initial version of the implementation for the Zeiss parser, activation of TESCAN metadata, testing remains * Fixed bug that more than one ureg was used across different parsers, fixed bug that mapping to a target quantity was ignored for case_three_str * Prepare nionswift * Parser for Nion Co. nd binary and HDF5 files to NeXus, fixes #27 (#53) * Implementation generic plotting and initial metadata mapping table * Added interfacing to data and mdata processing functions, initial plots working, need to update pynxtools upstream, need to test metadata * Suggestions from Lukas * Minor linting * Tested plot generation functional including default plots for multi-signal example from B. Haas and pytchography dataset from Anton, Sherjeel et al. 
* Initial test of writing metadata, several already written but three issues i) some metadata not in expected places due to unclear strategy when certain information end up where in nion metadata, ii) some concepts like corrector_cs not resolved and thus not autoannotated and thus will likely create problems in nomad, iii) only hotfixing currently the case that information written to template is not checked for whether it exists already and thus also logical situations like I know already what this detector is can not be handled automatically but only by overwriting * Further fixes for Nion metadata parsing * Reconfigured location of aberrations * Fixed variable typo * Fixing several so far incorrectly resolved metadata concepts * Fixed except for one dwell_time for event 1,3,4,12 for multisignal B. Haas datasets all issues * Parser for Gatan DigitalMicrograph DM3/DM4, fixes #12 (#54) * Starting code for DigitalMicrograph * Test and lint with several python versions, fixes #41 * Fix whitespace * Working initial version of generic plotting for Gatan with default plot resolving, next steps, test all cases, activate metadata * Working initial working of a Gatan parser, metadata are mapped but substantial discussion with the community is required * Parser for Hitachi TIFF, fixes #44 (#55) * Reorganized locations of tfs concepts * Working Hitachi parser * Enabling HDR sidecar metadata file for TESCAN (#56) * Preparing for TESCAN sidecar file, next steps: i) complete this, then cleaning, and final tests * Fixed incorrect routing Zeiss, mypy issue still remains * Fixed mypy, possibly a mypy bug, just switching order changed it? 
* Rm unused variables (#57) * First round of unnecessary variables removed, first round of unnecessary complicated variable names simplified * Equalized all call lines for individual parsers, removed deprecated term subparser * Further reduction of code bloat from unnecessary small functions but called with the similar configuration * Refactoring, suggestions from lukaspiel * Preparing docs update (#59) * Preparing docs update * Tested automated mapping of as a suggestion how to implement #321 of www.github.com/FAIRmat-NFDI/pynxtools * conventions parser using v3 and tested * eln and oasis parser using v3 and tested * Minor formatting and adding of conventions and eln/oasis cfg parser in docs * axon parser using v3 and tested * gatan parser using v3 and tested * hitachi parser using v3 and tested * jeol parser using v3 and tested * nion parser using v3 and tested * point electronic parser using v3 and tested * tescan parser using v3 and tested * zeiss parser using v3 and tested but some more concepts should be mapped * tfs/fei tiff parser using v3, testing remains * Some changes on the Velox parser but not complete (refactoring for images and testing) remains * Refactored naming of mapping tables to aid automated lexical sorting during linting * Fix bug with case_three_list handle when concatenating a sequence of pint quantities with units to an array pint quantity for a scaled that same multiplicative unit * tfs parser using v3 and tested * velox refactored and tested, mapping table names shortened, but further testing for velox is required * apex parser using v3 and tested * Suggestions from @lukaspiel for all em PRs in this chain (#61) * Suggestions from @lukaspiel for #60 * Reviewer comments @lukaspiel #57 * Review comments @lukaspiel #55 * Remaining changes of @lukaspiel * Nion kV to V --------- Co-authored-by: mkuehbach * Further testing and modernization to match NXem v3, fixes #28 (#60) * conventions parser using v3 and tested * eln and oasis parser using 
v3 and tested * Minor formatting and adding of conventions and eln/oasis cfg parser in docs * axon parser using v3 and tested * gatan parser using v3 and tested * hitachi parser using v3 and tested * jeol parser using v3 and tested * nion parser using v3 and tested * point electronic parser using v3 and tested * tescan parser using v3 and tested * zeiss parser using v3 and tested but some more concepts should be mapped * tfs/fei tiff parser using v3, testing remains * Some changes on the Velox parser but not complete (refactoring for images and testing) remains * Refactored naming of mapping tables to aid automated lexical sorting during linting * Fix bug with case_three_list handle when concatenating a sequence of pint quantities with units to an array pint quantity for a scaled that same multiplicative unit * tfs parser using v3 and tested * velox refactored and tested, mapping table names shortened, but further testing for velox is required * apex parser using v3 and tested * Suggestions from @lukaspiel for all em PRs in this chain (#61) * Suggestions from @lukaspiel for #60 * Reviewer comments @lukaspiel #57 * Review comments @lukaspiel #55 * Remaining changes of @lukaspiel * Nion kV to V --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --------- Co-authored-by: mkuehbach --- .github/workflows/pylint.yml | 21 +- .vscode/launch.json | 46 +- dev-requirements.txt | 2 +- dev/test_vars.ipynb | 127 +++ docs/how-tos/kikuchi.md | 8 +- docs/index.md | 20 +- docs/reference/conventions.md | 3 + docs/reference/eln_and_cfg.md | 3 + docs/reference/gatan.md | 9 + docs/reference/nion.md | 328 +++++++- docs/reference/{png.md => png_axon.md} | 2 +- docs/reference/tiff_hitachi.md | 12 + docs/reference/{tiff.md 
=> tiff_jeol.md} | 3 +- docs/reference/tiff_point.md | 8 + docs/reference/tiff_tescan.md | 22 + docs/reference/tiff_tfs.md | 12 + docs/reference/tiff_zeiss.md | 12 + docs/reference/velox.md | 2 +- mkdocs.yaml | 17 +- pyproject.toml | 2 +- .../concepts/mapping_functors_pint.py | 425 +++++----- src/pynxtools_em/concepts/nxs_concepts.py | 5 +- .../concepts/nxs_em_eds_indexing.py | 23 +- src/pynxtools_em/concepts/nxs_image_set.py | 73 +- src/pynxtools_em/concepts/nxs_object.py | 37 +- src/pynxtools_em/concepts/nxs_spectrum_set.py | 73 +- src/pynxtools_em/configurations/README.md | 10 +- .../configurations/conventions_cfg.py | 16 +- src/pynxtools_em/configurations/eln_cfg.py | 58 +- .../image_png_protochips_cfg.py | 62 +- .../configurations/image_tiff_hitachi_cfg.py | 62 ++ .../configurations/image_tiff_jeol_cfg.py | 13 +- .../image_tiff_point_electronic_cfg.py | 13 +- .../configurations/image_tiff_tescan_cfg.py | 95 +++ .../configurations/image_tiff_tfs_cfg.py | 398 ++++++++-- .../configurations/image_tiff_zeiss_cfg.py | 60 ++ src/pynxtools_em/configurations/nion_cfg.py | 401 ++++++++++ src/pynxtools_em/configurations/oasis_cfg.py | 10 +- .../configurations/rsciio_gatan_cfg.py | 110 +++ .../configurations/rsciio_velox_cfg.py | 100 +-- ...ention_reader.py => conventions_reader.py} | 49 +- src/pynxtools_em/parsers/hfive_apex.py | 72 +- src/pynxtools_em/parsers/image_base.py | 14 +- .../parsers/image_png_protochips.py | 233 +++--- src/pynxtools_em/parsers/image_tiff.py | 12 +- .../parsers/image_tiff_hitachi.py | 210 +++++ src/pynxtools_em/parsers/image_tiff_jeol.py | 104 ++- .../parsers/image_tiff_point_electronic.py | 94 +-- src/pynxtools_em/parsers/image_tiff_tescan.py | 253 ++++++ src/pynxtools_em/parsers/image_tiff_tfs.py | 314 +++----- .../parsers/image_tiff_tfs_concepts.py | 299 ------- src/pynxtools_em/parsers/image_tiff_zeiss.py | 252 ++++++ src/pynxtools_em/parsers/nxs_imgs.py | 50 +- src/pynxtools_em/parsers/nxs_mtex.py | 6 +- 
src/pynxtools_em/parsers/nxs_nion.py | 347 ++++++--- src/pynxtools_em/parsers/nxs_pyxem.py | 66 +- .../parsers/oasis_config_reader.py | 64 +- src/pynxtools_em/parsers/oasis_eln_reader.py | 50 +- src/pynxtools_em/parsers/rsciio_base.py | 5 +- src/pynxtools_em/parsers/rsciio_bruker.py | 33 +- src/pynxtools_em/parsers/rsciio_gatan.py | 270 +++++++ src/pynxtools_em/parsers/rsciio_velox.py | 733 +++++------------- src/pynxtools_em/reader.py | 64 +- src/pynxtools_em/utils/gatan_utils.py | 72 ++ src/pynxtools_em/utils/interpret_boolean.py | 22 +- src/pynxtools_em/utils/io_case_logic.py | 7 +- src/pynxtools_em/utils/nion_utils.py | 24 +- src/pynxtools_em/utils/numerics.py | 21 + src/pynxtools_em/utils/nx_default_plots.py | 4 +- .../utils/pint_custom_unit_registry.py | 69 ++ src/pynxtools_em/utils/rsciio_hspy_utils.py | 10 + src/pynxtools_em/utils/tfs_utils.py | 32 + src/pynxtools_em/utils/velox_utils.py | 72 ++ 73 files changed, 4390 insertions(+), 2240 deletions(-) create mode 100644 dev/test_vars.ipynb create mode 100644 docs/reference/conventions.md create mode 100644 docs/reference/eln_and_cfg.md create mode 100644 docs/reference/gatan.md rename docs/reference/{png.md => png_axon.md} (89%) create mode 100644 docs/reference/tiff_hitachi.md rename docs/reference/{tiff.md => tiff_jeol.md} (91%) create mode 100644 docs/reference/tiff_point.md create mode 100644 docs/reference/tiff_tescan.md create mode 100644 docs/reference/tiff_tfs.md create mode 100644 docs/reference/tiff_zeiss.md create mode 100644 src/pynxtools_em/configurations/image_tiff_hitachi_cfg.py create mode 100644 src/pynxtools_em/configurations/image_tiff_tescan_cfg.py create mode 100644 src/pynxtools_em/configurations/image_tiff_zeiss_cfg.py create mode 100644 src/pynxtools_em/configurations/nion_cfg.py create mode 100644 src/pynxtools_em/configurations/rsciio_gatan_cfg.py rename src/pynxtools_em/parsers/{convention_reader.py => conventions_reader.py} (74%) create mode 100644 
src/pynxtools_em/parsers/image_tiff_hitachi.py create mode 100644 src/pynxtools_em/parsers/image_tiff_tescan.py delete mode 100644 src/pynxtools_em/parsers/image_tiff_tfs_concepts.py create mode 100644 src/pynxtools_em/parsers/image_tiff_zeiss.py create mode 100644 src/pynxtools_em/parsers/rsciio_gatan.py create mode 100644 src/pynxtools_em/utils/gatan_utils.py create mode 100644 src/pynxtools_em/utils/numerics.py create mode 100644 src/pynxtools_em/utils/pint_custom_unit_registry.py create mode 100644 src/pynxtools_em/utils/tfs_utils.py create mode 100644 src/pynxtools_em/utils/velox_utils.py diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 9a33ac3..c227195 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,23 +7,24 @@ env: jobs: linting: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + steps: - uses: actions/checkout@v4 - - name: Set up Python 3.10 + with: + fetch-depth: 0 + submodules: recursive + - name: Set up Python ${{ matrix.python_version }} uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: ${{ matrix.python_version }} - name: Install dependencies run: | - git submodule sync --recursive - git submodule update --init --recursive --jobs=4 curl -LsSf https://astral.sh/uv/install.sh | sh - - name: Install package - run: | - uv pip install --no-deps . 
- - name: Install dev requirements - run: | - uv pip install -r dev-requirements.txt + uv pip install ".[dev,docs]" - name: ruff check run: | ruff check src/pynxtools_em tests diff --git a/.vscode/launch.json b/.vscode/launch.json index 43831d0..0341c9a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -11,15 +11,51 @@ "cwd": "${workspaceFolder}", "program": "../.py3.12.4/bin/dataconverter", "args": ["convert", - // "examples/eln_data.yaml", - // "examples/em.oasis.specific.yaml", - "../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.txt", - "../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.tif", + // eln_data/oasis_specific + //"examples/em.conventions.yaml", + //"examples/eln_data.yaml", + //"examples/em.oasis.specific.yaml", + //"../axon/ReductionOfFeOxSmall.zip", + //"../digitalmicrograph/218.ebdf722abcf63000e2fa71fc6a72cd4b4747991c702a2dc65e400196f990cbad.dm3", + //"../digitalmicrograph/241.a2c338fd458e6b7023ec946a5e3ce8c85bd2befcb5d17dae7ae5f44b2dede81b.dm4", + //"../hitachi/360.tif", + //"../hitachi/360.txt", + //"../hitachi/TemHitachiImageScExample.txt", + //"../jeol/20240227_A1_2m_0_FA3_1.tif", + //"../jeol/20240227_A1_2m_0_FA3_1.txt", + //"../nion-parsing/2022-02-18_Metadata_Kuehbach.zip", + //"../point/Defekt1.tif", + //"../tescan/Ti3C2VacDriedFilm19.tif", + //"../tescan/Ti3C2VacDriedFilm19.hdr", + //"../tescan/CZ04-2_102_Pic_2.tif", + //"zeiss/SE2.tif", + //"tfs/0c8nA_3deg_003_AplusB_test.tif", + //"tfs/ALN_baoh_021.tif", + //"tfs/ETD_image.tif", + //"tfs/NavCam_normal_vis_light_ccd.tif", + //"tfs/T3_image.tif", + //"velox/CG71113 1138 Ceta 660 mm Camera.emd", + //"../apex/InGaN_nanowires_linescan.edaxh5", + //"../apex/InGaN_nanowires_map.edaxh5", + //"../apex/InGaN_nanowires_spectra.edaxh5", + //"../apex/2023-08-16_Ni_NFDI.edaxh5", "--reader", "em", "--nxdl", "NXem", - "--output=dbg/dbg.nxs"], + //"--output=dbg/conv.nxs", + //"--output=dbg/eln.nxs", + //"--output=dbg/axon.nxs", + 
//"--output=dbg/gatan_dm3.nxs", + //"--output=dbg/gatan_dm4.nxs", + //"--output=dbg/hitachi.nxs", + //"--output=dbg/jeol.nxs", + //"--output=dbg/nion.nxs", + //"--output=dbg/ebic.nxs", + //"--output=dbg/tescan.nxs", + //"--output=pynxtools_em/dbg/zeiss.nxs", + //"--output=pynxtools_em/dbg/tfs.nxs", + //"--output=dbg/apex.nxs"], } ] } \ No newline at end of file diff --git a/dev-requirements.txt b/dev-requirements.txt index f284b9b..d4babf4 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -564,7 +564,7 @@ pymdown-extensions==10.9 # via # pynxtools-em (pyproject.toml) # mkdocs-material -pynxtools @ git+https://github.com/FAIRmat-NFDI/pynxtools@622c580c75f9bcc4b5c2de57884f389864614818 +pynxtools @ git+https://github.com/FAIRmat-NFDI/pynxtools@9c46cc99a86736dcba137ed2ed9b19078372f567 # via pynxtools-em (pyproject.toml) pyparsing==3.1.2 # via matplotlib diff --git a/dev/test_vars.ipynb b/dev/test_vars.ipynb new file mode 100644 index 0000000..3fc1190 --- /dev/null +++ b/dev/test_vars.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "id": "815e2bd0-2ff4-424c-9512-a5cc60bf320a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| Concept | NeXus/HDF5 |\n", + "| --------------- | -------------- |\n", + "| Device | :heavy_check_mark: |\n", + "| EmissionCurrent | :heavy_check_mark: |\n", + "| HV | :heavy_check_mark: |\n", + "| Magnification | :heavy_check_mark: |\n", + "| PredictedBeamCurrent | :heavy_check_mark: |\n", + "| SerialNumber | :heavy_check_mark: |\n", + "| SpecimenCurrent | :heavy_check_mark: |\n", + "| SpotSize | :heavy_check_mark: |\n", + "| StageRotation | :heavy_check_mark: |\n", + "| StageTilt | :heavy_check_mark: |\n", + "| StageX | :heavy_check_mark: |\n", + "| StageY | :heavy_check_mark: |\n", + "| StageZ | :heavy_check_mark: |\n", + "| StigmatorX | :heavy_check_mark: |\n", + "| StigmatorY | :heavy_check_mark: |\n", + "| WD | :heavy_check_mark: 
|\n" + ] + } + ], + "source": [ + "import pynxtools_em.configurations.image_tiff_tescan_cfg as tescan_cfg\n", + "import pynxtools_em.configurations.nion_cfg as nion_cfg\n", + "from pynxtools_em.concepts.mapping_functors_pint import get_case\n", + "thisone = {\"NION\": nion_cfg,\n", + " \"TESCAN\": tescan_cfg}\n", + "\n", + "tech_partner = \"TESCAN\"\n", + "a = vars(thisone[tech_partner])\n", + "src_concepts = set()\n", + "verbose = False\n", + "for key, obj in a.items():\n", + " if key.startswith(tech_partner):\n", + " if verbose:\n", + " print(key)\n", + " prefix_src = []\n", + " concepts = []\n", + " for k, v in obj.items():\n", + " if not k.startswith(\"prefix\"):\n", + " if k != \"use\":\n", + " if isinstance(v, list):\n", + " for entry in v:\n", + " case = get_case(entry)\n", + " if verbose:\n", + " print(f\"{entry}\")\n", + " print(f\">>>>{get_case(entry)}\")\n", + " if case == \"case_one\":\n", + " concepts.append(entry)\n", + " if case == \"case_two_str\": \n", + " concepts.append(entry[1])\n", + " elif case == \"case_five_str\":\n", + " concepts.append(entry[2])\n", + " # TODO::add remaining cases\n", + " elif case == \"case_five_list\":\n", + " for val in entry[2]:\n", + " concepts.append(val)\n", + " elif k == \"prefix_src\":\n", + " if isinstance(v, str):\n", + " prefix_src.append(v)\n", + " elif isinstance(v, list):\n", + " for val in v:\n", + " prefix_src.append(val)\n", + " for prefix in prefix_src:\n", + " for concept in concepts:\n", + " src_concepts.add(f\"{prefix}{concept}\")\n", + "\n", + "print(\"| Concept | NeXus/HDF5 |\")\n", + "print(\"| --------------- | -------------- |\")\n", + "for src_concept in sorted(src_concepts):\n", + " print(f\"| {src_concept} | :heavy_check_mark: |\")\n", + " # print(obj)\n", + " # get case\n", + " # fish all src concepts to build sorted list of {prefix_src}/src for all {prefix_src}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b753eec4-0bea-4e39-80c6-520f5e00bd9f", + "metadata": {}, + 
"outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0962859-5aef-49d8-8832-f9fb7a30921e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/how-tos/kikuchi.md b/docs/how-tos/kikuchi.md index f0fa853..fc42168 100644 --- a/docs/how-tos/kikuchi.md +++ b/docs/how-tos/kikuchi.md @@ -5,7 +5,7 @@ The following diagram shows a comprehensive example how diverse datasets from Ki -[Further details to the parsing route via MTex](mtex.md) -[Further details to the parsing route via pyxem](pyxem.md) -[Further details to the NXem_ebsd base class](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXem_ebsd.html#nxem-ebsd) -[Further details to the NXem application definition](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXem.html#nxem) +- [Further details to the parsing route via MTex](mtex.md) +- [Further details to the parsing route via pyxem](pyxem.md) +- [Further details to the NXem_ebsd base class](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXem_ebsd.html#nxem-ebsd) +- [Further details to the NXem application definition](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXem.html#nxem) diff --git a/docs/index.md b/docs/index.md index 5615d42..08eb071 100644 --- a/docs/index.md +++ b/docs/index.md @@ -55,11 +55,23 @@ Here you can learn which specific pieces of information and concepts pynxtools-e for the respective file formats of technology partners of 
the electron microscopy community. - [How to map pieces of information to NeXus](reference/contextualization.md) -- [Tagged Image File Format (TIFF)](reference/tiff.md) -- [Portable Network Graphics (PNG)](reference/png.md) -- [Velox EMD](reference/velox.md) + +- [Conventions collected with a text file or ELN](reference/conventions.md) +- [Metadata collected with an ELN and RDM-specific configurations](reference/eln_and_cfg.md) + +- [AXON Protochips Portable Network Graphics PNG](reference/png_axon.md) - [EDAX APEX](reference/apex.md) -- [Nion Co. projects](reference/nion.md) +- [Gatan DigitalMicrograph DM3/DM4](reference/gatan.md) +- [Hitachi Tagged Image File Format TIFF](reference/tiff_hitachi.md) +- [JEOL Tagged Image File Format TIFF](reference/tiff_jeol.md) +- [Nion Co. projects with NDATA and HDF5 files](reference/nion.md) +- [Point Electronic DISS Tagged Image File Format TIFF](reference/tiff_point.md) +- [TESCAN Tagged Image File Format TIFF](reference/tiff_tescan.md) +- [ThermoFisher Tagged Image File Format TIFF](reference/tiff_tfs.md) +- [ThermoFisher Velox EMD](reference/velox.md) +- [Zeiss Tagged Image File Format TIFF](reference/tiff_zeiss.md) +- [EBSD-centric content for the parsing route via MTex](how-tos/mtex.md) +- [EBSD-centric content for the parsing route via pyxem](how-tos/pyxem.md) diff --git a/docs/reference/conventions.md b/docs/reference/conventions.md new file mode 100644 index 0000000..49a930e --- /dev/null +++ b/docs/reference/conventions.md @@ -0,0 +1,3 @@ +# Conventions collected with a text file or ELN + + diff --git a/docs/reference/eln_and_cfg.md b/docs/reference/eln_and_cfg.md new file mode 100644 index 0000000..6568985 --- /dev/null +++ b/docs/reference/eln_and_cfg.md @@ -0,0 +1,3 @@ +# Metadata collected with an ELN and RDM-specific configurations + + diff --git a/docs/reference/gatan.md b/docs/reference/gatan.md new file mode 100644 index 0000000..b33853b --- /dev/null +++ b/docs/reference/gatan.md @@ -0,0 +1,9 @@ +# Gatan 
DigitalMicrograph DM3/DM4 + +The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + + + diff --git a/docs/reference/nion.md b/docs/reference/nion.md index 685cdbc..cc853f1 100644 --- a/docs/reference/nion.md +++ b/docs/reference/nion.md @@ -1,3 +1,329 @@ -# Nion Co. projects +# Nion Co. projects with NDATA and HDF5 files The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + +| Concept | NeXus/HDF5 | +| --------------- | -------------- | +| metadata/hardware_source/ImageRonchigram/C1 ConstW | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C10 | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C12.a | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C12.b | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C2 ConstW | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C21.a | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C21.b | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C23.a | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C23.b | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C3 ConstW | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C30 | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C32.a | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C32.b | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C34.a | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C34.b | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/C50 | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/EHT | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/GeometricProbeSize | :heavy_check_mark: | +| 
metadata/hardware_source/ImageRonchigram/MajorOL | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/StageOutA | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/StageOutB | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/StageOutX | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/StageOutY | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/StageOutZ | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/fov_nm | :heavy_check_mark: | +| metadata/hardware_source/ImageRonchigram/probe_ha | :heavy_check_mark: | +| metadata/hardware_source/ac_frame_sync | :heavy_check_mark: | +| metadata/hardware_source/ac_line_sync | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/C1 ConstW | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/C2 ConstW | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/C3 ConstW | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/EHT | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/GeometricProbeSize | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/MajorOL | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/StageOutA | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/StageOutB | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/StageOutX | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/StageOutY | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/StageOutZ | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageRonchigram/fov_nm | :heavy_check_mark: | +| 
metadata/hardware_source/autostem/ImageRonchigram/probe_ha | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C1 ConstW | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C10 | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C12.a | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C12.b | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C2 ConstW | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C21.a | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C21.b | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C23.a | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C23.b | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C3 ConstW | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C30 | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C32.a | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C32.b | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C34.a | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C34.b | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/C50 | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/EHT | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/GeometricProbeSize | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/MajorOL | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/StageOutA | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/StageOutB | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/StageOutX | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/StageOutY | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/StageOutZ | 
:heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/fov_nm | :heavy_check_mark: | +| metadata/hardware_source/autostem/ImageScanned/probe_ha | :heavy_check_mark: | +| metadata/hardware_source/calibration_style | :heavy_check_mark: | +| metadata/hardware_source/center_x_nm | :heavy_check_mark: | +| metadata/hardware_source/center_y_nm | :heavy_check_mark: | +| metadata/hardware_source/channel_modifier | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/beam_center_x | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/beam_center_y | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/bit_depth_image | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/bit_depth_readout | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/count_time | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/countrate_correction_applied | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/data_collection_date | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/description | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/detector_number | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/detector_readout_time | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/eiger_fw_version | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/flatfield_correction_applied | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/frame_time | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/pixel_mask_applied | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/sensor_material | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/sensor_thickness | 
:heavy_check_mark: | +| metadata/hardware_source/detector_configuration/software_version | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/threshold_energy | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/x_pixel_size | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/x_pixels_in_detector | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/y_pixel_size | :heavy_check_mark: | +| metadata/hardware_source/detector_configuration/y_pixels_in_detector | :heavy_check_mark: | +| metadata/hardware_source/external_clock_mode | :heavy_check_mark: | +| metadata/hardware_source/external_clock_wait_time_ms | :heavy_check_mark: | +| metadata/hardware_source/flyback_time_us | :heavy_check_mark: | +| metadata/hardware_source/line_time_us | :heavy_check_mark: | +| metadata/hardware_source/pixel_time_us | :heavy_check_mark: | +| metadata/hardware_source/rotation_rad | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C1 ConstW | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C10 | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C12.a | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C12.b | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C2 ConstW | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C21.a | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C21.b | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C23.a | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C23.b | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C3 ConstW | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C30 | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C32.a | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C32.b | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C34.a | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C34.b | 
:heavy_check_mark: | +| metadata/instrument/ImageRonchigram/C50 | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/EHT | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/GeometricProbeSize | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/MajorOL | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/StageOutA | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/StageOutB | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/StageOutX | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/StageOutY | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/StageOutZ | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/fov_nm | :heavy_check_mark: | +| metadata/instrument/ImageRonchigram/probe_ha | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C1 ConstW | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C10 | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C12.a | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C12.b | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C2 ConstW | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C21.a | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C21.b | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C23.a | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C23.b | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C3 ConstW | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C30 | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C32.a | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C32.b | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C34.a | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C34.b | :heavy_check_mark: | +| metadata/instrument/ImageScanned/C50 | :heavy_check_mark: | +| metadata/instrument/ImageScanned/EHT | :heavy_check_mark: 
| +| metadata/instrument/ImageScanned/GeometricProbeSize | :heavy_check_mark: | +| metadata/instrument/ImageScanned/MajorOL | :heavy_check_mark: | +| metadata/instrument/ImageScanned/StageOutA | :heavy_check_mark: | +| metadata/instrument/ImageScanned/StageOutB | :heavy_check_mark: | +| metadata/instrument/ImageScanned/StageOutX | :heavy_check_mark: | +| metadata/instrument/ImageScanned/StageOutY | :heavy_check_mark: | +| metadata/instrument/ImageScanned/StageOutZ | :heavy_check_mark: | +| metadata/instrument/ImageScanned/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/instrument/ImageScanned/fov_nm | :heavy_check_mark: | +| metadata/instrument/ImageScanned/probe_ha | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/C1 ConstW | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/C2 ConstW | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/C3 ConstW | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/EHT | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/GeometricProbeSize | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/MajorOL | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/StageOutA | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/StageOutB | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/StageOutX | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/StageOutY | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/StageOutZ | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/fov_nm | :heavy_check_mark: | +| metadata/instrument/autostem/ImageRonchigram/probe_ha | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C10 | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C12.a | 
:heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C12.b | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C21.a | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C21.b | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C23.a | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C23.b | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C30 | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C32.a | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C32.b | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C34.a | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C34.b | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/C50 | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/EHT | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/GeometricProbeSize | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/StageOutA | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/StageOutB | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/StageOutX | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/StageOutY | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/StageOutZ | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/fov_nm | :heavy_check_mark: | +| metadata/instrument/autostem/ImageScanned/probe_ha | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/ac_frame_sync | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/ac_line_sync | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/calibration_style | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/center_x_nm | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/center_y_nm | 
:heavy_check_mark: | +| metadata/scan/scan_device_parameters/channel_modifier | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/external_clock_mode | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/external_clock_wait_time_ms | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/flyback_time_us | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/line_time_us | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/pixel_time_us | :heavy_check_mark: | +| metadata/scan/scan_device_parameters/rotation_rad | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C1 ConstW | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C10 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C12.a | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C12.b | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C2 ConstW | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C21.a | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C21.b | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C23.a | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C23.b | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C3 ConstW | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C30 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C32.a | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C32.b | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C34.a | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C34.b | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:C50 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:EHT | :heavy_check_mark: | +| 
metadata/scan/scan_device_properties/ImageScanned:GeometricProbeSize | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:MajorOL | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:StageOutA | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:StageOutB | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:StageOutX | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:StageOutY | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:StageOutZ | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:fov_nm | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ImageScanned:probe_ha | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 0 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 1 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 10 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 11 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 2 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 3 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 4 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 5 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 6 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 7 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 8 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 DAC 9 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 0 Relay | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 0 | :heavy_check_mark: | 
+| metadata/scan/scan_device_properties/MagBoard 1 DAC 1 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 10 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 11 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 2 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 3 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 4 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 5 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 6 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 7 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 8 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 DAC 9 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/MagBoard 1 Relay | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ac_frame_sync | :heavy_check_mark: | +| metadata/scan/scan_device_properties/ac_line_sync | :heavy_check_mark: | +| metadata/scan/scan_device_properties/calibration_style | :heavy_check_mark: | +| metadata/scan/scan_device_properties/center_x_nm | :heavy_check_mark: | +| metadata/scan/scan_device_properties/center_y_nm | :heavy_check_mark: | +| metadata/scan/scan_device_properties/channel_modifier | :heavy_check_mark: | +| metadata/scan/scan_device_properties/external_clock_mode | :heavy_check_mark: | +| metadata/scan/scan_device_properties/external_clock_wait_time_ms | :heavy_check_mark: | +| metadata/scan/scan_device_properties/flyback_time_us | :heavy_check_mark: | +| metadata/scan/scan_device_properties/line_time_us | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 0 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 1 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 
DAC 10 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 11 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 2 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 3 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 4 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 5 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 6 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 7 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 8 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 DAC 9 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 0 Relay | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 0 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 1 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 10 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 11 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 2 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 3 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 4 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 5 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 6 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 7 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 8 | :heavy_check_mark: | +| 
metadata/scan/scan_device_properties/mag_boards/MagBoard 1 DAC 9 | :heavy_check_mark: | +| metadata/scan/scan_device_properties/mag_boards/MagBoard 1 Relay | :heavy_check_mark: | +| metadata/scan/scan_device_properties/pixel_time_us | :heavy_check_mark: | +| metadata/scan/scan_device_properties/rotation_rad | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C1 ConstW | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C10 | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C12.a | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C12.b | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C2 ConstW | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C21.a | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C21.b | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C23.a | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C23.b | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C3 ConstW | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C30 | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C32.a | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C32.b | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C34.a | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C34.b | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/C50 | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/EHT | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/GeometricProbeSize | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/MajorOL | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/StageOutA | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/StageOutB | :heavy_check_mark: | +| 
metadata/scan_detector/autostem/ImageScanned/StageOutX | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/StageOutY | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/StageOutZ | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/SuperFEG.^EmissionCurrent | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/fov_nm | :heavy_check_mark: | +| metadata/scan_detector/autostem/ImageScanned/probe_ha | :heavy_check_mark: | diff --git a/docs/reference/png.md b/docs/reference/png_axon.md similarity index 89% rename from docs/reference/png.md rename to docs/reference/png_axon.md index 8b65dc2..57fb6bd 100644 --- a/docs/reference/png.md +++ b/docs/reference/png_axon.md @@ -1,4 +1,4 @@ -# Portable Network Graphics (PNG) +# AXON Protochips Portable Network Graphics PNG The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: diff --git a/docs/reference/tiff_hitachi.md b/docs/reference/tiff_hitachi.md new file mode 100644 index 0000000..54c62de --- /dev/null +++ b/docs/reference/tiff_hitachi.md @@ -0,0 +1,12 @@ +# Hitachi Tagged Image File Format TIFF + +The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + + + + + + diff --git a/docs/reference/tiff.md b/docs/reference/tiff_jeol.md similarity index 91% rename from docs/reference/tiff.md rename to docs/reference/tiff_jeol.md index 9f83d02..349049e 100644 --- a/docs/reference/tiff.md +++ b/docs/reference/tiff_jeol.md @@ -1,5 +1,4 @@ -# Tagged Image File Format (TIFF) - +# JEOL Tagged Image File Format TIFF The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: diff --git a/docs/reference/tiff_point.md b/docs/reference/tiff_point.md new 
file mode 100644 index 0000000..fb97912 --- /dev/null +++ b/docs/reference/tiff_point.md @@ -0,0 +1,8 @@ +# point electronic DISS Tagged Image Format TIFF + +The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + + diff --git a/docs/reference/tiff_tescan.md b/docs/reference/tiff_tescan.md new file mode 100644 index 0000000..e1515e3 --- /dev/null +++ b/docs/reference/tiff_tescan.md @@ -0,0 +1,22 @@ +# TESCAN Tagged Image File Format TIFF + +The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + +| Concept | NeXus/HDF5 | +| --------------- | -------------- | +| Device | :heavy_check_mark: | +| EmissionCurrent | :heavy_check_mark: | +| HV | :heavy_check_mark: | +| Magnification | :heavy_check_mark: | +| PredictedBeamCurrent | :heavy_check_mark: | +| SerialNumber | :heavy_check_mark: | +| SpecimenCurrent | :heavy_check_mark: | +| SpotSize | :heavy_check_mark: | +| StageRotation | :heavy_check_mark: | +| StageTilt | :heavy_check_mark: | +| StageX | :heavy_check_mark: | +| StageY | :heavy_check_mark: | +| StageZ | :heavy_check_mark: | +| StigmatorX | :heavy_check_mark: | +| StigmatorY | :heavy_check_mark: | +| WD | :heavy_check_mark: | diff --git a/docs/reference/tiff_tfs.md b/docs/reference/tiff_tfs.md new file mode 100644 index 0000000..e8d5a48 --- /dev/null +++ b/docs/reference/tiff_tfs.md @@ -0,0 +1,12 @@ +# ThermoFisher Tagged Image File Format TIFF + +The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + + + + + + diff --git a/docs/reference/tiff_zeiss.md b/docs/reference/tiff_zeiss.md new file mode 100644 index 0000000..3edcdde --- /dev/null +++ b/docs/reference/tiff_zeiss.md @@ -0,0 +1,12 @@ +# Zeiss Tagged Image File Format TIFF + +The 
pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: + + + + + + diff --git a/docs/reference/velox.md b/docs/reference/velox.md index 9bc2836..29342b4 100644 --- a/docs/reference/velox.md +++ b/docs/reference/velox.md @@ -1,4 +1,4 @@ -# Velox EMD +# ThermoFisher Velox EMD The pynxtools-em parser and normalizer reads the following content and maps them on respective NeXus concepts that are defined in the NXem application definition: diff --git a/mkdocs.yaml b/mkdocs.yaml index 11c41a7..2b62c69 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -14,16 +14,27 @@ nav: - How-tos: # - how-tos/howto.md - how-tos/kikuchi.md + - how-tos/mtex.md + - how-tos/pyxem.md - Learn: - explanation/learn.md - explanation/implementation.md - Reference: - reference/contextualization.md - - reference/tiff.md - - reference/png.md - - reference/velox.md + - reference/conventions.md + - reference/eln_and_cfg.md + - reference/png_axon.md - reference/apex.md + - reference/gatan.md + - reference/tiff_hitachi.md + - reference/tiff_jeol.md - reference/nion.md + - reference/tiff_point.md + - reference/tiff_tescan.md + - reference/tiff_tfs.md + - reference/velox.md + - reference/tiff_zeiss.md + plugins: - search - macros: diff --git a/pyproject.toml b/pyproject.toml index 9571584..d6249b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ ] dependencies = [ "pint==0.17", - "pynxtools @ git+https://github.com/FAIRmat-NFDI/pynxtools@sprint23_em_v3", + "pynxtools @ git+https://github.com/FAIRmat-NFDI/pynxtools@bypass_v06", "rosettasciio>=0.4", "kikuchipy>=0.9.0", "pyxem>=0.16.0", diff --git a/src/pynxtools_em/concepts/mapping_functors_pint.py b/src/pynxtools_em/concepts/mapping_functors_pint.py index b17c026..db2a1de 100644 --- a/src/pynxtools_em/concepts/mapping_functors_pint.py +++ b/src/pynxtools_em/concepts/mapping_functors_pint.py @@ -18,30 +18,18 @@ """Utilities 
for working with NeXus concepts encoded as Python dicts in the concepts dir.""" from datetime import datetime -from typing import Any +from typing import Any, Dict import flatdict as fd import numpy as np -import pint import pytz -from pint import UnitRegistry from pynxtools_em.utils.get_file_checksum import get_sha256_of_file_content from pynxtools_em.utils.interpret_boolean import try_interpret_as_boolean +from pynxtools_em.utils.pint_custom_unit_registry import is_not_special_unit, ureg from pynxtools_em.utils.string_conversions import rchop -ureg = UnitRegistry() -# ureg.formatter.default_format = "D" -# https://pint.readthedocs.io/en/stable/user/formatting.html -ureg.define("nx_unitless = 1") -ureg.define("nx_dimensionless = 1") -ureg.define("nx_any = 1") -NX_UNITLESS = pint.Quantity(1, ureg.nx_unitless) -NX_DIMENSIONLESS = pint.Quantity(1, ureg.nx_dimensionless) -NX_ANY = pint.Quantity(1, ureg.nx_any) - - -# best practice is use np.ndarray or np.generic as magnitude within that pint.Quantity! -MAP_TO_DTYPES = { +# best practice is use np.ndarray or np.generic as magnitude within that ureg.Quantity! +MAP_TO_DTYPES: Dict[str, type] = { "u1": np.uint8, "i1": np.int8, "u2": np.uint16, @@ -57,12 +45,12 @@ } # general conversion workflow -# 1. Normalize src data to str, bool, or pint.Quantity -# These pint.Quantities should use numpy scalar or array for the dtype of the magnitude. +# 1. Normalize src data to str, bool, or ureg.Quantity +# These ureg.Quantities should use numpy scalar or array for the dtype of the magnitude. # Use special NeXus unit categories unitless, dimensionless, and any. -# 2. Map on specific trg path, pint.Unit, eventually with conversions, and dtype conversion +# 2. Map on specific trg path, ureg.Unit, eventually with conversions, and dtype conversion # Later this could include endianness -# 3. Store pint.Quantity magnitude and if non-special also correctly converted @units +# 3. 
Store ureg.Quantity magnitude and if non-special also correctly converted @units # attribute @@ -85,15 +73,10 @@ def var_path_to_spcfc_path(path: str, instance_identifier: list): return nx_specific_path -def is_not_special_unit(units: pint.Unit) -> bool: - """True if not a special NeXus unit category.""" - for special_units in [NX_UNITLESS.units, NX_DIMENSIONLESS.units, NX_ANY.units]: - if units == special_units: - return False - return True - - def get_case(arg): + """Identify which case an instruction from the configuration belongs to. + Each case comes with specific instructions to resolve that are detailed + in the README.md in this source code directory.""" if isinstance(arg, str): # str return "case_one" elif isinstance(arg, tuple): @@ -103,28 +86,28 @@ def get_case(arg): return "case_two_str" elif isinstance(arg[1], list): return "case_two_list" - elif len(arg) == 3: # str, str | list, pint.Unit or str, pint.Unit, str | list + elif len(arg) == 3: # str, str | list, ureg.Unit or str, ureg.Unit, str | list if isinstance(arg[0], str): - if isinstance(arg[1], pint.Unit): + if isinstance(arg[1], ureg.Unit): if isinstance(arg[2], str): return "case_three_str" elif isinstance(arg[2], list): return "case_three_list" - elif (arg[2], pint.Unit): + elif (arg[2], ureg.Unit): if isinstance(arg[1], str): return "case_four_str" elif isinstance(arg[1], list): return "case_four_list" elif len(arg) == 4: - # str, pint.Unit, str | list, pint.Unit - # str, pint.Unit, str, str + # str, ureg.Unit, str | list, ureg.Unit + # str, ureg.Unit, str, str # last string points to unit string for situations where e.g. 
def map_to_dtype(trg_dtype: str, value: Any) -> Any:
    """Convert value to the numpy (or boolean) type requested via trg_dtype.

    trg_dtype is a numpy-style type code ("u1", "i1", ..., "f8") resolved
    through the module-level MAP_TO_DTYPES lookup, or "bool". Array-like
    input is converted with np.asarray, scalar input with the matching
    numpy scalar constructor (or try_interpret_as_boolean for "bool").
    Raises ValueError for an unknown trg_dtype and TypeError for array
    input that cannot be interpreted as boolean.
    """
    # error: Argument 1 has incompatible type "generic | bool | int | float | complex |
    # str | bytes | memoryview"; expected "str | bytes | SupportsIndex" [arg-type]
    if np.shape(value) != ():  # array-like input
        if trg_dtype in MAP_TO_DTYPES:
            if trg_dtype != "bool":
                return np.asarray(value, MAP_TO_DTYPES[trg_dtype])
            # FIX: the previous check `value.dtype is bool` compared a numpy
            # dtype object against the Python builtin type by identity and was
            # therefore always False; lists of booleans (no .dtype attribute)
            # additionally fell through silently returning None
            arr = np.asarray(value)
            if arr.dtype == bool:
                return arr
            raise TypeError(
                f"map_to_dtype, hitting unexpected case for array bool !"
            )
        else:
            raise ValueError(f"map_to_dtype, hitting unexpected case for array !")
    else:  # scalar input
        if trg_dtype in MAP_TO_DTYPES:
            if trg_dtype != "bool":
                that_type = MAP_TO_DTYPES[trg_dtype]
                return that_type(value)
            return try_interpret_as_boolean(value)
        else:
            raise ValueError(f"map_to_dtype, hitting unexpected case for scalar !")
def set_value(template: dict, trg: str, src_val: Any, trg_dtype: str = "") -> dict:
    """Set value in the template using trg.

    src_val can be a single value, an array, or a ureg.Quantity (scalar or array)
    """
    # np.issubdtype(np.uint32, np.signedinteger)
    if not trg_dtype:
        # keep whatever dtype src_val already carries
        if isinstance(src_val, str):
            # TODO this is not rigorous need to check for null-term also and str arrays
            # assumes I/O to HDF5 will write specific encoding, typically variable, null-term, utf8
            template[f"{trg}"] = src_val
        elif isinstance(src_val, ureg.Quantity):
            mag = src_val.magnitude
            # bool case typically not expected!
            if not (isinstance(mag, (np.ndarray, np.generic)) or np.isscalar(mag)):
                raise TypeError(
                    f"ureg.Quantity magnitude should use in-build, bool, or np !"
                )
            template[f"{trg}"] = mag
            if is_not_special_unit(src_val.units):
                template[f"{trg}/@units"] = f"{src_val.units}"
            print(
                f"WARNING::Assuming writing to HDF5 will auto-convert Python types to numpy type, trg {trg} !"
            )
        elif isinstance(src_val, list):
            if not all(isinstance(val, str) for val in src_val):
                raise TypeError(
                    f"Not List[str] {type(src_val)} found for not trg_dtype case !"
                )
            template[f"{trg}"] = ", ".join(src_val)
        elif (
            isinstance(src_val, (np.ndarray, np.generic))
            or np.isscalar(src_val)
            or isinstance(src_val, bool)
        ):
            # units may be required, need to be set explicitly elsewhere in the source code!
            template[f"{trg}"] = np.asarray(src_val)
            print(
                f"WARNING::Assuming writing to HDF5 will auto-convert Python types to numpy type, trg: {trg} !"
            )
        else:
            raise TypeError(
                f"Unexpected type {type(src_val)} found for not trg_dtype case !"
            )
    else:
        # explicit type conversion, e.g. in cases when tech partner writes
        # float32 but e.g. NeXus assumes float64
        if isinstance(src_val, (str, bool)):
            template[f"{trg}"] = try_interpret_as_boolean(src_val)
        elif isinstance(src_val, ureg.Quantity):
            mag = src_val.magnitude
            # array and scalar magnitudes take the same conversion path
            # (bool typically not expected)
            if isinstance(mag, (np.ndarray, np.generic)) or np.isscalar(mag):
                template[f"{trg}"] = map_to_dtype(trg_dtype, mag)
                if is_not_special_unit(src_val.units):
                    template[f"{trg}/@units"] = f"{src_val.units}"
            else:
                raise TypeError(
                    f"Unexpected type for explicit src_val.magnitude, set_value, trg {trg} !"
                )
        elif isinstance(src_val, (list, np.ndarray, np.generic)):
            # units may be required, need to be set explicitly elsewhere in the source code!
            template[f"{trg}"] = map_to_dtype(trg_dtype, np.asarray(src_val))
            print(
                f"WARNING::Assuming I/O to HDF5 will auto-convert to numpy type, trg: {trg} !"
            )
        elif np.isscalar(src_val):
            template[f"{trg}"] = map_to_dtype(trg_dtype, src_val)
            print(
                f"WARNING::Assuming I/O to HDF5 will auto-convert to numpy type, trg: {trg} !"
            )
        else:
            raise TypeError(
                f"Unexpected type for explicit type conversion, set_value, trg {trg} !"
            )
    return template
def use_functor(
    cmds: list, mdata: fd.FlatDict, prfx_trg: str, ids: list, template: dict
) -> dict:
    """Process concept mapping for simple predefined strings and pint quantities."""
    for cmd in cmds:
        # only (symbol, value) pairs are meaningful; anything else is ignored
        if not (isinstance(cmd, tuple) and len(cmd) == 2):
            continue
        symbol, value = cmd
        if isinstance(symbol, str) and isinstance(value, (str, ureg.Quantity, bool)):
            # str, str or str, ureg or str, bool
            trg = var_path_to_spcfc_path(f"{prfx_trg}/{symbol}", ids)
            set_value(template, trg, value)
    return template
if not all(src_val for src_val in src_values): + continue if not all(type(val) is type(src_values[0]) for val in src_values): continue trg = var_path_to_spcfc_path(f"{prfx_trg}/{cmd[0]}", ids) - set_value(template, trg, np.asarray(src_values), trg_dtype_key) - elif case == "case_three_str": # str, pint.Unit, str - if f"{prfx_src}{cmd[2]}" not in mdata: + set_value(template, trg, src_values, trg_dtype_key) + elif case == "case_three_str": # str, ureg.Unit, str + src_val = mdata.get(f"{prfx_src}{cmd[2]}") + if not src_val: continue - src_val = mdata[f"{prfx_src}{cmd[2]}"] trg = var_path_to_spcfc_path(f"{prfx_trg}/{cmd[0]}", ids) - if isinstance(src_val, pint.Quantity): - set_value(template, trg, src_val, trg_dtype_key) + if isinstance(src_val, ureg.Quantity): + set_value(template, trg, src_val.to(cmd[1]), trg_dtype_key) else: set_value( - template, trg, pint.Quantity(src_val, cmd[1].units), trg_dtype_key + template, trg, ureg.Quantity(src_val, cmd[1].units), trg_dtype_key ) - elif case == "case_three_list": # str, pint.Unit, list + elif case == "case_three_list": # str, ureg.Unit, list if len(cmd[2]) == 0: continue if not all(isinstance(val, str) for val in cmd[2]): @@ -355,36 +306,47 @@ def map_functor( if not all(f"{prfx_src}{val}" in mdata for val in cmd[2]): continue src_values = [mdata[f"{prfx_src}{val}"] for val in cmd[2]] + if not all(src_val for src_val in src_values): + continue if not all(type(val) is type(src_values[0]) for val in src_values): # need to check whether content are scalars also continue trg = var_path_to_spcfc_path(f"{prfx_trg}/{cmd[0]}", ids) - if isinstance(src_values, pint.Quantity): + if isinstance(src_values, ureg.Quantity): set_value(template, trg, src_values, trg_dtype_key) else: + # potentially a list of ureg.Quantities with different scaling + normalize = [] + for val in src_values: + if isinstance(val, ureg.Quantity): + normalize.append(val.to(cmd[1]).magnitude) + else: + raise TypeError( + "Unimplemented case for {val} in 
case_three_list !" + ) set_value( template, trg, - pint.Quantity(src_values, cmd[1].units), + ureg.Quantity(normalize, cmd[1]), trg_dtype_key, ) elif case.startswith("case_four"): # both of these cases can be avoided in an implementation when the # src quantity is already a pint quantity instead of some # pure python or numpy value or array respectively - print( - f"WARNING::Ignoring case_four, instead refactor implementation such" + raise ValueError( + f"Hitting unimplemented case_four, instead refactor implementation such" f"that values on the src side are pint.Quantities already!" ) elif case == "case_five_str": - if f"{prfx_src}{cmd[2]}" not in mdata: + src_val = mdata.get(f"{prfx_src}{cmd[2]}") + if not src_val: continue - src_val = mdata[f"{prfx_src}{cmd[2]}"] trg = var_path_to_spcfc_path(f"{prfx_trg}/{cmd[0]}", ids) - if isinstance(src_val, pint.Quantity): + if isinstance(src_val, ureg.Quantity): set_value(template, trg, src_val.to(cmd[1]), trg_dtype_key) else: - pint_src = pint.Quantity(src_val, cmd[3]) + pint_src = ureg.Quantity(src_val, cmd[3]) set_value(template, trg, pint_src.to(cmd[1]), trg_dtype_key) elif case == "case_five_list": if len(cmd[2]) == 0: @@ -394,28 +356,32 @@ def map_functor( if not all(f"{prfx_src}{val}" in mdata for val in cmd[2]): continue src_values = [mdata[f"{prfx_src}{val}"] for val in cmd[2]] - if isinstance(src_values[0], pint.Quantity): + if not all(src_val for src_val in src_values): + continue + if isinstance(src_values[0], ureg.Quantity): raise ValueError( - f"Hit unimplemented case that src_val is pint.Quantity" + f"Hit unimplemented case that src_val is ureg.Quantity" ) if not all(type(val) is type(src_values[0]) for val in src_values): continue trg = var_path_to_spcfc_path(f"{prfx_trg}/{cmd[0]}", ids) - if isinstance(src_values, pint.Quantity): + if isinstance(src_values, ureg.Quantity): set_value(template, trg, src_values.to(cmd[1]), trg_dtype_key) else: - pint_src = pint.Quantity(src_values, cmd[3]) + pint_src = 
ureg.Quantity(src_values, cmd[3]) set_value(template, trg, pint_src.to(cmd[1]), trg_dtype_key) elif case == "case_six": if f"{prfx_src}{cmd[2]}" not in mdata or f"{prfx_src}{cmd[3]}" not in mdata: continue src_val = mdata[f"{prfx_src}{cmd[2]}"] src_unit = mdata[f"{prfx_src}{cmd[3]}"] + if not src_val or not src_unit: + continue trg = var_path_to_spcfc_path(f"{prfx_trg}/{cmd[0]}", ids) - if isinstance(src_val, pint.Quantity): + if isinstance(src_val, ureg.Quantity): set_value(template, trg, src_val.units.to(cmd[1]), trg_dtype_key) else: - pint_src = pint.Quantity(src_val, pint.Unit(src_unit)) + pint_src = ureg.Quantity(src_val, ureg.Unit(src_unit)) set_value(template, trg, pint_src.to(cmd[1]), trg_dtype_key) return template @@ -428,6 +394,7 @@ def timestamp_functor( ids: list, template: dict, ) -> dict: + """Process concept mapping and time format conversion.""" for cmd in cmds: if isinstance(cmd, tuple): if 2 <= len(cmd) <= 3: # trg, src, timestamp or empty string (meaning utc) @@ -460,6 +427,7 @@ def filehash_functor( ids: list, template: dict, ) -> dict: + """Process concept mapping and checksums to add context from which file NeXus content was processed.""" for cmd in cmds: if isinstance(cmd, tuple): if len(cmd) == 2: @@ -493,11 +461,18 @@ def add_specific_metadata_pint( template: dictionary where to store mapped instance data using template paths """ if "prefix_trg" in cfg: - prfx_trg = cfg["prefix_trg"] + prefix_trg = cfg["prefix_trg"] else: raise KeyError(f"prefix_trg not found in cfg!") if "prefix_src" in cfg: - prfx_src = cfg["prefix_src"] + if isinstance(cfg["prefix_src"], str): + prfx_src = [cfg["prefix_src"]] + elif isinstance(cfg["prefix_src"], list) and all( + isinstance(val, str) for val in cfg["prefix_src"] + ): + prfx_src = cfg["prefix_src"] + else: + raise ValueError(f"prefix_src needs to be a str or a list[str] !") else: raise KeyError(f"prefix_src not found in cfg!") @@ -507,52 +482,36 @@ def add_specific_metadata_pint( # returns an output, given 
the mapping can be abstract, we call it a functor # https://numpy.org/doc/stable/reference/arrays.dtypes.html - - for functor_key in cfg: - if functor_key == "use": - use_functor(cfg["use"], mdata, prfx_trg, ids, template) - if functor_key == "map": - map_functor(cfg[functor_key], mdata, prfx_src, prfx_trg, ids, template) - if functor_key.startswith("map_to_"): - dtype_key = functor_key.replace("map_to_", "") - print(f"dtype_key >>>> {dtype_key}") - if dtype_key in MAP_TO_DTYPES: + for prefix_src in prfx_src: + for functor_key in cfg: + if functor_key in ["prefix_trg", "prefix_src"]: + continue + if functor_key == "use": + use_functor(cfg["use"], mdata, prefix_trg, ids, template) + if functor_key == "map": map_functor( - cfg[functor_key], - mdata, - prfx_src, - prfx_trg, - ids, - template, - dtype_key, + cfg[functor_key], mdata, prefix_src, prefix_trg, ids, template + ) + if functor_key.startswith("map_to_"): + dtype_key = functor_key.replace("map_to_", "") + if dtype_key in MAP_TO_DTYPES: + map_functor( + cfg[functor_key], + mdata, + prefix_src, + prefix_trg, + ids, + template, + dtype_key, + ) + else: + raise KeyError(f"Unexpected dtype_key {dtype_key} !") + if functor_key == "unix_to_iso8601": + timestamp_functor( + cfg["unix_to_iso8601"], mdata, prefix_src, prefix_trg, ids, template + ) + if functor_key == "sha256": + filehash_functor( + cfg["sha256"], mdata, prefix_src, prefix_trg, ids, template ) - else: - raise KeyError(f"Unexpected dtype_key {dtype_key} !") - if functor_key == "unix_to_iso8601": - timestamp_functor( - cfg["unix_to_iso8601"], mdata, prfx_src, prfx_trg, ids, template - ) - if functor_key == "sha256": - filehash_functor(cfg["sha256"], mdata, prfx_src, prfx_trg, ids, template) return template - - -PINT_MAPPING_TESTS = { - "use": [ - ("str_str_01", ""), - ("str_str_02", "one"), - ("str_qnt_01", NX_UNITLESS), - ("str_qnt_02", NX_DIMENSIONLESS), - ("str_qnt_03", NX_ANY), - ("str_qnt_04", pint.Quantity(1, ureg.meter)), - ("str_qnt_05", 
pint.Quantity(1, ureg.nx_unitless)), - ("str_qnt_06", pint.Quantity(1, ureg.nx_dimensionless)), - ("str_qnt_07", pint.Quantity(1, ureg.nx_any)), - ("str_qnt_08", pint.Quantity(np.uint32(1), ureg.meter)), - ("str_qnt_09", pint.Quantity(np.uint32(1), ureg.nx_unitless)), - ("str_qnt_10", pint.Quantity(np.uint32(1), ureg.nx_dimensionless)), - ("str_qnt_11", pint.Quantity(np.uint32(1), ureg.nx_any)), - ("str_qnt_12", pint.Quantity(np.asarray([1, 2, 3], np.uint32), ureg.meter)), - ], - "map": [], -} diff --git a/src/pynxtools_em/concepts/nxs_concepts.py b/src/pynxtools_em/concepts/nxs_concepts.py index 2970dcd..60d87f3 100644 --- a/src/pynxtools_em/concepts/nxs_concepts.py +++ b/src/pynxtools_em/concepts/nxs_concepts.py @@ -17,11 +17,12 @@ # """Implement NeXus-specific groups and fields to document software and versions used.""" -from pynxtools_em.concepts.mapping_functors import add_specific_metadata +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint from pynxtools_em.utils.versioning import NX_EM_EXEC_NAME, NX_EM_EXEC_VERSION EM_PYNX_TO_NEXUS = { "prefix_trg": "/ENTRY[entry*]/profiling", + "prefix_src": "", "use": [ ("PROGRAM[program1]/program", NX_EM_EXEC_NAME), ("PROGRAM[program1]/program/@version", NX_EM_EXEC_VERSION), @@ -37,5 +38,5 @@ def __init__(self, entry_id: int = 1): def parse(self, template: dict) -> dict: """Parse application definition.""" - add_specific_metadata(EM_PYNX_TO_NEXUS, {}, [self.entry_id], template) + add_specific_metadata_pint(EM_PYNX_TO_NEXUS, {}, [self.entry_id], template) return template diff --git a/src/pynxtools_em/concepts/nxs_em_eds_indexing.py b/src/pynxtools_em/concepts/nxs_em_eds_indexing.py index 8d31437..783e684 100644 --- a/src/pynxtools_em/concepts/nxs_em_eds_indexing.py +++ b/src/pynxtools_em/concepts/nxs_em_eds_indexing.py @@ -21,20 +21,19 @@ from pynxtools_em.concepts.nxs_object import NxObject - NX_EM_EDS_INDEXING_HDF_PATH = [ "indexing/element_names-field", - 
"indexing/IMAGE_R_SET/PROCESS-group", - "indexing/IMAGE_R_SET/PROCESS/peaks-field", - "indexing/IMAGE_R_SET/description-field", - "indexing/IMAGE_R_SET/iupac_line_candidates-field", - "indexing/IMAGE_R_SET/PROCESS/weights-field", - "indexing/IMAGE_R_SET/PROCESS/weights-field", - "indexing/IMAGE_R_SET/image_twod/axis_x-field", - "indexing/IMAGE_R_SET/image_twod/axis_x@long_name-attribute", - "indexing/IMAGE_R_SET/image_twod/axis_y-field", - "indexing/IMAGE_R_SET/image_twod/axis_y@long_name-attribute", - "indexing/IMAGE_R_SET/image_twod/intensity-field", + "indexing/IMAGE_SET/PROCESS-group", + "indexing/IMAGE_SET/PROCESS/peaks-field", + "indexing/IMAGE_SET/description-field", + "indexing/IMAGE_SET/iupac_line_candidates-field", + "indexing/IMAGE_SET/PROCESS/weights-field", + "indexing/IMAGE_SET/PROCESS/weights-field", + "indexing/IMAGE_SET/image_2d/axis_i-field", + "indexing/IMAGE_SET/image_2d/axis_i@long_name-attribute", + "indexing/IMAGE_SET/image_2d/axis_j-field", + "indexing/IMAGE_SET/image_2d/axis_j@long_name-attribute", + "indexing/IMAGE_SET/image_2d/real-field", "indexing/PEAK/ION/energy-field", "indexing/PEAK/ION/energy_range-field", "indexing/PEAK/ION/iupac_line_names-field", diff --git a/src/pynxtools_em/concepts/nxs_image_set.py b/src/pynxtools_em/concepts/nxs_image_set.py index 14e659a..228b3c5 100644 --- a/src/pynxtools_em/concepts/nxs_image_set.py +++ b/src/pynxtools_em/concepts/nxs_image_set.py @@ -21,44 +21,43 @@ from pynxtools_em.concepts.nxs_object import NxObject - NX_IMAGE_REAL_SPACE_SET_HDF_PATH = [ - "image_oned/axis_x-field", - "image_oned/axis_x@long_name-attribute", - "image_oned/intensity-field", - "image_threed/axis_x-field", - "image_threed/axis_x@long_name-attribute", - "image_threed/axis_y-field", - "image_threed/axis_y@long_name-attribute", - "image_threed/axis_z-field", - "image_threed/axis_z@long_name-attribute", - "image_threed/intensity-field", - "image_twod/axis_x-field", - "image_twod/axis_x@long_name-attribute", - 
"image_twod/axis_y-field", - "image_twod/axis_y@long_name-attribute", - "image_twod/intensity-field", - "stack_oned/axis_image_identifier-field", - "stack_oned/axis_image_identifier@long_name-attribute", - "stack_oned/axis_x-field", - "stack_oned/axis_x@long_name-attribute", - "stack_oned/intensity-field", - "stack_threed/axis_image_identifier-field", - "stack_threed/axis_image_identifier@long_name-attribute", - "stack_threed/axis_x-field", - "stack_threed/axis_x@long_name-attribute", - "stack_threed/axis_y-field", - "stack_threed/axis_y@long_name-attribute", - "stack_threed/axis_z-field", - "stack_threed/axis_z@long_name-attribute", - "stack_threed/intensity-field", - "stack_twod/axis_image_identifier-field", - "stack_twod/axis_image_identifier@long_name-attribute", - "stack_twod/axis_x-field", - "stack_twod/axis_x@long_name-attribute", - "stack_twod/axis_y-field", - "stack_twod/axis_y@long_name-attribute", - "stack_twod/intensity-field", + "image_1d/axis_i-field", + "image_1d/axis_i@long_name-attribute", + "image_1d/real-field", + "image_2d/axis_i-field", + "image_2d/axis_i@long_name-attribute", + "image_2d/axis_j-field", + "image_2d/axis_j@long_name-attribute", + "image_2d/real-field", + "image_3d/axis_i-field", + "image_3d/axis_i@long_name-attribute", + "image_3d/axis_j-field", + "image_3d/axis_j@long_name-attribute", + "image_3d/axis_k-field", + "image_3d/axis_k@long_name-attribute", + "image_3d/real-field", + "stack_1d/axis_i-field", + "stack_1d/axis_i@long_name-attribute", + "stack_1d/axis_image_identifier-field", + "stack_1d/axis_image_identifier@long_name-attribute", + "stack_1d/real-field", + "stack_2d/axis_i-field", + "stack_2d/axis_i@long_name-attribute", + "stack_2d/axis_image_identifier-field", + "stack_2d/axis_image_identifier@long_name-attribute", + "stack_2d/axis_j-field", + "stack_2d/axis_j@long_name-attribute", + "stack_2d/real-field", + "stack_3d/axis_i-field", + "stack_3d/axis_i@long_name-attribute", + "stack_3d/axis_image_identifier-field", + 
"stack_3d/axis_image_identifier@long_name-attribute", + "stack_3d/axis_j-field", + "stack_3d/axis_j@long_name-attribute", + "stack_3d/axis_k-field", + "stack_3d/axis_k@long_name-attribute", + "stack_3d/real-field", ] diff --git a/src/pynxtools_em/concepts/nxs_object.py b/src/pynxtools_em/concepts/nxs_object.py index 5e44728..3886418 100644 --- a/src/pynxtools_em/concepts/nxs_object.py +++ b/src/pynxtools_em/concepts/nxs_object.py @@ -17,10 +17,10 @@ # """NXobject (element of a labelled property graph) to store instance data.""" -from typing import Dict - import numpy as np +# Deprecated should be refactored and removed in future releases + class NxObject: """An object in a graph e.g. an attribute, dataset, or group in NeXus. @@ -31,26 +31,19 @@ class NxObject: eqv_hdf: node type in HDF5 serialization, group, dset/field, attribute """ - def __init__(self, name: str, unit: str, dtype, value, **kwargs): - if (name is not None) and (name == ""): - raise ValueError( - f"Value for argument name needs to be a non-empty string !" - ) - if (unit is not None) and (unit == ""): - raise ValueError( - f"Value for argument unit needs to be a non-empty string !" - ) - if (dtype is not None) and isinstance(dtype, type) is False: - raise ValueError( - f"Value of argument dtype must not be None " - f" and a valid, ideally a numpy datatype !" 
- ) - self.name = name - self.unit = unit - self.dtype = dtype - if value is None or isinstance(dtype, str): - self.unit = "unitless" - self.value = value + def __init__(self, **kwargs): + self.name = None + self.value = None + self.unit = None + self.dtype = None + if "name" in kwargs: + self.name = kwargs["name"] + if "unit" in kwargs: + self.unit = kwargs["unit"] + if "dtype" in kwargs: + self.dtype = kwargs["dtype"] + if "value" in kwargs: + self.value = kwargs["value"] self.eqv_hdf = None if "eqv_hdf" in kwargs: if kwargs["eqv_hdf"] in ["group", "dataset", "attribute"]: diff --git a/src/pynxtools_em/concepts/nxs_spectrum_set.py b/src/pynxtools_em/concepts/nxs_spectrum_set.py index ea11fca..0834c3c 100644 --- a/src/pynxtools_em/concepts/nxs_spectrum_set.py +++ b/src/pynxtools_em/concepts/nxs_spectrum_set.py @@ -21,48 +21,47 @@ from pynxtools_em.concepts.nxs_object import NxObject - NX_SPECTRUM_SET_HDF_PATH: List = [ - "collection-group", - "collection/axis_energy-field", - "collection/axis_energy@long_name-attribute", - "collection/axis_scan_point_id-field", - "collection/axis_scan_point_id@long_name-attribute", - "collection/intensity-field", - "collection/intensity@long_name-attribute", "PROCESS-group", + "PROCESS/PROGRAM-group", "PROCESS/detector_identifier-field", "PROCESS/mode-field", - "PROCESS/PROGRAM-group", "PROCESS/source-group", - "spectrum_zerod/axis_energy-field", - "spectrum_zerod/axis_energy@long_name-attribute", - "spectrum_zerod/intensity-field", - "spectrum_zerod/intensity@long_name-attribute", - "spectrum_oned/axis_energy-field", - "spectrum_oned/axis_energy@long_name-attribute", - "spectrum_oned/axis_x-field", - "spectrum_oned/axis_x@long_name-attribute", - "spectrum_oned/intensity-field", - "spectrum_oned/intensity@long_name-attribute", - "spectrum_threed/axis_energy-field", - "spectrum_threed/axis_energy@long_name-attribute", - "spectrum_threed/axis_x-field", - "spectrum_threed/axis_x@long_name-attribute", - "spectrum_threed/axis_y-field", 
- "spectrum_threed/axis_y@long_name-attribute", - "spectrum_threed/axis_z-field", - "spectrum_threed/axis_z@long_name-attribute", - "spectrum_threed/intensity-field", - "spectrum_threed/intensity@long_name-attribute", - "spectrum_twod/axis_energy-field", - "spectrum_twod/axis_energy@long_name-attribute", - "spectrum_twod/axis_x-field", - "spectrum_twod/axis_x@long_name-attribute", - "spectrum_twod/axis_y-field", - "spectrum_twod/axis_y@long_name-attribute", - "spectrum_twod/intensity-field", - "spectrum_twod/intensity@long_name-attribute", + "spectrum_0d/axis_energy-field", + "spectrum_0d/axis_energy@long_name-attribute", + "spectrum_0d/real-field", + "spectrum_0d/real@long_name-attribute", + "spectrum_1d/axis_energy-field", + "spectrum_1d/axis_energy@long_name-attribute", + "spectrum_1d/axis_i-field", + "spectrum_1d/axis_i@long_name-attribute", + "spectrum_1d/real-field", + "spectrum_1d/real@long_name-attribute", + "spectrum_2d/axis_energy-field", + "spectrum_2d/axis_energy@long_name-attribute", + "spectrum_2d/axis_i-field", + "spectrum_2d/axis_i@long_name-attribute", + "spectrum_2d/axis_j-field", + "spectrum_2d/axis_j@long_name-attribute", + "spectrum_2d/real-field", + "spectrum_2d/real@long_name-attribute", + "spectrum_3d/axis_energy-field", + "spectrum_3d/axis_energy@long_name-attribute", + "spectrum_3d/axis_i-field", + "spectrum_3d/axis_i@long_name-attribute", + "spectrum_3d/axis_j-field", + "spectrum_3d/axis_j@long_name-attribute", + "spectrum_3d/axis_k-field", + "spectrum_3d/axis_k@long_name-attribute", + "spectrum_3d/real-field", + "spectrum_3d/real@long_name-attribute", + "stack_0d-group", + "stack_0d/axis_energy-field", + "stack_0d/axis_energy@long_name-attribute", + "stack_0d/real-field", + "stack_0d/real@long_name-attribute", + "stack_0d/spectrum_identifier-field", + "stack_0d/spectrum_identifier@long_name-attribute", ] diff --git a/src/pynxtools_em/configurations/README.md b/src/pynxtools_em/configurations/README.md index e39e9bd..20b19af 100644 --- 
a/src/pynxtools_em/configurations/README.md +++ b/src/pynxtools_em/configurations/README.md @@ -58,7 +58,7 @@ takes place during consumption of the serialized NeXus artifact/file. The following example shows one typical such dictionary. ```python -AXON_STAGE_STATIC_TO_NX_EM = { +AXON_STATIC_STAGE_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/STAGE_LAB[stage_lab]", "prefix_src": "MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.", "use": [("design", "heating_chip")], @@ -74,7 +74,7 @@ pointed to by keyword f"{prefix_src}{map[0][1]}". Problems with the old algorithm can be exemplified with the following example ``` -VELOX_STAGE_TO_NX_EM = { +VELOX_DYNAMIC_STAGE_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", "use": [ ("tilt1/@units", "rad"), @@ -94,7 +94,7 @@ Keywords *use* and *map* were looped over. Therefore, template pathes like *tilt independently whether the corresponding value *tilt1* was found. The new approach solves this and makes the dictionary more compact: ``` -VELOX_STAGE_TO_NX_EM = { +VELOX_DYNAMIC_STAGE_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", "map": [("design", "Stage/HolderType")], "map_to_float64": [ @@ -120,7 +120,7 @@ mapping and translations as hard-coded instructions instead. * **use** instructs mapping explicitly instance data on *trg* without demanding a *src*. Specifically, tuples of the following two datatypes are allowed: (str, str | numpy datatype (scalar or array)) - (str, pint.Quantity) + (str, pint.ureg) The first value resolves the symbol for the concept on the *trg* side. The second value resolves the instance data to store on the *trg* side. The template path on the *trg* side is f"{prefix_trg}/{tpl[0]}", if provided prefix_src will be ignored. 
@@ -157,7 +157,7 @@ mapping and translations as hard-coded instructions instead. The third value resolves the specific unit on the *src* side. In an implementation, this case can be avoided when the value on the *src* side - is already normalized into a pint.Quantity. The second value can be a list of + is already normalized into a pint.ureg. The second value can be a list of strings of symbols for concepts on the *src* side. * ```(str, pint.ureg, str | list[str])``` aka case three. diff --git a/src/pynxtools_em/configurations/conventions_cfg.py b/src/pynxtools_em/configurations/conventions_cfg.py index 6066168..2e0194f 100644 --- a/src/pynxtools_em/configurations/conventions_cfg.py +++ b/src/pynxtools_em/configurations/conventions_cfg.py @@ -17,8 +17,10 @@ # """Dict mapping values for conventions and reference frames.""" +from typing import Any, Dict + # /ENTRY[entryID]/ROI[roiID]/ebsd/conventions" -ROTATIONS_TO_NEXUS = { +CONV_ROTATIONS_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/coordinate_system_set", "prefix_src": "rotation_conventions/", "map": [ @@ -31,7 +33,7 @@ } -PROCESSING_CSYS_TO_NEXUS = { +CONV_PROCESSING_CSYS_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/coordinate_system_set/processing_reference_frame", "prefix_src": "processing_reference_frame/", "map": [ @@ -48,7 +50,7 @@ } -SAMPLE_CSYS_TO_NEXUS = { +CONV_SAMPLE_CSYS_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/coordinate_system_set/sample_reference_frame", "prefix_src": "sample_reference_frame/", "map": [ @@ -65,8 +67,8 @@ } -DETECTOR_CSYS_TO_NEXUS = { - "prefix_trg": "/ENTRY[entry*]/coordinate_system_set/detector_reference_frameID[detector_reference_frame1]", +CONV_DETECTOR_CSYS_TO_NEXUS: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/coordinate_system_set/COORDINATE_SYSTEM[detector_reference_frame1]", "prefix_src": "detector_reference_frame/", "map": [ "type", @@ -82,7 +84,7 @@ } -GNOMONIC_CSYS_TO_NEXUS = { +CONV_GNOMONIC_CSYS_TO_NEXUS: Dict[str, 
Any] = { "prefix_trg": "/ENTRY[entry*]/ROI[roi*]/ebsd/gnomonic_reference_frame", "prefix_src": "gnomonic_reference_frame/", "map": [ @@ -96,7 +98,7 @@ } -PATTERN_CSYS_TO_NEXUS = { +CONV_PATTERN_CSYS_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/ROI[roi*]/ebsd/pattern_centre", "prefix_src": "pattern_centre/", "map": [ diff --git a/src/pynxtools_em/configurations/eln_cfg.py b/src/pynxtools_em/configurations/eln_cfg.py index e9eda55..993b998 100644 --- a/src/pynxtools_em/configurations/eln_cfg.py +++ b/src/pynxtools_em/configurations/eln_cfg.py @@ -17,63 +17,35 @@ # """Dict mapping custom schema instances from eln_data.yaml file on concepts in NXem.""" -# mapping instructions as a dictionary -# prefix is the (variadic prefix to be add to every path on the target side) -# different modifiers are used -# "use": list of pair of trg, src endpoint, take the value in src copy into trg -# "load": list of single value or pair (trg, src) -# if single value this means that the endpoint of trg and src is the same -# e.g. in the example below "name" means -# ("/ENTRY[entry*]/USER[user*]/name, "load", "name") -# if pair load the value pointed to by src and copy into trg -# difference between load and map_to is that load assumes no e.g. 
string to real -# conversion is required while map_does not assume this -# and instead does the conversion also +from typing import Any, Dict -EM_ENTRY_TO_NEXUS = { +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +OASISELN_EM_ENTRY_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]", "prefix_src": "entry/", - "map_to_str": [ - "experiment_alias", - "start_time", - "end_time", - "experiment_description", - ], + "map": ["experiment_alias", "start_time", "end_time", "experiment_description"], } -EM_SAMPLE_TO_NEXUS = { +OASISELN_EM_SAMPLE_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/sample", "prefix_src": "sample/", - "map_to_str": [("thickness/@units", "thickness/unit")], - "map": [ - "method", - "name", - "atom_types", - "preparation_date", - ("thickness", "thickness/value"), - ], + "map": ["method", "name", "atom_types", "preparation_date"], + "map_to_f8": [("thickness", ureg.meter, "thickness/value", "thickness/unit")], } -EM_USER_TO_NEXUS = { +OASISELN_EM_USER_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/USER[user*]", - "map": [ - "name", - "affiliation", - "address", - "email", - "telephone_number", - "role", - ], + "prefix_src": "", + "map": ["name", "affiliation", "address", "email", "telephone_number", "role"], } -EM_USER_IDENTIFIER_TO_NEXUS = { +OASISELN_EM_USER_IDENTIFIER_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/USER[user*]", - "use": [ - ("IDENTIFIER[identifier]/identifier", "orcid"), - ("IDENTIFIER[identifier]/service", "orcid"), - ("IDENTIFIER[identifier]/is_persistent", True), - ], + "prefix_src": "", + "use": [("identifier/service", "orcid"), ("identifier/is_persistent", True)], + "map": [("identifier/identifier", "orcid")], } diff --git a/src/pynxtools_em/configurations/image_png_protochips_cfg.py b/src/pynxtools_em/configurations/image_png_protochips_cfg.py index 3df0299..08f40f6 100644 --- a/src/pynxtools_em/configurations/image_png_protochips_cfg.py +++ 
b/src/pynxtools_em/configurations/image_png_protochips_cfg.py @@ -18,7 +18,9 @@ """Configuration of the image_png_protochips parser.""" import re -from typing import Dict +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg def specific_to_variadic(token): @@ -37,7 +39,7 @@ def specific_to_variadic(token): return None -AXON_STAGE_STATIC_TO_NX_EM = { +AXON_STATIC_STAGE_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/STAGE_LAB[stage_lab]", "prefix_src": "MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.", "use": [("design", "heating_chip")], @@ -45,8 +47,9 @@ def specific_to_variadic(token): } -AXON_DETECTOR_STATIC_TO_NX_EM = { - "prefix": "/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]", +AXON_STATIC_DETECTOR_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/detectorID[detector*]", + "prefix_src": "", "use": [ ( "local_name", @@ -56,46 +59,44 @@ def specific_to_variadic(token): } -AXON_STAGE_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", +AXON_DYNAMIC_STAGE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", "prefix_src": "MicroscopeControlImageMetadata.ActivePositionerSettings.PositionerSettings.[*].Stage.", - "map_to_real_and_join": [("position", ["X", "Y", "Z"])], -} # "use": [("position/@units", "m")], values are much to large to be m + "map_to_f8": [ + ("position", ureg.meter, ["X", "Y", "Z"], ureg.meter) + ], # values are much to large to be in m! 
+} -AXON_CHIP_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/heater", +AXON_DYNAMIC_CHIP_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/heater", "prefix_src": "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].", - "use": [("current/@units", "A"), ("power/@units", "W"), ("voltage/@units", "V")], - "map_to_real": [ - ("current", "HeatingCurrent"), - ("power", "HeatingPower"), - ("voltage", "HeatingVoltage"), + "map_to_f8": [ + ("current", ureg.ampere, "HeatingCurrent", ureg.ampere), + ("power", ureg.watt, "HeatingPower", ureg.watt), + ("voltage", ureg.volt, "HeatingVoltage", ureg.volt), ], } -AXON_AUX_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", +AXON_DYNAMIC_AUX_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/ebeam_column", "prefix_src": "MicroscopeControlImageMetadata.AuxiliaryData.AuxiliaryDataCategory.[*].DataValues.AuxiliaryDataValue.[*].", "use": [ - ("SENSOR[sensor2]/value/@units", "torr"), - ("SENSOR[sensor2]/measurement", "pressure"), - ("SENSOR[sensor1]/value/@units", "°C"), - ("SENSOR[sensor1]/measurement", "temperature"), + ("sensorID[sensor1]/measurement", "temperature"), + ("sensorID[sensor2]/measurement", "pressure"), ], - "map_to_real": [ - ("SENSOR[sensor2]/value", "HolderPressure"), - ("SENSOR[sensor1]/value", "HolderTemperature"), + "map_to_f8": [ + ("sensorID[sensor1]/value", ureg.degC, "HolderTemperature", ureg.degC), + ("sensorID[sensor2]/value", ureg.bar, "HolderPressure", ureg.torr), ], } -AXON_VARIOUS_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]", 
+AXON_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", "prefix_src": "MicroscopeControlImageMetadata.MicroscopeSettings.", "use": [ - ("em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units", "V"), ( "event_type", "As tested with AXON 10.4.4.21, 2021-04-26T22:51:28.4539893-05:00 not included in Protochips PNG metadata", @@ -104,7 +105,6 @@ def specific_to_variadic(token): "em_lab/DETECTOR[detector*]/mode", "As tested with AXON 10.4.4.21, 2021-04-26T22:51:28.4539893-05:00 not included in Protochips PNG metadata", ), - ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/camera_length", "m"), ], "map": [ ( @@ -112,14 +112,18 @@ def specific_to_variadic(token): "BeamBlankerState", ), ], - "map_to_real": [ + "map_to_f8": [ ( "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", + ureg.volt, "AcceleratingVoltage", + ureg.volt, ), ( "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/camera_length", + ureg.meter, "CameraLengthValue", + ureg.meter, ), ( "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", diff --git a/src/pynxtools_em/configurations/image_tiff_hitachi_cfg.py b/src/pynxtools_em/configurations/image_tiff_hitachi_cfg.py new file mode 100644 index 0000000..442fad6 --- /dev/null +++ b/src/pynxtools_em/configurations/image_tiff_hitachi_cfg.py @@ -0,0 +1,62 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Configuration of the image_tiff_hitachi parser.""" + +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +HITACHI_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", + "prefix_src": "", + "map_to_f8": [ + ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "Magnification"), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", + ureg.meter, + "WorkingDistance", + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", + ureg.volt, + "AcceleratingVoltage", + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/filament_current", + ureg.ampere, + "FilamentCurrent", + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emission_current", + ureg.ampere, + "EmissionCurrent", + ), + ], +} + + +HITACHI_STATIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab", + "prefix_src": "", + "use": [("FABRICATION[fabrication]/vendor", "Hitachi")], + "map": [ + ("FABRICATION[fabrication]/model", "InstructName"), + ("FABRICATION[fabrication]/model", "Instrument name"), + ("FABRICATION[fabrication]/identifier", "SerialNumber"), + ], +} diff --git a/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py b/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py index df6c584..b308c67 100644 --- a/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py +++ b/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py @@ -17,19 +17,18 @@ # """Configuration of the image_tiff_jeol parser.""" -from pint import UnitRegistry +from typing import Any, Dict -ureg = UnitRegistry() +from pynxtools_em.utils.pint_custom_unit_registry import ureg - -JEOL_VARIOUS_DYNAMIC_TO_NX_EM = { - "prefix_trg": 
"/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]", +JEOL_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", "prefix_src": "", "map_to_f8": [ ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "CM_MAG"), ( "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", - ureg.centimeter, + ureg.meter, "SM_WD", ureg.millimeter, ), @@ -43,7 +42,7 @@ } -JEOL_VARIOUS_STATIC_TO_NX_EM = { +JEOL_STATIC_VARIOUS_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/em_lab", "prefix_src": "", "use": [("FABRICATION[fabrication]/vendor", "JEOL")], diff --git a/src/pynxtools_em/configurations/image_tiff_point_electronic_cfg.py b/src/pynxtools_em/configurations/image_tiff_point_electronic_cfg.py index d20e04d..f73c8f7 100644 --- a/src/pynxtools_em/configurations/image_tiff_point_electronic_cfg.py +++ b/src/pynxtools_em/configurations/image_tiff_point_electronic_cfg.py @@ -17,25 +17,24 @@ # """Configuration of the image_tiff_point_electronic EBIC parser.""" -from pint import UnitRegistry +from typing import Any, Dict -ureg = UnitRegistry() +from pynxtools_em.utils.pint_custom_unit_registry import ureg - -DISS_VARIOUS_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]", +DISS_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", "prefix_src": "", "map_to_f8": [ ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "Mag"), ( "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", - ureg.centimeter, + ureg.meter, "WD/value", "WD/Unit", ), ( "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", - ureg.picovolt, + ureg.volt, "HV/value", "HV/Unit", ), diff --git a/src/pynxtools_em/configurations/image_tiff_tescan_cfg.py 
b/src/pynxtools_em/configurations/image_tiff_tescan_cfg.py new file mode 100644 index 0000000..e0fdf35 --- /dev/null +++ b/src/pynxtools_em/configurations/image_tiff_tescan_cfg.py @@ -0,0 +1,95 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Configuration of the image_tiff_tescan parser.""" + +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +TESCAN_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", + "prefix_src": "", + "map_to_f8": [ + ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "Magnification"), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", + ureg.meter, + "WD", + ureg.meter, + ), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/probe_diameter", + ureg.meter, + "SpotSize", # diameter or probe at the specimen surface? 
+ ureg.meter, + ), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/beam_current", + ureg.ampere, + "PredictedBeamCurrent", + ureg.ampere, + ), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/specimen_current", + ureg.ampere, + "SpecimenCurrent", + ureg.ampere, + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", + ureg.volt, + "HV", + ureg.volt, + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/emission_current", + ureg.ampere, + "EmissionCurrent", + ureg.ampere, + ), + ], +} + + +TESCAN_DYNAMIC_STIGMATOR_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/ebeam_column/corrector_ax", + "prefix_src": "", + "map_to_f8": [("value_x", "StigmatorX"), ("value_y", "StigmatorY")], +} + + +TESCAN_DYNAMIC_STAGE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", + "prefix_src": "", + "map_to_f8": [ + ("rotation", ureg.radian, "StageRotation", ureg.degree), + ("tilt1", ureg.radian, "StageTilt", ureg.degree), + ("position", ureg.meter, ["StageX", "StageY", "StageZ"], ureg.meter), + ], +} + + +TESCAN_STATIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab", + "prefix_src": "", + "use": [("FABRICATION[fabrication]/vendor", "TESCAN")], + "map": [ + ("FABRICATION[fabrication]/model", "Device"), + ("FABRICATION[fabrication]/identifier", "SerialNumber"), + ], +} diff --git a/src/pynxtools_em/configurations/image_tiff_tfs_cfg.py b/src/pynxtools_em/configurations/image_tiff_tfs_cfg.py index a383993..7263ef7 100644 --- a/src/pynxtools_em/configurations/image_tiff_tfs_cfg.py +++ b/src/pynxtools_em/configurations/image_tiff_tfs_cfg.py @@ -17,79 +17,389 @@ # """Configuration of the image_tiff_tfs parser.""" -from numpy import pi +from typing import Any, Dict -RAD2DEG = 180.0 / pi +from pynxtools_em.utils.pint_custom_unit_registry import ureg - 
-TFS_DETECTOR_STATIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/DETECTOR[detector*]", - "map": [ - ("local_name", "Detectors/Name"), - ], +TFS_STATIC_DETECTOR_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/detectorID[detector*]", + "prefix_src": "", + "map": [("local_name", "Detectors/Name")], } -TFS_APERTURE_STATIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/APERTURE_EM[aperture_em*]", - "use": [("value/@units", "m")], - "map": [ - ("description", "Beam/Aperture"), - ("value", "EBeam/ApertureDiameter"), - ], +TFS_STATIC_APERTURE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/ebeam_column/apertureID[aperture*]", + "prefix_src": "", + "map": [("description", "Beam/Aperture")], + "map_to_f8": [("value", ureg.meter, "EBeam/ApertureDiameter", ureg.meter)], } -TFS_VARIOUS_STATIC_TO_NX_EM = { +TFS_STATIC_VARIOUS_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/em_lab", - "use": [("FABRICATION[fabrication]/vendor", "FEI")], + "prefix_src": "", + "use": [("fabrication/vendor", "FEI")], "map": [ - ("FABRICATION[fabrication]/model", "System/SystemType"), - ("FABRICATION[fabrication]/identifier", "System/BuildNr"), - ("EBEAM_COLUMN[ebeam_column]/electron_source/emitter_type", "System/Source"), + ("fabrication/model", "System/SystemType"), + ("fabrication/identifier", "System/BuildNr"), + ("ebeam_column/electron_source/emitter_type", "System/Source"), ], } -TFS_OPTICS_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]", - "use": [("beam_current/@units", "A"), ("working_distance/@units", "m")], - "map": [ - ("beam_current", "EBeam/BeamCurrent"), - ("working_distance", "EBeam/WD"), +TFS_DYNAMIC_OPTICS_NX: Dict[str, Any] = { + "prefix_trg": 
"/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]", + "prefix_src": "", + "map_to_f8": [ + ("beam_current", ureg.ampere, "EBeam/BeamCurrent", ureg.ampere), + ("working_distance", ureg.meter, "EBeam/WD", ureg.meter), ], } -TFS_STAGE_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", - "use": [("tilt1/@units", "deg"), ("tilt2/@units", "deg")], - "map_to_real_and_multiply": [ - ("tilt1", "EBeam/StageTa", RAD2DEG), - ("tilt2", "EBeam/StageTb", RAD2DEG), +TFS_DYNAMIC_STAGE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", + "prefix_src": "", + "map_to_f8": [ + ("rotation", ureg.radian, "Stage/StageR", ureg.radian), + ("tilt1", ureg.radian, "Stage/StageTa", ureg.radian), + ("tilt2", ureg.radian, "Stage/StageTb", ureg.radian), + ( + "position", + ureg.meter, + ["Stage/StageX", "Stage/StageY", "Stage/StageZ"], + ureg.meter, + ), ], } -TFS_SCAN_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]/em_lab/SCANBOX_EM[scanbox_em]", - "use": [ - ("dwell_time/@units", "s"), - ], - "map": [("dwell_time", "Scan/Dwelltime"), ("scan_schema", "System/Scan")], +TFS_DYNAMIC_STIGMATOR_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/ebeam_column/corrector_ax", + "prefix_src": "", + "map_to_f8": [("value_x", "Beam/StigmatorX"), ("value_y", "Beam/StigmatorY")], } -TFS_VARIOUS_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]", - "use": [("em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage/@units", "V")], +TFS_DYNAMIC_SCAN_NX: Dict[str, Any] = { + "prefix_trg": 
"/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/scan_controller", + "prefix_src": "", + "map": [("scan_schema", "System/Scan")], + "map_to_f8": [("dwell_time", ureg.second, "Scan/Dwelltime", ureg.second)], +} + + +TFS_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", + "prefix_src": "", "map": [ - ("em_lab/DETECTOR[detector*]/mode", "Detectors/Mode"), - ("em_lab/EBEAM_COLUMN[ebeam_column]/operation_mode", "EBeam/UseCase"), - ("em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", "EBeam/HV"), + ("em_lab/detectorID[detector*]/mode", "Detectors/Mode"), + ("em_lab/ebeam_column/operation_mode", "EBeam/UseCase"), + ("em_lab/ebeam_column/BEAM[beam]/image_mode", "Beam/ImageMode"), + ("em_lab/ebeam_column/BEAM[beam]/mode", "EBeam/BeamMode"), + ("em_lab/ebeam_column/apertureID[aperture1]/name", "EBeam/Aperture"), ("event_type", "T1/Signal"), ("event_type", "T2/Signal"), ("event_type", "T3/Signal"), ("event_type", "ETD/Signal"), ], + "map_to_bool": [ + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/dynamic_focus", + "EBeam/DynamicFocusIsOn", + ) + ], + "map_to_u2": [("em_lab/ebeam_column/BEAM[beam]/value", "Beam/Spot")], + "map_to_f8": [ + ( + "em_lab/ebeam_column/electron_source/voltage", + ureg.volt, + "EBeam/HV", + ureg.volt, + ), + ( + "em_lab/ebeam_column/electron_source/emission_current", + ureg.ampere, + "EBeam/EmissionCurrent", + ureg.ampere, + ), + ( + "em_lab/ebeam_column/apertureID[aperture1]/diameter", + ureg.meter, + "EBeam/ApertureDiameter", + ureg.meter, + ), + ( + "em_lab/ebeam_column/BEAM[beam]/current", + ureg.ampere, + "EBeam/BeamCurrent", + ureg.ampere, + ), + ], } + + +# this example exemplifies the situation for the TFS/FEI SEM Apreo from the IKZ of Prof. 
Martin Albrecht +# thanks to Robert Kernke it was clarified the microscope has several detectors and imaging modes +# these imaging modes control the specific TFS/FEI concept instances stored in the respective TIFF file +# we here use a glossary of all concepts which we were able to parse out from an example image +# taken for each detector and imaging mode +# we then assume that one can work with the joint set of these concepts + +TIFF_TFS_PARENT_CONCEPTS = [ + "Accessories", + "Beam", + "ColdStage", + "CompoundLensFilter", + "Detectors", + "EBeam", + "EBeamDeceleration", + "EScan", + "ETD", + "EasyLift", + "GIS", + "HiResIllumination", + "HotStage", + "HotStageHVHS", + "HotStageMEMS", + "IRBeam", + "Image", + "Nav-Cam", + "PrivateFei", + "Scan", + "Specimen", + "Stage", + "System", + "T1", + "T2", + "T3", + "User", + "Vacuum", +] + +TIFF_TFS_ALL_CONCEPTS = [ + "Accessories/Number", + "Beam/Beam", + "Beam/BeamShiftX", + "Beam/BeamShiftY", + "Beam/FineStageBias", + "Beam/HV", + "Beam/ImageMode", + "Beam/Scan", + "Beam/ScanRotation", + "Beam/Spot", + "Beam/StigmatorX", + "Beam/StigmatorY", + "ColdStage/ActualTemperature", + "ColdStage/Humidity", + "ColdStage/SampleBias", + "ColdStage/TargetTemperature", + "CompoundLensFilter/IsOn", + "CompoundLensFilter/ThresholdEnergy", + "Detectors/Mode", + "Detectors/Name", + "Detectors/Number", + "EasyLift/Rotation", + "EBeam/Acq", + "EBeam/Aperture", + "EBeam/ApertureDiameter", + "EBeam/ATubeVoltage", + "EBeam/BeamCurrent", + "EBeam/BeamMode", + "EBeam/BeamShiftX", + "EBeam/BeamShiftY", + "EBeam/ColumnType", + "EBeam/DynamicFocusIsOn", + "EBeam/DynamicWDIsOn", + "EBeam/EmissionCurrent", + "EBeam/EucWD", + "EBeam/FinalLens", + "EBeam/HFW", + "EBeam/HV", + "EBeam/ImageMode", + "EBeam/LensMode", + "EBeam/LensModeA", + "EBeam/MagnificationCorrection", + "EBeam/PreTilt", + "EBeam/ScanRotation", + "EBeam/SemOpticalMode", + "EBeam/Source", + "EBeam/SourceTiltX", + "EBeam/SourceTiltY", + "EBeam/StageR", + "EBeam/StageTa", + 
"EBeam/StageTb", + "EBeam/StageX", + "EBeam/StageY", + "EBeam/StageZ", + "EBeam/StigmatorX", + "EBeam/StigmatorY", + "EBeam/TiltCorrectionAngle", + "EBeam/TiltCorrectionIsOn", + "EBeam/UseCase", + "EBeam/VFW", + "EBeam/WD", + "EBeam/WehneltBias", + "EBeamDeceleration/ImmersionRatio", + "EBeamDeceleration/LandingEnergy", + "EBeamDeceleration/ModeOn", + "EBeamDeceleration/StageBias", + "EScan/Dwell", + "EScan/FrameTime", + "EScan/HorFieldsize", + "EScan/InternalScan", + "EScan/LineIntegration", + "EScan/LineTime", + "EScan/Mainslock", + "EScan/PixelHeight", + "EScan/PixelWidth", + "EScan/Scan", + "EScan/ScanInterlacing", + "EScan/VerFieldsize", + "ETD/Brightness", + "ETD/BrightnessDB", + "ETD/Contrast", + "ETD/ContrastDB", + "ETD/Grid", + "ETD/MinimumDwellTime", + "ETD/Mix", + "ETD/Setting", + "ETD/Signal", + "GIS/Number", + "HiResIllumination/BrightFieldIsOn", + "HiResIllumination/BrightFieldValue", + "HiResIllumination/DarkFieldIsOn", + "HiResIllumination/DarkFieldValue", + "HotStage/ActualTemperature", + "HotStage/SampleBias", + "HotStage/ShieldBias", + "HotStage/TargetTemperature", + "HotStageHVHS/ActualTemperature", + "HotStageHVHS/SampleBias", + "HotStageHVHS/ShieldBias", + "HotStageHVHS/TargetTemperature", + "HotStageMEMS/ActualTemperature", + "HotStageMEMS/HeatingCurrent", + "HotStageMEMS/HeatingPower", + "HotStageMEMS/HeatingVoltage", + "HotStageMEMS/SampleBias", + "HotStageMEMS/SampleResistance", + "HotStageMEMS/TargetTemperature", + "Image/Average", + "Image/DigitalBrightness", + "Image/DigitalContrast", + "Image/DigitalGamma", + "Image/DriftCorrected", + "Image/Integrate", + "Image/MagCanvasRealWidth", + "Image/MagnificationMode", + "Image/PostProcessing", + "Image/ResolutionX", + "Image/ResolutionY", + "Image/ScreenMagCanvasRealWidth", + "Image/ScreenMagnificationMode", + "Image/Transformation", + "Image/ZoomFactor", + "Image/ZoomPanX", + "Image/ZoomPanY", + "IRBeam/HFW", + "IRBeam/n", + "IRBeam/ScanRotation", + "IRBeam/SiDepth", + "IRBeam/StageR", + 
"IRBeam/StageTa", + "IRBeam/StageTb", + "IRBeam/StageX", + "IRBeam/StageY", + "IRBeam/StageZ", + "IRBeam/VFW", + "IRBeam/WD", + "PrivateFei/BitShift", + "PrivateFei/DataBarAvailable", + "PrivateFei/DatabarHeight", + "PrivateFei/DataBarSelected", + "PrivateFei/TimeOfCreation", + "Scan/Average", + "Scan/Dwelltime", + "Scan/FrameTime", + "Scan/HorFieldsize", + "Scan/Integrate", + "Scan/InternalScan", + "Scan/PixelHeight", + "Scan/PixelWidth", + "Scan/VerFieldsize", + "Specimen/SpecimenCurrent", + "Specimen/Temperature", + "Stage/ActiveStage", + "Stage/SpecTilt", + "Stage/StageR", + "Stage/StageT", + "Stage/StageTb", + "Stage/StageX", + "Stage/StageY", + "Stage/StageZ", + "Stage/WorkingDistance", + "System/Acq", + "System/Aperture", + "System/BuildNr", + "System/Chamber", + "System/Column", + "System/DisplayHeight", + "System/DisplayWidth", + "System/Dnumber", + "System/ESEM", + "System/EucWD", + "System/FinalLens", + "System/Pump", + "System/Scan", + "System/Software", + "System/Source", + "System/Stage", + "System/SystemType", + "System/Type", + "T1/Brightness", + "T1/BrightnessDB", + "T1/Contrast", + "T1/ContrastDB", + "T1/MinimumDwellTime", + "T1/Setting", + "T1/Signal", + "T2/Brightness", + "T2/BrightnessDB", + "T2/Contrast", + "T2/ContrastDB", + "T2/MinimumDwellTime", + "T2/Setting", + "T2/Signal", + "T3/Brightness", + "T3/BrightnessDB", + "T3/Contrast", + "T3/ContrastDB", + "T3/MinimumDwellTime", + "T3/Signal", + "User/Date", + "User/Time", + "User/User", + "User/UserText", + "User/UserTextUnicode", + "Vacuum/ChPressure", + "Vacuum/Gas", + "Vacuum/Humidity", + "Vacuum/UserMode", +] + +# there is more to know and understand than just knowing TFS/FEI uses +# the above-mentioned concepts in their taxonomy: +# take the example of System/Source for which an example file (instance) has the +# value "FEG" +# similar like in NeXus "System/Source" labels a concept for which (assumption!) 
there +# is a controlled enumeration of symbols possible (as the example shows "FEG" is one such +# allowed symbol of the enumeration. +# The key issue is that the symbols for the leaf (here "FEG") means nothing eventually +# when one has another semantic world-view, like in NOMAD metainfo or NeXus +# (only us) humans understand that what TFS/FEI likely means with the symbol +# "FEG" is exactly the same as what we mean in NeXus when setting emitter_type of +# NXebeam_column to "cold_cathode_field_emitter" +# world with the controlled enumeration value "other" because we do not know +# if FEG means really a filament or a cold_cathode_field_emitter diff --git a/src/pynxtools_em/configurations/image_tiff_zeiss_cfg.py b/src/pynxtools_em/configurations/image_tiff_zeiss_cfg.py new file mode 100644 index 0000000..f0b5302 --- /dev/null +++ b/src/pynxtools_em/configurations/image_tiff_zeiss_cfg.py @@ -0,0 +1,60 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Configuration of the image_tiff_zeiss parser.""" + +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +ZEISS_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", + "prefix_src": "", + "map_to_f8": [ + ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "AP_MAG"), + ( + "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance", + ureg.meter, + "AP_WD", + ), + ( + "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage", + ureg.volt, + "AP_MANUALKV", + ), + ], +} + +ZEISS_DYNAMIC_STAGE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", + "prefix_src": "", + "map_to_f8": [ + ("rotation", ureg.radian, "AP_STAGE_AT_R"), + ("tilt1", ureg.radian, "AP_STAGE_AT_T"), + ("position", ureg.meter, ["AP_STAGE_AT_X", "AP_STAGE_AT_Y", "AP_STAGE_AT_Z"]), + ], +} + +ZEISS_STATIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab", + "prefix_src": "", + "use": [("FABRICATION[fabrication]/vendor", "Zeiss")], + "map": [ + ("FABRICATION[fabrication]/model", "DP_SEM"), + ("FABRICATION[fabrication]/identifier", "SV_SERIAL_NUMBER"), + ], +} diff --git a/src/pynxtools_em/configurations/nion_cfg.py b/src/pynxtools_em/configurations/nion_cfg.py new file mode 100644 index 0000000..1869f69 --- /dev/null +++ b/src/pynxtools_em/configurations/nion_cfg.py @@ -0,0 +1,401 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Dict mapping Nion custom schema instances on concepts in NXem.""" + +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +NION_WHICH_SPECTRUM = { + "eV": ("spectrum_0d", ["axis_energy"]), + "nm_eV": ("spectrum_1d", ["axis_i", "axis_energy"]), + "nm_nm_eV": ("spectrum_2d", ["axis_j", "axis_i", "axis_energy"]), + "nm_nm_nm_eV": ("spectrum_3d", ["axis_k", "axis_j", "axis_i", "axis_energy"]), + "unitless_eV": ("stack_0d", ["spectrum_identifier", "axis_energy"]), + "unitless_nm_eV": ("stack_1d", ["spectrum_identifier", "axis_energy"]), + "unitless_nm_nm_eV": ( + "stack_2d", + ["spectrum_identifier", "axis_j", "axis_i", "axis_energy"], + ), + "unitless_nm_nm_nm_eV": ( + "stack_3d", + ["spectrum_identifier", "axis_k", "axis_j", "axis_i", "axis_energy"], + ), +} +NION_WHICH_IMAGE = { + "nm": ("image_1d", ["axis_i"]), + "nm_nm": ("image_2d", ["axis_j", "axis_i"]), + "nm_nm_nm": ("image_3d", ["axis_k", "axis_j", "axis_i"]), + "unitless_nm": ("stack_1d", ["image_identifier", "axis_i"]), + "unitless_nm_nm": ("stack_2d", ["image_identifier", "axis_j", "axis_i"]), + "unitless_nm_nm_nm": ( + "stack_3d", + ["image_identifier", "axis_k", "axis_j", "axis_i"], + ), +} +# TODO::use mapping to base_units like exemplified for the gatan parser + + +MAG = "magnitude" +NION_DYNAMIC_ABERRATION_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/ebeam_column/corrector_cs/tableauID[tableau1]", + "prefix_src": [ + "metadata/hardware_source/ImageRonchigram/", + 
"metadata/hardware_source/autostem/ImageScanned/",
+        "metadata/instrument/ImageRonchigram/",
+        "metadata/instrument/ImageScanned/",
+        "metadata/instrument/autostem/ImageScanned/",
+        "metadata/scan/scan_device_properties/ImageScanned:",
+        "metadata/scan_detector/autostem/ImageScanned/",
+    ],
+    "map_to_f8": [
+        (f"c_1_0/{MAG}", ureg.meter, "C10", ureg.meter),
+        (f"c_1_2_a/{MAG}", "C12.a"),
+        (f"c_1_2_b/{MAG}", "C12.b"),
+        (f"c_2_1_a/{MAG}", "C21.a"),
+        (f"c_2_1_b/{MAG}", "C21.b"),
+        (f"c_2_3_a/{MAG}", "C23.a"),
+        (f"c_2_3_b/{MAG}", "C23.b"),
+        (f"c_3_0/{MAG}", ureg.meter, "C30", ureg.meter),
+        (f"c_3_2_a/{MAG}", "C32.a"),
+        (f"c_3_2_b/{MAG}", "C32.b"),
+        (f"c_3_4_a/{MAG}", "C34.a"),
+        (f"c_3_4_b/{MAG}", "C34.b"),
+        (f"c_5_0/{MAG}", ureg.meter, "C50", ureg.meter),
+    ],
+}
+
+
+# more on metadata https://nionswift.readthedocs.io/en/stable/api/scripting.html#managing-session-metadata
+# TODO::check units currently using alibi units!
+NION_DYNAMIC_VARIOUS_NX: Dict[str, Any] = {
+    "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab",
+    "prefix_src": [
+        "metadata/hardware_source/ImageRonchigram/",
+        "metadata/hardware_source/autostem/ImageRonchigram/",
+        "metadata/hardware_source/autostem/ImageScanned/",
+        "metadata/instrument/ImageRonchigram/",
+        "metadata/instrument/ImageScanned/",
+        "metadata/instrument/autostem/ImageRonchigram/",
+        "metadata/instrument/autostem/ImageScanned/",
+        "metadata/scan/scan_device_properties/ImageScanned:",
+        "metadata/scan_detector/autostem/ImageScanned/",
+    ],
+    "map_to_f8": [
+        ("ebeam_column/electron_source/voltage", ureg.volt, "EHT", ureg.volt),
+        (
+            "ebeam_column/BEAM[beam]/diameter",
+            ureg.meter,
+            "GeometricProbeSize",
+            ureg.meter,
+        ),  # diameter? radius ? 
+ ( + "OPTICAL_SETUP_EM[optical_setup]/semi_convergence_angle", + ureg.radian, + "probe_ha", + ureg.radian, + ), + ( + "OPTICAL_SETUP_EM[optical_setup]/probe_current", + ureg.ampere, + "SuperFEG.^EmissionCurrent", + ureg.ampere, + ), + ( + "OPTICAL_SETUP_EM[optical_setup]/field_of_view", + ureg.meter, + "fov_nm", + ureg.nanometer, + ), + # G_2Db, HAADF_Inner_ha, HAADF_Outer_ha, LastTuneCurrent, PMT2_gain, PMTBF_gain,PMTDF_gain + ], +} + + +NION_DYNAMIC_STAGE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage]", + "prefix_src": [ + "metadata/hardware_source/ImageRonchigram/", + "metadata/hardware_source/autostem/ImageRonchigram/", + "metadata/hardware_source/autostem/ImageScanned/", + "metadata/instrument/ImageRonchigram/", + "metadata/instrument/ImageScanned/", + "metadata/instrument/autostem/ImageRonchigram/", + "metadata/instrument/autostem/ImageScanned/", + "metadata/scan/scan_device_properties/ImageScanned:", + "metadata/scan_detector/autostem/ImageScanned/", + ], + "map_to_f8": [ + ("tilt1", ureg.radian, "StageOutA", ureg.radian), + ("tilt2", ureg.radian, "StageOutB", ureg.radian), + ( + "position", + ureg.meter, + ["StageOutX", "StageOutY", "StageOutZ"], + ureg.meter, + ), + ], +} + + +# TODO::all examples from the last 5years showed only these four different lenses +# therefore such solution can work for now but nobody states lenses needed to be +# ordered always 1, 2, 3, 4 and worse, if e.g. 
only MajorOL is found we get a single +# instance lens4 only in a NeXus file which might confuse people as they learn that +# numbering should start from 1 +NION_DYNAMIC_LENS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/ebeam_column", + "prefix_src": [ + "metadata/hardware_source/ImageRonchigram/", + "metadata/hardware_source/autostem/ImageRonchigram/", + "metadata/hardware_source/autostem/ImageScanned/", + "metadata/instrument/ImageRonchigram/", + "metadata/instrument/ImageScanned/", + "metadata/instrument/autostem/ImageRonchigram/", + "metadata/scan/scan_device_properties/ImageScanned:", + "metadata/scan_detector/autostem/ImageScanned/", + ], + "use": [ + ( + "operation_mode", + "Currently, nionswift stores the operation mode relevant settings via multiple metadata keywords and none of them in my opinion fit quite with this concept. The community should decide how to solve this.", + ) + ], + "map_to_f8": [ + ("lensID[lens1]/value", "C1 ConstW"), + ("lensID[lens2]/value", "C2 ConstW"), + ("lensID[lens3]/value", "C3 ConstW"), + ("lensID[lens4]/value", "MajorOL"), + ], +} + + +# https://nionswift-instrumentation.readthedocs.io/en/latest/scanning.html#how-does-scanning-work +# according to this documentation ac_line_style should be boolean but datasets show +# 1.0, 2.0, True and False ! +NION_DYNAMIC_SCAN_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/scan_controller", + "prefix_src": [ + "metadata/hardware_source/", + "metadata/scan/scan_device_parameters/", + "metadata/scan/scan_device_properties/", + ], + "use": [ + ( + "scan_schema", + "Currently, nionswift stores scan_schema relevant settings via multiple metadata keywords. 
The community should decide which of this is required.", + ) + ], + "map": [ + "ac_line_sync", + "calibration_style", + ("scan_schema", "channel_modifier"), + # TODO::exemplar mapping of subscan metadata + ], + "map_to_bool": ["ac_frame_sync"], + "map_to_u4": [("external_trigger_mode", "external_clock_mode")], + "map_to_f8": [ + ("center", ureg.meter, ["center_x_nm", "center_y_nm"], ureg.nanometer), + ("flyback_time", ureg.second, "flyback_time_us", ureg.microsecond), + ("line_time", ureg.second, "line_time_us", ureg.microsecond), + ( + "dwell_time", + ureg.second, + "pixel_time_us", + ureg.microsecond, + ), # requested_pixel_time_us + ("rotation", ureg.radian, "rotation_rad", ureg.radian), + ( + "external_trigger_max_wait_time", + ureg.second, + "external_clock_wait_time_ms", + ureg.millisecond, + ), + ], +} +# TODO metadata/scan/scan_device_parameters/ the following remain unmapped +# center_nm, data_shape_override, external_scan_mode, external_scan_ratio, pixel_size, scan_id, section_rect, +# size, state_override, subscan_fractional_center, subscan_fractional_size, +# subscan_pixel_size, subscan_rotation, subscan_type_partial, top_left_override + + +C0 = "CIRCUIT[magboard0]" +C1 = "CIRCUIT[magboard1]" +NION_DYNAMIC_MAGBOARDS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/scan_controller", + "prefix_src": [ + "metadata/scan/scan_device_properties/", + "metadata/scan/scan_device_properties/mag_boards/", + ], + # TODO: the above manual adding of NXcircuit should not be necessary + # working hypothesis if base class inheritance does not work correctly + # NXcomponent has NXcircuit + # NXscanbox_em is NXcomponent but does not inherit this NXcircuit + "map_to_f8": [ + (f"{C0}/dac0", "MagBoard 0 DAC 0"), + (f"{C0}/dac1", "MagBoard 0 DAC 1"), + (f"{C0}/dac2", "MagBoard 0 DAC 2"), + (f"{C0}/dac3", "MagBoard 0 DAC 3"), + (f"{C0}/dac4", "MagBoard 0 DAC 4"), + (f"{C0}/dac5", "MagBoard 0 DAC 5"), + 
(f"{C0}/dac6", "MagBoard 0 DAC 6"), + (f"{C0}/dac7", "MagBoard 0 DAC 7"), + (f"{C0}/dac8", "MagBoard 0 DAC 8"), + (f"{C0}/dac9", "MagBoard 0 DAC 9"), + (f"{C0}/dac10", "MagBoard 0 DAC 10"), + (f"{C0}/dac11", "MagBoard 0 DAC 11"), + (f"{C0}/relay", "MagBoard 0 Relay"), + (f"{C1}/dac0", "MagBoard 1 DAC 0"), + (f"{C1}/dac1", "MagBoard 1 DAC 1"), + (f"{C1}/dac2", "MagBoard 1 DAC 2"), + (f"{C1}/dac3", "MagBoard 1 DAC 3"), + (f"{C1}/dac4", "MagBoard 1 DAC 4"), + (f"{C1}/dac5", "MagBoard 1 DAC 5"), + (f"{C1}/dac6", "MagBoard 1 DAC 6"), + (f"{C1}/dac7", "MagBoard 1 DAC 7"), + (f"{C1}/dac8", "MagBoard 1 DAC 8"), + (f"{C1}/dac9", "MagBoard 1 DAC 9"), + (f"{C1}/dac10", "MagBoard 1 DAC 10"), + (f"{C1}/dac11", "MagBoard 1 DAC 11"), + (f"{C1}/relay", "MagBoard 1 Relay"), + ], +} + +# here is the same issue, for C. Koch's group it is correct that there is only one +# detector A so writing to detector1 works but not in cases when there are multiple +# detectors +NION_DYNAMIC_DETECTOR_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/detectorID[detector*]", + "prefix_src": "metadata/hardware_source/detector_configuration/", + "use": [ + ( + "mode", + "Currently, nionswift does not have a metadata key for this although Dectrics detectors use many of the Dectris NeXus keywords also in nionswift.", + ) + ], + "map_to_bool": [ + "countrate_correction_applied", + "pixel_mask_applied", + ( + "flatfield_applied", + "flatfield_correction_applied", + ), # example for concept_name mismatch Dectris and NeXus + ], + "map_to_i1": ["bit_depth_readout", "bit_depth_image"], + "map_to_f8": [ + ("beam_center_x", ureg.meter, "beam_center_x", ureg.meter), + ("beam_center_y", ureg.meter, "beam_center_y", ureg.meter), + ("detector_readout_time", ureg.second, "detector_readout_time", ureg.second), + ("frame_time", ureg.second, "frame_time", ureg.second), + ("count_time", ureg.second, "count_time", ureg.second), + ("threshold_energy", 
ureg.eV, "threshold_energy", ureg.eV),
+    ],
+}
+
+
+NION_DYNAMIC_EVENT_TIME = {
+    "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]",
+    "prefix_src": "metadata/hardware_source/detector_configuration/",
+    "map": [("start_time", "data_collection_date")],
+    # this could be a poor assumption as we do not know when during the acquisition
+    # this timestamp is taken
+}
+
+# the following concepts from metadata/hardware_source/detector_configuration
+# have no representative in NeXus for now, TODO add them as undocumented ?
+# auto_summation, chi_increment, chi_start, compression, countrate_correction_count_cutoff,
+# detector_translation, element, frame_count_time, frame_period, kappa_increment,
+# kappa_start, nimages, ntrigger, number_of_excluded_pixels, omega_increment,
+# omega_start, phi_increment, phi_start, photon_energy, roi_mode, trigger_mode,
+# two_theta_increment, two_theta_start, virtual_pixel_correction_applied, wavelength
+
+
+# a key challenge with nionswift project file metadata is that swift just repeats
+# all available information in each serialized resource, e.g. in a project with two
+# assets (datasets, images, spectra) both taken with detector A, each asset carries all
+# detector-specific metadata just dumped without any check whether an instance of the same
+# concept exists already and thus there is no need to overwrite it unless it was changed
+# nion does not distinguish static and dynamic metadata as if during a session at the
+# microscope one were to change the window thickness of the detector from one image
+# to the next even if that window is mounted physically on the detector and the user
+# of the microscope is not even allowed to open the microscope and de facto destroy the
+# detector, same story for the microscope used, nothing about this in nion metadata
+# the lazy approach to this is just repeat what nion is doing, i.e. 
copy all desired
+# metadata over all the time or equally nasty assume how many detectors there are
+# and write only one and prevent all overwriting of the template afterwards
+# this is not a question of naming conventions, take an SEM and take datasets with
+# it in the same session, each dataset a combination of signals from at least
+# two detectors, when serialized together there is no point in repeating again
+# how to check if (static) metadata from two detectors are the same?
+# with a serial number easy, reject all metadata for that detector we already know and
+# only add missing data
+# without a serial number though, like when parsing content from different microscopy
+# tech partners and the joint zoo of their formats, this is a challenging task especially
+# when one does not have a joint set of concepts on which one could first normalize
+# the representation and then compare if two sets are exactly the same in which case
+# the repetitive writing of detector data could be avoided and for the sake of
+# saving disk space just a reference is added, currently there is no parser plugin that
+# deals with this complexity
+NION_STATIC_DETECTOR_NX: Dict[str, Any] = {
+    "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/detectorID[detector*]",
+    "prefix_src": "metadata/hardware_source/detector_configuration/",
+    "map": [
+        ("FABRICATION[fabrication]/model", "description"),
+        (
+            "FABRICATION[fabrication]/vendor",
+            "detector_number",
+        ),  # not documented in nion metadata by default
+        ("FABRICATION[fabrication]/identifier", "detector_number"),
+        "eiger_fw_version",
+        "sensor_material",
+        "software_version",
+    ],
+    "map_to_u4": [
+        ("x_pixel", "x_pixels_in_detector"),
+        ("y_pixel", "y_pixels_in_detector"),
+    ],
+    "map_to_f8": [
+        ("x_pixel_size", ureg.meter, "x_pixel_size", ureg.meter),
+        ("y_pixel_size", ureg.meter, "y_pixel_size", ureg.meter),
+        ("sensor_thickness", ureg.meter, "sensor_thickness", ureg.meter),
+    ],
+}
+
+NION_STATIC_LENS_NX: Dict[str, 
Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/ebeam_column", + "prefix_src": [ + "metadata/hardware_source/ImageRonchigram/", + "metadata/hardware_source/autostem/ImageRonchigram/", + "metadata/hardware_source/autostem/ImageScanned/", + "metadata/instrument/ImageRonchigram/", + "metadata/instrument/ImageScanned/", + "metadata/instrument/autostem/ImageRonchigram/", + "metadata/scan/scan_device_properties/ImageScanned:", + "metadata/scan_detector/autostem/ImageScanned/", + ], + "use": [ + ( + "operation_mode", + "Currently, nionswift stores the operation mode relevant settings via multiple metadata keywords and none of them in my opinion fit quite with this concept. The community should decide how to solve this.", + ), + ("lensID[lens1]/name", "C1"), + ("lensID[lens2]/name", "C2"), + ("lensID[lens3]/name", "C3"), + ("lensID[lens4]/name", "MajorOL"), + ], +} diff --git a/src/pynxtools_em/configurations/oasis_cfg.py b/src/pynxtools_em/configurations/oasis_cfg.py index 8f33e75..ad914a3 100644 --- a/src/pynxtools_em/configurations/oasis_cfg.py +++ b/src/pynxtools_em/configurations/oasis_cfg.py @@ -17,13 +17,14 @@ # """Dict mapping values for a specifically configured NOMAD Oasis.""" -# see specific comments about the design that should be moved to mkdocs +from typing import Any, Dict # import datetime as dt # f"{dt.datetime.now(dt.timezone.utc).isoformat().replace('+00:00', 'Z')}", -EM_CSYS_TO_NEXUS = { +OASISCFG_EM_CSYS_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/coordinate_system_set/COORDINATE_SYSTEM[coordinate_system*]", + "prefix_src": "", "map": [ "alias", "type", @@ -39,7 +40,8 @@ } -EM_CITATION_TO_NEXUS = { +OASISCFG_EM_CITATION_TO_NEXUS: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/CITE[cite*]", - "map_to_str": [("authors"), ("doi"), ("description"), ("url")], + "prefix_src": "", + "map": ["authors", "doi", "description", "url"], } diff --git a/src/pynxtools_em/configurations/rsciio_gatan_cfg.py 
b/src/pynxtools_em/configurations/rsciio_gatan_cfg.py new file mode 100644 index 0000000..d1c20cc --- /dev/null +++ b/src/pynxtools_em/configurations/rsciio_gatan_cfg.py @@ -0,0 +1,110 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Dict mapping Gatan DigitalMicrograph custom schema instances on concepts in NXem.""" + +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +# be careful compared to Nion and other tech partners data for may have reversed order! +# TODO:: confirming that this implementation is correct demands examples with dissimilar sized +# rectangular, cubodial, and hypercuboidal stacks! +GATAN_WHICH_SPECTRUM = { + "eV": ("spectrum_0d", ["axis_energy"]), + "eV_m": ("spectrum_1d", ["axis_energy", "axis_i"]), + "eV_m_m": ("spectrum_2d", ["axis_energy", "axis_i", "axis_j"]), +} +GATAN_WHICH_IMAGE = { + "m": ("image_1d", ["axis_i"]), + "1/m": ("image_1d", ["axis_i"]), + "m_m": ("image_2d", ["axis_i", "axis_j"]), + "1/m_1/m": ("image_2d", ["axis_i", "axis_j"]), +} + + +GATAN_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab", + "prefix_src": "ImageList/TagGroup0/ImageTags/Microscope Info/", + "map_to_f8": [ + ( + "ebeam_column/electron_source/voltage", + ureg.volt, + "Voltage", + ureg.volt, + ), # volt? 
+ ( + "ebeam_column/electron_source/emission_current", + ureg.ampere, + "Emission Current (µA)", + ureg.microampere, + ), + # Formatted Voltage, HT Extrapolated + ( + "ebeam_column/BEAM[beam]/diameter", + ureg.meter, + "Probe Size (nm)", + ureg.nanometer, + ), # diameter? radius ? + ( + "OPTICAL_SETUP_EM[optical_setup]/probe_current", + ureg.ampere, + "Probe Current (nA)", + ureg.nanoampere, + ), + ( + "OPTICAL_SETUP_EM[optical_setup]/field_of_view", + ureg.meter, + "Field of View (µm)", + ureg.micrometer, + ), + ("OPTICAL_SETUP_EM[optical_setup]/magnification", "Actual Magnification"), + ( + "OPTICAL_SETUP_EM[optical_setup]/camera_length", + ureg.meter, + "STEM Camera Length", + ureg.meter, + ), # meter? + # Cs(mm), Indicated Magnification, Magnification Interpolated, Formatted Actual Mag, Formatted Indicated Mag + ], + "map": [ + ("OPTICAL_SETUP_EM[optical_setup]/illumination_mode", "Illumination Mode"), + ( + "OPTICAL_SETUP_EM[optical_setup]/illumination_submode", + "Illumination Sub-mode", + ), + ("OPTICAL_SETUP_EM[optical_setup]/imaging_mode", "Imaging Mode"), + ("OPTICAL_SETUP_EM[optical_setup]/name", "Name"), + ("OPTICAL_SETUP_EM[optical_setup]/operation_mode", "Operation Mode"), + ("OPTICAL_SETUP_EM[optical_setup]/operation_mode_type", "Operation Mode Type"), + ], +} + +GATAN_DYNAMIC_STAGE_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage]", + "prefix_src": "ImageList/TagGroup0/ImageTags/Microscope Info/Stage Position/", + "map_to_f8": [ + ("tilt1", ureg.radian, "Stage Alpha", ureg.radian), + ("tilt2", ureg.radian, "Stage Beta", ureg.radian), + ( + "position", + ureg.meter, + ["Stage X", "Stage Y", "Stage Z"], + ureg.meter, # really meter? 
+ ), + ], +} diff --git a/src/pynxtools_em/configurations/rsciio_velox_cfg.py b/src/pynxtools_em/configurations/rsciio_velox_cfg.py index 5bb2b9d..4b59b00 100644 --- a/src/pynxtools_em/configurations/rsciio_velox_cfg.py +++ b/src/pynxtools_em/configurations/rsciio_velox_cfg.py @@ -37,8 +37,26 @@ MetadataSchema-version and NeXus NXem-schema-version-dependent for the lenses """ -VELOX_ENTRY_TO_NX_EM = { +from typing import Any, Dict + +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +VELOX_WHICH_SPECTRUM = { + "eV": ("spectrum_0d", ["axis_energy"]), + "m_eV": ("spectrum_1d", ["axis_i", "axis_energy"]), + "m_m_eV": ("spectrum_2d", ["axis_j", "axis_i", "axis_energy"]), +} +VELOX_WHICH_IMAGE = { + "m": ("image_1d", ["axis_i"]), + "1/m": ("image_1d", ["axis_i"]), + "m_m": ("image_2d", ["axis_j", "axis_i"]), + "1/m_1/m": ("image_2d", ["axis_j", "axis_i"]), +} + + +VELOX_STATIC_ENTRY_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/control_program", + "prefix_src": "", "use": [ ( "program", @@ -49,64 +67,59 @@ } -VELOX_EBEAM_STATIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/EBEAM_COLUMN[ebeam_column]/electron_source", +VELOX_STATIC_EBEAM_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/ebeam_column/electron_source", + "prefix_src": "", "use": [("probe", "electron")], "map": [("emitter_type", "Acquisition/SourceType")], } -VELOX_FABRICATION_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/FABRICATION[fabrication]", +VELOX_STATIC_FABRICATION_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/em_lab/fabrication", + "prefix_src": "", "map": [ ("identifier", "Instrument/InstrumentId"), ("model", "Instrument/InstrumentModel"), ("vendor", "Instrument/Manufacturer"), - ], - "join_str": [ - ("model", ["Instrument/InstrumentClass", "Instrument/InstrumentModel"]) + # ("model", ["Instrument/InstrumentClass", "Instrument/InstrumentModel"]), ], } -VELOX_SCAN_TO_NX_EM = { - 
"prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/SCANBOX_EM[scanbox_em]", - "use": [("dwell_time/@units", "s")], - "map_to_real": [("dwell_time", "Scan/DwellTime")], +VELOX_DYNAMIC_SCAN_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/scan_controller", + "prefix_src": "", + "map_to_f8": [("dwell_time", ureg.second, "Scan/DwellTime", ureg.second)], } -VELOX_OPTICS_TO_NX_EM = { +VELOX_DYNAMIC_OPTICS_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/OPTICAL_SYSTEM_EM[optical_system_em]", - "use": [ - ("camera_length/@units", "m"), - ("defocus/@units", "m"), - ("semi_convergence_angle/@units", "rad"), - ], - "map_to_real": [ + "prefix_src": "", + "map_to_f8": [ ("magnification", "Optics/NominalMagnification"), - ("camera_length", "Optics/CameraLength"), - ("defocus", "Optics/Defocus"), - ], - "map_to_real_and_multiply": [ - ("semi_convergence_angle", "Optics/BeamConvergence", 1.0), + ("camera_length", ureg.meter, "Optics/CameraLength", ureg.meter), + ("defocus", ureg.meter, "Optics/Defocus", ureg.meter), + ("semi_convergence_angle", ureg.radian, "Optics/BeamConvergence", ureg.radian), ], } # assume BeamConvergence is the semi_convergence_angle, needs clarification from vendors and colleagues -VELOX_STAGE_TO_NX_EM = { +VELOX_DYNAMIC_STAGE_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/STAGE_LAB[stage_lab]", - "use": [ - ("tilt1/@units", "rad"), - ("tilt2/@units", "rad"), - ("position/@units", "m"), - ], - "map_to_str": [("design", "Stage/HolderType")], - "map_to_real": [ - ("tilt1", "Stage/AlphaTilt"), - ("tilt2", "Stage/BetaTilt"), - ("position", ["Stage/Position/x", "Stage/Position/y", "Stage/Position/z"]), + "prefix_src": "", + "map": [("design", "Stage/HolderType")], + "map_to_f8": [ + ("tilt1", 
ureg.radian, "Stage/AlphaTilt", ureg.radian), + ("tilt2", ureg.radian, "Stage/BetaTilt", ureg.radian), + ( + "position", + ureg.meter, + ["Stage/Position/x", "Stage/Position/y", "Stage/Position/z"], + ureg.meter, + ), ], } # we do not know whether the angle is radiant or degree, in all examples @@ -116,21 +129,20 @@ # is not a proper unit for an instance of NX_VOLTAGE -VELOX_DYNAMIC_TO_NX_EM = { +VELOX_DYNAMIC_VARIOUS_NX: Dict[str, Any] = { "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]", + "prefix_src": "", "unix_to_iso8601": [ ("start_time", "Acquisition/AcquisitionStartDatetime/DateTime") ], } -VELOX_EBEAM_DYNAMIC_TO_NX_EM = { - "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/EBEAM_COLUMN[ebeam_column]", - "use": [ - ("electron_source/voltage/@units", "V"), - ], - "concatenate": [ - ("operation_mode", ["Optics/OperatingMode", "Optics/TemOperatingSubMode"]) +VELOX_DYNAMIC_EBEAM_NX: Dict[str, Any] = { + "prefix_trg": "/ENTRY[entry*]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em*]/em_lab/ebeam_column", + "prefix_src": "", + "map": [("operation_mode", ["Optics/OperatingMode", "Optics/TemOperatingSubMode"])], + "map_to_f8": [ + ("electron_source/voltage", ureg.volt, "Optics/AccelerationVoltage", ureg.volt) ], - "map_to_real": [("electron_source/voltage", "Optics/AccelerationVoltage")], } diff --git a/src/pynxtools_em/parsers/convention_reader.py b/src/pynxtools_em/parsers/conventions_reader.py similarity index 74% rename from src/pynxtools_em/parsers/convention_reader.py rename to src/pynxtools_em/parsers/conventions_reader.py index 07b48b8..240662d 100644 --- a/src/pynxtools_em/parsers/convention_reader.py +++ b/src/pynxtools_em/parsers/conventions_reader.py @@ -21,24 +21,16 @@ import flatdict as fd import yaml -from pynxtools_em.concepts.mapping_functors import ( - add_specific_metadata, - variadic_path_to_specific_path, -) +from 
pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint from pynxtools_em.configurations.conventions_cfg import ( - DETECTOR_CSYS_TO_NEXUS, - GNOMONIC_CSYS_TO_NEXUS, - PATTERN_CSYS_TO_NEXUS, - PROCESSING_CSYS_TO_NEXUS, - ROTATIONS_TO_NEXUS, - SAMPLE_CSYS_TO_NEXUS, -) -from pynxtools_em.geometries.euler_angle_convention import euler_convention -from pynxtools_em.geometries.handed_cartesian import ( - AXIS_DIRECTIONS, - REFERENCE_FRAMES, - is_cartesian_cs_well_defined, + CONV_DETECTOR_CSYS_TO_NEXUS, + CONV_GNOMONIC_CSYS_TO_NEXUS, + CONV_PATTERN_CSYS_TO_NEXUS, + CONV_PROCESSING_CSYS_TO_NEXUS, + CONV_ROTATIONS_TO_NEXUS, + CONV_SAMPLE_CSYS_TO_NEXUS, ) +from pynxtools_em.geometries.handed_cartesian import is_cartesian_cs_well_defined from pynxtools_em.geometries.msmse_convention import is_consistent_with_msmse_convention @@ -52,28 +44,31 @@ def __init__(self, file_path: str, entry_id: int = 1, verbose: bool = False): pathlib.Path(file_path).name.endswith("conventions.yaml") or pathlib.Path(file_path).name.endswith("conventions.yml") ) and entry_id > 0: - self.entry_id = entry_id self.file_path = file_path with open(self.file_path, "r", encoding="utf-8") as stream: - self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/") + self.flat_metadata = fd.FlatDict(yaml.safe_load(stream), delimiter="/") if verbose: - for key, val in self.yml.items(): + for key, val in self.flat_metadata.items(): print(f"key: {key}, value: {val}") + self.entry_id = entry_id else: - self.entry_id = 1 self.file_path = "" - self.yml = {} + self.entry_id = 1 + self.flat_metadata = fd.FlatDict({}, "/") def parse(self, template) -> dict: """Extract metadata from generic ELN text file to respective NeXus objects.""" print("Parsing conventions...") identifier = [self.entry_id, 1] - add_specific_metadata(ROTATIONS_TO_NEXUS, self.yml, identifier, template) - add_specific_metadata(PROCESSING_CSYS_TO_NEXUS, self.yml, identifier, template) - 
add_specific_metadata(SAMPLE_CSYS_TO_NEXUS, self.yml, identifier, template) - add_specific_metadata(DETECTOR_CSYS_TO_NEXUS, self.yml, identifier, template) - add_specific_metadata(GNOMONIC_CSYS_TO_NEXUS, self.yml, identifier, template) - add_specific_metadata(PATTERN_CSYS_TO_NEXUS, self.yml, identifier, template) + for cfg in [ + CONV_ROTATIONS_TO_NEXUS, + CONV_PROCESSING_CSYS_TO_NEXUS, + CONV_SAMPLE_CSYS_TO_NEXUS, + CONV_DETECTOR_CSYS_TO_NEXUS, + CONV_GNOMONIC_CSYS_TO_NEXUS, + CONV_PATTERN_CSYS_TO_NEXUS, + ]: + add_specific_metadata_pint(cfg, self.flat_metadata, identifier, template) # check is used convention follows EBSD community suggestions by Rowenhorst et al. prfx = f"/ENTRY[entry{self.entry_id}]/coordinate_system_set" diff --git a/src/pynxtools_em/parsers/hfive_apex.py b/src/pynxtools_em/parsers/hfive_apex.py index 46cbddb..8ca4f69 100644 --- a/src/pynxtools_em/parsers/hfive_apex.py +++ b/src/pynxtools_em/parsers/hfive_apex.py @@ -444,28 +444,28 @@ def parse_and_normalize_eds_fov(self, fp): self.tmp[ckey] = NxImageRealSpaceSet() self.tmp[ckey].tmp["source"] = f"{src}/FOVIMAGE" nyx = { - "y": fp[f"{src}/FOVIMAGE"].attrs["PixelHeight"][0], - "x": fp[f"{src}/FOVIMAGE"].attrs["PixelWidth"][0], + "j": fp[f"{src}/FOVIMAGE"].attrs["PixelHeight"][0], + "i": fp[f"{src}/FOVIMAGE"].attrs["PixelWidth"][0], } syx = { - "x": fp[f"{src}/FOVIPR"]["MicronsPerPixelX"][0], - "y": fp[f"{src}/FOVIPR"]["MicronsPerPixelY"][0], + "j": fp[f"{src}/FOVIPR"]["MicronsPerPixelY"][0], + "i": fp[f"{src}/FOVIPR"]["MicronsPerPixelX"][0], } - scan_unit = {"x": "µm", "y": "µm"} + scan_unit = {"i": "µm", "j": "µm"} # is micron because MicronsPerPixel{dim} used by EDAX - self.tmp[ckey].tmp["image_twod/intensity"].value = np.reshape( - np.asarray(fp[f"{src}/FOVIMAGE"]), (nyx["y"], nyx["x"]) + self.tmp[ckey].tmp["image_2d/real"].value = np.reshape( + np.asarray(fp[f"{src}/FOVIMAGE"]), (nyx["j"], nyx["i"]) ) - dims = ["y", "x"] + dims = ["j", "i"] for dim in dims: - 
self.tmp[ckey].tmp[f"image_twod/axis_{dim}"].value = np.asarray( + self.tmp[ckey].tmp[f"image_2d/axis_{dim}"].value = np.asarray( 0.0 + np.linspace(0.0, nyx[dim] - 1, num=nyx[dim], endpoint=True) * syx[dim], - syx["x"].dtype, + syx[dim].dtype, ) self.tmp[ckey].tmp[ - f"image_twod/axis_{dim}@long_name" + f"image_2d/axis_{dim}@long_name" ].value = f"Position along {dim} ({scan_unit[dim]})" self.cache_id += 1 @@ -499,7 +499,7 @@ def parse_and_normalize_eds_spc(self, fp): e_zero = fp[f"{src}/SPC"]["eVOffset"][0] e_delta = fp[f"{src}/SPC"]["evPch"][0] e_n = fp[f"{src}/SPC"]["NumberOfPoints"][0] - self.tmp[ckey].tmp["spectrum_zerod/axis_energy"].value = ( + self.tmp[ckey].tmp["spectrum_0d/axis_energy"].value = ( e_zero + np.asarray( e_delta * np.linspace(0.0, int(e_n) - 1, num=int(e_n), endpoint=True), @@ -507,13 +507,11 @@ def parse_and_normalize_eds_spc(self, fp): ) / 1000.0 ) # keV - self.tmp[ckey].tmp[ - "spectrum_zerod/axis_energy@long_name" - ].value = "Energy (keV)" - self.tmp[ckey].tmp["spectrum_zerod/intensity"].value = np.asarray( + self.tmp[ckey].tmp["spectrum_0d/axis_energy@long_name"].value = "Energy (keV)" + self.tmp[ckey].tmp["spectrum_0d/real"].value = np.asarray( fp[f"{src}/SPC"]["SpectrumCounts"][0], np.int32 ) - self.tmp[ckey].tmp["spectrum_zerod/intensity@long_name"].value = f"Count (1)" + self.tmp[ckey].tmp["spectrum_0d/real@long_name"].value = f"Count (1)" self.cache_id += 1 for key, obj in self.tmp[ckey].tmp.items(): @@ -657,7 +655,7 @@ def parse_and_normalize_eds_area_rois(self, fp): ckey = self.init_named_cache(f"eds_map{self.cache_id}") self.tmp[ckey] = NxEmEdsIndexing() self.tmp[ckey].tmp["source"] = f"{src}/ROIs" - self.tmp[ckey].tmp["IMAGE_R_SET"] = [] + self.tmp[ckey].tmp["IMAGE_SET"] = [] e_zero = fp[f"{src}/SPC"]["eVOffset"][0] e_delta = fp[f"{src}/SPC"]["evPch"][0] @@ -667,13 +665,13 @@ def parse_and_normalize_eds_area_rois(self, fp): e_zero.dtype, ) # eV, as xraydb demands nxy = { - "x": 
fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionX"], - "y": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionY"], - "lx": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldWidth"], - "ly": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldHeight"], + "i": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionX"], + "j": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["ResolutionY"], + "li": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldWidth"], + "lj": fp[f"{src}/ELEMENTOVRLAYIMGCOLLECTIONPARAMS"][0]["mmFieldHeight"], } - sxy = {"x": nxy["lx"] / nxy["x"], "y": nxy["ly"] / nxy["y"]} - scan_unit = {"x": "µm", "y": "µm"} + sxy = {"i": nxy["li"] / nxy["i"], "j": nxy["lj"] / nxy["j"]} + scan_unit = {"i": "µm", "j": "µm"} for entry in uniq: eds_map = NxImageRealSpaceSet() eds_map.tmp["source"] = f"{src}/ROIs/{entry}" @@ -690,24 +688,24 @@ def parse_and_normalize_eds_area_rois(self, fp): eds_map.tmp["iupac_line_candidates"] = ", ".join( get_xrayline_candidates(e_channels[e_roi_s], e_channels[e_roi_e + 1]) ) - for dim in ["x", "y"]: - eds_map.tmp[f"image_twod/axis_{dim}"].value = np.asarray( + for dim in ["i", "j"]: + eds_map.tmp[f"image_2d/axis_{dim}"].value = np.asarray( 0.0 + sxy[dim] * np.linspace(0.0, nxy[dim] - 1, num=int(nxy[dim]), endpoint=True), np.float32, ) eds_map.tmp[ - f"image_twod/axis_{dim}@long_name" + f"image_2d/axis_{dim}@long_name" ].value = f"{dim}-axis pixel coordinate ({scan_unit[dim]})" - eds_map.tmp["image_twod/intensity"].value = np.asarray( + eds_map.tmp["image_2d/real"].value = np.asarray( fp[f"{src}/ROIs/{entry}.dat"] ) - self.tmp[ckey].tmp["IMAGE_R_SET"].append(eds_map) # copy + self.tmp[ckey].tmp["IMAGE_SET"].append(eds_map) # copy self.cache_id += 1 for key, val in self.tmp[ckey].tmp.items(): - if key.startswith("IMAGE_R_SET"): + if key.startswith("IMAGE_SET"): for img in val: for kkey, vval in img.tmp.items(): print(f"\t\timg, key: {kkey}, val: {vval}") @@ -765,11 +763,11 @@ def 
parse_and_normalize_eds_line_lsd(self, fp): e_zero = 0.0 # strong assumption based on VInP_108_L2 example from IKZ e_delta = fp[f"{src}/SPC"].attrs["eVPCh"][0] e_n = fp[f"{src}/LSD"].attrs["NumberofChannels"][0] - self.tmp[ckey].tmp["spectrum_oned/axis_energy"].value = e_zero + np.asarray( + self.tmp[ckey].tmp["spectrum_1d/axis_energy"].value = e_zero + np.asarray( e_delta * np.linspace(0.0, int(e_n) - 1, num=int(e_n), endpoint=True), e_zero.dtype, ) - self.tmp[ckey].tmp["spectrum_oned/axis_energy@long_name"].value = "Energy (eV)" + self.tmp[ckey].tmp["spectrum_1d/axis_energy@long_name"].value = "Energy (eV)" # vector representation of the line's physical length from mm to µm line = np.asarray( @@ -791,21 +789,21 @@ def parse_and_normalize_eds_line_lsd(self, fp): i_n = fp[f"{src}/LSD"].attrs["NumberOfSpectra"][0] line_length = np.sqrt(line[0] ** 2 + line[1] ** 2) line_incr = line_length / i_n - self.tmp[ckey].tmp["spectrum_oned/axis_x"].value = np.asarray( + self.tmp[ckey].tmp["spectrum_1d/axis_i"].value = np.asarray( np.linspace(0.5 * line_incr, line_length, num=i_n, endpoint=True), fp[f"{src}/REGION"].attrs["X2"][0].dtype, ) - self.tmp[ckey].tmp["spectrum_oned/axis_x@long_name"] = ( + self.tmp[ckey].tmp["spectrum_1d/axis_i@long_name"] = ( "Coordinate along x-axis (µm)" ) - self.tmp[ckey].tmp["spectrum_oned/intensity"].value = np.asarray( + self.tmp[ckey].tmp["spectrum_1d/real"].value = np.asarray( fp[f"{src}/LSD"][0], np.int32 ) - self.tmp[ckey].tmp["spectrum_oned/intensity@long_name"].value = f"Count (1)" + self.tmp[ckey].tmp["spectrum_1d/real@long_name"].value = f"Count (1)" self.cache_id += 1 for key, val in self.tmp[ckey].tmp.items(): - print(f"ckey: {ckey}, spectrum_oned, key: {key}, val: {val}") + print(f"ckey: {ckey}, spectrum_1d, key: {key}, val: {val}") def parse_and_normalize_eds_line_rois(self, fp): """Normalize and scale APEX-specific EDS element emission line maps to NeXus.""" diff --git a/src/pynxtools_em/parsers/image_base.py 
b/src/pynxtools_em/parsers/image_base.py index bdad908..4d44ee5 100644 --- a/src/pynxtools_em/parsers/image_base.py +++ b/src/pynxtools_em/parsers/image_base.py @@ -17,31 +17,29 @@ # """Parent class for all tech partner-specific image parsers for mapping on NXem.""" -from typing import Dict, List +from typing import Dict -import numpy as np +# TODO::deprecated class ImgsBaseParser: def __init__(self, file_path: str = "", verbose=False): - # self.supported_version = VERSION_MANAGEMENT - # self.version = VERSION_MANAGEMENT # tech_partner the company which designed this format # schema_name the specific name of the family of schemas supported by this reader # schema_version the specific version(s) supported by this reader # writer_name the specific name of the tech_partner's (typically proprietary) software - self.prfx = None - self.tmp: Dict = {} - self.verbose = verbose if file_path is not None and file_path != "": self.file_path = file_path else: raise ValueError(f"{__name__} needs proper instantiation !") + self.tmp: Dict = {} + self.verbose = verbose + self.file_path_sha256 = None def init_named_cache(self, ckey: str): """Init a new cache for normalized image data if not existent.""" # purpose of the cache is to hold normalized information - if ckey not in self.tmp.keys(): + if ckey not in self.tmp: self.tmp[ckey] = {} return ckey else: diff --git a/src/pynxtools_em/parsers/image_png_protochips.py b/src/pynxtools_em/parsers/image_png_protochips.py index 522f179..8283608 100644 --- a/src/pynxtools_em/parsers/image_png_protochips.py +++ b/src/pynxtools_em/parsers/image_png_protochips.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -"""Subparser for exemplar reading of raw PNG files collected on a TEM with Protochip heating_chip.""" +"""Parser for exemplar reading of raw PNG files collected on a TEM with Protochip heating_chip.""" import datetime import mmap @@ -27,17 +27,17 @@ import numpy as np import xmltodict from PIL import Image -from pynxtools_em.concepts.mapping_functors import ( - add_specific_metadata, - variadic_path_to_specific_path, +from pynxtools_em.concepts.mapping_functors_pint import ( + add_specific_metadata_pint, + var_path_to_spcfc_path, ) from pynxtools_em.configurations.image_png_protochips_cfg import ( - AXON_AUX_DYNAMIC_TO_NX_EM, - AXON_CHIP_DYNAMIC_TO_NX_EM, - AXON_DETECTOR_STATIC_TO_NX_EM, - AXON_STAGE_DYNAMIC_TO_NX_EM, - AXON_STAGE_STATIC_TO_NX_EM, - AXON_VARIOUS_DYNAMIC_TO_NX_EM, + AXON_DYNAMIC_AUX_NX, + AXON_DYNAMIC_CHIP_NX, + AXON_DYNAMIC_STAGE_NX, + AXON_DYNAMIC_VARIOUS_NX, + AXON_STATIC_DETECTOR_NX, + AXON_STATIC_STAGE_NX, specific_to_variadic, ) from pynxtools_em.parsers.image_base import ImgsBaseParser @@ -45,7 +45,9 @@ DEFAULT_CHECKSUM_ALGORITHM, get_sha256_of_file_content, ) +from pynxtools_em.utils.pint_custom_unit_registry import ureg from pynxtools_em.utils.sorting import sort_ascendingly_by_second_argument_iso8601 +from pynxtools_em.utils.string_conversions import string_to_number from pynxtools_em.utils.xml_utils import flatten_xml_to_dict @@ -54,9 +56,7 @@ def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False super().__init__(file_path) self.entry_id = entry_id self.event_id = 1 - self.prfx = None - self.tmp: Dict = {"data": None, "meta": {}} - self.supported_version: Dict = {} + self.dict_meta: Dict[str, fd.FlatDict] = {} self.version: Dict = {} self.png_info: Dict = {} self.supported = False @@ -73,6 +73,7 @@ def check_if_zipped_png_protochips(self): # all tests have to be passed before the input self.file_path # can at all be processed with this parser # test 1: check if file is a zipfile + self.supported = False with 
open(self.file_path, "rb", 0) as file: s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) magic = s.read(8) @@ -134,6 +135,7 @@ def check_if_zipped_png_protochips(self): self.supported = True def get_xml_metadata(self, file, fp): + """Parse content from the XML payload that PNGs from AXON Studio have.""" try: fp.seek(0) with Image.open(fp) as png: @@ -157,7 +159,7 @@ def get_xml_metadata(self, file, fp): else: grpnm_lookup[concept] = value # second phase, evaluate each concept instance symbol wrt to its prefix coming from the unique concept - self.tmp["meta"][file] = fd.FlatDict({}) + self.dict_meta[file] = fd.FlatDict({}, "/") for k, v in meta.items(): grpnms = None idxs = re.finditer(r".\[[0-9]+\].", k) @@ -176,8 +178,10 @@ def get_xml_metadata(self, file, fp): key = specific_to_variadic( f"{grpnms[0]}.{grpnms[1]}.{k[k.rfind('.') + 1:]}" ) - if key not in self.tmp["meta"][file]: - self.tmp["meta"][file][key] = v + if key not in self.dict_meta[file]: + self.dict_meta[file][key] = ( + string_to_number(v) + ) else: raise KeyError( "Trying to register a duplicated key {key}" @@ -186,29 +190,32 @@ def get_xml_metadata(self, file, fp): key = specific_to_variadic( f"{grpnms[0]}.{grpnms[1]}" ) - if key not in self.tmp["meta"][file]: - self.tmp["meta"][file][key] = v + if key not in self.dict_meta[file]: + self.dict_meta[file][key] = ( + string_to_number(v) + ) else: print( f"Trying to register duplicated key {key}" ) else: key = f"{k}" - if key not in self.tmp["meta"][file]: - self.tmp["meta"][file][key] = v + if key not in self.dict_meta[file]: + self.dict_meta[file][key] = string_to_number(v) else: print(f"Trying to register duplicated key {key}") - # TODO::simplify and check that metadata end up correctly in self.tmp["meta"][file] - # for key, value in self.tmp["meta"][file].items(): - # print(f"{type(key)}: {key}\t\t{type(value)}:{value}") + # TODO::simplify and check that metadata end up correctly in self.dict_meta[file] + if self.verbose: + for key, value in 
self.dict_meta[file].items(): + print(f"{key}____{type(value)}____{type(value)}") except ValueError: print(f"Flattening XML metadata content {self.file_path}:{file} failed !") def get_file_hash(self, file, fp): - self.tmp["meta"][file]["sha256"] = get_sha256_of_file_content(fp) + self.dict_meta[file]["sha256"] = get_sha256_of_file_content(fp) - def parse_and_normalize(self): - """Perform actual parsing filling cache self.tmp.""" + def parse(self, template: dict) -> dict: + """Perform actual parsing filling cache.""" if self.supported is True: print(f"Parsing via Protochips-specific metadata...") # may need to set self.supported = False on error @@ -218,28 +225,26 @@ def parse_and_normalize(self): self.get_xml_metadata(file, fp) self.get_file_hash(file, fp) # print(f"Debugging self.tmp.file.items {file}") - # for k, v in self.tmp["meta"][file].items(): + # for k, v in self.dict_meta[file].items(): # if k == "MicroscopeControlImageMetadata.MicroscopeDateTime": # print(f"{k}: {v}") print( f"{self.file_path} metadata within PNG collection processed " - f"successfully ({len(self.tmp['meta'].keys())} PNGs evaluated)." + f"successfully ({len(self.dict_meta)} PNGs evaluated)." ) + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) else: print( f"{self.file_path} is not a Protochips-specific " f"PNG file that this parser can process !" 
) - - def process_into_template(self, template: dict) -> dict: - if self.supported is True: - self.process_event_data_em_metadata(template) - self.process_event_data_em_data(template) return template def sort_event_data_em(self) -> List: + """Sort event data by datetime.""" events: List = [] - for file_name, mdata in self.tmp["meta"].items(): + for file_name, mdata in self.dict_meta.items(): key = f"MicroscopeControlImageMetadata.MicroscopeDateTime" if isinstance(mdata, fd.FlatDict): if key in mdata: @@ -265,66 +270,6 @@ def sort_event_data_em(self) -> List: ) return events_sorted - def add_detector_static_metadata(self, file_name: str, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - AXON_DETECTOR_STATIC_TO_NX_EM, - self.tmp["meta"][file_name], - identifier, - template, - ) - return template - - def add_stage_static_metadata(self, file_name: str, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - AXON_STAGE_STATIC_TO_NX_EM, - self.tmp["meta"][file_name], - identifier, - template, - ) - return template - - def add_stage_dynamic_metadata(self, file_name: str, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - AXON_STAGE_DYNAMIC_TO_NX_EM, - self.tmp["meta"][file_name], - identifier, - template, - ) - return template - - def add_chip_dynamic_metadata(self, file_name: str, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - AXON_CHIP_DYNAMIC_TO_NX_EM, - self.tmp["meta"][file_name], - identifier, - template, - ) - return template - - def add_aux_dynamic_metadata(self, file_name: str, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - AXON_AUX_DYNAMIC_TO_NX_EM, - self.tmp["meta"][file_name], - identifier, - template, - ) - return template - - def add_various_dynamic_metadata(self, file_name: str, template: dict) -> dict: - 
identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - AXON_VARIOUS_DYNAMIC_TO_NX_EM, - self.tmp["meta"][file_name], - identifier, - template, - ) - return template - def process_event_data_em_metadata(self, template: dict) -> dict: """Add respective metadata.""" # contextualization to understand how the image relates to the EM session @@ -335,24 +280,50 @@ def process_event_data_em_metadata(self, template: dict) -> dict: # surplus eventually AXON-specific identifier it seems useful though to sort these # PNGs based on time stamped information directly from the AXON metadata # here we sort ascendingly in time the events and associate new event ids + # static instrument data + self.event_sequence = self.sort_event_data_em() event_id = self.event_id + toggle = True for file_name, iso8601 in self.event_sequence: identifier = [self.entry_id, event_id, 1] - trg = variadic_path_to_specific_path( - f"/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET" - f"[event_data_em_set]/EVENT_DATA_EM[event_data_em*]" - f"/start_time", + trg = var_path_to_spcfc_path( + f"/ENTRY[entry*]/measurement/event_data_em_set/" + f"EVENT_DATA_EM[event_data_em*]/start_time", identifier, ) template[trg] = f"{iso8601}".replace(" ", "T") # AXON reports "yyyy-mm-dd hh-mm-ss*" but NeXus requires yyyy-mm-ddThh-mm-ss*" - self.add_detector_static_metadata(file_name, template) - self.add_stage_static_metadata(file_name, template) - # self.add_stage_dynamic_metadata(file_name, template) # TODO::unit for stage positions unclear - self.add_chip_dynamic_metadata(file_name, template) - self.add_aux_dynamic_metadata(file_name, template) - self.add_various_dynamic_metadata(file_name, template) + + # static + if toggle: + for cfg in [AXON_STATIC_DETECTOR_NX, AXON_STATIC_STAGE_NX]: + add_specific_metadata_pint( + cfg, + self.dict_meta[file_name], + [1, 1], + template, + ) + toggle = False + # dynamic + for cfg in [ + AXON_DYNAMIC_CHIP_NX, + AXON_DYNAMIC_AUX_NX, + AXON_DYNAMIC_VARIOUS_NX, + ]: + 
add_specific_metadata_pint( + cfg, + self.dict_meta[file_name], + identifier, + template, + ) + # additional dynamic data with currently different formatting + add_specific_metadata_pint( + AXON_DYNAMIC_STAGE_NX, + self.dict_meta[file_name], + identifier, + template, + ) event_id += 1 return template @@ -372,14 +343,14 @@ def process_event_data_em_data(self, template: dict) -> dict: nparr = np.array(png) image_identifier = 1 trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET" - f"[event_data_em_set]/EVENT_DATA_EM[event_data_em{event_id}]" - f"/IMAGE_R_SET[image_r_set{image_identifier}]/image_twod" + f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set" + f"/EVENT_DATA_EM[event_data_em{event_id}]" + f"/IMAGE_SET[image_set{image_identifier}]/image_2d" ) # TODO::writer should decorate automatically! template[f"{trg}/title"] = f"Image" - template[f"{trg}/@signal"] = "intensity" - dims = ["x", "y"] + template[f"{trg}/@signal"] = "real" + dims = ["i", "j"] idx = 0 for dim in dims: template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = ( @@ -389,39 +360,49 @@ def process_event_data_em_data(self, template: dict) -> dict: template[f"{trg}/@axes"] = [] for dim in dims[::-1]: template[f"{trg}/@axes"].append(f"axis_{dim}") - template[f"{trg}/intensity"] = { - "compress": nparr, - "strength": 1, - } + template[f"{trg}/real"] = {"compress": nparr, "strength": 1} # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d - template[f"{trg}/intensity/@long_name"] = f"Signal" + template[f"{trg}/real/@long_name"] = f"Signal" - sxy = {"x": 1.0, "y": 1.0} - scan_unit = {"x": "px", "y": "px"} - # TODO::get AXON image calibration - # "ImagerSettings.ImagePhysicalSize.X" / "ImagerSettings.ImagePixels.X" - # "ImagerSettings.ImagePhysicalSize.Y" / "ImagerSettings.ImagePixels.Y" - nxy = {"x": np.shape(nparr)[1], "y": np.shape(nparr)[0]} + sxy = { + "i": ureg.Quantity(1.0, ureg.meter), + "j": ureg.Quantity(1.0, ureg.meter), + } + abbrev = 
"MicroscopeControlImageMetadata.ImagerSettings.ImagePhysicalSize" + if ( + f"{abbrev}.X" in self.dict_meta[file_name] + and f"{abbrev}.Y" in self.dict_meta[file_name] + ): + sxy = { + "i": ureg.Quantity( + self.dict_meta[file_name][f"{abbrev}.X"], + ureg.nanometer, + ), + "j": ureg.Quantity( + self.dict_meta[file_name][f"{abbrev}.Y"], + ureg.nanometer, + ), + } + nxy = {"i": np.shape(nparr)[1], "j": np.shape(nparr)[0]} del nparr - # TODO::we assume here a very specific coordinate system - # see image_tiff_tfs.py parser for further details of the limitations - # of this approach + # TODO::we assume here a very specific coordinate system see image_tiff_tfs.py + # parser for further details of the limitations of this approach for dim in dims: template[f"{trg}/AXISNAME[axis_{dim}]"] = { "compress": np.asarray( np.linspace( 0, nxy[dim] - 1, num=nxy[dim], endpoint=True ) - * sxy[dim], + * sxy[dim].magnitude, np.float64, ), "strength": 1, } template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( - f"Coordinate along {dim}-axis ({scan_unit[dim]})" + f"Coordinate along {dim}-axis ({sxy[dim].units})" ) template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = ( - f"{scan_unit[dim]}" + f"{sxy[dim].units}" ) event_id += 1 return template diff --git a/src/pynxtools_em/parsers/image_tiff.py b/src/pynxtools_em/parsers/image_tiff.py index c8eeabe..f7f5dfb 100644 --- a/src/pynxtools_em/parsers/image_tiff.py +++ b/src/pynxtools_em/parsers/image_tiff.py @@ -30,10 +30,8 @@ class TiffParser(ImgsBaseParser): def __init__(self, file_path: str = ""): super().__init__(file_path) - self.prfx = None self.tmp: Dict = {} - self.supported_version: Dict = {} - self.version: Dict = {} + self.version: Dict = {} # trg target versions supported, src actual self.tags: Dict = {} self.supported = False self.check_if_tiff() @@ -89,11 +87,3 @@ def get_tags(self, verbose: bool = False): self.tags = {TAGS[key]: fp.tag[key] for key in fp.tag_v2} for key, val in self.tags.items(): print(f"{key}, {val}") - - def 
parse_and_normalize(self): - """Perform actual parsing filling cache self.tmp.""" - if self.supported is True: - print(f"Parsing via TiffParser...") - self.get_tags() - else: - print(f"{self.file_path} is not a TIFF file this parser can process !") diff --git a/src/pynxtools_em/parsers/image_tiff_hitachi.py b/src/pynxtools_em/parsers/image_tiff_hitachi.py new file mode 100644 index 0000000..664fb88 --- /dev/null +++ b/src/pynxtools_em/parsers/image_tiff_hitachi.py @@ -0,0 +1,210 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Parser for harmonizing Hitachi-specific content in TIFF files.""" + +import mmap +from tokenize import TokenError +from typing import Dict, List + +import flatdict as fd +import numpy as np +from PIL import Image, ImageSequence +from pint import UndefinedUnitError +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint +from pynxtools_em.configurations.image_tiff_hitachi_cfg import ( + HITACHI_DYNAMIC_VARIOUS_NX, + HITACHI_STATIC_VARIOUS_NX, +) +from pynxtools_em.parsers.image_tiff import TiffParser +from pynxtools_em.utils.pint_custom_unit_registry import ureg +from pynxtools_em.utils.string_conversions import string_to_number + + +class HitachiTiffParser(TiffParser): + def __init__(self, file_paths: List[str], entry_id: int = 1, verbose=False): + # TODO::instantiate super.__init__ + tif_txt = ["", ""] + if ( + len(file_paths) == 2 + and file_paths[0][0 : file_paths[0].rfind(".")] + == file_paths[1][0 : file_paths[0].rfind(".")] + ): + for entry in file_paths: + if entry.lower().endswith((".tif", ".tiff")): + tif_txt[0] = entry + elif entry.lower().endswith((".txt")): + tif_txt[1] = entry + if all(value != "" for value in tif_txt): + super().__init__(tif_txt[0]) + self.entry_id = entry_id + self.event_id = 1 + self.verbose = verbose + self.txt_file_path = tif_txt[1] + self.flat_dict_meta = fd.FlatDict({}, "/") + self.version: Dict = {} + self.supported = False + self.check_if_tiff_hitachi() + else: + print(f"Parser {self.__class__.__name__} needs TIF and TXT file !") + self.supported = False + + def check_if_tiff_hitachi(self): + """Check if resource behind self.file_path is a TaggedImageFormat file.""" + self.supported = False + if not hasattr(self, "file_path"): + print( + f"... is not a Hitachi-specific TIFF file that this parser can process !" + ) + return + if self.txt_file_path is None: + print( + f"Parser {self.__class__.__name__} does not work without a Hitachi text file with the image metadata !" 
+ f"This file is required to have exactly the same file name as the file with the TIF image data !" + ) + return + with open(self.file_path, "rb", 0) as file: + s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + magic = s.read(4) + if magic != b"II*\x00": # https://en.wikipedia.org/wiki/TIFF + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) + return + with open(self.txt_file_path, "r", encoding="utf8") as fp: + txt = fp.read() + txt = txt.replace("\r\n", "\n") # windows to unix EOL conversion + txt = [ + line.strip() + for line in txt.split("\n") + if line.strip() != "" and line.startswith("#") is False + ] + # jump to typical header line + idx = 0 + while not txt[idx].startswith( + ("[SemImageFile]", "[TemImageFile]") + ) and idx < (len(txt) - 1): + idx += 1 + if idx < len(txt): + if not txt[idx].startswith(("[SemImageFile]", "[TemImageFile]")): + return + else: + print(f"Parser {self.__class__.__name__} metadata section is empty !") + return + + self.flat_dict_meta = fd.FlatDict({}, "/") + for line in txt[idx + 1 :]: # + 1 to jump over the header line + tmp = [token.strip() for token in line.split("=")] + if len(tmp) == 2 and all(token != "" for token in tmp): + try: + self.flat_dict_meta[tmp[0]] = ureg.Quantity(tmp[1]) + except (UndefinedUnitError, TokenError): + self.flat_dict_meta[tmp[0]] = string_to_number(tmp[1]) + + if self.verbose: + for key, value in self.flat_dict_meta.items(): + print(f"{key}______{type(value)}____{value}") + self.supported = True + + def parse(self, template: dict) -> dict: + """Perform actual parsing filling cache.""" + if self.supported is True: + print(f"Parsing via Hitachi...") + # metadata have at this point already been collected into an fd.FlatDict + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) + return template + + def process_event_data_em_data(self, template: dict) -> dict: + """Add respective heavy data.""" + # default 
display of the image(s) representing the data collected in this event + print( + f"Writing Hitachi TIFF image data to the respective NeXus concept instances..." + ) + image_identifier = 1 + with Image.open(self.file_path, mode="r") as fp: + for img in ImageSequence.Iterator(fp): + nparr = np.array(img) + print( + f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}" + ) + # eventually similar open discussions points as were raised for tiff_tfs parser + trg = ( + f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" + f"EVENT_DATA_EM[event_data_em{self.event_id}]/" + f"IMAGE_SET[image_set{image_identifier}]/image_2d" + ) + template[f"{trg}/title"] = f"Image" + template[f"{trg}/@signal"] = "real" + dims = ["i", "j"] # i == x (fastest), j == y (fastest) + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = ( + np.uint32(idx) + ) + idx += 1 + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + template[f"{trg}/real"] = {"compress": np.array(fp), "strength": 1} + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d + template[f"{trg}/real/@long_name"] = f"Signal" + + sxy = { + "i": ureg.Quantity(1.0, ureg.meter), + "j": ureg.Quantity(1.0, ureg.meter), + } + if "PixelSize" in self.flat_dict_meta: + sxy = { + "i": ureg.Quantity( + self.flat_dict_meta["PixelSize"], ureg.nanometer + ), + "j": ureg.Quantity( + self.flat_dict_meta["PixelSize"], ureg.nanometer + ), + } + else: + print("WARNING: Assuming pixel width and height unit is meter!") + + nxy = {"i": np.shape(np.array(fp))[1], "j": np.shape(np.array(fp))[0]} + # TODO::be careful we assume here a very specific coordinate system + # however, these assumptions need to be confirmed by point electronic + # additional points as discussed already in comments to TFS TIFF reader + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] = { + "compress": np.asarray( + 
np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True) + * sxy[dim].magnitude, + np.float64, + ), + "strength": 1, + } + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( + f"Coordinate along {dim}-axis ({sxy[dim].units})" + ) + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{sxy[dim].units}" + image_identifier += 1 + return template + + def process_event_data_em_metadata(self, template: dict) -> dict: + """Add respective metadata.""" + print(f"Mapping some of the Hitachi metadata on respective NeXus concepts...") + # we assume for now dynamic quantities can just be repeated + identifier = [self.entry_id, self.event_id, 1] + for cfg in [HITACHI_DYNAMIC_VARIOUS_NX, HITACHI_STATIC_VARIOUS_NX]: + add_specific_metadata_pint(cfg, self.flat_dict_meta, identifier, template) + return template diff --git a/src/pynxtools_em/parsers/image_tiff_jeol.py b/src/pynxtools_em/parsers/image_tiff_jeol.py index 4987cc1..d0e9374 100644 --- a/src/pynxtools_em/parsers/image_tiff_jeol.py +++ b/src/pynxtools_em/parsers/image_tiff_jeol.py @@ -15,26 +15,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -"""Subparser for harmonizing JEOL specific content in TIFF files.""" +"""Parser for harmonizing JEOL specific content in TIFF files.""" import mmap from typing import Dict, List import flatdict as fd import numpy as np -import pint from PIL import Image, ImageSequence -from pint import UnitRegistry from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint from pynxtools_em.configurations.image_tiff_jeol_cfg import ( - JEOL_VARIOUS_DYNAMIC_TO_NX_EM, - JEOL_VARIOUS_STATIC_TO_NX_EM, + JEOL_DYNAMIC_VARIOUS_NX, + JEOL_STATIC_VARIOUS_NX, ) from pynxtools_em.parsers.image_tiff import TiffParser +from pynxtools_em.utils.pint_custom_unit_registry import ureg from pynxtools_em.utils.string_conversions import string_to_number -ureg = UnitRegistry() - class JeolTiffParser(TiffParser): def __init__(self, file_paths: List[str], entry_id: int = 1, verbose=False): @@ -55,11 +52,8 @@ def __init__(self, file_paths: List[str], entry_id: int = 1, verbose=False): self.event_id = 1 self.verbose = verbose self.txt_file_path = tif_txt[1] - self.prfx = None - self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})} - self.supported_version: Dict = {} + self.flat_dict_meta = fd.FlatDict({}, "/") self.version: Dict = {} - self.tags: Dict = {} self.supported = False self.check_if_tiff_jeol() else: @@ -72,9 +66,13 @@ def check_if_tiff_jeol(self): This loads the metadata with the txt_file_path first to the formatting of that information can be used to tell JEOL data apart from other data. """ - # currently not voting-based algorithm required as used in other parsers + self.supported = False + if not hasattr(self, "file_path"): + print( + f"... is not a JEOL-specific TIFF file that this parser can process !" + ) + return if self.txt_file_path is None: - self.supported = False print( f"Parser {self.__class__.__name__} does not work without a JEOL text file with the image metadata !" 
f"This file is required to have exactly the same file name as the file with the TIF image data !" @@ -84,7 +82,6 @@ def check_if_tiff_jeol(self): s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) magic = s.read(4) if magic != b"II*\x00": # https://en.wikipedia.org/wiki/TIFF - self.supported = False print( f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" ) @@ -96,58 +93,47 @@ def check_if_tiff_jeol(self): if line.strip() != "" and line.startswith("$") ] - self.tmp["flat_dict_meta"] = fd.FlatDict({}, "/") + self.flat_dict_meta = fd.FlatDict({}, "/") for line in txt: tmp = line.split() if len(tmp) == 1: print(f"WARNING::{line} is currently ignored !") elif len(tmp) == 2: - if tmp[0] not in self.tmp["flat_dict_meta"]: - # this is not working robustly as the following example fails: - # CM_TITLE 20240227_A1_2m_0_FA3_1 ('invalid decimal literal', (1, 9)) - # try: - # self.tmp["flat_dict_meta"][tmp[0]] = pint.Quantity(tmp[1]) - # except pint.errors.UndefinedUnitError: - # self.tmp["flat_dict_meta"][tmp[0]] = tmp[1] - # as an alternative we currently use a mixture of pint quantities - # and regular numpy / pure Python types, the mapping functor should - # take care of resolving the cases properly + if tmp[0] not in self.flat_dict_meta: + # replace with pint parsing and catching multiple exceptions + # as it is exemplified in the tiff_zeiss parser if tmp[0] != "SM_MICRON_MARKER": - self.tmp["flat_dict_meta"][tmp[0]] = string_to_number( - tmp[1] - ) + self.flat_dict_meta[tmp[0]] = string_to_number(tmp[1]) else: - self.tmp["flat_dict_meta"][tmp[0]] = pint.Quantity(tmp[1]) + self.flat_dict_meta[tmp[0]] = ureg.Quantity(tmp[1]) else: raise KeyError(f"Found duplicated key {tmp[0]} !") - else: # len(tmp) > 2: + else: print(f"WARNING::{line} is currently ignored !") - # report metadata just for verbose purposes right now - for key, value in self.tmp["flat_dict_meta"].items(): - print(f"{key}______{type(value)}____{value}") + if 
self.verbose: + for key, value in self.flat_dict_meta.items(): + print(f"{key}______{type(value)}____{value}") - if ( - self.tmp["flat_dict_meta"]["SEM_DATA_VERSION"] == 1 - and self.tmp["flat_dict_meta"]["CM_LABEL"] == "JEOL" + if all( + key in self.flat_dict_meta for key in ["SEM_DATA_VERSION", "CM_LABEL"] ): - self.supported = True + if (self.flat_dict_meta["SEM_DATA_VERSION"] == 1) and ( + self.flat_dict_meta["CM_LABEL"] == "JEOL" + ): + self.supported = True else: - self.supported = False print( f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" ) def parse(self, template: dict) -> dict: + """Perform actual parsing filling cache.""" if self.supported is True: print(f"Parsing via JEOL...") # metadata have at this point already been collected into an fd.FlatDict self.process_event_data_em_metadata(template) self.process_event_data_em_data(template) - else: - print( - f"{self.file_path} is not a JEOL-specific TIFF file that this parser can process !" 
- ) return template def process_event_data_em_data(self, template: dict) -> dict: @@ -167,7 +153,7 @@ def process_event_data_em_data(self, template: dict) -> dict: trg = ( f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" f"EVENT_DATA_EM[event_data_em{self.event_id}]/" - f"IMAGE_SET[image_set{image_identifier}]/image_twod" + f"IMAGE_SET[image_set{image_identifier}]/image_2d" ) template[f"{trg}/title"] = f"Image" template[f"{trg}/@signal"] = "real" @@ -185,17 +171,17 @@ def process_event_data_em_data(self, template: dict) -> dict: # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d template[f"{trg}/real/@long_name"] = f"Signal" - sxy = {"i": 1.0, "j": 1.0} - scan_unit = {"i": "m", "j": "m"} - if ("SM_MICRON_BAR" in self.tmp["flat_dict_meta"]) and ( - "SM_MICRON_MARKER" in self.tmp["flat_dict_meta"] + sxy = { + "i": ureg.Quantity(1.0, ureg.meter), + "j": ureg.Quantity(1.0, ureg.meter), + } + if ("SM_MICRON_BAR" in self.flat_dict_meta) and ( + "SM_MICRON_MARKER" in self.flat_dict_meta ): # JEOL-specific conversion for micron bar pixel to physical length - resolution = int(self.tmp["flat_dict_meta"]["SM_MICRON_BAR"]) + resolution = int(self.flat_dict_meta["SM_MICRON_BAR"]) physical_length = ( - self.tmp["flat_dict_meta"]["SM_MICRON_MARKER"] - .to(ureg.meter) - .magnitude + self.flat_dict_meta["SM_MICRON_MARKER"] # .to(ureg.meter) ) # resolution many pixel represent physical_length scanned surface # assuming square pixel @@ -214,33 +200,35 @@ def process_event_data_em_data(self, template: dict) -> dict: template[f"{trg}/AXISNAME[axis_{dim}]"] = { "compress": np.asarray( np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True) - * sxy[dim], + * sxy[dim].magnitude, np.float64, ), "strength": 1, } template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( - f"Coordinate along {dim}-axis ({scan_unit[dim]})" + f"Coordinate along {dim}-axis ({sxy[dim].units})" ) - template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" + 
template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{sxy[dim].units}" image_identifier += 1 return template def add_various_dynamic(self, template: dict) -> dict: + """Add several event-based concepts with similar template path prefixes dynamic.""" identifier = [self.entry_id, self.event_id, 1] add_specific_metadata_pint( - JEOL_VARIOUS_DYNAMIC_TO_NX_EM, - self.tmp["flat_dict_meta"], + JEOL_DYNAMIC_VARIOUS_NX, + self.flat_dict_meta, identifier, template, ) return template def add_various_static(self, template: dict) -> dict: + """Add several event-based concepts with similar template path prefixes static.""" identifier = [self.entry_id, self.event_id, 1] add_specific_metadata_pint( - JEOL_VARIOUS_STATIC_TO_NX_EM, - self.tmp["flat_dict_meta"], + JEOL_STATIC_VARIOUS_NX, + self.flat_dict_meta, identifier, template, ) diff --git a/src/pynxtools_em/parsers/image_tiff_point_electronic.py b/src/pynxtools_em/parsers/image_tiff_point_electronic.py index bfeb3f7..61a8cea 100644 --- a/src/pynxtools_em/parsers/image_tiff_point_electronic.py +++ b/src/pynxtools_em/parsers/image_tiff_point_electronic.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -"""Subparser for harmonizing point electronic DISS specific content in TIFF files.""" +"""Parser for harmonizing point electronic DISS specific content in TIFF files.""" import mmap from typing import Dict @@ -25,33 +25,31 @@ from PIL import Image, ImageSequence from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint from pynxtools_em.configurations.image_tiff_point_electronic_cfg import ( - DISS_VARIOUS_DYNAMIC_TO_NX_EM, + DISS_DYNAMIC_VARIOUS_NX, ) from pynxtools_em.parsers.image_tiff import TiffParser from pynxtools_em.utils.string_conversions import string_to_number class PointElectronicTiffParser(TiffParser): - def __init__(self, file_path: str = "", entry_id: int = 1): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): super().__init__(file_path) self.entry_id = entry_id self.event_id = 1 - self.prfx = None - self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})} - self.supported_version: Dict = {} - self.version: Dict = {} - self.tags: Dict = {} + self.verbose = verbose + self.flat_metadata = fd.FlatDict({}, "/") + self.version: Dict = { + "trg": { + "tech_partner": ["point electronic"], + "schema_name": ["DISS"], + "schema_version": ["5.15.31.0"], + } + } self.supported = False - self.init_support() self.check_if_tiff_point_electronic() - def init_support(self): - """Init supported versions.""" - self.supported_version["tech_partner"] = ["point electronic"] - self.supported_version["schema_name"] = ["DISS"] - self.supported_version["schema_version"] = ["5.15.31.0"] - def xmpmeta_to_flat_dict(self, meta: fd.FlatDict): + """Flatten point-electronic formatting of XMPMeta data.""" for entry in meta["xmpmeta/RDF/Description"]: tmp = fd.FlatDict(entry, "/") for key, obj in tmp.items(): @@ -61,16 +59,13 @@ def xmpmeta_to_flat_dict(self, meta: fd.FlatDict): lst = fd.FlatDict(dct, "/") for kkey, kobj in lst.items(): if isinstance(kobj, str) and kobj != "": - if ( - f"{key}/{kkey}" - not in 
self.tmp["flat_dict_meta"] - ): - self.tmp["flat_dict_meta"][f"{key}/{kkey}"] = ( + if f"{key}/{kkey}" not in self.flat_metadata: + self.flat_metadata[f"{key}/{kkey}"] = ( string_to_number(kobj) ) - if isinstance(obj, str) and obj != "": - if key not in self.tmp["flat_dict_meta"]: - self.tmp["flat_dict_meta"][key] = string_to_number(obj) + elif isinstance(obj, str) and obj != "": + if key not in self.flat_metadata: + self.flat_metadata[key] = string_to_number(obj) else: raise KeyError(f"Duplicated key {key} !") @@ -100,23 +95,20 @@ def check_if_tiff_point_electronic(self): if "xmpmeta/xmptk" in meta: if meta["xmpmeta/xmptk"] == "XMP Core 5.1.2": # load the metadata - self.tmp["flat_dict_meta"] = fd.FlatDict({}, "/") + self.flat_metadata = fd.FlatDict({}, "/") self.xmpmeta_to_flat_dict(meta) - for key, value in self.tmp["flat_dict_meta"].items(): + for key, value in self.flat_metadata.items(): print(f"{key}____{type(value)}____{value}") # check if written about with supported DISS version - prefix = f"{self.supported_version['tech_partner'][0]} {self.supported_version['schema_name'][0]}" + prefix = f"{self.version['trg']['tech_partner'][0]} {self.version['trg']['schema_name'][0]}" supported_versions = [ f"{prefix} {val}" - for val in self.supported_version["schema_version"] + for val in self.version["trg"]["schema_version"] ] print(supported_versions) - if ( - self.tmp["flat_dict_meta"]["CreatorTool"] - in supported_versions - ): + if self.flat_metadata["CreatorTool"] in supported_versions: self.supported += 1 # found specific XMP metadata if self.supported == 2: self.supported = True @@ -126,21 +118,18 @@ def check_if_tiff_point_electronic(self): f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" ) - def parse_and_normalize(self): - """Perform actual parsing filling cache self.tmp.""" + def parse(self, template: dict) -> dict: + """Perform actual parsing filling cache.""" if self.supported is True: print(f"Parsing via point 
electronic DISS-specific metadata...") # metadata have at this point already been collected into an fd.FlatDict + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) else: print( f"{self.file_path} is not a point electronic DISS-specific " f"TIFF file that this parser can process !" ) - - def process_into_template(self, template: dict) -> dict: - if self.supported is True: - self.process_event_data_em_metadata(template) - self.process_event_data_em_data(template) return template def process_event_data_em_data(self, template: dict) -> dict: @@ -161,7 +150,7 @@ def process_event_data_em_data(self, template: dict) -> dict: trg = ( f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" f"EVENT_DATA_EM[event_data_em{self.event_id}]/" - f"IMAGE_SET[image_set{image_identifier}]/image_twod" + f"IMAGE_SET[image_set{image_identifier}]/image_2d" ) template[f"{trg}/title"] = f"Image" template[f"{trg}/@signal"] = "real" @@ -181,12 +170,12 @@ def process_event_data_em_data(self, template: dict) -> dict: sxy = {"i": 1.0, "j": 1.0} scan_unit = {"i": "m", "j": "m"} - if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and ( - "PixelSizeY" in self.tmp["flat_dict_meta"] + if ("PixelSizeX" in self.flat_metadata) and ( + "PixelSizeY" in self.flat_metadata ): sxy = { - "i": self.tmp["flat_dict_meta"]["PixelSizeX"], - "j": self.tmp["flat_dict_meta"]["PixelSizeY"], + "i": self.flat_metadata["PixelSizeX"], + "j": self.flat_metadata["PixelSizeY"], } else: print("WARNING: Assuming pixel width and height unit is meter!") @@ -210,22 +199,17 @@ def process_event_data_em_data(self, template: dict) -> dict: image_identifier += 1 return template - def add_various_dynamic(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata_pint( - DISS_VARIOUS_DYNAMIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - def process_event_data_em_metadata(self, template: dict) -> dict: 
"""Add respective metadata.""" # contextualization to understand how the image relates to the EM session print( f"Mapping some of the point electronic DISS metadata on respective NeXus concepts..." ) - self.add_various_dynamic(template) - # ... add more as required ... + identifier = [self.entry_id, self.event_id, 1] + add_specific_metadata_pint( + DISS_DYNAMIC_VARIOUS_NX, + self.flat_metadata, + identifier, + template, + ) return template diff --git a/src/pynxtools_em/parsers/image_tiff_tescan.py b/src/pynxtools_em/parsers/image_tiff_tescan.py new file mode 100644 index 0000000..54b5b55 --- /dev/null +++ b/src/pynxtools_em/parsers/image_tiff_tescan.py @@ -0,0 +1,253 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Parser for harmonizing TESCAN-specific content in TIFF files."""

import mmap
from typing import Dict, List

import flatdict as fd
import numpy as np
from PIL import Image, ImageSequence
from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint
from pynxtools_em.configurations.image_tiff_tescan_cfg import (
    TESCAN_DYNAMIC_STAGE_NX,
    TESCAN_DYNAMIC_STIGMATOR_NX,
    TESCAN_DYNAMIC_VARIOUS_NX,
    TESCAN_STATIC_VARIOUS_NX,
)
from pynxtools_em.parsers.image_tiff import TiffParser
from pynxtools_em.utils.pint_custom_unit_registry import ureg
from pynxtools_em.utils.string_conversions import string_to_number


class TescanTiffParser(TiffParser):
    def __init__(self, file_paths: List[str], entry_id: int = 1, verbose: bool = False):
        """Sort out TIFF and optional HDR sidecar file and probe support.

        file_paths: either a single TIFF path, or a TIFF path plus its
            sidecar HDR path (same stem); the order of the two entries
            does not matter.
        entry_id: NXentry index used when building template paths.
        verbose: if True, dump every flattened metadata key/value pair.
        """
        tif_hdr = ["", ""]
        if len(file_paths) == 1 and file_paths[0].lower().endswith((".tif", ".tiff")):
            tif_hdr[0] = file_paths[0]
        elif len(file_paths) == 2 and (
            # fix: compare each path against its OWN stem; slicing both with
            # file_paths[0].rfind(".") could pair files whose stems merely
            # share a prefix (e.g. abc.tif with abcd.hdr)
            file_paths[0][0 : file_paths[0].rfind(".")]
            == file_paths[1][0 : file_paths[1].rfind(".")]
        ):
            for entry in file_paths:
                if entry.lower().endswith((".tif", ".tiff")) and entry != "":
                    tif_hdr[0] = entry
                elif entry.lower().endswith((".hdr")) and entry != "":
                    tif_hdr[1] = entry

        if tif_hdr[0] != "":
            super().__init__(tif_hdr[0])
            self.entry_id = entry_id
            self.event_id = 1
            self.verbose = verbose
            self.flat_dict_meta = fd.FlatDict({}, "/")
            self.version: Dict = {}
            self.supported = False
            self.hdr_file_path = tif_hdr[1]
            self.check_if_tiff_tescan()
        else:
            # no usable TIFF file in file_paths at all
            self.supported = False

    def _ingest_key_value_lines(self, lines):
        """Parse "key=value" lines into self.flat_dict_meta (first occurrence wins)."""
        for line in lines:
            tmp = [value.strip() for value in line.split("=")]
            if len(tmp) == 2:
                if tmp[0] and tmp[0] not in self.flat_dict_meta:
                    self.flat_dict_meta[tmp[0]] = string_to_number(tmp[1])
            else:
                print(f"Ignore line {line} !")

    def check_if_tiff_tescan(self):
        """Check if resource behind self.file_path is a TaggedImageFormat file.

        Loads metadata first if possible, either from the proprietary TESCAN
        TIFF tag or from the HDR sidecar file; sets self.supported.
        """
        self.supported = False
        if not hasattr(self, "file_path"):
            print(
                f"... is not a TESCAN-specific TIFF/(HDR) file (set) that this parser can process !"
            )
            return
        with open(self.file_path, "rb", 0) as file:
            s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
            magic = s.read(4)
            if magic != b"II*\x00":  # https://en.wikipedia.org/wiki/TIFF
                print(
                    f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports"
                )
                return

        self.flat_dict_meta = fd.FlatDict({}, "/")
        with Image.open(self.file_path, mode="r") as fp:
            tescan_keys = [50431]  # proprietary TESCAN TIFF tag
            for tescan_key in tescan_keys:
                if tescan_key in fp.tag_v2:
                    payload = fp.tag_v2[tescan_key]
                    pos = payload.find(bytes("Description", "utf8"))
                    if pos == -1:
                        # fix: tag present but no text section; previously
                        # payload[-1:] (a single byte) would have been decoded
                        continue
                    try:
                        txt = payload[pos:].decode("utf8")
                    except UnicodeDecodeError:
                        print(
                            f"WARNING::{self.file_path} TESCAN TIFF tag {tescan_key} cannot be decoded using UTF8, trying to use sidecar file instead if available !"
                        )
                        if hasattr(self, "hdr_file_path"):
                            continue
                        else:
                            return
                    del payload
                    # fix: parse line-wise; values such as "MIRA3 LMH" contain
                    # spaces, so whitespace-splitting (txt.split()) truncated
                    # them and the Device support check below could never match
                    self._ingest_key_value_lines(txt.splitlines())
        # very frequently using sidecar files creates ambiguities: are the metadata
        # in the image and the sidecar file exactly the same, a subset, which
        # information to give preference in case of inconsistencies; system time
        # when the sidecar file is written differs from system time when the image
        # was written — which time to take for the event data?
        if len(self.flat_dict_meta) == 0:
            if self.hdr_file_path != "":
                with open(self.hdr_file_path, mode="r", encoding="utf8") as fp:
                    txt = fp.read()
                txt = txt.replace("\r\n", "\n")  # windows to unix EOL conversion
                lines = [
                    line.strip()
                    for line in txt.split("\n")
                    if line.strip() != "" and line.startswith("#") is False
                ]
                if not all(value in lines for value in ["[MAIN]", "[SEM]"]):
                    print(
                        f"WARNING::TESCAN HDR sidecar file exists but does not contain expected section headers !"
                    )
                lines = [line for line in lines if line not in ["[MAIN]", "[SEM]"]]
                self._ingest_key_value_lines(lines)
            else:
                print(f"WARNING::Potential TESCAN TIF without metadata !")

        if self.verbose:
            for key, value in self.flat_dict_meta.items():
                print(f"{key}____{type(value)}____{value}")

        # weak support check: match the device name reported by the TESCAN
        # software (previous comment wrongly referred to "DISS" versions)
        supported_versions = ["TIMA", "MIRA3 LMH"]
        if "Device" in self.flat_dict_meta:
            if self.flat_dict_meta["Device"] in supported_versions:
                self.supported = True
                # but this is quite a weak test, more instance data are required
                # with TESCAN-specific concept names to make this here more robust

    def parse(self, template: dict) -> dict:
        """Fill the template with metadata and image data when supported."""
        if self.supported is True:
            print(f"Parsing via TESCAN...")
            # metadata have at this point already been collected into an fd.FlatDict
            self.process_event_data_em_metadata(template)
            self.process_event_data_em_data(template)
        return template

    def process_event_data_em_data(self, template: dict) -> dict:
        """Add respective heavy data (one image_2d instance per TIFF frame)."""
        print(f"Writing TESCAN image data to the respective NeXus concept instances...")
        image_identifier = 1
        with Image.open(self.file_path, mode="r") as fp:
            for img in ImageSequence.Iterator(fp):
                nparr = np.array(img)
                print(
                    f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}"
                )
                # eventually similar open discussions points as were raised for tiff_tfs parser
                trg = (
                    f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/"
                    f"EVENT_DATA_EM[event_data_em{self.event_id}]/"
                    f"IMAGE_SET[image_set{image_identifier}]/image_2d"
                )
                template[f"{trg}/title"] = f"Image"
                template[f"{trg}/@signal"] = "real"
                dims = ["i", "j"]  # i == x (fastest), j == y (fastest)
                idx = 0
                for dim in dims:
                    template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = (
                        np.uint32(idx)
                    )
                    idx += 1
                template[f"{trg}/@axes"] = []
                for dim in dims[::-1]:
                    template[f"{trg}/@axes"].append(f"axis_{dim}")
                # fix: reuse the frame converted above instead of re-reading
                # np.array(fp) three times per frame (the iterator seeks fp in
                # place for TIFF, so the arrays are the same — TODO confirm)
                template[f"{trg}/real"] = {"compress": nparr, "strength": 1}
                # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d
                template[f"{trg}/real/@long_name"] = f"Signal"

                # default: uncalibrated pixels assumed one meter wide/high
                sxy = {
                    "i": ureg.Quantity(1.0, ureg.meter),
                    "j": ureg.Quantity(1.0, ureg.meter),
                }
                if all(
                    value in self.flat_dict_meta
                    for value in ["PixelSizeX", "PixelSizeY"]
                ):
                    sxy = {
                        "i": ureg.Quantity(
                            self.flat_dict_meta["PixelSizeX"], ureg.meter
                        ),
                        "j": ureg.Quantity(
                            self.flat_dict_meta["PixelSizeY"], ureg.meter
                        ),
                    }
                else:
                    print("WARNING: Assuming pixel width and height unit is meter!")
                nxy = {"i": np.shape(nparr)[1], "j": np.shape(nparr)[0]}
                # TODO::be careful we assume here a very specific coordinate system
                # additional points as discussed already in comments to TFS TIFF reader
                for dim in dims:
                    template[f"{trg}/AXISNAME[axis_{dim}]"] = {
                        "compress": np.asarray(
                            np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True)
                            * sxy[dim].magnitude,
                            np.float64,
                        ),
                        "strength": 1,
                    }
                    template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = (
                        f"Coordinate along {dim}-axis ({sxy[dim].units})"
                    )
                    template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{sxy[dim].units}"
                image_identifier += 1
        return template

    def process_event_data_em_metadata(self, template: dict) -> dict:
        """Add respective metadata."""
        # contextualization to understand how the image relates to the EM session
        print(f"Mapping some of the TESCAN metadata on respective NeXus concepts...")
        identifier = [self.entry_id, self.event_id, 1]
        for cfg in [
            TESCAN_DYNAMIC_STIGMATOR_NX,
            TESCAN_STATIC_VARIOUS_NX,
            TESCAN_DYNAMIC_VARIOUS_NX,
            TESCAN_DYNAMIC_STAGE_NX,
        ]:
            add_specific_metadata_pint(cfg, self.flat_dict_meta, identifier, template)
        return template
    def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False):
        """Set up the parser and immediately probe whether the file is a TFS TIFF.

        file_path: path to the candidate ThermoFisher/FEI TIFF file.
        entry_id: NXentry index used when building template paths.
        verbose: if True, dump every flattened metadata key/value pair.
        """
        super().__init__(file_path)
        self.entry_id = entry_id
        self.event_id = 1
        self.verbose = verbose
        # flattened TFS tag metadata with "/"-delimited keys (e.g. "EBeam/HV")
        self.flat_dict_meta = fd.FlatDict({}, "/")
        self.version: Dict = {}
        self.supported = False
        self.check_if_tiff_tfs()
if value.isdigit() is True: - self.tmp["flat_dict_meta"][f"{parent}/{term}"] = ( - np.int64(value) + self.flat_dict_meta[f"{parent}/{term}"] = np.int64( + value ) elif if_str_represents_float(value) is True: - self.tmp["flat_dict_meta"][f"{parent}/{term}"] = ( + self.flat_dict_meta[f"{parent}/{term}"] = ( np.float64(value) ) else: - self.tmp["flat_dict_meta"][f"{parent}/{term}"] = ( - value - ) + self.flat_dict_meta[f"{parent}/{term}"] = value else: raise ValueError( f"Detected an unexpected case {parent}/{term}, type: {type(value)} !" ) else: break - self.tmp["flat_dict_meta"] = fd.FlatDict(self.tmp["flat_dict_meta"]) + if self.verbose: + for key, value in self.flat_dict_meta.items(): + if value: + print(f"{key}____{type(value)}____{value}") - def parse_and_normalize(self): + def parse(self, template: dict) -> dict: """Perform actual parsing filling cache self.tmp.""" if self.supported is True: print(f"Parsing via ThermoFisher-specific metadata...") self.get_metadata() + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) else: print( f"{self.file_path} is not a ThermoFisher-specific " f"TIFF file that this parser can process !" ) - - def process_into_template(self, template: dict) -> dict: - if self.supported is True: - self.process_event_data_em_metadata(template) - self.process_event_data_em_data(template) return template def process_event_data_em_data(self, template: dict) -> dict: """Add respective heavy data.""" # default display of the image(s) representing the data collected in this event - print( - f"Writing TFS/FEI TIFF image data to the respective NeXus concept instances..." 
- ) - # read image in-place + print(f"Writing TFS/FEI image data to NeXus concept instances...") + image_identifier = 1 with Image.open(self.file_path, mode="r") as fp: - nparr = np.array(fp) - # print(f"type: {type(nparr)}, dtype: {nparr.dtype}, shape: {np.shape(nparr)}") - # TODO::discussion points - # - how do you know we have an image of real space vs. imaginary space (from the metadata?) - # - how do deal with the (ugly) scale bar that is typically stamped into the TIFF image content? - # with H5Web and NeXus most of this is obsolete unless there are metadata stamped which are not - # available in NeXus or in the respective metadata in the metadata section of the TIFF image - # remember H5Web images can be scaled based on the metadata allowing basically the same - # explorative viewing using H5Web than what traditionally typical image viewers are meant for - image_identifier = 1 - trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/" - f"EVENT_DATA_EM[event_data_em{self.event_id}]/" - f"IMAGE_R_SET[image_r_set{image_identifier}]/image_twod" - ) - # TODO::writer should decorate automatically! - template[f"{trg}/title"] = f"Image" - template[f"{trg}/@signal"] = "intensity" - dims = ["x", "y"] - idx = 0 - for dim in dims: - template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32( - idx + for img in ImageSequence.Iterator(fp): + nparr = np.array(img) + # print(f"type: {type(nparr)}, dtype: {nparr.dtype}, shape: {np.shape(nparr)}") + # TODO::discussion points + # - how do you know we have an image of real space vs. imaginary space (from the metadata?) + # - how do deal with the (ugly) scale bar that is typically stamped into the TIFF image content? 
+ # with H5Web and NeXus most of this is obsolete unless there are metadata stamped which are not + # available in NeXus or in the respective metadata in the metadata section of the TIFF image + # remember H5Web images can be scaled based on the metadata allowing basically the same + # explorative viewing using H5Web than what traditionally typical image viewers are meant for + trg = ( + f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" + f"EVENT_DATA_EM[event_data_em{self.event_id}]/" + f"IMAGE_SET[image_set{image_identifier}]/image_2d" ) - idx += 1 - template[f"{trg}/@axes"] = [] - for dim in dims[::-1]: - template[f"{trg}/@axes"].append(f"axis_{dim}") - template[f"{trg}/intensity"] = {"compress": np.array(fp), "strength": 1} - # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d - template[f"{trg}/intensity/@long_name"] = f"Signal" + template[f"{trg}/title"] = f"Image" + template[f"{trg}/@signal"] = "real" + dims = ["i", "j"] + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = ( + np.uint32(idx) + ) + idx += 1 + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + template[f"{trg}/real"] = {"compress": np.array(fp), "strength": 1} + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d + template[f"{trg}/real/@long_name"] = f"Signal" - sxy = {"x": 1.0, "y": 1.0} - scan_unit = {"x": "m", "y": "m"} # assuming FEI reports SI units - # we may face the CCD overview camera for the chamber for which there might not be a calibration! 
- if ("EScan/PixelWidth" in self.tmp["flat_dict_meta"]) and ( - "EScan/PixelHeight" in self.tmp["flat_dict_meta"] - ): sxy = { - "x": self.tmp["flat_dict_meta"]["EScan/PixelWidth"], - "y": self.tmp["flat_dict_meta"]["EScan/PixelHeight"], + "i": ureg.Quantity(1.0, ureg.meter), + "j": ureg.Quantity(1.0, ureg.meter), } - else: - print("WARNING: Assuming pixel width and height unit is meter!") - nxy = {"x": np.shape(np.array(fp))[1], "y": np.shape(np.array(fp))[0]} - # TODO::be careful we assume here a very specific coordinate system - # however the TIFF file gives no clue, TIFF just documents in which order - # it arranges a bunch of pixels that have stream in into a n-d tiling - # e.g. a 2D image - # also we have to be careful because TFS just gives us here - # typical case of an image without an information without its location - # on the physical sample surface, therefore we can only scale - # pixel_identifier by physical scaling quantities s_x, s_y - # also the dimensions of the image are on us to fish with the image - # reading library instead of TFS for consistency checks adding these - # to the metadata the reason is that TFS TIFF use the TIFF tagging mechanism - # and there is already a proper TIFF tag for the width and height of an - # image in number of pixel - for dim in dims: - template[f"{trg}/AXISNAME[axis_{dim}]"] = { - "compress": np.asarray( - np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True) - * sxy[dim], - np.float64, - ), - "strength": 1, - } - template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( - f"Coordinate along {dim}-axis ({scan_unit[dim]})" - ) - template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}" - return template - - def add_aperture_static_metadata(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_APERTURE_STATIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - - def add_detector_static_metadata(self, template: 
dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_DETECTOR_STATIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - - def add_various_static_metadata(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_VARIOUS_STATIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - - def add_optics_dynamic_metadata(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_OPTICS_DYNAMIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - - def add_stage_dynamic_metadata(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_STAGE_DYNAMIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - - def add_scan_dynamic_metadata(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_SCAN_DYNAMIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) - return template - - def add_various_dynamic_metadata(self, template: dict) -> dict: - identifier = [self.entry_id, self.event_id, 1] - add_specific_metadata( - TFS_VARIOUS_DYNAMIC_TO_NX_EM, - self.tmp["flat_dict_meta"], - identifier, - template, - ) + # may face CCD overview camera of chamber that has no calibration! 
+ if ("EScan/PixelWidth" in self.flat_dict_meta) and ( + "EScan/PixelHeight" in self.flat_dict_meta + ): + sxy = { + "i": ureg.Quantity( + self.flat_dict_meta["EScan/PixelWidth"], ureg.meter + ), + "j": ureg.Quantity( + self.flat_dict_meta["EScan/PixelHeight"], ureg.meter + ), + } + else: + print("WARNING: Assuming pixel width and height unit is meter!") + nxy = {"i": np.shape(np.array(fp))[1], "j": np.shape(np.array(fp))[0]} + # TODO::be careful we assume here a very specific coordinate system + # however the TIFF file gives no clue, TIFF just documents in which order + # it arranges a bunch of pixels that have stream in into a n-d tiling + # e.g. a 2D image + # also we have to be careful because TFS just gives us here + # typical case of an image without an information without its location + # on the physical sample surface, therefore we can only scale + # pixel_identifier by physical scaling quantities s_x, s_y + # also the dimensions of the image are on us to fish with the image + # reading library instead of TFS for consistency checks adding these + # to the metadata the reason is that TFS TIFF use the TIFF tagging mechanism + # and there is already a proper TIFF tag for the width and height of an + # image in number of pixel + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] = { + "compress": np.asarray( + np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True) + * sxy[dim].magnitude, + np.float64, + ), + "strength": 1, + } + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( + f"Coordinate along {dim}-axis ({sxy[dim].units})" + ) + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{sxy[dim].units}" + image_identifier += 1 return template def process_event_data_em_metadata(self, template: dict) -> dict: """Add respective metadata.""" # contextualization to understand how the image relates to the EM session print(f"Mapping some of the TFS/FEI metadata on respective NeXus concepts...") - self.add_aperture_static_metadata(template) - 
self.add_detector_static_metadata(template) - self.add_various_static_metadata(template) - self.add_optics_dynamic_metadata(template) - self.add_stage_dynamic_metadata(template) - self.add_scan_dynamic_metadata(template) - self.add_various_dynamic_metadata(template) + identifier = [self.entry_id, self.event_id, 1] + for cfg in [ + TFS_STATIC_APERTURE_NX, + TFS_STATIC_DETECTOR_NX, + TFS_STATIC_VARIOUS_NX, + TFS_DYNAMIC_OPTICS_NX, + TFS_DYNAMIC_SCAN_NX, + TFS_DYNAMIC_VARIOUS_NX, + TFS_DYNAMIC_STIGMATOR_NX, + ]: # TODO::static quantities may need to be splitted + add_specific_metadata_pint(cfg, self.flat_dict_meta, identifier, template) + add_specific_metadata_pint( + TFS_DYNAMIC_STAGE_NX, self.flat_dict_meta, identifier, template + ) return template diff --git a/src/pynxtools_em/parsers/image_tiff_tfs_concepts.py b/src/pynxtools_em/parsers/image_tiff_tfs_concepts.py deleted file mode 100644 index 9449f2d..0000000 --- a/src/pynxtools_em/parsers/image_tiff_tfs_concepts.py +++ /dev/null @@ -1,299 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Configuration of the image_tiff_tfs parser.""" - -from typing import List - -# this example exemplifies the situation for the TFS/FEI SEM Apreo from the IKZ of Prof. 
Martin Albrecht -# thanks to Robert Kernke it was clarified the microscope has several detectors and imaging modes -# these imaging modes control the specific TFS/FEI concept instances stored in the respective TIFF file -# we here use a glossary of all concepts which we were able to parse out from an example image -# taken for each detector and imaging mode -# we then assume that one can work with the joint set of these concepts - -TIFF_TFS_PARENT_CONCEPTS = [ - "Accessories", - "Beam", - "ColdStage", - "CompoundLensFilter", - "Detectors", - "EBeam", - "EBeamDeceleration", - "EScan", - "ETD", - "EasyLift", - "GIS", - "HiResIllumination", - "HotStage", - "HotStageHVHS", - "HotStageMEMS", - "IRBeam", - "Image", - "Nav-Cam", - "PrivateFei", - "Scan", - "Specimen", - "Stage", - "System", - "T1", - "T2", - "T3", - "User", - "Vacuum", -] - -TIFF_TFS_ALL_CONCEPTS = [ - "Accessories/Number", - "Beam/Beam", - "Beam/BeamShiftX", - "Beam/BeamShiftY", - "Beam/FineStageBias", - "Beam/HV", - "Beam/ImageMode", - "Beam/Scan", - "Beam/ScanRotation", - "Beam/Spot", - "Beam/StigmatorX", - "Beam/StigmatorY", - "ColdStage/ActualTemperature", - "ColdStage/Humidity", - "ColdStage/SampleBias", - "ColdStage/TargetTemperature", - "CompoundLensFilter/IsOn", - "CompoundLensFilter/ThresholdEnergy", - "Detectors/Mode", - "Detectors/Name", - "Detectors/Number", - "EasyLift/Rotation", - "EBeam/Acq", - "EBeam/Aperture", - "EBeam/ApertureDiameter", - "EBeam/ATubeVoltage", - "EBeam/BeamCurrent", - "EBeam/BeamMode", - "EBeam/BeamShiftX", - "EBeam/BeamShiftY", - "EBeam/ColumnType", - "EBeam/DynamicFocusIsOn", - "EBeam/DynamicWDIsOn", - "EBeam/EmissionCurrent", - "EBeam/EucWD", - "EBeam/FinalLens", - "EBeam/HFW", - "EBeam/HV", - "EBeam/ImageMode", - "EBeam/LensMode", - "EBeam/LensModeA", - "EBeam/MagnificationCorrection", - "EBeam/PreTilt", - "EBeam/ScanRotation", - "EBeam/SemOpticalMode", - "EBeam/Source", - "EBeam/SourceTiltX", - "EBeam/SourceTiltY", - "EBeam/StageR", - "EBeam/StageTa", - 
"EBeam/StageTb", - "EBeam/StageX", - "EBeam/StageY", - "EBeam/StageZ", - "EBeam/StigmatorX", - "EBeam/StigmatorY", - "EBeam/TiltCorrectionAngle", - "EBeam/TiltCorrectionIsOn", - "EBeam/UseCase", - "EBeam/VFW", - "EBeam/WD", - "EBeam/WehneltBias", - "EBeamDeceleration/ImmersionRatio", - "EBeamDeceleration/LandingEnergy", - "EBeamDeceleration/ModeOn", - "EBeamDeceleration/StageBias", - "EScan/Dwell", - "EScan/FrameTime", - "EScan/HorFieldsize", - "EScan/InternalScan", - "EScan/LineIntegration", - "EScan/LineTime", - "EScan/Mainslock", - "EScan/PixelHeight", - "EScan/PixelWidth", - "EScan/Scan", - "EScan/ScanInterlacing", - "EScan/VerFieldsize", - "ETD/Brightness", - "ETD/BrightnessDB", - "ETD/Contrast", - "ETD/ContrastDB", - "ETD/Grid", - "ETD/MinimumDwellTime", - "ETD/Mix", - "ETD/Setting", - "ETD/Signal", - "GIS/Number", - "HiResIllumination/BrightFieldIsOn", - "HiResIllumination/BrightFieldValue", - "HiResIllumination/DarkFieldIsOn", - "HiResIllumination/DarkFieldValue", - "HotStage/ActualTemperature", - "HotStage/SampleBias", - "HotStage/ShieldBias", - "HotStage/TargetTemperature", - "HotStageHVHS/ActualTemperature", - "HotStageHVHS/SampleBias", - "HotStageHVHS/ShieldBias", - "HotStageHVHS/TargetTemperature", - "HotStageMEMS/ActualTemperature", - "HotStageMEMS/HeatingCurrent", - "HotStageMEMS/HeatingPower", - "HotStageMEMS/HeatingVoltage", - "HotStageMEMS/SampleBias", - "HotStageMEMS/SampleResistance", - "HotStageMEMS/TargetTemperature", - "Image/Average", - "Image/DigitalBrightness", - "Image/DigitalContrast", - "Image/DigitalGamma", - "Image/DriftCorrected", - "Image/Integrate", - "Image/MagCanvasRealWidth", - "Image/MagnificationMode", - "Image/PostProcessing", - "Image/ResolutionX", - "Image/ResolutionY", - "Image/ScreenMagCanvasRealWidth", - "Image/ScreenMagnificationMode", - "Image/Transformation", - "Image/ZoomFactor", - "Image/ZoomPanX", - "Image/ZoomPanY", - "IRBeam/HFW", - "IRBeam/n", - "IRBeam/ScanRotation", - "IRBeam/SiDepth", - "IRBeam/StageR", - 
"IRBeam/StageTa", - "IRBeam/StageTb", - "IRBeam/StageX", - "IRBeam/StageY", - "IRBeam/StageZ", - "IRBeam/VFW", - "IRBeam/WD", - "PrivateFei/BitShift", - "PrivateFei/DataBarAvailable", - "PrivateFei/DatabarHeight", - "PrivateFei/DataBarSelected", - "PrivateFei/TimeOfCreation", - "Scan/Average", - "Scan/Dwelltime", - "Scan/FrameTime", - "Scan/HorFieldsize", - "Scan/Integrate", - "Scan/InternalScan", - "Scan/PixelHeight", - "Scan/PixelWidth", - "Scan/VerFieldsize", - "Specimen/SpecimenCurrent", - "Specimen/Temperature", - "Stage/ActiveStage", - "Stage/SpecTilt", - "Stage/StageR", - "Stage/StageT", - "Stage/StageTb", - "Stage/StageX", - "Stage/StageY", - "Stage/StageZ", - "Stage/WorkingDistance", - "System/Acq", - "System/Aperture", - "System/BuildNr", - "System/Chamber", - "System/Column", - "System/DisplayHeight", - "System/DisplayWidth", - "System/Dnumber", - "System/ESEM", - "System/EucWD", - "System/FinalLens", - "System/Pump", - "System/Scan", - "System/Software", - "System/Source", - "System/Stage", - "System/SystemType", - "System/Type", - "T1/Brightness", - "T1/BrightnessDB", - "T1/Contrast", - "T1/ContrastDB", - "T1/MinimumDwellTime", - "T1/Setting", - "T1/Signal", - "T2/Brightness", - "T2/BrightnessDB", - "T2/Contrast", - "T2/ContrastDB", - "T2/MinimumDwellTime", - "T2/Setting", - "T2/Signal", - "T3/Brightness", - "T3/BrightnessDB", - "T3/Contrast", - "T3/ContrastDB", - "T3/MinimumDwellTime", - "T3/Signal", - "User/Date", - "User/Time", - "User/User", - "User/UserText", - "User/UserTextUnicode", - "Vacuum/ChPressure", - "Vacuum/Gas", - "Vacuum/Humidity", - "Vacuum/UserMode", -] - -# there is more to know and understand than just knowing TFS/FEI uses -# the above-mentioned concepts in their taxonomy: -# take the example of System/Source for which an example file (instance) has the -# value "FEG" -# similar like in NeXus "System/Source" labels a concept for which (assumption!) 
there -# is a controlled enumeration of symbols possible (as the example shows "FEG" is one such -# allowed symbol of the enumeration. -# The key issue is that the symbols for the leaf (here "FEG") means nothing eventually -# when one has another semantic world-view, like in NOMAD metainfo or NeXus -# (only us) humans understand that what TFS/FEI likely means with the symbol -# "FEG" is exactly the same as what we mean in NeXus when setting emitter_type of -# NXebeam_column to "cold_cathode_field_emitter" -# world with the controlled enumeration value "other" because we do not know -# if FEG means really a filament or a cold_cathode_field_emitter - - -def get_fei_parent_concepts() -> List: - """Get list of unique FEI parent concepts.""" - return TIFF_TFS_PARENT_CONCEPTS - - -def get_fei_childs(parent_concept: str) -> List: - """Get all children of FEI parent concept.""" - child_concepts = set() - for entry in TIFF_TFS_ALL_CONCEPTS: - if isinstance(entry, str) and entry.count("/") == 1: - if entry.startswith(f"{parent_concept}/") is True: - child_concepts.add(entry.split("/")[1]) - return list(child_concepts) diff --git a/src/pynxtools_em/parsers/image_tiff_zeiss.py b/src/pynxtools_em/parsers/image_tiff_zeiss.py new file mode 100644 index 0000000..bd0bed1 --- /dev/null +++ b/src/pynxtools_em/parsers/image_tiff_zeiss.py @@ -0,0 +1,252 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Parser for harmonizing Zeiss-specific content in TIFF files.""" + +import mmap +import re +from tokenize import TokenError +from typing import Dict + +import flatdict as fd +import numpy as np +from PIL import Image, ImageSequence +from PIL.TiffTags import TAGS +from pint import UndefinedUnitError +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint +from pynxtools_em.configurations.image_tiff_zeiss_cfg import ( + ZEISS_DYNAMIC_STAGE_NX, + ZEISS_DYNAMIC_VARIOUS_NX, + ZEISS_STATIC_VARIOUS_NX, +) +from pynxtools_em.parsers.image_tiff import TiffParser +from pynxtools_em.utils.pint_custom_unit_registry import ureg +from pynxtools_em.utils.string_conversions import string_to_number + +ZEISS_CONCEPT_PREFIXES = ("AP_", "DP_", "SV_") + + +class ZeissTiffParser(TiffParser): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): + super().__init__(file_path) + self.entry_id = entry_id + self.event_id = 1 + self.verbose = verbose + self.flat_dict_meta = fd.FlatDict({}, "/") + self.version: Dict = { + "trg": { + "tech_partner": ["Zeiss"], + "schema_name": ["Zeiss"], + "schema_version": ["V06.03.00.00 : 15-Dec-17"], + } + } + self.supported = False + self.check_if_tiff_zeiss() + + def get_metadata(self, payload: str): + """Extract metadata in Zeiss-specific tags if present, return version if success.""" + print("Parsing Zeiss tags...") + txt = [line.strip() for line in payload.split("\r") if line.strip() != ""] + + # skip over undocumented data to the first line of Zeiss metadata concepts + idx = 0 + while not txt[idx].startswith(ZEISS_CONCEPT_PREFIXES): + idx += 1 + + self.flat_dict_meta = fd.FlatDict({}, "/") + for line in txt[idx : len(txt) - 1]: + match = re.search(r"^(\w{2})_", line) + if ( + match + and line.startswith(ZEISS_CONCEPT_PREFIXES) + and line not in self.flat_dict_meta + ): + token = 
[value.strip() for value in txt[idx + 1].strip().split("=")] + if len(token) == 1: + if token[0].startswith("Time :"): + if token[0].replace("Time :", ""): + self.flat_dict_meta[line] = token[0].replace("Time :", "") + elif token[0].startswith("Date :"): + if token[0].replace("Date :", ""): + self.flat_dict_meta[line] = token[0].replace("Date :", "") + else: + print(f"WARNING::Ignoring line {line} token {token} !") + else: + tmp = [value.strip() for value in token[1].split()] + if len(tmp) == 1 and tmp[0] in ["On", "Yes"]: + self.flat_dict_meta[line] = True + elif len(tmp) == 1 and tmp[0] in ["Off", "No"]: + self.flat_dict_meta[line] = False + elif len(tmp) == 2 and tmp[1] == "°C": + self.flat_dict_meta[line] = ureg.Quantity(tmp[0], ureg.degC) + elif len(tmp) == 2 and tmp[1] == "X": + self.flat_dict_meta[line] = ureg.Quantity(tmp[0]) + elif len(tmp) == 3 and tmp[1] == "K" and tmp[2] == "X": + self.flat_dict_meta[line] = ureg.Quantity(tmp[0]) * 1000.0 + else: + try: + self.flat_dict_meta[line] = ureg.Quantity(token[1]) + except ( + UndefinedUnitError, + TokenError, + ValueError, + AttributeError, + ): + if token[1]: + self.flat_dict_meta[line] = string_to_number(token[1]) + idx += 1 + if self.verbose: + for key, value in self.flat_dict_meta.items(): + # if isinstance(value, ureg.Quantity): + # try: + # if not value.dimensionless: + # print(f"{value}, {type(value)}, {key}") + # except: + # print(f"{value}, {type(value)}, {key}") + # continue + # else: + print(f"{key}____{type(value)}____{value}") + if "SV_VERSION" in self.flat_dict_meta: + return self.flat_dict_meta["SV_VERSION"] + + def check_if_tiff_zeiss(self): + """Check if resource behind self.file_path is a TaggedImageFormat file.""" + self.supported = False + with open(self.file_path, "rb", 0) as file: + s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) + magic = s.read(4) + if magic != b"II*\x00": # https://en.wikipedia.org/wiki/TIFF + print( + f"Parser {self.__class__.__name__} finds no content in 
{self.file_path} that it supports" + ) + return + + with Image.open(self.file_path, mode="r") as fp: + zeiss_keys = [34118] + for zeiss_key in zeiss_keys: + if zeiss_key in fp.tag_v2: + this_version = self.get_metadata(fp.tag_v2[zeiss_key]) + + if this_version not in self.version["trg"]["schema_version"]: + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) + return + else: + self.supported = True + + def parse(self, template: dict) -> dict: + """Perform actual parsing filling cache self.tmp.""" + if self.supported is True: + print(f"Parsing via Zeiss-specific metadata...") + # metadata have at this point already been collected into an fd.FlatDict + self.process_event_data_em_metadata(template) + self.process_event_data_em_data(template) + else: + print( + f"{self.file_path} is not a Zeiss-specific " + f"TIFF file that this parser can process !" + ) + return template + + def process_event_data_em_data(self, template: dict) -> dict: + """Add respective heavy data.""" + print(f"Writing Zeiss image data to the respective NeXus concept instances...") + image_identifier = 1 + with Image.open(self.file_path, mode="r") as fp: + for img in ImageSequence.Iterator(fp): + nparr = np.array(img) + print( + f"Processing image {image_identifier} ... 
{type(nparr)}, {np.shape(nparr)}, {nparr.dtype}" + ) + # eventually similar open discussions points as were raised for tiff_tfs parser + trg = ( + f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" + f"EVENT_DATA_EM[event_data_em{self.event_id}]/" + f"IMAGE_SET[image_set{image_identifier}]/image_2d" + ) + template[f"{trg}/title"] = f"Image" + template[f"{trg}/@signal"] = "real" + dims = ["i", "j"] # i == x (fastest), j == y (fastest) + idx = 0 + for dim in dims: + template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = ( + np.uint32(idx) + ) + idx += 1 + template[f"{trg}/@axes"] = [] + for dim in dims[::-1]: + template[f"{trg}/@axes"].append(f"axis_{dim}") + template[f"{trg}/real"] = {"compress": np.array(fp), "strength": 1} + # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d + template[f"{trg}/real/@long_name"] = f"Signal" + + sxy = { + "i": ureg.Quantity(1.0, ureg.meter), + "j": ureg.Quantity(1.0, ureg.meter), + } + found = False + for key in ["AP_PIXEL_SIZE", "APImagePixelSize"]: + if key in self.flat_dict_meta: + sxy = { + "i": self.flat_dict_meta[key], + "j": self.flat_dict_meta[key], + } + # to(ureg.meter).magnitude + found = True + break + if not found: + print("WARNING: Assuming pixel width and height unit is meter!") + nxy = {"i": np.shape(np.array(fp))[1], "j": np.shape(np.array(fp))[0]} + # TODO::be careful we assume here a very specific coordinate system + # however, these assumptions need to be confirmed by point electronic + # additional points as discussed already in comments to TFS TIFF reader + for dim in dims: + template[f"{trg}/AXISNAME[axis_{dim}]"] = { + "compress": np.asarray( + np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True) + * sxy[dim].magnitude, + np.float64, + ), + "strength": 1, + } + template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( + f"Coordinate along {dim}-axis ({sxy[dim].units})" + ) + template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{sxy[dim].units}" + image_identifier += 1 + return 
template + + def process_event_data_em_metadata(self, template: dict) -> dict: + """Add respective metadata.""" + # contextualization to understand how the image relates to the EM session + print(f"Mapping some of the Zeiss metadata on respective NeXus concepts...") + identifier = [self.entry_id, self.event_id, 1] + for cfg in [ + ZEISS_DYNAMIC_VARIOUS_NX, + ZEISS_STATIC_VARIOUS_NX, + ]: + add_specific_metadata_pint( + cfg, + self.flat_dict_meta, + identifier, + template, + ) + add_specific_metadata_pint( + ZEISS_DYNAMIC_STAGE_NX, self.flat_dict_meta, identifier, template + ) + return template diff --git a/src/pynxtools_em/parsers/nxs_imgs.py b/src/pynxtools_em/parsers/nxs_imgs.py index cb62eb3..2ddcf53 100644 --- a/src/pynxtools_em/parsers/nxs_imgs.py +++ b/src/pynxtools_em/parsers/nxs_imgs.py @@ -17,60 +17,16 @@ # """Parser mapping content of specific image files on NeXus.""" -from pynxtools_em.parsers.image_png_protochips import ProtochipsPngSetParser -from pynxtools_em.parsers.image_tiff_point_electronic import PointElectronicTiffParser -from pynxtools_em.parsers.image_tiff_tfs import TfsTiffParser - class NxEmImagesParser: """Map content from different type of image files on an instance of NXem.""" - def __init__(self, entry_id: int = 1, file_path: str = "", verbose: bool = False): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): """Overwrite constructor of the generic reader.""" + self.file_path = file_path if entry_id > 0: self.entry_id = entry_id else: self.entry_id = 1 - self.file_path = file_path - self.cache = {"is_filled": False} self.verbose = verbose - - def identify_image_type(self): - """Identify if image matches known mime type and has content for which parser exists.""" - # tech partner formats used for measurement - img = TfsTiffParser(self.file_path) - if img.supported: - return "single_tiff_tfs" - img = PointElectronicTiffParser(self.file_path) - if img.supported: - return "tiff_point_electronic" - img = 
ProtochipsPngSetParser(self.file_path) - if img.supported: - return "set_of_zipped_png_protochips" - return None - - def parse(self, template: dict) -> dict: - image_parser_type = self.identify_image_type() - if image_parser_type is None: - print( - f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" - ) - return template - print(f"{self.__class__.__name__} identified content as {image_parser_type}") - # see also comments for respective nxs_pyxem parser - # and its interaction with tech-partner-specific hfive_* parsers - if image_parser_type == "single_tiff_tfs": - tiff = TfsTiffParser(self.file_path, self.entry_id) - tiff.parse_and_normalize() - tiff.process_into_template(template) - elif image_parser_type == "tiff_point_electronic": - diss = PointElectronicTiffParser(self.file_path, self.entry_id) - diss.parse_and_normalize() - diss.process_into_template(template) - elif image_parser_type == "set_of_zipped_png_protochips": - pngs = ProtochipsPngSetParser(self.file_path, self.entry_id) - pngs.parse_and_normalize() - pngs.process_into_template(template) - # add here further specific content (sub-)parsers for formats from other - # tech partner or other custom parsing of images - return template + self.cache = {"is_filled": False} diff --git a/src/pynxtools_em/parsers/nxs_mtex.py b/src/pynxtools_em/parsers/nxs_mtex.py index 327ff33..d196496 100644 --- a/src/pynxtools_em/parsers/nxs_mtex.py +++ b/src/pynxtools_em/parsers/nxs_mtex.py @@ -50,14 +50,14 @@ def hfive_to_template(payload): class NxEmNxsMTexParser: """Map content from *.nxs.mtex files on an instance of NXem.""" - def __init__(self, entry_id: int = 1, file_path: str = "", verbose: bool = False): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): + self.file_path = file_path if entry_id > 0: self.entry_id = entry_id else: self.entry_id = 1 - self.file_path = file_path - self.supported = False self.verbose = verbose + self.supported = 
False self.check_if_mtex_nxs() def check_if_mtex_nxs(self): diff --git a/src/pynxtools_em/parsers/nxs_nion.py b/src/pynxtools_em/parsers/nxs_nion.py index c301cbb..0940e52 100644 --- a/src/pynxtools_em/parsers/nxs_nion.py +++ b/src/pynxtools_em/parsers/nxs_nion.py @@ -21,7 +21,7 @@ import glob import json import mmap -from typing import Dict +from typing import Dict, List from zipfile import ZipFile import flatdict as fd @@ -29,39 +29,49 @@ import nion.swift.model.NDataHandler as nsnd import numpy as np import yaml - -# from pynxtools_em.utils.swift_generate_dimscale_axes \ -# import get_list_of_dimension_scale_axes -# from pynxtools_em.utils.swift_display_items_to_nx \ -# import nexus_concept_dict, identify_nexus_concept_key -# from pynxtools_em.concepts.concept_mapper \ -# import apply_modifier, variadic_path_to_specific_path -# from pynxtools_em.swift_to_nx_image_real_space \ -# import NxImageRealSpaceDict +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint +from pynxtools_em.configurations.nion_cfg import ( + NION_DYNAMIC_ABERRATION_NX, + NION_DYNAMIC_DETECTOR_NX, + NION_DYNAMIC_EVENT_TIME, + NION_DYNAMIC_LENS_NX, + NION_DYNAMIC_MAGBOARDS_NX, + NION_DYNAMIC_SCAN_NX, + NION_DYNAMIC_STAGE_NX, + NION_DYNAMIC_VARIOUS_NX, + NION_STATIC_DETECTOR_NX, + NION_STATIC_LENS_NX, + NION_WHICH_IMAGE, + NION_WHICH_SPECTRUM, +) from pynxtools_em.utils.get_file_checksum import ( DEFAULT_CHECKSUM_ALGORITHM, get_sha256_of_file_content, ) -from pynxtools_em.utils.nion_utils import uuid_to_file_name +from pynxtools_em.utils.nion_utils import ( + nion_image_spectrum_or_generic_nxdata, + uuid_to_file_name, +) +from pynxtools_em.utils.pint_custom_unit_registry import ureg class NionProjectParser: """Parse (zip-compressed archive of a) nionswift project with its content.""" - def __init__( - self, entry_id: int = 1, input_file_path: str = "", verbose: bool = True - ): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = True): 
"""Class wrapping swift parser.""" - if input_file_path is not None and input_file_path != "": - self.file_path = input_file_path + if file_path is not None and file_path != "": + self.file_path = file_path if entry_id > 0: self.entry_id = entry_id else: self.entry_id = 1 + self.event_id = 1 + self.verbose = verbose # counters which keep track of how many instances of NXevent_data_em have # been instantiated, this implementation currently maps each display_items # onto an own NXevent_data_em instance - self.prfx = None + self.file_path_sha256 = None self.tmp: Dict = {} self.proj_file_dict: Dict = {} # assure that there is exactly one *.nsproj file only to parse from @@ -69,23 +79,15 @@ def __init__( # just get the *.ndata files irrespective whether parsed later or not self.hfive_file_dict: Dict = {} # just get the *.h5 files irrespective whether parsed later or not - self.configure() self.supported = False - self.verbose = verbose self.is_zipped = False self.check_if_nionswift_project() - - def configure(self): - self.tmp["cfg"]: Dict = {} - self.tmp["cfg"]["event_data_written"] = False - self.tmp["cfg"]["event_data_em_id"] = 1 - self.tmp["cfg"]["image_id"] = 1 - self.tmp["cfg"]["spectrum_id"] = 1 - self.tmp["flat_dict_meta"] = fd.FlatDict({}) + # eventually allow https://github.com/miurahr/py7zr/ to work with 7z directly def check_if_nionswift_project(self): """Inspect the content of the compressed project file to check if supported.""" - if self.file_path.endswith(".zip.nion"): + self.supported = False + if self.file_path.endswith(".zip"): self.is_zipped = True elif self.file_path.endswith(".nsproj"): self.is_zipped = False @@ -204,142 +206,94 @@ def check_if_nionswift_project(self): for key, val in self.hfive_file_dict.items(): print(f"hfive: ___{key}___{val}___") - def update_event_identifier(self): - """Advance and reset bookkeeping of event data em and data instances.""" - if self.tmp["cfg"]["event_data_written"] is True: - self.tmp["cfg"]["event_data_em_id"] 
+= 1 - self.tmp["cfg"]["event_data_written"] = False - self.tmp["cfg"]["image_id"] = 1 - self.tmp["cfg"]["spectrum_id"] = 1 - - def add_nx_image_real_space(self, meta, arr, template): - """Create instance of NXimage_r_set""" - # TODO:: - return template - - def map_to_nexus(self, meta, arr, concept_name, template): - """Create the actual instance of a specific set of NeXus concepts in template.""" - # TODO:: - return template - - def process_ndata(self, file_hdl, full_path, template): + def process_ndata(self, file_hdl, full_path, template) -> dict: """Handle reading and processing of opened *.ndata inside the ZIP file.""" # assure that we start reading that file_hdl/pointer from the beginning... file_hdl.seek(0) local_files, dir_files, eocd = nsnd.parse_zip(file_hdl) - flat_metadata_dict = {} - """ - data_arr = None - nx_concept_name = "" - """ + flat_metadata = fd.FlatDict({}, "/") print( f"Inspecting {full_path} with len(local_files.keys()) ___{len(local_files.keys())}___" ) for offset, tpl in local_files.items(): - print(f"{offset}___{tpl}") + if self.verbose: + print(f"{offset}___{tpl}") # report to know there are more than metadata.json files in the ndata swift container format if tpl[0] == b"metadata.json": - print( - f"Extract metadata.json from ___{full_path}___ at offset ___{offset}___" - ) + if self.verbose: + print( + f"Extract metadata.json from ___{full_path}___ at offset ___{offset}___" + ) # ... 
explicit jump back to beginning of the file file_hdl.seek(0) - metadata_dict = nsnd.read_json( - file_hdl, local_files, dir_files, b"metadata.json" + flat_metadata = fd.FlatDict( + nsnd.read_json(file_hdl, local_files, dir_files, b"metadata.json"), + "/", ) - """ - nx_concept_key = identify_nexus_concept_key(metadata_dict) - nx_concept_name = nexus_concept_dict[nx_concept_key] - print(f"Display_item {full_path}, concept {nx_concept_key}, maps {nx_concept_name}") - """ - flat_metadata_dict = fd.FlatDict(metadata_dict, delimiter="/") if self.verbose: print(f"Flattened content of this metadata.json") - for key, value in flat_metadata_dict.items(): + for key, value in flat_metadata.items(): print(f"ndata, metadata.json, flat: ___{key}___{value}___") - # no break here, because we would like to inspect all content - # expect (based on Benedikt's example) to find only one json file - # in that *.ndata file pointed to by file_hdl - if flat_metadata_dict == {}: # only continue if some metadata were retrieved + else: + break + # previously no break here because we used verbose == True to log the analysis + # of all datasets that were collected in the last 5years on the NionHermes + # within the HU EM group lead by C. 
Koch and team, specifically we exported the + # metadata to learn about a much larger usage variety to guide the + # implementation of this parser, we expected though always to find only + # one file named metadata.json in that *.ndata file pointed to by file_hdl + if len(flat_metadata) == 0: return template for offset, tpl in local_files.items(): - # print(f"{tpl}") if tpl[0] == b"data.npy": - print( - f"Extract data.npy from ___{full_path}___ at offset ___{offset}___" - ) + if self.verbose: + print( + f"Extract data.npy from ___{full_path}___ at offset ___{offset}___" + ) file_hdl.seek(0) - data_arr = nsnd.read_data(file_hdl, local_files, dir_files, b"data.npy") - if isinstance(data_arr, np.ndarray): + nparr = nsnd.read_data(file_hdl, local_files, dir_files, b"data.npy") + if isinstance(nparr, np.ndarray): print( - f"ndata, data.npy, type, shape, dtype: ___{type(data_arr)}___{np.shape(data_arr)}___{data_arr.dtype}___" + f"ndata, data.npy, type, shape, dtype: ___{type(nparr)}___{np.shape(nparr)}___{nparr.dtype}___" ) + # because we expect (based on Benedikt's example) to find only one npy + # file in that *.ndata file pointed to by file_hdl and only one matching + # metadata.json we can now write the data and its metadata into template + self.process_event_data_em_metadata(flat_metadata, template) + self.process_event_data_em_data(nparr, flat_metadata, template) break - # because we expect (based on Benedikt's example) to find only one npy file - # in that *.ndata file pointed to by file_hdl - - # check on the integriety of the data_arr array that it is not None or empty - # this should be done more elegantly by just writing the - # data directly into the template and not creating another copy - # TODO::only during inspection - """ - self.map_to_nexus(flat_metadata_dict, data_arr, nx_concept_name, template) - del flat_metadata_dict - del data_arr - del nx_concept_name - """ return template - def process_hfive(self, file_hdl, full_path, template: dict): + def 
process_hfive(self, file_hdl, full_path, template: dict) -> dict: """Handle reading and processing of opened *.h5 inside the ZIP file.""" - flat_metadata_dict = {} - """ - data_arr = None - nx_concept_name = "" - """ + flat_metadata = fd.FlatDict({}, "/") file_hdl.seek(0) with h5py.File(file_hdl, "r") as h5r: print( f"Inspecting {full_path} with len(h5r.keys()) ___{len(h5r.keys())}___" ) print(f"{h5r.keys()}") - metadata_dict = json.loads(h5r["data"].attrs["properties"]) - - """ - nx_concept_key = identify_nexus_concept_key(metadata_dict) - nx_concept_name = nexus_concept_dict[nx_concept_key] - print(f"Display_item {full_path}, concept {nx_concept_key}, maps {nx_concept_name}") - """ - - flat_metadata_dict = fd.FlatDict(metadata_dict, delimiter="/") + flat_metadata = fd.FlatDict( + json.loads(h5r["data"].attrs["properties"]), "/" + ) if self.verbose: print(f"Flattened content of this metadata.json") - for key, value in flat_metadata_dict.items(): + for key, value in flat_metadata.items(): print(f"hfive, data, flat: ___{key}___{value}___") - if ( - flat_metadata_dict == {} - ): # only continue if some metadata were retrieved + if len(flat_metadata) == 0: return template - data_arr = h5r["data"][()] + self.process_event_data_em_metadata(flat_metadata, template) - if isinstance(data_arr, np.ndarray): + nparr = h5r["data"][()] + if isinstance(nparr, np.ndarray): print( - f"hfive, data, type, shape, dtype: ___{type(data_arr)}___{np.shape(data_arr)}___{data_arr.dtype}___" + f"hfive, data, type, shape, dtype: ___{type(nparr)}___{np.shape(nparr)}___{nparr.dtype}___" ) - """ - print(f"data_arr type {data_arr.dtype}, shape {np.shape(data_arr)}") - # check on the integriety of the data_arr array that it is not None or empty - # this should be done more elegantly by just writing the - # data directly into the template and not creating another copy - self.map_to_nexus(flat_metadata_dict, data_arr, nx_concept_name, template) - del flat_metadata_dict - del data_arr - del 
nx_concept_name - """ + self.process_event_data_em_data(nparr, flat_metadata, template) return template def parse_project_file(self, template: dict) -> dict: @@ -350,13 +304,11 @@ def parse_project_file(self, template: dict) -> dict: for pkey, proj_file_name in self.proj_file_dict.items(): with zip_file_hdl.open(proj_file_name) as file_hdl: nionswift_proj_mdata = fd.FlatDict( - yaml.safe_load(file_hdl), delimiter="/" + yaml.safe_load(file_hdl), "/" ) else: with open(self.file_path) as file_hdl: - nionswift_proj_mdata = fd.FlatDict( - yaml.safe_load(file_hdl), delimiter="/" - ) + nionswift_proj_mdata = fd.FlatDict(yaml.safe_load(file_hdl), "/") # TODO::inspection phase, maybe with yaml to file? if self.verbose: if self.is_zipped: @@ -431,3 +383,148 @@ def parse(self, template: dict) -> dict: print("Parsing in-place nionswift project (nsproj + data)...") self.parse_project_file(template) return template + + def process_event_data_em_metadata( + self, flat_metadata: fd.FlatDict, template: dict + ) -> dict: + print(f"Mapping some of the Nion metadata on respective NeXus concepts...") + # we assume for now dynamic quantities can just be repeated + identifier = [self.entry_id, self.event_id, 1] + for cfg in [ + NION_DYNAMIC_ABERRATION_NX, + NION_DYNAMIC_DETECTOR_NX, + NION_DYNAMIC_LENS_NX, + NION_DYNAMIC_MAGBOARDS_NX, + NION_DYNAMIC_SCAN_NX, + NION_DYNAMIC_STAGE_NX, + NION_DYNAMIC_VARIOUS_NX, + NION_DYNAMIC_EVENT_TIME, + ]: + add_specific_metadata_pint(cfg, flat_metadata, identifier, template) + # but not so static quantities, for these we ideally need to check if + # exactly the same data havent already been written in an effort to avoid + # redundancies + # most use cases simply avoid this complication as they assume well these + # metadata are delivered by the ELN and thus a different serialization code + # is used, like oasis_cfg or eln_cfg parsing as also pynxtools-em offers + + # nasty assume there is only one e.g. 
direct electron detector + identifier = [self.entry_id, 1] + add_specific_metadata_pint( + NION_STATIC_DETECTOR_NX, flat_metadata, identifier, template + ) + add_specific_metadata_pint( + NION_STATIC_LENS_NX, flat_metadata, identifier, template + ) + return template + + def process_event_data_em_data( + self, nparr: np.ndarray, flat_metadata: fd.FlatDict, template: dict + ) -> dict: + """Map Nion-specifically formatted data arrays on NeXus NXdata/NXimage/NXspectrum.""" + axes = flat_metadata["dimensional_calibrations"] + unit_combination = nion_image_spectrum_or_generic_nxdata(axes) + print(f"{unit_combination}, {np.shape(nparr)}") + print(axes) + print(f"entry_id {self.entry_id}, event_id {self.event_id}") + if unit_combination == "": + return template + + prfx = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em{self.event_id}]" + self.event_id += 1 + + # this is the place when you want to skip individually the writing of NXdata + # return template + + axis_names = None + if unit_combination in NION_WHICH_SPECTRUM: + trg = f"{prfx}/SPECTRUM_SET[spectrum_set1]/{NION_WHICH_SPECTRUM[unit_combination][0]}" + template[f"{trg}/title"] = f"{flat_metadata['title']}" + template[f"{trg}/@signal"] = f"intensity" + template[f"{trg}/intensity"] = {"compress": nparr, "strength": 1} + axis_names = NION_WHICH_SPECTRUM[unit_combination][1] + elif unit_combination in NION_WHICH_IMAGE: + trg = ( + f"{prfx}/IMAGE_SET[image_set1]/{NION_WHICH_IMAGE[unit_combination][0]}" + ) + template[f"{trg}/title"] = f"{flat_metadata['title']}" + template[f"{trg}/@signal"] = f"real" # TODO::unless COMPLEX + template[f"{trg}/real"] = {"compress": nparr, "strength": 1} + axis_names = NION_WHICH_IMAGE[unit_combination][1] + elif not any( + (value in ["1/", "iteration"]) for value in unit_combination.split(";") + ): + trg = f"{prfx}/DATA[data1]" + template[f"{trg}/title"] = f"{flat_metadata['title']}" + template[f"{trg}/@NX_class"] = f"NXdata" + 
template[f"{trg}/@signal"] = f"data" + template[f"{trg}/data"] = {"compress": nparr, "strength": 1} + axis_names = ["axis_i", "axis_j", "axis_k", "axis_l", "axis_m"][ + 0 : len(unit_combination.split("_")) + ][::-1] + else: + print(f"WARNING::{unit_combination} unsupported unit_combination !") + return template + + if len(axis_names) >= 1: + # arrays axis_names and dimensional_calibrations are aligned in order + # but that order is reversed wrt to AXISNAME_indices ! + for idx, axis_name in enumerate(axis_names): + template[f"{trg}/@AXISNAME_indices[{axis_name}_indices]"] = np.uint32( + len(axis_names) - 1 - idx + ) + template[f"{trg}/@axes"] = axis_names + + for idx, axis in enumerate(axes): + axis_name = axis_names[idx] + offset = axis["offset"] + step = axis["scale"] + units = axis["units"] + count = np.shape(nparr)[idx] + if units == "": + template[f"{trg}/AXISNAME[{axis_name}]"] = np.float32(offset) + ( + np.float32(step) + * np.asarray( + np.linspace( + start=0, stop=count - 1, num=count, endpoint=True + ), + np.float32, + ) + ) + if unit_combination in NION_WHICH_SPECTRUM: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Spectrum identifier" + ) + elif unit_combination in NION_WHICH_IMAGE: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Image identifier" + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"{axis_name}" + # unitless | dimensionless i.e. no unit in longname + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]"] = np.float32(offset) + ( + np.float32(step) + * np.asarray( + np.linspace( + start=0, stop=count - 1, num=count, endpoint=True + ), + np.float32, + ) + ) + template[f"{trg}/AXISNAME[{axis_name}]/@units"] = ( + f"{ureg.Unit(units)}" + ) + if units == "eV": + # TODO::this is only robust if Nion reports always as eV and not with other prefix like kilo etc. + # in such case the solution from the gatan parser is required, i.e. 
conversion to base units + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Energy ({ureg.Unit(units)})" # eV + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Point coordinate along {axis_name} ({ureg.Unit(units)})" + ) + return template diff --git a/src/pynxtools_em/parsers/nxs_pyxem.py b/src/pynxtools_em/parsers/nxs_pyxem.py index 525bbf8..f18674f 100644 --- a/src/pynxtools_em/parsers/nxs_pyxem.py +++ b/src/pynxtools_em/parsers/nxs_pyxem.py @@ -106,13 +106,14 @@ def get_ipfdir_legend(ipf_key): class NxEmNxsPyxemParser: """Map content from different type of *.h5 files on an instance of NXem.""" - def __init__(self, entry_id: int = 1, file_path: str = "", verbose: bool = False): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): """Overwrite constructor of the generic reader.""" + self.file_path = file_path if entry_id > 0: self.entry_id = entry_id else: self.entry_id = 1 - self.file_path = file_path + self.verbose = verbose self.id_mgn = { "event": 1, "event_img": 1, @@ -121,7 +122,6 @@ def __init__(self, entry_id: int = 1, file_path: str = "", verbose: bool = False "eds_img": 1, } self.cache = {"is_filled": False} - self.verbose = verbose def parse(self, template: dict) -> dict: hfive_parser_type = self.identify_hfive_type() @@ -388,30 +388,30 @@ def process_roi_overview_eds_based(self, inp, template: dict) -> dict: trg = ( f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" - f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]/image_twod" + f"IMAGE_SET[image_set{self.id_mgn['event_img']}]/image_2d" ) template[f"{trg}/description"] = inp.tmp["source"] template[f"{trg}/title"] = f"Region-of-interest overview image" - template[f"{trg}/@signal"] = "intensity" - dims = [("x", 0), ("y", 1)] + template[f"{trg}/@signal"] = "real" + dims = [("i", 0), ("j", 1)] template[f"{trg}/@axes"] = [] for dim in dims[::-1]: 
template[f"{trg}/@axes"].append(f"axis_{dim[0]}") - template[f"{trg}/intensity"] = { - "compress": inp.tmp["image_twod/intensity"].value, + template[f"{trg}/real"] = { + "compress": inp.tmp["image_2d/real"].value, "strength": 1, } - template[f"{trg}/intensity/@long_name"] = f"Signal" + template[f"{trg}/real/@long_name"] = f"Signal" for dim in dims: template[f"{trg}/@AXISNAME_indices[axis_{dim[0]}_indices]"] = np.uint32( dim[1] ) template[f"{trg}/AXISNAME[axis_{dim[0]}]"] = { - "compress": inp.tmp[f"image_twod/axis_{dim[0]}"].value, + "compress": inp.tmp[f"image_2d/axis_{dim[0]}"].value, "strength": 1, } template[f"{trg}/AXISNAME[axis_{dim[0]}]/@long_name"] = inp.tmp[ - f"image_twod/axis_{dim[0]}@long_name" + f"image_2d/axis_{dim[0]}@long_name" ].value self.id_mgn["event_img"] += 1 self.id_mgn["event"] += 1 @@ -812,26 +812,26 @@ def process_roi_eds_spectra(self, inp: dict, template: dict) -> dict: trg = ( f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/SPECTRUM_SET" - f"[spectrum_set{self.id_mgn['event_spc']}]/spectrum_zerod" + f"[spectrum_set{self.id_mgn['event_spc']}]/spectrum_0d" ) template[f"{trg}/description"] = inp[ckey].tmp["source"] template[f"{trg}/title"] = f"Region-of-interest overview image" - template[f"{trg}/@signal"] = "intensity" + template[f"{trg}/@signal"] = "real" template[f"{trg}/@axes"] = ["axis_energy"] - template[f"{trg}/intensity"] = { - "compress": inp[ckey].tmp["spectrum_zerod/intensity"].value, + template[f"{trg}/real"] = { + "compress": inp[ckey].tmp["spectrum_0d/real"].value, "strength": 1, } - template[f"{trg}/intensity/@long_name"] = ( - inp[ckey].tmp["spectrum_zerod/intensity@long_name"].value + template[f"{trg}/real/@long_name"] = ( + inp[ckey].tmp["spectrum_0d/real@long_name"].value ) template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0) template[f"{trg}/AXISNAME[axis_energy]"] = { - "compress": inp[ckey].tmp[f"spectrum_zerod/axis_energy"].value, 
+ "compress": inp[ckey].tmp[f"spectrum_0d/axis_energy"].value, "strength": 1, } template[f"{trg}/AXISNAME[axis_energy]/@long_name"] = ( - inp[ckey].tmp[f"spectrum_zerod/axis_energy@long_name"].value + inp[ckey].tmp[f"spectrum_0d/axis_energy@long_name"].value ) self.id_mgn["event_spc"] += 1 self.id_mgn["event"] += 1 @@ -845,12 +845,12 @@ def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: f"eds/indexing" ) template[f"{trg}/source"] = inp[ckey].tmp["source"] - for img in inp[ckey].tmp["IMAGE_R_SET"]: + for img in inp[ckey].tmp["IMAGE_SET"]: if not isinstance(img, NxImageRealSpaceSet): continue trg = ( f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/" - f"indexing/IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]" + f"indexing/IMAGE_SET[image_set{self.id_mgn['eds_img']}]" ) template[f"{trg}/source"] = img.tmp["source"] template[f"{trg}/description"] = img.tmp["description"] @@ -861,28 +861,28 @@ def process_roi_eds_maps(self, inp: dict, template: dict) -> dict: template[f"{trg}/iupac_line_candidates"] = img.tmp[ "iupac_line_candidates" ] - template[f"{trg}/image_twod/@signal"] = "intensity" - template[f"{trg}/image_twod/@axes"] = ["axis_y", "axis_x"] - template[f"{trg}/image_twod/title"] = ( + template[f"{trg}/image_2d/@signal"] = "real" + template[f"{trg}/image_2d/@axes"] = ["axis_j", "axis_i"] + template[f"{trg}/image_2d/title"] = ( f"EDS map {img.tmp['description']}" ) - template[f"{trg}/image_twod/intensity"] = { - "compress": img.tmp["image_twod/intensity"].value, + template[f"{trg}/image_2d/real"] = { + "compress": img.tmp["image_2d/real"].value, "strength": 1, } - template[f"{trg}/image_twod/intensity/@long_name"] = f"Signal" - dims = [("x", 0), ("y", 1)] + template[f"{trg}/image_2d/real/@long_name"] = f"Signal" + dims = [("i", 0), ("j", 1)] for dim in dims: template[ - f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}_indices]" + f"{trg}/image_2d/@AXISNAME_indices[axis_{dim[0]}_indices]" ] = np.uint32(dim[1]) - 
template[f"{trg}/image_twod/AXISNAME[axis_{dim[0]}]"] = { - "compress": img.tmp[f"image_twod/axis_{dim[0]}"].value, + template[f"{trg}/image_2d/AXISNAME[axis_{dim[0]}]"] = { + "compress": img.tmp[f"image_2d/axis_{dim[0]}"].value, "strength": 1, } template[ - f"{trg}/image_twod/AXISNAME[axis_{dim[0]}]/@long_name" - ] = img.tmp[f"image_twod/axis_{dim[0]}@long_name"].value + f"{trg}/image_2d/AXISNAME[axis_{dim[0]}]/@long_name" + ] = img.tmp[f"image_2d/axis_{dim[0]}@long_name"].value self.id_mgn["eds_img"] += 1 self.id_mgn["roi"] += 1 diff --git a/src/pynxtools_em/parsers/oasis_config_reader.py b/src/pynxtools_em/parsers/oasis_config_reader.py index f51bdb5..45f88a6 100644 --- a/src/pynxtools_em/parsers/oasis_config_reader.py +++ b/src/pynxtools_em/parsers/oasis_config_reader.py @@ -17,25 +17,18 @@ # """Parser NOMAD-Oasis-specific configuration serialized as oasis.yaml to NeXus NXem.""" -# mapping instructions as a dictionary -# prefix is the (variadic prefix to be add to every path on the target side) -# different modifiers are used -# "use": list of pair of trg, src endpoint, take the value in src copy into trg -# "load": list of single value or pair (trg, src) -# if single value this means that the endpoint of trg and src is the same -# e.g. 
in the example below "name" means -# ("/ENTRY[entry*]/USER[user*]/name, "load", "name") -# if pair load the value pointed to by src and copy into trg - import pathlib import flatdict as fd import yaml -from pynxtools_em.concepts.mapping_functors import add_specific_metadata -from pynxtools_em.configurations.oasis_cfg import EM_CITATION_TO_NEXUS, EM_CSYS_TO_NEXUS +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint +from pynxtools_em.configurations.oasis_cfg import ( + OASISCFG_EM_CITATION_TO_NEXUS, + OASISCFG_EM_CSYS_TO_NEXUS, +) -class NxEmNomadOasisConfigurationParser: +class NxEmNomadOasisConfigParser: """Parse deployment specific configuration.""" def __init__(self, file_path: str, entry_id: int, verbose: bool = False): @@ -46,33 +39,33 @@ def __init__(self, file_path: str, entry_id: int, verbose: bool = False): pathlib.Path(file_path).name.endswith(".oasis.specific.yaml") or pathlib.Path(file_path).name.endswith(".oasis.specific.yml") ) and entry_id > 0: - self.entry_id = entry_id self.file_path = file_path with open(self.file_path, "r", encoding="utf-8") as stream: - self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/") + self.flat_metadata = fd.FlatDict(yaml.safe_load(stream), "/") if verbose: - for key, val in self.yml.items(): + for key, val in self.flat_metadata.items(): print(f"key: {key}, val: {val}") + self.entry_id = entry_id else: - self.entry_id = 1 self.file_path = "" - self.yml = {} + self.entry_id = 1 + self.flat_metadata = fd.FlatDict({}, "/") def parse_reference_frames(self, template: dict) -> dict: """Copy details about frames of reference into template.""" src = "coordinate_system_set" - if src in self.yml: - if isinstance(self.yml[src], list): - if all(isinstance(entry, dict) for entry in self.yml[src]): + if src in self.flat_metadata: + if isinstance(self.flat_metadata[src], list): + if all(isinstance(entry, dict) for entry in self.flat_metadata[src]): csys_id = 1 # custom schema delivers a list of 
dictionaries... - for csys_dict in self.yml[src]: - if csys_dict == {}: + for csys_dict in self.flat_metadata[src]: + if len(csys_dict) == 0: continue identifier = [self.entry_id, csys_id] - add_specific_metadata( - EM_CSYS_TO_NEXUS, - fd.FlatDict(csys_dict), + add_specific_metadata_pint( + OASISCFG_EM_CSYS_TO_NEXUS, + csys_dict, identifier, template, ) @@ -82,18 +75,21 @@ def parse_reference_frames(self, template: dict) -> dict: def parse_example(self, template: dict) -> dict: """Copy data from example-specific section into template.""" src = "citation" - if src in self.yml: - if isinstance(self.yml[src], list): - if all(isinstance(entry, dict) for entry in self.yml[src]) is True: + if src in self.flat_metadata: + if isinstance(self.flat_metadata[src], list): + if ( + all(isinstance(entry, dict) for entry in self.flat_metadata[src]) + is True + ): cite_id = 1 # custom schema delivers a list of dictionaries... - for cite_dict in self.yml[src]: - if cite_dict == {}: + for cite_dict in self.flat_metadata[src]: + if len(cite_dict) == 0: continue identifier = [self.entry_id, cite_id] - add_specific_metadata( - EM_CITATION_TO_NEXUS, - fd.FlatDict(cite_dict), + add_specific_metadata_pint( + OASISCFG_EM_CITATION_TO_NEXUS, + cite_dict, identifier, template, ) diff --git a/src/pynxtools_em/parsers/oasis_eln_reader.py b/src/pynxtools_em/parsers/oasis_eln_reader.py index 703e008..cd8debd 100644 --- a/src/pynxtools_em/parsers/oasis_eln_reader.py +++ b/src/pynxtools_em/parsers/oasis_eln_reader.py @@ -21,12 +21,12 @@ import flatdict as fd import yaml -from pynxtools_em.concepts.mapping_functors import add_specific_metadata +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint from pynxtools_em.configurations.eln_cfg import ( - EM_ENTRY_TO_NEXUS, - EM_SAMPLE_TO_NEXUS, - EM_USER_IDENTIFIER_TO_NEXUS, - EM_USER_TO_NEXUS, + OASISELN_EM_ENTRY_TO_NEXUS, + OASISELN_EM_SAMPLE_TO_NEXUS, + OASISELN_EM_USER_IDENTIFIER_TO_NEXUS, + OASISELN_EM_USER_TO_NEXUS, ) @@ 
-43,52 +43,56 @@ def __init__(self, file_path: str, entry_id: int, verbose: bool = False): pathlib.Path(file_path).name.endswith("eln_data.yaml") or pathlib.Path(file_path).name.endswith("eln_data.yml") ) and entry_id > 0: - self.entry_id = entry_id self.file_path = file_path with open(self.file_path, "r", encoding="utf-8") as stream: - self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/") + self.flat_metadata = fd.FlatDict(yaml.safe_load(stream), delimiter="/") if verbose: - for key, val in self.yml.items(): + for key, val in self.flat_metadata.items(): print(f"key: {key}, value: {val}") + self.entry_id = entry_id else: - self.entry_id = 1 self.file_path = "" - self.yml = {} + self.entry_id = 1 + self.flat_metadata = fd.FlatDict({}, "/") def parse_entry(self, template: dict) -> dict: """Copy data from entry section into template.""" identifier = [self.entry_id] - add_specific_metadata(EM_ENTRY_TO_NEXUS, self.yml, identifier, template) + add_specific_metadata_pint( + OASISELN_EM_ENTRY_TO_NEXUS, self.flat_metadata, identifier, template + ) return template def parse_sample(self, template: dict) -> dict: """Copy data from entry section into template.""" identifier = [self.entry_id] - add_specific_metadata(EM_SAMPLE_TO_NEXUS, self.yml, identifier, template) + add_specific_metadata_pint( + OASISELN_EM_SAMPLE_TO_NEXUS, self.flat_metadata, identifier, template + ) return template def parse_user(self, template: dict) -> dict: """Copy data from user section into template.""" src = "user" - if src in self.yml: - if isinstance(self.yml[src], list): - if all(isinstance(entry, dict) for entry in self.yml[src]): + if src in self.flat_metadata: + if isinstance(self.flat_metadata[src], list): + if all(isinstance(entry, dict) for entry in self.flat_metadata[src]): user_id = 1 # custom schema delivers a list of dictionaries... 
- for user_dict in self.yml[src]: - if user_dict == {}: + for user_dict in self.flat_metadata[src]: + if len(user_dict) == 0: continue identifier = [self.entry_id, user_id] - add_specific_metadata( - EM_USER_TO_NEXUS, - fd.FlatDict(user_dict), + add_specific_metadata_pint( + OASISELN_EM_USER_TO_NEXUS, + user_dict, identifier, template, ) if "orcid" in user_dict: - add_specific_metadata( - EM_USER_IDENTIFIER_TO_NEXUS, - fd.FlatDict(user_dict), + add_specific_metadata_pint( + OASISELN_EM_USER_IDENTIFIER_TO_NEXUS, + user_dict, identifier, template, ) diff --git a/src/pynxtools_em/parsers/rsciio_base.py b/src/pynxtools_em/parsers/rsciio_base.py index 0b55af7..878d66f 100644 --- a/src/pynxtools_em/parsers/rsciio_base.py +++ b/src/pynxtools_em/parsers/rsciio_base.py @@ -35,13 +35,10 @@ class RsciioBaseParser: def __init__(self, file_path: str = ""): - # self.supported_version = VERSION_MANAGEMENT - # self.version = VERSION_MANAGEMENT # tech_partner the company which designed this format # schema_name the specific name of the family of schemas supported by this reader # schema_version the specific version(s) supported by this reader # writer_name the specific name of the tech_partner's (typically proprietary) software - self.prfx = None - self.tmp: Dict = {} if file_path is not None and file_path != "": self.file_path = file_path + self.tmp: Dict = {} diff --git a/src/pynxtools_em/parsers/rsciio_bruker.py b/src/pynxtools_em/parsers/rsciio_bruker.py index b8cc646..01be19e 100644 --- a/src/pynxtools_em/parsers/rsciio_bruker.py +++ b/src/pynxtools_em/parsers/rsciio_bruker.py @@ -28,15 +28,14 @@ class RsciioBrukerParser(RsciioBaseParser): def __init__(self, file_path: str = ""): super().__init__(file_path) - self.prfx = None self.tmp: Dict = {} self.objs: List = [] - self.supported_version: Dict = {} self.version: Dict = {} self.supported = False self.check_if_supported() def check_if_supported(self): + """Check if provided content matches Bruker concepts.""" try: self.objs = 
bruker.file_reader(self.file_path) # TODO::what to do if the content of the file is larger than the available @@ -46,38 +45,40 @@ def check_if_supported(self): # in the template and stream out accordingly self.supported = True except IOError: - print(f"Loading {self.file_path} using {self.__name__} is not supported !") + print(f"Loading {self.file_path} using Bruker is not supported !") - def parse_and_normalize(self): - """Perform actual parsing filling cache self.tmp.""" + def parse_and_normalize(self, template: dict) -> dict: + """Perform actual parsing filling cache.""" if self.supported is True: - print(f"Parsing with {self.__name__}...") - self.tech_partner_to_nexus_normalization() + print(f"Parsing via Bruker...") + self.normalize_eds_content(template) + self.normalize_eels_content(template) else: print( f"{self.file_path} is not a Bruker-specific " f"BCF file that this parser can process !" ) + return template - def tech_partner_to_nexus_normalization(self): - """Translate tech partner concepts to NeXus concepts.""" - self.normalize_eds_content() - self.normalize_eels_content() - - def normalize_eds_content(self): - pass + def normalize_eds_content(self, template: dict) -> dict: + """TODO implementation.""" + return template - def normalize_eels_content(self): - pass + def normalize_eels_content(self, template: dict) -> dict: + """TODO implementation.""" + return template def process_into_template(self, template: dict) -> dict: + """TODO implementation.""" if self.supported is True: self.process_event_data_em_metadata(template) self.process_event_data_em_data(template) return template def process_event_data_em_metadata(self, template: dict) -> dict: + """TODO implementation.""" return template def process_event_data_em_data(self, template: dict) -> dict: + """TODO implementation.""" return template diff --git a/src/pynxtools_em/parsers/rsciio_gatan.py b/src/pynxtools_em/parsers/rsciio_gatan.py new file mode 100644 index 0000000..657064a --- /dev/null +++ 
b/src/pynxtools_em/parsers/rsciio_gatan.py @@ -0,0 +1,270 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""(Sub-)parser for reading content from Gatan Digital Micrograph *.dm3 and *.dm4 (HDF5) via rosettasciio.""" + +from typing import Dict, List + +import flatdict as fd +import numpy as np +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint +from pynxtools_em.configurations.rsciio_gatan_cfg import ( + GATAN_DYNAMIC_STAGE_NX, + GATAN_DYNAMIC_VARIOUS_NX, + GATAN_WHICH_IMAGE, + GATAN_WHICH_SPECTRUM, +) +from pynxtools_em.parsers.rsciio_base import RsciioBaseParser +from pynxtools_em.utils.gatan_utils import gatan_image_spectrum_or_generic_nxdata +from pynxtools_em.utils.get_file_checksum import ( + DEFAULT_CHECKSUM_ALGORITHM, + get_sha256_of_file_content, +) +from pynxtools_em.utils.pint_custom_unit_registry import ureg +from pynxtools_em.utils.rsciio_hspy_utils import all_req_keywords_in_dict +from rsciio import digitalmicrograph as gatan + + +class RsciioGatanParser(RsciioBaseParser): + """Read Gatan Digital Micrograph dm3/dm4 formats.""" + + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): + super().__init__(file_path) + if entry_id > 0: + self.entry_id = entry_id + else: + self.entry_id = 1 + self.event_id = 1 + self.verbose = verbose + self.version: Dict = {} + self.supported = 
False + self.check_if_supported() + + def check_if_supported(self): + self.supported = False + if not self.file_path.lower().endswith(("dm3", "dm4")): + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) + return + try: + self.objs = gatan.file_reader( + self.file_path, lazy=False, order="C", optimize=True + ) + # TODO::what to do if the content of the file is larger than the available + # main memory, make use of lazy loading + + reqs = ["data", "axes", "metadata", "original_metadata", "mapping"] + obj_idx_supported: List[int] = [] + for idx, obj in enumerate(self.objs): + if not isinstance(obj, dict): + continue + if not all_req_keywords_in_dict(obj, reqs): + continue + # flat_metadata = fd.FlatDict(obj["original_metadata"], "/") + # TODO::add version distinction logic from rsciio_velox + obj_idx_supported.append(idx) + if self.verbose: + print(f"{idx}-th obj is supported") + if len(obj_idx_supported) > 0: # at least some supported content + self.supported = True + else: + print( + f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + ) + except IOError: + return + + def parse(self, template: dict) -> dict: + """Perform actual parsing filling cache self.tmp.""" + if self.supported: + with open(self.file_path, "rb", 0) as fp: + self.file_path_sha256 = get_sha256_of_file_content(fp) + print( + f"Parsing {self.file_path} Gatan with SHA256 {self.file_path_sha256} ..." 
+ ) + self.parse_content(template) + return template + + def parse_content(self, template: dict) -> dict: + """Translate tech partner concepts to NeXus concepts.""" + reqs = ["data", "axes", "metadata", "original_metadata", "mapping"] + for idx, obj in enumerate(self.objs): + if not isinstance(obj, dict): + continue + if not all_req_keywords_in_dict(obj, reqs): + continue + self.process_event_data_em_metadata(obj, template) + self.process_event_data_em_data(obj, template) + self.event_id += 1 + if self.verbose: + print(f"obj{idx}, dims {obj['axes']}") + return template + + def process_event_data_em_metadata(self, obj: dict, template: dict) -> dict: + """Map Gatan Digital Micrograph-specific concept representations on NeXus concepts.""" + # use an own function for each instead of a loop of a template function call + # as for each section there are typically always some extra formatting + # steps required + flat_metadata = fd.FlatDict(obj["original_metadata"], "/") + identifier = [self.entry_id, self.event_id, 1] + for cfg in [GATAN_DYNAMIC_STAGE_NX, GATAN_DYNAMIC_VARIOUS_NX]: + add_specific_metadata_pint(cfg, flat_metadata, identifier, template) + return template + + def annotate_information_source( + self, trg: str, file_path: str, checksum: str, template: dict + ) -> dict: + """Add from where the information was obtained.""" + template[f"{trg}/PROCESS[process]/source/type"] = "file" + template[f"{trg}/PROCESS[process]/source/path"] = file_path + template[f"{trg}/PROCESS[process]/source/checksum"] = checksum + template[f"{trg}/PROCESS[process]/source/algorithm"] = ( + DEFAULT_CHECKSUM_ALGORITHM + ) + return template + + def process_event_data_em_data(self, obj: dict, template: dict) -> dict: + """Map Gatan-specifically formatted data arrays on NeXus NXdata/NXimage/NXspectrum.""" + # assume rosettasciio-specific formatting of the obj informationemd parser + # i.e. 
a dictionary with the following keys: + # "data", "axes", "metadata", "original_metadata", "mapping" + flat_hspy_meta = fd.FlatDict(obj["metadata"], "/") + if "General/title" not in flat_hspy_meta: + return template + + # flat_orig_meta = fd.FlatDict(obj["original_metadata"], "/") + axes = obj["axes"] + unit_combination = gatan_image_spectrum_or_generic_nxdata(axes) + if unit_combination == "": + return template + if self.verbose: + print(axes) + print(f"{unit_combination}, {np.shape(obj['data'])}") + print(f"entry_id {self.entry_id}, event_id {self.event_id}") + + prfx = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em{self.event_id}]" + self.event_id += 1 + + # this is the place when you want to skip individually the writing of NXdata + # return template + + axis_names = None + if unit_combination in GATAN_WHICH_SPECTRUM: + self.annotate_information_source( + f"{prfx}/SPECTRUM_SET[spectrum_set1]", + self.file_path, + self.file_path_sha256, + template, + ) + trg = f"{prfx}/SPECTRUM_SET[spectrum_set1]/{GATAN_WHICH_SPECTRUM[unit_combination][0]}" + template[f"{trg}/title"] = f"{flat_hspy_meta['General/title']}" + template[f"{trg}/@signal"] = f"intensity" + template[f"{trg}/intensity"] = {"compress": obj["data"], "strength": 1} + axis_names = GATAN_WHICH_SPECTRUM[unit_combination][1] + elif unit_combination in GATAN_WHICH_IMAGE: + self.annotate_information_source( + f"{prfx}/IMAGE_SET[image_set1]", + self.file_path, + self.file_path_sha256, + template, + ) + trg = ( + f"{prfx}/IMAGE_SET[image_set1]/{GATAN_WHICH_IMAGE[unit_combination][0]}" + ) + template[f"{trg}/title"] = f"{flat_hspy_meta['General/title']}" + template[f"{trg}/@signal"] = f"real" # TODO::unless COMPLEX + template[f"{trg}/real"] = {"compress": obj["data"], "strength": 1} + axis_names = GATAN_WHICH_IMAGE[unit_combination][1] + else: + self.annotate_information_source( + f"{prfx}/DATA[data1]", self.file_path, self.file_path_sha256, template + ) + trg = 
f"{prfx}/DATA[data1]" + template[f"{trg}/title"] = f"{flat_hspy_meta['General/title']}" + template[f"{trg}/@NX_class"] = f"NXdata" + template[f"{trg}/@signal"] = f"data" + template[f"{trg}/data"] = {"compress": obj["data"], "strength": 1} + axis_names = ["axis_i", "axis_j", "axis_k", "axis_l", "axis_m"][ + 0 : len(unit_combination.split("_")) + ] # mind, different to Nion and other tech partners here no [::-1] reversal + # of the indices 241.a2c338fd458e6b7023ec946a5e3ce8c85bd2befcb5d17dae7ae5f44b2dede81b.dm4 + # is a good example! + + if len(axis_names) >= 1: + # arrays axis_names and dimensional_calibrations are aligned in order + # but that order is reversed wrt to AXISNAME_indices ! + for idx, axis_name in enumerate(axis_names): + template[f"{trg}/@AXISNAME_indices[{axis_name}_indices]"] = np.uint32( + len(axis_names) - 1 - idx + ) # TODO::check with dissimilarly sized data array if this is idx ! + template[f"{trg}/@axes"] = axis_names + + for idx, axis in enumerate(axes): + axis_name = axis_names[idx] + offset = axis["offset"] + step = axis["scale"] + units = axis["units"] + count = np.shape(obj["data"])[idx] + if units == "": + template[f"{trg}/AXISNAME[{axis_name}]"] = np.float32(offset) + ( + np.float32(step) + * np.asarray( + np.linspace( + start=0, stop=count - 1, num=count, endpoint=True + ), + np.float32, + ) + ) + if unit_combination in GATAN_WHICH_SPECTRUM: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Spectrum identifier" + ) + elif unit_combination in GATAN_WHICH_IMAGE: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Image identifier" + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"{axis_name}" + # unitless | dimensionless i.e. 
no unit in longname + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]"] = np.float32(offset) + ( + np.float32(step) + * np.asarray( + np.linspace( + start=0, stop=count - 1, num=count, endpoint=True + ), + np.float32, + ) + ) + template[f"{trg}/AXISNAME[{axis_name}]/@units"] = ( + f"{ureg.Unit(units)}" + ) + if ( + ureg.Quantity(units).to_base_units().units + == "kilogram * meter ** 2 / second ** 2" + ): + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Energy ({ureg.Unit(units)})" + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Point coordinate along {axis_name} ({ureg.Unit(units)})" + ) + return template diff --git a/src/pynxtools_em/parsers/rsciio_velox.py b/src/pynxtools_em/parsers/rsciio_velox.py index 4045e45..c2df664 100644 --- a/src/pynxtools_em/parsers/rsciio_velox.py +++ b/src/pynxtools_em/parsers/rsciio_velox.py @@ -17,80 +17,65 @@ # """(Sub-)parser for reading content from ThermoFisher Velox *.emd (HDF5) via rosettasciio.""" -from datetime import datetime from typing import Dict, List import flatdict as fd import numpy as np -import pytz -from ase.data import chemical_symbols -from pynxtools_em.concepts.mapping_functors import add_specific_metadata +from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint from pynxtools_em.configurations.rsciio_velox_cfg import ( - VELOX_DYNAMIC_TO_NX_EM, - VELOX_EBEAM_DYNAMIC_TO_NX_EM, - VELOX_EBEAM_STATIC_TO_NX_EM, - VELOX_ENTRY_TO_NX_EM, - VELOX_FABRICATION_TO_NX_EM, - VELOX_OPTICS_TO_NX_EM, - VELOX_SCAN_TO_NX_EM, - VELOX_STAGE_TO_NX_EM, + VELOX_DYNAMIC_EBEAM_NX, + VELOX_DYNAMIC_OPTICS_NX, + VELOX_DYNAMIC_SCAN_NX, + VELOX_DYNAMIC_STAGE_NX, + VELOX_DYNAMIC_VARIOUS_NX, + VELOX_STATIC_EBEAM_NX, + VELOX_STATIC_ENTRY_NX, + VELOX_STATIC_FABRICATION_NX, + VELOX_WHICH_IMAGE, + VELOX_WHICH_SPECTRUM, ) from pynxtools_em.parsers.rsciio_base import RsciioBaseParser from pynxtools_em.utils.get_file_checksum import ( DEFAULT_CHECKSUM_ALGORITHM, 
get_sha256_of_file_content, ) -from pynxtools_em.utils.rsciio_hspy_utils import ( - get_axes_dims, - get_axes_units, - get_named_axis, -) +from pynxtools_em.utils.pint_custom_unit_registry import ureg +from pynxtools_em.utils.rsciio_hspy_utils import all_req_keywords_in_dict from pynxtools_em.utils.string_conversions import string_to_number +from pynxtools_em.utils.velox_utils import velox_image_spectrum_or_generic_nxdata from rsciio import emd -REAL_SPACE = 0 -COMPLEX_SPACE = 1 - - -def all_req_keywords_in_dict(dct: dict, keywords: list) -> bool: - """Check if dict dct has all keywords in keywords as keys from.""" - # falsifiable? - for key in keywords: - if key in dct: - continue - return False - return True - class RsciioVeloxParser(RsciioBaseParser): """Read Velox EMD File Format emd.""" - def __init__(self, entry_id: int = 1, file_path: str = "", verbose: bool = False): + def __init__(self, file_path: str = "", entry_id: int = 1, verbose: bool = False): super().__init__(file_path) if entry_id > 0: self.entry_id = entry_id else: self.entry_id = 1 + self.verbose = verbose + # for id_mgn check pynxtools-em v0.2 of this velox reader self.id_mgn: Dict = { - "event": 1, + "event_id": 1, "event_img": 1, "event_spc": 1, "roi": 1, "eds_img": 1, } - self.file_path_sha256 = None - self.tmp: Dict = {} - self.supported_version: Dict = { - "Core/MetadataDefinitionVersion": ["7.9"], - "Core/MetadataSchemaVersion": ["v1/2013/07"], - } self.version: Dict = { - "Core/MetadataDefinitionVersion": None, - "Core/MetadataSchemaVersion": None, + "trg": { + "Core/MetadataDefinitionVersion": ["7.9"], + "Core/MetadataSchemaVersion": ["v1/2013/07"], + }, + "src": { + "Core/MetadataDefinitionVersion": None, + "Core/MetadataSchemaVersion": None, + }, } self.obj_idx_supported: List = [] self.supported = False - self.verbose = verbose self.check_if_supported() def check_if_supported(self): @@ -119,12 +104,12 @@ def check_if_supported(self): if "Core/MetadataDefinitionVersion" in orgmeta: if 
( orgmeta["Core/MetadataDefinitionVersion"] - not in self.supported_version["Core/MetadataDefinitionVersion"] + not in self.version["trg"]["Core/MetadataDefinitionVersion"] ): continue if ( orgmeta["Core/MetadataSchemaVersion"] - not in self.supported_version["Core/MetadataSchemaVersion"] + not in self.version["trg"]["Core/MetadataSchemaVersion"] ): continue self.obj_idx_supported.append(idx) @@ -144,15 +129,16 @@ def check_if_supported(self): def parse(self, template: dict) -> dict: """Perform actual parsing filling cache self.tmp.""" - if self.supported is True: - self.tech_partner_to_nexus_normalization(template) - else: + if self.supported: + with open(self.file_path, "rb", 0) as fp: + self.file_path_sha256 = get_sha256_of_file_content(fp) print( - f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports" + f"Parsing {self.file_path} Velox with SHA256 {self.file_path_sha256} ..." ) + self.parse_content(template) return template - def tech_partner_to_nexus_normalization(self, template: dict) -> dict: + def parse_content(self, template: dict) -> dict: """Translate tech partner concepts to NeXus concepts.""" reqs = ["data", "axes", "metadata", "original_metadata", "mapping"] for idx, obj in enumerate(self.objs): @@ -160,174 +146,28 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict: continue if not all_req_keywords_in_dict(obj, reqs): continue - content_type = self.content_resolver(obj) - print( - f"Parsing {idx}-th object in {self.file_path} content type is {content_type}" - ) + self.process_event_data_em_data(obj, template) if self.verbose: - print(f"dims: {obj['axes']}") - if content_type == "imgs": - self.normalize_imgs_content(obj, template) # generic imaging modes - # TODO:: could later make an own one for bright/dark field, but - # currently no distinction in hyperspy - elif content_type == "adf": - self.normalize_adf_content( - obj, template - ) # (high-angle) annular dark field - elif content_type == 
"diff": # diffraction image in reciprocal space - self.normalize_diff_content(obj, template) # diffraction images - elif content_type == "eds_map": - self.normalize_eds_map_content(obj, template) # ED(X)S in the TEM - elif content_type == "eds_spc": - self.normalize_eds_spc_content(obj, template) # EDS spectrum/(a) - elif content_type == "eels": - self.normalize_eels_content( - obj, template - ) # electron energy loss spectroscopy - else: # == "n/a" - print( - f"WARNING::Unable to resolve content of {idx}-th object in {self.file_path}!" - ) + print(f"obj{idx}, dims {obj['axes']}") return template - def content_resolver(self, obj: dict) -> str: - """Try to identify which content the obj describes best.""" - # assume rosettasciio-specific formatting of the emd parser - # i.e. a dictionary with the following keys: - # "data", "axes", "metadata", "original_metadata", "mapping" - meta = fd.FlatDict(obj["metadata"], "/") - # orgmeta = fd.FlatDict(obj["original_metadata"], "/") - dims = get_axes_dims(obj["axes"]) - units = get_axes_units(obj["axes"]) - - if "General/title" not in meta.keys(): - return "n/a" + def process_event_data_em_metadata(self, obj: dict, template: dict) -> dict: + """Map some of the TFS/FEI/Velox-specific metadata concepts on NeXus concepts.""" + identifier = [self.entry_id, self.id_mgn["event_id"], 1] + flat_orig_meta = fd.FlatDict(obj["original_metadata"], "/") - if (meta["General/title"] in ("BF")) or ( - meta["General/title"].startswith("DF") - ): - uniq = set() - for dim in dims: - uniq.add(dim[0]) - # TODO::the problem with using here the explicit name DF4 is that this may only - # work for a particular microscope: - # Core/MetadataDefinitionVersion: 7.9, Core/MetadataSchemaVersion: v1/2013/07 - # Instrument/ControlSoftwareVersion: 1.15.4, Instrument/Manufacturer: FEI Company - # Instrument/InstrumentId: 6338, Instrument/InstrumentModel: Talos F200X - # instead there should be a logic added which resolves which concept - # the data in this obj 
are best described by when asking a community-wide - # glossary but not the FEI-specific glossary - # all that logic is unneeded and thereby the data more interoperable - # if FEI would harmonize their obvious company metadata standard with the - # electron microscopy community! - if sorted(uniq) == ["x", "y"]: - return "imgs" - - if meta["General/title"] in ("HAADF"): - return "adf" - - # all units indicating we are in real or complex i.e. reciprocal space - if meta["General/title"] in ("EDS"): - return "eds_spc" - # applies to multiple cases, sum spectrum, spectrum stack etc. - - for symbol in chemical_symbols[1::]: # an eds_map - # TODO::does rosettasciio via hyperspy identify the symbol or is the - # title by default already in Velox set (by default) to the chemical symbol? - if meta["General/title"] != symbol: - continue - return "eds_map" - - vote_r_c = [0, 0] # real space, complex space - for unit in units: - if unit.lower().replace(" ", "") in ["m", "cm", "mm", "µm", "nm", "pm"]: - vote_r_c[REAL_SPACE] += 1 - if unit.lower().replace(" ", "") in [ - "1/m", - "1/cm", - "1/mm", - "1/µm", - "1/nm", - "1/pm", - ]: - vote_r_c[COMPLEX_SPACE] += 1 - - if (vote_r_c[0] == len(units)) and (vote_r_c[1] == 0): - return "imgs" - if (vote_r_c[0] == 0) and (vote_r_c[1] == len(units)): - return "diff" - - return "n/a" - - def add_entry_header( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map entry-specific metadata on NXem instance.""" - add_specific_metadata(VELOX_ENTRY_TO_NX_EM, orgmeta, identifier, template) - return template - - def add_ebeam_static( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map em_lab ebeam.""" - add_specific_metadata( - VELOX_EBEAM_STATIC_TO_NX_EM, orgmeta, identifier, template - ) - return template - - def add_fabrication( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map fabrication-specific metadata on NXem instance""" - 
add_specific_metadata(VELOX_FABRICATION_TO_NX_EM, orgmeta, identifier, template) - return template - - def add_scan(self, orgmeta: fd.FlatDict, identifier: list, template: dict) -> dict: - """Map scan-specific metadata on NXem instance.""" - add_specific_metadata(VELOX_SCAN_TO_NX_EM, orgmeta, identifier, template) - return template - - def add_optics( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map optics-specific metadata on NXem instance.""" - add_specific_metadata(VELOX_OPTICS_TO_NX_EM, orgmeta, identifier, template) - return template - - def add_stage(self, orgmeta: fd.FlatDict, identifier: list, template: dict) -> dict: - """Map optics-specific metadata on NXem instance.""" - add_specific_metadata(VELOX_STAGE_TO_NX_EM, orgmeta, identifier, template) - return template - - def add_various_dynamic( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map optics-specific metadata on NXem instance.""" - add_specific_metadata(VELOX_DYNAMIC_TO_NX_EM, orgmeta, identifier, template) - return template - - def add_ebeam_dynamic( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map optics-specific metadata on NXem instance.""" - add_specific_metadata( - VELOX_EBEAM_DYNAMIC_TO_NX_EM, orgmeta, identifier, template - ) - return template - - def add_lens_event_data( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map lens-specific Velox/FEI metadata on NeXus NXlens_em instances.""" if (len(identifier) != 3) or (not all(isinstance(x, int) for x in identifier)): raise ValueError( f"Argument identifier {identifier} needs three int values!" 
) trg = ( f"/ENTRY[entry{identifier[0]}]/measurement/event_data_em_set/EVENT_DATA_EM" - f"[event_data_em{identifier[1]}]/em_lab/EBEAM_COLUMN[ebeam_column]" + f"[event_data_em{identifier[1]}]/em_lab/ebeam_column" ) - lens_names = [ + # using an own function like add_dynamic_lens_metadata may be needed + # if specific NeXus group have some extra formatting + lens_idx = 1 + for lens_name in [ "C1", "C2", "Diffraction", @@ -337,361 +177,178 @@ def add_lens_event_data( "Objective", "Projector1", "Projector2", - ] - lens_idx = 1 - for lens_name in lens_names: + ]: toggle = False - if f"Optics/{lens_name}LensIntensity" in orgmeta: - template[f"{trg}/LENS_EM[lens_em{lens_idx}]/value"] = string_to_number( - orgmeta[f"Optics/{lens_name}LensIntensity"] + if f"Optics/{lens_name}LensIntensity" in flat_orig_meta: + template[f"{trg}/lensID[lens{lens_idx}]/value"] = string_to_number( + flat_orig_meta[f"Optics/{lens_name}LensIntensity"] ) # TODO::unit? toggle = True - if f"Optics/{lens_name}LensMode" in orgmeta: - template[f"{trg}/LENS_EM[lens_em{lens_idx}]/mode"] = orgmeta[ - f"Optics/{lens_name}LensMode" - ] + if f"Optics/{lens_name}LensMode" in flat_orig_meta: + template[f"{trg}/lensID[lens{lens_idx}]/mode"] = string_to_number( + flat_orig_meta[f"Optics/{lens_name}LensMode"] + ) toggle = True if toggle: - template[f"{trg}/LENS_EM[lens_em{lens_idx}]/name"] = f"{lens_name}" + template[f"{trg}/lensID[lens{lens_idx}]/name"] = f"{lens_name}" lens_idx += 1 # Optics/GunLensSetting - return template - def add_metadata( - self, orgmeta: fd.FlatDict, identifier: list, template: dict - ) -> dict: - """Map Velox-specific concept representations on NeXus concepts.""" - # use an own function for each instead of a loop of a template function call - # as for each section there are typically always some extra formatting - # steps required - self.add_entry_header(orgmeta, identifier, template) - self.add_ebeam_static(orgmeta, identifier, template) - self.add_fabrication(orgmeta, identifier, 
template) - self.add_scan(orgmeta, identifier, template) - self.add_optics(orgmeta, identifier, template) - self.add_stage(orgmeta, identifier, template) - self.add_various_dynamic(orgmeta, identifier, template) - self.add_ebeam_dynamic(orgmeta, identifier, template) - self.add_lens_event_data(orgmeta, identifier, template) - return template - - def normalize_imgs_content(self, obj: dict, template: dict) -> dict: - """Map generic scanned images (e.g. BF/DF) to NeXus.""" - meta = fd.FlatDict(obj["metadata"], "/") - orgmeta = fd.FlatDict(obj["original_metadata"], "/") - dims = get_axes_dims(obj["axes"]) - if len(dims) != 2: - raise ValueError(f"{obj['axes']}") - trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" - f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" - f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]" + for cfg in [ + VELOX_STATIC_ENTRY_NX, + VELOX_STATIC_EBEAM_NX, + VELOX_DYNAMIC_SCAN_NX, + VELOX_DYNAMIC_VARIOUS_NX, + VELOX_DYNAMIC_OPTICS_NX, + ]: + add_specific_metadata_pint(cfg, flat_orig_meta, identifier, template) + + add_specific_metadata_pint( + VELOX_STATIC_FABRICATION_NX, flat_orig_meta, identifier, template ) - template[f"{trg}/PROCESS[process]/source/type"] = "file" - template[f"{trg}/PROCESS[process]/source/path"] = self.file_path - template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 - template[f"{trg}/PROCESS[process]/source/algorithm"] = ( - DEFAULT_CHECKSUM_ALGORITHM + add_specific_metadata_pint( + VELOX_DYNAMIC_STAGE_NX, flat_orig_meta, identifier, template ) - template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"] - template[f"{trg}/image_twod/@signal"] = "intensity" - template[f"{trg}/image_twod/@axes"] = [] - for dim in dims: - template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}") - template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}]"] = np.uint32( - dim[1] - ) - support, unit = get_named_axis(obj["axes"], dim[0]) - if support is not None 
and unit is not None: - template[f"{trg}/image_twod/axis_{dim[0]}"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] = ( - f"Coordinate along {dim[0]}-axis ({unit})" - ) - template[f"{trg}/image_twod/title"] = meta["General/title"] - template[f"{trg}/image_twod/intensity"] = { - "compress": np.asarray(obj["data"]), - "strength": 1, - } - # template[f"{trg}/image_twod/intensity/@units"] - self.add_metadata( - orgmeta, - [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], - template, + add_specific_metadata_pint( + VELOX_DYNAMIC_EBEAM_NX, flat_orig_meta, identifier, template ) - # TODO: add detector data - self.id_mgn["event_img"] += 1 - self.id_mgn["event"] += 1 return template - def normalize_adf_content(self, obj: dict, template: dict) -> dict: - """Map relevant (high-angle) annular dark field images to NeXus.""" - meta = fd.FlatDict(obj["metadata"], "/") - orgmeta = fd.FlatDict(obj["original_metadata"], "/") - dims = get_axes_dims(obj["axes"]) - if len(dims) != 2: - raise ValueError(f"{obj['axes']}") - trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" - f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" - f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]" - ) + def annotate_information_source( + self, trg: str, file_path: str, checksum: str, template: dict + ) -> dict: + """Add from where the information was obtained.""" template[f"{trg}/PROCESS[process]/source/type"] = "file" - template[f"{trg}/PROCESS[process]/source/path"] = self.file_path - template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 + template[f"{trg}/PROCESS[process]/source/path"] = file_path + template[f"{trg}/PROCESS[process]/source/checksum"] = checksum template[f"{trg}/PROCESS[process]/source/algorithm"] = ( DEFAULT_CHECKSUM_ALGORITHM ) - template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"] - template[f"{trg}/image_twod/@signal"] = "intensity" - 
template[f"{trg}/image_twod/@axes"] = [] - for dim in dims: - template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}") - template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}]"] = np.uint32( - dim[1] - ) - support, unit = get_named_axis(obj["axes"], dim[0]) - if support is not None and unit is not None: - template[f"{trg}/image_twod/axis_{dim[0]}"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] = ( - f"Coordinate along {dim[0]}-axis ({unit})" - ) - template[f"{trg}/image_twod/title"] = meta["General/title"] - template[f"{trg}/image_twod/intensity"] = { - "compress": np.asarray(obj["data"]), - "strength": 1, - } - # template[f"{trg}/image_twod/intensity/@units"] - self.add_metadata( - orgmeta, - [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], - template, - ) - # TODO: add detector data - # TODO::coll. angles given in original_metadata map to half_angle_interval - self.id_mgn["event_img"] += 1 - self.id_mgn["event"] += 1 return template - def normalize_diff_content(self, obj: dict, template: dict) -> dict: - """Map relevant diffraction images to NeXus.""" - # TODO::the above-mentioned constraint is not general enough - # this can work only for cases where we know that we not only have a - # Ceta camera but also use it for taking diffraction pattern - # TODO::this is an example that more logic is needed to identify whether - # the information inside obj really has a similarity with the concept of - # somebody having taken a diffraction image - # one can compare the situation with the following: - # assume you wish to take pictures of apples and have an NXapple_picture - # but all you get is an image from a digital camera where the dataset is - # named maybe DCIM, without a logic one cannot make the mapping robustly! 
- # can one map y, x, on j, i indices - idx_map = {"y": "j", "x": "i"} - meta = fd.FlatDict(obj["metadata"], "/") - orgmeta = fd.FlatDict(obj["original_metadata"], "/") - dims = get_axes_dims(obj["axes"]) - if len(dims) != 2: - raise ValueError(f"{obj['axes']}") - for dim in dims: - if dim[0] not in idx_map.keys(): - raise ValueError(f"Unable to map index {dim[0]} on something!") + def process_event_data_em_data(self, obj: dict, template: dict) -> dict: + """Map Velox-specifically formatted data arrays on NeXus NXdata/NXimage/NXspectrum.""" + flat_hspy_meta = fd.FlatDict(obj["metadata"], "/") + if "General/title" not in flat_hspy_meta: + return template - trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" - f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" - f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]" - ) - template[f"{trg}/PROCESS[process]/source/type"] = "file" - template[f"{trg}/PROCESS[process]/source/path"] = self.file_path - template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 - template[f"{trg}/PROCESS[process]/source/algorithm"] = ( - DEFAULT_CHECKSUM_ALGORITHM - ) - template[f"{trg}/PROCESS[process]/detector_identifier"] = ( - f"Check carefully how rsciio/hyperspy knows this {meta['General/title']}!" 
- ) - template[f"{trg}/image_twod/@signal"] = "magnitude" - template[f"{trg}/image_twod/@axes"] = [] - for dim in dims: - template[f"{trg}/image_twod/@axes"].append(f"axis_{idx_map[dim[0]]}") - template[f"{trg}/image_twod/@AXISNAME_indices[axis_{idx_map[dim[0]]}]"] = ( - np.uint32(dim[1]) + # flat_orig_meta = fd.FlatDict(obj["original_metadata"], "/") + axes = obj["axes"] + unit_combination = velox_image_spectrum_or_generic_nxdata(axes) + if unit_combination == "": + return template + if self.verbose: + print(axes) + print(f"{unit_combination}, {np.shape(obj['data'])}") + print(f"entry_id {self.entry_id}, event_id {self.id_mgn['event_id']}") + + prfx = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/EVENT_DATA_EM[event_data_em{self.id_mgn['event_id']}]" + # this is the place when you want to skip individually the writing of NXdata + # return template + axis_names = None + if unit_combination in VELOX_WHICH_SPECTRUM: + self.annotate_information_source( + f"{prfx}/SPECTRUM_SET[spectrum_set1]", + self.file_path, + self.file_path_sha256, + template, ) - support, unit = get_named_axis(obj["axes"], dim[0]) - if support is not None and unit is not None: - template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}/@long_name"] = ( - f"Coordinate along {idx_map[dim[0]]}-axis ({unit})" - ) - template[f"{trg}/image_twod/title"] = meta["General/title"] - template[f"{trg}/image_twod/magnitude"] = { - "compress": np.asarray(obj["data"]), - "strength": 1, - } - # template[f"{trg}/image_twod/magnitude/@units"] - self.add_metadata( - orgmeta, - [self.entry_id, self.id_mgn["event"], self.id_mgn["event_img"]], - template, - ) - self.id_mgn["event_img"] += 1 - self.id_mgn["event"] += 1 - return template - - def normalize_eds_spc_content(self, obj: dict, template: dict) -> dict: - """Map relevant EDS spectrum/(a) to NeXus.""" - meta = fd.FlatDict(obj["metadata"], "/") - orgmeta = 
fd.FlatDict(obj["original_metadata"], "/") - dims = get_axes_dims(obj["axes"]) - n_dims = None - if dims == [("Energy", 0)]: - n_dims = 1 - elif dims == [("x", 0), ("X-ray energy", 1)]: - n_dims = 2 - elif dims == [("y", 0), ("x", 1), ("X-ray energy", 2)]: - n_dims = 3 + trg = f"{prfx}/SPECTRUM_SET[spectrum_set1]/{VELOX_WHICH_SPECTRUM[unit_combination][0]}" + template[f"{trg}/title"] = f"{flat_hspy_meta['General/title']}" + template[f"{trg}/@signal"] = f"intensity" + template[f"{trg}/intensity"] = {"compress": obj["data"], "strength": 1} + axis_names = VELOX_WHICH_SPECTRUM[unit_combination][1] + elif unit_combination in VELOX_WHICH_IMAGE: + self.annotate_information_source( + f"{prfx}/IMAGE_SET[image_set1]", + self.file_path, + self.file_path_sha256, + template, + ) + trg = ( + f"{prfx}/IMAGE_SET[image_set1]/{VELOX_WHICH_IMAGE[unit_combination][0]}" + ) + template[f"{trg}/title"] = f"{flat_hspy_meta['General/title']}" + template[f"{trg}/@signal"] = f"real" # TODO::unless COMPLEX + template[f"{trg}/real"] = {"compress": obj["data"], "strength": 1} + axis_names = VELOX_WHICH_IMAGE[unit_combination][1] else: - print(f"WARNING eds_spc for {dims} is not implemented!") - return template - trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" - f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" - f"SPECTRUM_SET[spectrum_set{self.id_mgn['event_spc']}]" - ) - template[f"{trg}/source"] = meta["General/title"] - template[f"{trg}/PROCESS[process]/source/type"] = "file" - template[f"{trg}/PROCESS[process]/source/path"] = self.file_path - template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 - template[f"{trg}/PROCESS[process]/source/algorithm"] = ( - DEFAULT_CHECKSUM_ALGORITHM - ) - template[f"{trg}/PROCESS[process]/detector_identifier"] = ( - f"Check carefully how rsciio/hyperspy knows this {meta['General/title']}!" - ) - # TODO::the examples from E. 
Spiecker's group clearly show that indeed rosettasciio - # does a good job in reporting which elements where shown with EDX - # BUT: this is seems to be just copied into the title already by rosettasciio - # if reliant one could use this to auto-populate the - # /ENTRY[entry*]/sample/atom_types like what we do in atom probe - # BUT: in atom probe "pollutes" almost every NXentry with atoms that are typical - # in almost every atom probe dataset like carbon and hydrogen but this, in effect - # the filter effectiveness in a search will be poor as all entries will be showing - # up, is this what scientists want ? - trg = ( - f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" - f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" - f"SPECTRUM_SET[spectrum_set{self.id_mgn['event_spc']}]" - ) - if n_dims == 1: - trg = trg.replace(trg, f"{trg}/spectrum_zerod") - elif n_dims == 2: - trg = trg.replace(trg, f"{trg}/spectrum_oned") - elif n_dims == 3: - trg = trg.replace(trg, f"{trg}/spectrum_twod") - template[f"{trg}/@signal"] = "intensity" - if n_dims == 1: - template[f"{trg}/@axes"] = ["axis_energy"] - template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0) - support, unit = get_named_axis(obj["axes"], "Energy") - template[f"{trg}/AXISNAME[axis_energy]"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/AXISNAME[axis_energy]/@long_name"] = f"Energy ({unit})" - if n_dims == 3: - template[f"{trg}/@axes"] = ["axis_y", "axis_x", "axis_energy"] - for dim, idx in [("y", 2), ("x", 1)]: - template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32( - idx - ) - support, unit = get_named_axis(obj["axes"], dim) - template[f"{trg}/AXISNAME[axis_{dim}]"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = ( - f"Coordinate along {dim}-axis ({unit})" - ) - template[f"{trg}/@AXISNAME_indices[axis_energy_indices]"] = np.uint32(0) - support, unit = get_named_axis(obj["axes"], "X-ray 
energy") - template[f"{trg}/AXISNAME[axis_energy]"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/AXISNAME[axis_energy]/@long_name"] = f"Energy ({unit})" - template[f"{trg}/title"] = f"EDS spectrum {meta['General/title']}" - template[f"{trg}/intensity"] = { - "compress": np.asarray(obj["data"]), - "strength": 1, - } - template[f"{trg}/intensity/@long_name"] = "Count (1)" - self.add_metadata( - orgmeta, - [self.entry_id, self.id_mgn["event"], self.id_mgn["event_spc"]], - template, - ) - self.id_mgn["event_spc"] += 1 - self.id_mgn["event"] += 1 - return template - - def normalize_eds_map_content(self, obj: dict, template: dict) -> dict: - """Map relevant EDS map to NeXus.""" - meta = fd.FlatDict(obj["metadata"], "/") - dims = get_axes_dims(obj["axes"]) - if len(dims) != 2: - raise ValueError(f"{obj['axes']}") - trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/indexing" - template[f"{trg}/source"] = meta["General/title"] - trg = ( - f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/indexing/" - f"IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]" - ) - template[f"{trg}/PROCESS[process]/source/type"] = "file" - template[f"{trg}/PROCESS[process]/source/path"] = self.file_path - template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 - template[f"{trg}/PROCESS[process]/source/algorithm"] = ( - DEFAULT_CHECKSUM_ALGORITHM - ) - template[f"{trg}/PROCESS[process]/detector_identifier"] = ( - f"Check carefully how rsciio/hyperspy knows this {meta['General/title']}!" - ) - # template[f"{trg}/description"] = "" - # template[f"{trg}/energy_range"] = (0., 0.) 
- # template[f"{trg}/energy_range/@units"] = "keV" - # template[f"{trg}/iupac_line_candidates"] = "" - template[f"{trg}/image_twod/@signal"] = "intensity" - template[f"{trg}/image_twod/@axes"] = [] - for dim in dims: - template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}") - template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}_indices]"] = ( - np.uint32(dim[1]) + self.annotate_information_source( + f"{prfx}/DATA[data1]", self.file_path, self.file_path_sha256, template ) - support, unit = get_named_axis(obj["axes"], dim[0]) - if support is not None and unit is not None: - template[f"{trg}/image_twod/AXISNAME[axis_{dim[0]}]"] = { - "compress": support, - "strength": 1, - } - template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] = ( - f"Coordinate along {dim[0]}-axis ({unit})" - ) - template[f"{trg}/image_twod/title"] = f"EDS map {meta['General/title']}" - template[f"{trg}/image_twod/intensity"] = { - "compress": np.asarray(obj["data"]), - "strength": 1, - } - self.id_mgn["eds_img"] += 1 - self.id_mgn["roi"] += 1 # TODO not necessarily has to be incremented! - return template - - def normalize_eels_content(self, obj: dict, template: dict) -> dict: + trg = f"{prfx}/DATA[data1]" + template[f"{trg}/title"] = f"{flat_hspy_meta['General/title']}" + template[f"{trg}/@NX_class"] = f"NXdata" + template[f"{trg}/@signal"] = f"data" + template[f"{trg}/data"] = {"compress": obj["data"], "strength": 1} + axis_names = ["axis_i", "axis_j", "axis_k", "axis_l", "axis_m"][ + 0 : len(unit_combination.split("_")) + ] # TODO mind order + + if len(axis_names) >= 1: + # TODO arrays axis_names and dimensional_calibrations are aligned in order + # TODO but that order is reversed wrt to AXISNAME_indices ! + for idx, axis_name in enumerate(axis_names): + template[f"{trg}/@AXISNAME_indices[{axis_name}_indices]"] = np.uint32( + len(axis_names) - 1 - idx + ) # TODO::check with dissimilarly sized data array if this is idx ! 
+ template[f"{trg}/@axes"] = axis_names + + for idx, axis in enumerate(axes): + axis_name = axis_names[idx] + offset = axis["offset"] + step = axis["scale"] + units = axis["units"] + count = np.shape(obj["data"])[idx] + if units == "": + template[f"{trg}/AXISNAME[{axis_name}]"] = np.float32(offset) + ( + np.float32(step) + * np.asarray( + np.linspace( + start=0, stop=count - 1, num=count, endpoint=True + ), + np.float32, + ) + ) + if unit_combination in VELOX_WHICH_SPECTRUM: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Spectrum identifier" + ) + elif unit_combination in VELOX_WHICH_IMAGE: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Image identifier" + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"{axis_name}" + # unitless | dimensionless i.e. no unit in longname + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]"] = np.float32(offset) + ( + np.float32(step) + * np.asarray( + np.linspace( + start=0, stop=count - 1, num=count, endpoint=True + ), + np.float32, + ) + ) + template[f"{trg}/AXISNAME[{axis_name}]/@units"] = ( + f"{ureg.Unit(units)}" + ) + if ( + ureg.Quantity(units).to_base_units().units + == "kilogram * meter ** 2 / second ** 2" + ): + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Energy ({ureg.Unit(units)})" + ) + else: + template[f"{trg}/AXISNAME[{axis_name}]/@long_name"] = ( + f"Point coordinate along {axis_name} ({ureg.Unit(units)})" + ) + + self.process_event_data_em_metadata(obj, template) + self.id_mgn["event_id"] += 1 return template diff --git a/src/pynxtools_em/reader.py b/src/pynxtools_em/reader.py index 88186fa..637fff6 100644 --- a/src/pynxtools_em/reader.py +++ b/src/pynxtools_em/reader.py @@ -17,24 +17,27 @@ # """Parser for loading generic orientation microscopy data based on .""" -from os import getcwd from time import perf_counter_ns -from typing import Any, Tuple +from typing import Any, List, Tuple import numpy as np from 
pynxtools.dataconverter.readers.base.reader import BaseReader from pynxtools_em.concepts.nxs_concepts import NxEmAppDef -from pynxtools_em.parsers.convention_reader import NxEmConventionParser +from pynxtools_em.parsers.conventions_reader import NxEmConventionParser +from pynxtools_em.parsers.image_png_protochips import ProtochipsPngSetParser +from pynxtools_em.parsers.image_tiff_hitachi import HitachiTiffParser from pynxtools_em.parsers.image_tiff_jeol import JeolTiffParser -from pynxtools_em.parsers.nxs_imgs import NxEmImagesParser +from pynxtools_em.parsers.image_tiff_point_electronic import PointElectronicTiffParser +from pynxtools_em.parsers.image_tiff_tescan import TescanTiffParser +from pynxtools_em.parsers.image_tiff_tfs import TfsTiffParser +from pynxtools_em.parsers.image_tiff_zeiss import ZeissTiffParser from pynxtools_em.parsers.nxs_mtex import NxEmNxsMTexParser from pynxtools_em.parsers.nxs_nion import NionProjectParser from pynxtools_em.parsers.nxs_pyxem import NxEmNxsPyxemParser -from pynxtools_em.parsers.oasis_config_reader import ( - NxEmNomadOasisConfigurationParser, -) +from pynxtools_em.parsers.oasis_config_reader import NxEmNomadOasisConfigParser from pynxtools_em.parsers.oasis_eln_reader import NxEmNomadOasisElnSchemaParser +from pynxtools_em.parsers.rsciio_gatan import RsciioGatanParser from pynxtools_em.parsers.rsciio_velox import RsciioVeloxParser from pynxtools_em.utils.io_case_logic import EmUseCaseSelector from pynxtools_em.utils.nx_atom_types import NxEmAtomTypesResolver @@ -82,7 +85,7 @@ def read( if len(case.cfg) == 1: print("Parse (meta)data coming from a configuration of an RDM...") # having or using a deployment-specific configuration is optional - nx_em_cfg = NxEmNomadOasisConfigurationParser(case.cfg[0], entry_id) + nx_em_cfg = NxEmNomadOasisConfigParser(case.cfg[0], entry_id) nx_em_cfg.report(template) if len(case.eln) == 1: @@ -101,27 +104,32 @@ def read( conventions.parse(template) print("Parse and map pieces of information 
within files from tech partners...") - if len(case.dat) == 1: - images = NxEmImagesParser(entry_id, case.dat[0], verbose=False) - images.parse(template) - - velox = RsciioVeloxParser(entry_id, case.dat[0], verbose=False) - velox.parse(template) - - nxs_mtex = NxEmNxsMTexParser(entry_id, case.dat[0], verbose=False) - nxs_mtex.parse(template) - - nxs_pyxem = NxEmNxsPyxemParser(entry_id, case.dat[0], verbose=False) - nxs_pyxem.parse(template) - - nxs_nion = NionProjectParser(entry_id, case.dat[0], verbose=False) - nxs_nion.parse(template) - - # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id) + if len(case.dat) == 1: # no sidecar file + parsers: List[type] = [ + TfsTiffParser, + ZeissTiffParser, + PointElectronicTiffParser, + ProtochipsPngSetParser, + RsciioVeloxParser, + RsciioGatanParser, + NxEmNxsMTexParser, + NxEmNxsPyxemParser, + NionProjectParser, + ] + for parser_type in parsers: + parser = parser_type(case.dat[0], entry_id, verbose=False) + parser.parse(template) + + # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id, verbose=False) # zip_parser.parse(template) - elif len(case.dat) == 2: - jeol = JeolTiffParser(case.dat, entry_id, verbose=False) - jeol.parse(template) + if len(case.dat) >= 1: # optional sidecar file + tescan = TescanTiffParser(case.dat, entry_id, verbose=False) + tescan.parse(template) + + if len(case.dat) == 2: # for sure with sidecar file + for parser_type in [JeolTiffParser, HitachiTiffParser]: + parser = parser_type(case.dat, entry_id, verbose=False) + parser.parse(template) nxplt = NxEmDefaultPlotResolver() nxplt.priority_select(template) diff --git a/src/pynxtools_em/utils/gatan_utils.py b/src/pynxtools_em/utils/gatan_utils.py new file mode 100644 index 0000000..6c03faa --- /dev/null +++ b/src/pynxtools_em/utils/gatan_utils.py @@ -0,0 +1,72 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utility function for working with mapping of Gatan DigitalMicrograph content.""" + +from pint import UndefinedUnitError +from pynxtools_em.utils.pint_custom_unit_registry import ureg + + +def gatan_image_spectrum_or_generic_nxdata(list_of_dict) -> str: + """Encode sequence of units to tell whether NXimage_set, NXspectrum_set, NXdata.""" + if len(list_of_dict) >= 1: + token = [] + for obj in list_of_dict: + if isinstance(obj, dict): + if list(obj.keys()) == [ + "name", + "size", + "index_in_array", + "scale", + "offset", + "units", + "navigate", + ]: + if obj["units"] == "": + token.append("unitless") + else: + token.append(obj["units"]) + else: + raise ValueError( + f"{obj.keys()} are not exactly the expected keywords!" + ) + else: + raise ValueError(f"{obj} is not a dict!") + if len(token) >= 1: + print("_".join(token)) + unit_categories = [] + for unit in token: + if unit != "unitless": + try: + q = ureg.Quantity(unit) + base_unit = q.to_base_units().units + if base_unit == "1/meter": + unit_categories.append("1/m") + elif base_unit == "meter": + unit_categories.append("m") + elif base_unit == "kilogram * meter ** 2 / second ** 2": + unit_categories.append("eV") + else: + raise ValueError( + f"Hitting an undefined case for base_unit {base_unit} !" 
+ ) + except UndefinedUnitError: + return "" + else: + unit_categories.append(unit) + return "_".join(unit_categories) + return "" diff --git a/src/pynxtools_em/utils/interpret_boolean.py b/src/pynxtools_em/utils/interpret_boolean.py index a3f75ec..2551a40 100644 --- a/src/pynxtools_em/utils/interpret_boolean.py +++ b/src/pynxtools_em/utils/interpret_boolean.py @@ -17,6 +17,8 @@ # """Interpret different human-readable forms of a boolean statement to boolean.""" +from typing import Any + HUMAN_BOOLEAN_STATEMENT = { "0": False, "1": True, @@ -29,10 +31,18 @@ } -def try_interpret_as_boolean(arg: str) -> bool: +def try_interpret_as_boolean(arg: Any) -> bool: """Try to interpret a human string statement if boolean be strict.""" - if arg.lower() in HUMAN_BOOLEAN_STATEMENT: - return HUMAN_BOOLEAN_STATEMENT[arg.lower()] - raise KeyError( - f"try_to_interpret_as_boolean argument {arg} does not yield key even for {arg.lower()}!" - ) + if isinstance(arg, bool): + return arg + elif isinstance(arg, str): + if arg.lower() in HUMAN_BOOLEAN_STATEMENT: + return HUMAN_BOOLEAN_STATEMENT[arg.lower()] + else: + raise KeyError( + f"try_to_interpret_as_boolean argument {arg} does not yield key even for {arg.lower()}!" + ) + else: + raise ValueError( + f"try_to_interpret_as_boolean argument {arg} cannot be converted to bool !" 
+ ) diff --git a/src/pynxtools_em/utils/io_case_logic.py b/src/pynxtools_em/utils/io_case_logic.py index 96e2d04..cf1cd27 100644 --- a/src/pynxtools_em/utils/io_case_logic.py +++ b/src/pynxtools_em/utils/io_case_logic.py @@ -22,10 +22,11 @@ VALID_FILE_NAME_SUFFIX_CONFIG = [".yaml", ".yml"] VALID_FILE_NAME_SUFFIX_DATA = [ ".emd", + ".dm3", + ".dm4", ".tiff", ".tif", - ".zip.axon", - ".zip.nion", + ".zip", ".nsproj", ".edaxh5", ".h5", @@ -34,8 +35,8 @@ ".mtex.h5", ".dream3d", ".txt", + ".hdr", ] -# ".dm3", ".dm4"] class EmUseCaseSelector: diff --git a/src/pynxtools_em/utils/nion_utils.py b/src/pynxtools_em/utils/nion_utils.py index bb14276..b1f6f28 100644 --- a/src/pynxtools_em/utils/nion_utils.py +++ b/src/pynxtools_em/utils/nion_utils.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -"""Utility function for mapping nionswift identifier to suffix used for identifying files in project.""" +"""Utility functions for working with Nion Co. content and concepts.""" import uuid @@ -36,3 +36,25 @@ def uuid_to_file_name(data_item_uuid_str: str) -> str: data_item_uuid_uuid = uuid.UUID(f"{data_item_uuid_str}") return f'data_{encode(data_item_uuid_uuid, "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890")}' # 25 character results + + +def nion_image_spectrum_or_generic_nxdata(list_of_dict) -> str: + """Encode sequence of units to tell whether NXimage_set, NXspectrum_set, NXdata.""" + if len(list_of_dict) >= 1: + token = [] + for obj in list_of_dict: + if isinstance(obj, dict): + if list(obj.keys()) == ["offset", "scale", "units"]: + if obj["units"] == "": + token.append("unitless") + else: + token.append(obj["units"]) + else: + raise ValueError( + f"{obj.keys()} are not exactly the expected keywords!" 
+ ) + else: + raise ValueError(f"{obj} is not a dict!") + if len(token) >= 1: + return "_".join(token) + return "" diff --git a/src/pynxtools_em/utils/numerics.py b/src/pynxtools_em/utils/numerics.py new file mode 100644 index 0000000..5b9e995 --- /dev/null +++ b/src/pynxtools_em/utils/numerics.py @@ -0,0 +1,21 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Constants, numerical settings, etc.""" + +REAL_SPACE = 0 +COMPLEX_SPACE = 1 diff --git a/src/pynxtools_em/utils/nx_default_plots.py b/src/pynxtools_em/utils/nx_default_plots.py index 1ab9584..8154d80 100644 --- a/src/pynxtools_em/utils/nx_default_plots.py +++ b/src/pynxtools_em/utils/nx_default_plots.py @@ -66,11 +66,13 @@ def priority_select(self, template: dict, entry_id: int = 1) -> dict: dtyp_vote = [ ("IMAGE_SET", "image", 1), + ("IMAGE_SET", "stack", 1), ("SPECTRUM_SET", "spectrum", 2), + ("SPECTRUM_SET", "stack", 2), ] for key in template.keys(): for tpl in dtyp_vote: - for dimensionality in ["zerod", "oned", "twod", "threed"]: + for dimensionality in ["0d", "1d", "2d", "3d"]: head = f"{tpl[0]}[" idx_head = key.find(head) tail = f"]/{tpl[1]}_{dimensionality}" diff --git a/src/pynxtools_em/utils/pint_custom_unit_registry.py b/src/pynxtools_em/utils/pint_custom_unit_registry.py new file mode 100644 index 0000000..a75f885 --- /dev/null +++ 
b/src/pynxtools_em/utils/pint_custom_unit_registry.py @@ -0,0 +1,69 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""A customized unit registry for handling units with pint.""" + +import numpy as np +import pint +from pint import UnitRegistry + +ureg = UnitRegistry() +# ureg.formatter.default_format = "D" +# https://pint.readthedocs.io/en/stable/user/formatting.html + +# customizations for Zeiss +ureg.define("Hours = 1 * h") +ureg.define("Secs = 1 * s") +ureg.define("Volt = 1 * V") + +# customizations for NeXus +ureg.define("nx_unitless = 1") +ureg.define("nx_dimensionless = 1") +ureg.define("nx_any = 1") + +NX_UNITLESS = ureg.Quantity(1, ureg.nx_unitless) +NX_DIMENSIONLESS = ureg.Quantity(1, ureg.nx_dimensionless) +NX_ANY = ureg.Quantity(1, ureg.nx_any) + + +def is_not_special_unit(units: pint.Unit) -> bool: + """True if not a special NeXus unit category.""" + for special_units in [NX_UNITLESS.units, NX_DIMENSIONLESS.units, NX_ANY.units]: + if units == special_units: + return False + return True + + +PINT_MAPPING_TESTS = { + "use": [ + ("str_str_01", ""), + ("str_str_02", "one"), + ("str_qnt_01", NX_UNITLESS), + ("str_qnt_02", NX_DIMENSIONLESS), + ("str_qnt_03", NX_ANY), + ("str_qnt_04", ureg.Quantity(1, ureg.meter)), + ("str_qnt_05", ureg.Quantity(1, ureg.nx_unitless)), + ("str_qnt_06", ureg.Quantity(1, ureg.nx_dimensionless)), + ("str_qnt_07", 
ureg.Quantity(1, ureg.nx_any)), + ("str_qnt_08", ureg.Quantity(np.uint32(1), ureg.meter)), + ("str_qnt_09", ureg.Quantity(np.uint32(1), ureg.nx_unitless)), + ("str_qnt_10", ureg.Quantity(np.uint32(1), ureg.nx_dimensionless)), + ("str_qnt_11", ureg.Quantity(np.uint32(1), ureg.nx_any)), + ("str_qnt_12", ureg.Quantity(np.asarray([1, 2, 3], np.uint32), ureg.meter)), + ], + "map": [], +} diff --git a/src/pynxtools_em/utils/rsciio_hspy_utils.py b/src/pynxtools_em/utils/rsciio_hspy_utils.py index b5c8388..f78aae4 100644 --- a/src/pynxtools_em/utils/rsciio_hspy_utils.py +++ b/src/pynxtools_em/utils/rsciio_hspy_utils.py @@ -20,6 +20,16 @@ import numpy as np +def all_req_keywords_in_dict(dct: dict, keywords: list) -> bool: + """Check if dict dct has all keywords in keywords as keys from.""" + # falsifiable? + for key in keywords: + if key in dct: + continue + return False + return True + + def get_named_axis(axes_metadata, dim_name): """Return numpy array with tuple (axis pos, unit) along dim_name or None.""" retval = None diff --git a/src/pynxtools_em/utils/tfs_utils.py b/src/pynxtools_em/utils/tfs_utils.py new file mode 100644 index 0000000..cf77319 --- /dev/null +++ b/src/pynxtools_em/utils/tfs_utils.py @@ -0,0 +1,32 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utility functions for working with ThermoFisher content and concepts.""" + +from typing import List + +from pynxtools_em.configurations.image_tiff_tfs_cfg import TIFF_TFS_ALL_CONCEPTS + + +def get_fei_childs(parent_concept: str) -> List: + """Get all children of FEI parent concept.""" + child_concepts = set() + for entry in TIFF_TFS_ALL_CONCEPTS: + if isinstance(entry, str) and entry.count("/") == 1: + if entry.startswith(f"{parent_concept}/") is True: + child_concepts.add(entry.split("/")[1]) + return list(child_concepts) diff --git a/src/pynxtools_em/utils/velox_utils.py b/src/pynxtools_em/utils/velox_utils.py new file mode 100644 index 0000000..ef6bb48 --- /dev/null +++ b/src/pynxtools_em/utils/velox_utils.py @@ -0,0 +1,72 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utility function for working with mapping of Velox content.""" + +from typing import Dict + +import pint +from pynxtools_em.utils.pint_custom_unit_registry import ureg + +RSCIIO_AXES_MIN = ["name", "navigate", "offset", "scale", "size", "units"] +RSCIIO_AXES_MAX = ["index_in_array"] + RSCIIO_AXES_MIN + + +def velox_image_spectrum_or_generic_nxdata(list_of_dict) -> str: + """Encode sequence of units to tell whether NXimage_set, NXspectrum_set, NXdata.""" + if len(list_of_dict) >= 1: + token = [] + for obj in list_of_dict: + if isinstance(obj, dict): + sorted_keys = sorted(obj.keys()) + if sorted_keys == RSCIIO_AXES_MIN or sorted_keys == RSCIIO_AXES_MAX: + if obj["units"] == "": + token.append("unitless") + else: + token.append(obj["units"]) + else: + raise ValueError( + f"{obj.keys()} are not exactly the expected keywords!" + ) + else: + raise ValueError(f"{obj} is not a dict!") + if len(token) >= 1: + print("_".join(token)) + unit_categories = [] + for unit in token: + if unit != "unitless": + try: + q = ureg.Quantity(unit) + base_unit_map: Dict[str, str] = { + "1/meter": "1/m", + "meter": "m", + "kilogram * meter ** 2 / second ** 2": "eV", + "second": "s", + } + base_unit = base_unit_map.get(q.to_base_units().units) + if base_unit: + unit_categories.append(base_unit) + else: + raise ValueError( + f"Hitting an undefined case for base_unit {q.to_base_units().units} !" + ) + except pint.UndefinedUnitError: + return "" + else: + unit_categories.append(unit) + return "_".join(unit_categories) + return ""