FAIRmat-NFDI · mkuehbach · Aug 26, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -11,9 +11,10 @@
             "cwd": "${workspaceFolder}",
             "program": "../.py3.12.4/bin/dataconverter",
             "args": ["convert",
-                     "examples/eln_data.yaml",
-                     "examples/em.oasis.specific.yaml",
-                     "../ebic_dm3_goette/documents-export-2024-06-06/EBIC/Defekt1.tif",
+                     // "examples/eln_data.yaml",
+                     // "examples/em.oasis.specific.yaml",
+                     "../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.txt",
+                     "../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.tif",
                      "--reader",
                      "em",
                      "--nxdl",

diff --git a/docs/index.md b/docs/index.md
@@ -57,7 +57,7 @@ for the respective file formats of technology partners of the electron microscop
 - [How to map pieces of information to NeXus](reference/contextualization.md)
 - [Tagged Image File Format (TIFF)](reference/tiff.md)
 - [Portable Network Graphics (PNG)](reference/png.md)
-- [Velox EMD](reference/vemd.md)
+- [Velox EMD](reference/velox.md)
 - [EDAX APEX](reference/apex.md)
 - [Nion Co. projects](reference/nion.md)
 

diff --git a/docs/reference/tiff.md b/docs/reference/tiff.md
@@ -7,3 +7,7 @@ The pynxtools-em parser and normalizer reads the following content and maps them
 | --------------- | --------------  |
 | Reconstructed positions (x, y, z) | :heavy_check_mark: |
 | Mass-to-charge-state-ratio values (m/q) | :heavy_check_mark: |-->
+
+<!-- ThermoFisher-->
+<!-- point electronic DISS-->
+<!-- JEOL-->
diff --git a/docs/reference/vemd.md → docs/reference/velox.md b/docs/reference/vemd.md → docs/reference/velox.md
diff --git a/mkdocs.yaml b/mkdocs.yaml
@@ -21,7 +21,7 @@ nav:
     - reference/contextualization.md
     - reference/tiff.md
     - reference/png.md
-    - reference/vemd.md
+    - reference/velox.md
     - reference/apex.md
     - reference/nion.md
 plugins:

diff --git a/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py b/src/pynxtools_em/configurations/image_tiff_jeol_cfg.py
@@ -0,0 +1,53 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Configuration of the image_tiff_jeol parser."""
+
+from pint import UnitRegistry
+
+ureg = UnitRegistry()
+
+
+# Mapping of metadata from the JEOL text file to concepts below the
+# EVENT_DATA_EM instance named in "prefix_trg".
+# presumably each tuple in "map_to_f8" reads either
+#   (target concept, source key) or
+#   (target concept, target unit, source key, source unit)
+# and the value ends up as a float64 -- TODO confirm against
+# add_specific_metadata_pint which interprets this dictionary
+# NOTE(review): working_distance is the only entry with centimeter as the
+# target unit -- confirm that this is intended
+JEOL_VARIOUS_DYNAMIC_TO_NX_EM = {
+    "prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]",
+    "prefix_src": "",
+    "map_to_f8": [
+        ("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "CM_MAG"),
+        (
+            "em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance",
+            ureg.centimeter,
+            "SM_WD",
+            ureg.millimeter,
+        ),
+        (
+            "em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage",
+            ureg.volt,
+            "CM_ACCEL_VOLTAGE",
+            ureg.kilovolt,
+        ),
+    ],
+}
+
+
+# Mapping of metadata from the JEOL text file to concepts below the em_lab
+# group named in "prefix_trg".
+# presumably "use" writes the given constant to the target concept while
+# "map" copies the value of the source key to the target concept -- TODO
+# confirm against add_specific_metadata_pint which interprets this dictionary
+JEOL_VARIOUS_STATIC_TO_NX_EM = {
+    "prefix_trg": "/ENTRY[entry*]/measurement/em_lab",
+    "prefix_src": "",
+    "use": [("FABRICATION[fabrication]/vendor", "JEOL")],
+    "map": [
+        ("FABRICATION[fabrication]/model", "CM_INSTRUMENT"),
+    ],
+}
diff --git a/src/pynxtools_em/parsers/image_tiff_jeol.py b/src/pynxtools_em/parsers/image_tiff_jeol.py
@@ -0,0 +1,256 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Subparser for harmonizing JEOL specific content in TIFF files."""
+
+import mmap
+from typing import Dict, List
+
+import flatdict as fd
+import numpy as np
+import pint
+from PIL import Image, ImageSequence
+from pint import UnitRegistry
+from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint
+from pynxtools_em.configurations.image_tiff_jeol_cfg import (
+    JEOL_VARIOUS_DYNAMIC_TO_NX_EM,
+    JEOL_VARIOUS_STATIC_TO_NX_EM,
+)
+from pynxtools_em.parsers.image_tiff import TiffParser
+from pynxtools_em.utils.string_conversions import string_to_number
+
+ureg = UnitRegistry()
+
+
+class JeolTiffParser(TiffParser):
+    """Parse a JEOL TIFF image together with its text file of metadata.
+
+    The image is read from a TIFF file, the metadata from a text file with
+    "$KEY VALUE" lines. Both files need to have the same name except for the
+    file name extension.
+    """
+
+    def __init__(self, file_paths: List[str], entry_id: int = 1, verbose=False):
+        """Identify the TIFF and TXT file and check if these are JEOL content.
+
+        Args:
+            file_paths: Two paths, one ending in .tif or .tiff, the other in .txt
+                (case-insensitive, any order), equal except for the extension.
+            entry_id: Number of the NeXus entry that parse() writes to.
+            verbose: If True, the collected metadata are printed.
+        """
+        tif_txt = ["", ""]
+        if len(file_paths) == 2:
+            # take the part in front of the last dot of each path individually,
+            # slicing both paths with the dot position of the first one only
+            # compares prefixes and accepts e.g. a.tif and a.b.txt as a pair
+            stems = [path.rpartition(".")[0] for path in file_paths]
+            if stems[0] == stems[1]:
+                for entry in file_paths:
+                    if entry.lower().endswith((".tif", ".tiff")):
+                        tif_txt[0] = entry
+                    elif entry.lower().endswith(".txt"):
+                        tif_txt[1] = entry
+        is_pair = all(value != "" for value in tif_txt)
+        if is_pair:
+            super().__init__(tif_txt[0])
+        else:
+            # the base class is not initialized without a TIFF file, file_path
+            # has to exist nevertheless because parse() reports it
+            # NOTE(review): assumes TiffParser keeps the path in a plain
+            # attribute named file_path -- confirm
+            self.file_path = tif_txt[0]
+        # defined for both cases so that the instance is always in a usable state
+        self.entry_id = entry_id
+        self.event_id = 1
+        self.verbose = verbose
+        self.txt_file_path = tif_txt[1]
+        self.prfx = None
+        self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})}
+        self.supported_version: Dict = {}
+        self.version: Dict = {}
+        self.tags: Dict = {}
+        self.supported = False
+        if is_pair:
+            self.check_if_tiff_jeol()
+        else:
+            print(f"Parser {self.__class__.__name__} needs TIF and TXT file !")
+
+    def check_if_tiff_jeol(self):
+        """Check if the TIFF and TXT file are content that this parser supports.
+
+        The metadata are loaded from txt_file_path into
+        self.tmp["flat_dict_meta"] so that the formatting of that information
+        can be used to tell JEOL data apart from other data. The result of the
+        check is stored in self.supported.
+
+        Raises:
+            KeyError: If a key occurs more than once in the text file.
+        """
+        # currently no voting-based algorithm is required like in other parsers
+        self.supported = False
+        if not self.txt_file_path:
+            print(
+                f"Parser {self.__class__.__name__} does not work without a JEOL text file with the image metadata ! "
+                f"This file is required to have exactly the same file name as the file with the TIF image data !"
+            )
+            return
+        with open(self.file_path, "rb", 0) as file:
+            try:
+                # the context manager releases the memory map again
+                with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
+                    magic = s.read(4)
+            except ValueError:
+                # an empty file cannot be memory-mapped and is no TIFF either
+                magic = b""
+        if magic != b"II*\x00":  # https://en.wikipedia.org/wiki/TIFF
+            print(
+                f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports"
+            )
+            return
+
+        # only lines which start with the $ character carry metadata
+        with open(self.txt_file_path, "r") as txt_fp:
+            lines = [
+                line.strip().lstrip("$") for line in txt_fp if line.startswith("$")
+            ]
+
+        meta = fd.FlatDict({}, "/")
+        for line in lines:
+            tokens = line.split()
+            if len(tokens) != 2:
+                # keys without a value or with several values
+                print(f"WARNING::{line} is currently ignored !")
+                continue
+            key, value = tokens
+            if key in meta:
+                raise KeyError(f"Found duplicated key {key} !")
+            # converting every value via pint is not robust, e.g.
+            # CM_TITLE 20240227_A1_2m_0_FA3_1 fails with an invalid decimal literal
+            # therefore a mixture of pint quantities and regular numpy / pure
+            # Python types is used, the mapping functor should take care of
+            # resolving the cases properly
+            if key == "SM_MICRON_MARKER":
+                # created with the registry of this module as the value is
+                # converted with units of that registry later on
+                meta[key] = ureg.Quantity(value)
+            else:
+                meta[key] = string_to_number(value)
+        self.tmp["flat_dict_meta"] = meta
+
+        if self.verbose:
+            for key, value in meta.items():
+                print(f"{key}______{type(value)}____{value}")
+
+        # text files of other origin do not necessarily have these keys
+        if (
+            "SEM_DATA_VERSION" in meta
+            and "CM_LABEL" in meta
+            and meta["SEM_DATA_VERSION"] == 1
+            and meta["CM_LABEL"] == "JEOL"
+        ):
+            self.supported = True
+        else:
+            print(
+                f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports"
+            )
+
+    def parse(self, template: dict) -> dict:
+        """Add metadata and image data to template if the files are supported.
+
+        Args:
+            template: Dictionary of NeXus concept paths and their values, it is
+                modified in place.
+
+        Returns:
+            The template that was passed in.
+        """
+        if self.supported is True:
+            print("Parsing via JEOL...")
+            # metadata have at this point already been collected into an fd.FlatDict
+            self.process_event_data_em_metadata(template)
+            self.process_event_data_em_data(template)
+        else:
+            print(
+                f"{self.file_path} is not a JEOL-specific TIFF file that this parser can process !"
+            )
+        return template
+
+    def _pixel_size_in_meter(self) -> float:
+        """Return the edge length of a pixel in meter, 1.0 without calibration."""
+        meta = self.tmp["flat_dict_meta"]
+        if ("SM_MICRON_BAR" in meta) and ("SM_MICRON_MARKER" in meta):
+            # JEOL-specific conversion for micron bar pixel to physical length
+            resolution = int(meta["SM_MICRON_BAR"])
+            physical_length = meta["SM_MICRON_MARKER"].to(ureg.meter).magnitude
+            # resolution many pixel represent physical_length scanned surface
+            # assuming square pixel
+            print(f"resolution {resolution}, L {physical_length}")
+            if resolution > 0:
+                return physical_length / resolution
+            print("WARNING: SM_MICRON_BAR is not positive, calibration is ignored!")
+        print("WARNING: Assuming pixel width and height unit is meter!")
+        return 1.0
+
+    def process_event_data_em_data(self, template: dict) -> dict:
+        """Add respective heavy data.
+
+        Each image of the TIFF file is written as an image_twod instance with
+        its signal and one calibrated axis per dimension.
+
+        Args:
+            template: Dictionary of NeXus concept paths and their values, it is
+                modified in place.
+
+        Returns:
+            The template that was passed in.
+        """
+        # default display of the image(s) representing the data collected in this event
+        print(
+            "Writing JEOL TIFF image data to the respective NeXus concept instances..."
+        )
+        dims = ["i", "j"]  # i == x (fastest), j == y (slowest)
+        # the calibration comes from the text file, i.e. it is the same for all images
+        pixel_size = self._pixel_size_in_meter()
+        sxy = {"i": pixel_size, "j": pixel_size}
+        scan_unit = {"i": "m", "j": "m"}
+        with Image.open(self.file_path, mode="r") as fp:
+            for image_identifier, img in enumerate(
+                ImageSequence.Iterator(fp), start=1
+            ):
+                nparr = np.array(img)
+                print(
+                    f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}"
+                )
+                # eventually similar open discussions points as were raised for tiff_tfs parser
+                trg = (
+                    f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/"
+                    f"EVENT_DATA_EM[event_data_em{self.event_id}]/"
+                    f"IMAGE_SET[image_set{image_identifier}]/image_twod"
+                )
+                template[f"{trg}/title"] = "Image"
+                template[f"{trg}/@signal"] = "real"
+                for idx, dim in enumerate(dims):
+                    template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = (
+                        np.uint32(idx)
+                    )
+                template[f"{trg}/@axes"] = [f"axis_{dim}" for dim in dims[::-1]]
+                # nparr already holds the current image, no need to convert again
+                template[f"{trg}/real"] = {"compress": nparr, "strength": 1}
+                #  0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d
+                template[f"{trg}/real/@long_name"] = "Signal"
+
+                nxy = {"i": np.shape(nparr)[1], "j": np.shape(nparr)[0]}
+                # TODO::be careful we assume here a very specific coordinate system
+                # however, these assumptions need to be confirmed for JEOL
+                # additional points as discussed already in comments to TFS TIFF reader
+                for dim in dims:
+                    template[f"{trg}/AXISNAME[axis_{dim}]"] = {
+                        "compress": np.asarray(
+                            np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True)
+                            * sxy[dim],
+                            np.float64,
+                        ),
+                        "strength": 1,
+                    }
+                    template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = (
+                        f"Coordinate along {dim}-axis ({scan_unit[dim]})"
+                    )
+                    template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}"
+        return template
+
+    def add_various_dynamic(self, template: dict) -> dict:
+        """Map metadata as configured in JEOL_VARIOUS_DYNAMIC_TO_NX_EM."""
+        identifier = [self.entry_id, self.event_id, 1]
+        add_specific_metadata_pint(
+            JEOL_VARIOUS_DYNAMIC_TO_NX_EM,
+            self.tmp["flat_dict_meta"],
+            identifier,
+            template,
+        )
+        return template
+
+    def add_various_static(self, template: dict) -> dict:
+        """Map metadata as configured in JEOL_VARIOUS_STATIC_TO_NX_EM."""
+        identifier = [self.entry_id, self.event_id, 1]
+        add_specific_metadata_pint(
+            JEOL_VARIOUS_STATIC_TO_NX_EM,
+            self.tmp["flat_dict_meta"],
+            identifier,
+            template,
+        )
+        return template
+
+    def process_event_data_em_metadata(self, template: dict) -> dict:
+        """Add respective metadata."""
+        # contextualization to understand how the image relates to the EM session
+        print("Mapping some of JEOL metadata on respective NeXus concepts...")
+        self.add_various_dynamic(template)
+        self.add_various_static(template)
+        # ... add more as required ...
+        return template
diff --git a/src/pynxtools_em/parsers/image_tiff_point_electronic.py b/src/pynxtools_em/parsers/image_tiff_point_electronic.py
@@ -157,7 +157,7 @@ def process_event_data_em_data(self, template: dict) -> dict:
                 print(
                     f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}"
                 )
-                # eventually similar open discussions points as for the TFS TIFF parser
+                # eventually similar open discussions points as were raised for tiff_tfs parser
                 trg = (
                     f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/"
                     f"EVENT_DATA_EM[event_data_em{self.event_id}]/"
@@ -180,8 +180,7 @@ def process_event_data_em_data(self, template: dict) -> dict:
                 template[f"{trg}/real/@long_name"] = f"Signal"
 
                 sxy = {"i": 1.0, "j": 1.0}
-                scan_unit = {"i": "m", "j": "m"}  # assuming FEI reports SI units
-                # we may face the CCD overview camera for the chamber for which there might not be a calibration!
+                scan_unit = {"i": "m", "j": "m"}
                 if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and (
                     "PixelSizeY" in self.tmp["flat_dict_meta"]
                 ):

diff --git a/src/pynxtools_em/reader.py b/src/pynxtools_em/reader.py
@@ -26,6 +26,7 @@
 
 from pynxtools_em.concepts.nxs_concepts import NxEmAppDef
 from pynxtools_em.parsers.convention_reader import NxEmConventionParser
+from pynxtools_em.parsers.image_tiff_jeol import JeolTiffParser
 from pynxtools_em.parsers.nxs_imgs import NxEmImagesParser
 from pynxtools_em.parsers.nxs_mtex import NxEmNxsMTexParser
 from pynxtools_em.parsers.nxs_nion import NionProjectParser
@@ -118,6 +119,9 @@ def read(
 
             # zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id)
             # zip_parser.parse(template)
+        elif len(case.dat) == 2:
+            jeol = JeolTiffParser(case.dat, entry_id, verbose=False)
+            jeol.parse(template)
 
         nxplt = NxEmDefaultPlotResolver()
         nxplt.priority_select(template)

diff --git a/src/pynxtools_em/utils/io_case_logic.py b/src/pynxtools_em/utils/io_case_logic.py
@@ -33,6 +33,7 @@
     ".h5oina",
     ".mtex.h5",
     ".dream3d",
+    ".txt",
 ]
 # ".dm3", ".dm4"]