Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser for JEOL TIFF, fixes #39 #43

Merged
merged 6 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
"cwd": "${workspaceFolder}",
"program": "../.py3.12.4/bin/dataconverter",
"args": ["convert",
"examples/eln_data.yaml",
"examples/em.oasis.specific.yaml",
"../ebic_dm3_goette/documents-export-2024-06-06/EBIC/Defekt1.tif",
// "examples/eln_data.yaml",
// "examples/em.oasis.specific.yaml",
"../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.txt",
"../ebic_dm3_goette/documents-export-2024-06-06/SEM/20240227_A1_2m_0_FA3_1.tif",
"--reader",
"em",
"--nxdl",
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ for the respective file formats of technology partners of the electron microscop
- [How to map pieces of information to NeXus](reference/contextualization.md)
- [Tagged Image File Format (TIFF)](reference/tiff.md)
- [Portable Network Graphics (PNG)](reference/png.md)
- [Velox EMD](reference/vemd.md)
- [Velox EMD](reference/velox.md)
- [EDAX APEX](reference/apex.md)
- [Nion Co. projects](reference/nion.md)

Expand Down
4 changes: 4 additions & 0 deletions docs/reference/tiff.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ The pynxtools-em parser and normalizer reads the following content and maps them
| --------------- | -------------- |
| Reconstructed positions (x, y, z) | :heavy_check_mark: |
| Mass-to-charge-state-ratio values (m/q) | :heavy_check_mark: |-->

<!-- ThermoFisher-->
<!-- point electronic DISS-->
<!-- JEOL-->
File renamed without changes.
2 changes: 1 addition & 1 deletion mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ nav:
- reference/contextualization.md
- reference/tiff.md
- reference/png.md
- reference/vemd.md
- reference/velox.md
- reference/apex.md
- reference/nion.md
plugins:
Expand Down
53 changes: 53 additions & 0 deletions src/pynxtools_em/configurations/image_tiff_jeol_cfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Configuration of the image_tiff_jeol parser."""

from pint import UnitRegistry

# Module-level pint unit registry; its unit objects are referenced by the
# unit-aware entries of the mapping table below.
ureg = UnitRegistry()


# Mapping table for JEOL metadata written below an EVENT_DATA_EM instance
# (see "prefix_trg"); the "*" placeholders are presumably filled from the
# identifier list the parser passes to add_specific_metadata_pint - confirm.
# NOTE(review): the 2-tuple in "map_to_f8" looks like (target path, source key)
# and the 4-tuples like (target path, target unit, source key, source unit);
# the functor that interprets them is not part of this file - confirm.
JEOL_VARIOUS_DYNAMIC_TO_NX_EM = {
"prefix_trg": "/ENTRY[entry*]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/EVENT_DATA_EM[event_data_em*]",
"prefix_src": "",
"map_to_f8": [
("em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/magnification", "CM_MAG"),
(
"em_lab/OPTICAL_SYSTEM_EM[optical_system_em]/working_distance",
# NOTE(review): centimeter is paired with a millimeter source here while the
# voltage entry below pairs volt with kilovolt - confirm centimeter is intended
ureg.centimeter,
"SM_WD",
ureg.millimeter,
),
(
"em_lab/EBEAM_COLUMN[ebeam_column]/electron_source/voltage",
ureg.volt,
"CM_ACCEL_VOLTAGE",
ureg.kilovolt,
),
],
}


# Mapping table for JEOL metadata written below the em_lab group (see "prefix_trg").
# NOTE(review): "use" presumably pairs a target path with a constant value and
# "map" a target path with a source key of the JEOL text file; the functor that
# interprets these lists is not part of this file - confirm.
JEOL_VARIOUS_STATIC_TO_NX_EM = {
"prefix_trg": "/ENTRY[entry*]/measurement/em_lab",
"prefix_src": "",
"use": [("FABRICATION[fabrication]/vendor", "JEOL")],
"map": [
("FABRICATION[fabrication]/model", "CM_INSTRUMENT"),
],
}
256 changes: 256 additions & 0 deletions src/pynxtools_em/parsers/image_tiff_jeol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Subparser for harmonizing JEOL specific content in TIFF files."""

import mmap
from typing import Dict, List

import flatdict as fd
import numpy as np
import pint
from PIL import Image, ImageSequence
from pint import UnitRegistry
from pynxtools_em.concepts.mapping_functors_pint import add_specific_metadata_pint
from pynxtools_em.configurations.image_tiff_jeol_cfg import (
JEOL_VARIOUS_DYNAMIC_TO_NX_EM,
JEOL_VARIOUS_STATIC_TO_NX_EM,
)
from pynxtools_em.parsers.image_tiff import TiffParser
from pynxtools_em.utils.string_conversions import string_to_number

# Module-level pint unit registry; JeolTiffParser uses ureg.meter to convert the
# SM_MICRON_MARKER length when computing the pixel size.
ureg = UnitRegistry()


class JeolTiffParser(TiffParser):
    """Subparser for harmonizing JEOL specific content in TIFF files.

    The parser works on a pair of files that share the same name up to the file
    extension: a TIFF file with the image data and a TXT file with the metadata.
    Whether the pair is accepted is reported via ``self.supported``.
    """

    def __init__(self, file_paths: List[str], entry_id: int = 1, verbose=False):
        """Identify the TIF/TXT pair in file_paths and check if it is JEOL content.

        Parameters:
            file_paths: Exactly two paths, one ending in .tif/.tiff and one ending
                in .txt, which agree in everything but the file extension.
            entry_id: Number used for the entry in the NeXus target paths.
            verbose: If True, the collected metadata are printed.
        """
        tif_path, txt_path = "", ""
        if len(file_paths) == 2:
            for entry in file_paths:
                if entry.lower().endswith((".tif", ".tiff")):
                    tif_path = entry
                elif entry.lower().endswith(".txt"):
                    txt_path = entry
            # each path is stripped of its own extension before the comparison,
            # slicing both with the dot position of the first path accepts pairs
            # like abc.tif and abc.d.txt whose names differ
            if tif_path.rsplit(".", 1)[0] != txt_path.rsplit(".", 1)[0]:
                tif_path, txt_path = "", ""
        paired = tif_path != "" and txt_path != ""

        if paired:
            super().__init__(tif_path)
        else:
            # NOTE(review): TiffParser.__init__ is defined elsewhere and is assumed
            # to set self.file_path; it is not called without a TIF file, so the
            # attribute is set here to keep parse() from raising AttributeError
            self.file_path = tif_path
        self.entry_id = entry_id
        self.event_id = 1
        self.verbose = verbose
        self.txt_file_path = txt_path if paired else None
        self.prfx = None
        self.tmp: Dict = {"data": None, "flat_dict_meta": fd.FlatDict({})}
        self.supported_version: Dict = {}
        self.version: Dict = {}
        self.tags: Dict = {}
        self.supported = False
        if paired:
            self.check_if_tiff_jeol()
        else:
            print(f"Parser {self.__class__.__name__} needs TIF and TXT file !")

    def check_if_tiff_jeol(self):
        """Check if resource behind self.file_path is a TaggedImageFormat file.

        This loads the metadata from txt_file_path first so that the formatting of
        that information can be used to tell JEOL data apart from other data.
        The result is stored in self.supported, the parsed metadata are stored in
        self.tmp["flat_dict_meta"].

        Raises:
            KeyError: If a key occurs more than once in the text file.
        """
        # currently not voting-based algorithm required as used in other parsers
        self.supported = False
        if self.txt_file_path is None:
            print(
                f"Parser {self.__class__.__name__} does not work without a JEOL text file with the image metadata ! "
                f"This file is required to have exactly the same file name as the file with the TIF image data !"
            )
            return

        # reading the first four bytes directly releases the file handle again and
        # also works for an empty file, which cannot be memory-mapped
        with open(self.file_path, "rb") as tif_file:
            magic = tif_file.read(4)
        if magic != b"II*\x00":  # https://en.wikipedia.org/wiki/TIFF
            print(
                f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports"
            )
            return

        # only lines that start with "$" are taken as metadata lines
        with open(self.txt_file_path, "r") as txt_file:
            lines = [
                line.strip().lstrip("$")
                for line in txt_file
                if line.startswith("$")
            ]

        meta = fd.FlatDict({}, "/")
        self.tmp["flat_dict_meta"] = meta
        for line in lines:
            tokens = line.split()
            if len(tokens) != 2:
                # keys without a value and values with whitespace are not handled
                print(f"WARNING::{line} is currently ignored !")
                continue
            key, value = tokens
            if key in meta:
                raise KeyError(f"Found duplicated key {key} !")
            # passing every value to pint.Quantity is not robust, e.g.
            # CM_TITLE 20240227_A1_2m_0_FA3_1 fails with 'invalid decimal literal',
            # therefore a mixture of pint quantities and regular numpy / pure Python
            # types is used, the mapping functor should take care of resolving
            # the cases properly
            if key != "SM_MICRON_MARKER":
                meta[key] = string_to_number(value)
            else:
                meta[key] = pint.Quantity(value)

        if self.verbose:
            for key, value in meta.items():
                print(f"{key}______{type(value)}____{value}")

        # a text file without these keys is not JEOL content, not an error
        if (
            "SEM_DATA_VERSION" in meta
            and "CM_LABEL" in meta
            and meta["SEM_DATA_VERSION"] == 1
            and meta["CM_LABEL"] == "JEOL"
        ):
            self.supported = True
        else:
            print(
                f"Parser {self.__class__.__name__} finds no content in {self.file_path} that it supports"
            )

    def parse(self, template: dict) -> dict:
        """Add metadata and image data to template if the file pair is supported.

        Returns:
            The template that was passed in.
        """
        if self.supported is True:
            print("Parsing via JEOL...")
            # metadata have at this point already been collected into an fd.FlatDict
            self.process_event_data_em_metadata(template)
            self.process_event_data_em_data(template)
        else:
            print(
                f"{self.file_path} is not a JEOL-specific TIFF file that this parser can process !"
            )
        return template

    def process_event_data_em_data(self, template: dict) -> dict:
        """Add respective heavy data.

        Each image of the TIFF file is written into its own IMAGE_SET together
        with one coordinate axis per image dimension. The pixel size is computed
        from SM_MICRON_MARKER and SM_MICRON_BAR if both are available.

        Returns:
            The template that was passed in.
        """
        # default display of the image(s) representing the data collected in this event
        print(
            "Writing JEOL TIFF image data to the respective NeXus concept instances..."
        )
        meta = self.tmp["flat_dict_meta"]
        # i indexes the columns (array axis 1), j indexes the rows (array axis 0)
        dims = ["i", "j"]
        image_identifier = 1
        with Image.open(self.file_path, mode="r") as fp:
            for img in ImageSequence.Iterator(fp):
                # the array is created once per image and reused below
                nparr = np.array(img)
                print(
                    f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}"
                )
                # eventually similar open discussions points as were raised for tiff_tfs parser
                trg = (
                    f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/"
                    f"EVENT_DATA_EM[event_data_em{self.event_id}]/"
                    f"IMAGE_SET[image_set{image_identifier}]/image_twod"
                )
                template[f"{trg}/title"] = "Image"
                template[f"{trg}/@signal"] = "real"
                for idx, dim in enumerate(dims):
                    template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = (
                        np.uint32(idx)
                    )
                template[f"{trg}/@axes"] = [f"axis_{dim}" for dim in dims[::-1]]
                template[f"{trg}/real"] = {"compress": nparr, "strength": 1}
                # 0 is y while 1 is x for 2d, 0 is z, 1 is y, while 2 is x for 3d
                template[f"{trg}/real/@long_name"] = "Signal"

                sxy = {"i": 1.0, "j": 1.0}
                scan_unit = {"i": "m", "j": "m"}
                if ("SM_MICRON_BAR" in meta) and ("SM_MICRON_MARKER" in meta):
                    # JEOL-specific conversion for micron bar pixel to physical length
                    resolution = int(meta["SM_MICRON_BAR"])
                    physical_length = (
                        meta["SM_MICRON_MARKER"].to(ureg.meter).magnitude
                    )
                    # resolution many pixel represent physical_length scanned surface
                    # assuming square pixel
                    print(f"resolution {resolution}, L {physical_length}")
                    if resolution > 0:
                        sxy = {
                            "i": physical_length / resolution,
                            "j": physical_length / resolution,
                        }
                    else:
                        # avoids a division by zero, the axes stay uncalibrated
                        print(
                            "WARNING: SM_MICRON_BAR is not a positive number of pixel, "
                            "assuming pixel width and height of one meter!"
                        )
                else:
                    print("WARNING: Assuming pixel width and height unit is meter!")
                nxy = {"i": np.shape(nparr)[1], "j": np.shape(nparr)[0]}
                # TODO::be careful we assume here a very specific coordinate system
                # however, these assumptions need to be confirmed for JEOL
                # additional points as discussed already in comments to TFS TIFF reader
                for dim in dims:
                    template[f"{trg}/AXISNAME[axis_{dim}]"] = {
                        "compress": np.asarray(
                            np.linspace(0, nxy[dim] - 1, num=nxy[dim], endpoint=True)
                            * sxy[dim],
                            np.float64,
                        ),
                        "strength": 1,
                    }
                    template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] = (
                        f"Coordinate along {dim}-axis ({scan_unit[dim]})"
                    )
                    template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}"
                image_identifier += 1
        return template

    def add_various_dynamic(self, template: dict) -> dict:
        """Map metadata via the JEOL_VARIOUS_DYNAMIC_TO_NX_EM configuration.

        Returns:
            The template that was passed in.
        """
        identifier = [self.entry_id, self.event_id, 1]
        add_specific_metadata_pint(
            JEOL_VARIOUS_DYNAMIC_TO_NX_EM,
            self.tmp["flat_dict_meta"],
            identifier,
            template,
        )
        return template

    def add_various_static(self, template: dict) -> dict:
        """Map metadata via the JEOL_VARIOUS_STATIC_TO_NX_EM configuration.

        Returns:
            The template that was passed in.
        """
        identifier = [self.entry_id, self.event_id, 1]
        add_specific_metadata_pint(
            JEOL_VARIOUS_STATIC_TO_NX_EM,
            self.tmp["flat_dict_meta"],
            identifier,
            template,
        )
        return template

    def process_event_data_em_metadata(self, template: dict) -> dict:
        """Add respective metadata.

        Returns:
            The template that was passed in.
        """
        # contextualization to understand how the image relates to the EM session
        print("Mapping some of JEOL metadata on respective NeXus concepts...")
        self.add_various_dynamic(template)
        self.add_various_static(template)
        # ... add more as required ...
        return template
5 changes: 2 additions & 3 deletions src/pynxtools_em/parsers/image_tiff_point_electronic.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def process_event_data_em_data(self, template: dict) -> dict:
print(
f"Processing image {image_identifier} ... {type(nparr)}, {np.shape(nparr)}, {nparr.dtype}"
)
# eventually similar open discussions points as for the TFS TIFF parser
# eventually similar open discussions points as were raised for tiff_tfs parser
trg = (
f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/"
f"EVENT_DATA_EM[event_data_em{self.event_id}]/"
Expand All @@ -180,8 +180,7 @@ def process_event_data_em_data(self, template: dict) -> dict:
template[f"{trg}/real/@long_name"] = f"Signal"

sxy = {"i": 1.0, "j": 1.0}
scan_unit = {"i": "m", "j": "m"} # assuming FEI reports SI units
# we may face the CCD overview camera for the chamber for which there might not be a calibration!
scan_unit = {"i": "m", "j": "m"}
if ("PixelSizeX" in self.tmp["flat_dict_meta"]) and (
"PixelSizeY" in self.tmp["flat_dict_meta"]
):
Expand Down
4 changes: 4 additions & 0 deletions src/pynxtools_em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from pynxtools_em.concepts.nxs_concepts import NxEmAppDef
from pynxtools_em.parsers.convention_reader import NxEmConventionParser
from pynxtools_em.parsers.image_tiff_jeol import JeolTiffParser
from pynxtools_em.parsers.nxs_imgs import NxEmImagesParser
from pynxtools_em.parsers.nxs_mtex import NxEmNxsMTexParser
from pynxtools_em.parsers.nxs_nion import NionProjectParser
Expand Down Expand Up @@ -118,6 +119,9 @@ def read(

# zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id)
# zip_parser.parse(template)
elif len(case.dat) == 2:
jeol = JeolTiffParser(case.dat, entry_id, verbose=False)
jeol.parse(template)

nxplt = NxEmDefaultPlotResolver()
nxplt.priority_select(template)
Expand Down
1 change: 1 addition & 0 deletions src/pynxtools_em/utils/io_case_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
".h5oina",
".mtex.h5",
".dream3d",
".txt",
]
# ".dm3", ".dm4"]

Expand Down