diff --git a/docs/supported_formats/de.rst b/docs/supported_formats/de.rst new file mode 100644 index 00000000..a38f0979 --- /dev/null +++ b/docs/supported_formats/de.rst @@ -0,0 +1,44 @@ +.. _de-format: + +StreamPix .SEQ () +----------------- + +The .seq file format is a binary file format developed by +:ref:`Streampix` . Streampix is +a high speed digital recording software. + +Due to the flexible nature of the .seq file format we specifically support +the .seq file format associated with the DirectElectron-16 and DirectElectron-Celeritas cameras. + +For other cameras support may not be fully realized but if an issue is raised +:ref: `here` support can be +considered. + +Specifically for the support of the DirectElectron-Celeritas camera there are two +possible ways to load a file. + +1: Explicitly list files + +.. code-block:: python + + >>> from rsciio.de import file_reader + >>> + >>> file_reader(None, top="de_Top.seq", bottom="de_Bottom.seq", + ... metadata="de_Top.metadata", gain="de.gain.mrc", + ... dark="de.dark.mrc", xml="de.seq.Config.Metadata.xml", + ... celeritas=True) + +2: Automatically detect the files. In this case the program will automatically + look for files with the same naming structure in the same folder. + +.. code-block:: python + + >>> from rsciio.de import file_reader + >>> + >>> file_reader("de_Top.seq", celeritas=True) + + +All of the file loaders for the cameras also have a special ``distributed`` keyword which changes how the data is +loaded into memory or into a dask array. This allows for the user to use +:ref: `dask-distributed` as a backend rather +than using the default scheduler. diff --git a/docs/supported_formats/index.rst b/docs/supported_formats/index.rst index fc6a321e..f691406e 100644 --- a/docs/supported_formats/index.rst +++ b/docs/supported_formats/index.rst @@ -18,6 +18,7 @@ big datasets is supported. 
blockfile bruker de5 + de dens digitalmicrograph digitalsurf diff --git a/docs/supported_formats/supported_formats.rst b/docs/supported_formats/supported_formats.rst index ffb25a2c..3f2c9fdd 100644 --- a/docs/supported_formats/supported_formats.rst +++ b/docs/supported_formats/supported_formats.rst @@ -12,6 +12,8 @@ +---------------------------------------------------------------------+-------------------------+--------+--------+--------+ | :ref:`Direct electron EMD ` | de5 | Yes | No | Yes | +---------------------------------------------------------------------+-------------------------+--------+--------+--------+ + | :ref:`Direct electron StreamPix ` | seq | Yes | No | Yes | + +---------------------------------------------------------------------+-------------------------+--------+--------+--------+ | :ref:`DENSsolutions Impulse logfile ` | dens, csv & log | Yes | No | No | +---------------------------------------------------------------------+-------------------------+--------+--------+--------+ | :ref:`DENSsolutions Digiheater logfile ` | dens | Yes | No | No | diff --git a/rsciio/de/__init__.py b/rsciio/de/__init__.py new file mode 100644 index 00000000..d4de92f6 --- /dev/null +++ b/rsciio/de/__init__.py @@ -0,0 +1,10 @@ +from ._api import file_reader + + +__all__ = [ + "file_reader", +] + + +def __dir__(): + return sorted(__all__) diff --git a/rsciio/de/_api.py b/rsciio/de/_api.py new file mode 100644 index 00000000..510063e0 --- /dev/null +++ b/rsciio/de/_api.py @@ -0,0 +1,835 @@ +# -*- coding: utf-8 -*- +# Copyright 2007-2022 The HyperSpy developers +# +# This file is part of RosettaSciIO. +# +# RosettaSciIO is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# RosettaSciIO is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with RosettaSciIO. If not, see . + + +import numpy as np +import logging +import glob + +import dask.array as da + +from rsciio.utils.tools import ( + read_binary_metadata, + parse_xml, + get_chunk_index, + memmap_distributed, +) + +_logger = logging.getLogger(__name__) +# Stream Pix data types +data_types = {8: np.uint8, 16: np.uint16, 32: np.uint32} + + +def file_reader( + filename, + navigation_shape=None, + lazy=False, + celeritas=False, + chunks="auto", + distributed=False, + **kwargs, +): + """Reads the .seq file format from the DE 16 and DE Celeritas cameras. + This file format is generic and used by the 3rd party software StreamPix. + While this file loader may load data saved from other cameras it is not + guaranteed to load files other than those from Direct Electron. + + Parameters + ---------- + filename: str + The file name to be loaded. This should have a `.seq` ending to the + file. All additional information (dark, gain, metadata) should be in the + same file with the same naming scheme. + + If celeritas ==True either the bottom or top frame can be passed and + the other frame will automatically be found if it is in the same folder + with the same naming scheme. + navigation_shape: tuple + The shape of the navigation axis. This will coerce the data into the + shape given. Adding extra dimensions as necessary + lazy: bool + If the data should be loaded lazily using dask + celeritas: bool + If the data came from the celeritas camera. Important for loading + data saved with a prebuffer and split between top and bottom frames. + chunks: int, tuple, dict or str, optional + The new block dimensions to create. 
-1 indicates the full size of the + corresponding dimension. Default is “auto” which automatically determines chunk sizes. + distributed: bool + If the data should be loaded in a way that is supported by the + distributed backend. Slightly slower for smaller datasets but could + potentially see gains on larger datasets or more extensive hardware. + kwargs: + Any additional parameters such as: + top: str + The name of the top filename if passed directly + bottom: str + The name of the bottom file if passed directly + gain: str + The name of the gain image if passed directly + metadata: str + The name of the metadata file if passed directly + xml: str + The name of the xml file if passed directly + + Returns + ------- + + """ + if celeritas: + if "top" not in kwargs and "bottom" not in kwargs: + if "Top" in filename: + top = filename + leading_str = filename.rsplit("_Top", 1)[0] + bottom = glob.glob(leading_str + "_Bottom*.seq")[0] + filename = leading_str + ".seq" + + elif "Bottom" in filename: + bottom = filename + leading_str = filename.rsplit("_Bottom", 1)[0] + top = glob.glob(leading_str + "_Top*.seq")[0] + filename = leading_str + ".seq" + else: + raise ValueError( + "For the Celeritas Camera Top and Bottom " + "frames must be explicitly given by passing the" + "top and bottom kwargs or the file name must have" + "'Top' or 'Bottom' in the file name" + ) + if "metadata" not in kwargs: + kwargs["metadata"] = bottom + ".metadata" + + elif celeritas and "top" in kwargs and "bottom" in kwargs: + top = kwargs["top"] + bottom = kwargs["top"] + else: + raise ValueError( + "For the Celeritas Camera Top and Bottom " + "frames must be explicitly given by passing the" + "top and bottom kwargs or the file name must have" + "'Top' or 'Bottom' in the file name" + ) + + file_extensions = { + "metadata": ".metadata", + "dark": ".dark.mrc", + "gain": ".gain.mrc", + "xml": ".Config.Metadata.xml", + } + for ext in file_extensions: + if not ext in kwargs: + kwargs[ext] = filename + 
file_extensions[ext] + + if celeritas: + reader = CeleritasReader(file=filename, top=top, bottom=bottom, **kwargs) + else: + reader = SeqReader(file=filename, **kwargs) + return [ + reader.read_data( + navigation_shape=navigation_shape, + lazy=lazy, + distributed=distributed, + chunks=chunks, + ), + ] + + +class SeqReader: + def __init__(self, file, dark=None, gain=None, metadata=None, xml=None): + """ + Initializes a general reader for the .seq file format. + Some functions are overwritten with the CeleritasSEQ reader + + Parameters + ---------- + file: str + The signal to be read. + dark: str + The file path for the dark reference to be read. + gain: str + The file path for the gain reference to be read. + metadata: str + The file path for the metadata to be read + xml: str + The file path for the xml file to be read. + """ + self.file = file + self.metadata_file = metadata + self.dark = dark + self.gain = gain + self.xml = xml + # Output + self.original_metadata = { + "InputFiles": { + "file": file, + "metadata": self.metadata_file, + "dark": dark, + "gain": gain, + "xml": xml, + } + } + self.metadata = { + "General": { + "filename": file, + }, + "Signal": {"signal_type": "Signal2D"}, + } + self.data = None + self.axes = [] + self.buffer = None + + def _read_metadata(self): + metadata_header_dict = { + "Version": [" 1e-30 + ): + # need to still determine a way to properly set units and scale + self.axes[-2]["scale"] = self.original_metadata["Metadata"]["PixelSize"] + self.axes[-1]["scale"] = self.original_metadata["Metadata"]["PixelSize"] + return + + def read_data( + self, + navigation_shape=None, + chunks="auto", + distributed=False, + lazy=False, + ): + """Reads the data from self.file given a navigation shape. + Parameters + ---------- + navigation_shape: None or tuple + Reads the data and then coerces it into the navigation shape + given + chunks: int, tuple, dict or str, optional + The new block dimensions to create. 
-1 indicates the full size of the + corresponding dimension. Default is “auto” which automatically determines chunk sizes. + distributed: bool + If the data should be loaded in a distributed compatible way. In many cases this is + preferred if you have a specific target chunking style. + lazy: bool + Whether to open the file lazily or not + """ + header = self._read_file_header() + dark_img, gain_img = self._read_dark_gain() + self._read_xml() + self._read_metadata() + if navigation_shape is None or navigation_shape == (): + navigation_shape = (header["NumFrames"],) + + data_types = {8: np.uint8, 16: np.uint16, 32: np.uint32} + empty = header["TrueImageSize"] - ( + (header["ImageWidth"] * header["ImageHeight"] * 2) + 8 + ) + dtype = [ + ( + "Array", + data_types[int(header["ImageBitDepth"])], + (header["ImageWidth"], header["ImageHeight"]), + ), + ("sec", " num_frames + and not add_frames + ): + _logger.warning( + "The number of frames and the navigation shape are not " + "equal. To add frames to the end of the dataset set add_frames=True." + "Note: This will increase the size of the file on the disk!" + ) + navigation_shape = num_frames + elif ( + navigation_shape is not None + and np.product(navigation_shape) > num_frames + and add_frames + ): + buffer_frames += 1 + new_buffer_frames = int( + np.ceil(np.divide(np.product(navigation_shape), self.buffer)) + ) + if buffer_frames != new_buffer_frames: + raise ValueError( + f"Only one buffer frame should be dropped so only one " + f"frame will be added. The current navigation shape is larger" + f"than the size of the dataset by {new_buffer_frames-buffer_frames}" + f" frames. To add more frames manually append " + f"the dataset. 
" + ) + + t = np.memmap( + self.top, offset=8192, dtype=dtype, shape=buffer_frames, mode="r+" + ) + t.flush() + b = np.memmap( + self.bottom, offset=8192, dtype=dtype, shape=buffer_frames, mode="r+" + ) + b.flush() + + if navigation_shape is not None: + shape = navigation_shape + signal_shape + + else: + buffer_frames = header["NumFrames"] + shape = (buffer_frames * self.buffer,) + signal_shape + + if distributed: + data = read_stitch_binary_distributed( + top=self.top, + bottom=self.bottom, + buffer_size=self.buffer, + shape=shape, + offset=8192, + dtypes=dtype, + total_buffer_frames=buffer_frames, + chunks=chunks, + dark=dark_img, + gain=gain_img, + ) + else: + data, time = read_stitch_binary( + top=self.top, + bottom=self.bottom, + dtypes=dtype, + offset=8192, + total_buffer_frames=buffer_frames, + navigation_shape=navigation_shape, + lazy=lazy, + chunks=chunks, + ) + if dark_img is not None: + data = data - dark_img + if gain_img is not None: + data = data * gain_img + + self._create_axes( + header=header, nav_shape=navigation_shape, prebuffer=self.buffer + ) + return { + "data": data, + "metadata": self.metadata, + "axes": self.axes, + "original_metadata": self.original_metadata, + } + + +""" +Functions for reading split binary files. 
Generic binary distributed readers are +located in rosettasciio.utils.tools +""" + + +def read_stitch_binary_distributed( + top, + bottom, + shape, + dtypes, + offset, + total_buffer_frames=None, + buffer_size=None, + chunks=None, + dark=None, + gain=None, +): + indexes = get_chunk_index( + shape=shape, + signal_axes=(-1, -2), + chunks=chunks, + block_size_limit=None, + dtype=np.float32, + ) + data = da.map_blocks( + slic_stitch_binary, + indexes, + top=top, + bottom=bottom, + dtypes=dtypes, + offset=offset, + total_buffer_frames=total_buffer_frames, + buffer_size=buffer_size, + gain=gain, + dark=dark, + dtype=np.float32, + new_axis=(len(indexes.shape), len(indexes.shape) + 1), + chunks=indexes.chunks + (shape[-2], shape[-1]), + ) + return data + + +def slic_stitch_binary( + indexes, + top, + bottom, + dtypes, + offset=8192, + total_buffer_frames=None, + buffer_size=None, + gain=None, + dark=None, +): + top_mapped = np.memmap( + top, offset=offset, dtype=dtypes, shape=total_buffer_frames, mode="r" + )["Array"] + bottom_mapped = np.memmap( + bottom, + offset=offset, + dtype=dtypes, + shape=total_buffer_frames, + mode="r", + )["Array"] + if buffer_size != None: + indexes = np.divmod(indexes, buffer_size) + + bottom = bottom_mapped[indexes] + top = np.flip(top_mapped[indexes], axis=-2) + chunk = np.concatenate([top, bottom], axis=-2) + if dark is not None: + chunk = chunk - dark + if gain is not None: + chunk = chunk * gain + return chunk + + +def read_stitch_binary( + top, + bottom, + dtypes, + offset, + total_buffer_frames=None, + navigation_shape=None, + lazy=False, + chunks=None, +): + """Read and stitch the top and bottom files + Parameters + ---------- + top: str + The filename of the top of the frame + bottom: str + The filename of the bottom of the frame + total_buffer_frames: int + The size of the segment pre buffer. This should be an int based on the + size of each of the frames and the FPS. + navigation_shape: tuple + The navigation shape of the data. 
+ lazy: bool + If the data should be cast to a dask array. + """ + keys = [d[0] for d in dtypes] + top_mapped = np.memmap( + top, + offset=offset, + dtype=dtypes, + shape=total_buffer_frames, + mode="r", + ) + bottom_mapped = np.memmap( + bottom, + offset=offset, + dtype=dtypes, + shape=total_buffer_frames, + mode="r", + ) + + if lazy: + top_mapped = da.from_array(top_mapped) + bottom_mapped = da.from_array(bottom_mapped) + + array = np.concatenate( + [ + np.flip( + top_mapped["Array"].reshape(-1, *top_mapped["Array"].shape[2:]), axis=1 + ), + bottom_mapped["Array"].reshape(-1, *bottom_mapped["Array"].shape[2:]), + ], + 1, + ) + if navigation_shape is not None and navigation_shape != (): + cut = np.product(navigation_shape) + array = array[:cut] + new_shape = tuple(navigation_shape) + array.shape[1:] + array = array.reshape(new_shape) + if lazy and chunks is not None: + array = array.rechunk(chunks=chunks) + + time = {k: bottom_mapped[k] for k in keys if k not in ["Array", "empty"]} + return array, time + + +def read_ref(file_name): + """Reads a reference image from the file using the file + name as well as the width and height of the image.""" + if file_name is None: + return + try: + shape = np.array(np.fromfile(file_name, dtype=np.int32, count=2), dtype=int) + shape = tuple(shape[::-1]) + ref = np.memmap(file_name, mode="r", dtype=np.float32, shape=shape, offset=1024) + ref.shape + return ref + except FileNotFoundError: + _logger.warning( + "No Reference image: " + + file_name + + " found. The dark/gain references should be in the same " + "directory as the image and have the form xxx.seq.dark.mrc " + "or xxx.seq.gain.mrc" + ) + return None diff --git a/rsciio/de/api.py b/rsciio/de/api.py new file mode 100644 index 00000000..67bbe750 --- /dev/null +++ b/rsciio/de/api.py @@ -0,0 +1,802 @@ +# -*- coding: utf-8 -*- +# Copyright 2007-2022 The HyperSpy developers +# +# This file is part of RosettaSciIO. 
+# +# RosettaSciIO is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# RosettaSciIO is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with RosettaSciIO. If not, see . + + +import numpy as np +import logging +import glob + +import dask.array as da + +from rsciio.utils.tools import ( + read_binary_metadata, + parse_xml, + get_chunk_index, + memmap_distributed, +) + +_logger = logging.getLogger(__name__) +# Stream Pix data types +data_types = {8: np.uint8, 16: np.uint16, 32: np.uint32} + + +def file_reader( + filename, + navigation_shape=None, + lazy=False, + celeritas=False, + chunks="auto", + distributed=False, + **kwargs +): + """Reads the .seq file format from the DE 16 and DE Celeritas cameras. + This file format is generic and used by the 3rd party software StreamPix. + While this file loader may load data saved from other cameras it is not + guaranteed to load files other than those from Direct Electron. + + Parameters + ---------- + filename: str + The file name to be loaded. This should have a `.seq` ending to the + file. All additional information (dark, gain, metadata) should be in the + same file with the same naming scheme. + + If celeritas ==True either the bottom or top frame can be passed and + the other frame will automatically be found if it is in the same folder + with the same naming scheme. + navigation_shape: tuple + The shape of the navigation axis. This will coerce the data into the + shape given. 
Adding extra dimensions as necessary + lazy: bool + If the data should be loaded lazily using dask + celeritas: bool + If the data came from the celeritas camera. Important for loading + data saved with a prebuffer and split between top and bottom frames. + distributed: bool + If the data should be loaded in a way that is supported by the + distributed backend. Slightly slower for smaller datasets but could + potentially see gains on larger datasets or more extensive hardware. + kwargs: + Any additional parameters such as: + top: str + The name of the top filename if passed directly + bottom: str + The name of the bottom file if passed directly + gain: str + The name of the gain image if passed directly + metadata: str + The name of the metadata file if passed directly + xml: str + The name of the xml file if passed directly + + Returns + ------- + + """ + if celeritas: + if "top" not in kwargs and "bottom" not in kwargs: + if "Top" in filename: + top = filename + leading_str = filename.rsplit("_Top", 1)[0] + bottom = glob.glob(leading_str + "_Bottom*.seq")[0] + filename = leading_str + ".seq" + + elif "Bottom" in filename: + bottom = filename + leading_str = filename.rsplit("_Bottom", 1)[0] + top = glob.glob(leading_str + "_Top*.seq")[0] + filename = leading_str + ".seq" + else: + raise ValueError( + "For the Celeritas Camera Top and Bottom " + "frames must be explicitly given by passing the" + "top and bottom kwargs or the file name must have" + "'Top' or 'Bottom' in the file name" + ) + if "metadata" not in kwargs: + kwargs["metadata"] = bottom + ".metadata" + + elif celeritas and "top" in kwargs and "bottom" in kwargs: + top = kwargs["top"] + bottom = kwargs["top"] + else: + raise ValueError( + "For the Celeritas Camera Top and Bottom " + "frames must be explicitly given by passing the" + "top and bottom kwargs or the file name must have" + "'Top' or 'Bottom' in the file name" + ) + + file_extensions = { + "metadata": ".metadata", + "dark": ".dark.mrc", + 
"gain": ".gain.mrc", + "xml": ".Config.Metadata.xml", + } + for ext in file_extensions: + if not ext in kwargs: + kwargs[ext] = filename + file_extensions[ext] + + if celeritas: + reader = CeleritasReader(file=filename, top=top, bottom=bottom, **kwargs) + else: + reader = SeqReader(file=filename, **kwargs) + return reader.read_data( + navigation_shape=navigation_shape, + lazy=lazy, + distributed=distributed, + chunks=chunks, + ) + + +class SeqReader: + def __init__(self, file, dark=None, gain=None, metadata=None, xml=None): + """ + Initializes a general reader for the .seq file format. + Some functions are overwritten with the CeleritasSEQ reader + + Parameters + ---------- + file: str + The signal to be read. + dark: str + The file path for the dark reference to be read. + gain: str + The file path for the gain reference to be read. + metadata: str + The file path for the metadata to be read + xml: str + The file path for the xml file to be read. + """ + self.file = file + self.metadata_file = metadata + self.dark = dark + self.gain = gain + self.xml = xml + # Output + self.original_metadata = { + "InputFiles": { + "file": file, + "metadata": self.metadata_file, + "dark": dark, + "gain": gain, + "xml": xml, + } + } + self.metadata = { + "General": { + "filename": file, + }, + "Signal": {"signal_type": "Signal2D"}, + } + self.data = None + self.axes = [] + self.buffer = None + + def _read_metadata(self): + metadata_header_dict = { + "Version": [" 1e-30 + ): + # need to still determine a way to properly set units and scale + self.axes[-2]["scale"] = self.original_metadata["Metadata"]["PixelSize"] + self.axes[-1]["scale"] = self.original_metadata["Metadata"]["PixelSize"] + return + + def read_data( + self, navigation_shape=None, chunks="auto", distributed=False, lazy=False + ): + """Reads the data from self.file given a navigation shape. 
+ Parameters + ---------- + navigation_shape: None or tuple + Reads the data and then coerces it into the navigation shape + given + """ + header = self._read_file_header() + dark_img, gain_img = self._read_dark_gain() + self._read_xml() + self._read_metadata() + signal_shape = (header["ImageWidth"], header["ImageHeight"]) + num_frames = header["NumFrames"] + if navigation_shape is None or navigation_shape == (): + navigation_shape = (header["NumFrames"],) + + data_types = {8: np.uint8, 16: np.uint16, 32: np.uint32} + empty = header["TrueImageSize"] - ( + (header["ImageWidth"] * header["ImageHeight"] * 2) + 8 + ) + dtype = [ + ( + "Array", + data_types[int(header["ImageBitDepth"])], + (header["ImageWidth"], header["ImageHeight"]), + ), + ("sec", " num_frames: + _logger.warning( + "The number of frames and the navigation shape are not " + "equal. Adding frames to the end of the dataset. " + ) + t = np.memmap( + self.file, offset=8192, dtype=dtype, shape=num_frames, mode="r+" + ) + t.flush() + num_frames = np.product(navigation_shape) + + if distributed: + data = memmap_distributed( + self.file, + dtype, + offset=8192, + shape=navigation_shape, + key="Array", + chunks=chunks, + ) + else: + data = np.memmap( + self.file, dtype=dtype, offset=8192, shape=navigation_shape + ) + time = {"sec": data["sec"], "ms": data["ms"], "mis": data["mis"]} + data = data["Array"] + if lazy: + data = da.from_array(data, chunks=chunks) + self.original_metadata["Timestamps"] = time + self.metadata["Timestamps"] = time + if dark_img is not None: + data = data - dark_img + if gain_img is not None: + data = data * gain_img + self._create_axes(header=header, nav_shape=navigation_shape) + return { + "data": data, + "metadata": self.metadata, + "axes": self.axes, + "original_metadata": self.original_metadata, + } + + +class CeleritasReader(SeqReader): + def __init__(self, top, bottom, **kwargs): + """ + Initializes a reader for the .seq file format written from the + celeritas camera + + 
Parameters + ---------- + file: str + The signal to be read. + dark: str + The file path for the dark reference to be read. + gain: str + The file path for the gain reference to be read. + metadata: str + The file path for the metadata to be read + xml: str + The file path for the xml file to be read. + top: str + The signal from the top half of the camera + bottom: str + The signal from the bottom half of the camera. + """ + super().__init__(**kwargs) + self.top = top + self.bottom = bottom + self.bad_pixels = None + + def _read_file_header(self): + file_header_dict = { + "ImageWidth": [" num_frames: + _logger.warning( + "The number of frames and the navigation shape are not " + "equal. Adding frames to the end of the dataset. " + ) + buffer_frames = int( + np.ceil(np.divide(np.product(navigation_shape), self.buffer)) + ) + t = np.memmap( + self.top, offset=8192, dtype=dtype, shape=buffer_frames, mode="r+" + ) + t.flush() + b = np.memmap( + self.bottom, offset=8192, dtype=dtype, shape=buffer_frames, mode="r+" + ) + b.flush() + else: + buffer_frames = header["NumFrames"] + if navigation_shape is not None: + shape = navigation_shape + signal_shape + + else: + buffer_frames = header["NumFrames"] + shape = (buffer_frames * self.buffer,) + signal_shape + + if distributed: + data = read_stitch_binary_distributed( + top=self.top, + bottom=self.bottom, + buffer_size=self.buffer, + shape=shape, + offset=8192, + dtypes=dtype, + total_buffer_frames=buffer_frames, + chunks=chunks, + dark=dark_img, + gain=gain_img, + ) + else: + data, time = read_stitch_binary( + top=self.top, + bottom=self.bottom, + dtypes=dtype, + offset=8192, + total_buffer_frames=buffer_frames, + navigation_shape=navigation_shape, + lazy=lazy, + ) + if dark_img is not None: + data = data - dark_img + if gain_img is not None: + data = data * gain_img + + self._create_axes( + header=header, nav_shape=navigation_shape, prebuffer=self.buffer + ) + return { + "data": data, + "metadata": self.metadata, + "axes": 
self.axes, + "original_metadata": self.original_metadata, + } + + +""" +Functions for reading split binary files. Generic binary distributed readers are +located in rosettasciio.utils.tools +""" + + +def read_stitch_binary_distributed( + top, + bottom, + shape, + dtypes, + offset, + total_buffer_frames=None, + buffer_size=None, + chunks=None, + dark=None, + gain=None, +): + + indexes = get_chunk_index( + shape=shape, + signal_axes=(-1, -2), + chunks=chunks, + block_size_limit=None, + dtype=np.float32, + ) + data = da.map_blocks( + slic_stitch_binary, + indexes, + top=top, + bottom=bottom, + dtypes=dtypes, + offset=offset, + total_buffer_frames=total_buffer_frames, + buffer_size=buffer_size, + gain=gain, + dark=dark, + dtype=np.float32, + new_axis=(len(indexes.shape), len(indexes.shape) + 1), + chunks=indexes.chunks + (shape[-2], shape[-1]), + ) + return data + + +def slic_stitch_binary( + indexes, + top, + bottom, + dtypes, + offset=8192, + total_buffer_frames=None, + buffer_size=None, + gain=None, + dark=None, +): + top_mapped = np.memmap( + top, offset=offset, dtype=dtypes, shape=total_buffer_frames, mode="r" + )["Array"] + bottom_mapped = np.memmap( + bottom, + offset=offset, + dtype=dtypes, + shape=total_buffer_frames, + mode="r", + )["Array"] + if buffer_size != None: + indexes = np.divmod(indexes, buffer_size) + + bottom = bottom_mapped[indexes] + top = np.flip(top_mapped[indexes], axis=-2) + chunk = np.concatenate([top, bottom], axis=-2) + if dark is not None: + chunk = chunk - dark + if gain is not None: + chunk = chunk * gain + return chunk + + +def read_stitch_binary( + top, + bottom, + dtypes, + offset, + total_buffer_frames=None, + navigation_shape=None, + lazy=False, +): + """Read and stitch the top and bottom files + Parameters + ---------- + top: str + The filename of the top of the frame + bottom: str + The filename of the bottom of the frame + total_buffer_frames: int + The size of the segment pre buffer. 
This should be an int based on the + size of each of the frames and the FPS. + navigation_shape: tuple + The navigation shape of the data. + lazy: bool + If the data should be cast to a dask array. + """ + keys = [d[0] for d in dtypes] + top_mapped = np.memmap( + top, + offset=offset, + dtype=dtypes, + shape=total_buffer_frames, + mode="r", + ) + bottom_mapped = np.memmap( + bottom, + offset=offset, + dtype=dtypes, + shape=total_buffer_frames, + mode="r", + ) + + if lazy: + top_mapped = da.from_array(top_mapped) + bottom_mapped = da.from_array(bottom_mapped) + + array = np.concatenate( + [ + np.flip( + top_mapped["Array"].reshape(-1, *top_mapped["Array"].shape[2:]), axis=1 + ), + bottom_mapped["Array"].reshape(-1, *bottom_mapped["Array"].shape[2:]), + ], + 1, + ) + if navigation_shape is not None and navigation_shape != (): + cut = np.product(navigation_shape) + array = array[:cut] + new_shape = tuple(navigation_shape) + array.shape[1:] + array = array.reshape(new_shape) + + time = {k: bottom_mapped[k] for k in keys if k not in ["Array", "empty"]} + return array, time + + +def read_ref(file_name): + """Reads a reference image from the file using the file + name as well as the width and height of the image.""" + if file_name is None: + return + try: + shape = np.array(np.fromfile(file_name, dtype=np.int32, count=2), dtype=int) + shape = tuple(shape[::-1]) + ref = np.memmap(file_name, dtype=np.float32, shape=shape, offset=1024) + ref.shape + return ref + except FileNotFoundError: + _logger.warning( + "No Reference image: " + + file_name + + " found. 
The dark/gain references should be in the same " + "directory as the image and have the form xxx.seq.dark.mrc " + "or xxx.seq.gain.mrc" + ) + return None diff --git a/rsciio/de/specifications.yaml b/rsciio/de/specifications.yaml new file mode 100644 index 00000000..103a54a9 --- /dev/null +++ b/rsciio/de/specifications.yaml @@ -0,0 +1,11 @@ +name: DE +name_aliases: [] +description: Reads .seq files from Direct Electron Detectors +version: '1.0' +full_support: False +# Recognised file extension +file_extensions: ['seq',] +default_extension: 0 +# Writing capabilities +writes: False +non_uniform_axis: False diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.Config.Metadata.xml b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.Config.Metadata.xml new file mode 100644 index 00000000..8ee43572 --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.Config.Metadata.xml @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.Missing_Frame_Info.txt b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.Missing_Frame_Info.txt new file mode 100644 index 00000000..6b71ed78 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.Missing_Frame_Info.txt differ diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.dark.mrc b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.dark.mrc new file mode 100644 index 00000000..719ab8ba Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.dark.mrc differ diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.gain.mrc b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.gain.mrc new file 
mode 100644 index 00000000..fc2a5cd9 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test.seq.gain.mrc differ diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test2.seq.Config.Metadata.xml b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test2.seq.Config.Metadata.xml new file mode 100644 index 00000000..418fcb6d --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test2.seq.Config.Metadata.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test3.seq.Config.Metadata.xml b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test3.seq.Config.Metadata.xml new file mode 100644 index 00000000..c49d86f0 --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test3.seq.Config.Metadata.xml @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Bottom_14-04-59.396.seq b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Bottom_14-04-59.396.seq new file mode 100644 index 00000000..a6de9274 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Bottom_14-04-59.396.seq differ diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Bottom_14-04-59.396.seq.metadata b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Bottom_14-04-59.396.seq.metadata new file mode 100644 index 00000000..ba11ccc2 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Bottom_14-04-59.396.seq.metadata differ diff --git 
a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Top_14-04-59.355.seq b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Top_14-04-59.355.seq new file mode 100644 index 00000000..77266843 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Top_14-04-59.355.seq differ diff --git a/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Top_14-04-59.355.seq.metadata b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Top_14-04-59.355.seq.metadata new file mode 100644 index 00000000..ec2e35cd Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/128x256_PRebuffer128/test_Top_14-04-59.355.seq.metadata differ diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00784.seq.gain.mrc b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00784.seq.gain.mrc new file mode 100644 index 00000000..8eef682f Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00784.seq.gain.mrc differ diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.Config.Metadata.xml b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.Config.Metadata.xml new file mode 100644 index 00000000..abff6e11 --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.Config.Metadata.xml @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.Missing_Frame_Info.txt b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.Missing_Frame_Info.txt new file mode 100644 index 00000000..d98b17c8 --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.Missing_Frame_Info.txt @@ -0,0 +1 @@ +Frame number of missing frames: 
diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.dark.mrc b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.dark.mrc new file mode 100644 index 00000000..e739ee36 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785.seq.dark.mrc differ diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Bottom_13-49-04.196.seq b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Bottom_13-49-04.196.seq new file mode 100644 index 00000000..0744577e Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Bottom_13-49-04.196.seq differ diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Bottom_13-49-04.196.seq.metadata b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Bottom_13-49-04.196.seq.metadata new file mode 100644 index 00000000..abf56887 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Bottom_13-49-04.196.seq.metadata differ diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Top_13-49-04.160.seq b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Top_13-49-04.160.seq new file mode 100644 index 00000000..84387bb0 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Top_13-49-04.160.seq differ diff --git a/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Top_13-49-04.160.seq.metadata b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Top_13-49-04.160.seq.metadata new file mode 100644 index 00000000..846074ec Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/256x256_Prebuffer1/Movie_00785_Top_13-49-04.160.seq.metadata differ diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.Config.Metadata.xml 
b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.Config.Metadata.xml new file mode 100644 index 00000000..9b75e167 --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.Config.Metadata.xml @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.Missing_Frame_Info.txt b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.Missing_Frame_Info.txt new file mode 100644 index 00000000..d98b17c8 --- /dev/null +++ b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.Missing_Frame_Info.txt @@ -0,0 +1 @@ +Frame number of missing frames: diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.dark.mrc b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.dark.mrc new file mode 100644 index 00000000..e007c06e Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test.seq.dark.mrc differ diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Bottom_14-13-42.822.seq b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Bottom_14-13-42.822.seq new file mode 100644 index 00000000..9acaab0e Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Bottom_14-13-42.822.seq differ diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Bottom_14-13-42.822.seq.metadata b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Bottom_14-13-42.822.seq.metadata new file mode 100644 index 00000000..2c635cca Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Bottom_14-13-42.822.seq.metadata differ diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Top_14-13-42.780.seq 
b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Top_14-13-42.780.seq new file mode 100644 index 00000000..ad30af7f Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Top_14-13-42.780.seq differ diff --git a/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Top_14-13-42.780.seq.metadata b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Top_14-13-42.780.seq.metadata new file mode 100644 index 00000000..4df61f60 Binary files /dev/null and b/rsciio/tests/de_data/celeritas_data/64x64_Prebuffer256/test_Top_14-13-42.780.seq.metadata differ diff --git a/rsciio/tests/de_data/data/test.seq b/rsciio/tests/de_data/data/test.seq new file mode 100644 index 00000000..855042b0 Binary files /dev/null and b/rsciio/tests/de_data/data/test.seq differ diff --git a/rsciio/tests/de_data/data/test.seq.Config.Metadata.xml b/rsciio/tests/de_data/data/test.seq.Config.Metadata.xml new file mode 100644 index 00000000..20df5545 --- /dev/null +++ b/rsciio/tests/de_data/data/test.seq.Config.Metadata.xml @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/rsciio/tests/de_data/data/test.seq.Missing_Frame_Info.txt b/rsciio/tests/de_data/data/test.seq.Missing_Frame_Info.txt new file mode 100644 index 00000000..d98b17c8 --- /dev/null +++ b/rsciio/tests/de_data/data/test.seq.Missing_Frame_Info.txt @@ -0,0 +1 @@ +Frame number of missing frames: diff --git a/rsciio/tests/de_data/data/test.seq.dark.mrc b/rsciio/tests/de_data/data/test.seq.dark.mrc new file mode 100644 index 00000000..0ba4643f Binary files /dev/null and b/rsciio/tests/de_data/data/test.seq.dark.mrc differ diff --git a/rsciio/tests/de_data/data/test.seq.gain.mrc b/rsciio/tests/de_data/data/test.seq.gain.mrc new file mode 100644 index 00000000..25e8c7f3 Binary files /dev/null and 
b/rsciio/tests/de_data/data/test.seq.gain.mrc differ diff --git a/rsciio/tests/de_data/data/test.seq.metadata b/rsciio/tests/de_data/data/test.seq.metadata new file mode 100644 index 00000000..05915246 Binary files /dev/null and b/rsciio/tests/de_data/data/test.seq.metadata differ diff --git a/rsciio/tests/test_de.py b/rsciio/tests/test_de.py new file mode 100644 index 00000000..05ba97df --- /dev/null +++ b/rsciio/tests/test_de.py @@ -0,0 +1,283 @@ +# -*- coding: utf-8 -*- +# Copyright 2007-2022 The HyperSpy developers +# +# This file is part of RosettaSciIO. +# +# RosettaSciIO is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# RosettaSciIO is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with RosettaSciIO. If not, see . 
+ +import dask.array +import pytest +import numpy as np +import os +from rsciio.de._api import SeqReader, CeleritasReader + +from rsciio.de import file_reader + +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") + +celeritas1_path = str( + os.path.join( + os.path.dirname(__file__), "de_data", "celeritas_data", "128x256_PRebuffer128" + ) +) +celeritas2_path = str( + os.path.join( + os.path.dirname(__file__), "de_data", "celeritas_data", "256x256_Prebuffer1" + ) +) +celeritas3_path = str( + os.path.join( + os.path.dirname(__file__), "de_data", "celeritas_data", "64x64_Prebuffer256" + ) +) + +data_path = os.path.join(os.path.dirname(__file__), "de_data", "data") + + +class TestShared: + @pytest.fixture + def seq(self): + return SeqReader( + file=data_path + "/test.seq", + dark=data_path + "/test.seq.dark.mrc", + gain=data_path + "/test.seq.gain.mrc", + metadata=data_path + "/test.seq.metadata", + xml=data_path + "/test.seq.se.xml", + ) + + def test_parse_header(self, seq): + header = seq._read_file_header() + assert header["ImageWidth"] == 64 + assert header["ImageHeight"] == 64 + assert header["ImageBitDepthReal"] == 12 + assert header["NumFrames"] == 10 + assert header["TrueImageSize"] == 16384 + np.testing.assert_almost_equal( + header["FPS"], 30, 1 + ) # Note this value wrong for Celeritas Camera + # Read from the xml file... 
Factor of the frame buffer off + + @pytest.mark.parametrize( + "metadata_file", + [ + None, + data_path + "/test.seq.metadata", + data_path + "/testd.seq.metadata", + ], + ) + def test_parse_metadata(self, seq, metadata_file): + seq.metadata_file = metadata_file + metadata = seq._read_metadata() + if metadata_file is None or metadata_file == data_path + "/testd.seq.metadata": + assert metadata is None + else: + assert isinstance(metadata, dict) + + def test_read_dark(self, seq): + dark, gain = seq._read_dark_gain() + assert dark.shape == (64, 64) + assert gain.shape == (64, 64) + + def test_read_ref_none(self, seq): + seq.gain = None + dark, gain = seq._read_dark_gain() + assert gain is None + + @pytest.mark.parametrize("nav_shape", [None, (5, 2), (5, 3)]) + @pytest.mark.parametrize("distributed", [True, False]) + @pytest.mark.parametrize("lazy", [True, False]) + def test_read(self, seq, nav_shape, distributed, lazy): + data = seq.read_data(navigation_shape=nav_shape) + data2 = seq.read_data( + navigation_shape=nav_shape, distributed=distributed, lazy=lazy + ) + if nav_shape is None: + nav_shape = (10,) + assert data["data"].shape == (*nav_shape, 64, 64) + np.testing.assert_array_equal(data["data"], data2["data"]) + + def test_file_reader(self, seq): + file = data_path + "/test.seq" + data_dict = file_reader(file) + data_dict2 = seq.read_data() + np.testing.assert_array_equal(data_dict[0]["data"], data_dict2["data"]) + + +class TestLoadCeleritas: + @pytest.fixture + def seq(self): + folder = celeritas1_path + kws = { + "file": folder + "/test.seq", + "top": folder + "/test_Top_14-04-59.355.seq", + "bottom": folder + "/test_Bottom_14-04-59.396.seq", + "dark": folder + "/test.seq.dark.mrc", + "gain": folder + "/test.seq.gain.mrc", + "xml": folder + "/test.seq.Config.Metadata.xml", + "metadata": folder + "/test_Top_14-04-59.355.seq.metadata", + } + return CeleritasReader(**kws) + + @pytest.fixture + def seq2(self): + folder = celeritas3_path + kws = { + "file": folder 
+ "/test.seq", + "top": folder + "/test_Top_14-13-42.780.seq", + "bottom": folder + "/test_Bottom_14-13-42.822.seq", + "dark": folder + "/test.seq.dark.mrc", + "xml": folder + "/test.seq.Config.Metadata.xml", + "metadata": folder + "/test_Top_14-13-42.780.seq", + } + return CeleritasReader(**kws) + + def test_parse_header(self, seq): + print(seq.bottom) + header = seq._read_file_header() + assert header["ImageWidth"] == 256 + assert header["ImageHeight"] == 8192 + assert header["ImageBitDepthReal"] == 12 + assert header["NumFrames"] == 4 # this is wrong + assert header["TrueImageSize"] == 4202496 + np.testing.assert_almost_equal( + header["FPS"], 300, 1 + ) # This value is wrong for the celeritas camera + + def test_parse_metadata(self, seq): + print(seq.metadata_file) + header = seq._read_metadata() + print(header) + + def test_parse_xml(self, seq): + xml = seq._read_xml() + assert xml["FileInfo"]["ImageSizeX"]["Value"] == 256 + assert xml["FileInfo"]["ImageSizeY"]["Value"] == 128 + assert xml["FileInfo"]["FrameRate"]["Value"] == 40000 # correct FPS + assert xml["FileInfo"]["DarkRef"]["Value"] == "Yes" + assert xml["FileInfo"]["GainRef"]["Value"] == "Yes" + assert xml["FileInfo"]["SegmentPreBuffer"]["Value"] == 128 + assert not np.any(seq.metadata["Signal"]["BadPixels"]) + + def test_bad_pixels_xml(self, seq): + folder = celeritas1_path + seq.xml = folder + "/test2.seq.Config.Metadata.xml" + xml = seq._read_xml() + assert xml["FileInfo"]["ImageSizeX"]["Value"] == 256 + assert xml["FileInfo"]["ImageSizeY"]["Value"] == 128 + assert xml["FileInfo"]["FrameRate"]["Value"] == 40000 # correct FPS + assert xml["FileInfo"]["DarkRef"]["Value"] == "Yes" + assert xml["FileInfo"]["GainRef"]["Value"] == "Yes" + assert xml["FileInfo"]["SegmentPreBuffer"]["Value"] == 128 + data_dict = seq.read_data() + data_dict["data"][:, seq.metadata["Signal"]["BadPixels"]] = 0 + assert np.any(seq.metadata["Signal"]["BadPixels"]) + + def test_bad_pixel_failure(self, seq): + with 
pytest.raises(KeyError): + folder = celeritas1_path + seq.xml = folder + "/test3.seq.Config.Metadata.xml" + seq._read_xml() + + @pytest.mark.parametrize("nav_shape", [None, (5, 4), (5, 3)]) + @pytest.mark.parametrize("distributed", [True, False]) + @pytest.mark.parametrize("lazy", [True, False]) + def test_read(self, seq, nav_shape, distributed, lazy): + data_dict = seq.read_data( + navigation_shape=nav_shape, + ) + + data_dict2 = seq.read_data( + navigation_shape=nav_shape, lazy=lazy, distributed=distributed + ) + shape = (512, 128, 256) + if nav_shape is not None: + shape = nav_shape + shape[1:] + assert data_dict["data"].shape == shape + assert data_dict["axes"][-1]["size"] == data_dict["data"].shape[-1] + assert data_dict["axes"][-2]["size"] == data_dict["data"].shape[-2] + + np.testing.assert_array_equal(data_dict["data"], data_dict2["data"]) + + @pytest.mark.parametrize("chunks", [(2, 2, 128, 256), (4, 1, 128, 256)]) + @pytest.mark.parametrize("distributed", [True, False]) + def test_chunking(self, seq, chunks, distributed): + data_dict = seq.read_data( + navigation_shape=(4, 4), + lazy=True, + distributed=distributed, + chunks=chunks, + ) + assert isinstance(data_dict["data"], dask.array.Array) + assert data_dict["data"].shape == (4, 4, 128, 256) + chunk_sizes = tuple([c[0] for c in data_dict["data"].chunks[:2]]) + assert chunk_sizes == chunks[:2] + + @pytest.mark.parametrize( + "kwargs", + ( + {"filename": celeritas1_path + "/test_14-04-59.355.seq"}, + {"filename": None, "top": celeritas1_path + "/test_14-04-59.355.seq"}, + ), + ) + def test_file_loader_failures(self, kwargs): + with pytest.raises(ValueError): + file_reader( + **kwargs, + celeritas=True, + ) + + def test_load_top_bottom( + self, + ): + data_dict_top = file_reader( + celeritas1_path + "/test_Top_14-04-59.355.seq", + celeritas=True, + ) + data_dict_bottom = file_reader( + celeritas1_path + "/test_Bottom_14-04-59.396.seq", + celeritas=True, + ) + np.testing.assert_array_equal( + 
data_dict_top[0]["data"], data_dict_bottom[0]["data"] + ) + + def test_read_data_no_xml(self, seq2): + seq2.xml = None + seq2.read_data() + + def test_load_file(self): + data_dict = file_reader( + celeritas1_path + "/test_Top_14-04-59.355.seq", + celeritas=True, + ) + assert data_dict[0]["data"].shape == (512, 128, 256) + + def test_load_file2(self): + data_dict = file_reader( + celeritas2_path + "/Movie_00785_Top_13-49-04.160.seq", + celeritas=True, + ) + assert data_dict[0]["data"].shape == (5, 256, 256) + + def test_load_file3(self): + data_dict = file_reader( + celeritas3_path + "/test_Bottom_14-13-42.822.seq", + celeritas=True, + lazy=True, + ) + assert isinstance(data_dict[0]["data"], dask.array.Array) + assert data_dict[0]["data"].shape == (512, 64, 64) + + def test_hyperspy(self): + hs.load(celeritas3_path + "/test_Bottom_14-13-42.822.seq", celeritas=True) diff --git a/rsciio/tests/utils/test_utils.py b/rsciio/tests/utils/test_utils.py index d7154874..1cf1303f 100644 --- a/rsciio/tests/utils/test_utils.py +++ b/rsciio/tests/utils/test_utils.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from rsciio.utils.tools import DTBox, dict2sarray +from rsciio.utils.tools import DTBox, dict2sarray, memmap_distributed, get_chunk_slice import rsciio.utils.date_time_tools as dtt dt = [("x", np.uint8), ("y", np.uint16), ("text", (bytes, 6))] @@ -32,6 +32,30 @@ def _get_example(date, time, time_zone=None): serial3 = 42563.95662037037 +class TestDistributedMemmapTools: + def test_get_chunk_slices(self): + chunked_slices, chunks = get_chunk_slice( + shape=(10, 20, 30), chunks=(10, 10, 10) + ) + assert chunks == ((10,), (10, 10), (10, 10, 10)) + assert chunked_slices.shape == (1, 2, 3) + + def test_distributed_memmap(self, tmp_path): + arr = np.random.random(size=(2, 10, 10)) + file = tmp_path / "test.bin" + new = np.memmap(file, dtype=float, mode="w+", offset=16, shape=(2, 10, 10)) + new[:] = arr + new.flush + + read_data = np.memmap(file, dtype=float, offset=16, 
shape=(2, 10, 10)) + np.testing.assert_array_equal(read_data, arr) + + distributed_data = memmap_distributed( + file, dtype=float, offset=16, shape=(2, 10, 10), chunks=(1, 5, 5), key=None + ) + np.testing.assert_array_equal(distributed_data, arr) + + def test_d2s_fail(): d = dict(x=5, y=10, text="abcdef") with pytest.raises(ValueError): diff --git a/rsciio/utils/tools.py b/rsciio/utils/tools.py index a9a28174..e8db23e2 100644 --- a/rsciio/utils/tools.py +++ b/rsciio/utils/tools.py @@ -25,6 +25,7 @@ from contextlib import contextmanager import importlib +import dask.array as da import numpy as np from box import Box from pint import UnitRegistry @@ -40,6 +41,203 @@ def dummy_context_manager(*args, **kwargs): yield +def seek_read(file, dtype, pos): + file.seek(pos) + data = np.squeeze(np.fromfile(file, dtype, count=1))[()] + return data + + +def read_binary_metadata(file, mapping_dict): + """This function reads binary metadata in a batch like process. + The mapping dict is passed as dictionary with a "key":[data,location]" + format. + """ + if file is None: + return None + try: + with open(file, mode="rb") as f: + metadata = { + m: seek_read(f, mapping_dict[m][0], mapping_dict[m][1]) + for m in mapping_dict + } + return metadata + except FileNotFoundError: + _logger.warning( + msg="File " + file + " not found. 
Please" + "move it to the same directory to read" + " the metadata " + ) + return None + + +def xml_branch(child): + new_dict = {} + if len(child) != 0: + for k in child.keys(): + try: + new_dict[k] = float(child.get(key=k)) + except ValueError: + new_dict[k] = child.get(key=k) + for c in child: + if c.tag not in new_dict: + new_dict[c.tag] = xml_branch(c) + else: + if isinstance(new_dict[c.tag], list): + new_dict[c.tag].append(xml_branch(c)) + else: + new_dict[c.tag] = [new_dict[c.tag], xml_branch(c)] + return new_dict + else: + new_dict = child.attrib + for key in new_dict: + try: + new_dict[key] = float(new_dict[key]) + except ValueError: + new_dict[key] = new_dict[key] + return new_dict + + +def parse_xml(file): + try: + tree = ET.parse(file) + xml_dict = xml_branch(tree.getroot()) + except FileNotFoundError: + _logger.warning( + msg="File " + file + " not found. Please" + "move it to the same directory to read" + " the metadata " + ) + return None + return xml_dict + + +def get_chunk_slice( + shape, + chunks="auto", + block_size_limit=None, + dtype=None, +): + chunks = da.core.normalize_chunks( + chunks=chunks, shape=shape, limit=block_size_limit, dtype=dtype + ) + chunks_shape = tuple([len(c) for c in chunks]) + slices = np.empty(shape=chunks_shape, dtype=object) + for ind in np.ndindex(chunks_shape): + current_chunk = [chunk[i] for i, chunk in zip(ind, chunks)] + starts = [int(np.sum(chunk[:i])) for i, chunk in zip(ind, chunks)] + stops = [s + c for s, c in zip(starts, current_chunk)] + slices[ind] = tuple([slice(start, stop) for start, stop in zip(starts, stops)]) + return da.from_array(slices, chunks=1), chunks + + +def slice_memmap(slice, file, dtypes, key=None, **kwargs): + slice = np.squeeze(slice)[()] + print(slice) + + data = np.memmap(file, dtypes, **kwargs) + if key is not None: + data = data[key] + return data[slice] + + +def memmap_distributed( + file, + dtype, + offset=0, + shape=None, + order="C", + chunks="auto", + block_size_limit=None, + 
key="Array", +): + """Drop-in replacement for `np.memmap` allowing for distributed loading of data. + This always loads the data using dask which can be beneficial in many cases, but + may not be ideal in others. + + The chunks and block_size_limit are for describing an ideal chunk shape and size + as defined using the `da.core.normalize_chunks` function. + + Notes + ----- + Currently `da.map_blocks` does not allow for multiple outputs. As a result one + "Key" is allowed which can be used when the given dtype has a keyed input. + For example: dtype = (("Array", int, (128,128)), + ("sec", "