From 0c2a6f65d0b7fe9bcb8f2e148e2a5289ae5b6ee3 Mon Sep 17 00:00:00 2001 From: "donald e. boyce" Date: Tue, 5 Mar 2024 14:19:53 -0500 Subject: [PATCH] developer docs, imageseries --- docs/source/dev/imageseries-load-options.rst | 140 +++++++++++++------ docs/source/dev/imageseries-overview.rst | 60 +++++--- docs/source/dev/imageseries.rst | 17 +-- hexrd/imageseries/load/rawimage.py | 29 ++-- 4 files changed, 157 insertions(+), 89 deletions(-) diff --git a/docs/source/dev/imageseries-load-options.rst b/docs/source/dev/imageseries-load-options.rst index 40d31ca64..528a4daf9 100644 --- a/docs/source/dev/imageseries-load-options.rst +++ b/docs/source/dev/imageseries-load-options.rst @@ -4,46 +4,6 @@ Keyword Options for imageseries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Each type of imageseries has its own keyword options for loading and saving. -Image Files -+++++++++++++ - -The format name is ``image-files``. - -This is usually written by hand. It is a YAML-based format, so the options are -in the file, not passed as keyword arguments. The file defines a -list of image files. It could be a list of single images or a list of -multi-imagefiles. - -YAML keywords are: - -``image-files`` - dictionary defining the image files - - - ``directory``: the directory containing the images - - ``files``: the list of images; it is a space separated list of file - names or glob patterns - -``empty-frames`` - (optional) number of frames to skip at the beginning of - each multiframe file; this is a commonly used option - -``max-total-frames`` - (optional) the maximum number of frames in the imageseries; this option - might be used for testing the data on a small number of frames - -``max-file-frames`` - (optional) the maximum number of frames to read per file; this would - be unusual - -``metadata`` - (required) it usually contains array data or string, but it can be empty - -There is actually no write function for this type of imageseries. 
It is -usually used to load image data to be sparsed and saved in another (usually -frame-cache) format. - - - HDF5 ++++++++++ @@ -81,7 +41,7 @@ The format name is ``frame-cache``. A better name might be sparse matrix format because the images are stored as sparse matrices in numpy npz file. There are actually two forms of the frame-cache. The original is a YAML-based format, which is now deprecated. -The other format is a single .npz file that includes array data and metadata. +The current format is a single .npz file that includes array data and metadata. **On Write.** @@ -96,3 +56,101 @@ The other format is a single .npz file that includes array data and metadata. **On Open.** No options are available on open. + + +Image Files ++++++++++++++ + +The format name is ``image-files``. + +**On Write.** + +There is actually no write function for this type of imageseries. It is +usually used to load image data to be sparsed and saved in another (usually +*frame-cache*) format. + + +**On Open.** + +The input file is usually written by hand. It is a YAML-based format, so the options are +in the file, not passed as keyword arguments. The file defines a +list of image files. It could be a list of single images or a list of +multi-image files. 
+ +YAML keywords are: + +``image-files`` + dictionary defining the image files + + - ``directory``: the directory containing the images + - ``files``: the list of images; it is a space-separated list of file + names or glob patterns + +``empty-frames`` + (optional) number of frames to skip at the beginning of + each multiframe file; this is a commonly used option + +``max-total-frames`` + (optional) the maximum number of frames in the imageseries; this option + might be used for testing the data on a small number of frames + +``max-file-frames`` + (optional) the maximum number of frames to read per file; this would + be unusual + +``metadata`` + (required) it usually contains array data or strings, but it can be empty + + +Raw Image +++++++++++++++++++++ +The format name is ``raw-image``. + +**On Write.** + +Like the *image-files* format, there is no writer for this format. + +**On Open.** + +This is another YAML-based format. + +YAML keywords are: + +``filename`` + name of the data file +``scalar`` + This defines the scalar details. + + - ``type``: can be "i", "f", "d", or "b" for integer, float, double or bool + - ``bytes``: 1, 2, 4, or 8, for integer types only + - ``signed``: true or false + - ``endian``: can be ``big`` or ``little`` + +``shape`` + 2-tuple of ints + +``skip`` + number of bytes to skip at the beginning of the file (in the header) + +Here is an example that describes the GE format: + +.. 
code-block:: YAML + + # + # YAML example for raw image + # + # For scalar definition: + # "type": i -> int, f -> float, d -> double, b -> bool + # "bytes" and "signed" are only for int types + # "bytes": 1, 2, 4, or 8 + # "signed": true | false + # "endian": use sys.byteorder to determine value for local system + # + filename: RUBY_4537.ge + shape: 2048 2048 + skip: 8192 + scalar: + type: i + bytes: 2 + signed: false + endian: little diff --git a/docs/source/dev/imageseries-overview.rst b/docs/source/dev/imageseries-overview.rst index 2004bcf4a..859135c95 100644 --- a/docs/source/dev/imageseries-overview.rst +++ b/docs/source/dev/imageseries-overview.rst @@ -20,18 +20,30 @@ The imageseries package has two main functions: open and save. The format refers to the source of the images; file and kwargs depend on the format. Possible formats currently are: -* ``hdf5`` - the images are stored in an HDF5 file and loaded on demand. +``hdf5`` + The images are stored in an HDF5 file and loaded on demand. ``file`` is the name of the HDF5 file. -* ``frame-cache`` - the images are stored sparse matrices in a numpy .npz + +``frame-cache`` + The images are stored as sparse matrices in a numpy .npz file; all of the sparse arrays are loaded on open, and a full (not sparse) array is delivered on request for a frame. There are two ways this can be done. In one, ``file`` is the name of the npz, and metadata is stored in the npz file. In the other, ``file`` is a YAML file that includes the name of the npz file as well as the metadata. -* ``image-files`` - the images are stored as one or more regular image files on + +``image-files`` + The images are stored as one or more regular image files on the file system. ``file`` is a YAML file describing listing a sequence of image files and metadata. -* ``array`` - images are stored as a 3D numpy array; used for testing. + +``raw-image`` + This is for nonstandard or less common image formats that do not load with + `fabio <https://github.com/silx-kit/fabio>`_. 
In that case, you can define + your own data format. + +``array`` + images are stored as a 3D numpy array; used for testing. See also :ref:`keyword-options`. @@ -44,35 +56,43 @@ frame is requested, the processed imageseries gets the frame from the original image series and applies the operations in order. It can then be saved as a regular imageseries and loaded as usual. -For more detail, see [processed imageseries](https://github.com/donald-e-boyce/hexrd/wiki/processed-ims). +For more detail, see :ref:`processed-ims`. **Interface.** -The imageseries provides a standard interface, somewhat like a 3D array. +The *imageseries* provides a standard interface for accessing images, +somewhat like a 3D array. Note that indexing does not work for slices or +multiple indices. -If `ims` is an imageseries instance: -* `len(ims)` is the number of frames -* `ims[j]` returns the j'th frame -* `ims.shape` is the shape of each frame -* `ims.dtype` is the numpy.dtype of each frame -* `ims.metadata` is a dictionary of metadata +If ``ims`` is an imageseries instance: + +* ``len(ims)`` is the number of frames +* ``ims[j]`` returns the j'th frame +* ``ims.shape`` is the shape of each frame +* ``ims.dtype`` is the numpy.dtype of each frame +* ``ims.metadata`` is a dictionary of metadata **Stats module.** This module delivers pixel by pixel stats on the imageseries. 
Functions are: -* `max(ims, nframes=0)` gives a single image that is the max over all frames -* `average(ims, nframes=0)` gives the mean pixel value over all the frames -* `median(ims, nframes=0)` gives median -* `percentile(ims, pct, nframes=0)` gives the percentile over all frames +* ``max(ims, nframes=0)`` gives a single image that is the max over all frames + or a subset +* ``average(ims, nframes=0)`` gives the mean pixel value over all the frames + or a subset +* ``median(ims, nframes=0)`` gives median + or a subset +* ``percentile(ims, pct, nframes=0)`` gives the percentile over all frames + or a subset The median is typically used to generate background images, but percentile could also be used too. **Omega module.** -For the hexrd work, we usually have a sequence of rotations about the vertical axis. Omega refers to the angle of rotation. The OmegaImageSeries is a subclass that has metadata for the rotation angles. +For the HEDM work, we usually have a sequence of rotations about the vertical +axis. Omega refers to the angle of rotation. The ``OmegaImageSeries`` is a +subclass that has metadata for the rotation angles. -See [omega](https://github.com/donald-e-boyce/hexrd/wiki/imageseries-omega). +See :ref:`omega`. .. include:: imageseries-usage.rst - .. include:: imageseries-load-options.rst - .. include:: imageseries-processed.rst +.. include:: imageseries-omega.rst diff --git a/docs/source/dev/imageseries.rst b/docs/source/dev/imageseries.rst index 3c29db352..d4d18baa0 100644 --- a/docs/source/dev/imageseries.rst +++ b/docs/source/dev/imageseries.rst @@ -1,17 +1,2 @@ imageseries package -=============== -The *imageseries* package provides a standard API for accessing image-based data sets. The primary tool in the package is the ImageSeries class. It's interface is analagous to a list of images with associated image metadata. The number of images is given by the len() function. 
Properties are defined for image shape (shape), data type (dtype) and metadata (metadata). Individual images are accessed by standard subscripting (e.g. image[i]). - -The package contains interfaces for loading (load) and saving (save) imageseries. Images can be loaded in three formats: 'array', 'hdf5' and 'frame-cache'. The 'array' format takes the images from a 3D numpy array. With 'hdf5', images are stored in hdf5 file and accessed on demand. The 'frame-cache' is a list of sparse matrices, useful for thresholded images. An imageseries can be saved in 'hdf5' or 'frame-cache' format. - -The imageseries package also contains a module for modifying the images (process). The process module provides the ProcessedImageSeries class, which takes a given imageseries and produces a new one by modifying the images. It has certain built-in image operations including transposition, flipping, dark subtraction and restriction to a subset. - - -Metadata ----------------- - -The metadata property is generically a dictionary. The actual contents depends on the application. For common hexrd applications in which the specimen is rotated while being exposed to x-rays, the metadata has an 'omega' key with an associated value being an nx2 numpy array where n is the number of frames and the two associated values give the omega (rotation) range for that frame. - -Reader Refactor -------------- -While the imageseries package is in itself indpendent of hexrd, it was used as the basis of a refactoring of the reader classes originally found in the detector module. The main reader class was ReadGE. In the refactored code, the reader classes are now in their own module, image_io, but imported into detector to preserve the interface. The image_io module contains a generic OmegaImageSeries class for working with imageseries having omega metadata. The refactored ReadGE class simply uses the OmegaImageSeries class to provide the same methods as the old class. 
New code should use the OmegaImageSeries (or the standard ImageSeries) class directly. +=================== diff --git a/hexrd/imageseries/load/rawimage.py b/hexrd/imageseries/load/rawimage.py index 8a3c3798c..a272ea1b8 100644 --- a/hexrd/imageseries/load/rawimage.py +++ b/hexrd/imageseries/load/rawimage.py @@ -10,18 +10,17 @@ class RawImageSeriesAdapter(ImageSeriesAdapter): - """collection of images in HDF5 format""" + """Image Data in Custom Format + + Parameters + ---------- + fname: string or Path + name of input YAML file describing the format + """ format = 'raw-image' def __init__(self, fname, **kwargs): - """Image data in custom format - - Parameters - ---------- - fname: string or Path - name of input YAML file describing the format - """ self.fname = fname with open(fname, "r") as f: y = yaml.safe_load(f) @@ -72,10 +71,16 @@ def _get_length(self): def typechars(numtype, bytes_=4, signed=False, little=True): """Return byte-type for data type and endianness - numtype (str) - "i", "f", "d", "b" for int, float, double or bool - bytes - number of bytes: 1,2,4, or 8 for ints only - signed (bool) - true for signed ints, false for unsigned - little (bool) - true for little endian + Parameters + ---------- + numtype: str {"i", "f", "d", "b"} + scalar type for int, float, double or bool + bytes: int + number of bytes: 1,2,4, or 8 (for ints only) + signed: bool + True for signed ints, False for unsigned + little: bool + True for little endian """ intbytes = { 1: "b",