Skip to content

Commit

Permalink
Lazily load the data in the frame-cache format
Browse files Browse the repository at this point in the history
Previously, just instantiating the imageseries would cause the data to be
loaded. For some of the larger datasets from eiger, this could use a lot of
RAM and time.

However, in some cases, we want to instantiate an imageseries just as a
convenient (and somewhat standardized) way to look at things such as
metadata (such as omegas) and other information without actually needing
to load the data. This is actually something we do in the GUI to check the
omegas (we do it for all image series types, in fact, not just frame-cache).

This commit changes the class to wait to load the data until it is actually
requested. This makes loading it in the Simple Image Series loader in the
GUI (which is done to check metadata) *significantly* faster.

It still loads in all the data the first time any data is requested. Another
potentially useful change would be to just load in whatever frame was
requested and cache that frame in a dict (to be returned next time).
However, maybe that could be done in the future if we encounter a more
relevant use case for it.

Signed-off-by: Patrick Avery <[email protected]>
  • Loading branch information
psavery committed Aug 18, 2024
1 parent 433a0d5 commit e9017ee
Showing 1 changed file with 54 additions and 46 deletions.
100 changes: 54 additions & 46 deletions hexrd/imageseries/load/framecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ def __init__(self, fname, style='npz', **kwargs):
*kwargs* - keyword arguments (none required)
"""
self._fname = fname
self._framelist = []
if style.lower() in ('yml', 'yaml', 'test'):
self._from_yml = True

Check warning on line 27 in hexrd/imageseries/load/framecache.py

View check run for this annotation

Codecov / codecov/patch

hexrd/imageseries/load/framecache.py#L27

Added line #L27 was not covered by tests
self._load_yml()
self._load_cache(from_yml=True)
else:
self._from_yml = False
self._load_cache()

def _load_yml(self):
Expand All @@ -38,62 +40,65 @@ def _load_yml(self):
self._dtype = np.dtype(datad['dtype'])
self._meta = yamlmeta(d['meta'], path=self._cache)

def _load_cache(self, from_yml=False):
def _load_cache(self):
arrs = np.load(self._fname)
# HACK: while the loaded npz file has a getitem method
# that mimicks a dict, it doesn't have a "pop" method.
# must make an empty dict to pop after assignment of
# class attributes so we can get to the metadata
keysd = dict.fromkeys(list(arrs.keys()))
self._nframes = int(arrs['nframes'])
self._shape = tuple(arrs['shape'])
# Check the type so we can read files written
# using Python 2.7
array_dtype = arrs['dtype'].dtype
# Python 3
if array_dtype.type == np.str_:
dtype_str = str(arrs['dtype'])

Check warning on line 57 in hexrd/imageseries/load/framecache.py

View check run for this annotation

Codecov / codecov/patch

hexrd/imageseries/load/framecache.py#L57

Added line #L57 was not covered by tests
# Python 2.7
else:
dtype_str = arrs['dtype'].tobytes().decode()
self._dtype = np.dtype(dtype_str)

keysd.pop('nframes')
keysd.pop('shape')
keysd.pop('dtype')
for i in range(self._nframes):
keysd.pop(f"{i}_row")
keysd.pop(f"{i}_col")
keysd.pop(f"{i}_data")

# all rmaining keys should be metadata
for key in keysd:
keysd[key] = arrs[key]
self._meta = keysd

def _load_framelist(self):
"""load into list of csr sparse matrices"""
self._framelist = []
if from_yml:
if self._from_yml:
bpath = os.path.dirname(self._fname)
if os.path.isabs(self._cache):
cachepath = self._cache
else:
cachepath = os.path.join(bpath, self._cache)
arrs = np.load(cachepath)

for i in range(self._nframes):
row = arrs["%d_row" % i]
col = arrs["%d_col" % i]
data = arrs["%d_data" % i]
frame = csr_matrix((data, (row, col)),
shape=self._shape, dtype=self._dtype)
self._framelist.append(frame)
else:
arrs = np.load(self._fname)
# HACK: while the loaded npz file has a getitem method
# that mimicks a dict, it doesn't have a "pop" method.
# must make an empty dict to pop after assignment of
# class attributes so we can get to the metadata
keysd = dict.fromkeys(list(arrs.keys()))
self._nframes = int(arrs['nframes'])
self._shape = tuple(arrs['shape'])
# Check the type so we can read files written
# using Python 2.7
array_dtype = arrs['dtype'].dtype
# Python 3
if array_dtype.type == np.str_:
dtype_str = str(arrs['dtype'])
# Python 2.7
else:
dtype_str = arrs['dtype'].tobytes().decode()
self._dtype = np.dtype(dtype_str)
keysd.pop('nframes')
keysd.pop('shape')
keysd.pop('dtype')
for i in range(self._nframes):
row = arrs["%d_row" % i]
col = arrs["%d_col" % i]
data = arrs["%d_data" % i]
keysd.pop("%d_row" % i)
keysd.pop("%d_col" % i)
keysd.pop("%d_data" % i)
frame = csr_matrix((data, (row, col)),
shape=self._shape,
dtype=self._dtype)
self._framelist.append(frame)
# all rmaining keys should be metadata
for key in keysd:
keysd[key] = arrs[key]
self._meta = keysd

for i in range(self._nframes):
row = arrs[f"{i}_row"]
col = arrs[f"{i}_col"]
data = arrs[f"{i}_data"]
frame = csr_matrix((data, (row, col)),
shape=self._shape,
dtype=self._dtype)
self._framelist.append(frame)

@property
def _framelist_was_loaded(self):
# Just assume that if the framelist is empty, it wasn't loaded...
return len(self._framelist) > 0

@property
def metadata(self):
Expand Down Expand Up @@ -127,6 +132,9 @@ def shape(self):
return self._shape

def __getitem__(self, key):
if not self._framelist_was_loaded:
# Load the framelist now
self._load_framelist()
return self._framelist[key].toarray()

def __iter__(self):
Expand Down

0 comments on commit e9017ee

Please sign in to comment.