diff --git a/extra_data/components.py b/extra_data/components.py index bf44c347..c1511e39 100644 --- a/extra_data/components.py +++ b/extra_data/components.py @@ -38,7 +38,7 @@ def _guess_axes(data, train_pulse_ids, unstack_pulses): if unstack_pulses: # Separate train & pulse dimensions, and arrange dimensions # so that the data is contiguous in memory. - dim_order = ['train', 'pulse'] + dims[1:] + dim_order = train_pulse_ids.names + dims[1:] return arr.unstack('train_pulse').transpose(*dim_order) else: return arr @@ -239,7 +239,17 @@ def _select_pulse_indices(pulses, firsts, counts): return np.concatenate(positions) - def _get_module_pulse_data(self, source, key, pulses, unstack_pulses): + def _get_module_pulse_data(self, source, key, pulses, unstack_pulses, + inner_index='pulseId'): + def get_inner_ids(f, ix_name='pulseId'): + ids = f.file[f'/INSTRUMENT/{source}/{group}/{ix_name}'][ + data_slice + ] + # Raw files have a spurious extra dimension + if ids.ndim >= 2 and ids.shape[1] == 1: + ids = ids[:, 0] + return ids + seq_arrays = [] data_path = "/INSTRUMENT/{}/{}".format(source, key.replace('.', '/')) for f in self.data._source_index[source]: @@ -263,14 +273,13 @@ def _get_module_pulse_data(self, source, key, pulses, unstack_pulses): np.arange(first_tid, last_tid + 1, dtype=np.uint64), chunk_counts.astype(np.intp), ) - pulse_id = f.file['/INSTRUMENT/{}/{}/pulseId'.format(source, group)][ - data_slice - ] - # Raw files have a spurious extra dimension - if pulse_id.ndim >= 2 and pulse_id.shape[1] == 1: - pulse_id = pulse_id[:, 0] + inner_ids = get_inner_ids(f, inner_index) if isinstance(pulses, by_id): + if inner_index == 'pulseId': + pulse_id = inner_ids + else: + pulse_id = get_inner_ids(f, 'pulseId') positions = self._select_pulse_ids(pulses, pulse_id) else: # by_index positions = self._select_pulse_indices( @@ -278,9 +287,9 @@ def _get_module_pulse_data(self, source, key, pulses, unstack_pulses): ) trainids = trainids[positions] - pulse_id = pulse_id[positions] + inner_ids = inner_ids[positions] index = pd.MultiIndex.from_arrays( - [trainids, pulse_id], names=['train', 'pulse'] + [trainids, inner_ids], names=['train', inner_index[:-2]] ) if isinstance(positions, slice): @@ -332,8 +341,8 @@ def _fill_value(value, dtype): value = dtype.type(value) return value - def get_array(self, key, pulses=np.s_[:], unstack_pulses=True, - fill_value=None): + def get_array(self, key, pulses=np.s_[:], unstack_pulses=True, *, + fill_value=None, subtrain_index='pulseId'): """Get a labelled array of detector data Parameters @@ -350,7 +359,15 @@ def get_array(self, key, pulses=np.s_[:], unstack_pulses=True, fill_value: int or float, optional Value to use for missing values. If None (default) the fill value is 0 for integers and np.nan for floats. + subtrain_index: str + Specify 'pulseId' (default) or 'cellId' to label the frames recorded + within each train. Pulse ID should allow this data to be matched with + other devices, but depends on how the detector was manually configured + when the data was taken. Cell ID refers to the memory cell used for + that frame in the detector hardware. """ + if subtrain_index not in {'pulseId', 'cellId'}: + raise ValueError("subtrain_index must be 'pulseId' or 'cellId'") pulses = _check_pulse_selection(pulses) arrays = [] @@ -360,7 +377,8 @@ def get_array(self, key, pulses=np.s_[:], unstack_pulses=True, # If that changes, this check will need to change as well. if key.startswith('image.'): arrays.append(self._get_module_pulse_data( - source, key, pulses, unstack_pulses)) + source, key, pulses, unstack_pulses, subtrain_index, + )) else: arrays.append(self.data.get_array(source, key)) modnos.append(modno) diff --git a/extra_data/tests/mockdata/detectors.py b/extra_data/tests/mockdata/detectors.py index 2e2bbb4f..bec6124c 100644 --- a/extra_data/tests/mockdata/detectors.py +++ b/extra_data/tests/mockdata/detectors.py @@ -34,7 +34,6 @@ def write_control(self, f): def image_keys(self): if self.raw: return [ - ('cellId', 'u2', (1,)), ('data', 'u2', self.image_dims), ('length', 'u4', (1,)), ('status', 'u2', (1,)), @@ -42,7 +41,6 @@ def image_keys(self): else: return [ - ('cellId', 'u2', ()), ('data', 'f4', self.image_dims), ('mask', 'u4', self.image_dims), ('gain', 'u1', self.image_dims), @@ -114,6 +112,10 @@ def write_instrument(self, f): pid = f.create_dataset('INSTRUMENT/%s:xtdf/image/pulseId' % self.device_id, (nframes, 1), 'u8', maxshape=(None, 1)) pid[:, 0] = pid_index + + cid = f.create_dataset('INSTRUMENT/%s:xtdf/image/cellId' % self.device_id, + (nframes, 1), 'u2', maxshape=(None, 1)) + cid[:, 0] = pid_index # Cell IDs mirror pulse IDs for now else: # Corrected data drops the extra dimension, and maxshape==shape. f.create_dataset( @@ -126,6 +128,11 @@ def write_instrument(self, f): (nframes,), 'u8', chunks=True, data=pid_index ) + f.create_dataset( # Cell IDs mirror pulse IDs for now + 'INSTRUMENT/%s:xtdf/image/cellId' % self.device_id, + (nframes,), 'u2', chunks=True, data=pid_index + ) + max_len = None if self.raw else nframes for (key, datatype, dims) in self.image_keys: f.create_dataset('INSTRUMENT/%s:xtdf/image/%s' % (self.device_id, key), diff --git a/extra_data/tests/test_components.py b/extra_data/tests/test_components.py index 981d8c4a..ce292b5b 100644 --- a/extra_data/tests/test_components.py +++ b/extra_data/tests/test_components.py @@ -50,6 +50,18 @@ def test_get_array_pulse_id(mock_fxe_raw_run): assert list(arr.coords['pulse']) == [1, 7, 22, 23] +def test_get_array_with_cell_ids(mock_fxe_raw_run): + run = RunDirectory(mock_fxe_raw_run) + det = LPD1M(run.select_trains(by_index[:3])) + arr = det.get_array('image.data', subtrain_index='cellId') + assert arr.shape == (16, 3, 128, 256, 256) + assert arr.dims == ('module', 'train', 'cell', 'slow_scan', 'fast_scan') + + arr = det.get_array('image.data', pulses=by_id[0], subtrain_index='cellId') + assert arr.shape == (16, 3, 1, 256, 256) + assert (arr.coords['cell'] == 0).all() + + def test_get_array_pulse_indexes(mock_fxe_raw_run): run = RunDirectory(mock_fxe_raw_run) det = LPD1M(run.select_trains(by_index[:3]))