From 9f20ab96f3dafec416c986cc6e90960a58cc5e6b Mon Sep 17 00:00:00 2001 From: yuk Date: Mon, 17 Oct 2022 11:31:13 +0800 Subject: [PATCH] Put spaces back Cleanup grammar, styles and code Signed-off-by: Egor Savkin Revise metadata logic Raise on absent keys, raise on wrong types, do not allow metadata on dataset itself. Make tests more real-world-like. Signed-off-by: Egor Savkin Add HDF5 metadata unit tests Signed-off-by: Egor Savkin Add HDF5 attributes support to release notes Signed-off-by: yuk hdf5: Warn when attaching metadata to absent key Signed-off-by: yuk hdf5: Add demo for saving attributes Signed-off-by: yuk hdf5: Allow saving custom attributes Signed-off-by: yuk --- RELEASE_NOTES.rst | 3 +- .../no_hardware/repository/hdf5_attributes.py | 15 +++++ artiq/language/environment.py | 15 +++++ artiq/master/worker_db.py | 15 ++++- artiq/test/test_hdf5_attributes.py | 59 +++++++++++++++++++ 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 artiq/examples/no_hardware/repository/hdf5_attributes.py create mode 100644 artiq/test/test_hdf5_attributes.py diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 9037d071b7..8518d51ef6 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -17,8 +17,9 @@ ARTIQ-9 (Unreleased) * Fastino monitoring with Moninj is now supported. * Qt6 support. * Python 3.12 support. -* Compiler can now give automatic suggestions for ``kernel_invariants``. +* Compiler can now give automatic suggestions for ``kernel_invariants``. * Idle kernels now restart when written with ``artiq_coremgmt`` and stop when erased/removed from config. +* HDF5 attributes can be attached to datasets using ``set_dataset_metadata()``. 
ARTIQ-8 ------- diff --git a/artiq/examples/no_hardware/repository/hdf5_attributes.py b/artiq/examples/no_hardware/repository/hdf5_attributes.py new file mode 100644 index 0000000000..a631f5afff --- /dev/null +++ b/artiq/examples/no_hardware/repository/hdf5_attributes.py @@ -0,0 +1,15 @@ +import numpy as np + +from artiq.experiment import * + + +class HDF5Attributes(EnvExperiment): + """Archive data to HDF5 with attributes""" + + def run(self): + dummy = np.empty(20) + dummy.fill(np.nan) + self.set_dataset("dummy", dummy, + broadcast=True, archive=True) + self.set_dataset_metadata("dummy", "k1", "v1") + self.set_dataset_metadata("dummy", "k2", "v2") diff --git a/artiq/language/environment.py b/artiq/language/environment.py index 12f8bbb12d..daab56c3d3 100644 --- a/artiq/language/environment.py +++ b/artiq/language/environment.py @@ -446,6 +446,21 @@ def append_to_dataset(self, key, value): efficiently as incremental modifications in broadcast mode.""" self.__dataset_mgr.append_to(key, value) + @rpc(flags={"async"}) + def set_dataset_metadata(self, key, metadata_key, metadata_value): + """Attach metadata to the dataset. + + The metadata is saved as HDF5 attributes if there was a call to + ``set_dataset(..., archive=True)`` with the same key. + + :param key: Key of an already existing dataset to which the metadata is attached. + If the dataset is absent, ``KeyError`` is raised. + :param metadata_key: The metadata key, a string. If it already exists, its value is overwritten. + :param metadata_value: Value to be attached to ``metadata_key``. Can be any valid HDF5 datatype. + See the HDF5 documentation for additional information. + """ + self.__dataset_mgr.set_metadata(key, metadata_key, metadata_value) + def get_dataset(self, key, default=NoDefault, archive=True): """Returns the contents of a dataset. 
diff --git a/artiq/master/worker_db.py b/artiq/master/worker_db.py index af316e4cfe..393303abd2 100644 --- a/artiq/master/worker_db.py +++ b/artiq/master/worker_db.py @@ -120,6 +120,7 @@ class DatasetManager: def __init__(self, ddb): self._broadcaster = Notifier(dict()) self.local = dict() + self.hdf5_attributes = dict() self.archive = dict() self.metadata = dict() @@ -142,7 +143,7 @@ def set(self, key, value, metadata, broadcast, persist, archive): self.local[key] = value elif key in self.local: del self.local[key] - + self.metadata[key] = metadata def _get_mutation_target(self, key): @@ -184,12 +185,24 @@ def get_metadata(self, key): return self.metadata[key] return self.ddb.get_metadata(key) + def set_metadata(self, key, metadata_key, metadata_value): + if key not in self.local: + raise KeyError(f"Dataset '{key}' does not exist.") + if key not in self.hdf5_attributes: + self.hdf5_attributes[key] = dict() + self.hdf5_attributes[key][metadata_key] = metadata_value + def write_hdf5(self, f): datasets_group = f.create_group("datasets") for k, v in self.local.items(): m = self.metadata.get(k, {}) _write(datasets_group, k, v, m) + for k, attrs in self.hdf5_attributes.items(): + assert k in datasets_group + for attr_k, attr_v in attrs.items(): + datasets_group[k].attrs[attr_k] = attr_v + archive_group = f.create_group("archive") for k, v in self.archive.items(): m = self.metadata.get(k, {}) diff --git a/artiq/test/test_hdf5_attributes.py b/artiq/test/test_hdf5_attributes.py new file mode 100644 index 0000000000..e7c0f691b3 --- /dev/null +++ b/artiq/test/test_hdf5_attributes.py @@ -0,0 +1,59 @@ +import unittest +import io +import numpy as np +import h5py + +from artiq.experiment import * +from artiq.test.hardware_testbench import ExperimentCase + + +class HDF5Attributes(EnvExperiment): + """Archive data to HDF5 with attributes""" + + def run(self): + # Attach attributes metadata to the HDF5 key + # The key should exist in the resulting HDF5 file (archive=True). 
+ self.set_dataset("dummy", np.full(20, np.nan), broadcast=True, archive=True) + self.set_dataset_metadata("dummy", "k1", "v1") + self.set_dataset_metadata("dummy", "k2", "v2") + + +class TestHDF5Attributes(ExperimentCase): + def setUp(self): + super().setUp() + self.exp = self.execute(HDF5Attributes) + self.dump() + + def dump(self): + self.bio = io.BytesIO() + with h5py.File(self.bio, "w") as f: + self.dataset_mgr.write_hdf5(f) + + self.bio.seek(0) + self.h5file = h5py.File(self.bio, "r") + self.datasets = self.h5file.get("datasets") + + def test_dataset_metadata(self): + self.assertEqual(self.datasets["dummy"].attrs, {"k1": "v1", "k2": "v2"}) + self.assertTrue(np.all((self.datasets["dummy"], np.full(20, np.nan)))) + + def test_write_none(self): + with self.assertRaises(KeyError): + self.exp.set_dataset_metadata(None, "test", "none") + self.exp.set_dataset_metadata("dummy", None, "none") + with self.assertRaises(TypeError): + self.dump() + + def test_write_absent(self): + with self.assertRaises(KeyError): + self.exp.set_dataset_metadata("absent", "test", "absent") + + def test_rewrite(self): + self.exp.set_dataset_metadata("dummy", "k2", "rewrite") + self.dump() + self.assertEqual(self.datasets["dummy"].attrs, {"k1": "v1", "k2": "rewrite"}) + + def test_non_archive(self): + self.exp.set_dataset("non_archive", np.full(30, np.nan), broadcast=True, archive=False) + with self.assertRaises(KeyError): + self.exp.set_dataset_metadata("non_archive", "k1", "v1")