Add API to attach HDF5 attributes to datasets #1997

Open · wants to merge 1 commit into master
1 change: 1 addition & 0 deletions RELEASE_NOTES.rst
@@ -19,6 +19,7 @@ ARTIQ-9 (Unreleased)
* Python 3.12 support.
* Compiler can now give automatic suggestions for ``kernel_invariants``.
* Idle kernels now restart when written with ``artiq_coremgmt`` and stop when erased/removed from config.
* HDF5 attributes can be attached to datasets using ``set_dataset_metadata()``.

ARTIQ-8
-------
15 changes: 15 additions & 0 deletions artiq/examples/no_hardware/repository/hdf5_attributes.py
@@ -0,0 +1,15 @@
import numpy as np

from artiq.experiment import *


class HDF5Attributes(EnvExperiment):
    """Archive data to HDF5 with attributes"""

    def run(self):
        dummy = np.empty(20)
        dummy.fill(np.nan)
        self.set_dataset("dummy", dummy,
                         broadcast=True, archive=True)
        self.set_dataset_metadata("dummy", "k1", "v1")
        self.set_dataset_metadata("dummy", "k2", "v2")
15 changes: 15 additions & 0 deletions artiq/language/environment.py
@@ -446,6 +446,21 @@ def append_to_dataset(self, key, value):
        efficiently as incremental modifications in broadcast mode."""
        self.__dataset_mgr.append_to(key, value)

    @rpc(flags={"async"})
    def set_dataset_metadata(self, key, metadata_key, metadata_value):
        """Attach metadata to a dataset.

        The metadata is written as HDF5 attributes if the dataset was
        created with ``set_dataset(..., archive=True)``.

        :param key: The key of an already existing dataset. If the dataset
            does not exist, ``KeyError`` is raised.
        :param metadata_key: The metadata key, a string. If it already
            exists, its value is overwritten.
        :param metadata_value: The value attached to ``metadata_key``. It
            may be of any type representable as an HDF5 attribute; see the
            HDF5 documentation for details.
        """
        self.__dataset_mgr.set_metadata(key, metadata_key, metadata_value)

    def get_dataset(self, key, default=NoDefault, archive=True):
        """Returns the contents of a dataset.

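A usage sketch to complement the docstring (not part of this diff; the dataset name, keys, and values are illustrative): any value h5py accepts as an HDF5 attribute can be attached, e.g. strings, scalars, or small arrays.

# Inside an EnvExperiment's run(); assumes numpy imported as np.
self.set_dataset("spectrum", np.zeros(100), archive=True)
self.set_dataset_metadata("spectrum", "units", "arbitrary")           # string
self.set_dataset_metadata("spectrum", "center_frequency", 80e6)       # float scalar
self.set_dataset_metadata("spectrum", "scan_offsets", np.arange(5))   # small array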
15 changes: 14 additions & 1 deletion artiq/master/worker_db.py
@@ -120,6 +120,7 @@ class DatasetManager:
    def __init__(self, ddb):
        self._broadcaster = Notifier(dict())
        self.local = dict()
        self.hdf5_attributes = dict()
        self.archive = dict()
        self.metadata = dict()

@@ -142,7 +143,7 @@ def set(self, key, value, metadata, broadcast, persist, archive):
            self.local[key] = value
        elif key in self.local:
            del self.local[key]

        self.metadata[key] = metadata

    def _get_mutation_target(self, key):
@@ -184,12 +185,24 @@ def get_metadata(self, key):
            return self.metadata[key]
        return self.ddb.get_metadata(key)

    def set_metadata(self, key, metadata_key, metadata_value):
        if key not in self.local:
            raise KeyError(f"Dataset '{key}' does not exist.")
        if key not in self.hdf5_attributes:
            self.hdf5_attributes[key] = dict()
        self.hdf5_attributes[key][metadata_key] = metadata_value

    def write_hdf5(self, f):
        datasets_group = f.create_group("datasets")
        for k, v in self.local.items():
            m = self.metadata.get(k, {})
            _write(datasets_group, k, v, m)

        for k, attrs in self.hdf5_attributes.items():
            assert k in datasets_group
            for attr_k, attr_v in attrs.items():
                datasets_group[k].attrs[attr_k] = attr_v

        archive_group = f.create_group("archive")
        for k, v in self.archive.items():
            m = self.metadata.get(k, {})
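To make the file layout concrete, a standalone sketch of what ``write_hdf5`` produces, mirroring the test below with plain h5py and an in-memory file (dataset name and attribute are illustrative):

import io

import h5py
import numpy as np

bio = io.BytesIO()
with h5py.File(bio, "w") as f:
    group = f.create_group("datasets")
    group["dummy"] = np.full(20, np.nan)
    # As in write_hdf5 above: attributes live on the dataset node itself.
    group["dummy"].attrs["k1"] = "v1"

bio.seek(0)
with h5py.File(bio, "r") as f:
    print(dict(f["datasets/dummy"].attrs))  # {'k1': 'v1'}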
59 changes: 59 additions & 0 deletions artiq/test/test_hdf5_attributes.py
@@ -0,0 +1,59 @@
import unittest
import io
import numpy as np
import h5py

from artiq.experiment import *
from artiq.test.hardware_testbench import ExperimentCase


class HDF5Attributes(EnvExperiment):
    """Archive data to HDF5 with attributes"""

    def run(self):
        # Attach metadata to the "dummy" dataset. The dataset must be
        # archived (archive=True) for the attributes to reach the HDF5 file.
        self.set_dataset("dummy", np.full(20, np.nan), broadcast=True, archive=True)
        self.set_dataset_metadata("dummy", "k1", "v1")
        self.set_dataset_metadata("dummy", "k2", "v2")


class TestHDF5Attributes(ExperimentCase):
    def setUp(self):
        super().setUp()
        self.exp = self.execute(HDF5Attributes)
        self.dump()

    def dump(self):
        self.bio = io.BytesIO()
        with h5py.File(self.bio, "w") as f:
            self.dataset_mgr.write_hdf5(f)

        self.bio.seek(0)
        self.h5file = h5py.File(self.bio, "r")
        self.datasets = self.h5file.get("datasets")

    def test_dataset_metadata(self):
        self.assertEqual(dict(self.datasets["dummy"].attrs), {"k1": "v1", "k2": "v2"})
        self.assertTrue(np.all(np.isnan(self.datasets["dummy"])))

    def test_write_none(self):
        with self.assertRaises(KeyError):
            self.exp.set_dataset_metadata(None, "test", "none")
        self.exp.set_dataset_metadata("dummy", None, "none")
        with self.assertRaises(TypeError):
            self.dump()

    def test_write_absent(self):
        with self.assertRaises(KeyError):
            self.exp.set_dataset_metadata("absent", "test", "absent")

    def test_rewrite(self):
        self.exp.set_dataset_metadata("dummy", "k2", "rewrite")
        self.dump()
        self.assertEqual(dict(self.datasets["dummy"].attrs), {"k1": "v1", "k2": "rewrite"})

    def test_non_archive(self):
        self.exp.set_dataset("non_archive", np.full(30, np.nan), broadcast=True, archive=False)
        with self.assertRaises(KeyError):
            self.exp.set_dataset_metadata("non_archive", "k1", "v1")