Skip to content

Commit

Permalink
Revise metadata logic
Browse files Browse the repository at this point in the history
Raise on absent keys, raise on wrong types, do not allow metadata on dataset itself. Make tests more real-world-like.

Signed-off-by: Egor Savkin <[email protected]>
  • Loading branch information
thomasfire committed Nov 15, 2022
1 parent d204b97 commit 780398f
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 71 deletions.
14 changes: 3 additions & 11 deletions artiq/examples/no_hardware/repository/hdf5_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,11 @@

class HDF5Attributes(EnvExperiment):
"""Archive data to HDF5 with attributes"""
def run(self):
# Attach attributes to the HDF5 group `datasets`
self.set_dataset_metadata(None, {
"arr": np.array([1, 2, 3]),
"description": "demo",
})

def run(self):
dummy = np.empty(20)
dummy.fill(np.nan)
# `archive=True` is required in order to
# attach attributes to HDF5 datasets
self.set_dataset("dummy", dummy,
broadcast=True, archive=True)
self.set_dataset_metadata("dummy", {"k1": "v1", "k2": "v2"})

self.set_dataset_metadata("nothing", {"no": "op"})
self.set_dataset_metadata("dummy", "k1", "v1")
self.set_dataset_metadata("dummy", "k2", "v2")
35 changes: 21 additions & 14 deletions artiq/language/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from artiq.language import units
from artiq.language.core import rpc


__all__ = ["NoDefault", "DefaultMissing",
"PYONValue", "BooleanValue", "EnumerationValue",
"NumberValue", "StringValue",
Expand Down Expand Up @@ -50,6 +49,7 @@ def describe(self):

class PYONValue(_SimpleArgProcessor):
"""An argument that can be any PYON-serializable value."""

def __init__(self, default=NoDefault):
# Override the _SimpleArgProcessor init, as list defaults are valid
# PYON values
Expand All @@ -68,6 +68,7 @@ def describe(self):

class BooleanValue(_SimpleArgProcessor):
"""A boolean argument."""

def process(self, x):
if type(x) != bool:
raise ValueError("Invalid BooleanValue value")
Expand All @@ -81,6 +82,7 @@ class EnumerationValue(_SimpleArgProcessor):
:param choices: A list of string representing the possible values of the
argument.
"""

def __init__(self, choices, default=NoDefault):
self.choices = choices
super().__init__(default)
Expand Down Expand Up @@ -141,7 +143,7 @@ def __init__(self, default=NoDefault, unit="", scale=None,
raise KeyError("Unit {} is unknown, you must specify "
"the scale manually".format(unit))
if step is None:
step = scale/10.0
step = scale / 10.0
self.unit = unit
self.scale = scale
self.step = step
Expand Down Expand Up @@ -212,6 +214,7 @@ def get(self, key, processor, group, tooltip):
def check_unprocessed_arguments(self):
pass


class ProcessArgumentManager:
def __init__(self, unprocessed_arguments):
self.unprocessed_arguments = unprocessed_arguments
Expand All @@ -226,15 +229,17 @@ def get(self, key, processor, group, tooltip):
return r

def check_unprocessed_arguments(self):
unprocessed = set(self.unprocessed_arguments.keys()) -\
unprocessed = set(self.unprocessed_arguments.keys()) - \
self._processed_arguments
if unprocessed:
raise AttributeError("Invalid argument(s): " +
", ".join(unprocessed))


class HasEnvironment:
"""Provides methods to manage the environment of an experiment (arguments,
devices, datasets)."""

def __init__(self, managers_or_parent, *args, **kwargs):
self.children = []
if isinstance(managers_or_parent, tuple):
Expand Down Expand Up @@ -384,19 +389,19 @@ def append_to_dataset(self, key, value):
self.__dataset_mgr.append_to(key, value)

@rpc(flags={"async"})
def set_dataset_metadata(self, key, metadata):
"""Attach metadata to the dataset itself, or to a key in the dataset.
def set_dataset_metadata(self, key, metadata_key, metadata_value):
"""Attach metadata to the key in the dataset.
The metadata is saved as HDF5 attributes if there was a call to
``set_dataset(..., archive=True)`` with the same key.
:param key: If ``None``, attach the metadata to the dataset itself
(to the ``datasets`` group when saving to HDF5).
Otherwise, attach the metadata to the specified key.
:param metadata: Dict of key-value pair.
Must be compatible with HDF5 attributes.
:param key: Already existing key in the dataset, which will be the metadata attached to.
If absent, KeyError will be raised.
:param metadata_key: Key of the metadata of type string. If already exists, rewrites the metadata.
:param metadata_value: Value to be attached to metadata_key of any valid HDF5 datatype.
See https://docs.hdfgroup.org/hdf5/develop/group___h5_t.html for additional information.
"""
self.__dataset_mgr.set_metadata(key, metadata)
self.__dataset_mgr.set_metadata(key, metadata_key, metadata_value)

def get_dataset(self, key, default=NoDefault, archive=True):
"""Returns the contents of a dataset.
Expand Down Expand Up @@ -451,6 +456,7 @@ class Experiment:
Deriving from this class enables automatic experiment discovery in
Python modules.
"""

def prepare(self):
"""Entry point for pre-computing data necessary for running the
experiment.
Expand Down Expand Up @@ -496,6 +502,7 @@ class EnvExperiment(Experiment, HasEnvironment):
:class:`~artiq.language.environment.HasEnvironment` environment manager.
Most experiments should derive from this class."""

def prepare(self):
"""This default prepare method calls :meth:`~artiq.language.environment.Experiment.prepare`
for all children, in the order of registration, if the child has a
Expand All @@ -506,9 +513,9 @@ def prepare(self):
def is_experiment(o):
"""Checks if a Python object is a top-level experiment class."""
return (isclass(o)
and issubclass(o, Experiment)
and o is not Experiment
and o is not EnvExperiment)
and issubclass(o, Experiment)
and o is not Experiment
and o is not EnvExperiment)


def is_public_experiment(o):
Expand Down
38 changes: 16 additions & 22 deletions artiq/master/worker_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from sipyco.sync_struct import Notifier
from sipyco.pc_rpc import AutoTarget, Client, BestEffortClient


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -56,6 +55,7 @@ class DeviceError(Exception):
class DeviceManager:
"""Handles creation and destruction of local device drivers and controller
RPC clients."""

def __init__(self, ddb, virtual_devices=dict()):
self.ddb = ddb
self.virtual_devices = virtual_devices
Expand Down Expand Up @@ -155,7 +155,7 @@ def append_to(self, key, value):
def get(self, key, archive=False):
if key in self.local:
return self.local[key]

data = self.ddb.get(key)
if archive:
if key in self.archive:
Expand All @@ -164,13 +164,14 @@ def get(self, key, archive=False):
self.archive[key] = data
return data

def set_metadata(self, key, metadata):
if key:
if key not in self.local:
logger.warning(f"Key '{key}' not found in dataset.")
self.hdf5_attributes["datasets/" + key] = metadata
else:
self.hdf5_attributes["datasets"] = metadata
def set_metadata(self, key, metadata_key, metadata_value):
if not (isinstance(metadata_key, str) and isinstance(key, str)):
raise TypeError("both `key` and `metadata_key` should be of type `str`")
if key not in self.local:
raise KeyError(f"Key '{key}' not found in dataset.")
if key not in self.hdf5_attributes:
self.hdf5_attributes[key] = dict()
self.hdf5_attributes[key][metadata_key] = metadata_value

def write_hdf5(self, f):
datasets_group = f.create_group("datasets")
Expand All @@ -182,9 +183,13 @@ def write_hdf5(self, f):
_write(archive_group, k, v)

def write_hdf5_attributes(self, f):
datasets = f["datasets"]
for k, attrs in self.hdf5_attributes.items():
if k in f:
_write_attributes(f, k, attrs)
if k in datasets:
for attr_k, attr_v in attrs.items():
datasets[k].attrs[attr_k] = attr_v
else:
raise KeyError(f"Key '{k}' not found in `datasets` group.")


def _write(group, k, v):
Expand All @@ -195,14 +200,3 @@ def _write(group, k, v):
except TypeError as e:
raise TypeError("Error writing dataset '{}' of type '{}': {}".format(
k, type(v), e))


def _write_attributes(f, k, attrs):
# Add context to exception message when the user writes a attribute that is
# not representable in HDF5.
try:
for attr_k, attr_v in attrs.items():
f[k].attrs[attr_k] = attr_v
except TypeError as e:
raise TypeError("Error writing attribute '{}' of type '{}': {}".format(
attr_k, type(attr_v), e))
58 changes: 34 additions & 24 deletions artiq/test/test_hdf5_attributes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import unittest
import io
import numpy as np
import h5py

from artiq.experiment import *
from artiq.test.hardware_testbench import ExperimentCase
Expand All @@ -10,40 +12,48 @@ class HDF5Attributes(EnvExperiment):

def run(self):
# Attach attributes to the HDF5 group `datasets`
self.set_dataset_metadata(None, {
"arr": np.array([1, 2, 3]),
"description": "demo",
})

# `archive=True` is required in order to
# attach attributes to HDF5 datasets
# The key should exist in result HDF5 file.
self.set_dataset("dummy", np.full(20, np.nan), broadcast=True, archive=True)
self.set_dataset_metadata("dummy", {"k1": "v1", "k2": "v2"})

self.set_dataset("no_archive", np.full(30, np.nan), broadcast=False, archive=False)
self.set_dataset_metadata("no_archive", {"na_k": "na_v"})

self.set_dataset_metadata("nothing", {"k": "v"})
self.set_dataset_metadata(None, {"general": "metadata"})
self.set_dataset_metadata("dummy", "k1", "v1")
self.set_dataset_metadata("dummy", "k2", "v2")


class TestHDF5Attributes(ExperimentCase):
def setUp(self):
super().setUp()
self.exp = self.execute(HDF5Attributes)
self.dump()

def dump(self):
self.bio = io.BytesIO()
with h5py.File(self.bio, "w") as f:
self.dataset_mgr.write_hdf5(f)
self.dataset_mgr.write_hdf5_attributes(f)

self.bio.seek(0)
self.h5file = h5py.File(self.bio, "r")
self.datasets = self.h5file.get("datasets")

def test_dataset_metadata(self):
self.assertNotEqual(self.dataset_mgr, None)
self.assertEqual(self.dataset_mgr.hdf5_attributes["datasets/dummy"], {"k1": "v1", "k2": "v2"})
self.assertTrue(np.all((self.dataset_mgr.local["dummy"], np.full(20, np.nan))))
self.assertEqual(self.datasets["dummy"].attrs, {"k1": "v1", "k2": "v2"})
self.assertTrue(np.all((self.datasets["dummy"], np.full(20, np.nan))))

def test_write_none(self):
with self.assertRaises(TypeError):
self.exp.set_dataset_metadata(None, "test", "none")
with self.assertRaises(TypeError):
self.exp.set_dataset_metadata("dummy", None, "none")

def test_absent_key_metadata(self):
self.assertEqual(self.dataset_mgr.hdf5_attributes["datasets/nothing"], {"k": "v"})
def test_write_absent(self):
with self.assertRaises(KeyError):
self.exp.set_dataset_metadata("absent", "test", "absent")

def test_none_key_metadata(self):
self.assertEqual(self.dataset_mgr.hdf5_attributes["datasets"], {"general": "metadata"})
def test_rewrite(self):
self.exp.set_dataset_metadata("dummy", "k2", "rewrite")
self.dump()
self.assertEqual(self.datasets["dummy"].attrs, {"k1": "v1", "k2": "rewrite"})

def test_no_archive(self):
self.assertEqual(self.dataset_mgr.hdf5_attributes["datasets/no_archive"], {"na_k": "na_v"})
def test_non_archive(self):
self.exp.set_dataset("non_archive", np.full(30, np.nan), broadcast=True, archive=False)
with self.assertRaises(KeyError):
_ = self.dataset_mgr.local["no_archive"]
self.exp.set_dataset_metadata("non_archive", "k1", "v1")

0 comments on commit 780398f

Please sign in to comment.