Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support track_order param for Files #190

Merged
merged 1 commit into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions h5pyd/_hl/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,16 +342,24 @@ def __len__(self):
def __iter__(self):
""" Iterate over the names of attributes. """
if self._objdb_attributes is not None:
if self._parent._track_order:
attrs = sorted(self._objdb_attributes.items(), key=lambda x: x[1]['created'])
else:
attrs = sorted(self._objdb_attributes.items())

ordered_attrs = {}
for a in attrs:
ordered_attrs[a[0]] = a[1]

for name in self._objdb_attributes:
for name in ordered_attrs:
yield name

else:
# make server request
req = self._req_prefix
# backup over the trailing slash in req
req = req[:-1]
rsp = self._parent.GET(req)
rsp = self._parent.GET(req, params={"CreateOrder": "1" if self._parent._track_order else "0"})
attributes = rsp['attributes']

attrlist = []
Expand Down Expand Up @@ -383,3 +391,33 @@ def __repr__(self):
if not self._parent.id.id:
return "<Attributes of closed HDF5 object>"
return f"<Attributes of HDF5 object at {id(self._parent.id)}>"

def __reversed__(self):
    """ Iterate over the names of attributes in reverse order.

    If attribute JSON is cached in ``self._objdb_attributes``, the names
    are sorted by their 'created' value when the parent's ``_track_order``
    is set (by name otherwise) and then yielded last-to-first.

    Otherwise the attribute list is requested from the server through the
    parent's ``GET`` (passing the ``CreateOrder`` parameter) and the names
    are yielded in the reverse of the order in which they were returned.
    """
    cached = self._objdb_attributes

    if cached is not None:
        if self._parent._track_order:
            # order by the 'created' value stored with each attribute
            names = sorted(cached, key=lambda name: cached[name]['created'])
        else:
            # attribute names are unique keys, so this is plain name order
            names = sorted(cached)

        # reverse the sorted list directly; no intermediate dict is needed
        yield from reversed(names)

    else:
        # make server request, backing up over the trailing slash in req
        req = self._req_prefix[:-1]
        params = {"CreateOrder": "1" if self._parent._track_order else "0"}
        rsp = self._parent.GET(req, params=params)

        names = [attr['name'] for attr in rsp['attributes']]
        yield from reversed(names)
3 changes: 2 additions & 1 deletion h5pyd/_hl/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ def allocated_size(self):
self._getVerboseInfo()
return self._allocated_size

def __init__(self, bind):
def __init__(self, bind, track_order=False):
"""Create a new Dataset object by binding to a low-level DatasetID."""

if not isinstance(bind, DatasetID):
Expand All @@ -732,6 +732,7 @@ def __init__(self, bind):
# make a numpy dtype out of the type json
self._dtype = createDataType(self.id.type_json)
self._item_size = getItemSize(self.id.type_json)
self._track_order = track_order

self._shape = self.get_shape()

Expand Down
12 changes: 10 additions & 2 deletions h5pyd/_hl/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def __init__(
logger=None,
owner=None,
linked_domain=None,
track_order=False,
retries=10,
timeout=180,
**kwds,
Expand Down Expand Up @@ -155,6 +156,10 @@ def __init__(
by admin users
linked_domain
Create new domain using the root of the linked domain
track_order
Whether to track dataset/group/attribute creation order within this file. Objects will be iterated
in ascending creation order if this is enabled, otherwise in ascending alphanumeric order.

retries
Number of retry attempts to be used if a server request fails
timeout
Expand Down Expand Up @@ -270,6 +275,8 @@ def __init__(
if bucket:
params["bucket"] = bucket

params["CreateOrder"] = "1" if track_order else "0"

# need some special logic for the first request in local mode
# to give the sockets time to initialize

Expand Down Expand Up @@ -393,8 +400,9 @@ def __init__(
self._verboseUpdated = None # when the verbose data was fetched
self._lastScan = None # when summary stats where last updated by server
self._dn_ids = dn_ids
self._track_order = track_order

Group.__init__(self, self._id)
Group.__init__(self, self._id, track_order=track_order)

def _getVerboseInfo(self):
now = time.time()
Expand All @@ -403,7 +411,7 @@ def _getVerboseInfo(self):
):
# resynch the verbose data
req = "/?verbose=1"
rsp_json = self.GET(req, use_cache=False)
rsp_json = self.GET(req, use_cache=False, params={"CreateOrder": "1" if self._track_order else "0"})

self.log.debug("get verbose info: {}".format(rsp_json))
props = {}
Expand Down
88 changes: 65 additions & 23 deletions h5pyd/_hl/group.py
mattjala marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Group(HLObject, MutableMappingHDF5):
""" Represents an HDF5 group.
"""

def __init__(self, bind, **kwargs):
def __init__(self, bind, track_order=False, **kwargs):
# print "group init, bind:", bind

""" Create a new Group object by binding to a low-level GroupID.
Expand All @@ -58,6 +58,7 @@ def __init__(self, bind, **kwargs):
if not isinstance(bind, GroupID):
raise ValueError(f"{bind} is not a GroupID")
HLObject.__init__(self, bind, **kwargs)
self._track_order = track_order
self._req_prefix = "/groups/" + self.id.uuid
self._link_db = {} # cache for links

Expand Down Expand Up @@ -149,7 +150,7 @@ def _get_link_json(self, h5path):
req = "/groups/" + parent_uuid + "/links/" + name

try:
rsp_json = self.GET(req)
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
except IOError:
raise KeyError("Unable to open object (Component not found)")

Expand Down Expand Up @@ -181,7 +182,7 @@ def _get_objdb_links(self):
group_json = objdb[self.id.id]
return group_json["links"]

def create_group(self, h5path):
def create_group(self, h5path, track_order=False):
""" Create and return a new subgroup.

Name may be absolute or relative. Fails if the target name already
Expand Down Expand Up @@ -237,6 +238,7 @@ def create_group(self, h5path):
parent_name = parent_name + '/' + link
self.log.debug("create group - parent name: {}".format(parent_name))
sub_group._name = parent_name
sub_group._track_order = track_order
parent_uuid = sub_group.id.id
else:
# sub-group already exists
Expand All @@ -258,6 +260,7 @@ def create_group(self, h5path):
if sub_group is None:
# didn't actually create anything
raise ValueError("name already exists")

return sub_group

def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
Expand Down Expand Up @@ -547,7 +550,7 @@ def require_group(self, name):
raise TypeError(f"Incompatible object ({grp.__class__.__name__}) already exists")
return grp

def getObjByUuid(self, uuid, collection_type=None):
def getObjByUuid(self, uuid, collection_type=None, track_order=False):
""" Utility method to get an obj based on collection type and uuid """
self.log.debug(f"getObjByUuid({uuid})")
obj_json = None
Expand Down Expand Up @@ -582,10 +585,10 @@ def getObjByUuid(self, uuid, collection_type=None):
# will need to get JSON from server
req = f"/{collection_type}/{uuid}"
# make server request
obj_json = self.GET(req)
obj_json = self.GET(req, params={"CreateOrder": "1" if track_order else "0"})

if collection_type == 'groups':
tgt = Group(GroupID(self, obj_json))
tgt = Group(GroupID(self, obj_json), track_order=track_order)
elif collection_type == 'datatypes':
tgt = Datatype(TypeID(self, obj_json))
elif collection_type == 'datasets':
Expand All @@ -595,13 +598,13 @@ def getObjByUuid(self, uuid, collection_type=None):
if "dims" in shape_json and len(shape_json["dims"]) == 1 and dtype_json["class"] == 'H5T_COMPOUND':
tgt = Table(DatasetID(self, obj_json))
else:
tgt = Dataset(DatasetID(self, obj_json))
tgt = Dataset(DatasetID(self, obj_json), track_order=track_order)
else:
raise IOError(f"Unexpected collection_type: {collection_type}")

return tgt

def __getitem__(self, name):
def __getitem__(self, name, track_order=False):
""" Open an object in the file """
# convert bytes to str for PY3
if isinstance(name, bytes):
Expand All @@ -614,11 +617,11 @@ def __getitem__(self, name):
if tgt is not None:
return tgt # ref'd object has not been deleted
if isinstance(name.id, GroupID):
tgt = self.getObjByUuid(name.id.uuid, collection_type="groups")
tgt = self.getObjByUuid(name.id.uuid, collection_type="groups", track_order=track_order)
elif isinstance(name.id, DatasetID):
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets")
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets", track_order=track_order)
elif isinstance(name.id, TypeID):
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets")
tgt = self.getObjByUuid(name.id.uuid, collection_type="datasets", track_order=track_order)
else:
raise IOError("Unexpected Error - ObjectID type: " + name.__class__.__name__)
return tgt
Expand All @@ -631,11 +634,11 @@ def __getitem__(self, name):
link_class = link_json['class']

if link_class == 'H5L_TYPE_HARD':
tgt = self.getObjByUuid(link_json['id'], collection_type=link_json['collection'])
tgt = self.getObjByUuid(link_json['id'], collection_type=link_json['collection'], track_order=track_order)
elif link_class == 'H5L_TYPE_SOFT':
h5path = link_json['h5path']
soft_parent_uuid, soft_json = self._get_link_json(h5path)
tgt = self.getObjByUuid(soft_json['id'], collection_type=soft_json['collection'])
tgt = self.getObjByUuid(soft_json['id'], collection_type=soft_json['collection'], track_order=track_order)

elif link_class == 'H5L_TYPE_EXTERNAL':
# try to get a handle to the file and return the linked object...
Expand All @@ -651,7 +654,8 @@ def __getitem__(self, name):
endpoint = self.id.http_conn.endpoint
username = self.id.http_conn.username
password = self.id.http_conn.password
f = File(external_domain, endpoint=endpoint, username=username, password=password, mode='r')
f = File(external_domain, endpoint=endpoint, username=username, password=password, mode='r',
track_order=track_order)
except IOError:
# unable to find external link
raise KeyError("Unable to open file: " + link_json['h5domain'])
Expand All @@ -675,7 +679,7 @@ def __getitem__(self, name):
tgt._name = name
return tgt

def get(self, name, default=None, getclass=False, getlink=False):
def get(self, name, default=None, getclass=False, getlink=False, track_order=False):
""" Retrieve an item or other information.

"name" given only:
Expand All @@ -699,18 +703,17 @@ def get(self, name, default=None, getclass=False, getlink=False):
>>> if cls == SoftLink:
... print '"foo" is a soft link!'
"""

if not (getclass or getlink):
try:
return self[name]
return self.__getitem__(name, track_order)
except KeyError:
return default

if name not in self:
return default

elif getclass and not getlink:
obj = self.__getitem__(name)
obj = self.__getitem__(name, track_order)
if obj is None:
return None
if obj.id.__class__ is GroupID:
Expand Down Expand Up @@ -777,7 +780,7 @@ def __setitem__(self, name, obj):
raise IOError("cannot create subgroup of softlink")
parent_uuid = link_json["id"]
req = "/groups/" + parent_uuid
group_json = self.GET(req)
group_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
tgt = Group(GroupID(self, group_json))
tgt[basename] = obj

Expand Down Expand Up @@ -867,7 +870,7 @@ def __len__(self):
return len(links_json)

req = "/groups/" + self.id.uuid
rsp_json = self.GET(req)
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
return rsp_json['linkCount']

def __iter__(self):
Expand All @@ -876,7 +879,7 @@ def __iter__(self):

if links is None:
req = "/groups/" + self.id.uuid + "/links"
rsp_json = self.GET(req)
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
links = rsp_json['links']

# reset the link cache
Expand All @@ -888,7 +891,16 @@ def __iter__(self):
for x in links:
yield x['title']
else:
for name in links:
if self._track_order:
links = sorted(links.items(), key=lambda x: x[1]['created'])
else:
links = sorted(links.items())

ordered_links = {}
for link in links:
ordered_links[link[0]] = link[1]

for name in ordered_links:
yield name

def __contains__(self, name):
Expand Down Expand Up @@ -1092,7 +1104,7 @@ def visititems(self, func):
else:
# request from server
req = "/groups/" + parent.id.uuid + "/links"
rsp_json = self.GET(req)
rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"})
links = rsp_json['links']
for link in links:
obj = None
Expand Down Expand Up @@ -1137,6 +1149,36 @@ def __repr__(self):
r = f'<HDF5 group {namestr} ({len(self)} members)>'
return r

def __reversed__(self):
    """ Iterate over member names in reverse order.

    If ``self._get_objdb_links()`` returns a mapping of links, the names
    are sorted by each link's 'created' value when ``self._track_order``
    is set (by name otherwise) and then yielded last-to-first.

    If it returns None, the links are requested from the server (passing
    the ``CreateOrder`` parameter), ``self._link_db`` is rebuilt from the
    response, and the link titles are yielded in the reverse of the order
    in which they were returned.
    """
    links = self._get_objdb_links()

    if links is None:
        req = "/groups/" + self.id.uuid + "/links"
        params = {"CreateOrder": "1" if self._track_order else "0"}
        rsp_json = self.GET(req, params=params)
        links = rsp_json['links']

        # reset the link cache
        self._link_db = {link["title"]: link for link in links}

        for link in reversed(links):
            yield link['title']
    else:
        if self._track_order:
            # order by the 'created' value stored with each link
            names = sorted(links, key=lambda name: links[name]['created'])
        else:
            # link names are unique keys, so this is plain name order
            names = sorted(links)

        # reverse the sorted list directly; no intermediate dict is needed
        yield from reversed(names)


class HardLink(object):

Expand Down
Loading