Skip to content

Commit

Permalink
Fix H5S_sel_type enum declaration (#349)
Browse files Browse the repository at this point in the history
Also add some more comments and simplify `build_data_dict`
by moving the non-virtual case out.
  • Loading branch information
ArvidJB authored Jun 28, 2024
1 parent 29fe8ce commit 4087d76
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 75 deletions.
156 changes: 82 additions & 74 deletions versioned_hdf5/slicetools.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,6 @@ from libc.stdlib cimport malloc, free
from libc.string cimport strlen, strncmp
from libcpp.vector cimport vector

ctypedef enum H5S_sel_type:
H5S_SEL_ERROR = -1, #Error
H5S_SEL_NONE = 0, #Nothing selected
H5S_SEL_POINTS = 1, #Sequence of points selected
H5S_SEL_HYPERSLABS = 2, #"New-style" hyperslab selection defined
H5S_SEL_ALL = 3, #Entire extent selected
H5S_SEL_N = 4 #/*THIS MUST BE LAST

cdef extern from "hdf5.h":
# HDF5 types
ctypedef long int hid_t
Expand All @@ -27,20 +19,23 @@ cdef extern from "hdf5.h":
ctypedef long long hsize_t

# virtual Dataset functions
cdef herr_t H5Pget_virtual_count(hid_t dcpl_id, size_t *count) except <herr_t>-1
cdef ssize_t H5Pget_virtual_dsetname(hid_t dcpl_id, size_t index, char *name, size_t size) except <ssize_t>-1
cdef ssize_t H5Pget_virtual_filename(hid_t dcpl_id, size_t index, char *name, size_t size) except <ssize_t>-1
cdef hid_t H5Pget_virtual_vspace(hid_t dcpl_id, size_t index) except <hid_t>-1
cdef hid_t H5Pget_virtual_srcspace(hid_t dcpl_id, size_t index) except <hid_t>-1
cdef ssize_t H5Pget_virtual_dsetname(hid_t dcpl_id, size_t index, char *name, size_t size)
cdef ssize_t H5Pget_virtual_filename(hid_t dcpl_id, size_t index, char *name, size_t size)
cdef hid_t H5Pget_virtual_vspace(hid_t dcpl_id, size_t index)
cdef hid_t H5Pget_virtual_srcspace(hid_t dcpl_id, size_t index)

ctypedef enum H5S_sel_type:
H5S_SEL_ERROR = -1, #Error
H5S_SEL_NONE = 0, #Nothing selected
H5S_SEL_POINTS = 1, #Sequence of points selected
H5S_SEL_HYPERSLABS = 2, #"New-style" hyperslab selection defined
H5S_SEL_ALL = 3, #Entire extent selected
H5S_SEL_N = 4 #/*THIS MUST BE LAST

# TODO: this function actually returns an H5S_sel_type enum, but compilation fails
# when that's specified and we have an "except" clause. This looks like a bug in Cython?
# https://github.com/cython/cython/issues/6275
# cdef H5S_sel_type H5Sget_select_type(hid_t space_id) except <H5S_sel_type> -1
cdef int H5Sget_select_type(hid_t space_id) except <int>-1
cdef H5S_sel_type H5Sget_select_type(hid_t space_id) except H5S_sel_type.H5S_SEL_ERROR

cdef int H5Sget_simple_extent_ndims(hid_t space_id) except <int> -1
cdef htri_t H5Sget_regular_hyperslab(hid_t spaceid, hsize_t* start, hsize_t* stride, hsize_t* count, hsize_t* block) except <htri_t>-1
cdef int H5Sget_simple_extent_ndims(hid_t space_id)
cdef htri_t H5Sget_regular_hyperslab(hid_t spaceid, hsize_t* start, hsize_t* stride, hsize_t* count, hsize_t* block)

def spaceid_to_slice(space) -> Tuple:
"""
Expand Down Expand Up @@ -77,21 +72,30 @@ def hyperslab_to_slice(start, stride, count, block):


cdef _spaceid_to_slice(space_id: hid_t):
sel_type: H5S_sel_type = <H5S_sel_type> H5Sget_select_type(space_id)
"""
Helper function to read the data for `space_id` selection and
convert it to a Tuple of slices.
"""
sel_type: H5S_sel_type = H5Sget_select_type(space_id)

if sel_type == H5S_sel_type.H5S_SEL_ALL:
return Tuple()
elif sel_type == H5S_sel_type.H5S_SEL_HYPERSLABS:
slices: list = []

rank: cython.int = H5Sget_simple_extent_ndims(space_id)
if rank < 0:
raise ValueError('Cannot determine rank of selection.')
start_array: vector[hsize_t] = vector[hsize_t](rank)
stride_array: vector[hsize_t] = vector[hsize_t](rank)
count_array: vector[hsize_t] = vector[hsize_t](rank)
block_array: vector[hsize_t] = vector[hsize_t](rank)

H5Sget_regular_hyperslab(space_id, start_array.data(), stride_array.data(),
count_array.data(), block_array.data())
ret: htri_t = H5Sget_regular_hyperslab(space_id, start_array.data(), stride_array.data(),
count_array.data(), block_array.data())
if ret < 0:
raise ValueError('Cannot determine hyperslab selection.')

i: cython.int
start: hsize_t
end: hsize_t
Expand All @@ -117,65 +121,69 @@ cdef _spaceid_to_slice(space_id: hid_t):
else:
raise NotImplementedError("Point selections are not yet supported")

cpdef build_data_dict(dcpl, shape: tuple, chunks: tuple, raw_data_name: str):
cpdef build_data_dict(dcpl, raw_data_name: str):
"""
Function to build the "data_dict" of a versioned virtual dataset.
All virtual datasets created by versioned-hdf5 should have chunks in
exactly one raw dataset `raw_data_name` in the same file. This function will
check that this is the case and return a dictionary mapping the `Tuple` of
the chunk in the virtual dataset to a `Slice` in the raw dataset.
:param dcpl: the dataset creation property list of the versioned dataset
:param raw_data_name: the name of the corresponding raw dataset
:return: a dictionary mapping the `Tuple` of the virtual dataset chunk
to a `Slice` in the raw dataset.
"""
data_dict: dict = {}

is_virtual: bool = dcpl.get_layout() == h5d.VIRTUAL

if not is_virtual:
# A dataset created with only a fillvalue will be nonvirtual,
# since create_virtual_dataset makes a nonvirtual dataset when
# there are no virtual sources.
data_dict = {}
# Same as dataset.get_virtual_sources
elif 0 in shape:
# Work around https://github.com/h5py/h5py/issues/1660
empty_idx = Tuple().expand(shape)
data_dict = {empty_idx: Slice()}
else:
data_dict = {}
with phil:
dcpl_id: hid_t = dcpl.id

with phil:
dcpl_id: hid_t = dcpl.id
virtual_count: size_t = dcpl.get_virtual_count()
j: size_t

virtual_count: size_t = dcpl.get_virtual_count()
j: size_t
raw_data_name_bytes: bytes = raw_data_name.encode('utf8')
# this is a reference to the internal buffer of raw_data_name_bytes, do not free!
raw_data_str: cython.p_char = raw_data_name_bytes

raw_data_name_bytes: bytes = raw_data_name.encode('utf8')
# this is a reference to the internal buffer of raw_data_name_bytes, do not free!
raw_data_str: cython.p_char = raw_data_name_bytes
filename_buf_len: ssize_t = 2
filename_buf: cython.p_char = <char *>malloc(filename_buf_len)
if not filename_buf:
raise MemoryError('could not allocate filename_buf')

filename_buf_len: ssize_t = 2
filename_buf: cython.p_char = <char *>malloc(filename_buf_len)
if not filename_buf:
raise MemoryError('could not allocate filename_buf')
try:
dataset_buf_len: ssize_t = strlen(raw_data_str) + 1
dataset_buf: cython.p_char = <char *>malloc(dataset_buf_len)
if not dataset_buf:
raise MemoryError('could not allocate dataset_buf')

try:
dataset_buf_len: ssize_t = strlen(raw_data_str) + 1
dataset_buf: cython.p_char = <char *>malloc(dataset_buf_len)
if not dataset_buf:
raise MemoryError('could not allocate dataset_buf')

try:
for j in range(virtual_count):
H5Pget_virtual_filename(dcpl_id, j, filename_buf, filename_buf_len)
if strncmp(filename_buf, ".", filename_buf_len) != 0:
raise ValueError('Virtual dataset filename mismatch, expected "."')

H5Pget_virtual_dsetname(dcpl_id, j, dataset_buf, dataset_buf_len)
if strncmp(dataset_buf, raw_data_str, dataset_buf_len) != 0:
raise ValueError(f'Virtual dataset name mismatch, expected {raw_data_name}')

vspace_id: hid_t = H5Pget_virtual_vspace(dcpl_id, j)
srcspace_id: hid_t = H5Pget_virtual_srcspace(dcpl_id, j)

vspace_slice_tuple = _spaceid_to_slice(vspace_id)
srcspace_slice_tuple = _spaceid_to_slice(srcspace_id)
# the slice into the raw_data (srcspace_slice_tuple) is only on the first axis
data_dict[vspace_slice_tuple] = srcspace_slice_tuple.args[0]
finally:
free(dataset_buf)
for j in range(virtual_count):
if H5Pget_virtual_filename(dcpl_id, j, filename_buf, filename_buf_len) < 0:
raise ValueError('Could not get virtual filename')
if strncmp(filename_buf, ".", filename_buf_len) != 0:
raise ValueError('Virtual dataset filename mismatch, expected "."')

if H5Pget_virtual_dsetname(dcpl_id, j, dataset_buf, dataset_buf_len) < 0:
raise ValueError('Could not get virtual dsetname')
if strncmp(dataset_buf, raw_data_str, dataset_buf_len) != 0:
raise ValueError(f'Virtual dataset name mismatch, expected {raw_data_name}')

vspace_id: hid_t = H5Pget_virtual_vspace(dcpl_id, j)
if vspace_id == -1:
raise ValueError('Could not get vspace_id')
srcspace_id: hid_t = H5Pget_virtual_srcspace(dcpl_id, j)
if srcspace_id == -1:
raise ValueError('Could not get srcspace_id')

vspace_slice_tuple = _spaceid_to_slice(vspace_id)
srcspace_slice_tuple = _spaceid_to_slice(srcspace_id)
# the slice into the raw_data (srcspace_slice_tuple) is only on the first axis
data_dict[vspace_slice_tuple] = srcspace_slice_tuple.args[0]
finally:
free(filename_buf)
free(dataset_buf)
finally:
free(filename_buf)

return data_dict
16 changes: 15 additions & 1 deletion versioned_hdf5/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,7 +1429,21 @@ def __init__(self, _id):
def data_dict(self):
if self._data_dict is None:
dcpl = self.get_create_plist()
self._data_dict = build_data_dict(dcpl, self._shape, self.chunks, self.raw_data.name)

is_virtual: bool = dcpl.get_layout() == h5d.VIRTUAL

if not is_virtual:
# A dataset created with only a fillvalue will be nonvirtual,
# since create_virtual_dataset makes a nonvirtual dataset when
# there are no virtual sources.
self._data_dict = {}
# Same as dataset.get_virtual_sources
elif 0 in self.shape:
# Work around https://github.com/h5py/h5py/issues/1660
empty_idx = Tuple().expand(self.shape)
self._data_dict = {empty_idx: Slice()}
else:
self._data_dict = build_data_dict(dcpl, self.raw_data.name)

return self._data_dict

Expand Down

0 comments on commit 4087d76

Please sign in to comment.