Skip to content

Commit

Permalink
[InMemoryDataset redesign] New function asarray() (#384)
Browse files Browse the repository at this point in the history
  • Loading branch information
crusaderky authored Oct 21, 2024
1 parent 0affe5c commit 47b62cc
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 3 deletions.
1 change: 1 addition & 0 deletions versioned_hdf5/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ py.install_sources(
'backend.py',
'hashtable.py',
'replay.py',
'tools.py',
'versions.py',
'wrappers.py',
],
Expand Down
11 changes: 8 additions & 3 deletions versioned_hdf5/slicetools.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ from libcpp.vector cimport vector

from versioned_hdf5.cytools import np_hsize_t
from versioned_hdf5.cytools cimport count2stop, hsize_t, stop2count
from versioned_hdf5.tools import asarray


cdef FILE* fmemopen(void* buf, size_t size, const char* mode):
Expand Down Expand Up @@ -425,7 +426,10 @@ cpdef void read_many_slices(
cdef np.npy_intp[1] ndim_ptr = {ndim}
cdef np.ndarray dst_shape = np.PyArray_SimpleNewFromData(
1, ndim_ptr, np.NPY_INTP, <void*>dst.shape
).astype(np_hsize_t) # On 32 bit platforms, sizeof(hsize_t) == 8; sizeof(int) == 4
)
# On 32-bit platforms, sizeof(hsize_t) == 8; sizeof(npy_intp) == 4
# On 64-bit platforms, don't copy unnecessarily
dst_shape = asarray(dst_shape, np_hsize_t)

clipped_count = _clip_count(
src_shape,
Expand Down Expand Up @@ -473,9 +477,10 @@ cdef np.ndarray _preproc_many_slices_idx(obj: ArrayLike, hsize_t ndim, bint fast
# TODO https://github.com/numpy/numpy/issues/25396
if not NP_GE_200:
obj = np.asarray(obj)
if isinstance(obj, np.ndarray) and obj.dtype.kind != "u" and (obj < 0).any():
if hasattr(obj, "dtype") and obj.dtype.kind != "u" and (obj < 0).any():
raise OverflowError("index out of bounds for uint64")
cdef np.ndarray arr = np.asarray(obj, dtype=np_hsize_t)
# Don't copy when converting from np.intp to uint64 on 64-bit platforms
cdef np.ndarray arr = asarray(obj, np_hsize_t)

if arr.ndim not in (1, 2):
raise ValueError("Coordinates arrays must have 1 or 2 dimensions")
Expand Down
55 changes: 55 additions & 0 deletions versioned_hdf5/tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import numpy as np
import numpy.ma as ma
from numpy.testing import assert_array_equal

from ..tools import asarray


def test_asarray():
    """Check asarray() against ndarrays, plain Python values and masked arrays."""
    # No dtype, or the dtype the array already has: the very same object comes back
    src = np.array([1, -1], dtype="i2")
    out = asarray(src)
    assert out is src
    out = asarray(src, "i2")
    assert out is src

    # Same-width integer dtype: result is a view onto the input
    out = asarray(src, "u2")
    assert_array_equal(out, src.astype("u2"), strict=True)
    assert out.base is src

    # Wider integer dtype: result owns its data
    out = asarray(src, "i4")
    assert_array_equal(out, src.astype("i4"), strict=True)
    assert out.base is None

    # Equal item size is not enough on its own; int -> float must not be a view
    src = np.array([1, -1], dtype="i4")
    out = asarray(src, "f4")
    assert_array_equal(out, src.astype("f4"), strict=True)
    assert out.base is None

    # Objects that are not arrays get converted to np.ndarray
    src = [1, -1]
    out = asarray(src)
    assert_array_equal(out, np.asarray(src), strict=True)
    out = asarray(src, dtype=np.float32)
    assert_array_equal(out, np.asarray(src, dtype=np.float32), strict=True)

    src = 1
    out = asarray(src)
    assert isinstance(out, np.ndarray)
    assert_array_equal(out, np.asarray(src), strict=True)

    src = np.int16(1)
    out = asarray(src)
    assert isinstance(out, np.ndarray)
    assert_array_equal(out, np.asarray(src), strict=True)

    # Array-likes (here: a masked array) keep their own type
    src = ma.masked_array([1, -1], mask=[0, 1], dtype="i2")
    out = asarray(src)
    assert out is src

    out = asarray(src, dtype="u2")
    assert type(out) is type(src)
    assert out.base is src
    assert_array_equal(out, src.astype("u2"), strict=True)

    out = asarray(src, dtype="i4")
    assert type(out) is type(src)
    assert out.base is None
    assert_array_equal(out, src.astype("i4"), strict=True)
31 changes: 31 additions & 0 deletions versioned_hdf5/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np


def asarray(a, dtype=None):
    """Convert ``a`` to an array, avoiding coercion and copies where possible.

    Variant of ``np.asarray(a, dtype=dtype)``, with two differences:

    1. If ``a`` is a numpy-like array, don't coerce it to a ``numpy.ndarray``.
    2. If ``a`` has an ABI-compatible dtype, return a view instead of a copy
       (works around https://github.com/numpy/numpy/issues/27509).

    Parameters
    ----------
    a
        Object to convert. Anything without an ``__array__`` attribute, and
        any scalar, is passed straight to ``np.asarray``.
    dtype : optional
        Anything accepted by ``np.dtype``. ``None`` means "keep the dtype".

    Returns
    -------
    ``a`` itself when no conversion is needed; ``a.view(dtype)`` when both
    dtypes are integers of the same item size and byte order and ``a`` has a
    ``view`` attribute; otherwise ``a.astype(dtype)``.
    """
    if not hasattr(a, "__array__") or np.isscalar(a):
        return np.asarray(a, dtype=dtype)

    if dtype is None:
        return a

    dtype = np.dtype(dtype)
    src_dtype = a.dtype
    if src_dtype == dtype:
        return a

    if (
        # Read the item size off the dtype: array-likes are only required to
        # expose .dtype here, they may not have an .itemsize of their own.
        dtype.itemsize == src_dtype.itemsize
        and dtype.kind in ("i", "u")
        and src_dtype.kind in ("i", "u")
        # A view reinterprets raw bytes, so it is only value-preserving when
        # both dtypes share the same byte order.
        and dtype.byteorder == src_dtype.byteorder
        and hasattr(a, "view")
    ):
        # Note that this does not reduce the amount of safety checks:
        # np.array(-1).astype("u1") doesn't raise and returns 255!
        return a.view(dtype)

    return a.astype(dtype)

0 comments on commit 47b62cc

Please sign in to comment.