From afce382420950ea2ee70693a5fd63540286c941a Mon Sep 17 00:00:00 2001 From: crusaderky Date: Tue, 15 Oct 2024 15:24:26 +0100 Subject: [PATCH 1/2] Revert #371 --- .gitignore | 5 - versioned_hdf5/hyperspace.pxd | 10 - versioned_hdf5/hyperspace.py | 365 ------------------------ versioned_hdf5/hyperspace.pyx | 1 - versioned_hdf5/meson.build | 31 -- versioned_hdf5/subchunk_map.py | 7 - versioned_hdf5/tests/test_hyperspace.py | 85 ------ 7 files changed, 504 deletions(-) delete mode 100644 versioned_hdf5/hyperspace.pxd delete mode 100644 versioned_hdf5/hyperspace.py delete mode 120000 versioned_hdf5/hyperspace.pyx delete mode 100644 versioned_hdf5/tests/test_hyperspace.py diff --git a/.gitignore b/.gitignore index cb177ba2..ed6f745d 100644 --- a/.gitignore +++ b/.gitignore @@ -119,8 +119,3 @@ venv.bak/ # Rever rever/ - -# Cython byproducts -versioned_hdf5/*.c -versioned_hdf5/*.html -versioned_hdf5/*.so diff --git a/versioned_hdf5/hyperspace.pxd b/versioned_hdf5/hyperspace.pxd deleted file mode 100644 index 1bda7aca..00000000 --- a/versioned_hdf5/hyperspace.pxd +++ /dev/null @@ -1,10 +0,0 @@ -# This file allows cimport'ing the functions declared below from other Cython modules - -from cython cimport Py_ssize_t - - -cpdef Py_ssize_t[:] empty_view(Py_ssize_t n) -cpdef Py_ssize_t[:] view_from_tuple(tuple[int, ...] t) -cpdef Py_ssize_t[:, :] fill_hyperspace( - Py_ssize_t[:, :] obstacles, tuple[int, ...] shape -) diff --git a/versioned_hdf5/hyperspace.py b/versioned_hdf5/hyperspace.py deleted file mode 100644 index 58eadbf9..00000000 --- a/versioned_hdf5/hyperspace.py +++ /dev/null @@ -1,365 +0,0 @@ -# Note: this entire module is compiled by cython with wraparound=False -# See meson.build for details - -from __future__ import annotations - -import array - -import cython -import numpy as np -from cython import Py_ssize_t - -_empty_tpl = array.array("l", []) -_py_ssize_t_nbytes = _empty_tpl.itemsize -# assert that "long" is the same length as void* on the current platform -# Note: Py_ssize_t is always the same as np.intp -assert np.intp().nbytes == _py_ssize_t_nbytes - -if cython.compiled: # pragma: nocover - from cython.cimports.cpython import array # type: ignore - - @cython.ccall - def empty_view(n: Py_ssize_t) -> Py_ssize_t[:]: - """Functionally the same, but faster, as - - v: Py_ssize_t[:] = np.empty(n, dtype=np.intp) - - Note that this is limited to one dimension. - """ - # array.clone exists only in compiled Cython - return array.clone(_empty_tpl, n, zero=False) # type: ignore[attr-defined] - - @cython.ccall - def view_from_tuple(t: tuple[int, ...]) -> Py_ssize_t[:]: - """Functionally the same, but faster, as - - v: Py_ssize_t[:] = array.array("l", t) - """ - n = len(t) - v: Py_ssize_t[:] = empty_view(n) - for i in range(n): - v[i] = t[i] - return v - -else: - - def empty_view(n: Py_ssize_t) -> Py_ssize_t[:]: - return array.array("l", b" " * _py_ssize_t_nbytes * n) - - def view_from_tuple(t: tuple[int, ...]) -> Py_ssize_t[:]: - return array.array("l", t) - - -@cython.ccall -def fill_hyperspace( - obstacles: Py_ssize_t[:, :], - shape: tuple[int, ...], -) -> Py_ssize_t[:, :]: - """Given a N-dimensional space of the given shape and a series of obstacles each one - point in size, generate hyperrectangles that cover the whole remaining space - without ever crossing an obstacle or another hyperrectangle. - - Parameters - ---------- - obstacles: - A series of points to avoid, with one row per point and one column per axis. - They must be ordered along each axis (order along the outermost axis first). - - This obstacles info is returned by - ``versioned_hdf5.staged_changes._modified_chunks_in_selection``. - It can also be generated by ``np.stack(np.nonzero(arr)).T``, where arr is a ND - array that's nonzero wherever there's an obstacle. - - shape: - The shape of the space to be covered - - Returns - ------- - Edges of hyperrectangles, one hyperrectangle per row, with the first half of the - columns being the coordinates of the top-left corner (included) and the right half - of the columns being the coordinates of the bottom-right corner (excluded). - - See Also - -------- - https://en.wikipedia.org/wiki/Dimension#High-dimensional_space - https://en.wikipedia.org/wiki/Hyperrectangle - - Example - ------- - >>> obstacles = np.array([(2, 4), (2, 5), (5, 7)]) - >>> np.asarray(fill_hyperspace(obstacles, shape=(8, 10))) - array([[ 0, 0, 2, 10], - [ 2, 0, 3, 4], - [ 2, 6, 3, 10], - [ 3, 0, 5, 10], - [ 5, 0, 6, 7], - [ 5, 8, 6, 10], - [ 6, 0, 8, 10]]) - - Input Output - 0123456789 0123456789 - 0 .......... 0 aaaaaaaaaa - 1 .......... 1 aaaaaaaaaa - 2 ....XX.... 2 bbbbXXcccc - 3 .......... 3 dddddddddd - 4 .......... 4 dddddddddd - 5 .......X.. 5 eeeeeeeXff - 6 .......... 6 gggggggggg - 7 .......... 7 gggggggggg - - FIXME - ----- - The current implementation performs suboptimally in the common use case of an - uninterrupted sequence of obstacles along an axis that is not the innermost one: - - Input Current output Optimal output - 0123456789 0123456789 0123456789 - 0 .X........ 0 aXbbbbbbbb 0 aXbbbbbbbb - 1 .X........ 1 cXdddddddd 1 aXbbbbbbbb - 2 .X........ 2 eXffffffff 2 aXbbbbbbbb - """ - shape_v = view_from_tuple(shape) - nobs = len(obstacles) - ndim = len(shape_v) - assert ndim > 0 - - # For ndim=1, there can be only a single 1d segment between two obstacles - # For ndim=2, there can be 1x 2d rectangles and 2x 1d segments - # For ndim=3, there can be 1x 3d cuboid, 2x 2d rectangles, and 4x 1d lines - # etc. - max_spaces = ((1 << ndim) - 1) * (nobs + 2) - - out: Py_ssize_t[:, :] = np.empty((max_spaces, ndim * 2), dtype=np.intp) - - for n in shape_v: - if n == 0: - return out[:0, :] - - out_a = out[:, :ndim] - out_b = out[:, ndim:] - cur = 0 - - for i in range(nobs + 1): - if i == 0: - a = _one_before(shape_v) - else: - a = obstacles[i - 1, :] - if i == nobs: - b = _one_after(shape_v) - else: - b = obstacles[i, :] - - cur = _hyperrectangles_between(a, b, shape_v, out_a, out_b, cur) - - return out[:cur, :] - - -@cython.cfunc -@cython.exceptval(check=False) -def _hyperrectangles_between( - obstacle1: Py_ssize_t[:], - obstacle2: Py_ssize_t[:], - shape: Py_ssize_t[:], - out_a: Py_ssize_t[:, :], - out_b: Py_ssize_t[:, :], - cur: Py_ssize_t, -) -> Py_ssize_t: - """Helper of fill_hyperspace. - - Given two ordered points in a N-dimensional hyperspace, yield the top-left - corner (included) and bottom-right corner (excluded) of the N-dimensional - hyperrectangles that cover the whole space between the two points (both excluded). - - In other words, fill the flattened vector in the range ]obstacle1, obstacle2[. - - Parameters - ---------- - obstacle1: - The first point to avoid in the N-dimensional hyperspace - obstacle2: - The second point to avoid in the N-dimensional hyperspace. - It must be after the first point along the flattened vector. - shape: - The shape of the whole N-dimensional hyperspace to cover - out_a: - Empty buffer to write the top-left corners into - out_b: - Empty buffer to write the bottom-right corners into - cur: - row index of out_a and out_b to start writing from. - out_a and out_b must be at least 2**ndim - 1 + cur rows in size. - - Returns - ------- - Row index of out_a and out_b after the one that was last written to - - Example: - -------- - Given two points at coordinates (3, 4, 5) and (3, 7, 8) - in a space of shape (10, 10, 10), this function yields - - - [ 3, 4, 6:] -> (3, 4, 6), (4, 5, 10) - - [ 3, 5:7, :] -> (3, 5, 0), (4, 7, 10) - - [ 3, 7, :8] -> (3, 7, 0), (4, 8, 8) - - >>> out = np.empty((2**3 - 1, 6), dtype=np.intp) - >>> _hyperrectangles_between( - ... np.array([3, 4, 5]), - ... np.array([3, 7, 8]), - ... np.array([10, 10, 10]), - ... out[:, :3], - ... out[:, 3:], - ... 0 - ... ) - 3 - >>> out[:3] - array([[ 3, 4, 6, 4, 5, 10], - [ 3, 5, 0, 4, 7, 10], - [ 3, 7, 0, 4, 8, 8]]) - """ - ndim = len(obstacle1) # Always >0 - - if ndim == 1: - # Trivial use case in mono-dimensional space. - # There can be at most one 1D line between the two obstacles. - # - # ..XaaX... - - i, j = obstacle1[0], obstacle2[0] - if i + 1 < j: - out_a[cur, 0] = i + 1 - out_b[cur, 0] = j - cur += 1 - return cur - - if obstacle1[0] == obstacle2[0]: - # The two obstacles are on the same outermost hyperspace/space/surface/row. - # Simplify the problem by recursing into a hyperspace with one less dimension. - # - # ......... - # ..XaaX... - # ......... - - new_cur = _hyperrectangles_between( - obstacle1[1:], - obstacle2[1:], - shape[1:], - out_a[:, 1:], - out_b[:, 1:], - cur, - ) - prefix = obstacle1[0] - out_a[cur:new_cur, 0] = prefix - out_b[cur:new_cur, 0] = prefix + 1 - return new_cur - - # General case with the obstacles laying on different rows/planes/spaces/etc. - # - # If you have a 2d plane with two obstacles on different rows, you will have - # - up to one 1D line on the same row as obstacle1, from obstacle1 to the end of - # the plane - # - up to one 2D surface on the rows between obstacle1 and obstacle2 - # - up to one 1D line on the same row as obstacle2, from the beginning of the - # plane to obstacle2 - # - # ......... - # ..Xaaaaaa - # bbbbbbbbb - # bbbbbbbbb - # ccccX.... - # ......... - # - # If you have a 3D space with two obstacles on different planes, you will have - # - up to two 1D and 2D objects laying in the 2D plane of obstacle1, as described - # in the previous paragraph - # - up to one 3D space covering the planes between obstacle1 and obstacle2, - # - and up to two 1D and 2D objects laying in the 2d plane of obstacle2. - # - # ......... ccccccccc ccccccccc ddddddddd - # ..Xaaaaaa ccccccccc ccccccccc ddddddddd - # bbbbbbbbb ccccccccc ccccccccc ddddddddd - # bbbbbbbbb ccccccccc ccccccccc ddddddddd - # bbbbbbbbb ccccccccc ccccccccc eeeeX.... - # bbbbbbbbb ccccccccc ccccccccc ......... - # - # And so on for higher dimensionionalities. - - # Recurse with one less dimension on the same row, plane, etc. as obstacle1 - # until the end of the same row, plane, etc. - new_cur = _hyperrectangles_between( - obstacle1[1:], - _one_after(shape[1:]), - shape[1:], - out_a[:, 1:], - out_b[:, 1:], - cur, - ) - prefix = obstacle1[0] - out_a[cur:new_cur, 0] = prefix - out_b[cur:new_cur, 0] = prefix + 1 - cur = new_cur - - # Process the 2D surface, or 3D plane, etc. between the two obstacles - if obstacle1[0] + 1 < obstacle2[0]: - out_a[cur, 0] = obstacle1[0] + 1 - out_a[cur, 1:] = 0 - out_b[cur, 0] = obstacle2[0] - out_b[cur, 1:] = shape[1:] - cur += 1 - - # Recurse with one less dimension on the same row, plane, etc. as obstacle2 - # from the beginning of the same row, plane, etc. until obstacle2 - new_cur = _hyperrectangles_between( - _one_before(shape[1:]), - obstacle2[1:], - shape[1:], - out_a[:, 1:], - out_b[:, 1:], - cur, - ) - prefix = obstacle2[0] - out_a[cur:new_cur, 0] = prefix - out_b[cur:new_cur, 0] = prefix + 1 - return new_cur - - -@cython.cfunc -def _one_before(shape: Py_ssize_t[:]) -> Py_ssize_t[:]: - """Given an ND array 'a' of the given shape, return the coordinates of the point at - a.flat[-1], without bounds check or wrap around. - - >>> tuple(_one_before((2, 5))) - (-1, 4) - - x - ..... - ..... - - """ - ndim = len(shape) - out: Py_ssize_t[:] = empty_view(ndim) - out[0] = -1 - for i in range(1, ndim): - out[i] = shape[i] - 1 - return out - - -@cython.cfunc -def _one_after(shape: Py_ssize_t[:]) -> Py_ssize_t[:]: - """Given an ND array 'a' of the given shape, return the coordinates of the point at - a.flat[a.size], without bounds check. - - >>> tuple(_one_after((2, 5))) - (2, 0) - - ..... - ..... - x - - """ - ndim = len(shape) - out: Py_ssize_t[:] = empty_view(ndim) - out[0] = shape[0] - for i in range(1, ndim): - out[i] = 0 - return out diff --git a/versioned_hdf5/hyperspace.pyx b/versioned_hdf5/hyperspace.pyx deleted file mode 120000 index 52efd767..00000000 --- a/versioned_hdf5/hyperspace.pyx +++ /dev/null @@ -1 +0,0 @@ -hyperspace.py \ No newline at end of file diff --git a/versioned_hdf5/meson.build b/versioned_hdf5/meson.build index a76f908d..a75864da 100644 --- a/versioned_hdf5/meson.build +++ b/versioned_hdf5/meson.build @@ -37,37 +37,6 @@ py.extension_module( override_options : ['cython_language=cpp'], ) -cython_args = [ - # '-a', # Generate HTML report - '-X', 'infer_types=True', - # Comment these out if you experience segfaults to get a clear exception instead - '-X', 'initializedcheck=False', - '-X', 'boundscheck=False', - # Note: this is possible because we went through all view accesses in the module - # and made sure they always use non-negative indices. - '-X', 'wraparound=False', - # Note: this is possible only because we went through every single // and % in - # the module and made sure the sign of the two operands is always the same. - # Comment this out if behaviour changes when not compiled. - '-X', 'cdivision=True', -] - -# To generate HTML compilation reports: -# (First double check that flags below match those above) -# -# cythonize -3 -a -X infer_types=True -X initializedcheck=False -X boundscheck=False -X wraparound=False -X cdivision=True versioned_hdf5/{hyperspace,subchunk_map}.py -# -# Or uncomment the -a flag above and then run: -# meson build && pushd build && meson compile && popd && firefox $(find build/ -name "*.html") - -py.extension_module( - 'hyperspace', - 'hyperspace.pyx', - install: true, - subdir: 'versioned_hdf5', - cython_args: cython_args, -) - py.extension_module( 'subchunk_map', # FIXME This is a symlink. Can't find a way to convince Meson to compile diff --git a/versioned_hdf5/subchunk_map.py b/versioned_hdf5/subchunk_map.py index b31afc5b..1b7ac10a 100644 --- a/versioned_hdf5/subchunk_map.py +++ b/versioned_hdf5/subchunk_map.py @@ -17,13 +17,6 @@ ) from numpy.typing import NDArray -# Temporary hack to work around pytest issue: if a pure-pyton Cython module is first -# imported by `pytest .` and it contains doctests, it will be imported in pure python -# instead of its compiled form. This fails in CI, as Cython is not a runtime dependency, -# and regardless would cause a lot of type-checking normally performed by Cython to be -# skipped so it is to be avoided. -from . import hyperspace # noqa: F401 - if TYPE_CHECKING: # TODO import from typing and remove quotes (requires Python 3.10) # TODO use type = ... (requires Python 3.12) diff --git a/versioned_hdf5/tests/test_hyperspace.py b/versioned_hdf5/tests/test_hyperspace.py deleted file mode 100644 index bb367356..00000000 --- a/versioned_hdf5/tests/test_hyperspace.py +++ /dev/null @@ -1,85 +0,0 @@ -import hypothesis -import numpy as np -import numpy.testing as npt -import pytest -from hypothesis import given -from hypothesis import strategies as st - -from ..hyperspace import empty_view, fill_hyperspace, view_from_tuple - - -@pytest.mark.parametrize("n", [0, 1, 2]) -def test_empty_view(n): - l = list(range(n)) - v = empty_view(n) - for i in range(n): - v[i] = l[i] - npt.assert_array_equal(v, np.asarray(l, dtype=np.intp), strict=True) - - -@pytest.mark.parametrize("n", [0, 1, 2]) -def test_view_from_tuple(n): - t = tuple(range(n)) - v = view_from_tuple(t) - npt.assert_array_equal(v, np.asarray(t, dtype=np.intp), strict=True) - - -@pytest.mark.parametrize("ndim", [1, 2, 3, 4]) -def test_fill_empty_hyperspace(ndim): - shape = (3,) * ndim - obstacles = np.empty((0, ndim), dtype=np.intp) - actual = fill_hyperspace(obstacles, shape) - # Exactly one hyperrectangle can cover the whole hyperspace - expect = np.array([(0,) * ndim + shape], dtype=np.intp) - npt.assert_array_equal(actual, expect, strict=True) - - -@pytest.mark.parametrize("ndim", [1, 2, 3, 4]) -def test_fill_full_hyperspace(ndim): - shape = (3,) * ndim - obstacles = np.array(np.nonzero(np.ones(shape))).T - actual = fill_hyperspace(obstacles, shape) - # The hyperspace is fully covered by the obstacles so no hyperrectangles are needed - expect = np.empty((0, ndim * 2), dtype=np.intp) - npt.assert_array_equal(actual, expect, strict=True) - - -@given(st.integers(1, 4), st.data()) -def test_fill_hyperspace_size_zero(ndim, data): - shape = [10] * ndim - shape[data.draw(st.integers(0, ndim - 1))] = 0 - obstacles = np.empty((0, ndim), dtype=np.intp) - hyperrectangles = fill_hyperspace(obstacles, tuple(shape)) - expect = np.empty((0, ndim * 2), dtype=np.intp) - npt.assert_array_equal(hyperrectangles, expect) - - -@given( - # shape from (1, ) to (4, 4, 4, 4) - shape=st.lists(st.integers(1, 4), min_size=1, max_size=4), - data=st.data(), -) -@hypothesis.settings(database=None, max_examples=1000, deadline=None) -def test_fill_hyperspace(shape, data): - ndim = len(shape) - arr = np.zeros(shape) - - obstacles_flat = data.draw(st.lists(st.integers(0, arr.size - 1), unique=True)) - arr.flat[obstacles_flat] = 1 - obstacles = np.asarray(np.nonzero(arr)).T - hyperrectangles = np.asarray(fill_hyperspace(obstacles, tuple(shape))) - - # Test that all coordinates are within the shape - assert (hyperrectangles >= 0).all() - for axis in range(ndim): - assert (hyperrectangles[:, axis] <= shape[axis]).all() - assert (hyperrectangles[:, axis + ndim] <= shape[axis]).all() - # Test that all hyperrectangles have at least size 1 - assert (hyperrectangles[:, axis] < hyperrectangles[:, axis + ndim]).all() - - # Fill the empty space - for rect in hyperrectangles: - idx = tuple(slice(start, stop) for start, stop in zip(rect[:ndim], rect[ndim:])) - assert (arr[idx] == 0).all(), "overlapping hyperrectangles" - arr[idx] = 1 - assert (arr == 1).all(), "incomplete coverage" From 4090d77495e6dadb9ccd2b94edc78818ff1c42c5 Mon Sep 17 00:00:00 2001 From: crusaderky Date: Tue, 15 Oct 2024 15:25:53 +0100 Subject: [PATCH 2/2] Salvage changes --- .gitignore | 5 +++++ versioned_hdf5/meson.build | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/.gitignore b/.gitignore index ed6f745d..cb177ba2 100644 --- a/.gitignore +++ b/.gitignore @@ -119,3 +119,8 @@ venv.bak/ # Rever rever/ + +# Cython byproducts +versioned_hdf5/*.c +versioned_hdf5/*.html +versioned_hdf5/*.so diff --git a/versioned_hdf5/meson.build b/versioned_hdf5/meson.build index a75864da..289b6237 100644 --- a/versioned_hdf5/meson.build +++ b/versioned_hdf5/meson.build @@ -37,6 +37,29 @@ py.extension_module( override_options : ['cython_language=cpp'], ) +cython_args = [ + # '-a', # Generate HTML report + '-X', 'infer_types=True', + # Comment these out if you experience segfaults to get a clear exception instead + '-X', 'initializedcheck=False', + '-X', 'boundscheck=False', + # Note: this is possible because we went through all view accesses in the module + # and made sure they always use non-negative indices. + '-X', 'wraparound=False', + # Note: this is possible only because we went through every single // and % in + # the module and made sure the sign of the two operands is always the same. + # Comment this out if behaviour changes when not compiled. + '-X', 'cdivision=True', +] + +# To generate HTML compilation reports: +# (First double check that flags below match those above) +# +# cythonize -3 -a -X infer_types=True -X initializedcheck=False -X boundscheck=False -X wraparound=False -X cdivision=True versioned_hdf5/*.pyx +# +# Or uncomment the -a flag above and then run: +# meson build && pushd build && meson compile && popd && firefox $(find build/ -name "*.html") + py.extension_module( 'subchunk_map', # FIXME This is a symlink. Can't find a way to convince Meson to compile