From e248869893163b95cffb4a63e899e5d933f3ef20 Mon Sep 17 00:00:00 2001 From: Kota Yamaguchi Date: Thu, 16 Jul 2020 00:50:43 +0900 Subject: [PATCH] Fix undefined memoryview format (#2223) * Fix undefined memoryview format * Add missing header * Add workaround for py27 array compatibility * Workaround py27 memoryview behavior * Fix memoryview constructor from buffer_info * Workaround PyMemoryView_FromMemory availability in py27 * Fix up memoryview tests * Update memoryview test from buffer to check signedness * Use static factory method to create memoryview * Remove ndim arg from memoryview::frombuffer and add tests * Allow ndim=0 memoryview and documentation fixup * Use void* to align to frombuffer method signature * Add const variants of frombuffer and frommemory * Add memory view section in doc * Fix docs * Add test for null buffer * Workaround py27 nullptr behavior in test * Rename frombuffer to from_buffer --- docs/Doxyfile | 2 + docs/advanced/pycpp/numpy.rst | 42 +++++++++ include/pybind11/buffer_info.h | 10 ++- include/pybind11/pytypes.h | 150 ++++++++++++++++++++++++++++----- tests/test_pytypes.cpp | 49 +++++++++++ tests/test_pytypes.py | 67 +++++++++++++++ 6 files changed, 293 insertions(+), 27 deletions(-) diff --git a/docs/Doxyfile b/docs/Doxyfile index 1b9d1297c5..24ece0d8db 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -18,3 +18,5 @@ ALIASES += "endrst=\endverbatim" QUIET = YES WARNINGS = YES WARN_IF_UNDOCUMENTED = NO +PREDEFINED = DOXYGEN_SHOULD_SKIP_THIS \ + PY_MAJOR_VERSION=3 diff --git a/docs/advanced/pycpp/numpy.rst b/docs/advanced/pycpp/numpy.rst index 3d9c90cb42..aef7abe3d7 100644 --- a/docs/advanced/pycpp/numpy.rst +++ b/docs/advanced/pycpp/numpy.rst @@ -384,3 +384,45 @@ operation on the C++ side: py::array a = /* A NumPy array */; py::array b = a[py::make_tuple(0, py::ellipsis(), 0)]; + +Memory view +=========== + +For a case when we simply want to provide a direct accessor to C/C++ buffer +without a concrete class object, we can return a ``memoryview`` object. Suppose +we wish to expose a ``memoryview`` for 2x4 uint8_t array, we can do the +following: + +.. code-block:: cpp + + const uint8_t buffer[] = { + 0, 1, 2, 3, + 4, 5, 6, 7 + }; + m.def("get_memoryview2d", []() { + return py::memoryview::from_buffer( + buffer, // buffer pointer + { 2, 4 }, // shape (rows, cols) + { sizeof(uint8_t) * 4, sizeof(uint8_t) } // strides in bytes + ); + }) + +This approach is meant for providing a ``memoryview`` for a C/C++ buffer not +managed by Python. The user is responsible for managing the lifetime of the +buffer. Using a ``memoryview`` created in this way after deleting the buffer in +C++ side results in undefined behavior. + +We can also use ``memoryview::from_memory`` for a simple 1D contiguous buffer: + +.. code-block:: cpp + + m.def("get_memoryview1d", []() { + return py::memoryview::from_memory( + buffer, // buffer pointer + sizeof(uint8_t) * 8 // buffer size + ); + }) + +.. note:: + + ``memoryview::from_memory`` is not available in Python 2. diff --git a/include/pybind11/buffer_info.h b/include/pybind11/buffer_info.h index 0bf42880b7..8349a46b8b 100644 --- a/include/pybind11/buffer_info.h +++ b/include/pybind11/buffer_info.h @@ -54,7 +54,7 @@ struct buffer_info { explicit buffer_info(Py_buffer *view, bool ownview = true) : buffer_info(view->buf, view->itemsize, view->format, view->ndim, {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}, view->readonly) { - this->view = view; + this->m_view = view; this->ownview = ownview; } @@ -73,16 +73,18 @@ struct buffer_info { ndim = rhs.ndim; shape = std::move(rhs.shape); strides = std::move(rhs.strides); - std::swap(view, rhs.view); + std::swap(m_view, rhs.m_view); std::swap(ownview, rhs.ownview); readonly = rhs.readonly; return *this; } ~buffer_info() { - if (view && ownview) { PyBuffer_Release(view); delete view; } + if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; } } + Py_buffer *view() const { return m_view; } + Py_buffer *&view() { return m_view; } private: struct private_ctr_tag { }; @@ -90,7 +92,7 @@ struct buffer_info { detail::any_container &&shape_in, detail::any_container &&strides_in, bool readonly) : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { } - Py_buffer *view = nullptr; + Py_buffer *m_view = nullptr; bool ownview = false; }; diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 6e7666b878..5152f6804a 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1336,35 +1336,139 @@ class buffer : public object { class memoryview : public object { public: - explicit memoryview(const buffer_info& info) { - static Py_buffer buf { }; - // Py_buffer uses signed sizes, strides and shape!.. - static std::vector py_strides { }; - static std::vector py_shape { }; - buf.buf = info.ptr; - buf.itemsize = info.itemsize; - buf.format = const_cast(info.format.c_str()); - buf.ndim = (int) info.ndim; - buf.len = info.size; - py_strides.clear(); - py_shape.clear(); - for (size_t i = 0; i < (size_t) info.ndim; ++i) { - py_strides.push_back(info.strides[i]); - py_shape.push_back(info.shape[i]); - } - buf.strides = py_strides.data(); - buf.shape = py_shape.data(); - buf.suboffsets = nullptr; - buf.readonly = info.readonly; - buf.internal = nullptr; + PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject) - m_ptr = PyMemoryView_FromBuffer(&buf); + /** \rst + Creates ``memoryview`` from ``buffer_info``. + + ``buffer_info`` must be created from ``buffer::request()``. Otherwise + throws an exception. + + For creating a ``memoryview`` from objects that support buffer protocol, + use ``memoryview(const object& obj)`` instead of this constructor. + \endrst */ + explicit memoryview(const buffer_info& info) { + if (!info.view()) + pybind11_fail("Prohibited to create memoryview without Py_buffer"); + // Note: PyMemoryView_FromBuffer never increments obj reference. + m_ptr = (info.view()->obj) ? + PyMemoryView_FromObject(info.view()->obj) : + PyMemoryView_FromBuffer(info.view()); if (!m_ptr) pybind11_fail("Unable to create memoryview from buffer descriptor"); } - PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject) + /** \rst + Creates ``memoryview`` from static buffer. + + This method is meant for providing a ``memoryview`` for C/C++ buffer not + managed by Python. The caller is responsible for managing the lifetime + of ``ptr`` and ``format``, which MUST outlive the memoryview constructed + here. + + See also: Python C API documentation for `PyMemoryView_FromBuffer`_. + + .. _PyMemoryView_FromBuffer: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer + + :param ptr: Pointer to the buffer. + :param itemsize: Byte size of an element. + :param format: Pointer to the null-terminated format string. For + homogeneous Buffers, this should be set to + ``format_descriptor::value``. + :param shape: Shape of the tensor (1 entry per dimension). + :param strides: Number of bytes between adjacent entries (for each + per dimension). + :param readonly: Flag to indicate if the underlying storage may be + written to. + \endrst */ + static memoryview from_buffer( + void *ptr, ssize_t itemsize, const char *format, + detail::any_container shape, + detail::any_container strides, bool readonly = false); + + static memoryview from_buffer( + const void *ptr, ssize_t itemsize, const char *format, + detail::any_container shape, + detail::any_container strides) { + return memoryview::from_buffer( + const_cast(ptr), itemsize, format, shape, strides, true); + } + + template + static memoryview from_buffer( + T *ptr, detail::any_container shape, + detail::any_container strides, bool readonly = false) { + return memoryview::from_buffer( + reinterpret_cast(ptr), sizeof(T), + format_descriptor::value, shape, strides, readonly); + } + + template + static memoryview from_buffer( + const T *ptr, detail::any_container shape, + detail::any_container strides) { + return memoryview::from_buffer( + const_cast(ptr), shape, strides, true); + } + +#if PY_MAJOR_VERSION >= 3 + /** \rst + Creates ``memoryview`` from static memory. + + This method is meant for providing a ``memoryview`` for C/C++ buffer not + managed by Python. The caller is responsible for managing the lifetime + of ``mem``, which MUST outlive the memoryview constructed here. + + This method is not available in Python 2. + + See also: Python C API documentation for `PyMemoryView_FromBuffer`_. + + .. _PyMemoryView_FromMemory: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory + \endrst */ + static memoryview from_memory(void *mem, ssize_t size, bool readonly = false) { + PyObject* ptr = PyMemoryView_FromMemory( + reinterpret_cast(mem), size, + (readonly) ? PyBUF_READ : PyBUF_WRITE); + if (!ptr) + pybind11_fail("Could not allocate memoryview object!"); + return memoryview(object(ptr, stolen_t{})); + } + + static memoryview from_memory(const void *mem, ssize_t size) { + return memoryview::from_memory(const_cast(mem), size, true); + } +#endif }; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +inline memoryview memoryview::from_buffer( + void *ptr, ssize_t itemsize, const char* format, + detail::any_container shape, + detail::any_container strides, bool readonly) { + size_t ndim = shape->size(); + if (ndim != strides->size()) + pybind11_fail("memoryview: shape length doesn't match strides length"); + ssize_t size = ndim ? 1 : 0; + for (size_t i = 0; i < ndim; ++i) + size *= (*shape)[i]; + Py_buffer view; + view.buf = ptr; + view.obj = nullptr; + view.len = size * itemsize; + view.readonly = static_cast(readonly); + view.itemsize = itemsize; + view.format = const_cast(format); + view.ndim = static_cast(ndim); + view.shape = shape->data(); + view.strides = strides->data(); + view.suboffsets = nullptr; + view.internal = nullptr; + PyObject* obj = PyMemoryView_FromBuffer(&view); + if (!obj) + throw error_already_set(); + return memoryview(object(obj, stolen_t{})); +} +#endif // DOXYGEN_SHOULD_SKIP_THIS /// @} pytypes /// \addtogroup python_builtins diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index f0d86d872c..9f7bc37dc6 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -318,4 +318,53 @@ TEST_SUBMODULE(pytypes, m) { m.def("test_list_slicing", [](py::list a) { return a[py::slice(0, -1, 2)]; }); + + m.def("test_memoryview_object", [](py::buffer b) { + return py::memoryview(b); + }); + + m.def("test_memoryview_buffer_info", [](py::buffer b) { + return py::memoryview(b.request()); + }); + + m.def("test_memoryview_from_buffer", [](bool is_unsigned) { + static const int16_t si16[] = { 3, 1, 4, 1, 5 }; + static const uint16_t ui16[] = { 2, 7, 1, 8 }; + if (is_unsigned) + return py::memoryview::from_buffer( + ui16, { 4 }, { sizeof(uint16_t) }); + else + return py::memoryview::from_buffer( + si16, { 5 }, { sizeof(int16_t) }); + }); + + m.def("test_memoryview_from_buffer_nativeformat", []() { + static const char* format = "@i"; + static const int32_t arr[] = { 4, 7, 5 }; + return py::memoryview::from_buffer( + arr, sizeof(int32_t), format, { 3 }, { sizeof(int32_t) }); + }); + + m.def("test_memoryview_from_buffer_empty_shape", []() { + static const char* buf = ""; + return py::memoryview::from_buffer(buf, 1, "B", { }, { }); + }); + + m.def("test_memoryview_from_buffer_invalid_strides", []() { + static const char* buf = "\x02\x03\x04"; + return py::memoryview::from_buffer(buf, 1, "B", { 3 }, { }); + }); + + m.def("test_memoryview_from_buffer_nullptr", []() { + return py::memoryview::from_buffer( + static_cast(nullptr), 1, "B", { }, { }); + }); + +#if PY_MAJOR_VERSION >= 3 + m.def("test_memoryview_from_memory", []() { + const char* buf = "\xff\xe1\xab\x37"; + return py::memoryview::from_memory( + buf, static_cast(strlen(buf))); + }); +#endif } diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 79bee87750..a8b653a114 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -278,3 +278,70 @@ def test_number_protocol(): def test_list_slicing(): li = list(range(100)) assert li[::2] == m.test_list_slicing(li) + + +@pytest.mark.parametrize('method, args, fmt, expected_view', [ + (m.test_memoryview_object, (b'red',), 'B', b'red'), + (m.test_memoryview_buffer_info, (b'green',), 'B', b'green'), + (m.test_memoryview_from_buffer, (False,), 'h', [3, 1, 4, 1, 5]), + (m.test_memoryview_from_buffer, (True,), 'H', [2, 7, 1, 8]), + (m.test_memoryview_from_buffer_nativeformat, (), '@i', [4, 7, 5]), +]) +def test_memoryview(method, args, fmt, expected_view): + view = method(*args) + assert isinstance(view, memoryview) + assert view.format == fmt + if isinstance(expected_view, bytes) or sys.version_info[0] >= 3: + view_as_list = list(view) + else: + # Using max to pick non-zero byte (big-endian vs little-endian). + view_as_list = [max([ord(c) for c in s]) for s in view] + assert view_as_list == list(expected_view) + + +@pytest.mark.skipif( + not hasattr(sys, 'getrefcount'), + reason='getrefcount is not available') +@pytest.mark.parametrize('method', [ + m.test_memoryview_object, + m.test_memoryview_buffer_info, +]) +def test_memoryview_refcount(method): + buf = b'\x0a\x0b\x0c\x0d' + ref_before = sys.getrefcount(buf) + view = method(buf) + ref_after = sys.getrefcount(buf) + assert ref_before < ref_after + assert list(view) == list(buf) + + +def test_memoryview_from_buffer_empty_shape(): + view = m.test_memoryview_from_buffer_empty_shape() + assert isinstance(view, memoryview) + assert view.format == 'B' + if sys.version_info.major < 3: + # Python 2 behavior is weird, but Python 3 (the future) is fine. + assert bytes(view).startswith(b'