From 7180fe51872f2ac1c71fb98a02a4997ad83b22e7 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 2 Feb 2024 09:10:52 +0100 Subject: [PATCH 01/29] Unmuted tests from test_mathematical.py scope (#1668) * Unmuted tests from test_mathematical.py scope * Removed fallback fixture for dpnp.copysign * Tests for dpnp.clip with NaN edges require numpy>=1.25.0 * Muted again power tests with complex types (until 2024.1 release) --- tests/test_mathematical.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 75735e89bc9..1faa0620f7d 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -15,6 +15,7 @@ import dpnp from dpnp.dpnp_array import dpnp_array +from tests.third_party.cupy import testing from .helper import ( assert_dtype_allclose, @@ -25,7 +26,6 @@ get_integer_dtypes, has_support_aspect64, is_cpu_device, - is_win_platform, ) @@ -125,8 +125,7 @@ def test_input_nan(self): expected = numpy.clip(np_a, -1, 1) assert_array_equal(result, expected) - # TODO: unmute the test once dpctl resolves the issue - @pytest.mark.skip(reason="dpctl-1489 issue") + @testing.with_requires("numpy>=1.25.0") @pytest.mark.parametrize( "kwargs", [ @@ -138,7 +137,7 @@ def test_input_nan(self): ], ) def test_nan_edges(self, kwargs): - np_a = numpy.arange(7) + np_a = numpy.arange(7.0) dp_a = dpnp.asarray(np_a) result = dp_a.clip(**kwargs) @@ -424,7 +423,6 @@ def test_add(self, dtype, lhs, rhs): def test_arctan2(self, dtype, lhs, rhs): self._test_mathematical("arctan2", dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "dtype", get_all_dtypes(no_bool=True, no_complex=True) ) @@ -564,7 +562,7 @@ def test_op_with_scalar(array, val, func, data_type, val_type): pytest.skip( "(0j ** 0) is different: (NaN + NaNj) in dpnp and (1 + 0j) in numpy" ) - # TODO: Remove when #1378 (dpctl) is solved + 
# TODO: Remove when #1378 (dpctl) is solved and 2024.1 is released (coverage is failing otherwise) elif ( is_cpu_device() and dpnp_a.dtype == dpnp.complex128 @@ -1002,7 +1000,7 @@ def test_power(array, val, data_type, val_type): dpnp_a = dpnp.array(array, dtype=data_type) val_ = val_type(val) - # TODO: Remove when #1378 (dpctl) is solved + # TODO: Remove when #1378 (dpctl) is solved and 2024.1 is released (coverage is failing otherwise) if ( is_cpu_device() and ( @@ -2306,12 +2304,6 @@ def test_complex_values(self): dp_arr = dpnp.array(np_arr) func = lambda x: x**2 - # TODO: unmute the test once it's available - if is_win_platform(): - pytest.skip( - "Until the latest dpctl is available on internal channel" - ) - assert_dtype_allclose(func(dp_arr), func(np_arr)) @pytest.mark.parametrize("val", [0, 1], ids=["0", "1"]) @@ -2696,9 +2688,6 @@ def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): with pytest.raises(TypeError): dpnp.matmul(b1, b2, dtype=dtype2) - # TODO: Temporary skipping the test, until Internal CI is updated with - # recent changed in dpctl regarding dpt.result_type function - @pytest.mark.skip("Temporary skipping the test") @pytest.mark.parametrize("dtype1", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize("dtype2", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize( @@ -2846,9 +2835,6 @@ def test_exe_q(self): with pytest.raises(ValueError): dpnp.matmul(x1, x2) - # TODO: Temporary skipping the test, until Internal CI is updated with - # recent changed in dpctl regarding dpt.result_type function - @pytest.mark.skip("Temporary skipping the test") def test_matmul_casting(self): a1 = dpnp.arange(2 * 4, dtype=dpnp.float32).reshape(2, 4) a2 = dpnp.arange(4 * 3).reshape(4, 3) From c7770fd63036b810849ec7807fa20f63e2821f1e Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Fri, 2 Feb 2024 14:21:14 +0100 Subject: [PATCH 02/29] Update dpnp.linalg.svd() function (#1604) * Draft commit of dpnp.linalg.svd impl * Pass empty arrays if 
compute_uv=False * Add logic for the input array n < m * Add a new cupy test_decomposition * Rename gesvd input parameters * Correspondence of passed parameters to gesvd signature * Correct initialization of result variables in dpnp_svd * Update test_decomposition * Add implementation of _dpnp_svd_batch * Add test_decomposition to the scope of public CI * Improve error handling for mkl_lapack::gesvd function * Declare detail variable * Use a_usm_type and a_sycl_queue variables * Add additional checks for gesvd function * Remove old dpnp_svd backend * Refresh test_svd in test_linalg * Add detailed comments for gesvd arguments * gesvd returns pair of events and uses dpctl.utils.keep_args_alive * Keep a lexicographical order * Update docstrings for svd * Add test_svd to test_usm_type * Add a new impl to get s_type * Add a description for _stacked_identity * Simplify dpnp_svd_batch * Update tests for dpnp.linalg.svd * Add hermitian argument support * Add test_svd_hermitian * Update svd docstrings * Tune tolerance * Update test_svd_errors * Update _common_type and _common_inexact_type * Remove passing n and m parameters to _gesvd * Simplify results return logic for dpnp_svd_batch * Update condition and random files in cupy/testing to use fix_random and repeat decorators * Rename cupy/testing/condition.py to .../_condition.py * Use self._tol in TestSvd * Update gesvd error handler * dpnp_svd works with F contiguous arrays * Add additional checks for output arrays * Impl parallel calculation in dpnp_svd_batch * Skip using @_condition.repeat in cupy tests * Add additional checks for output arrays * Update docstrings for svd * Use dpctl.SyclEvent.wait_for in dpnp_svd_batch * Add TODO : matching the order of returned arrays * Skip cupy tests on windows * Rename condition to _condition * Set setUpClass to skip cupy tests on cpu --- dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + dpnp/backend/extensions/lapack/gesvd.cpp | 359 +++++++++++++ 
dpnp/backend/extensions/lapack/gesvd.hpp | 55 ++ dpnp/backend/extensions/lapack/lapack_py.cpp | 9 + .../extensions/lapack/types_matrix.hpp | 22 + dpnp/backend/include/dpnp_iface_fptr.hpp | 2 - dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 44 -- dpnp/dpnp_algo/dpnp_algo.pxd | 2 - dpnp/linalg/dpnp_algo_linalg.pyx | 55 -- dpnp/linalg/dpnp_iface_linalg.py | 74 ++- dpnp/linalg/dpnp_utils_linalg.py | 481 +++++++++++++++--- tests/test_linalg.py | 205 +++++--- tests/test_sycl_queue.py | 91 ++-- tests/test_usm_type.py | 50 ++ .../cupy/linalg_tests/test_decomposition.py | 250 +++++++++ .../cupy/linalg_tests/test_solve.py | 4 +- .../cupy/random_tests/test_sample.py | 24 +- tests/third_party/cupy/testing/__init__.py | 4 +- .../testing/{condition.py => _condition.py} | 2 +- tests/third_party/cupy/testing/random.py | 17 +- 20 files changed, 1425 insertions(+), 326 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/gesvd.cpp create mode 100644 dpnp/backend/extensions/lapack/gesvd.hpp rename tests/third_party/cupy/testing/{condition.py => _condition.py} (98%) diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 626615e3e53..28fa2072d7d 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -28,6 +28,7 @@ set(python_module_name _lapack_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/lapack_py.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/gesvd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp diff --git a/dpnp/backend/extensions/lapack/gesvd.cpp b/dpnp/backend/extensions/lapack/gesvd.cpp new file mode 100644 index 00000000000..27734f4492b --- /dev/null +++ b/dpnp/backend/extensions/lapack/gesvd.cpp @@ -0,0 +1,359 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All 
rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "gesvd.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*gesvd_impl_fn_ptr_t)(sycl::queue, + const oneapi::mkl::jobsvd, + const oneapi::mkl::jobsvd, + const std::int64_t, + const std::int64_t, + char *, + const std::int64_t, + char *, + char *, + const std::int64_t, + char *, + const std::int64_t, + std::vector &, + const std::vector &); + +static gesvd_impl_fn_ptr_t gesvd_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + +// Converts a given character code (ord) to the corresponding +// oneapi::mkl::jobsvd enumeration value +static oneapi::mkl::jobsvd process_job(std::int8_t job_val) +{ + switch (job_val) { + case 'A': + return oneapi::mkl::jobsvd::vectors; + case 'S': + return oneapi::mkl::jobsvd::somevec; + case 'O': + return oneapi::mkl::jobsvd::vectorsina; + case 'N': + return oneapi::mkl::jobsvd::novec; + default: + throw std::invalid_argument("Unknown value for job"); + } +} + +template +static sycl::event gesvd_impl(sycl::queue exec_q, + const oneapi::mkl::jobsvd jobu, + const oneapi::mkl::jobsvd jobvt, + const std::int64_t m, + const std::int64_t n, + char *in_a, + const std::int64_t lda, + char *out_s, + char *out_u, + const std::int64_t ldu, + char *out_vt, + const std::int64_t ldvt, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + RealT *s = reinterpret_cast(out_s); + T *u = reinterpret_cast(out_u); + T *vt = reinterpret_cast(out_vt); + + const 
std::int64_t scratchpad_size = mkl_lapack::gesvd_scratchpad_size( + exec_q, jobu, jobvt, m, n, lda, ldu, ldvt); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event gesvd_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + gesvd_event = mkl_lapack::gesvd( + exec_q, + jobu, // Character specifying how to compute the matrix U: + // 'A' computes all columns of U, + // 'S' computes the first min(m,n) columns of U, + // 'O' overwrites A with the columns of U, + // 'N' does not compute U. + jobvt, // Character specifying how to compute the matrix VT: + // 'A' computes all rows of VT, + // 'S' computes the first min(m,n) rows of VT, + // 'O' overwrites A with the rows of VT, + // 'N' does not compute VT. + m, // The number of rows in the input matrix A (0 <= m). + n, // The number of columns in the input matrix A (0 <= n). + a, // Pointer to the input matrix A of size (m x n). + lda, // The leading dimension of A, must be at least max(1, m). + s, // Pointer to the array containing the singular values. + u, // Pointer to the matrix U in the singular value decomposition. + ldu, // The leading dimension of U, must be at least max(1, m). + vt, // Pointer to the matrix VT in the singular value decomposition. + ldvt, // The leading dimension of VT, must be at least max(1, n). + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. 
Required size is at least " + << e.detail(); + } + else if (info > 0) { + error_msg << "The algorithm computing SVD failed to converge; " + << info << " off-diagonal elements of an intermediate " + << "bidiagonal form did not converge to zero.\n"; + } + else { + error_msg << "Unexpected MKL exception caught during gesvd() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg << "Unexpected SYCL exception caught during gesvd() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(gesvd_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return gesvd_event; +} + +std::pair + gesvd(sycl::queue exec_q, + const std::int8_t jobu_val, + const std::int8_t jobvt_val, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray out_s, + dpctl::tensor::usm_ndarray out_u, + dpctl::tensor::usm_ndarray out_vt, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int out_u_array_nd = out_u.get_ndim(); + const int out_s_array_nd = out_s.get_ndim(); + const int out_vt_array_nd = out_vt.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (out_s_array_nd != 1) { + throw py::value_error("The output array of singular values has ndim=" + + std::to_string(out_s_array_nd) + + ", but a 1-dimensional array is expected."); + } + + if (jobu_val == 'N' && jobvt_val == 'N') { + if (out_u_array_nd != 0) { + throw py::value_error( + "The output array of the left singular vectors has 
ndim=" + + std::to_string(out_u_array_nd) + + ", but it is not used and should have ndim=0."); + } + if (out_vt_array_nd != 0) { + throw py::value_error( + "The output array of the right singular vectors has ndim=" + + std::to_string(out_vt_array_nd) + + ", but it is not used and should have ndim=0."); + } + } + else { + if (out_u_array_nd != 2) { + throw py::value_error( + "The output array of the left singular vectors has ndim=" + + std::to_string(out_u_array_nd) + + ", but a 2-dimensional array is expected."); + } + if (out_vt_array_nd != 2) { + throw py::value_error( + "The output array of the right singular vectors has ndim=" + + std::to_string(out_vt_array_nd) + + ", but a 2-dimensional array is expected."); + } + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, {a_array.get_queue(), out_s.get_queue(), out_u.get_queue(), + out_vt.get_queue()})) + { + throw std::runtime_error( + "USM allocations are not compatible with the execution queue."); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, out_s) || overlap(a_array, out_u) || + overlap(a_array, out_vt) || overlap(out_s, out_u) || + overlap(out_s, out_vt) || overlap(out_u, out_vt)) + { + throw py::value_error("Arrays have overlapping segments of memory"); + } + + bool is_a_array_f_contig = a_array.is_f_contiguous(); + if (!is_a_array_f_contig) { + throw py::value_error("The input array must be F-contiguous"); + } + + bool is_out_u_array_f_contig = out_u.is_f_contiguous(); + bool is_out_vt_array_f_contig = out_vt.is_f_contiguous(); + + if (!is_out_u_array_f_contig || !is_out_vt_array_f_contig) { + throw py::value_error("The output arrays of the left and right " + "singular vectors must be F-contiguous"); + } + + bool is_out_s_array_c_contig = out_s.is_c_contiguous(); + bool is_out_s_array_f_contig = out_s.is_f_contiguous(); + + if (!is_out_s_array_c_contig || !is_out_s_array_f_contig) { + throw 
py::value_error("The output array of singular values " + "must be contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int out_u_type_id = array_types.typenum_to_lookup_id(out_u.get_typenum()); + int out_s_type_id = array_types.typenum_to_lookup_id(out_s.get_typenum()); + int out_vt_type_id = array_types.typenum_to_lookup_id(out_vt.get_typenum()); + + if (a_array_type_id != out_u_type_id || a_array_type_id != out_vt_type_id) { + throw py::type_error( + "Input array, output left singular vectors array, " + "and outpuy right singular vectors array must have " + "the same data type"); + } + + gesvd_impl_fn_ptr_t gesvd_fn = + gesvd_dispatch_table[a_array_type_id][out_s_type_id]; + if (gesvd_fn == nullptr) { + throw py::value_error( + "No gesvd implementation is defined for the given pair " + "of array type and output singular values type."); + } + + char *a_array_data = a_array.get_data(); + char *out_s_data = out_s.get_data(); + char *out_u_data = out_u.get_data(); + char *out_vt_data = out_vt.get_data(); + + const py::ssize_t *a_array_shape = a_array.get_shape_raw(); + const std::int64_t m = a_array_shape[0]; + const std::int64_t n = a_array_shape[1]; + + const std::int64_t lda = std::max(1UL, m); + const std::int64_t ldu = std::max(1UL, m); + const std::int64_t ldvt = + std::max(1UL, jobvt_val == 'S' ? (m > n ? 
n : m) : n); + + const oneapi::mkl::jobsvd jobu = process_job(jobu_val); + const oneapi::mkl::jobsvd jobvt = process_job(jobvt_val); + + std::vector host_task_events; + sycl::event gesvd_ev = + gesvd_fn(exec_q, jobu, jobvt, m, n, a_array_data, lda, out_s_data, + out_u_data, ldu, out_vt_data, ldvt, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {a_array, out_s, out_u, out_vt}, host_task_events); + + return std::make_pair(args_ev, gesvd_ev); +} + +template +struct GesvdContigFactory +{ + fnT get() + { + if constexpr (types::GesvdTypePairSupportFactory::is_defined) + { + return gesvd_impl; + } + else { + return nullptr; + } + } +}; + +void init_gesvd_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(gesvd_dispatch_table); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/gesvd.hpp b/dpnp/backend/extensions/lapack/gesvd.hpp new file mode 100644 index 00000000000..17ebd0edbe7 --- /dev/null +++ b/dpnp/backend/extensions/lapack/gesvd.hpp @@ -0,0 +1,55 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + gesvd(sycl::queue exec_q, + const std::int8_t jobu_val, + const std::int8_t jobvt_val, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray out_s, + dpctl::tensor::usm_ndarray out_u, + dpctl::tensor::usm_ndarray out_vt, + const std::vector &depends); + +extern void init_gesvd_dispatch_table(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 71991be3652..0c76d0fc096 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -31,6 +31,7 @@ #include #include "gesv.hpp" +#include "gesvd.hpp" #include "getrf.hpp" #include "getri.hpp" #include "heevd.hpp" @@ -56,6 +57,7 @@ void init_dispatch_vectors(void) // populate dispatch tables void init_dispatch_tables(void) { + lapack_ext::init_gesvd_dispatch_table(); 
lapack_ext::init_heevd_dispatch_table(); } @@ -76,6 +78,13 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("sycl_queue"), py::arg("coeff_matrix"), py::arg("dependent_vals"), py::arg("depends") = py::list()); + m.def("_gesvd", &lapack_ext::gesvd, + "Call `gesvd` from OneMKL LAPACK library to return " + "the singular value decomposition of a general rectangular matrix", + py::arg("sycl_queue"), py::arg("jobu_val"), py::arg("jobvt_val"), + py::arg("a_array"), py::arg("res_s"), py::arg("res_u"), + py::arg("res_vt"), py::arg("depends") = py::list()); + m.def("_getrf", &lapack_ext::getrf, "Call `getrf` from OneMKL LAPACK library to return " "the LU factorization of a general n x n matrix", diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index 7e5413b84c8..893619e6afb 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -70,6 +70,28 @@ struct GesvTypePairSupportFactory dpctl_td_ns::NotDefinedEntry>::is_defined; }; +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::gesvd + * function. + * + * @tparam T Type of array containing input matrix A and output matrices U and + * VT of singular vectors. + * @tparam RealT Type of output array containing singular values of A. 
+ */ +template +struct GesvdTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, RealT, float>, + dpctl_td_ns:: + TypePairDefinedEntry, RealT, double>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 2e2ce5ab144..3061bb01f29 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -363,8 +363,6 @@ enum class DPNPFuncName : size_t parameters */ DPNP_FN_SUM, /**< Used in numpy.sum() impl */ DPNP_FN_SVD, /**< Used in numpy.linalg.svd() impl */ - DPNP_FN_SVD_EXT, /**< Used in numpy.linalg.svd() impl, requires extra - parameters */ DPNP_FN_TAKE, /**< Used in numpy.take() impl */ DPNP_FN_TAN, /**< Used in numpy.tan() impl */ DPNP_FN_TANH, /**< Used in numpy.tanh() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index e0b6de5b1b6..610da8fda3c 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -824,17 +824,6 @@ template void (*dpnp_svd_default_c)(void *, void *, void *, void *, size_t, size_t) = dpnp_svd_c<_InputDT, _ComputeDT, _SVDT>; -template -DPCTLSyclEventRef (*dpnp_svd_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - void *, - void *, - size_t, - size_t, - const DPCTLEventVectorRef) = - dpnp_svd_c<_InputDT, _ComputeDT, _SVDT>; - void func_map_init_linalg_func(func_map_t &fmap) { fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_FLT][eft_FLT] = { @@ -1046,38 +1035,5 @@ void func_map_init_linalg_func(func_map_t &fmap) eft_C128, (void *)dpnp_svd_default_c, std::complex, double>}; - 
fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_INT][eft_INT] = { - get_default_floating_type(), - (void *)dpnp_svd_ext_c< - int32_t, func_type_map_t::find_type, - func_type_map_t::find_type>, - get_default_floating_type(), - (void *) - dpnp_svd_ext_c()>, - func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_LNG][eft_LNG] = { - get_default_floating_type(), - (void *)dpnp_svd_ext_c< - int64_t, func_type_map_t::find_type, - func_type_map_t::find_type>, - get_default_floating_type(), - (void *) - dpnp_svd_ext_c()>, - func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_svd_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_svd_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SVD_EXT][eft_C128][eft_C128] = { - eft_C128, - (void *) - dpnp_svd_ext_c, std::complex, double>}; - return; } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 895b393aeff..28e21340647 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -171,8 +171,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_RNG_ZIPF_EXT DPNP_FN_SEARCHSORTED DPNP_FN_SEARCHSORTED_EXT - DPNP_FN_SVD - DPNP_FN_SVD_EXT DPNP_FN_TRACE DPNP_FN_TRACE_EXT DPNP_FN_TRANSPOSE diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index 1d94a893fff..3bf6dad3ee8 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -51,7 +51,6 @@ __all__ = [ "dpnp_matrix_rank", "dpnp_norm", "dpnp_qr", - "dpnp_svd", ] @@ -379,57 +378,3 @@ cpdef tuple dpnp_qr(utils.dpnp_descriptor x1, str mode): c_dpctl.DPCTLEvent_Delete(event_ref) return (res_q.get_pyobj(), res_r.get_pyobj()) - - -cpdef tuple dpnp_svd(utils.dpnp_descriptor x1, cpp_bool full_matrices, cpp_bool compute_uv, cpp_bool hermitian): - cdef size_t size_m = x1.shape[0] - cdef 
size_t size_n = x1.shape[1] - cdef size_t size_s = min(size_m, size_n) - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SVD_EXT, param1_type, param1_type) - - x1_obj = x1.get_array() - - cdef (DPNPFuncType, void *) ret_type_and_func = utils.get_ret_type_and_func(kernel_data, - x1_obj.sycl_device.has_aspect_fp64) - cdef DPNPFuncType return_type = ret_type_and_func[0] - cdef custom_linalg_1in_3out_shape_t func = < custom_linalg_1in_3out_shape_t > ret_type_and_func[1] - - cdef utils.dpnp_descriptor res_u = utils.create_output_descriptor((size_m, size_m), - return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - cdef utils.dpnp_descriptor res_s = utils.create_output_descriptor((size_s, ), - return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - cdef utils.dpnp_descriptor res_vt = utils.create_output_descriptor((size_n, size_n), - return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - - result_sycl_queue = res_u.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - x1.get_data(), - res_u.get_data(), - res_s.get_data(), - res_vt.get_data(), - size_m, - size_n, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return (res_u.get_pyobj(), res_s.get_pyobj(), res_vt.get_pyobj()) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 800aa8de1bb..2b8506130ad 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -53,6 +53,7 @@ dpnp_inv, dpnp_slogdet, dpnp_solve, + dpnp_svd, ) __all__ = [ @@ -611,12 +612,47 @@ def solve(a, b): return dpnp_solve(a, b) 
-def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): +def svd(a, full_matrices=True, compute_uv=True, hermitian=False): """ Singular Value Decomposition. For full documentation refer to :obj:`numpy.linalg.svd`. + Parameters + ---------- + a : (..., M, N) {dpnp.ndarray, usm_ndarray} + Input array with ``a.ndim >= 2``. + full_matrices : bool, optional + If ``True``, it returns `u` and `Vh` with full-sized matrices. + If ``False``, the matrices are reduced in size. + Default: ``True``. + compute_uv : bool, optional + If ``False``, it only returns singular values. + Default: ``True``. + hermitian : bool, optional + If True, a is assumed to be Hermitian (symmetric if real-valued), + enabling a more efficient method for finding singular values. + Default: ``False``. + + Returns + ------- + u : { (…, M, M), (…, M, K) } dpnp.ndarray + Unitary matrix, where M is the number of rows of the input array `a`. + The shape of the matrix `u` depends on the value of `full_matrices`. + If `full_matrices` is ``True``, `u` has the shape (…, M, M). + If `full_matrices` is ``False``, `u` has the shape (…, M, K), + where K = min(M, N), and N is the number of columns of the input array `a`. + If `compute_uv` is ``False``, neither `u` or `Vh` are computed. + s : (…, K) dpnp.ndarray + Vector containing the singular values of `a`, sorted in descending order. + The length of `s` is min(M, N). + Vh : { (…, N, N), (…, K, N) } dpnp.ndarray + Unitary matrix, where N is the number of columns of the input array `a`. + The shape of the matrix `Vh` depends on the value of `full_matrices`. + If `full_matrices` is ``True``, `Vh` has the shape (…, N, N). + If `full_matrices` is ``False``, `Vh` has the shape (…, K, N). + If `compute_uv` is ``False``, neither `u` or `Vh` are computed. 
+ Examples -------- >>> import dpnp as np @@ -629,11 +665,11 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): >>> u.shape, s.shape, vh.shape ((9, 9), (6,), (6, 6)) >>> np.allclose(a, np.dot(u[:, :6] * s, vh)) - True + array([ True]) >>> smat = np.zeros((9, 6), dtype=complex) >>> smat[:6, :6] = np.diag(s) >>> np.allclose(a, np.dot(u, np.dot(smat, vh))) - True + array([ True]) Reconstruction based on reduced SVD, 2D case: @@ -641,10 +677,10 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): >>> u.shape, s.shape, vh.shape ((9, 6), (6,), (6, 6)) >>> np.allclose(a, np.dot(u * s, vh)) - True + array([ True]) >>> smat = np.diag(s) >>> np.allclose(a, np.dot(u, np.dot(smat, vh))) - True + array([ True]) Reconstruction based on full SVD, 4D case: @@ -652,9 +688,9 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): >>> u.shape, s.shape, vh.shape ((2, 7, 8, 8), (2, 7, 3), (2, 7, 3, 3)) >>> np.allclose(b, np.matmul(u[..., :3] * s[..., None, :], vh)) - True + array([ True]) >>> np.allclose(b, np.matmul(u[..., :3], s[..., None] * vh)) - True + array([ True]) Reconstruction based on reduced SVD, 4D case: @@ -662,30 +698,16 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): >>> u.shape, s.shape, vh.shape ((2, 7, 8, 3), (2, 7, 3), (2, 7, 3, 3)) >>> np.allclose(b, np.matmul(u * s[..., None, :], vh)) - True + array([ True]) >>> np.allclose(b, np.matmul(u, s[..., None] * vh)) - True + array([ True]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if not x1_desc.ndim == 2: - pass - elif full_matrices is not True: - pass - elif compute_uv is not True: - pass - elif hermitian is not False: - pass - else: - result_tup = dpnp_svd(x1_desc, full_matrices, compute_uv, hermitian) - - return result_tup + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) - return call_origin( - numpy.linalg.svd, x1, full_matrices, compute_uv, hermitian - ) + return dpnp_svd(a, 
full_matrices, compute_uv, hermitian) def slogdet(a): diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index f2632b5b6a4..93f41883133 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -24,8 +24,9 @@ # ***************************************************************************** +import dpctl import dpctl.tensor._tensor_impl as ti -from numpy import issubdtype +from numpy import prod import dpnp import dpnp.backend.extensions.lapack._lapack_impl as li @@ -40,6 +41,7 @@ "dpnp_inv", "dpnp_slogdet", "dpnp_solve", + "dpnp_svd", ] _jobz = {"N": 0, "V": 1} @@ -147,76 +149,6 @@ def _real_type(dtype, device=None): return dpnp.dtype(real_type) -def check_stacked_2d(*arrays): - """ - Return ``True`` if each array in `arrays` has at least two dimensions. - - If any array is less than two-dimensional, `dpnp.linalg.LinAlgError` will be raised. - - Parameters - ---------- - arrays : {dpnp.ndarray, usm_ndarray} - A sequence of input arrays to check for dimensionality. - - Returns - ------- - out : bool - ``True`` if each array in `arrays` is at least two-dimensional. - - Raises - ------ - dpnp.linalg.LinAlgError - If any array in `arrays` is less than two-dimensional. - - """ - - for a in arrays: - if a.ndim < 2: - raise dpnp.linalg.LinAlgError( - f"{a.ndim}-dimensional array given. The input " - "array must be at least two-dimensional" - ) - - -def check_stacked_square(*arrays): - """ - Return ``True`` if each array in `arrays` is a square matrix. - - If any array does not form a square matrix, `dpnp.linalg.LinAlgError` will be raised. - - Precondition: `arrays` are at least 2d. The caller should assert it - beforehand. For example, - - >>> def solve(a): - ... check_stacked_2d(a) - ... check_stacked_square(a) - ... ... - - Parameters - ---------- - arrays : {dpnp.ndarray, usm_ndarray} - A sequence of input arrays to check for square matrix shape. 
- - Returns - ------- - out : bool - ``True`` if each array in `arrays` forms a square matrix. - - Raises - ------ - dpnp.linalg.LinAlgError - If any array in `arrays` does not form a square matrix. - - """ - - for a in arrays: - m, n = a.shape[-2:] - if m != n: - raise dpnp.linalg.LinAlgError( - "Last 2 dimensions of the input array must be square" - ) - - def _common_type(*arrays): """ Common type for linear algebra operations. @@ -245,7 +177,8 @@ def _common_type(*arrays): dtypes = [arr.dtype for arr in arrays] - default = dpnp.default_float_type(device=arrays[0].device) + _, sycl_queue = get_usm_allocations(arrays) + default = dpnp.default_float_type(sycl_queue=sycl_queue) dtype_common = _common_inexact_type(default, *dtypes) return dtype_common @@ -275,7 +208,8 @@ def _common_inexact_type(default_dtype, *dtypes): """ inexact_dtypes = [ - dt if issubdtype(dt, dpnp.inexact) else default_dtype for dt in dtypes + dt if dpnp.issubdtype(dt, dpnp.inexact) else default_dtype + for dt in dtypes ] return dpnp.result_type(*inexact_dtypes) @@ -469,6 +403,120 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_array) +def _stacked_identity( + batch_shape, n, dtype, usm_type="device", sycl_queue=None +): + """ + Create stacked identity matrices of size `n x n`. + + Forms multiple identity matrices based on `batch_shape`. + + Parameters + ---------- + batch_shape : tuple + Shape of the batch determining the stacking of identity matrices. + n : int + Dimension of each identity matrix. + dtype : dtype + Data type of the matrix element. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of stacked `n x n` identity matrices as per `batch_shape`. 
+ + Example + ------- + >>> _stacked_identity((2,), 2, dtype=dpnp.int64) + array([[[1, 0], + [0, 1]], + + [[1, 0], + [0, 1]]]) + + """ + + shape = batch_shape + (n, n) + idx = dpnp.arange(n, usm_type=usm_type, sycl_queue=sycl_queue) + x = dpnp.zeros(shape, dtype=dtype, usm_type=usm_type, sycl_queue=sycl_queue) + x[..., idx, idx] = 1 + return x + + +def check_stacked_2d(*arrays): + """ + Return ``True`` if each array in `arrays` has at least two dimensions. + + If any array is less than two-dimensional, `dpnp.linalg.LinAlgError` will be raised. + + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + A sequence of input arrays to check for dimensionality. + + Returns + ------- + out : bool + ``True`` if each array in `arrays` is at least two-dimensional. + + Raises + ------ + dpnp.linalg.LinAlgError + If any array in `arrays` is less than two-dimensional. + + """ + + for a in arrays: + if a.ndim < 2: + raise dpnp.linalg.LinAlgError( + f"{a.ndim}-dimensional array given. The input " + "array must be at least two-dimensional" + ) + + +def check_stacked_square(*arrays): + """ + Return ``True`` if each array in `arrays` is a square matrix. + + If any array does not form a square matrix, `dpnp.linalg.LinAlgError` will be raised. + + Precondition: `arrays` are at least 2d. The caller should assert it + beforehand. For example, + + >>> def solve(a): + ... check_stacked_2d(a) + ... check_stacked_square(a) + ... ... + + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + A sequence of input arrays to check for square matrix shape. + + Returns + ------- + out : bool + ``True`` if each array in `arrays` forms a square matrix. + + Raises + ------ + dpnp.linalg.LinAlgError + If any array in `arrays` does not form a square matrix. 
+ + """ + + for a in arrays: + m, n = a.shape[-2:] + if m != n: + raise dpnp.linalg.LinAlgError( + "Last 2 dimensions of the input array must be square" + ) + + def dpnp_cholesky_batch(a, upper_lower, res_type): """ dpnp_cholesky_batch(a, upper_lower, res_type) @@ -1088,3 +1136,290 @@ def dpnp_slogdet(a): dpnp.where(singular, res_type.type(0), sign).reshape(shape), dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), ) + + +def dpnp_svd_batch(a, uv_type, s_type, full_matrices=True, compute_uv=True): + """ + dpnp_svd_batch(a, uv_type, s_type, full_matrices=True, compute_uv=True) + + Return the batched singular value decomposition (SVD) of a stack of matrices. + + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + reshape = False + batch_shape_orig = a.shape[:-2] + + if a.ndim > 3: + # get 3d input arrays by reshape + a = a.reshape(prod(a.shape[:-2]), a.shape[-2], a.shape[-1]) + reshape = True + + batch_size = a.shape[0] + m, n = a.shape[-2:] + + if batch_size == 0: + k = min(m, n) + s = dpnp.empty( + batch_shape_orig + (k,), + dtype=s_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + if compute_uv: + if full_matrices: + u_shape = batch_shape_orig + (m, m) + vt_shape = batch_shape_orig + (n, n) + else: + u_shape = batch_shape_orig + (m, k) + vt_shape = batch_shape_orig + (k, n) + + u = dpnp.empty( + u_shape, + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + vt = dpnp.empty( + vt_shape, + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return u, s, vt + else: + return s + elif m == 0 or n == 0: + s = dpnp.empty( + batch_shape_orig + (0,), + dtype=s_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + if compute_uv: + if full_matrices: + u = _stacked_identity( + batch_shape_orig, + m, + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + vt = _stacked_identity( + batch_shape_orig, + n, + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + 
) + else: + u = dpnp.empty( + batch_shape_orig + (m, 0), + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + vt = dpnp.empty( + batch_shape_orig + (0, n), + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return u, s, vt + else: + return s + + u_matrices = [None] * batch_size + s_matrices = [None] * batch_size + vt_matrices = [None] * batch_size + ht_list_ev = [None] * batch_size * 2 + for i in range(batch_size): + if compute_uv: + ( + u_matrices[i], + s_matrices[i], + vt_matrices[i], + ht_list_ev[2 * i], + ht_list_ev[2 * i + 1], + ) = dpnp_svd(a[i], full_matrices, compute_uv=True, batch_call=True) + else: + s_matrices[i], ht_list_ev[2 * i], ht_list_ev[2 * i + 1] = dpnp_svd( + a[i], full_matrices, compute_uv=False, batch_call=True + ) + + dpctl.SyclEvent.wait_for(ht_list_ev) + + # TODO: Need to return C-contiguous array to match the output of numpy.linalg.svd + # Allocate 'F' order memory for dpnp output arrays to be aligned with dpnp_svd + out_s = dpnp.array(s_matrices, order="F") + if reshape: + out_s = out_s.reshape(batch_shape_orig + out_s.shape[-1:]) + + if compute_uv: + out_u = dpnp.array(u_matrices, order="F") + out_vt = dpnp.array(vt_matrices, order="F") + if reshape: + return ( + out_u.reshape(batch_shape_orig + out_u.shape[-2:]), + out_s, + out_vt.reshape(batch_shape_orig + out_vt.shape[-2:]), + ) + else: + return out_u, out_s, out_vt + else: + return out_s + + +def dpnp_svd( + a, full_matrices=True, compute_uv=True, hermitian=False, batch_call=False +): + """ + dpnp_svd(a, full_matrices=True, compute_uv=True, hermitian=False, batch_call=False) + + Return the singular value decomposition (SVD). 
+ + """ + + if hermitian: + check_stacked_square(a) + + # _gesvd returns eigenvalues with s ** 2 sorted descending, + # but dpnp.linalg.eigh returns s sorted ascending so we re-order the eigenvalues + # and related arrays to have the correct order + if compute_uv: + s, u = dpnp.linalg.eigh(a) + sgn = dpnp.sign(s) + s = dpnp.absolute(s) + sidx = dpnp.argsort(s)[..., ::-1] + # Rearrange the signs according to sorted indices + sgn = dpnp.take_along_axis(sgn, sidx, axis=-1) + # Sort the singular values in descending order + s = dpnp.take_along_axis(s, sidx, axis=-1) + # Rearrange the eigenvectors according to sorted indices + u = dpnp.take_along_axis(u, sidx[..., None, :], axis=-1) + # Singular values are unsigned, move the sign into v + # Compute V^T adjusting for the sign and conjugating. NOTE(review): dpnp.transpose without `axes` reverses every axis, which is the matrix transpose only for 2-D input; confirm the result for stacked `a` (numpy.linalg.svd swaps just the last two axes here) + vt = dpnp.transpose(u * sgn[..., None, :]).conjugate() + return u, s, vt + else: + # TODO: use dpnp.linalg.eigvalsh when it is updated + s, _ = dpnp.linalg.eigh(a) + s = dpnp.abs(s) + return dpnp.sort(s)[..., ::-1] + + uv_type = _common_type(a) + s_type = _real_type(uv_type) + + if a.ndim > 2: + return dpnp_svd_batch(a, uv_type, s_type, full_matrices, compute_uv) + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + m, n = a.shape + + if m == 0 or n == 0: + s = dpnp.empty( + (0,), + dtype=s_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + if compute_uv: + if full_matrices: + u_shape = (m,) + vt_shape = (n,) + else: + u_shape = (m, 0) + vt_shape = (0, n) + + u = dpnp.eye( + *u_shape, + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + vt = dpnp.eye( + *vt_shape, + dtype=uv_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return u, s, vt + else: + return s + + # oneMKL LAPACK gesvd destroys `a` and assumes fortran-like array as input. + # Allocate 'F' order memory for dpnp arrays to comply with these requirements.
+ a_h = dpnp.empty_like(a, order="F", dtype=uv_type) + + a_usm_arr = dpnp.get_usm_ndarray(a) + + # use DPCTL tensor function to fill the copy of the input array + # from the input array + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + k = min(m, n) + if compute_uv: + if full_matrices: + u_shape = (m, m) + vt_shape = (n, n) + jobu = ord("A") + jobvt = ord("A") + else: + u_shape = (m, k) + vt_shape = (k, n) + jobu = ord("S") + jobvt = ord("S") + else: + u_shape = vt_shape = () + jobu = ord("N") + jobvt = ord("N") + + # oneMKL LAPACK assumes fortran-like array as input. + # Allocate 'F' order memory for dpnp output arrays to comply with these requirements. + u_h = dpnp.empty( + u_shape, + dtype=uv_type, + order="F", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + vt_h = dpnp.empty( + vt_shape, + dtype=uv_type, + order="F", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + s_h = dpnp.empty( + k, dtype=s_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + + ht_lapack_ev, _ = li._gesvd( + a_sycl_queue, + jobu, + jobvt, + a_h.get_array(), + s_h.get_array(), + u_h.get_array(), + vt_h.get_array(), + [a_copy_ev], + ) + + if batch_call: + if compute_uv: + return u_h, s_h, vt_h, ht_lapack_ev, a_ht_copy_ev + else: + return s_h, ht_lapack_ev, a_ht_copy_ev + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + # TODO: Need to return C-contiguous array to match the output of numpy.linalg.svd + if compute_uv: + return u_h, s_h, vt_h + else: + return s_h diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 5ea536c2887..85206bad5ba 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -9,6 +9,7 @@ from .helper import ( assert_dtype_allclose, get_all_dtypes, + get_complex_dtypes, has_support_aspect64, is_cpu_device, ) @@ -755,64 +756,6 @@ def test_qr_not_2D(): assert_allclose(ia, inp.matmul(dpnp_q, dpnp_r)) -@pytest.mark.parametrize("type",
get_all_dtypes(no_bool=True, no_complex=True)) -@pytest.mark.parametrize( - "shape", - [(2, 2), (3, 4), (5, 3), (16, 16)], - ids=["(2,2)", "(3,4)", "(5,3)", "(16,16)"], -) -def test_svd(type, shape): - a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape) - ia = inp.array(a) - - np_u, np_s, np_vt = numpy.linalg.svd(a) - dpnp_u, dpnp_s, dpnp_vt = inp.linalg.svd(ia) - - support_aspect64 = has_support_aspect64() - - if support_aspect64: - assert dpnp_u.dtype == np_u.dtype - assert dpnp_s.dtype == np_s.dtype - assert dpnp_vt.dtype == np_vt.dtype - assert dpnp_u.shape == np_u.shape - assert dpnp_s.shape == np_s.shape - assert dpnp_vt.shape == np_vt.shape - - tol = 1e-12 - if type == inp.float32: - tol = 1e-03 - elif not support_aspect64 and type in (inp.int32, inp.int64, None): - tol = 1e-03 - - # check decomposition - dpnp_diag_s = inp.zeros(shape, dtype=dpnp_s.dtype) - for i in range(dpnp_s.size): - dpnp_diag_s[i, i] = dpnp_s[i] - - # check decomposition - assert_allclose( - ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol - ) - - # compare singular values - # assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) - - # change sign of vectors - for i in range(min(shape[0], shape[1])): - if np_u[0, i] * dpnp_u[0, i] < 0: - np_u[:, i] = -np_u[:, i] - np_vt[i, :] = -np_vt[i, :] - - # compare vectors for non-zero values - for i in range(numpy.count_nonzero(np_s > tol)): - assert_allclose( - inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol - ) - assert_allclose( - inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol - ) - - class TestSolve: @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) def test_solve(self, dtype): @@ -1028,3 +971,149 @@ def test_slogdet_errors(self): # unsupported type a_np = inp.asnumpy(a_dp) assert_raises(TypeError, inp.linalg.slogdet, a_np) + + +class TestSvd: + def get_tol(self, dtype): + tol = 1e-06 + if dtype in (inp.float32, inp.complex64): + tol = 1e-04 + elif not has_support_aspect64() 
and dtype in ( + inp.int32, + inp.int64, + None, + ): + tol = 1e-04 + self._tol = tol + + def check_types_shapes( + self, dp_u, dp_s, dp_vt, np_u, np_s, np_vt, compute_vt=True + ): + if has_support_aspect64(): + if compute_vt: + assert dp_u.dtype == np_u.dtype + assert dp_vt.dtype == np_vt.dtype + assert dp_s.dtype == np_s.dtype + else: + if compute_vt: + assert dp_u.dtype.kind == np_u.dtype.kind + assert dp_vt.dtype.kind == np_vt.dtype.kind + assert dp_s.dtype.kind == np_s.dtype.kind + + if compute_vt: + assert dp_u.shape == np_u.shape + assert dp_vt.shape == np_vt.shape + assert dp_s.shape == np_s.shape + + # Checks the accuracy of singular value decomposition (SVD). + # Compares the reconstructed matrix from the decomposed components + # with the original matrix. + # Additionally checks for equality of singular values + # between dpnp and numpy decompositions + def check_decomposition( + self, dp_a, dp_u, dp_s, dp_vt, np_u, np_s, np_vt, compute_vt + ): + tol = self._tol + if compute_vt: + dpnp_diag_s = inp.zeros_like(dp_a, dtype=dp_s.dtype) + for i in range(min(dp_a.shape[-2], dp_a.shape[-1])): + dpnp_diag_s[..., i, i] = dp_s[..., i] + # TODO: remove it when dpnp.dot is updated + # dpnp.dot does not support complex type + if inp.issubdtype(dp_a.dtype, inp.complexfloating): + reconstructed = numpy.dot( + inp.asnumpy(dp_u), + numpy.dot(inp.asnumpy(dpnp_diag_s), inp.asnumpy(dp_vt)), + ) + else: + reconstructed = inp.dot(dp_u, inp.dot(dpnp_diag_s, dp_vt)) + # TODO: use assert dpnp.allclose() inside check_decomposition() + # when it will support complex dtypes + assert_allclose(dp_a, reconstructed, rtol=tol, atol=1e-4) + + assert_allclose(dp_s, np_s, rtol=tol, atol=1e-03) + + if compute_vt: + for i in range(min(dp_a.shape[-2], dp_a.shape[-1])): + if np_u[..., 0, i] * dp_u[..., 0, i] < 0: + np_u[..., :, i] = -np_u[..., :, i] + np_vt[..., i, :] = -np_vt[..., i, :] + for i in range(numpy.count_nonzero(np_s > tol)): + assert_allclose( + inp.asnumpy(dp_u[..., :, i]), + 
np_u[..., :, i], + rtol=tol, + atol=tol, + ) + assert_allclose( + inp.asnumpy(dp_vt[..., i, :]), + np_vt[..., i, :], + rtol=tol, + atol=tol, + ) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "shape", + [(2, 2), (3, 4), (5, 3), (16, 16)], + ids=["(2,2)", "(3,4)", "(5,3)", "(16,16)"], + ) + def test_svd(self, dtype, shape): + a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) + dp_a = inp.array(a) + + np_u, np_s, np_vt = numpy.linalg.svd(a) + dp_u, dp_s, dp_vt = inp.linalg.svd(dp_a) + + self.check_types_shapes(dp_u, dp_s, dp_vt, np_u, np_s, np_vt) + self.get_tol(dtype) + self.check_decomposition( + dp_a, dp_u, dp_s, dp_vt, np_u, np_s, np_vt, True + ) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize("compute_vt", [True, False], ids=["True", "False"]) + @pytest.mark.parametrize( + "shape", + [(2, 2), (16, 16)], + ids=["(2,2)", "(16, 16)"], + ) + def test_svd_hermitian(self, dtype, compute_vt, shape): + a = numpy.random.randn(*shape) + 1j * numpy.random.randn(*shape) + a = numpy.conj(a.T) @ a + + a = a.astype(dtype) + dp_a = inp.array(a) + + if compute_vt: + np_u, np_s, np_vt = numpy.linalg.svd( + a, compute_uv=compute_vt, hermitian=True + ) + dp_u, dp_s, dp_vt = inp.linalg.svd( + dp_a, compute_uv=compute_vt, hermitian=True + ) + else: + np_s = numpy.linalg.svd(a, compute_uv=compute_vt, hermitian=True) + dp_s = inp.linalg.svd(dp_a, compute_uv=compute_vt, hermitian=True) + np_u = np_vt = dp_u = dp_vt = None + + self.check_types_shapes( + dp_u, dp_s, dp_vt, np_u, np_s, np_vt, compute_vt + ) + + self.get_tol(dtype) + + self.check_decomposition( + dp_a, dp_u, dp_s, dp_vt, np_u, np_s, np_vt, compute_vt + ) + + def test_svd_errors(self): + a_dp = inp.array([[1, 2], [3, 4]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.svd, a_np) + + # a.ndim < 2 + a_dp_ndim_1 = a_dp.flatten() + 
assert_raises(inp.linalg.LinAlgError, inp.linalg.svd, a_dp_ndim_1) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 7a7bcd53e0b..205d4efb572 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1230,53 +1230,62 @@ def test_qr(device): valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_svd(device): - shape = (2, 2) +@pytest.mark.parametrize("full_matrices", [True, False], ids=["True", "False"]) +@pytest.mark.parametrize("compute_uv", [True, False], ids=["True", "False"]) +@pytest.mark.parametrize( + "shape", + [ + (1, 4), + (3, 2), + (4, 4), + (2, 0), + (0, 2), + (2, 2, 3), + (3, 3, 0), + (0, 2, 3), + (1, 0, 3), + ], + ids=[ + "(1, 4)", + "(3, 2)", + "(4, 4)", + "(2, 0)", + "(0, 2)", + "(2, 2, 3)", + "(3, 3, 0)", + "(0, 2, 3)", + "(1, 0, 3)", + ], +) +def test_svd(shape, full_matrices, compute_uv, device): dtype = dpnp.default_float_type(device) - numpy_data = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) - dpnp_data = dpnp.arange( - shape[0] * shape[1], dtype=dtype, device=device - ).reshape(shape) - - np_u, np_s, np_vt = numpy.linalg.svd(numpy_data) - dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data) - - assert dpnp_u.dtype == np_u.dtype - assert dpnp_s.dtype == np_s.dtype - assert dpnp_vt.dtype == np_vt.dtype - assert dpnp_u.shape == np_u.shape - assert dpnp_s.shape == np_s.shape - assert dpnp_vt.shape == np_vt.shape - - # check decomposition - dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype, device=device) - for i in range(dpnp_s.size): - dpnp_diag_s[i, i] = dpnp_s[i] - - # check decomposition - assert_dtype_allclose( - dpnp_data, dpnp.dot(dpnp_u, dpnp.dot(dpnp_diag_s, dpnp_vt)) + + count_elems = numpy.prod(shape) + dpnp_data = dpnp.arange(count_elems, dtype=dtype, device=device).reshape( + shape ) + expected_queue = dpnp_data.get_array().sycl_queue - for i in range(min(shape[0], shape[1])): - if np_u[0, i] * dpnp_u[0, i] < 0: - np_u[:, i] = -np_u[:, i] - np_vt[i, :] 
= -np_vt[i, :] + if compute_uv: + dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd( + dpnp_data, full_matrices=full_matrices, compute_uv=compute_uv + ) - # compare vectors for non-zero values - for i in range(numpy.count_nonzero(np_s)): - assert_dtype_allclose(dpnp_u[:, i], np_u[:, i]) - assert_dtype_allclose(dpnp_vt[i, :], np_vt[i, :]) + dpnp_u_queue = dpnp_u.get_array().sycl_queue + dpnp_vt_queue = dpnp_vt.get_array().sycl_queue + dpnp_s_queue = dpnp_s.get_array().sycl_queue - expected_queue = dpnp_data.get_array().sycl_queue - dpnp_u_queue = dpnp_u.get_array().sycl_queue - dpnp_s_queue = dpnp_s.get_array().sycl_queue - dpnp_vt_queue = dpnp_vt.get_array().sycl_queue + assert_sycl_queue_equal(dpnp_u_queue, expected_queue) + assert_sycl_queue_equal(dpnp_vt_queue, expected_queue) + assert_sycl_queue_equal(dpnp_s_queue, expected_queue) - # compare queue and device - assert_sycl_queue_equal(dpnp_u_queue, expected_queue) - assert_sycl_queue_equal(dpnp_s_queue, expected_queue) - assert_sycl_queue_equal(dpnp_vt_queue, expected_queue) + else: + dpnp_s = dpnp.linalg.svd( + dpnp_data, full_matrices=full_matrices, compute_uv=compute_uv + ) + dpnp_s_queue = dpnp_s.get_array().sycl_queue + + assert_sycl_queue_equal(dpnp_s_queue, expected_queue) @pytest.mark.parametrize( diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index ada68ebfa6c..bff548a90d0 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -740,3 +740,53 @@ def test_inv(shape, is_empty, usm_type): result = dp.linalg.inv(x) assert x.usm_type == result.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "full_matrices_param", [True, False], ids=["True", "False"] +) +@pytest.mark.parametrize( + "compute_uv_param", [True, False], ids=["True", "False"] +) +@pytest.mark.parametrize( + "shape", + [ + (1, 4), + (3, 2), + (4, 4), + (2, 0), + (0, 2), + (2, 2, 3), + (3, 3, 0), + (0, 2, 3), + (1, 0, 3), + ], + ids=[ + "(1, 4)", + "(3, 2)", 
+ "(4, 4)", + "(2, 0)", + "(0, 2)", + "(2, 2, 3)", + "(3, 3, 0)", + "(0, 2, 3)", + "(1, 0, 3)", + ], +) +def test_svd(usm_type, shape, full_matrices_param, compute_uv_param): + x = dp.ones(shape, usm_type=usm_type) + + if compute_uv_param: + u, s, vt = dp.linalg.svd( + x, full_matrices=full_matrices_param, compute_uv=compute_uv_param + ) + + assert x.usm_type == u.usm_type + assert x.usm_type == vt.usm_type + else: + s = dp.linalg.svd( + x, full_matrices=full_matrices_param, compute_uv=compute_uv_param + ) + + assert x.usm_type == s.usm_type diff --git a/tests/third_party/cupy/linalg_tests/test_decomposition.py b/tests/third_party/cupy/linalg_tests/test_decomposition.py index 42bcf122ff4..fd887c16e6c 100644 --- a/tests/third_party/cupy/linalg_tests/test_decomposition.py +++ b/tests/third_party/cupy/linalg_tests/test_decomposition.py @@ -6,6 +6,7 @@ import dpnp as cupy from tests.helper import has_support_aspect64, is_cpu_device from tests.third_party.cupy import testing +from tests.third_party.cupy.testing import _condition def random_matrix(shape, dtype, scale, sym=False): @@ -44,6 +45,14 @@ def random_matrix(shape, dtype, scale, sym=False): return new_a.astype(dtype) +def stacked_identity(xp, batch_shape, n, dtype): + shape = batch_shape + (n, n) + idx = xp.arange(n) + x = xp.zeros(shape, dtype=dtype) + x[..., idx, idx] = 1 + return x + + class TestCholeskyDecomposition: @testing.numpy_cupy_allclose(atol=1e-3, type_check=has_support_aspect64()) def check_L(self, array, xp): @@ -135,3 +144,244 @@ def check_L(self, array): def test_decomposition(self, dtype): A = numpy.array([[1, -2], [-2, 1]]).astype(dtype) self.check_L(A) + + +@testing.parameterize( + *testing.product( + { + "full_matrices": [True, False], + } + ) +) +@testing.fix_random() +class TestSVD(unittest.TestCase): + # TODO: New packages that fix issue CMPLRLLVM-53771 are only available in internal CI. + # Skip the tests on cpu until these packages are available for the external CI. 
+ # Specifically dpcpp_linux-64>=2024.1.0 + @classmethod + def setUpClass(cls): + if is_cpu_device(): + raise unittest.SkipTest("CMPLRLLVM-53771") + + def setUp(self): + self.seed = testing.generate_seed() + + @testing.for_dtypes( + [ + numpy.int32, + numpy.int64, + numpy.uint32, + numpy.uint64, + numpy.float32, + numpy.float64, + numpy.complex64, + numpy.complex128, + ] + ) + def check_usv(self, shape, dtype): + array = testing.shaped_random(shape, numpy, dtype=dtype, seed=self.seed) + a_cpu = numpy.asarray(array, dtype=dtype) + a_gpu = cupy.asarray(array, dtype=dtype) + result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices) + result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices) + # Check if the input matrix is not broken + testing.assert_allclose(a_gpu, a_cpu) + + assert len(result_gpu) == 3 + for i in range(3): + assert result_gpu[i].shape == result_cpu[i].shape + if has_support_aspect64(): + assert result_gpu[i].dtype == result_cpu[i].dtype + else: + assert result_gpu[i].dtype.kind == result_cpu[i].dtype.kind + u_cpu, s_cpu, vh_cpu = result_cpu + u_gpu, s_gpu, vh_gpu = result_gpu + testing.assert_allclose(s_gpu, s_cpu, rtol=1e-5, atol=1e-4) + + # reconstruct the matrix + k = s_cpu.shape[-1] + + # dpnp.dot/matmul does not support complex type and unstable on cpu + # TODO: remove it and use xp.dot/matmul when dpnp.dot/matmul is updated + u_gpu = u_gpu.asnumpy() + vh_gpu = vh_gpu.asnumpy() + s_gpu = s_gpu.asnumpy() + xp = numpy + + if len(shape) == 2: + if self.full_matrices: + a_gpu_usv = numpy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :]) + else: + a_gpu_usv = numpy.dot(u_gpu * s_gpu, vh_gpu) + else: + if self.full_matrices: + a_gpu_usv = numpy.matmul( + u_gpu[..., :k] * s_gpu[..., None, :], vh_gpu[..., :k, :] + ) + else: + a_gpu_usv = numpy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu) + testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4) + + # assert unitary + u_len = u_gpu.shape[-1] + vh_len = vh_gpu.shape[-2] + 
testing.assert_allclose( + xp.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu), + stacked_identity(xp, shape[:-2], u_len, dtype), + atol=1e-4, + ) + testing.assert_allclose( + xp.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()), + stacked_identity(xp, shape[:-2], vh_len, dtype), + atol=1e-4, + ) + + @testing.for_dtypes( + [ + numpy.int32, + numpy.int64, + numpy.uint32, + numpy.uint64, + numpy.float32, + numpy.float64, + numpy.complex64, + numpy.complex128, + ] + ) + # dpnp.linalg.svd() returns results as F-contiguous + # while numpy.linalg.svd() returns as C-contiguous + @testing.numpy_cupy_allclose( + rtol=1e-5, + atol=1e-4, + type_check=has_support_aspect64(), + contiguous_check=False, + ) + def check_singular(self, shape, xp, dtype): + array = testing.shaped_random(shape, xp, dtype=dtype, seed=self.seed) + a = xp.asarray(array, dtype=dtype) + a_copy = a.copy() + result = xp.linalg.svd( + a, full_matrices=self.full_matrices, compute_uv=False + ) + # Check if the input matrix is not broken + assert (a == a_copy).all() + return result + + @_condition.repeat(3, 10) + def test_svd_rank2(self): + self.check_usv((3, 7)) + self.check_usv((2, 2)) + self.check_usv((7, 3)) + + @_condition.repeat(3, 10) + def test_svd_rank2_no_uv(self): + self.check_singular((3, 7)) + self.check_singular((2, 2)) + self.check_singular((7, 3)) + + @testing.with_requires("numpy>=1.16") + def test_svd_rank2_empty_array(self): + self.check_usv((0, 3)) + self.check_usv((3, 0)) + self.check_usv((1, 0)) + + @testing.with_requires("numpy>=1.16") + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) + def test_svd_rank2_empty_array_compute_uv_false(self, xp): + array = xp.empty((3, 0)) + return xp.linalg.svd( + array, full_matrices=self.full_matrices, compute_uv=False + ) + + @_condition.repeat(3, 10) + def test_svd_rank3(self): + self.check_usv((2, 3, 4)) + self.check_usv((2, 3, 7)) + self.check_usv((2, 4, 4)) + self.check_usv((2, 7, 3)) + self.check_usv((2, 4, 3)) + self.check_usv((2, 32, 
32)) + + @_condition.repeat(3, 10) + def test_svd_rank3_loop(self): + # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) + self.check_usv((2, 64, 64)) + self.check_usv((2, 64, 32)) + self.check_usv((2, 32, 64)) + + @_condition.repeat(3, 10) + def test_svd_rank3_no_uv(self): + self.check_singular((2, 3, 4)) + self.check_singular((2, 3, 7)) + self.check_singular((2, 4, 4)) + self.check_singular((2, 7, 3)) + self.check_singular((2, 4, 3)) + + @_condition.repeat(3, 10) + def test_svd_rank3_no_uv_loop(self): + # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) + self.check_singular((2, 64, 64)) + self.check_singular((2, 64, 32)) + self.check_singular((2, 32, 64)) + + @testing.with_requires("numpy>=1.16") + def test_svd_rank3_empty_array(self): + self.check_usv((0, 3, 4)) + self.check_usv((3, 0, 4)) + self.check_usv((3, 4, 0)) + self.check_usv((3, 0, 0)) + self.check_usv((0, 3, 0)) + self.check_usv((0, 0, 3)) + + @testing.with_requires("numpy>=1.16") + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) + def test_svd_rank3_empty_array_compute_uv_false1(self, xp): + array = xp.empty((3, 0, 4)) + return xp.linalg.svd( + array, full_matrices=self.full_matrices, compute_uv=False + ) + + @testing.with_requires("numpy>=1.16") + @testing.numpy_cupy_array_equal(type_check=has_support_aspect64()) + def test_svd_rank3_empty_array_compute_uv_false2(self, xp): + array = xp.empty((0, 3, 4)) + return xp.linalg.svd( + array, full_matrices=self.full_matrices, compute_uv=False + ) + + @_condition.repeat(3, 10) + def test_svd_rank4(self): + self.check_usv((2, 2, 3, 4)) + self.check_usv((2, 2, 3, 7)) + self.check_usv((2, 2, 4, 4)) + self.check_usv((2, 2, 7, 3)) + self.check_usv((2, 2, 4, 3)) + self.check_usv((2, 2, 32, 32)) + + @_condition.repeat(3, 10) + def test_svd_rank4_loop(self): + # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) + self.check_usv((3, 2, 64, 64)) + self.check_usv((3, 2, 64, 32)) + self.check_usv((3, 2, 
32, 64)) + + @_condition.repeat(3, 10) + def test_svd_rank4_no_uv(self): + self.check_singular((2, 2, 3, 4)) + self.check_singular((2, 2, 3, 7)) + self.check_singular((2, 2, 4, 4)) + self.check_singular((2, 2, 7, 3)) + self.check_singular((2, 2, 4, 3)) + + @_condition.repeat(3, 10) + def test_svd_rank4_no_uv_loop(self): + # This tests the loop-based batched gesvd on CUDA (_gesvd_batched) + self.check_singular((3, 2, 64, 64)) + self.check_singular((3, 2, 64, 32)) + self.check_singular((3, 2, 32, 64)) + + @testing.with_requires("numpy>=1.16") + def test_svd_rank4_empty_array(self): + self.check_usv((0, 2, 3, 4)) + self.check_usv((1, 2, 0, 4)) + self.check_usv((1, 2, 3, 0)) diff --git a/tests/third_party/cupy/linalg_tests/test_solve.py b/tests/third_party/cupy/linalg_tests/test_solve.py index b31082c8e84..cd397f6c9e1 100644 --- a/tests/third_party/cupy/linalg_tests/test_solve.py +++ b/tests/third_party/cupy/linalg_tests/test_solve.py @@ -10,7 +10,7 @@ is_cpu_device, ) from tests.third_party.cupy import testing -from tests.third_party.cupy.testing import condition +from tests.third_party.cupy.testing import _condition @testing.parameterize( @@ -104,7 +104,7 @@ def test_invalid_shape(self): ) class TestInv(unittest.TestCase): @testing.for_dtypes("ifdFD") - @condition.retry(10) + @_condition.retry(10) def check_x(self, a_shape, dtype): a_cpu = numpy.random.randint(0, 10, size=a_shape) a_cpu = a_cpu.astype(dtype, order=self.order) diff --git a/tests/third_party/cupy/random_tests/test_sample.py b/tests/third_party/cupy/random_tests/test_sample.py index f95f3e42710..79e2370ad05 100644 --- a/tests/third_party/cupy/random_tests/test_sample.py +++ b/tests/third_party/cupy/random_tests/test_sample.py @@ -7,7 +7,7 @@ import dpnp as cupy from dpnp import random from tests.third_party.cupy import testing -from tests.third_party.cupy.testing import condition, hypothesis +from tests.third_party.cupy.testing import _condition, hypothesis @testing.gpu @@ -43,7 +43,7 @@ def 
test_zero_sizes(self): @testing.gpu class TestRandint2(unittest.TestCase): @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_bound_1(self): vals = [random.randint(0, 10, (2, 3)) for _ in range(10)] for val in vals: @@ -52,7 +52,7 @@ def test_bound_1(self): self.assertEqual(max(_.max() for _ in vals), 9) @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_bound_2(self): vals = [random.randint(0, 2) for _ in range(20)] for val in vals: @@ -61,7 +61,7 @@ def test_bound_2(self): self.assertEqual(max(_.max() for _ in vals), 1) @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_bound_overflow(self): # 100 - (-100) exceeds the range of int8 val = random.randint(numpy.int8(-100), numpy.int8(100), size=20) @@ -70,7 +70,7 @@ def test_bound_overflow(self): self.assertLess(val.max(), 100) @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_bound_float1(self): # generate floats s.t. 
int(low) < int(high) low, high = sorted(numpy.random.uniform(-5, 5, size=2)) @@ -90,7 +90,7 @@ def test_bound_float2(self): self.assertEqual(min(_.min() for _ in vals), -1) self.assertEqual(max(_.max() for _ in vals), 0) - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_goodness_of_fit(self): mx = 5 trial = 100 @@ -99,7 +99,7 @@ def test_goodness_of_fit(self): expected = numpy.array([float(trial) / mx] * mx) self.assertTrue(hypothesis.chi_square_test(counts, expected)) - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_goodness_of_fit_2(self): mx = 5 vals = random.randint(mx, size=(5, 20)) @@ -169,7 +169,7 @@ def test_size_is_not_none(self): @testing.fix_random() @testing.gpu class TestRandomIntegers2(unittest.TestCase): - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_bound_1(self): vals = [random.random_integers(0, 10, (2, 3)).get() for _ in range(10)] for val in vals: @@ -177,7 +177,7 @@ def test_bound_1(self): self.assertEqual(min(_.min() for _ in vals), 0) self.assertEqual(max(_.max() for _ in vals), 10) - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_bound_2(self): vals = [random.random_integers(0, 2).get() for _ in range(20)] for val in vals: @@ -185,7 +185,7 @@ def test_bound_2(self): self.assertEqual(min(vals), 0) self.assertEqual(max(vals), 2) - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_goodness_of_fit(self): mx = 5 trial = 100 @@ -194,7 +194,7 @@ def test_goodness_of_fit(self): expected = numpy.array([float(trial) / mx] * mx) self.assertTrue(hypothesis.chi_square_test(counts, expected)) - @condition.repeat(3, 10) + @_condition.repeat(3, 10) def test_goodness_of_fit_2(self): mx = 5 vals = random.randint(0, mx, (5, 20)).get() @@ -289,7 +289,7 @@ def test_randn_invalid_argument(self): @testing.fix_random() @testing.gpu class TestMultinomial(unittest.TestCase): - @condition.repeat(3, 10) + @_condition.repeat(3, 10) @testing.for_float_dtypes() @testing.numpy_cupy_allclose(rtol=0.05) 
def test_multinomial(self, xp, dtype): diff --git a/tests/third_party/cupy/testing/__init__.py b/tests/third_party/cupy/testing/__init__.py index 701c381e2f3..aa6c113706b 100644 --- a/tests/third_party/cupy/testing/__init__.py +++ b/tests/third_party/cupy/testing/__init__.py @@ -60,6 +60,4 @@ product, product_dict, ) -from tests.third_party.cupy.testing.random import fix_random - -# from tests.third_party.cupy.testing.random import generate_seed +from tests.third_party.cupy.testing.random import fix_random, generate_seed diff --git a/tests/third_party/cupy/testing/condition.py b/tests/third_party/cupy/testing/_condition.py similarity index 98% rename from tests/third_party/cupy/testing/condition.py rename to tests/third_party/cupy/testing/_condition.py index 4465dc3d0ee..3533ef8b84d 100644 --- a/tests/third_party/cupy/testing/condition.py +++ b/tests/third_party/cupy/testing/_condition.py @@ -106,7 +106,7 @@ def repeat(times, intensive_times=None): if intensive_times is None: return repeat_with_success_at_least(times, times) - casual_test = bool(int(os.environ.get("CUPY_TEST_CASUAL", "0"))) + casual_test = bool(int(os.environ.get("CUPY_TEST_CASUAL", "1"))) times_ = times if casual_test else intensive_times return repeat_with_success_at_least(times_, times_) diff --git a/tests/third_party/cupy/testing/random.py b/tests/third_party/cupy/testing/random.py index 444f2b3352c..ecc299737c0 100644 --- a/tests/third_party/cupy/testing/random.py +++ b/tests/third_party/cupy/testing/random.py @@ -20,12 +20,15 @@ def do_setup(deterministic=True): global _old_cupy_random_states _old_python_random_state = random.getstate() _old_numpy_random_state = numpy.random.get_state() - _old_cupy_random_states = cupy.random.generator._random_states - cupy.random.reset_states() + _old_cupy_random_states = cupy.random.dpnp_iface_random._dpnp_random_states + cupy.random.dpnp_iface_random._dpnp_random_states = {} # Check that _random_state has been recreated in # cupy.random.reset_states(). 
Otherwise the contents of # _old_cupy_random_states would be overwritten. - assert cupy.random.generator._random_states is not _old_cupy_random_states + assert ( + cupy.random.dpnp_iface_random._dpnp_random_states + is not _old_cupy_random_states + ) if not deterministic: random.seed() @@ -43,7 +46,7 @@ def do_teardown(): global _old_cupy_random_states random.setstate(_old_python_random_state) numpy.random.set_state(_old_numpy_random_state) - cupy.random.generator._random_states = _old_cupy_random_states + cupy.random.dpnp_iface_random._dpnp_random_states = _old_cupy_random_states _old_python_random_state = None _old_numpy_random_state = None _old_cupy_random_states = None @@ -91,12 +94,12 @@ def fix_random(): """Decorator that fixes random numbers in a test. This decorator can be applied to either a test case class or a test method. - It should not be applied within ``condition.retry`` or - ``condition.repeat``. + It should not be applied within ``_condition.retry`` or + ``_condition.repeat``. """ # TODO(niboshi): Prevent this decorator from being applied within - # condition.repeat or condition.retry decorators. That would repeat + # _condition.repeat or _condition.retry decorators. That would repeat # tests with the same random seeds. It's okay to apply this outside # these decorators. 
From b46e0f62ca8d90ee03c267d9559e33c1b02c7736 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 3 Feb 2024 00:34:18 +0100 Subject: [PATCH 03/29] Pin conda-build to `3.28.4` version in GitHub action (#1678) * Add extra pre-step to free more memory on Ubuntu runners * Exclude Windows runners * Enable conda verbosity * Pin conda-build to 3.28.4 * Pinned conda-build in test jobs --- .github/workflows/conda-package.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 32f62306ae5..d9072c26a65 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -96,7 +96,7 @@ jobs: (echo CONDA_BLD=%CONDA_PREFIX%\conda-bld\win-64\) >> %GITHUB_ENV% - name: Install conda-build - run: conda install conda-build + run: conda install conda-build=3.28.4 - name: Cache conda packages uses: actions/cache@v4 @@ -167,7 +167,7 @@ jobs: # Needed to be able to run conda index - name: Install conda-build - run: conda install conda-build + run: conda install conda-build=3.28.4 - name: Create conda channel run: conda index ${{ env.channel-path }} @@ -283,7 +283,7 @@ jobs: # Needed to be able to run conda index - name: Install conda-build - run: conda install conda-build + run: conda install conda-build=3.28.4 - name: Create conda channel run: conda index ${{ env.channel-path }} From 38a7ca8fdef58595d0ddc6909568a0a7ce1a95bd Mon Sep 17 00:00:00 2001 From: vtavana <120411540+vtavana@users.noreply.github.com> Date: Fri, 2 Feb 2024 22:35:49 -0600 Subject: [PATCH 04/29] update `build_locally.py` (#1677) * update build_locally.py * fix pre-commit * add comments --------- Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- scripts/build_locally.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scripts/build_locally.py b/scripts/build_locally.py index c8ff30d34ce..a0e5c55edc9 100644 
--- a/scripts/build_locally.py +++ b/scripts/build_locally.py @@ -58,12 +58,31 @@ def run( cmake_args += [ "--cmake-executable=" + cmake_executable, ] + + # if dpctl is locally built using `script/build_locally.py`, it is needed + # to pass the -DDpctl_ROOT=$(python -m dpctl --cmakedir) + # if dpctl is conda installed, it is optional to pass this parameter + process = subprocess.Popen( + ["python", "-m", "dpctl", "--cmakedir"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + output, error = process.communicate() + if process.returncode == 0: + cmake_dir = output.decode("utf-8").strip() + else: + raise RuntimeError( + "Failed to retrieve dpctl cmake directory: " + + error.decode("utf-8").strip() + ) + cmake_args += [ "--build-type=" + build_type, "--generator=" + build_system, "--", "-DCMAKE_C_COMPILER:PATH=" + c_compiler, "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, + "-DDpctl_ROOT=" + cmake_dir, ] if verbose: cmake_args += [ From 7c4b39ac9681790b71c866dac74485548a67c510 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Sat, 3 Feb 2024 06:29:08 -0800 Subject: [PATCH 05/29] Implement sparse and copy arguments for dpnp.mesgrid function (#1675) * Implement sparse and copy arguments for dpnp.mesgrid function * address comments * Removed limitation block from th description * added tests --------- Co-authored-by: Anton Volkov Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/dpnp_container.py | 12 ------ dpnp/dpnp_iface_arraycreation.py | 64 +++++++++++++++++++++++--------- tests/skipped_tests.tbl | 24 ------------ tests/skipped_tests_gpu.tbl | 24 ------------ tests/test_arraycreation.py | 9 +++++ 5 files changed, 56 insertions(+), 77 deletions(-) diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 3aa4478baa0..243899bee80 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -271,18 +271,6 @@ def linspace( return dpnp_array(array_obj.shape, buffer=array_obj) -def meshgrid(*xi, indexing="xy"): - 
"""Creates list of `dpnp_array` coordinate matrices from vectors.""" - if len(xi) == 0: - return [] - arrays = tuple(dpnp.get_usm_ndarray(x) for x in xi) - arrays_obj = dpt.meshgrid(*arrays, indexing=indexing) - return [ - dpnp_array._create_from_usm_ndarray(array_obj) - for array_obj in arrays_obj - ] - - def ones( shape, *, diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 067eb3fbb52..851ef119975 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -1394,12 +1394,28 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): For full documentation refer to :obj:`numpy.meshgrid`. - Limitations - ----------- - Each array instance from `xi` is supported as either :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Parameter `copy` is supported only with default value ``True``. - Parameter `sparse` is supported only with default value ``False``. - Otherwise the function will be executed sequentially on CPU. + Parameters + ---------- + x1, x2,..., xn : {dpnp.ndarray, usm_ndarray} + 1-D arrays representing the coordinates of a grid. + indexing : {'xy', 'ij'}, optional + Cartesian ('xy', default) or matrix ('ij') indexing of output. + sparse : bool, optional + If True the shape of the returned coordinate array for dimension `i` + is reduced from ``(N1, ..., Ni, ... Nn)`` to + ``(1, ..., 1, Ni, 1, ..., 1)``. Default is False. + copy : bool, optional + If False, a view into the original arrays are returned in order to + conserve memory. Default is True. + + Returns + ------- + X1, X2,..., XN : tuple of dpnp.ndarrays + For vectors `x1`, `x2`,..., `xn` with lengths ``Ni=len(xi)``, + returns ``(N1, N2, N3,..., Nn)`` shaped arrays if indexing='ij' + or ``(N2, N1, N3,..., Nn)`` shaped arrays if indexing='xy' + with the elements of `xi` repeated to fill the matrix along + the first dimension for `x1`, the second for `x2` and so on. 
Examples -------- @@ -1433,18 +1449,32 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): """ - if not all((isinstance(x, (dpnp.ndarray, dpt.usm_ndarray)) for x in xi)): - pass - elif indexing not in ["ij", "xy"]: - pass - elif copy is not True: - pass - elif sparse is not False: - pass - else: - return dpnp_container.meshgrid(*xi, indexing=indexing) + if not dpnp.check_supported_arrays_type(*xi): + raise TypeError("Each input array must be any of supported type") + + ndim = len(xi) + + if indexing not in ["xy", "ij"]: + raise ValueError( + "Unrecognized indexing keyword value, expecting 'xy' or 'ij'." + ) + + s0 = (1,) * ndim + output = [ + dpnp.reshape(x, s0[:i] + (-1,) + s0[i + 1 :]) for i, x in enumerate(xi) + ] + + if indexing == "xy" and ndim > 1: + output[0] = output[0].reshape((1, -1) + s0[2:]) + output[1] = output[1].reshape((-1, 1) + s0[2:]) + + if not sparse: + output = dpnp.broadcast_arrays(*output) + + if copy: + output = [x.copy() for x in output] - return call_origin(numpy.meshgrid, xi, copy, sparse, indexing) + return output class MGridClass: diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 8eb46d3c983..018255c1e40 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -151,30 +151,6 @@ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_ones_like_s tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_strides -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2 
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid1 
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid3 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_AxisConcatenator_init1 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_len tests/third_party/cupy/indexing_tests/test_generate.py::TestC_::test_c_1 diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index b8c195b9861..fe3671ecf7f 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -230,30 +230,6 @@ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_ones_like_s tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_strides -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, 
indexing='xy', sparse=False}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_1_{copy=False, indexing='xy', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_2_{copy=False, indexing='ij', sparse=False}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, 
indexing='xy', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid0 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid1 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid2 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid3 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_negative_size tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_no_dtype_int diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 0a4ce206337..f7b06ffc9be 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -878,3 +878,12 @@ def test_logspace_axis(axis): [2, 3], [20, 15], num=2, base=[[1, 3], [5, 7]], axis=axis ) assert_dtype_allclose(func(dpnp), func(numpy)) + + +def test_meshgrid_raise_error(): + a = numpy.array([1, 2, 3, 4]) + with pytest.raises(TypeError): + dpnp.meshgrid(a) + b = dpnp.array([1, 2, 3, 4]) + with pytest.raises(ValueError): + dpnp.meshgrid(b, indexing="ab") From d504d7dcb147568994ebd8205b7d74083adce86c Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 3 Feb 2024 20:11:43 +0100 Subject: [PATCH 06/29] Specify permissions in GH Action workflows (#1679) * Specify permissions in GH Action workflows * Added contents permissions write to deploy your static files to GitHub Pages * Added permissions required by actions --- .github/workflows/build-sphinx.yml | 10 ++++++++++ .github/workflows/conda-package.yml | 6 ++++++ .github/workflows/generate_coverage.yaml | 6 
++++++ .github/workflows/pre-commit.yml | 2 ++ 4 files changed, 24 insertions(+) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 6246ee13e12..a547efec727 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -6,6 +6,8 @@ on: pull_request: types: [opened, synchronize, reopened, closed] +permissions: read-all + env: GH_BOT_NAME: 'github-actions[bot]' GH_BOT_EMAIL: 'github-actions[bot]@users.noreply.github.com' @@ -25,6 +27,14 @@ jobs: runs-on: ubuntu-20.04 + permissions: + # Needed to cancel any previous runs that are not completed for a given workflow + actions: write + # Needed to deploy static files to GitHub Pages + contents: write + # Needed to add a comment to a pull request's issue + pull-requests: write + env: python-ver: '3.9' CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels' diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index d9072c26a65..ddbd9191287 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -6,6 +6,8 @@ on: - master pull_request: +permissions: read-all + env: PACKAGE_NAME: dpnp MODULE_NAME: dpnp @@ -58,6 +60,10 @@ jobs: python: ['3.9', '3.10', '3.11'] os: [ubuntu-20.04, windows-latest] + permissions: + # Needed to cancel any previous runs that are not completed for a given workflow + actions: write + runs-on: ${{ matrix.os }} defaults: diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index e7479d445ea..b5b0e4a40b9 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -4,11 +4,17 @@ on: push: branches: [master] +permissions: read-all + jobs: generate-coverage: name: Generate coverage and push to Coveralls.io runs-on: ubuntu-20.04 + permissions: + # Needed to cancel any previous runs that are not completed for a given workflow + actions: write + defaults: run: shell: 
bash -l {0} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index dd5047f22b1..aa17c7696df 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -5,6 +5,8 @@ on: push: branches: [master] +permissions: read-all + jobs: pre-commit: runs-on: ubuntu-latest From 22c2367d9ce82b4d2792db5dd9e5d1cbc717bda9 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 3 Feb 2024 23:01:21 +0100 Subject: [PATCH 07/29] Add OpenSSF Scorecard badge to README (#1680) * Add OpenSSF Scorecard badge to README * Add permissions for clean job --- .github/workflows/build-sphinx.yml | 4 ++ .github/workflows/openssf-scorecard.yml | 73 +++++++++++++++++++++++++ README.md | 1 + 3 files changed, 78 insertions(+) create mode 100644 .github/workflows/openssf-scorecard.yml diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index a547efec727..fe82e96b2f0 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -190,6 +190,10 @@ jobs: needs: build-and-deploy + permissions: + # Needed to remove docs for closed pull request from the repo + contents: write + runs-on: ubuntu-20.04 steps: diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml new file mode 100644 index 00000000000..b43795eaebe --- /dev/null +++ b/.github/workflows/openssf-scorecard.yml @@ -0,0 +1,73 @@ +# This workflow uses actions that are not certified by GitHub. They are provided +# by a third-party and are governed by separate terms of service, privacy +# policy, and support documentation. + +name: Scorecard supply-chain security +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. 
See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '28 2 * * 1' + - cron: '28 2 * * 4' + push: + branches: [ "master" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). + id-token: write + # Uncomment the permissions below if installing in a private repository. + # contents: read + # actions: read + + steps: + - name: "Checkout code" + uses: actions/checkout@v4.1.1 + with: + persist-credentials: false + + - name: "Run analysis" + uses: ossf/scorecard-action@v2.3.1 + with: + results_file: results.sarif + results_format: sarif + # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: + # - you want to enable the Branch-Protection check on a *public* repository, or + # - you are installing Scorecard on a *private* repository + # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. + # repo_token: ${{ secrets.SCORECARD_TOKEN }} + + # Public repositories: + # - Publish results to OpenSSF REST API for easy access by consumers + # - Allows the repository to include the Scorecard badge. + # - See https://github.com/ossf/scorecard-action#publishing-results. + # For private repositories: + # - `publish_results` will always be set to `false`, regardless + # of the value entered here. + publish_results: true + + # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF + # format to the repository Actions tab. + - name: "Upload artifact" + uses: actions/upload-artifact@v4.3.0 + with: + name: SARIF file + path: results.sarif + retention-days: 14 + + # Upload the results to GitHub's code scanning dashboard. 
+ - name: "Upload to code-scanning" + uses: github/codeql-action/upload-sarif@v3.23.2 + with: + sarif_file: results.sarif diff --git a/README.md b/README.md index b19e902ece1..086f33c895a 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Conda package](https://github.com/IntelPython/dpnp/actions/workflows/conda-package.yml/badge.svg?branch=master&event=push)](https://github.com/IntelPython/dpnp/actions/workflows/conda-package.yml) [![Coverage Status](https://coveralls.io/repos/github/IntelPython/dpnp/badge.svg?branch=master)](https://coveralls.io/github/IntelPython/dpnp?branch=master) [![Build Sphinx](https://github.com/IntelPython/dpnp/workflows/Build%20Sphinx/badge.svg)](https://intelpython.github.io/dpnp) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/IntelPython/dpnp/badge)](https://securityscorecards.dev/viewer/?uri=github.com/IntelPython/dpnp) # DPNP - Data Parallel Extension for NumPy* [API coverage summary](https://intelpython.github.io/dpnp/reference/comparison.html#summary) From 3c676e75f8bf226865e582446425c8e9c30d1f5d Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sun, 4 Feb 2024 11:58:08 +0100 Subject: [PATCH 08/29] Adding dependabot file to update GH action versions (#1681) --- .github/dependabot.yml | 6 ++++++ .github/workflows/build-sphinx.yml | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..5ace4600a1f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index fe82e96b2f0..b4484a02988 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -193,6 +193,8 @@ jobs: permissions: # Needed to remove docs for 
closed pull request from the repo contents: write + # Needed to modify a comment in the pull request's issue + pull-requests: write runs-on: ubuntu-20.04 From da4df675e714de648ea1c16c4637bfb8baef20fc Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sun, 4 Feb 2024 14:02:27 +0100 Subject: [PATCH 09/29] Add recipe-maintainers list (#1682) --- conda-recipe/meta.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 2ff93e0a0a8..99e50c706c0 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -58,6 +58,7 @@ about: home: https://github.com/IntelPython/dpnp license: BSD-2-Clause license_file: LICENSE.txt + summary: 'Data Parallel Extension for NumPy' description: | LEGAL NOTICE: Use of this software package is subject to the software license agreement (as set forth above, in the license section of @@ -67,3 +68,11 @@ about:

EULA: BSD-2-Clause

+ +extra: + recipe-maintainers: + - oleksandr-pavlyk + - antonwolfy + - npolina4 + - vtavana + - vlad-perevezentsev From afd84fb4948dc1d1ee9bd2418b8396ae0e19ae45 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:16:56 +0100 Subject: [PATCH 10/29] Bump mshick/add-pr-comment from 2.8.1 to 2.8.2 (#1683) Bumps [mshick/add-pr-comment](https://github.com/mshick/add-pr-comment) from 2.8.1 to 2.8.2. - [Release notes](https://github.com/mshick/add-pr-comment/releases) - [Commits](https://github.com/mshick/add-pr-comment/compare/v2.8.1...v2.8.2) --- updated-dependencies: - dependency-name: mshick/add-pr-comment dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- .github/workflows/build-sphinx.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index b4484a02988..6f7bc30562b 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -176,7 +176,7 @@ jobs: if: env.GH_EVENT_OPEN_PR_UPSTREAM env: PR_NUM: ${{ github.event.number }} - uses: mshick/add-pr-comment@v2.8.1 + uses: mshick/add-pr-comment@v2.8.2 with: message: | View rendered docs @ https://intelpython.github.io/dpnp/pull/${{ env.PR_NUM }}/index.html @@ -218,7 +218,7 @@ jobs: git push tokened_docs gh-pages - name: Modify the comment with URL to official documentation - uses: mshick/add-pr-comment@v2.8.1 + uses: mshick/add-pr-comment@v2.8.2 with: find: | View rendered docs @.+ From 1528fc676d0c2b909d79648094939e24e662fcb9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 16:25:56 +0100 Subject: [PATCH 11/29] Bump 
styfle/cancel-workflow-action from 0.12.0 to 0.12.1 (#1684) Bumps [styfle/cancel-workflow-action](https://github.com/styfle/cancel-workflow-action) from 0.12.0 to 0.12.1. - [Release notes](https://github.com/styfle/cancel-workflow-action/releases) - [Commits](https://github.com/styfle/cancel-workflow-action/compare/0.12.0...0.12.1) --- updated-dependencies: - dependency-name: styfle/cancel-workflow-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- .github/workflows/build-sphinx.yml | 2 +- .github/workflows/conda-package.yml | 2 +- .github/workflows/generate_coverage.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 6f7bc30562b..af0c2243368 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.12.0 + uses: styfle/cancel-workflow-action@0.12.1 with: access_token: ${{ github.token }} diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index ddbd9191287..47db3e1850e 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -74,7 +74,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.12.0 + uses: styfle/cancel-workflow-action@0.12.1 with: access_token: ${{ github.token }} diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index b5b0e4a40b9..ceafb5390a1 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -25,7 +25,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.12.0 + uses: 
styfle/cancel-workflow-action@0.12.1 with: access_token: ${{ github.token }} From fcf3fa0f76f603d05b3913019bf9d8485536a668 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 17:38:37 +0100 Subject: [PATCH 12/29] Bump nick-fields/retry from 2.9.0 to 3.0.0 (#1687) Bumps [nick-fields/retry](https://github.com/nick-fields/retry) from 2.9.0 to 3.0.0. - [Release notes](https://github.com/nick-fields/retry/releases) - [Changelog](https://github.com/nick-fields/retry/blob/master/.releaserc.js) - [Commits](https://github.com/nick-fields/retry/compare/v2.9.0...v3.0.0) --- updated-dependencies: - dependency-name: nick-fields/retry dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- .github/workflows/generate_coverage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index ceafb5390a1..009947c9c98 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -60,7 +60,7 @@ jobs: - name: Build dpnp with coverage id: build_coverage - uses: nick-fields/retry@v2.9.0 + uses: nick-fields/retry@v3.0.0 with: shell: bash timeout_minutes: 60 From 10357cbc70fedd55397610bfe072078d18ea24cd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:35:13 +0100 Subject: [PATCH 13/29] Bump mattnotmitt/doxygen-action from 1.9.5 to 1.9.8 (#1685) Bumps [mattnotmitt/doxygen-action](https://github.com/mattnotmitt/doxygen-action) from 1.9.5 to 1.9.8. 
- [Release notes](https://github.com/mattnotmitt/doxygen-action/releases) - [Commits](https://github.com/mattnotmitt/doxygen-action/compare/v1.9.5...v1.9.8) --- updated-dependencies: - dependency-name: mattnotmitt/doxygen-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- .github/workflows/build-sphinx.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index af0c2243368..a7ed392a3a5 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -135,7 +135,7 @@ jobs: # https://github.com/marketplace/actions/doxygen-action - name: Build backend docs - uses: mattnotmitt/doxygen-action@v1.9.5 + uses: mattnotmitt/doxygen-action@v1.9.8 with: working-directory: 'dpnp/backend/doc' From ac30e215440b17a311c7d710faa5c03ef0f30227 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 4 Feb 2024 20:27:23 +0100 Subject: [PATCH 14/29] Bump github/codeql-action from 3.23.2 to 3.24.0 (#1686) Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.23.2 to 3.24.0. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v3.23.2...v3.24.0) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index b43795eaebe..903d1cb12cb 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -68,6 +68,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@v3.23.2 + uses: github/codeql-action/upload-sarif@v3.24.0 with: sarif_file: results.sarif From e7f7a7cbee9ee8036373614551fe9fd2a40e1006 Mon Sep 17 00:00:00 2001 From: StepSecurity Bot Date: Sun, 4 Feb 2024 12:59:46 -0800 Subject: [PATCH 15/29] [StepSecurity] ci: Harden GitHub Actions (#1688) Signed-off-by: StepSecurity Bot --- .github/workflows/build-sphinx.yml | 20 ++++++++-------- .github/workflows/conda-package.yml | 30 ++++++++++++------------ .github/workflows/generate_coverage.yaml | 8 +++---- .github/workflows/openssf-scorecard.yml | 8 +++---- .github/workflows/pre-commit.yml | 6 ++--- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index a7ed392a3a5..9de0097e120 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -41,7 +41,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.12.1 + uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 with: access_token: ${{ github.token }} @@ -52,7 +52,7 @@ jobs: echo "$GITHUB_CONTEXT" - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@v1.3.1 + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 with: docker-images: false @@ -86,13 +86,13 @@ jobs: sudo apt-get install -y nvidia-cuda-toolkit clinfo - 
name: Checkout repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: fetch-depth: 0 # https://github.com/marketplace/actions/setup-miniconda - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: auto-update-conda: true python-version: ${{ env.python-ver }} @@ -135,7 +135,7 @@ jobs: # https://github.com/marketplace/actions/doxygen-action - name: Build backend docs - uses: mattnotmitt/doxygen-action@v1.9.8 + uses: mattnotmitt/doxygen-action@cbe72c8e402e8a3faa1f0b247ef90aa6c8e4ce74 # v1.9.8 with: working-directory: 'dpnp/backend/doc' @@ -146,7 +146,7 @@ jobs: # The step is only used to build docs while pushing a PR to "master" - name: Deploy docs if: env.GH_EVENT_PUSH_UPSTREAM - uses: peaceiris/actions-gh-pages@v3.9.3 + uses: peaceiris/actions-gh-pages@373f7f263a76c20808c831209c920827a82a2847 # v3.9.3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ${{ env.PUBLISH_DIR }} @@ -159,7 +159,7 @@ jobs: # The step is only used to build docs while pushing to PR branch - name: Publish pull-request docs if: env.GH_EVENT_OPEN_PR_UPSTREAM - uses: peaceiris/actions-gh-pages@v3.9.3 + uses: peaceiris/actions-gh-pages@373f7f263a76c20808c831209c920827a82a2847 # v3.9.3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ${{ env.PUBLISH_DIR }} @@ -176,7 +176,7 @@ jobs: if: env.GH_EVENT_OPEN_PR_UPSTREAM env: PR_NUM: ${{ github.event.number }} - uses: mshick/add-pr-comment@v2.8.2 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 with: message: | View rendered docs @ https://intelpython.github.io/dpnp/pull/${{ env.PR_NUM }}/index.html @@ -199,7 +199,7 @@ jobs: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: fetch-depth: 0 @@ -218,7 +218,7 @@ jobs: git push tokened_docs 
gh-pages - name: Modify the comment with URL to official documentation - uses: mshick/add-pr-comment@v2.8.2 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 with: find: | View rendered docs @.+ diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 47db3e1850e..5ec377bd740 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -74,17 +74,17 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.12.1 + uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 with: access_token: ${{ github.token }} - name: Checkout DPNP repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: fetch-depth: 0 - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -105,7 +105,7 @@ jobs: run: conda install conda-build=3.28.4 - name: Cache conda packages - uses: actions/cache@v4 + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -120,7 +120,7 @@ jobs: run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe - name: Upload artifact - uses: actions/upload-artifact@v4.3.0 + uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -153,7 +153,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v4.1.1 + uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} 
@@ -164,7 +164,7 @@ jobs: tar -xvf ${{ env.pkg-path-in-channel }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 -C ${{ env.extracted-pkg-path }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -196,7 +196,7 @@ jobs: TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' - name: Cache conda packages - uses: actions/cache@v4 + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -254,7 +254,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v4.1.1 + uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -274,7 +274,7 @@ jobs: dir ${{ env.extracted-pkg-path }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -320,7 +320,7 @@ jobs: run: more lockfile - name: Cache conda packages - uses: actions/cache@v4 + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -388,12 +388,12 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v4.1.1 + uses: actions/download-artifact@6b208ae046db98c579e8a3aa621ab581ff575935 # v4.1.1 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -416,7 +416,7 @@ jobs: run: shell: bash -el {0} steps: - 
- uses: conda-incubator/setup-miniconda@v3.0.1 + - uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: run-post: false channel-priority: "disabled" @@ -427,7 +427,7 @@ jobs: run: conda install anaconda-client - name: Checkout repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: repository: IntelPython/devops-tools fetch-depth: 0 diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 009947c9c98..432377ce10c 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -25,17 +25,17 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.12.1 + uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 with: access_token: ${{ github.token }} - name: Checkout repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: fetch-depth: 0 - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v3.0.1 + uses: conda-incubator/setup-miniconda@11b562958363ec5770fef326fe8ef0366f8cbf8a # v3.0.1 with: auto-update-conda: true python-version: ${{ env.python-ver }} @@ -60,7 +60,7 @@ jobs: - name: Build dpnp with coverage id: build_coverage - uses: nick-fields/retry@v3.0.0 + uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0 with: shell: bash timeout_minutes: 60 diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 903d1cb12cb..ee124d3a9a1 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -33,12 +33,12 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@v4.1.1 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@v2.3.1 + uses: 
ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 with: results_file: results.sarif results_format: sarif @@ -60,7 +60,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@v4.3.0 + uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 with: name: SARIF file path: results.sarif @@ -68,6 +68,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@v3.24.0 + uses: github/codeql-action/upload-sarif@e8893c57a1f3a2b659b6b55564fdfdbbd2982911 # v3.24.0 with: sarif_file: results.sarif diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index aa17c7696df..7aea59b3977 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -18,8 +18,8 @@ jobs: sudo ln -s /usr/bin/clang-format-12 /usr/bin/clang-format clang-format --version - - uses: actions/checkout@v4.1.1 - - uses: actions/setup-python@v5 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: python-version: '3.11' - - uses: pre-commit/action@v3.0.0 + - uses: pre-commit/action@646c83fcd040023954eafda54b4db0192ce70507 # v3.0.0 From a002bdeff3b1038ce7af51ba805bc41cd06b35e1 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 5 Feb 2024 19:12:54 +0100 Subject: [PATCH 16/29] Add `gitleaks` and `shellcheck` to pre-commit configuration (#1689) * Add gitleaks and shellcheck to pre-commit configuration * Pin gitleaks to the latest version * Use an array to build scikit-build arguments * Use an array to build wheel arguments --- .pre-commit-config.yaml | 8 + benchmarks/pytest_benchmark/README.md | 76 +- 
benchmarks/pytest_benchmark/test_random.py | 234 +++--- conda-recipe/build.sh | 22 +- conda-recipe/run_test.sh | 9 +- doc/0.builddoc.sh | 6 +- doc/make.bat | 72 +- dpnp/backend/examples/example11.cpp | 170 ++-- dpnp/dpnp_algo/dpnp_arraycreation.py | 784 +++++++++--------- scripts/build_deps_dpctl.sh | 8 +- scripts/install_cmake_lin.sh | 7 +- scripts/install_python_deps.sh | 4 +- scripts/install_system_deps.sh | 2 - scripts/install_system_deps_intelpython.sh | 2 - scripts/set_ci_env.sh | 18 +- tests/test_histograms.py | 178 ++-- .../cupy/manipulation_tests/test_kind.py | 286 +++---- .../third_party/intel/test_zero_copy_test1.py | 72 +- tests/third_party/intel/zero-copy-test1.py | 168 ++-- 19 files changed, 1066 insertions(+), 1060 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b3787c3833c..3289990f4a4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -71,3 +71,11 @@ repos: hooks: - id: clang-format args: ["-i"] +- repo: https://github.com/gitleaks/gitleaks + rev: v8.18.2 + hooks: + - id: gitleaks +- repo: https://github.com/jumanjihouse/pre-commit-hooks + rev: 3.0.0 + hooks: + - id: shellcheck diff --git a/benchmarks/pytest_benchmark/README.md b/benchmarks/pytest_benchmark/README.md index d3c7478509a..77015a089ef 100644 --- a/benchmarks/pytest_benchmark/README.md +++ b/benchmarks/pytest_benchmark/README.md @@ -1,38 +1,38 @@ -# dpnp/benchmarks/pytest_benchmark/ - -## Prerequisites -* pytest >= 6.1.1 -* pytest-benchmark >= 3.4.1 - - -## Running benchmark tests -```bash -pytest benchmarks/ --benchmark-json=results.json -``` -Running tests and saving the current run into `STORAGE`, see [1] -```bash -pytest benchmarks/ --benchmark-json=results.json --benchmark-autosave -``` - -## Creating `.csv` report -```bash -pytest-benchmark compare results.json --csv=results.csv --group-by='name' -``` - -## Optional: creating histogram -Note: make sure that `pytest-benchmark[histogram]` installed -```bash -# example -pip install 
pytest-benchmark[histogram] -pytest -vv benchmarks/ --benchmark-autosave --benchmark-histogram -pytest-benchmark compare .benchmarks/Linux-CPython-3.7-64bit/* --histogram -``` - -## Advanced running example -``` -pytest benchmarks/ --benchmark-columns='min, max, mean, stddev, median, rounds, iterations' --benchmark-json=results.json --benchmark-autosave -pytest-benchmark compare results.json --csv=results.csv --group-by='name' -``` - - -[1] https://pytest-benchmark.readthedocs.io/en/latest/usage.html +# dpnp/benchmarks/pytest_benchmark/ + +## Prerequisites +* pytest >= 6.1.1 +* pytest-benchmark >= 3.4.1 + + +## Running benchmark tests +```bash +pytest benchmarks/ --benchmark-json=results.json +``` +Running tests and saving the current run into `STORAGE`, see [1] +```bash +pytest benchmarks/ --benchmark-json=results.json --benchmark-autosave +``` + +## Creating `.csv` report +```bash +pytest-benchmark compare results.json --csv=results.csv --group-by='name' +``` + +## Optional: creating histogram +Note: make sure that `pytest-benchmark[histogram]` installed +```bash +# example +pip install pytest-benchmark[histogram] +pytest -vv benchmarks/ --benchmark-autosave --benchmark-histogram +pytest-benchmark compare .benchmarks/Linux-CPython-3.7-64bit/* --histogram +``` + +## Advanced running example +``` +pytest benchmarks/ --benchmark-columns='min, max, mean, stddev, median, rounds, iterations' --benchmark-json=results.json --benchmark-autosave +pytest-benchmark compare results.json --csv=results.csv --group-by='name' +``` + + +[1] https://pytest-benchmark.readthedocs.io/en/latest/usage.html diff --git a/benchmarks/pytest_benchmark/test_random.py b/benchmarks/pytest_benchmark/test_random.py index 7c083d20009..ce0f374fb1e 100644 --- a/benchmarks/pytest_benchmark/test_random.py +++ b/benchmarks/pytest_benchmark/test_random.py @@ -1,117 +1,117 @@ -# cython: language_level=3 -# -*- coding: utf-8 -*- -# 
***************************************************************************** -# Copyright (c) 2016-2024, Intel Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# - Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. 
-# ***************************************************************************** - -import numpy as np -import pytest - -import dpnp - -ROUNDS = 30 -ITERATIONS = 4 - -NNUMBERS = 2**26 - - -@pytest.mark.parametrize( - "function", [dpnp.random.beta, np.random.beta], ids=["dpnp", "numpy"] -) -def test_beta(benchmark, function): - result = benchmark.pedantic( - target=function, - args=( - 4.0, - 5.0, - NNUMBERS, - ), - rounds=ROUNDS, - iterations=ITERATIONS, - ) - - -@pytest.mark.parametrize( - "function", - [dpnp.random.exponential, np.random.exponential], - ids=["dpnp", "numpy"], -) -def test_exponential(benchmark, function): - result = benchmark.pedantic( - target=function, - args=( - 4.0, - NNUMBERS, - ), - rounds=ROUNDS, - iterations=ITERATIONS, - ) - - -@pytest.mark.parametrize( - "function", [dpnp.random.gamma, np.random.gamma], ids=["dpnp", "numpy"] -) -def test_gamma(benchmark, function): - result = benchmark.pedantic( - target=function, - args=( - 2.0, - 4.0, - NNUMBERS, - ), - rounds=ROUNDS, - iterations=ITERATIONS, - ) - - -@pytest.mark.parametrize( - "function", [dpnp.random.normal, np.random.normal], ids=["dpnp", "numpy"] -) -def test_normal(benchmark, function): - result = benchmark.pedantic( - target=function, - args=( - 0.0, - 1.0, - NNUMBERS, - ), - rounds=ROUNDS, - iterations=ITERATIONS, - ) - - -@pytest.mark.parametrize( - "function", [dpnp.random.uniform, np.random.uniform], ids=["dpnp", "numpy"] -) -def test_uniform(benchmark, function): - result = benchmark.pedantic( - target=function, - args=( - 0.0, - 1.0, - NNUMBERS, - ), - rounds=ROUNDS, - iterations=ITERATIONS, - ) +# cython: language_level=3 +# -*- coding: utf-8 -*- +# ***************************************************************************** +# Copyright (c) 2016-2024, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +import numpy as np +import pytest + +import dpnp + +ROUNDS = 30 +ITERATIONS = 4 + +NNUMBERS = 2**26 + + +@pytest.mark.parametrize( + "function", [dpnp.random.beta, np.random.beta], ids=["dpnp", "numpy"] +) +def test_beta(benchmark, function): + result = benchmark.pedantic( + target=function, + args=( + 4.0, + 5.0, + NNUMBERS, + ), + rounds=ROUNDS, + iterations=ITERATIONS, + ) + + +@pytest.mark.parametrize( + "function", + [dpnp.random.exponential, np.random.exponential], + ids=["dpnp", "numpy"], +) +def test_exponential(benchmark, function): + result = benchmark.pedantic( + target=function, + args=( + 4.0, + NNUMBERS, + ), + rounds=ROUNDS, + iterations=ITERATIONS, + ) + + +@pytest.mark.parametrize( + "function", [dpnp.random.gamma, np.random.gamma], ids=["dpnp", "numpy"] +) +def test_gamma(benchmark, function): + result = benchmark.pedantic( + target=function, + args=( + 2.0, + 4.0, + NNUMBERS, + ), + rounds=ROUNDS, + iterations=ITERATIONS, + ) + + +@pytest.mark.parametrize( + "function", [dpnp.random.normal, np.random.normal], ids=["dpnp", "numpy"] +) +def test_normal(benchmark, function): + result = benchmark.pedantic( + target=function, + args=( + 0.0, + 1.0, + NNUMBERS, + ), + rounds=ROUNDS, + iterations=ITERATIONS, + ) + + +@pytest.mark.parametrize( + "function", [dpnp.random.uniform, np.random.uniform], ids=["dpnp", "numpy"] +) +def test_uniform(benchmark, function): + result = benchmark.pedantic( + target=function, + args=( + 0.0, + 1.0, + NNUMBERS, + ), + rounds=ROUNDS, + iterations=ITERATIONS, + ) diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index b4ea4c44cb2..b0a266be3ea 100755 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -5,25 +5,29 @@ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:${BUILD_PREFIX}/lib" # Intel LLVM must cooperate with compiler and sysroot from conda echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot 
-target ${HOST}" > icpx_for_conda.cfg -export ICPXCFG="$(pwd)/icpx_for_conda.cfg" -export ICXCFG="$(pwd)/icpx_for_conda.cfg" + +ICPXCFG="$(pwd)/icpx_for_conda.cfg" +export ICPXCFG + +ICXCFG="$(pwd)/icpx_for_conda.cfg" +export ICXCFG export CMAKE_GENERATOR="Ninja" export TBB_ROOT_HINT=$PREFIX export DPL_ROOT_HINT=$PREFIX export MKL_ROOT_HINT=$PREFIX -SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" -SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" +SKBUILD_ARGS=(-- "-DCMAKE_C_COMPILER:PATH=icx" "-DCMAKE_CXX_COMPILER:PATH=icpx" "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON") +SKBUILD_ARGS=("${SKBUILD_ARGS[@]}" "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON") # Build wheel package if [ "$CONDA_PY" == "36" ]; then - WHEELS_BUILD_ARGS="-p manylinux1_x86_64" + WHEELS_BUILD_ARGS=("-p" "manylinux1_x86_64") else - WHEELS_BUILD_ARGS="-p manylinux2014_x86_64" + WHEELS_BUILD_ARGS=("-p" "manylinux2014_x86_64") fi if [ -n "${WHEELS_OUTPUT_FOLDER}" ]; then - $PYTHON setup.py install bdist_wheel ${WHEELS_BUILD_ARGS} ${SKBUILD_ARGS} - cp dist/dpnp*.whl ${WHEELS_OUTPUT_FOLDER} + $PYTHON setup.py install bdist_wheel "${WHEELS_BUILD_ARGS[@]}" "${SKBUILD_ARGS[@]}" + cp dist/dpnp*.whl "${WHEELS_OUTPUT_FOLDER}" else - $PYTHON setup.py install ${SKBUILD_ARGS} + $PYTHON setup.py install "${SKBUILD_ARGS[@]}" fi diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 7780c9b98d1..c67e538e90f 100755 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -10,16 +10,19 @@ fi # if DPCPPROOT is specified (work with custom DPCPP) if [ -n "${DPCPPROOT}" ]; then - . ${DPCPPROOT}/env/vars.sh + # shellcheck source=/dev/null + . "${DPCPPROOT}"/env/vars.sh fi # if MKLROOT is specified (work with custom math library) if [ -n "${MKLROOT}" ]; then - . ${MKLROOT}/env/vars.sh + # shellcheck source=/dev/null + . 
"${MKLROOT}"/env/vars.sh fi # have to activate while SYCL CPU device/driver needs paths # if TBBROOT is specified if [ -n "${TBBROOT}" ]; then - . ${TBBROOT}/env/vars.sh + # shellcheck source=/dev/null + . "${TBBROOT}"/env/vars.sh fi diff --git a/doc/0.builddoc.sh b/doc/0.builddoc.sh index 5dd034ac667..f10b4a5cc22 100755 --- a/doc/0.builddoc.sh +++ b/doc/0.builddoc.sh @@ -1,11 +1,11 @@ #!/bin/bash -BUILDDOCDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) +BUILDDOCDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") ROOTDIR=$BUILDDOCDIR/.. -cd $ROOTDIR +cd "$ROOTDIR" || exit 1 python setup.py develop -cd $BUILDDOCDIR +cd "$BUILDDOCDIR" || exit 2 make clean make html diff --git a/doc/make.bat b/doc/make.bat index 0bd6076d3b2..3382907d015 100644 --- a/doc/make.bat +++ b/doc/make.bat @@ -1,36 +1,36 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build -set SPHINXPROJ=dpnp - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build +set SPHINXPROJ=dpnp + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/dpnp/backend/examples/example11.cpp b/dpnp/backend/examples/example11.cpp index 52fce9beb3b..3a16991bae6 100644 --- a/dpnp/backend/examples/example11.cpp +++ b/dpnp/backend/examples/example11.cpp @@ -1,85 +1,85 @@ -//***************************************************************************** -// Copyright (c) 2016-2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -/** - * Example 11. - * - * This example shows simple usage of the DPNP C++ Backend library RNG shuffle - * function for one and ndim arrays. - * - * Possible compile line: - * g++ -g dpnp/backend/examples/example11.cpp -Idpnp -Idpnp/backend/include - * -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example11 - * - */ - -#include - -#include - -template -void print_dpnp_array(T *arr, size_t size) -{ - std::cout << std::endl; - for (size_t i = 0; i < size; ++i) { - std::cout << arr[i] << ", "; - } - std::cout << std::endl; -} - -int main(int, char **) -{ - // Two cases: - // 1) array size = 100, ndim = 1, high_dim_size = 10 (aka ndarray with shape - // (100,) ) 2) array size = 100, ndim = 2, high_dim_size = 20 (e.g. ndarray - // with shape (20, 5) and len(array) = 20 ) - const size_t ndim_cases = 2; - const size_t itemsize = sizeof(double); - const size_t ndim[ndim_cases] = {1, 2}; - const size_t high_dim_size[ndim_cases] = {100, 20}; - const size_t size = 100; - const size_t seed = 1234; - - // DPNPC dpnp_rng_shuffle_c - // DPNPC interface - double *array_1 = - reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(double))); - for (size_t i = 0; i < ndim_cases; i++) { - std::cout << "\nREPRODUCE: DPNPC dpnp_rng_shuffle_c:"; - std::cout << "\nDIMS: " << ndim[i] << std::endl; - // init array 0, 1, 2, 3, 4, 5, 6, .... 
- dpnp_arange_c(0, 1, array_1, size); - // print before shuffle - std::cout << "\nINPUT array:"; - print_dpnp_array(array_1, size); - dpnp_rng_srand_c(seed); - dpnp_rng_shuffle_c(array_1, itemsize, ndim[i], high_dim_size[i], - size); - // print shuffle result - std::cout << "\nSHUFFLE INPUT array:"; - print_dpnp_array(array_1, size); - } - dpnp_memory_free_c(array_1); -} +//***************************************************************************** +// Copyright (c) 2016-2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +/** + * Example 11. 
+ * + * This example shows simple usage of the DPNP C++ Backend library RNG shuffle + * function for one and ndim arrays. + * + * Possible compile line: + * g++ -g dpnp/backend/examples/example11.cpp -Idpnp -Idpnp/backend/include + * -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example11 + * + */ + +#include + +#include + +template +void print_dpnp_array(T *arr, size_t size) +{ + std::cout << std::endl; + for (size_t i = 0; i < size; ++i) { + std::cout << arr[i] << ", "; + } + std::cout << std::endl; +} + +int main(int, char **) +{ + // Two cases: + // 1) array size = 100, ndim = 1, high_dim_size = 10 (aka ndarray with shape + // (100,) ) 2) array size = 100, ndim = 2, high_dim_size = 20 (e.g. ndarray + // with shape (20, 5) and len(array) = 20 ) + const size_t ndim_cases = 2; + const size_t itemsize = sizeof(double); + const size_t ndim[ndim_cases] = {1, 2}; + const size_t high_dim_size[ndim_cases] = {100, 20}; + const size_t size = 100; + const size_t seed = 1234; + + // DPNPC dpnp_rng_shuffle_c + // DPNPC interface + double *array_1 = + reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(double))); + for (size_t i = 0; i < ndim_cases; i++) { + std::cout << "\nREPRODUCE: DPNPC dpnp_rng_shuffle_c:"; + std::cout << "\nDIMS: " << ndim[i] << std::endl; + // init array 0, 1, 2, 3, 4, 5, 6, .... 
+ dpnp_arange_c(0, 1, array_1, size); + // print before shuffle + std::cout << "\nINPUT array:"; + print_dpnp_array(array_1, size); + dpnp_rng_srand_c(seed); + dpnp_rng_shuffle_c(array_1, itemsize, ndim[i], high_dim_size[i], + size); + // print shuffle result + std::cout << "\nSHUFFLE INPUT array:"; + print_dpnp_array(array_1, size); + } + dpnp_memory_free_c(array_1); +} diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index 0399deea254..d140f412e6a 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -1,392 +1,392 @@ -import math -import operator - -import dpctl.utils as dpu -import numpy - -import dpnp -import dpnp.dpnp_container as dpnp_container -import dpnp.dpnp_utils as utils - -__all__ = [ - "dpnp_geomspace", - "dpnp_linspace", - "dpnp_logspace", - "dpnp_nd_grid", -] - - -def dpnp_geomspace( - start, - stop, - num, - dtype=None, - device=None, - usm_type=None, - sycl_queue=None, - endpoint=True, - axis=0, -): - usm_type_alloc, sycl_queue_alloc = utils.get_usm_allocations([start, stop]) - - if sycl_queue is None and device is None: - sycl_queue = sycl_queue_alloc - sycl_queue_normalized = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - if usm_type is None: - _usm_type = "device" if usm_type_alloc is None else usm_type_alloc - else: - _usm_type = usm_type - - if not dpnp.is_supported_array_type(start): - start = dpnp.asarray( - start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - if not dpnp.is_supported_array_type(stop): - stop = dpnp.asarray( - stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - - dt = numpy.result_type(start, stop, float(num)) - dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) - if dtype is None: - dtype = dt - - if dpnp.any(start == 0) or dpnp.any(stop == 0): - raise ValueError("Geometric sequence cannot include zero") - - out_sign = dpnp.ones( - dpnp.broadcast_arrays(start, 
stop)[0].shape, - dtype=dt, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - ) - # Avoid negligible real or imaginary parts in output by rotating to - # positive real, calculating, then undoing rotation - if dpnp.issubdtype(dt, dpnp.complexfloating): - all_imag = (start.real == 0.0) & (stop.real == 0.0) - if dpnp.any(all_imag): - start[all_imag] = start[all_imag].imag - stop[all_imag] = stop[all_imag].imag - out_sign[all_imag] = 1j - - both_negative = (dpnp.sign(start) == -1) & (dpnp.sign(stop) == -1) - if dpnp.any(both_negative): - dpnp.negative(start[both_negative], out=start[both_negative]) - dpnp.negative(stop[both_negative], out=stop[both_negative]) - dpnp.negative(out_sign[both_negative], out=out_sign[both_negative]) - - log_start = dpnp.log10(start) - log_stop = dpnp.log10(stop) - result = dpnp_logspace( - log_start, - log_stop, - num=num, - endpoint=endpoint, - base=10.0, - dtype=dtype, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - ) - - if num > 0: - result[0] = start - if num > 1 and endpoint: - result[-1] = stop - - result = out_sign * result - - if axis != 0: - result = dpnp.moveaxis(result, 0, axis) - - return result.astype(dtype, copy=False) - - -def dpnp_linspace( - start, - stop, - num, - dtype=None, - device=None, - usm_type=None, - sycl_queue=None, - endpoint=True, - retstep=False, - axis=0, -): - usm_type_alloc, sycl_queue_alloc = utils.get_usm_allocations([start, stop]) - - if sycl_queue is None and device is None: - sycl_queue = sycl_queue_alloc - sycl_queue_normalized = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - if usm_type is None: - _usm_type = "device" if usm_type_alloc is None else usm_type_alloc - else: - _usm_type = usm_type - - if not hasattr(start, "dtype") and not dpnp.isscalar(start): - start = dpnp.asarray( - start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - if not hasattr(stop, "dtype") and not dpnp.isscalar(stop): - stop = dpnp.asarray( - stop, 
usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - - dt = numpy.result_type(start, stop, float(num)) - dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) - if dtype is None: - dtype = dt - - num = operator.index(num) - if num < 0: - raise ValueError("Number of points must be non-negative") - step_num = (num - 1) if endpoint else num - - step_nan = False - if step_num == 0: - step_nan = True - step = dpnp.nan - - if dpnp.isscalar(start) and dpnp.isscalar(stop): - # Call linspace() function for scalars. - res = dpnp_container.linspace( - start, - stop, - num, - dtype=dt, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - endpoint=endpoint, - ) - if retstep is True and step_nan is False: - step = (stop - start) / step_num - else: - _start = dpnp.asarray( - start, - dtype=dt, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - ) - _stop = dpnp.asarray( - stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - - res = dpnp_container.arange( - 0, - stop=num, - step=1, - dtype=dt, - usm_type=_usm_type, - sycl_queue=sycl_queue_normalized, - ) - - if step_nan is False: - step = (_stop - _start) / step_num - res = res.reshape((-1,) + (1,) * step.ndim) - res = res * step + _start - - if endpoint and num > 1: - res[-1] = dpnp_container.full(step.shape, _stop) - - if axis != 0: - res = dpnp.moveaxis(res, 0, axis) - - if numpy.issubdtype(dtype, dpnp.integer): - dpnp.floor(res, out=res) - - res = res.astype(dtype, copy=False) - - if retstep is True: - if dpnp.isscalar(step): - step = dpnp.asarray( - step, usm_type=res.usm_type, sycl_queue=res.sycl_queue - ) - return (res, step) - - return res - - -def dpnp_logspace( - start, - stop, - num=50, - device=None, - usm_type=None, - sycl_queue=None, - endpoint=True, - base=10.0, - dtype=None, - axis=0, -): - if not dpnp.isscalar(base): - usm_type_alloc, sycl_queue_alloc = utils.get_usm_allocations( - [start, stop, base] - ) - - if sycl_queue is None and device is None: - 
sycl_queue = sycl_queue_alloc - sycl_queue = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - if usm_type is None: - usm_type = "device" if usm_type_alloc is None else usm_type_alloc - else: - usm_type = usm_type - start = dpnp.asarray(start, usm_type=usm_type, sycl_queue=sycl_queue) - stop = dpnp.asarray(stop, usm_type=usm_type, sycl_queue=sycl_queue) - base = dpnp.asarray(base, usm_type=usm_type, sycl_queue=sycl_queue) - [start, stop, base] = dpnp.broadcast_arrays(start, stop, base) - base = dpnp.expand_dims(base, axis=axis) - - res = dpnp_linspace( - start, - stop, - num=num, - device=device, - usm_type=usm_type, - sycl_queue=sycl_queue, - endpoint=endpoint, - axis=axis, - ) - - if dtype is None: - return dpnp.power(base, res) - return dpnp.power(base, res).astype(dtype, copy=False) - - -class dpnp_nd_grid: - """ - Construct a multi-dimensional "meshgrid". - - ``grid = dpnp_nd_grid()`` creates an instance which will return a mesh-grid - when indexed. The dimension and number of the output arrays are equal - to the number of indexing dimensions. If the step length is not a - complex number, then the stop is not inclusive. - - However, if the step length is a complex number (e.g. 5j), then the - integer part of its magnitude is interpreted as specifying the - number of points to create between the start and stop values, where - the stop value is inclusive. - - If instantiated with an argument of ``sparse=True``, the mesh-grid is - open (or not fleshed out) so that only one-dimension of each returned - argument is greater than 1. - - Parameters - ---------- - sparse : bool, optional - Whether the grid is sparse or not. Default is False. 
- - """ - - def __init__( - self, sparse=False, device=None, usm_type="device", sycl_queue=None - ): - dpu.validate_usm_type(usm_type, allow_none=False) - self.sparse = sparse - self.usm_type = usm_type - self.sycl_queue_normalized = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - def __getitem__(self, key): - if isinstance(key, slice): - step = key.step - stop = key.stop - start = key.start - if start is None: - start = 0 - if isinstance(step, complex): - step = abs(step) - length = int(step) - if step != 1: - step = (stop - start) / float(step - 1) - stop = stop + step - return ( - dpnp.arange( - 0, - length, - 1, - dtype=dpnp.default_float_type(), - usm_type=self.usm_type, - sycl_queue=self.sycl_queue_normalized, - ) - * step - + start - ) - else: - return dpnp.arange( - start, - stop, - step, - usm_type=self.usm_type, - sycl_queue=self.sycl_queue_normalized, - ) - - size = [] - dtype = int - for k in range(len(key)): - step = key[k].step - start = key[k].start - stop = key[k].stop - if start is None: - start = 0 - if step is None: - step = 1 - if isinstance(step, complex): - size.append(int(abs(step))) - dtype = dpnp.default_float_type() - else: - size.append( - int(math.ceil((key[k].stop - start) / (step * 1.0))) - ) - if ( - isinstance(step, float) - or isinstance(start, float) - or isinstance(stop, float) - ): - dtype = dpnp.default_float_type() - if self.sparse: - nn = [ - dpnp.arange( - _x, - dtype=_t, - usm_type=self.usm_type, - sycl_queue=self.sycl_queue_normalized, - ) - for _x, _t in zip(size, (dtype,) * len(size)) - ] - else: - nn = dpnp.indices( - size, - dtype, - usm_type=self.usm_type, - sycl_queue=self.sycl_queue_normalized, - ) - for k in range(len(size)): - step = key[k].step - start = key[k].start - stop = key[k].stop - if start is None: - start = 0 - if step is None: - step = 1 - if isinstance(step, complex): - step = int(abs(step)) - if step != 1: - step = (stop - start) / float(step - 1) - nn[k] = nn[k] * 
step + start - if self.sparse: - slobj = [dpnp.newaxis] * len(size) - for k in range(len(size)): - slobj[k] = slice(None, None) - nn[k] = nn[k][tuple(slobj)] - slobj[k] = dpnp.newaxis - return nn +import math +import operator + +import dpctl.utils as dpu +import numpy + +import dpnp +import dpnp.dpnp_container as dpnp_container +import dpnp.dpnp_utils as utils + +__all__ = [ + "dpnp_geomspace", + "dpnp_linspace", + "dpnp_logspace", + "dpnp_nd_grid", +] + + +def dpnp_geomspace( + start, + stop, + num, + dtype=None, + device=None, + usm_type=None, + sycl_queue=None, + endpoint=True, + axis=0, +): + usm_type_alloc, sycl_queue_alloc = utils.get_usm_allocations([start, stop]) + + if sycl_queue is None and device is None: + sycl_queue = sycl_queue_alloc + sycl_queue_normalized = dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue, device=device + ) + + if usm_type is None: + _usm_type = "device" if usm_type_alloc is None else usm_type_alloc + else: + _usm_type = usm_type + + if not dpnp.is_supported_array_type(start): + start = dpnp.asarray( + start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized + ) + if not dpnp.is_supported_array_type(stop): + stop = dpnp.asarray( + stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized + ) + + dt = numpy.result_type(start, stop, float(num)) + dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) + if dtype is None: + dtype = dt + + if dpnp.any(start == 0) or dpnp.any(stop == 0): + raise ValueError("Geometric sequence cannot include zero") + + out_sign = dpnp.ones( + dpnp.broadcast_arrays(start, stop)[0].shape, + dtype=dt, + usm_type=_usm_type, + sycl_queue=sycl_queue_normalized, + ) + # Avoid negligible real or imaginary parts in output by rotating to + # positive real, calculating, then undoing rotation + if dpnp.issubdtype(dt, dpnp.complexfloating): + all_imag = (start.real == 0.0) & (stop.real == 0.0) + if dpnp.any(all_imag): + start[all_imag] = start[all_imag].imag + stop[all_imag] = 
stop[all_imag].imag + out_sign[all_imag] = 1j + + both_negative = (dpnp.sign(start) == -1) & (dpnp.sign(stop) == -1) + if dpnp.any(both_negative): + dpnp.negative(start[both_negative], out=start[both_negative]) + dpnp.negative(stop[both_negative], out=stop[both_negative]) + dpnp.negative(out_sign[both_negative], out=out_sign[both_negative]) + + log_start = dpnp.log10(start) + log_stop = dpnp.log10(stop) + result = dpnp_logspace( + log_start, + log_stop, + num=num, + endpoint=endpoint, + base=10.0, + dtype=dtype, + usm_type=_usm_type, + sycl_queue=sycl_queue_normalized, + ) + + if num > 0: + result[0] = start + if num > 1 and endpoint: + result[-1] = stop + + result = out_sign * result + + if axis != 0: + result = dpnp.moveaxis(result, 0, axis) + + return result.astype(dtype, copy=False) + + +def dpnp_linspace( + start, + stop, + num, + dtype=None, + device=None, + usm_type=None, + sycl_queue=None, + endpoint=True, + retstep=False, + axis=0, +): + usm_type_alloc, sycl_queue_alloc = utils.get_usm_allocations([start, stop]) + + if sycl_queue is None and device is None: + sycl_queue = sycl_queue_alloc + sycl_queue_normalized = dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue, device=device + ) + + if usm_type is None: + _usm_type = "device" if usm_type_alloc is None else usm_type_alloc + else: + _usm_type = usm_type + + if not hasattr(start, "dtype") and not dpnp.isscalar(start): + start = dpnp.asarray( + start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized + ) + if not hasattr(stop, "dtype") and not dpnp.isscalar(stop): + stop = dpnp.asarray( + stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized + ) + + dt = numpy.result_type(start, stop, float(num)) + dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) + if dtype is None: + dtype = dt + + num = operator.index(num) + if num < 0: + raise ValueError("Number of points must be non-negative") + step_num = (num - 1) if endpoint else num + + step_nan = False + if step_num == 0: + 
step_nan = True + step = dpnp.nan + + if dpnp.isscalar(start) and dpnp.isscalar(stop): + # Call linspace() function for scalars. + res = dpnp_container.linspace( + start, + stop, + num, + dtype=dt, + usm_type=_usm_type, + sycl_queue=sycl_queue_normalized, + endpoint=endpoint, + ) + if retstep is True and step_nan is False: + step = (stop - start) / step_num + else: + _start = dpnp.asarray( + start, + dtype=dt, + usm_type=_usm_type, + sycl_queue=sycl_queue_normalized, + ) + _stop = dpnp.asarray( + stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized + ) + + res = dpnp_container.arange( + 0, + stop=num, + step=1, + dtype=dt, + usm_type=_usm_type, + sycl_queue=sycl_queue_normalized, + ) + + if step_nan is False: + step = (_stop - _start) / step_num + res = res.reshape((-1,) + (1,) * step.ndim) + res = res * step + _start + + if endpoint and num > 1: + res[-1] = dpnp_container.full(step.shape, _stop) + + if axis != 0: + res = dpnp.moveaxis(res, 0, axis) + + if numpy.issubdtype(dtype, dpnp.integer): + dpnp.floor(res, out=res) + + res = res.astype(dtype, copy=False) + + if retstep is True: + if dpnp.isscalar(step): + step = dpnp.asarray( + step, usm_type=res.usm_type, sycl_queue=res.sycl_queue + ) + return (res, step) + + return res + + +def dpnp_logspace( + start, + stop, + num=50, + device=None, + usm_type=None, + sycl_queue=None, + endpoint=True, + base=10.0, + dtype=None, + axis=0, +): + if not dpnp.isscalar(base): + usm_type_alloc, sycl_queue_alloc = utils.get_usm_allocations( + [start, stop, base] + ) + + if sycl_queue is None and device is None: + sycl_queue = sycl_queue_alloc + sycl_queue = dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue, device=device + ) + + if usm_type is None: + usm_type = "device" if usm_type_alloc is None else usm_type_alloc + else: + usm_type = usm_type + start = dpnp.asarray(start, usm_type=usm_type, sycl_queue=sycl_queue) + stop = dpnp.asarray(stop, usm_type=usm_type, sycl_queue=sycl_queue) + base = 
dpnp.asarray(base, usm_type=usm_type, sycl_queue=sycl_queue) + [start, stop, base] = dpnp.broadcast_arrays(start, stop, base) + base = dpnp.expand_dims(base, axis=axis) + + res = dpnp_linspace( + start, + stop, + num=num, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + endpoint=endpoint, + axis=axis, + ) + + if dtype is None: + return dpnp.power(base, res) + return dpnp.power(base, res).astype(dtype, copy=False) + + +class dpnp_nd_grid: + """ + Construct a multi-dimensional "meshgrid". + + ``grid = dpnp_nd_grid()`` creates an instance which will return a mesh-grid + when indexed. The dimension and number of the output arrays are equal + to the number of indexing dimensions. If the step length is not a + complex number, then the stop is not inclusive. + + However, if the step length is a complex number (e.g. 5j), then the + integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value is inclusive. + + If instantiated with an argument of ``sparse=True``, the mesh-grid is + open (or not fleshed out) so that only one-dimension of each returned + argument is greater than 1. + + Parameters + ---------- + sparse : bool, optional + Whether the grid is sparse or not. Default is False. 
+ + """ + + def __init__( + self, sparse=False, device=None, usm_type="device", sycl_queue=None + ): + dpu.validate_usm_type(usm_type, allow_none=False) + self.sparse = sparse + self.usm_type = usm_type + self.sycl_queue_normalized = dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue, device=device + ) + + def __getitem__(self, key): + if isinstance(key, slice): + step = key.step + stop = key.stop + start = key.start + if start is None: + start = 0 + if isinstance(step, complex): + step = abs(step) + length = int(step) + if step != 1: + step = (stop - start) / float(step - 1) + stop = stop + step + return ( + dpnp.arange( + 0, + length, + 1, + dtype=dpnp.default_float_type(), + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + * step + + start + ) + else: + return dpnp.arange( + start, + stop, + step, + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + + size = [] + dtype = int + for k in range(len(key)): + step = key[k].step + start = key[k].start + stop = key[k].stop + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, complex): + size.append(int(abs(step))) + dtype = dpnp.default_float_type() + else: + size.append( + int(math.ceil((key[k].stop - start) / (step * 1.0))) + ) + if ( + isinstance(step, float) + or isinstance(start, float) + or isinstance(stop, float) + ): + dtype = dpnp.default_float_type() + if self.sparse: + nn = [ + dpnp.arange( + _x, + dtype=_t, + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + for _x, _t in zip(size, (dtype,) * len(size)) + ] + else: + nn = dpnp.indices( + size, + dtype, + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + for k in range(len(size)): + step = key[k].step + start = key[k].start + stop = key[k].stop + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, complex): + step = int(abs(step)) + if step != 1: + step = (stop - start) / float(step - 1) + nn[k] = nn[k] * 
step + start + if self.sparse: + slobj = [dpnp.newaxis] * len(size) + for k in range(len(size)): + slobj[k] = slice(None, None) + nn[k] = nn[k][tuple(slobj)] + slobj[k] = dpnp.newaxis + return nn diff --git a/scripts/build_deps_dpctl.sh b/scripts/build_deps_dpctl.sh index 3d5331bbdfb..dd85846a9d5 100755 --- a/scripts/build_deps_dpctl.sh +++ b/scripts/build_deps_dpctl.sh @@ -1,21 +1,19 @@ #!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - DPCTL_TARGET_VERSION=0.5.0rc2 echo ++++++++++++++++++ Build DPCTL ${DPCTL_TARGET_VERSION} +++++++++++++++++++ git clone --branch ${DPCTL_TARGET_VERSION} https://github.com/IntelPython/dpctl.git -cd dpctl +cd dpctl || exit 1 # didn't find better way to set required version -git tag -d $(git tag -l) +git tag -d "$(git tag -l)" git tag ${DPCTL_TARGET_VERSION} # python ./setup.py develop # python ./setup.py install -conda build conda-recipe/ --no-test -c ${ONEAPI_ROOT}/conda_channel +conda build conda-recipe/ --no-test -c "${ONEAPI_ROOT}"/conda_channel # ls -lR /opt/intel/oneapi/intelpython/latest/conda-bld diff --git a/scripts/install_cmake_lin.sh b/scripts/install_cmake_lin.sh index 966a22c617b..63ee19b0fdf 100755 --- a/scripts/install_cmake_lin.sh +++ b/scripts/install_cmake_lin.sh @@ -1,7 +1,5 @@ #!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - echo ========================= install cmake ================================== curl --output cmake_webimage.tar.gz \ --url https://github.com/Kitware/CMake/releases/download/v3.26.2/cmake-3.26.2-linux-x86_64.tar.gz \ @@ -10,7 +8,8 @@ curl --output cmake_webimage.tar.gz \ tar -xzf cmake_webimage.tar.gz rm -f cmake_webimage.tar.gz -export PATH=`pwd`/cmake-3.26.2-linux-x86_64/bin:$PATH +PATH=$(pwd)/cmake-3.26.2-linux-x86_64/bin:$PATH +export PATH -which cmake +command -v cmake cmake --version diff --git a/scripts/install_python_deps.sh b/scripts/install_python_deps.sh index bcb005403ba..e40d9a5b34a 100755 --- a/scripts/install_python_deps.sh +++ 
b/scripts/install_python_deps.sh @@ -1,7 +1,5 @@ #!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - echo +++++++++++++++++++++++++ Python prerequisites +++++++++++++++++++++++++++++++++ echo ========================= Conda: install prerequisites ========================= @@ -19,7 +17,7 @@ echo ========================= SW versions ===================================== conda list python --version -which python +command -v python python -c "import numpy as sw; print(f\"sw.__version__={sw.__version__}\nsw.get_include={sw.get_include()}\")" python -c "import dpctl as sw; print(f\"sw.__version__={sw.__version__}\nsw.get_include={sw.get_include()}\")" diff --git a/scripts/install_system_deps.sh b/scripts/install_system_deps.sh index a0bd07a040c..591bb025e85 100755 --- a/scripts/install_system_deps.sh +++ b/scripts/install_system_deps.sh @@ -1,7 +1,5 @@ #!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - # echo +++++++++++++++++++++++++ System prerequisites +++++++++++++++++++++++++++ # sudo apt-get install -f # sudo dpkg --configure -a diff --git a/scripts/install_system_deps_intelpython.sh b/scripts/install_system_deps_intelpython.sh index 8c38d41c385..5dbef56ff7e 100755 --- a/scripts/install_system_deps_intelpython.sh +++ b/scripts/install_system_deps_intelpython.sh @@ -1,7 +1,5 @@ #!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) - echo +++++++++++++++++++++++++ Intel OneAPI Python ++++++++++++++++++++++++++++ sudo apt-get install intel-oneapi-python diff --git a/scripts/set_ci_env.sh b/scripts/set_ci_env.sh index afc3cc1b6a2..d8e6c2b93e8 100755 --- a/scripts/set_ci_env.sh +++ b/scripts/set_ci_env.sh @@ -1,30 +1,30 @@ #!/bin/bash -THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) +THEDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") echo echo ========================= Set DPNP environment =========================== -echo SHELL=${SHELL} -echo PWD=${PWD} -echo HOME=${HOME} +echo SHELL="${SHELL}" +echo PWD="${PWD}" +echo 
HOME="${HOME}" ls -l echo ========================= current machine kernel ========================= uname -a -${THEDIR}/install_system_deps.sh +"${THEDIR}"/install_system_deps.sh . ./scripts/install_cmake_lin.sh echo ========================= setup Intel OneAPI python changed to Intel OneAPI ==== . /opt/intel/oneapi/setvars.sh -${THEDIR}/install_python_deps.sh +"${THEDIR}"/install_python_deps.sh echo ========================= SW versions =============================== g++ --version -which g++ +command -v g++ clang++ --version -which clang++ +command -v clang++ dpcpp --version -which dpcpp +command -v dpcpp diff --git a/tests/test_histograms.py b/tests/test_histograms.py index 2fb4cd71fa6..a283c5547cc 100644 --- a/tests/test_histograms.py +++ b/tests/test_histograms.py @@ -1,89 +1,89 @@ -import numpy -import pytest - -import dpnp - -from .helper import has_support_aspect64 - - -class TestHistogram: - def setup(self): - pass - - def teardown(self): - pass - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_simple(self): - n = 100 - v = dpnp.random.rand(n) - a, _ = dpnp.histogram(v) - # check if the sum of the bins equals the number of samples - numpy.testing.assert_equal(dpnp.sum(a, axis=0), n) - # check that the bin counts are evenly spaced when the data is from - # a linear function - a, _ = dpnp.histogram( - numpy.linspace( - 0, - 10, - 100, - dtype="float64" if has_support_aspect64() else "float32", - ) - ) - numpy.testing.assert_array_equal(a, 10) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_one_bin(self): - # Ticket 632 - hist, edges = dpnp.histogram([1, 2, 3, 4], [1, 2]) - numpy.testing.assert_array_equal( - hist, - [ - 2, - ], - ) - numpy.testing.assert_array_equal(edges, [1, 2]) - numpy.testing.assert_raises(ValueError, dpnp.histogram, [1, 2], bins=0) - h, e = dpnp.histogram([1, 2], bins=1) - numpy.testing.assert_equal(h, dpnp.array([2])) - numpy.testing.assert_allclose(e, dpnp.array([1.0, 2.0])) - - def 
test_density(self): - # Check that the integral of the density equals 1. - n = 100 - v = dpnp.random.rand(n) - a, b = dpnp.histogram(v, density=True) - area = dpnp.sum(a * dpnp.diff(b)[0])[0] - numpy.testing.assert_almost_equal(area, 1) - - # Check with non-constant bin widths - v = dpnp.arange(10) - bins = [0, 1, 3, 6, 10] - a, b = dpnp.histogram(v, bins, density=True) - numpy.testing.assert_array_equal(a, 0.1) - numpy.testing.assert_equal(dpnp.sum(a * dpnp.diff(b))[0], 1) - - # Test that passing False works too - a, b = dpnp.histogram(v, bins, density=False) - numpy.testing.assert_array_equal(a, [1, 2, 3, 4]) - - # Variable bin widths are especially useful to deal with - # infinities. - v = dpnp.arange(10) - bins = [0, 1, 3, 6, numpy.inf] - a, b = dpnp.histogram(v, bins, density=True) - numpy.testing.assert_array_equal(a, [0.1, 0.1, 0.1, 0.0]) - - # Taken from a bug report from N. Becker on the numpy-discussion - # mailing list Aug. 6, 2010. - counts, _ = dpnp.histogram( - [1, 2, 3, 4], [0.5, 1.5, numpy.inf], density=True - ) - numpy.testing.assert_equal(counts, [0.25, 0]) - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - def test_arr_weights_mismatch(self): - a = dpnp.arange(10) + 0.5 - w = dpnp.arange(11) + 0.5 - with numpy.testing.assert_raises_regex(ValueError, "same shape as"): - h, b = dpnp.histogram(a, range=[1, 9], weights=w, density=True) +import numpy +import pytest + +import dpnp + +from .helper import has_support_aspect64 + + +class TestHistogram: + def setup(self): + pass + + def teardown(self): + pass + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + def test_simple(self): + n = 100 + v = dpnp.random.rand(n) + a, _ = dpnp.histogram(v) + # check if the sum of the bins equals the number of samples + numpy.testing.assert_equal(dpnp.sum(a, axis=0), n) + # check that the bin counts are evenly spaced when the data is from + # a linear function + a, _ = dpnp.histogram( + numpy.linspace( + 0, + 10, + 100, + dtype="float64" if 
has_support_aspect64() else "float32", + ) + ) + numpy.testing.assert_array_equal(a, 10) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + def test_one_bin(self): + # Ticket 632 + hist, edges = dpnp.histogram([1, 2, 3, 4], [1, 2]) + numpy.testing.assert_array_equal( + hist, + [ + 2, + ], + ) + numpy.testing.assert_array_equal(edges, [1, 2]) + numpy.testing.assert_raises(ValueError, dpnp.histogram, [1, 2], bins=0) + h, e = dpnp.histogram([1, 2], bins=1) + numpy.testing.assert_equal(h, dpnp.array([2])) + numpy.testing.assert_allclose(e, dpnp.array([1.0, 2.0])) + + def test_density(self): + # Check that the integral of the density equals 1. + n = 100 + v = dpnp.random.rand(n) + a, b = dpnp.histogram(v, density=True) + area = dpnp.sum(a * dpnp.diff(b)[0])[0] + numpy.testing.assert_almost_equal(area, 1) + + # Check with non-constant bin widths + v = dpnp.arange(10) + bins = [0, 1, 3, 6, 10] + a, b = dpnp.histogram(v, bins, density=True) + numpy.testing.assert_array_equal(a, 0.1) + numpy.testing.assert_equal(dpnp.sum(a * dpnp.diff(b))[0], 1) + + # Test that passing False works too + a, b = dpnp.histogram(v, bins, density=False) + numpy.testing.assert_array_equal(a, [1, 2, 3, 4]) + + # Variable bin widths are especially useful to deal with + # infinities. + v = dpnp.arange(10) + bins = [0, 1, 3, 6, numpy.inf] + a, b = dpnp.histogram(v, bins, density=True) + numpy.testing.assert_array_equal(a, [0.1, 0.1, 0.1, 0.0]) + + # Taken from a bug report from N. Becker on the numpy-discussion + # mailing list Aug. 6, 2010. 
+ counts, _ = dpnp.histogram( + [1, 2, 3, 4], [0.5, 1.5, numpy.inf], density=True + ) + numpy.testing.assert_equal(counts, [0.25, 0]) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + def test_arr_weights_mismatch(self): + a = dpnp.arange(10) + 0.5 + w = dpnp.arange(11) + 0.5 + with numpy.testing.assert_raises_regex(ValueError, "same shape as"): + h, b = dpnp.histogram(a, range=[1, 9], weights=w, density=True) diff --git a/tests/third_party/cupy/manipulation_tests/test_kind.py b/tests/third_party/cupy/manipulation_tests/test_kind.py index 1812d77c0af..7cc61f722f8 100644 --- a/tests/third_party/cupy/manipulation_tests/test_kind.py +++ b/tests/third_party/cupy/manipulation_tests/test_kind.py @@ -1,143 +1,143 @@ -import unittest - -import numpy -import pytest - -import dpnp as cupy -from tests.helper import has_support_aspect64 -from tests.third_party.cupy import testing - - -class TestKind(unittest.TestCase): - @pytest.mark.skip("dpnp.asarray_chkfinite() is not implemented yet") - @testing.for_orders("CFAK") - @testing.for_all_dtypes() - @testing.numpy_cupy_array_equal() - def test_asarray_chkfinite(self, xp, dtype, order): - a = [0, 4, 0, 5] - return xp.asarray_chkfinite(a, dtype=dtype, order=order) - - @pytest.mark.skip("dpnp.asarray_chkfinite() is not implemented yet") - @testing.for_orders("CFAK") - @testing.for_all_dtypes(no_bool=True) - def test_asarray_chkfinite_non_finite_vals(self, dtype, order): - a = [-numpy.inf, 0.0, numpy.inf, numpy.nan] - for xp in (numpy, cupy): - if xp.issubdtype(dtype, xp.integer): - error = OverflowError - else: - error = ValueError - with pytest.raises(error): - xp.asarray_chkfinite(a, dtype=dtype, order=order) - - @testing.for_all_dtypes() - def test_asfarray(self, dtype): - a = cupy.asarray([1, 2, 3]) - a_gpu = cupy.asfarray(a, dtype) - a_cpu = numpy.asfarray(a, dtype) - if ( - has_support_aspect64() - or cupy.issubdtype(dtype, cupy.complexfloating) - or cupy.issubdtype(dtype, cupy.floating) - ): - assert a_cpu.dtype == 
a_gpu.dtype - else: - assert a_cpu.dtype == cupy.float64 - assert a_gpu.dtype == cupy.float32 - - @testing.for_all_dtypes() - def test_asfortranarray1(self, dtype): - def func(xp): - x = xp.zeros((2, 3), dtype=dtype) - ret = xp.asfortranarray(x) - assert x.flags.c_contiguous - assert ret.flags.f_contiguous - - assert func(numpy) == func(cupy) - - @testing.for_all_dtypes() - def test_asfortranarray2(self, dtype): - def func(xp): - x = xp.zeros((2, 3, 4), dtype=dtype) - ret = xp.asfortranarray(x) - assert x.flags.c_contiguous - assert ret.flags.f_contiguous - - assert func(numpy) == func(cupy) - - @testing.for_all_dtypes() - def test_asfortranarray3(self, dtype): - def func(xp): - x = xp.zeros((2, 3, 4), dtype=dtype) - ret = xp.asfortranarray(xp.asfortranarray(x)) - assert x.flags.c_contiguous - assert ret.flags.f_contiguous - - assert func(numpy) == func(cupy) - - @testing.for_all_dtypes() - def test_asfortranarray4(self, dtype): - def func(xp): - x = xp.zeros((2, 3), dtype=dtype) - x = xp.transpose(x, (1, 0)) - ret = xp.asfortranarray(x) - assert ret.flags.f_contiguous - - assert func(numpy) == func(cupy) - - @testing.for_all_dtypes() - def test_asfortranarray5(self, dtype): - def func(xp): - x = testing.shaped_arange((2, 3), xp, dtype) - ret = xp.asfortranarray(x) - assert x.flags.c_contiguous - assert ret.flags.f_contiguous - - assert func(numpy) == func(cupy) - - @pytest.mark.skip("dpnp.require() is not implemented yet") - @testing.for_all_dtypes() - def test_require_flag_check(self, dtype): - possible_flags = [["C_CONTIGUOUS"], ["F_CONTIGUOUS"]] - x = cupy.zeros((2, 3, 4), dtype=dtype) - for flags in possible_flags: - arr = cupy.require(x, dtype, flags) - for parameter in flags: - assert arr.flags[parameter] - assert arr.dtype == dtype - - @pytest.mark.skip("dpnp.require() is not implemented yet") - @testing.for_all_dtypes() - def test_require_owndata(self, dtype): - x = cupy.zeros((2, 3, 4), dtype=dtype) - arr = x.view() - arr = cupy.require(arr, dtype, ["O"]) 
- assert arr.flags["OWNDATA"] - - @pytest.mark.skip("dpnp.require() is not implemented yet") - @testing.for_all_dtypes() - def test_require_C_and_F_flags(self, dtype): - x = cupy.zeros((2, 3, 4), dtype=dtype) - with pytest.raises(ValueError): - cupy.require(x, dtype, ["C", "F"]) - - @pytest.mark.skip("dpnp.require() is not implemented yet") - @testing.for_all_dtypes() - def test_require_incorrect_requirments(self, dtype): - x = cupy.zeros((2, 3, 4), dtype=dtype) - with pytest.raises(ValueError): - cupy.require(x, dtype, ["W"]) - - @pytest.mark.skip("dpnp.require() is not implemented yet") - @testing.for_all_dtypes() - def test_require_incorrect_dtype(self, dtype): - x = cupy.zeros((2, 3, 4), dtype=dtype) - with pytest.raises(ValueError): - cupy.require(x, "random", "C") - - @pytest.mark.skip("dpnp.require() is not implemented yet") - @testing.for_all_dtypes() - def test_require_empty_requirements(self, dtype): - x = cupy.zeros((2, 3, 4), dtype=dtype) - x = cupy.require(x, dtype, []) - assert x.flags["C_CONTIGUOUS"] +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.helper import has_support_aspect64 +from tests.third_party.cupy import testing + + +class TestKind(unittest.TestCase): + @pytest.mark.skip("dpnp.asarray_chkfinite() is not implemented yet") + @testing.for_orders("CFAK") + @testing.for_all_dtypes() + @testing.numpy_cupy_array_equal() + def test_asarray_chkfinite(self, xp, dtype, order): + a = [0, 4, 0, 5] + return xp.asarray_chkfinite(a, dtype=dtype, order=order) + + @pytest.mark.skip("dpnp.asarray_chkfinite() is not implemented yet") + @testing.for_orders("CFAK") + @testing.for_all_dtypes(no_bool=True) + def test_asarray_chkfinite_non_finite_vals(self, dtype, order): + a = [-numpy.inf, 0.0, numpy.inf, numpy.nan] + for xp in (numpy, cupy): + if xp.issubdtype(dtype, xp.integer): + error = OverflowError + else: + error = ValueError + with pytest.raises(error): + xp.asarray_chkfinite(a, dtype=dtype, order=order) + + 
@testing.for_all_dtypes() + def test_asfarray(self, dtype): + a = cupy.asarray([1, 2, 3]) + a_gpu = cupy.asfarray(a, dtype) + a_cpu = numpy.asfarray(a, dtype) + if ( + has_support_aspect64() + or cupy.issubdtype(dtype, cupy.complexfloating) + or cupy.issubdtype(dtype, cupy.floating) + ): + assert a_cpu.dtype == a_gpu.dtype + else: + assert a_cpu.dtype == cupy.float64 + assert a_gpu.dtype == cupy.float32 + + @testing.for_all_dtypes() + def test_asfortranarray1(self, dtype): + def func(xp): + x = xp.zeros((2, 3), dtype=dtype) + ret = xp.asfortranarray(x) + assert x.flags.c_contiguous + assert ret.flags.f_contiguous + + assert func(numpy) == func(cupy) + + @testing.for_all_dtypes() + def test_asfortranarray2(self, dtype): + def func(xp): + x = xp.zeros((2, 3, 4), dtype=dtype) + ret = xp.asfortranarray(x) + assert x.flags.c_contiguous + assert ret.flags.f_contiguous + + assert func(numpy) == func(cupy) + + @testing.for_all_dtypes() + def test_asfortranarray3(self, dtype): + def func(xp): + x = xp.zeros((2, 3, 4), dtype=dtype) + ret = xp.asfortranarray(xp.asfortranarray(x)) + assert x.flags.c_contiguous + assert ret.flags.f_contiguous + + assert func(numpy) == func(cupy) + + @testing.for_all_dtypes() + def test_asfortranarray4(self, dtype): + def func(xp): + x = xp.zeros((2, 3), dtype=dtype) + x = xp.transpose(x, (1, 0)) + ret = xp.asfortranarray(x) + assert ret.flags.f_contiguous + + assert func(numpy) == func(cupy) + + @testing.for_all_dtypes() + def test_asfortranarray5(self, dtype): + def func(xp): + x = testing.shaped_arange((2, 3), xp, dtype) + ret = xp.asfortranarray(x) + assert x.flags.c_contiguous + assert ret.flags.f_contiguous + + assert func(numpy) == func(cupy) + + @pytest.mark.skip("dpnp.require() is not implemented yet") + @testing.for_all_dtypes() + def test_require_flag_check(self, dtype): + possible_flags = [["C_CONTIGUOUS"], ["F_CONTIGUOUS"]] + x = cupy.zeros((2, 3, 4), dtype=dtype) + for flags in possible_flags: + arr = cupy.require(x, dtype, flags) 
+ for parameter in flags: + assert arr.flags[parameter] + assert arr.dtype == dtype + + @pytest.mark.skip("dpnp.require() is not implemented yet") + @testing.for_all_dtypes() + def test_require_owndata(self, dtype): + x = cupy.zeros((2, 3, 4), dtype=dtype) + arr = x.view() + arr = cupy.require(arr, dtype, ["O"]) + assert arr.flags["OWNDATA"] + + @pytest.mark.skip("dpnp.require() is not implemented yet") + @testing.for_all_dtypes() + def test_require_C_and_F_flags(self, dtype): + x = cupy.zeros((2, 3, 4), dtype=dtype) + with pytest.raises(ValueError): + cupy.require(x, dtype, ["C", "F"]) + + @pytest.mark.skip("dpnp.require() is not implemented yet") + @testing.for_all_dtypes() + def test_require_incorrect_requirments(self, dtype): + x = cupy.zeros((2, 3, 4), dtype=dtype) + with pytest.raises(ValueError): + cupy.require(x, dtype, ["W"]) + + @pytest.mark.skip("dpnp.require() is not implemented yet") + @testing.for_all_dtypes() + def test_require_incorrect_dtype(self, dtype): + x = cupy.zeros((2, 3, 4), dtype=dtype) + with pytest.raises(ValueError): + cupy.require(x, "random", "C") + + @pytest.mark.skip("dpnp.require() is not implemented yet") + @testing.for_all_dtypes() + def test_require_empty_requirements(self, dtype): + x = cupy.zeros((2, 3, 4), dtype=dtype) + x = cupy.require(x, dtype, []) + assert x.flags["C_CONTIGUOUS"] diff --git a/tests/third_party/intel/test_zero_copy_test1.py b/tests/third_party/intel/test_zero_copy_test1.py index 9c9d0fa9dba..c59cd5b3188 100644 --- a/tests/third_party/intel/test_zero_copy_test1.py +++ b/tests/third_party/intel/test_zero_copy_test1.py @@ -1,36 +1,36 @@ -import importlib -import sys - -import pytest - - -class dummymodule: - pass - - -sys.modules["numba_dppy"] = dummymodule - -module_not_found = False - -reason = "" - -try: - zero_copy_test1 = importlib.import_module("zero-copy-test1") -except ModuleNotFoundError as e: - module_not_found = True - reason = str(e) - - -@pytest.mark.skipif(module_not_found, reason=reason) -def 
test_dpnp_interaction_with_dpctl_memory(): - return zero_copy_test1.test_dpnp_interaction_with_dpctl_memory() - - -@pytest.mark.skipif(module_not_found, reason=reason) -def test_dpnp_array_has_iface(): - return zero_copy_test1.test_dpnp_array_has_iface() - - -@pytest.mark.skipif(module_not_found, reason=reason) -def test_dpctl_dparray_has_iface(): - return zero_copy_test1.test_dpctl_dparray_has_iface() +import importlib +import sys + +import pytest + + +class dummymodule: + pass + + +sys.modules["numba_dppy"] = dummymodule + +module_not_found = False + +reason = "" + +try: + zero_copy_test1 = importlib.import_module("zero-copy-test1") +except ModuleNotFoundError as e: + module_not_found = True + reason = str(e) + + +@pytest.mark.skipif(module_not_found, reason=reason) +def test_dpnp_interaction_with_dpctl_memory(): + return zero_copy_test1.test_dpnp_interaction_with_dpctl_memory() + + +@pytest.mark.skipif(module_not_found, reason=reason) +def test_dpnp_array_has_iface(): + return zero_copy_test1.test_dpnp_array_has_iface() + + +@pytest.mark.skipif(module_not_found, reason=reason) +def test_dpctl_dparray_has_iface(): + return zero_copy_test1.test_dpctl_dparray_has_iface() diff --git a/tests/third_party/intel/zero-copy-test1.py b/tests/third_party/intel/zero-copy-test1.py index 4e7b110c669..44d2d776e9b 100644 --- a/tests/third_party/intel/zero-copy-test1.py +++ b/tests/third_party/intel/zero-copy-test1.py @@ -1,84 +1,84 @@ -import dpctl -import dpctl.memory as dpmem -import dpctl.tensor.numpy_usm_shared as usmarray -import numba_dppy as dppy -import numpy as np -import pytest - -import dpnp - - -class DuckUSMArray: - def __init__(self, shape, dtype="d", host_buffer=None): - nelems = np.prod(shape) - bytes = nelems * np.dtype(dtype).itemsize - shmem = dpmem.MemoryUSMShared(bytes) - if isinstance(host_buffer, np.ndarray): - shmem.copy_from_host(host_buffer.view(dtype="|u1")) - self.arr = np.ndarray(shape, dtype=dtype, buffer=shmem) - - def __getitem__(self, indx): - 
return self.arr[indx] - - def __setitem__(self, indx, val): - self.arr.__setitem__(indx, val) - - @property - def __sycl_usm_array_interface__(self): - iface = self.arr.__array_interface__ - b = self.arr.base - iface["syclobj"] = b.__sycl_usm_array_interface__["syclobj"] - iface["version"] = 1 - return iface - - -def test_dpnp_interaction_with_dpctl_memory(): - """Tests if dpnp supports zero-copy data exchange with another Python - object that defines `__sycl_usm_array_interface__` - """ - hb = np.arange(0, 100, dtype=np.int64) - da = DuckUSMArray(hb.shape, dtype=hb.dtype, host_buffer=hb) - - Y = dpnp.asarray(da) - # dpnp array must infer dimensions/dtype from input object - assert Y.dtype == hb.dtype - assert Y.shape == hb.shape - - Y[0] = 10 - assert da[0] == 10 # check zero copy - - -def test_dppy_array_pass(): - """Tests if dppy supports passing an array-like object DuckArray that defines `__sycl_usm_array_interface__` - to a dppy.kernel - """ - - @dppy.kernel - def dppy_f(array_like_obj): - i = dppy.get_global_id(0) - array_like_obj[i] = 10 - - global_size = 100 - hb = np.arange(0, global_size, dtype="i4") - da = DuckUSMArray(hb.shape, dtype=hb.dtype, host_buffer=hb) - - if dpctl.has_gpu_queues(dpctl.backend_type.level_zero): - print("\nScheduling on OpenCL GPU\n") - with dpctl.device_context("opencl:gpu") as gpu_queue: - dppy_f[global_size, dppy.DEFAULT_LOCAL_SIZE](da) - else: - print("\nSkip scheduling on OpenCL GPU\n") - - assert da[0] == 10 - - -def test_dpctl_dparray_has_iface(): - """Tests if dpctl.dptensor.numpy_usm_shared defines '__sycl_usm_array_interface__'""" - X = usmarray.ones(10) - assert type(getattr(X, "__sycl_usm_array_interface__", None) is dict) - - -def test_dpnp_array_has_iface(): - """Tests if dpnp.ndarray defines '__sycl_usm_array_interface__'""" - X = dpnp.array([1]) - assert type(getattr(X, "__sycl_usm_array_interface__", None) is dict) +import dpctl +import dpctl.memory as dpmem +import dpctl.tensor.numpy_usm_shared as usmarray 
+import numba_dppy as dppy +import numpy as np +import pytest + +import dpnp + + +class DuckUSMArray: + def __init__(self, shape, dtype="d", host_buffer=None): + nelems = np.prod(shape) + bytes = nelems * np.dtype(dtype).itemsize + shmem = dpmem.MemoryUSMShared(bytes) + if isinstance(host_buffer, np.ndarray): + shmem.copy_from_host(host_buffer.view(dtype="|u1")) + self.arr = np.ndarray(shape, dtype=dtype, buffer=shmem) + + def __getitem__(self, indx): + return self.arr[indx] + + def __setitem__(self, indx, val): + self.arr.__setitem__(indx, val) + + @property + def __sycl_usm_array_interface__(self): + iface = self.arr.__array_interface__ + b = self.arr.base + iface["syclobj"] = b.__sycl_usm_array_interface__["syclobj"] + iface["version"] = 1 + return iface + + +def test_dpnp_interaction_with_dpctl_memory(): + """Tests if dpnp supports zero-copy data exchange with another Python + object that defines `__sycl_usm_array_interface__` + """ + hb = np.arange(0, 100, dtype=np.int64) + da = DuckUSMArray(hb.shape, dtype=hb.dtype, host_buffer=hb) + + Y = dpnp.asarray(da) + # dpnp array must infer dimensions/dtype from input object + assert Y.dtype == hb.dtype + assert Y.shape == hb.shape + + Y[0] = 10 + assert da[0] == 10 # check zero copy + + +def test_dppy_array_pass(): + """Tests if dppy supports passing an array-like object DuckArray that defines `__sycl_usm_array_interface__` + to a dppy.kernel + """ + + @dppy.kernel + def dppy_f(array_like_obj): + i = dppy.get_global_id(0) + array_like_obj[i] = 10 + + global_size = 100 + hb = np.arange(0, global_size, dtype="i4") + da = DuckUSMArray(hb.shape, dtype=hb.dtype, host_buffer=hb) + + if dpctl.has_gpu_queues(dpctl.backend_type.level_zero): + print("\nScheduling on OpenCL GPU\n") + with dpctl.device_context("opencl:gpu") as gpu_queue: + dppy_f[global_size, dppy.DEFAULT_LOCAL_SIZE](da) + else: + print("\nSkip scheduling on OpenCL GPU\n") + + assert da[0] == 10 + + +def test_dpctl_dparray_has_iface(): + """Tests if 
dpctl.dptensor.numpy_usm_shared defines '__sycl_usm_array_interface__'""" + X = usmarray.ones(10) + assert type(getattr(X, "__sycl_usm_array_interface__", None) is dict) + + +def test_dpnp_array_has_iface(): + """Tests if dpnp.ndarray defines '__sycl_usm_array_interface__'""" + X = dpnp.array([1]) + assert type(getattr(X, "__sycl_usm_array_interface__", None) is dict) From 554bcddcb26fae9f9c97884aa7dcd7ae83767b89 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Mon, 5 Feb 2024 15:46:29 -0800 Subject: [PATCH 17/29] Update docs for array creation functions (#1674) * Update docs for array creation functions * fix pre-cimmit * address comments * address comments * Add CFD examples and update CFD check for dpnp.copy * Fix dpnp.asfortranarray and dpnp.ascontiguousarray functions for not array input (#1691) * Fix dpnp.asfortranarray and dpnp.ascontiguousarray functions for not array input * Fix tests --- dpnp/dpnp_iface_arraycreation.py | 1398 +++++++++++++++++++++++++----- tests/test_arraycreation.py | 36 + tests/test_sycl_queue.py | 2 + tests/test_usm_type.py | 1 + 4 files changed, 1237 insertions(+), 200 deletions(-) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 851ef119975..26464d40256 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -112,6 +112,30 @@ def arange( For full documentation refer to :obj:`numpy.arange`. + Parameters + ---------- + start : {int, real}, optional + Start of interval. The interval includes this value. The default start value is 0. + stop : {int, real} + End of interval. The interval does not include this value, except in some cases + where `step` is not an integer and floating point round-off affects the length of out. + step : {int, real}, optional + Spacing between values. The default `step` size is 1. If `step` is specified as + a position argument, `start` must also be given. + dtype : dtype, optional + The desired dtype for the array. 
If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -129,12 +153,26 @@ def arange( Examples -------- >>> import dpnp as np - >>> [i for i in np.arange(3)] - [0, 1, 2] - >>> [i for i in np.arange(3, 7)] - [3, 4, 5, 6] - >>> [i for i in np.arange(3, 7, 2)] - [3, 5] + >>> np.arange(3) + array([0, 1, 2]) + >>> np.arange(3, 7) + array([3, 4, 5, 6]) + >>> np.arange(3, 7, 2) + array([3, 5]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.arange(3) # default case + >>> x, x.device, x.usm_type + (array([0, 1, 2]), Device(level_zero:gpu:0), 'device') + + >>> y = np.arange(3, device="cpu") + >>> y, y.device, y.usm_type + (array([0, 1, 2]), Device(opencl:cpu:0), 'device') + + >>> z = np.arange(3, usm_type="host") + >>> z, z.device, z.usm_type + (array([0, 1, 2]), Device(level_zero:gpu:0), 'host') """ @@ -172,6 +210,29 @@ def array( For full documentation refer to :obj:`numpy.array`. + Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array. 
If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + copy : bool, optional + If ``True`` (default), then the object is copied. + order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array. Default: "K". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -201,17 +262,31 @@ def array( >>> x = np.array([1, 2, 3]) >>> x.ndim, x.size, x.shape (1, 3, (3,)) - >>> print(x) - [1 2 3] + >>> x + array([1, 2, 3]) More than one dimension: >>> x2 = np.array([[1, 2], [3, 4]]) >>> x2.ndim, x2.size, x2.shape (2, 4, (2, 2)) - >>> print(x2) - [[1 2] - [3 4]] + >>> x2 + array([[1, 2], + [3, 4]]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.array([1, 2, 3]) # default case + >>> x, x.device, x.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'device') + + >>> y = np.array([1, 2, 3], device="cpu") + >>> y, y.device, y.usm_type + (array([1, 2, 3]), Device(opencl:cpu:0), 'device') + + >>> z = np.array([1, 2, 3], usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'host') """ @@ -262,6 +337,27 @@ def asanyarray( For full documentation refer to :obj:`numpy.asanyarray`. 
+ Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array. Default: "K". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -291,6 +387,20 @@ def asanyarray( >>> np.asanyarray([1, 2, 3]) array([1, 2, 3]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.asanyarray([1, 2, 3]) # default case + >>> x, x.device, x.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'device') + + >>> y = np.asanyarray([1, 2, 3], device="cpu") + >>> y, y.device, y.usm_type + (array([1, 2, 3]), Device(opencl:cpu:0), 'device') + + >>> z = np.asanyarray([1, 2, 3], usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'host') + """ if like is not None: @@ -323,6 +433,27 @@ def asarray( For full documentation refer to :obj:`numpy.asarray`. 
+ Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array. Default: "K". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -353,6 +484,20 @@ def asarray( >>> np.asarray([1, 2, 3]) array([1, 2, 3]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.asarray([1, 2, 3]) # default case + >>> x, x.device, x.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'device') + + >>> y = np.asarray([1, 2, 3], device="cpu") + >>> y, y.device, y.usm_type + (array([1, 2, 3]), Device(opencl:cpu:0), 'device') + + >>> z = np.asarray([1, 2, 3], usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'host') + """ if like is not None: @@ -379,6 +524,25 @@ def ascontiguousarray( For full documentation refer to :obj:`numpy.ascontiguousarray`. 
+ Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ Returns ------- out : dpnp.ndarray @@ -424,6 +588,21 @@ def ascontiguousarray( >>> x is y True + Creating an array on a different device or with a specified usm_type + + >>> x0 = np.asarray([1, 2, 3]) + >>> x = np.ascontiguousarray(x0) # default case + >>> x, x.device, x.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'device') + + >>> y = np.ascontiguousarray(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([1, 2, 3]), Device(opencl:cpu:0), 'device') + + >>> z = np.ascontiguousarray(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'host') + """ if like is not None: @@ -433,7 +612,7 @@ def ascontiguousarray( ) # at least 1-d array has to be returned - if a.ndim == 0: + if dpnp.isscalar(a) or hasattr(a, "ndim") and a.ndim == 0: a = [a] return asarray( @@ -454,6 +633,25 @@ def asfortranarray( For full documentation refer to :obj:`numpy.asfortranarray`. + Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. 
+ sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -501,6 +699,21 @@ def asfortranarray( >>> x is y True + Creating an array on a different device or with a specified usm_type + + >>> x0 = np.asarray([1, 2, 3]) + >>> x = np.asfortranarray(x0) # default case + >>> x, x.device, x.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'device') + + >>> y = np.asfortranarray(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([1, 2, 3]), Device(opencl:cpu:0), 'device') + + >>> z = np.asfortranarray(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'host') + """ if like is not None: @@ -510,7 +723,7 @@ def asfortranarray( ) # at least 1-d array has to be returned - if a.ndim == 0: + if dpnp.isscalar(a) or hasattr(a, "ndim") and a.ndim == 0: a = [a] return asarray( @@ -523,12 +736,32 @@ def asfortranarray( ) -def copy(a, order="K", subok=False): +def copy( + a, order="K", subok=False, device=None, usm_type=None, sycl_queue=None +): """ Return an array copy of the given object. For full documentation refer to :obj:`numpy.copy`. + Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array. Default: "K". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. 
+ usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Limitations ----------- Parameter `subok` is supported only with default value ``False``. @@ -566,6 +799,21 @@ def copy(a, order="K", subok=False): >>> x[0] == z[0] array(False) + Creating an array on a different device or with a specified usm_type + + >>> x0 = np.array([1, 2, 3]) + >>> x = np.copy(x0) # default case + >>> x, x.device, x.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'device') + + >>> y = np.copy(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([1, 2, 3]), Device(opencl:cpu:0), 'device') + + >>> z = np.copy(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 2, 3]), Device(level_zero:gpu:0), 'host') + """ if subok is not False: @@ -575,9 +823,23 @@ def copy(a, order="K", subok=False): ) if dpnp.is_supported_array_type(a): - return dpnp_container.copy(a, order=order) + sycl_queue_normalized = dpnp.get_normalized_queue_device( + a, device=device, sycl_queue=sycl_queue + ) + if ( + usm_type is None or usm_type == a.usm_type + ) and sycl_queue_normalized == a.sycl_queue: + return dpnp_container.copy(a, order=order) - return array(a, order=order, subok=subok, copy=True) + return array( + a, + order=order, + subok=subok, + copy=True, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) def diag(v, /, k=0, *, device=None, usm_type=None, sycl_queue=None): @@ -586,16 +848,32 @@ def diag(v, /, k=0, *, device=None, usm_type=None, sycl_queue=None): For full documentation refer to :obj:`numpy.diag`. + Parameters + ---------- + v : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + If `v` is a 2-D array, return a copy of its k-th diagonal. 
If `v` is a 1-D array, + return a 2-D array with `v` on the k-th diagonal. + k : int, optional + Diagonal in question. The default is 0. Use k > 0 for diagonals above the main diagonal, + and k < 0 for diagonals below the main diagonal. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray The extracted diagonal or constructed diagonal array. - Limitations - ----------- - Parameter `k` is only supported as integer data type. - Otherwise ``TypeError`` exception will be raised. - See Also -------- :obj:`diagonal` : Return specified diagonals. 
@@ -607,24 +885,38 @@ def diag(v, /, k=0, *, device=None, usm_type=None, sycl_queue=None): Examples -------- >>> import dpnp as np - >>> x = np.arange(9).reshape((3,3)) - >>> x + >>> x0 = np.arange(9).reshape((3, 3)) + >>> x0 array([[0, 1, 2], [3, 4, 5], [6, 7, 8]]) - >>> np.diag(x) + >>> np.diag(x0) array([0, 4, 8]) - >>> np.diag(x, k=1) + >>> np.diag(x0, k=1) array([1, 5]) - >>> np.diag(x, k=-1) + >>> np.diag(x0, k=-1) array([3, 7]) - >>> np.diag(np.diag(x)) + >>> np.diag(np.diag(x0)) array([[0, 0, 0], [0, 4, 0], [0, 0, 8]]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.diag(x0) # default case + >>> x, x.device, x.usm_type + (array([0, 4, 8]), Device(level_zero:gpu:0), 'device') + + >>> y = np.diag(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([0, 4, 8]), Device(opencl:cpu:0), 'device') + + >>> z = np.diag(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([0, 4, 8]), Device(level_zero:gpu:0), 'host') + """ if not isinstance(k, int): @@ -670,6 +962,26 @@ def diagflat(v, /, k=0, *, device=None, usm_type=None, sycl_queue=None): For full documentation refer to :obj:`numpy.diagflat`. + Parameters + ---------- + v : array_like + Input data, which is flattened and set as the k-th diagonal of the output, + in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + k : int, optional + Diagonal to set; 0, the default, corresponds to the "main" diagonal, + a positive (negative) k giving the number of the diagonal above (below) the main. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -689,19 +1001,42 @@ def diagflat(v, /, k=0, *, device=None, usm_type=None, sycl_queue=None): Examples -------- >>> import dpnp as np - >>> x = np.array([[1,2], [3,4]]) - >>> np.diagflat(x) + >>> x0 = np.array([[1, 2], [3, 4]]) + >>> np.diagflat(x0) array([[1, 0, 0, 0], [0, 2, 0, 0], [0, 0, 3, 0], [0, 0, 0, 4]]) - >>> np.diagflat(x, 1) + >>> np.diagflat(x0, 1) array([[0, 1, 0, 0, 0], - [0, 0, 2, 0, 0], - [0, 0, 0, 3, 0], - [0, 0, 0, 0, 4], - [0, 0, 0, 0, 0]]) + [0, 0, 2, 0, 0], + [0, 0, 0, 3, 0], + [0, 0, 0, 0, 4], + [0, 0, 0, 0, 0]]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.diagflat(x0) # default case + >>> x, x.device, x.usm_type + (array([[1, 0, 0, 0], + [0, 2, 0, 0], + [0, 0, 3, 0], + [0, 0, 0, 4]]), Device(level_zero:gpu:0), 'device') + + >>> y = np.diagflat(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([[1, 0, 0, 0], + [0, 2, 0, 0], + [0, 0, 3, 0], + [0, 0, 0, 4]]), Device(opencl:cpu:0), 'device') + + >>> z = np.diagflat(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([[1, 0, 0, 0], + [0, 2, 0, 0], + [0, 0, 3, 0], + [0, 0, 0, 4]]), Device(level_zero:gpu:0), 'host') """ @@ -730,9 +1065,33 @@ def empty( For full documentation refer to :obj:`numpy.empty`. + Parameters + ---------- + shape : {int, sequence of ints} + Shape of the new array, e.g., (2, 3) or 2. 
+ dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of uninitialized data of the given shape, dtype, and order. + Limitations ----------- - Parameter `order` is supported only with values ``"C"`` and ``"F"``. Parameter `like` is supported only with default value ``None``. Otherwise the function will be executed sequentially on CPU. @@ -746,9 +1105,22 @@ def empty( Examples -------- >>> import dpnp as np - >>> x = np.empty(4) - >>> print(x) - [0. 0. 0. 0.] 
+ >>> np.empty(4) + array([9.03088525e-312, 9.03088525e-312, 9.03088525e-312, 9.03088525e-312]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.empty((3, 3)) # default case + >>> x.shape, x.device, x.usm_type + ((3, 3), Device(level_zero:gpu:0), 'device') + + >>> y = np.empty((3, 3), device="cpu") + >>> y.shape, y.device, y.usm_type + ((3, 3), Device(opencl:cpu:0), 'device') + + >>> z = np.empty((3, 3), usm_type="host") + >>> z.shape, z.device, z.usm_type + ((3, 3), Device(level_zero:gpu:0), 'host') """ @@ -770,7 +1142,7 @@ def empty( def empty_like( - x1, + a, /, *, dtype=None, @@ -786,10 +1158,35 @@ def empty_like( For full documentation refer to :obj:`numpy.empty_like`. + Parameters + ---------- + a : {dpnp_array, usm_ndarray} + The shape and dtype of `a` define these same attributes of the returned array. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + shape : {int, sequence of ints} + Overrides the shape of the result. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of uninitialized data with the same shape and type as prototype. 
+ Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` - Parameter `order` is supported with values ``"C"`` or ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -804,24 +1201,37 @@ def empty_like( -------- >>> import dpnp as np >>> a = np.array([1, 2, 3]) - >>> x = np.empty_like(a) - >>> [i for i in x] - [0, 0, 0] + >>> np.empty_like(a) + array([1, 2, 3]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.empty_like(a) # default case + >>> x.shape, x.device, x.usm_type + ((3,), Device(level_zero:gpu:0), 'device') + + >>> y = np.empty_like(a, device="cpu") + >>> y.shape, y.device, y.usm_type + ((3,), Device(opencl:cpu:0), 'device') + + >>> z = np.empty_like(a, usm_type="host") + >>> z.shape, z.device, z.usm_type + ((3,), Device(level_zero:gpu:0), 'host') """ - if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + if not isinstance(a, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ("C", "c", "F", "f", None): pass elif subok is not False: pass else: - _shape = x1.shape if shape is None else shape - _dtype = x1.dtype if dtype is None else dtype - _usm_type = x1.usm_type if usm_type is None else usm_type + _shape = a.shape if shape is None else shape + _dtype = a.dtype if dtype is None else dtype + _usm_type = a.usm_type if usm_type is None else usm_type _sycl_queue = dpnp.get_normalized_queue_device( - x1, sycl_queue=sycl_queue, device=device + a, sycl_queue=sycl_queue, device=device ) return dpnp_container.empty( _shape, @@ -831,7 +1241,7 @@ def empty_like( sycl_queue=_sycl_queue, ) - return call_origin(numpy.empty_like, x1, dtype, order, subok, shape) + return call_origin(numpy.empty_like, a, dtype, order, subok, shape) def eye( @@ -852,6 +1262,37 @@ def eye( For full documentation refer to :obj:`numpy.eye`. 
+ Parameters + ---------- + N : int + Number of rows in the output. + M : int, optional + Number of columns in the output. If None, defaults to `N`. + k : int, optional + Index of the diagonal: 0 (the default) refers to the main diagonal, + a positive value refers to an upper diagonal, and a negative value to a lower diagonal. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + An array where all elements are equal to zero, except for the k-th diagonal, + whose values are equal to one. + Limitations ----------- Parameter `order` is supported only with values ``"C"`` and ``"F"``. 
@@ -870,6 +1311,23 @@ def eye( [0., 0., 1.], [0., 0., 0.]]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.eye(2, dtype=int) # default case + >>> x, x.device, x.usm_type + (array([[1, 0], + [0, 1]]), Device(level_zero:gpu:0), 'device') + + >>> y = np.eye(2, dtype=int, device="cpu") + >>> y, y.device, y.usm_type + (array([[1, 0], + [0, 1]]), Device(opencl:cpu:0), 'device') + + >>> z = np.eye(2, dtype=int, usm_type="host") + >>> z, z.device, z.usm_type + (array([[1, 0], + [0, 1]]), Device(level_zero:gpu:0), 'host') + """ if order not in ("C", "c", "F", "f", None): pass @@ -990,6 +1448,34 @@ def full( For full documentation refer to :obj:`numpy.full`. + Parameters + ---------- + shape : {int, sequence of ints} + Shape of the new array, e.g., (2, 3) or 2. + fill_value : {scalar, array_like} + Fill value, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ + Returns + ------- + out : dpnp.ndarray + Array of `fill_value` with the given shape, dtype, and order. + Limitations ----------- Parameter `order` is supported only with values ``"C"`` and ``"F"``. @@ -1006,9 +1492,22 @@ def full( Examples -------- >>> import dpnp as np - >>> x = np.full(4, 10) - >>> [i for i in x] - [10, 10, 10, 10] + >>> np.full(4, 10) + array([10, 10, 10, 10]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.full(4, 10) # default case + >>> x, x.device, x.usm_type + (array([10, 10, 10, 10]), Device(level_zero:gpu:0), 'device') + + >>> y = np.full(4, 10, device="cpu") + >>> y, y.device, y.usm_type + (array([10, 10, 10, 10]), Device(opencl:cpu:0), 'device') + + >>> z = np.full(4, 10, usm_type="host") + >>> z, z.device, z.usm_type + (array([10, 10, 10, 10]), Device(level_zero:gpu:0), 'host') """ @@ -1031,7 +1530,7 @@ def full( def full_like( - x1, + a, /, fill_value, *, @@ -1048,9 +1547,38 @@ def full_like( For full documentation refer to :obj:`numpy.full_like`. + Parameters + ---------- + a : {dpnp_array, usm_ndarray} + The shape and dtype of `a` define these same attributes of the returned array. + fill_value : {scalar, array_like} + Fill value, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + shape : {int, sequence of ints} + Overrides the shape of the result. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of `fill_value` with the same shape and type as `a`. + Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` Parameter `order` is supported only with values ``"C"`` and ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1066,23 +1594,36 @@ def full_like( -------- >>> import dpnp as np >>> a = np.arange(6) - >>> x = np.full_like(a, 1) - >>> [i for i in x] - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + >>> np.full_like(a, 1) + array([1, 1, 1, 1, 1, 1]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.full_like(a, 1) # default case + >>> x, x.device, x.usm_type + (array([1, 1, 1, 1, 1, 1]), Device(level_zero:gpu:0), 'device') + + >>> y = np.full_like(a, 1, device="cpu") + >>> y, y.device, y.usm_type + (array([1, 1, 1, 1, 1, 1]), Device(opencl:cpu:0), 'device') + + >>> z = np.full_like(a, 1, usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 1, 1, 1, 1, 1]), Device(level_zero:gpu:0), 'host') """ - if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + if not isinstance(a, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ("C", "c", "F", "f", None): pass elif subok is not False: pass else: - _shape = x1.shape if shape is None else shape - _dtype = x1.dtype if dtype is None else dtype - 
_usm_type = x1.usm_type if usm_type is None else usm_type + _shape = a.shape if shape is None else shape + _dtype = a.dtype if dtype is None else dtype + _usm_type = a.usm_type if usm_type is None else usm_type _sycl_queue = dpnp.get_normalized_queue_device( - x1, sycl_queue=sycl_queue, device=device + a, sycl_queue=sycl_queue, device=device ) return dpnp_container.full( @@ -1093,14 +1634,14 @@ def full_like( usm_type=_usm_type, sycl_queue=_sycl_queue, ) - return numpy.full_like(x1, fill_value, dtype, order, subok, shape) + return numpy.full_like(a, fill_value, dtype, order, subok, shape) def geomspace( start, stop, /, - num, + num=50, *, dtype=None, device=None, @@ -1114,6 +1655,40 @@ def geomspace( For full documentation refer to :obj:`numpy.geomspace`. + Parameters + ---------- + start : array_like + The starting value of the sequence, in any form that can be converted to an array. + This includes scalars, lists, lists of tuples, tuples, tuples of tuples, + tuples of lists, and ndarrays. + stop : array_like + The final value of the sequence, in any form that can be converted to an array. + This includes scalars, lists, lists of tuples, tuples, tuples of tuples, + tuples of lists, and ndarrays. If `endpoint` is ``False`` num + 1 values + are spaced over the interval in log-space, of which all but the last + (a sequence of length num) are returned. + num : int, optional + Number of samples to generate. Default is 50. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + endpoint : bool, optional + If ``True``, `stop` is the last sample. Otherwise, it is not included. Default is ``True``. + axis : int, optional + The axis in the result to store the samples. Relevant only if start or stop are array-like. + By default (0), the samples will be along a new axis inserted at the beginning. + Use -1 to get an axis at the end. + Returns ------- out : dpnp.ndarray @@ -1150,6 +1725,20 @@ def geomspace( >>> np.geomspace(-1000, -1, num=4) array([-1000., -100., -10., -1.]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.geomspace(1000, 1, num=4) # default case + >>> x, x.device, x.usm_type + (array([1000., 100., 10., 1.]), Device(level_zero:gpu:0), 'device') + + >>> y = np.geomspace(1000, 1, num=4, device="cpu") + >>> y, y.device, y.usm_type + (array([1000., 100., 10., 1.]), Device(opencl:cpu:0), 'device') + + >>> z = np.geomspace(1000, 1, num=4, usm_type="host") + >>> z, z.device, z.usm_type + (array([1000., 100., 10., 1.]), Device(level_zero:gpu:0), 'host') + """ return dpnp_geomspace( @@ -1182,6 +1771,24 @@ def identity( For full documentation refer to :obj:`numpy.identity`. + Parameters + ---------- + n : int + Number of rows (and columns) in `n` x `n` output. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. 
+ device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -1207,21 +1814,41 @@ def identity( [0., 1., 0.], [0., 0., 1.]]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.identity(3) # default case + >>> x, x.device, x.usm_type + (array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]), Device(level_zero:gpu:0), 'device') + + >>> y = np.identity(3, device="cpu") + >>> y, y.device, y.usm_type + (array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]), Device(opencl:cpu:0), 'device') + + >>> z = np.identity(3, usm_type="host") + >>> z, z.device, z.usm_type + (array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]), Device(level_zero:gpu:0), 'host') + """ - if not use_origin_backend(): - if like is not None: - pass - elif n < 0: - raise ValueError("negative dimensions are not allowed") - else: - _dtype = dpnp.default_float_type() if dtype is None else dtype - return dpnp.eye( - n, - dtype=_dtype, - device=device, - usm_type=usm_type, - sycl_queue=sycl_queue, - ) + + if like is not None: + pass + elif n < 0: + raise ValueError("negative dimensions are not allowed") + else: + _dtype = dpnp.default_float_type() if dtype is None else dtype + return dpnp.eye( + n, + dtype=_dtype, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) return call_origin(numpy.identity, n, dtype=dtype, like=like) @@ 
-1244,6 +1871,39 @@ def linspace( For full documentation refer to :obj:`numpy.linspace`. + Parameters + ---------- + start : array_like + The starting value of the sequence, in any form that can be converted to an array. + This includes scalars, lists, lists of tuples, tuples, tuples of tuples, + tuples of lists, and ndarrays. + stop : array_like + The end value of the sequence, in any form that can be converted to an array. + This includes scalars, lists, lists of tuples, tuples, tuples of tuples, + tuples of lists, and ndarrays. If `endpoint` is set to ``False`` the sequence consists + of all but the last of num + 1 evenly spaced samples, so that `stop` is excluded. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + endpoint : bool, optional + If ``True``, `stop` is the last sample. Otherwise, it is not included. Default is ``True``. + retstep : bool, optional + If ``True``, return (samples, step), where step is the spacing between samples. + axis : int, optional + The axis in the result to store the samples. Relevant only if start or stop are array-like. + By default (0), the samples will be along a new axis inserted at the beginning. 
+ Use -1 to get an axis at the end. + Returns ------- out : dpnp.ndarray @@ -1275,6 +1935,20 @@ def linspace( >>> np.linspace(2.0, 3.0, num=5, retstep=True) (array([2. , 2.25, 2.5 , 2.75, 3. ]), array(0.25)) + Creating an array on a different device or with a specified usm_type + + >>> x = np.linspace(2.0, 3.0, num=3) # default case + >>> x, x.device, x.usm_type + (array([2. , 2.5, 3. ]), Device(level_zero:gpu:0), 'device') + + >>> y = np.linspace(2.0, 3.0, num=3, device="cpu") + >>> y, y.device, y.usm_type + (array([2. , 2.5, 3. ]), Device(opencl:cpu:0), 'device') + + >>> z = np.linspace(2.0, 3.0, num=3, usm_type="host") + >>> z, z.device, z.usm_type + (array([2. , 2.5, 3. ]), Device(level_zero:gpu:0), 'host') + """ return dpnp_linspace( @@ -1336,6 +2010,47 @@ def logspace( For full documentation refer to :obj:`numpy.logspace`. + Parameters + ---------- + start : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + `base` ** `start` is the starting value of the sequence. + stop : array_like + Input data, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + `base` ** `stop` is the final value of the sequence, unless `endpoint` is ``False``. + In that case, num + 1 values are spaced over the interval in log-space, + of which all but the last (a sequence of length num) are returned. + num : int, optional + Number of samples to generate. Default is 50. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + endpoint : bool, optional + If ``True``, `stop` is the last sample. Otherwise, it is not included. Default is ``True``. + base : array_like, optional + The base of the log space, in any form that can be converted to an array. + This includes scalars, lists, lists of tuples, tuples, tuples of tuples, + tuples of lists, and ndarrays. + The `step` size between the elements in ln(samples) / ln(base) + (or log_base(samples)) is uniform. + Default is 10.0. + dtype : dtype, optional + The desired dtype for the array. If not given, a default dtype will be used that can represent + the values (by considering Promotion Type Rule and device capabilities when necessary.) + axis : int, optional + The axis in the result to store the samples. Relevant only if start, stop, + or base are array-like. By default (0), the samples will be along a new axis inserted + at the beginning. Use -1 to get an axis at the end. + Returns ------- out: dpnp.ndarray @@ -1368,6 +2083,20 @@ def logspace( array([[ 4. , 5.0396842 , 6.34960421, 8. ], [ 9. , 12.98024613, 18.72075441, 27. 
]]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.logspace(1.0, 3.0, num=3) # default case + >>> x, x.device, x.usm_type + (array([ 10., 100., 1000.]), Device(level_zero:gpu:0), 'device') + + >>> y = np.logspace(1.0, 3.0, num=3, device="cpu") + >>> y, y.device, y.usm_type + (array([ 10., 100., 1000.]), Device(opencl:cpu:0), 'device') + + >>> z = np.logspace(1.0, 3.0, num=3, usm_type="host") + >>> z, z.device, z.usm_type + (array([ 10., 100., 1000.]), Device(level_zero:gpu:0), 'host') + """ return dpnp_logspace( @@ -1492,7 +2221,7 @@ class MGridClass: an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by :obj:`dpnp.dpnp_array.dpnp_array.device` property. usm_type : {"device", "shared", "host"}, optional - The type of SYCL USM allocation for the output array. + The type of SYCL USM allocation for the output array. Default is "device". sycl_queue : {None, SyclQueue}, optional A SYCL queue to use for output array allocation and copying. @@ -1516,17 +2245,19 @@ class MGridClass: [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]]) - >>> x = np.mgrid[-1:1:5j] - >>> x - array([-1. , -0.5, 0. , 0.5, 1. ]) - >>> x.usm_type - 'device' + Creating an array on a different device or with a specified usm_type + + >>> x = np.mgrid[-1:1:5j] # default case + >>> x, x.device, x.usm_type + (array([-1. , -0.5, 0. , 0.5, 1. ]), Device(level_zero:gpu:0), 'device') + + >>> y = np.mgrid(device="cpu")[-1:1:5j] + >>> y, y.device, y.usm_type + (array([-1. , -0.5, 0. , 0.5, 1. ]), Device(opencl:cpu:0), 'device') - >>> y = np.mgrid(usm_type="host")[-1:1:5j] - >>> y - array([-1. , -0.5, 0. , 0.5, 1. ]) - >>> x.usm_type - 'host' + >>> z = np.mgrid(usm_type="host")[-1:1:5j] + >>> z, z.device, z.usm_type + (array([-1. , -0.5, 0. , 0.5, 1. ]), Device(level_zero:gpu:0), 'host') """ @@ -1560,7 +2291,7 @@ class OGridClass: an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by :obj:`dpnp.dpnp_array.dpnp_array.device` property. 
usm_type : {"device", "shared", "host"}, optional - The type of SYCL USM allocation for the output array. + The type of SYCL USM allocation for the output array. Default is "device". sycl_queue : {None, SyclQueue}, optional A SYCL queue to use for output array allocation and copying. @@ -1580,17 +2311,19 @@ class OGridClass: [3], [4]]), array([[0, 1, 2, 3, 4]])] - >>> x = np.ogrid[-1:1:5j] - >>> x - array([-1. , -0.5, 0. , 0.5, 1. ]) - >>> x.usm_type - 'device' + Creating an array on a different device or with a specified usm_type + + >>> x = np.ogrid[-1:1:5j] # default case + >>> x, x.device, x.usm_type + (array([-1. , -0.5, 0. , 0.5, 1. ]), Device(level_zero:gpu:0), 'device') - >>> y = np.ogrid(usm_type="host")[-1:1:5j] - >>> y - array([-1. , -0.5, 0. , 0.5, 1. ]) - >>> x.usm_type - 'host' + >>> y = np.ogrid(device="cpu")[-1:1:5j] + >>> y, y.device, y.usm_type + (array([-1. , -0.5, 0. , 0.5, 1. ]), Device(opencl:cpu:0), 'device') + + >>> z = np.ogrid(usm_type="host")[-1:1:5j] + >>> z, z.device, z.usm_type + (array([-1. , -0.5, 0. , 0.5, 1. ]), Device(level_zero:gpu:0), 'host') """ @@ -1621,6 +2354,31 @@ def ones( For full documentation refer to :obj:`numpy.ones`. + Parameters + ---------- + shape : {int, sequence of ints} + Shape of the new array, e.g., (2, 3) or 2. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. 
+ usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of ones with the given shape, dtype, and order. + Limitations ----------- Parameter `order` is supported only with values ``"C"`` and ``"F"``. @@ -1637,13 +2395,28 @@ def ones( Examples -------- >>> import dpnp as np - >>> [i for i in np.ones(5)] - [1.0, 1.0, 1.0, 1.0, 1.0] + >>> np.ones(5) + array([1., 1., 1., 1., 1.]) >>> x = np.ones((2, 1)) >>> x.ndim, x.size, x.shape (2, 2, (2, 1)) - >>> [i for i in x] - [1.0, 1.0] + >>> x + array([[1.], + [1.]]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.ones(3) # default case + >>> x, x.device, x.usm_type + (array([1., 1., 1.]), Device(level_zero:gpu:0), 'device') + + >>> y = np.ones(3, device="cpu") + >>> y, y.device, y.usm_type + (array([1., 1., 1.]), Device(opencl:cpu:0), 'device') + + >>> z = np.ones(3, usm_type="host") + >>> z, z.device, z.usm_type + (array([1., 1., 1.]), Device(level_zero:gpu:0), 'host') """ @@ -1665,7 +2438,7 @@ def ones( def ones_like( - x1, + a, /, *, dtype=None, @@ -1681,10 +2454,35 @@ def ones_like( For full documentation refer to :obj:`numpy.ones_like`. + Parameters + ---------- + a : {dpnp_array, usm_ndarray} + The shape and dtype of `a` define these same attributes of the returned array. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + shape : {int, sequence of ints} + Overrides the shape of the result. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of ones with the same shape and type as `a`. + Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` - Parameter `order` is supported with values ``"C"`` or ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1698,25 +2496,39 @@ def ones_like( Examples -------- >>> import dpnp as np - >>> x = np.arange(6) - >>> [i for i in x] - [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] - >>> [i for i in np.ones_like(x)] - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + >>> x0 = np.arange(6) + >>> x0 + array([0, 1, 2, 3, 4, 5]) + >>> np.ones_like(x0) + array([1, 1, 1, 1, 1, 1]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.ones_like(x0) # default case + >>> x, x.device, x.usm_type + (array([1, 1, 1, 1, 1, 1]), Device(level_zero:gpu:0), 'device') + + >>> y = np.ones_like(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([1, 1, 1, 1, 1, 1]), Device(opencl:cpu:0), 'device') + + >>> z = np.ones_like(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([1, 1, 1, 1, 1, 1]), Device(level_zero:gpu:0), 'host') """ - if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + if not isinstance(a, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ("C", "c", "F", "f", None): pass elif subok is not False: pass else: - _shape = 
x1.shape if shape is None else shape - _dtype = x1.dtype if dtype is None else dtype - _usm_type = x1.usm_type if usm_type is None else usm_type + _shape = a.shape if shape is None else shape + _dtype = a.dtype if dtype is None else dtype + _usm_type = a.usm_type if usm_type is None else usm_type _sycl_queue = dpnp.get_normalized_queue_device( - x1, sycl_queue=sycl_queue, device=device + a, sycl_queue=sycl_queue, device=device ) return dpnp_container.ones( _shape, @@ -1726,7 +2538,7 @@ def ones_like( sycl_queue=_sycl_queue, ) - return call_origin(numpy.ones_like, x1, dtype, order, subok, shape) + return call_origin(numpy.ones_like, a, dtype, order, subok, shape) def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None): @@ -1778,9 +2590,32 @@ def tri( For full documentation refer to :obj:`numpy.tri`. + Parameters + ---------- + N : int + Number of rows in the array. + M : int, optional + Number of columns in the array. By default, `M` is taken equal to `N`. + k : int, optional + The sub-diagonal at and below which the array is filled. k = 0 is the main diagonal, + while k < 0 is below it, and k > 0 is above. The default is 0. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ Returns ------- - out : ndarray of shape (N, M) + out : dpnp.ndarray of shape (N, M) Array with its lower triangle filled with ones and zeros elsewhere. Limitations @@ -1807,43 +2642,60 @@ def tri( [1., 0., 0., 0., 0.], [1., 1., 0., 0., 0.]]) + Creating an array on a different device or with a specified usm_type + + >>> x = np.tri(3, 2) # default case + >>> x, x.device, x.usm_type + (array([[1., 0.], + [1., 1.], + [1., 1.]]), Device(level_zero:gpu:0), 'device') + + >>> y = np.tri(3, 2, device="cpu") + >>> y, y.device, y.usm_type + (array([[1., 0.], + [1., 1.], + [1., 1.]]), Device(opencl:cpu:0), 'device') + + >>> z = np.tri(3, 2, usm_type="host") + >>> z, z.device, z.usm_type + (array([[1., 0.], + [1., 1.], + [1., 1.]]), Device(level_zero:gpu:0), 'host') + """ - if not use_origin_backend(): - if len(kwargs) != 0: - pass - elif not isinstance(N, int): - pass - elif N < 0: - pass - elif M is not None and not isinstance(M, int): - pass - elif M is not None and M < 0: - pass - elif not isinstance(k, int): - pass - else: - _dtype = ( - dpnp.default_float_type() - if dtype in (dpnp.float, None) - else dtype - ) - if M is None: - M = N - - m = dpnp.ones( - (N, M), - dtype=_dtype, - device=device, - usm_type=usm_type, - sycl_queue=sycl_queue, - ) - return dpnp.tril(m, k=k) + if len(kwargs) != 0: + pass + elif not isinstance(N, int): + pass + elif N < 0: + pass + elif M is not None and not isinstance(M, int): + pass + elif M is not None and M < 0: + pass + elif not isinstance(k, int): + pass + else: + _dtype = ( + dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype + ) + if M is None: + M = N + + m = dpnp.ones( + (N, M), + dtype=_dtype, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + return dpnp.tril(m, k=k) return call_origin(numpy.tri, N, M, k, dtype, **kwargs) -def tril(x1, /, *, k=0): +def tril(m, /, *, k=0): """ Lower triangle of an array. 
@@ -1851,16 +2703,29 @@ def tril(x1, /, *, k=0): For full documentation refer to :obj:`numpy.tril`. + Parameters + ---------- + m : {dpnp_array, usm_ndarray}, shape (…, M, N) + Input array. + k : int, optional + Diagonal above which to zero elements. k = 0 (the default) is the main diagonal, + k < 0 is below it and k > 0 is above. + + Returns + ------- + out : dpnp.ndarray of shape (…, M, N) + Lower triangle of `m`, of same shape and dtype as `m`. + Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. Parameter `k` is supported only of integer data type. Otherwise the function will be executed sequentially on CPU. Examples -------- >>> import dpnp as np - >>> np.tril([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1) + >>> m = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + >>> np.tril(m, k=-1) array([[ 0, 0, 0], [ 4, 0, 0], [ 7, 8, 0], @@ -1874,19 +2739,19 @@ def tril(x1, /, *, k=0): except TypeError: pass - if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + if not isinstance(m, (dpnp.ndarray, dpt.usm_ndarray)): pass - elif x1.ndim < 2: + elif m.ndim < 2: pass elif _k is None: pass else: - return dpnp_container.tril(x1, k=_k) + return dpnp_container.tril(m, k=_k) - return call_origin(numpy.tril, x1, k) + return call_origin(numpy.tril, m, k) -def triu(x1, /, *, k=0): +def triu(m, /, *, k=0): """ Upper triangle of an array. @@ -1895,16 +2760,29 @@ def triu(x1, /, *, k=0): For full documentation refer to :obj:`numpy.triu`. + Parameters + ---------- + m : {dpnp_array, usm_ndarray}, shape (…, M, N) + Input array. + k : int, optional + Diagonal below which to zero elements. k = 0 (the default) is the main diagonal, + k < 0 is below it and k > 0 is above. + + Returns + ------- + out : dpnp.ndarray of shape (…, M, N) + Upper triangle of `m`, of same shape and dtype as `m`. 
+ Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. Parameter `k` is supported only of integer data type. Otherwise the function will be executed sequentially on CPU. Examples -------- >>> import dpnp as np - >>> np.triu([[1,2,3],[4,5,6],[7,8,9],[10,11,12]], -1) + >>> m = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + >>> np.triu(m, k=-1) array([[ 1, 2, 3], [ 4, 5, 6], [ 0, 8, 9], @@ -1918,20 +2796,20 @@ def triu(x1, /, *, k=0): except TypeError: pass - if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + if not isinstance(m, (dpnp.ndarray, dpt.usm_ndarray)): pass - elif x1.ndim < 2: + elif m.ndim < 2: pass elif _k is None: pass else: - return dpnp_container.triu(x1, k=_k) + return dpnp_container.triu(m, k=_k) - return call_origin(numpy.triu, x1, k) + return call_origin(numpy.triu, m, k) def vander( - x1, + x, /, N=None, increasing=False, @@ -1945,6 +2823,27 @@ def vander( For full documentation refer to :obj:`numpy.vander`. + Parameters + ---------- + x : array_like + 1-D input array, in any form that can be converted to an array. This includes scalars, + lists, lists of tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + N : int, optional + Number of columns in the output. If `N` is not specified, a square array is returned (N = len(x)). + increasing : bool, optional + Order of the powers of the columns. If ``True``, the powers increase from left to right, + if ``False`` (the default) they are reversed. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. 
+ usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + Returns ------- out : dpnp.ndarray @@ -1958,51 +2857,71 @@ def vander( Examples -------- >>> import dpnp as np - >>> x = np.array([1, 2, 3, 5]) + >>> x0 = np.array([1, 2, 3, 5]) >>> N = 3 - >>> np.vander(x, N) + >>> np.vander(x0, N) array([[ 1, 1, 1], [ 4, 2, 1], [ 9, 3, 1], [25, 5, 1]]) - >>> x = np.array([1, 2, 3, 5]) - >>> np.vander(x) + >>> np.vander(x0) array([[ 1, 1, 1, 1], [ 8, 4, 2, 1], [ 27, 9, 3, 1], [125, 25, 5, 1]]) - >>> np.vander(x, increasing=True) + >>> np.vander(x0, increasing=True) array([[ 1, 1, 1, 1], [ 1, 2, 4, 8], [ 1, 3, 9, 27], [ 1, 5, 25, 125]]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.vander(x0) # default case + >>> x, x.device, x.usm_type + (array([[ 1, 1, 1, 1], + [ 8, 4, 2, 1], + [ 27, 9, 3, 1], + [125, 25, 5, 1]]), Device(level_zero:gpu:0), 'device') + + >>> y = np.vander(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([[ 1, 1, 1, 1], + [ 8, 4, 2, 1], + [ 27, 9, 3, 1], + [125, 25, 5, 1]]), Device(opencl:cpu:0), 'device') + + >>> z = np.vander(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([[ 1, 1, 1, 1], + [ 8, 4, 2, 1], + [ 27, 9, 3, 1], + [125, 25, 5, 1]]), Device(level_zero:gpu:0), 'host') """ - x1 = dpnp.asarray( - x1, device=device, usm_type=usm_type, sycl_queue=sycl_queue - ) + x = dpnp.asarray(x, device=device, usm_type=usm_type, sycl_queue=sycl_queue) if N is not None and not isinstance(N, int): raise TypeError("An integer is required, but got {}".format(type(N))) - elif x1.ndim != 1: - raise ValueError("x1 must be a one-dimensional array or sequence.") + elif x.ndim != 1: + raise ValueError("`x` must be a one-dimensional array or sequence.") else: if N is None: - N = x1.size + N = x.size - _dtype = int if x1.dtype == 
bool else x1.dtype + _dtype = int if x.dtype == bool else x.dtype m = empty( - (x1.size, N), + (x.size, N), dtype=_dtype, - usm_type=x1.usm_type, - sycl_queue=x1.sycl_queue, + usm_type=x.usm_type, + sycl_queue=x.sycl_queue, ) tmp = m[:, ::-1] if not increasing else m dpnp.power( - x1.reshape(-1, 1), - dpnp.arange(N, dtype=_dtype, sycl_queue=x1.sycl_queue), + x.reshape(-1, 1), + dpnp.arange(N, dtype=_dtype, sycl_queue=x.sycl_queue), out=tmp, ) @@ -2024,6 +2943,31 @@ def zeros( For full documentation refer to :obj:`numpy.zeros`. + Parameters + ---------- + shape : {int, sequence of ints} + Shape of the new array, e.g., (2, 3) or 2. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is the default floating point + data type for the device where input array is allocated. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. Default is "device". + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of zeros with the given shape, dtype, and order. + Limitations ----------- Parameter `order` is supported only with values ``"C"`` and ``"F"``. 
@@ -2040,13 +2984,28 @@ def zeros( Examples -------- >>> import dpnp as np - >>> [i for i in np.zeros(5)] - [0.0, 0.0, 0.0, 0.0, 0.0] + >>> np.zeros(5) + array([0., 0., 0., 0., 0.]) >>> x = np.zeros((2, 1)) >>> x.ndim, x.size, x.shape (2, 2, (2, 1)) - >>> [i for i in x] - [0.0, 0.0] + >>> x + array([[0.], + [0.]]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.zeros(3) # default case + >>> x, x.device, x.usm_type + (array([0., 0., 0.]), Device(level_zero:gpu:0), 'device') + + >>> y = np.zeros(3, device="cpu") + >>> y, y.device, y.usm_type + (array([0., 0., 0.]), Device(opencl:cpu:0), 'device') + + >>> z = np.zeros(3, usm_type="host") + >>> z, z.device, z.usm_type + (array([0., 0., 0.]), Device(level_zero:gpu:0), 'host') """ if like is not None: @@ -2067,7 +3026,7 @@ def zeros( def zeros_like( - x1, + a, /, *, dtype=None, @@ -2083,10 +3042,35 @@ def zeros_like( For full documentation refer to :obj:`numpy.zeros_like`. + Parameters + ---------- + a : {dpnp_array, usm_ndarray} + The shape and dtype of `a` define these same attributes of the returned array. + dtype : dtype, optional + The desired dtype for the array, e.g., dpnp.int32. Default is ``None``, in which case + the data type of `a` is used. + order : {"C", "F"}, optional + Memory layout of the newly output array. Default: "C". + shape : {int, sequence of ints} + Overrides the shape of the result. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {None, "device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. 
Default is ``None``. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of zeros with the same shape and type as `a`. + Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` - Parameter `order` is supported with values ``"C"`` or ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -2100,25 +3084,39 @@ def zeros_like( Examples -------- >>> import dpnp as np - >>> x = np.arange(6) - >>> [i for i in x] - [0.0, 1.0, 2.0, 3.0, 4.0, 5.0] - >>> [i for i in np.zeros_like(x)] - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + >>> x0 = np.arange(6) + >>> x0 + array([0, 1, 2, 3, 4, 5]) + >>> np.zeros_like(x0) + array([0, 0, 0, 0, 0, 0]) + + Creating an array on a different device or with a specified usm_type + + >>> x = np.zeros_like(x0) # default case + >>> x, x.device, x.usm_type + (array([0, 0, 0, 0, 0, 0]), Device(level_zero:gpu:0), 'device') + + >>> y = np.zeros_like(x0, device="cpu") + >>> y, y.device, y.usm_type + (array([0, 0, 0, 0, 0, 0]), Device(opencl:cpu:0), 'device') + + >>> z = np.zeros_like(x0, usm_type="host") + >>> z, z.device, z.usm_type + (array([0, 0, 0, 0, 0, 0]), Device(level_zero:gpu:0), 'host') """ - if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + if not isinstance(a, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ("C", "c", "F", "f", None): pass elif subok is not False: pass else: - _shape = x1.shape if shape is None else shape - _dtype = x1.dtype if dtype is None else dtype - _usm_type = x1.usm_type if usm_type is None else usm_type + _shape = a.shape if shape is None else shape + _dtype = a.dtype if dtype is None else dtype + _usm_type = a.usm_type if usm_type is None else usm_type _sycl_queue = dpnp.get_normalized_queue_device( - x1, sycl_queue=sycl_queue, device=device + a, 
sycl_queue=sycl_queue, device=device ) return dpnp_container.zeros( _shape, @@ -2128,4 +3126,4 @@ def zeros_like( sycl_queue=_sycl_queue, ) - return call_origin(numpy.zeros_like, x1, dtype, order, subok, shape) + return call_origin(numpy.zeros_like, a, dtype, order, subok, shape) diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index f7b06ffc9be..267842c749c 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -880,6 +880,42 @@ def test_logspace_axis(axis): assert_dtype_allclose(func(dpnp), func(numpy)) +@pytest.mark.parametrize( + "data", [(), 1, (2, 3), [4], numpy.array(5), numpy.array([6, 7])] +) +def test_ascontiguousarray(data): + result = dpnp.ascontiguousarray(data) + expected = numpy.ascontiguousarray(data) + assert_dtype_allclose(result, expected) + assert result.shape == expected.shape + + +@pytest.mark.parametrize("data", [(), 1, (2, 3), [4]]) +def test_ascontiguousarray1(data): + result = dpnp.ascontiguousarray(dpnp.array(data)) + expected = numpy.ascontiguousarray(numpy.array(data)) + assert_dtype_allclose(result, expected) + assert result.shape == expected.shape + + +@pytest.mark.parametrize( + "data", [(), 1, (2, 3), [4], numpy.array(5), numpy.array([6, 7])] +) +def test_asfortranarray(data): + result = dpnp.asfortranarray(data) + expected = numpy.asfortranarray(data) + assert_dtype_allclose(result, expected) + assert result.shape == expected.shape + + +@pytest.mark.parametrize("data", [(), 1, (2, 3), [4]]) +def test_asfortranarray1(data): + result = dpnp.asfortranarray(dpnp.array(data)) + expected = numpy.asfortranarray(numpy.array(data)) + assert_dtype_allclose(result, expected) + assert result.shape == expected.shape + + def test_meshgrid_raise_error(): a = numpy.array([1, 2, 3, 4]) with pytest.raises(TypeError): diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 205d4efb572..78a869fac9d 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -141,6 +141,7 @@ def 
test_empty_like(device_x, device_y): @pytest.mark.parametrize( "func, args, kwargs", [ + pytest.param("copy", ["x0"], {}), pytest.param("diag", ["x0"], {}), pytest.param("full_like", ["x0"], {"fill_value": 5}), pytest.param("geomspace", ["x0[0:3]", "8", "4"], {}), @@ -225,6 +226,7 @@ def test_array_creation_follow_device_2d_array(func, args, kwargs, device): @pytest.mark.parametrize( "func, args, kwargs", [ + pytest.param("copy", ["x0"], {}), pytest.param("diag", ["x0"], {}), pytest.param("full", ["10", "x0[3]"], {}), pytest.param("full_like", ["x0"], {"fill_value": 5}), diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index bff548a90d0..5a29e677747 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -140,6 +140,7 @@ def test_coerced_usm_types_power(usm_type_x, usm_type_y): @pytest.mark.parametrize( "func, args", [ + pytest.param("copy", ["x0"]), pytest.param("diag", ["x0"]), pytest.param("empty_like", ["x0"]), pytest.param("full", ["10", "x0[3]"]), From ac1fca74c6bfbf6d65a9f0f80309cc0e89c0f8f5 Mon Sep 17 00:00:00 2001 From: vtavana <120411540+vtavana@users.noreply.github.com> Date: Tue, 6 Feb 2024 06:06:22 -0600 Subject: [PATCH 18/29] update `dpnp.dot` implementation (#1669) * dot_func * using mkl::dotu instead mkl::dotc for complex * fix a test * fix negative strides * add a temporary workaround * address comments * add a TODO comment * call dpt.vecdot for integer data types * update doc string * pass argument by reference * update doc to add boolean dtype --------- Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/backend/extensions/blas/CMakeLists.txt | 4 +- dpnp/backend/extensions/blas/blas_py.cpp | 21 +- dpnp/backend/extensions/blas/dot.cpp | 238 +++++++++++ dpnp/backend/extensions/blas/dot.hpp | 60 +++ dpnp/backend/extensions/blas/dotu.cpp | 241 +++++++++++ dpnp/backend/extensions/blas/gemm.cpp | 2 +- dpnp/backend/extensions/blas/gemm.hpp | 2 +- dpnp/backend/extensions/blas/gemm_batch.cpp | 2 +- 
dpnp/backend/extensions/blas/types_matrix.hpp | 45 ++- dpnp/backend/kernels/dpnp_krnl_common.cpp | 1 + dpnp/dpnp_algo/dpnp_algo.pxd | 7 - dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi | 100 ----- dpnp/dpnp_array.py | 25 +- dpnp/dpnp_iface_linearalgebra.py | 139 ++++--- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 262 ++++++++---- tests/skipped_tests.tbl | 3 +- tests/skipped_tests_gpu.tbl | 6 +- tests/skipped_tests_gpu_no_fp64.tbl | 85 ---- tests/test_dot.py | 379 ++++++++++++++++-- tests/test_mathematical.py | 5 +- tests/test_sycl_queue.py | 8 +- tests/test_usm_type.py | 4 +- .../cupy/linalg_tests/test_eigenvalue.py | 25 +- .../cupy/linalg_tests/test_product.py | 35 +- .../cupy/math_tests/test_matmul.py | 55 +++ 25 files changed, 1329 insertions(+), 425 deletions(-) create mode 100644 dpnp/backend/extensions/blas/dot.cpp create mode 100644 dpnp/backend/extensions/blas/dot.hpp create mode 100644 dpnp/backend/extensions/blas/dotu.cpp diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index d19f60c9792..fe3a92d2181 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -27,6 +27,8 @@ set(python_module_name _blas_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/blas_py.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dotu.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gemm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gemm_batch.cpp ) diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp index 524f16fcc7d..7d5237381b1 100644 --- a/dpnp/backend/extensions/blas/blas_py.cpp +++ b/dpnp/backend/extensions/blas/blas_py.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -30,6 +30,7 @@ #include #include +#include "dot.hpp" #include "gemm.hpp" namespace blas_ext = dpnp::backend::ext::blas; @@ -38,6 +39,8 @@ namespace py = pybind11; // populate dispatch tables void init_dispatch_tables(void) { + blas_ext::init_dot_dispatch_table(); + blas_ext::init_dotu_dispatch_table(); blas_ext::init_gemm_batch_dispatch_table(); blas_ext::init_gemm_dispatch_table(); } @@ -46,6 +49,22 @@ PYBIND11_MODULE(_blas_impl, m) { init_dispatch_tables(); + { + m.def("_dot", &blas_ext::dot, + "Call `dot` from OneMKL LAPACK library to return " + "the dot product of two real-valued vectors.", + py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"), + py::arg("result"), py::arg("depends") = py::list()); + } + + { + m.def("_dotu", &blas_ext::dotu, + "Call `dotu` from OneMKL LAPACK library to return " + "the dot product of two complex vectors.", + py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"), + py::arg("result"), py::arg("depends") = py::list()); + } + { m.def("_gemm", &blas_ext::gemm, "Call `gemm` from OneMKL LAPACK library to return " diff --git a/dpnp/backend/extensions/blas/dot.cpp 
b/dpnp/backend/extensions/blas/dot.cpp new file mode 100644 index 00000000000..048738f57a9 --- /dev/null +++ b/dpnp/backend/extensions/blas/dot.cpp @@ -0,0 +1,238 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "dot.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +namespace mkl_blas = oneapi::mkl::blas; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*dot_impl_fn_ptr_t)(sycl::queue &, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::vector &); + +static dot_impl_fn_ptr_t dot_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + +template +static sycl::event dot_impl(sycl::queue &exec_q, + const std::int64_t n, + char *vectorA, + const std::int64_t stride_a, + char *vectorB, + const std::int64_t stride_b, + char *result, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + Tab *a = reinterpret_cast(vectorA); + Tab *b = reinterpret_cast(vectorB); + Tc *res = reinterpret_cast(result); + + std::stringstream error_msg; + bool is_exception_caught = false; + + sycl::event dot_event; + try { + dot_event = mkl_blas::row_major::dot(exec_q, + n, // size of the input vectors + a, // Pointer to vector a. + stride_a, // Stride of vector a. + b, // Pointer to vector b. + stride_b, // Stride of vector b. + res, // Pointer to result. 
+ depends); + } catch (oneapi::mkl::exception const &e) { + error_msg + << "Unexpected MKL exception caught during dot() call:\nreason: " + << e.what(); + is_exception_caught = true; + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during dot() call:\n" + << e.what(); + is_exception_caught = true; + } + + if (is_exception_caught) // an unexpected error occurs + { + throw std::runtime_error(error_msg.str()); + } + + return dot_event; +} + +std::pair dot(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray vectorA, + dpctl::tensor::usm_ndarray vectorB, + dpctl::tensor::usm_ndarray result, + const std::vector &depends) +{ + const int vectorA_nd = vectorA.get_ndim(); + const int vectorB_nd = vectorB.get_ndim(); + const int result_nd = result.get_ndim(); + + if ((vectorA_nd != 1)) { + throw py::value_error( + "The first input array has ndim=" + std::to_string(vectorA_nd) + + ", but a 1-dimensional array is expected."); + } + + if ((vectorB_nd != 1)) { + throw py::value_error( + "The second input array has ndim=" + std::to_string(vectorB_nd) + + ", but a 1-dimensional array is expected."); + } + + if ((result_nd != 0)) { + throw py::value_error( + "The output array has ndim=" + std::to_string(result_nd) + + ", but a 0-dimensional array is expected."); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(vectorA, result)) { + throw py::value_error( + "The first input array and output array are overlapping " + "segments of memory"); + } + if (overlap(vectorB, result)) { + throw py::value_error( + "The second input array and output array are overlapping " + "segments of memory"); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, + {vectorA.get_queue(), vectorB.get_queue(), result.get_queue()})) + { + throw py::value_error( + "USM allocations are not compatible with the execution queue."); + } + + py::ssize_t a_size = 
vectorA.get_size(); + py::ssize_t b_size = vectorB.get_size(); + if (a_size != b_size) { + throw py::value_error("The size of the first input array must be " + "equal to the size of the second input array."); + } + + std::vector a_stride = vectorA.get_strides_vector(); + std::vector b_stride = vectorB.get_strides_vector(); + + const std::int64_t n = a_size; + const std::int64_t str_a = a_stride[0]; + const std::int64_t str_b = b_stride[0]; + + int vectorA_typenum = vectorA.get_typenum(); + int vectorB_typenum = vectorB.get_typenum(); + int result_typenum = result.get_typenum(); + + if (vectorA_typenum != vectorB_typenum) { + throw py::value_error("vectorA and vectorB must be of the same type."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int vectorAB_type_id = array_types.typenum_to_lookup_id(vectorA_typenum); + int result_type_id = array_types.typenum_to_lookup_id(result_typenum); + + dot_impl_fn_ptr_t dot_fn = + dot_dispatch_table[vectorAB_type_id][result_type_id]; + if (dot_fn == nullptr) { + throw py::value_error( + "Types of input vectors and result array are mismatched."); + } + + char *a_typeless_ptr = vectorA.get_data(); + char *b_typeless_ptr = vectorB.get_data(); + char *r_typeless_ptr = result.get_data(); + + const int a_elemsize = vectorA.get_elemsize(); + const int b_elemsize = vectorB.get_elemsize(); + if (str_a < 0) { + a_typeless_ptr -= (n - 1) * std::abs(str_a) * a_elemsize; + } + if (str_b < 0) { + b_typeless_ptr -= (n - 1) * std::abs(str_b) * b_elemsize; + } + + sycl::event dot_ev = dot_fn(exec_q, n, a_typeless_ptr, str_a, + b_typeless_ptr, str_b, r_typeless_ptr, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {vectorA, vectorB, result}, {dot_ev}); + + return std::make_pair(args_ev, dot_ev); +} + +template +struct DotContigFactory +{ + fnT get() + { + if constexpr (types::DotTypePairSupportFactory::is_defined) { + return dot_impl; + } + else { + return nullptr; + } + } +}; + +void 
init_dot_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(dot_dispatch_table); +} +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/dot.hpp b/dpnp/backend/extensions/blas/dot.hpp new file mode 100644 index 00000000000..3468196f760 --- /dev/null +++ b/dpnp/backend/extensions/blas/dot.hpp @@ -0,0 +1,60 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +extern std::pair + dot(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray vectorA, + dpctl::tensor::usm_ndarray vectorB, + dpctl::tensor::usm_ndarray result, + const std::vector &depends); + +extern std::pair + dotu(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray vectorA, + dpctl::tensor::usm_ndarray vectorB, + dpctl::tensor::usm_ndarray result, + const std::vector &depends); + +extern void init_dot_dispatch_table(void); +extern void init_dotu_dispatch_table(void); +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/dotu.cpp b/dpnp/backend/extensions/blas/dotu.cpp new file mode 100644 index 00000000000..8c4b43f8034 --- /dev/null +++ b/dpnp/backend/extensions/blas/dotu.cpp @@ -0,0 +1,241 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "dot.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +namespace mkl_blas = oneapi::mkl::blas; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*dotu_impl_fn_ptr_t)(sycl::queue &, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::vector &); + +static dotu_impl_fn_ptr_t dotu_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + +template +static sycl::event dotu_impl(sycl::queue &exec_q, + const std::int64_t n, + char *vectorA, + const std::int64_t stride_a, + char *vectorB, + const std::int64_t stride_b, + char *result, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + Tab *a = reinterpret_cast(vectorA); + Tab *b = reinterpret_cast(vectorB); + Tc *res = reinterpret_cast(result); + + std::stringstream error_msg; + bool is_exception_caught = false; + + sycl::event dotu_event; + try { + dotu_event = mkl_blas::row_major::dotu(exec_q, + n, // size of the input vectors + a, // Pointer to vector a. + stride_a, // Stride of vector a. 
+ b, // Pointer to vector b. + stride_b, // Stride of vector b. + res, // Pointer to result. + depends); + } catch (oneapi::mkl::exception const &e) { + error_msg + << "Unexpected MKL exception caught during dotu() call:\nreason: " + << e.what(); + is_exception_caught = true; + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during dotu() call:\n" + << e.what(); + is_exception_caught = true; + } + + if (is_exception_caught) // an unexpected error occurs + { + throw std::runtime_error(error_msg.str()); + } + + return dotu_event; +} + +std::pair + dotu(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray vectorA, + dpctl::tensor::usm_ndarray vectorB, + dpctl::tensor::usm_ndarray result, + const std::vector &depends) +{ + const int vectorA_nd = vectorA.get_ndim(); + const int vectorB_nd = vectorB.get_ndim(); + const int result_nd = result.get_ndim(); + + if ((vectorA_nd != 1)) { + throw py::value_error( + "The first input array has ndim=" + std::to_string(vectorA_nd) + + ", but a 1-dimensional array is expected."); + } + + if ((vectorB_nd != 1)) { + throw py::value_error( + "The second input array has ndim=" + std::to_string(vectorB_nd) + + ", but a 1-dimensional array is expected."); + } + + if ((result_nd != 0)) { + throw py::value_error( + "The output array has ndim=" + std::to_string(result_nd) + + ", but a 0-dimensional array is expected."); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(vectorA, result)) { + throw py::value_error( + "The first input array and output array are overlapping " + "segments of memory"); + } + if (overlap(vectorB, result)) { + throw py::value_error( + "The second input array and output array are overlapping " + "segments of memory"); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, + {vectorA.get_queue(), vectorB.get_queue(), result.get_queue()})) + { + throw py::value_error( + "USM 
allocations are not compatible with the execution queue."); + } + + py::ssize_t a_size = vectorA.get_size(); + py::ssize_t b_size = vectorB.get_size(); + if (a_size != b_size) { + throw py::value_error("The size of the first input array must be " + "equal to the size of the second input array."); + } + + std::vector a_stride = vectorA.get_strides_vector(); + std::vector b_stride = vectorB.get_strides_vector(); + + const std::int64_t n = a_size; + const std::int64_t str_a = a_stride[0]; + const std::int64_t str_b = b_stride[0]; + + int vectorA_typenum = vectorA.get_typenum(); + int vectorB_typenum = vectorB.get_typenum(); + int result_typenum = result.get_typenum(); + + if (vectorA_typenum != vectorB_typenum) { + throw py::value_error( + "Input arrays must be of must be of the same type."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int vectorAB_type_id = array_types.typenum_to_lookup_id(vectorA_typenum); + int result_type_id = array_types.typenum_to_lookup_id(result_typenum); + + dotu_impl_fn_ptr_t dotu_fn = + dotu_dispatch_table[vectorAB_type_id][result_type_id]; + if (dotu_fn == nullptr) { + throw py::value_error( + "Types of input vectors and result array are mismatched."); + } + + char *a_typeless_ptr = vectorA.get_data(); + char *b_typeless_ptr = vectorB.get_data(); + char *r_typeless_ptr = result.get_data(); + + const int a_elemsize = vectorA.get_elemsize(); + const int b_elemsize = vectorB.get_elemsize(); + if (str_a < 0) { + a_typeless_ptr -= (n - 1) * std::abs(str_a) * a_elemsize; + } + if (str_b < 0) { + b_typeless_ptr -= (n - 1) * std::abs(str_b) * b_elemsize; + } + + sycl::event dotu_ev = + dotu_fn(exec_q, n, a_typeless_ptr, str_a, b_typeless_ptr, str_b, + r_typeless_ptr, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {vectorA, vectorB, result}, {dotu_ev}); + + return std::make_pair(args_ev, dotu_ev); +} + +template +struct DotuContigFactory +{ + fnT get() + { + if constexpr 
(types::DotuTypePairSupportFactory::is_defined) { + return dotu_impl; + } + else { + return nullptr; + } + } +}; + +void init_dotu_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(dotu_dispatch_table); +} +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/gemm.cpp b/dpnp/backend/extensions/blas/gemm.cpp index 5526ecd3c1b..a26420f49b3 100644 --- a/dpnp/backend/extensions/blas/gemm.cpp +++ b/dpnp/backend/extensions/blas/gemm.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/blas/gemm.hpp b/dpnp/backend/extensions/blas/gemm.hpp index 25f78b5b850..3f1ec6e745a 100644 --- a/dpnp/backend/extensions/blas/gemm.hpp +++ b/dpnp/backend/extensions/blas/gemm.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/blas/gemm_batch.cpp b/dpnp/backend/extensions/blas/gemm_batch.cpp index 32f592f6b8a..9359901edd8 100644 --- a/dpnp/backend/extensions/blas/gemm_batch.cpp +++ b/dpnp/backend/extensions/blas/gemm_batch.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/blas/types_matrix.hpp b/dpnp/backend/extensions/blas/types_matrix.hpp index 49154df03c4..c36ae0e2045 100644 --- a/dpnp/backend/extensions/blas/types_matrix.hpp +++ b/dpnp/backend/extensions/blas/types_matrix.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -43,6 +43,49 @@ namespace blas { namespace types { +/** + * @brief A factory to define pairs of supported types for which + * MKL BLAS library provides support in oneapi::mkl::blas::dot + * function. + * + * @tparam Tab Type of arrays containing input vectors A and B. + * @tparam Tc Type of array containing output. + */ +template +struct DotTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL BLAS library provides support in oneapi::mkl::blas::dotu + * function. + * + * @tparam Tab Type of arrays containing input vectors A and B. + * @tparam Tc Type of array containing output. 
+ */ +template +struct DotuTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL BLAS library provides support in oneapi::mkl::blas::gemm diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index e664c30b848..04eac54310d 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -1040,6 +1040,7 @@ void func_map_init_linalg(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_DOT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_dot_default_c}; + // needed for "dpnp_correlate_c" function in dpnp_krnl_statistics.cpp fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_dot_ext_c}; fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_INT][eft_LNG] = { diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 28e21340647..2fc7e1b4a3b 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -54,8 +54,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL DPNP_FN_DIAGONAL_EXT - DPNP_FN_DOT - DPNP_FN_DOT_EXT DPNP_FN_EDIFF1D DPNP_FN_EDIFF1D_EXT DPNP_FN_EIG @@ -282,11 +280,6 @@ cpdef dpnp_descriptor dpnp_isclose(dpnp_descriptor input1, dpnp_descriptor input double rtol=*, double atol=*, cpp_bool equal_nan=*) -""" -Linear algebra -""" -cpdef dpnp_descriptor dpnp_dot(dpnp_descriptor in_array1, dpnp_descriptor in_array2) - """ Array creation routines """ diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi index 9b4faf2a1b5..09336b5aaa3 100644 --- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi 
@@ -36,7 +36,6 @@ and the rest of the library # NO IMPORTs here. All imports must be placed into main "dpnp_algo.pyx" file __all__ += [ - "dpnp_dot", "dpnp_inner", "dpnp_kron", ] @@ -47,105 +46,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_shapes_t)(c_dpctl.DPCTLSyclQue void *, void * , void * , shape_elem_type * , shape_elem_type *, shape_elem_type * , size_t, const c_dpctl.DPCTLEventVectorRef) -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_dot_t)(c_dpctl.DPCTLSyclQueueRef, - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - const c_dpctl.DPCTLEventVectorRef) except + - -cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, - utils.dpnp_descriptor in_array2, - utils.dpnp_descriptor out=None): - cdef shape_type_c shape1, shape2 - - shape1 = in_array1.shape - shape2 = in_array2.shape - - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) - cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype) - - # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT_EXT, param1_type, param2_type) - cdef utils.dpnp_descriptor result - - ndim1 = in_array1.ndim - ndim2 = in_array2.ndim - cdef shape_type_c result_shape - if ndim1 == 0: - result_shape = shape2 - elif ndim2 == 0: - result_shape = shape1 - elif ndim1 == 1 and ndim2 == 1: - result_shape = () - elif ndim1 == 1: # ndim2 > 1 - result_shape = shape2[::-2] if ndim2 == 2 else shape2[::2] - elif ndim2 == 1: # ndim1 > 1 - result_shape = shape1[:-1] - else: - if ndim1 == 1: - shape1 = (1, shape1[0]) - if ndim2 == 1: - shape2 = (shape1[0], 1) - result_shape = shape1[:-1] + shape2[:-2] + shape2[-1:] - - result_sycl_device, 
result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2) - - if out is None: - # create result array with type given by FPTR data - result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - else: - result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type) - if out.dtype != result_type: - utils.checker_throw_value_error('dot', 'out.dtype', out.dtype, result_type) - if out.shape != result_shape: - utils.checker_throw_value_error('dot', 'out.shape', out.shape, result_shape) - - result = out - - utils.get_common_usm_allocation(in_array1, result) # check USM allocation is common - - cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape) - cdef shape_type_c in_array1_shape = in_array1.shape - cdef shape_type_c in_array1_strides = utils.strides_to_vector(in_array1.strides, in_array1.shape) - cdef shape_type_c in_array2_shape = in_array2.shape - cdef shape_type_c in_array2_strides = utils.strides_to_vector(in_array2.strides, in_array2.shape) - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_2in_1out_dot_t func = kernel_data.ptr - # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - result.get_data(), - result.size, - result.ndim, - result_shape.data(), - result_strides.data(), - in_array1.get_data(), - in_array1.size, - in_array1.ndim, - in_array1_shape.data(), - in_array1_strides.data(), - in_array2.get_data(), - in_array2.size, - in_array2.ndim, - in_array2_shape.data(), - in_array2_strides.data(), - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result cpdef utils.dpnp_descriptor dpnp_inner(dpnp_descriptor array1, dpnp_descriptor array2): diff --git a/dpnp/dpnp_array.py 
b/dpnp/dpnp_array.py index cf848b50690..b5e75dde07c 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -704,8 +704,29 @@ def diagonal(input, offset=0, axis1=0, axis2=1): return dpnp.diagonal(input, offset, axis1, axis2) - def dot(self, other, out=None): - return dpnp.dot(self, other, out) + def dot(self, b, out=None): + """ + Dot product of two arrays. + + For full documentation refer to :obj:`dpnp.dot`. + + Examples + -------- + >>> import dpnp as np + >>> a = np.eye(2) + >>> b = np.ones((2, 2)) * 2 + >>> a.dot(b) + array([[2., 2.], + [2., 2.]]) + + This array method can be conveniently chained: + + >>> a.dot(b).dot(b) + array([[8., 8.], + [8., 8.]]) + """ + + return dpnp.dot(self, b, out) @property def dtype(self): diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index d39b84a50ec..9d63f7f8c3d 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -38,13 +38,12 @@ """ -import dpctl.tensor as dpt import numpy import dpnp from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * -from dpnp.dpnp_utils.dpnp_utils_linearalgebra import dpnp_matmul +from dpnp.dpnp_utils.dpnp_utils_linearalgebra import dpnp_dot, dpnp_matmul __all__ = [ "dot", @@ -59,87 +58,99 @@ ] -def dot(x1, x2, out=None, **kwargs): +def dot(a, b, out=None): """ - Dot product of `x1` and `x2`. + Dot product of `a` and `b`. For full documentation refer to :obj:`numpy.dot`. + Parameters + ---------- + a : {dpnp_array, usm_ndarray, scalar} + First input array. Both inputs `a` and `b` can not be scalars at the same time. + b : {dpnp_array, usm_ndarray, scalar} + Second input array. Both inputs `a` and `b` can not be scalars at the same time. + out : {dpnp.ndarray, usm_ndarray}, optional + Alternative output array in which to place the result. It must have + the same shape and data type as the expected output and should be + C-contiguous. If these conditions are not met, an exception is + raised, instead of attempting to be flexible. 
+ Returns ------- - y : dpnp.ndarray - Returns the dot product of `x1` and `x2`. + out : dpnp.ndarray + Returns the dot product of `a` and `b`. If `out` is given, then it is returned. - Limitations - ----------- - Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` - or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. - Keyword argument ``kwargs`` is currently unsupported. - Otherwise the functions will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - See Also -------- + :obj:`dpnp.ndarray.dot` : Equivalent method. :obj:`dpnp.tensordot` : Sum products over arbitrary axes. :obj:`dpnp.vdot` : Complex-conjugating dot product. + :obj:`dpnp.einsum` : Einstein summation convention. + :obj:`dpnp.matmul` : Matrix product of two arrays. + :obj:`dpnp.linalg.multi_dot` : Chained dot product. Examples -------- - >>> import dpnp as dp - >>> a = dp.array([1, 2, 3]) - >>> b = dp.array([1, 2, 3]) - >>> dp.dot(a, b) - 14 + >>> import dpnp as np + >>> a = np.array([1, 2, 3]) + >>> b = np.array([1, 2, 3]) + >>> np.dot(a, b) + array(14) + + Neither argument is complex-conjugated: + + >>> np.dot(np.array([2j, 3j]), np.array([2j, 3j])) + array(-13+0j) + + For 2-D arrays it is the matrix product: + + >>> a = np.array([[1, 0], [0, 1]]) + >>> b = np.array([[4, 1], [2, 2]]) + >>> np.dot(a, b) + array([[4, 1], + [2, 2]]) + + >>> a = np.arange(3*4*5*6).reshape((3,4,5,6)) + >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3)) + >>> np.dot(a, b)[2,3,2,1,2,2] + array(499128) + >>> sum(a[2,3,2,:] * b[1,2,:,2]) + array(499128) """ - if kwargs: - pass - elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # at least either x1 or x2 has to be an array - pass + dpnp.check_supported_arrays_type(a, scalar_type=True) + dpnp.check_supported_arrays_type(b, scalar_type=True) + + if out is not None: + dpnp.check_supported_arrays_type(out) + if not out.flags.c_contiguous: + raise 
ValueError("Only C-contiguous array is acceptable.") + + if dpnp.isscalar(a) or dpnp.isscalar(b): + # TODO: investigate usage of axpy (axpy_batch) or scal + # functions from BLAS here instead of dpnp.multiply + return dpnp.multiply(a, b, out=out) + elif a.ndim == 0 or b.ndim == 0: + # TODO: investigate usage of axpy (axpy_batch) or scal + # functions from BLAS here instead of dpnp.multiply + return dpnp.multiply(a, b, out=out) + elif a.ndim == 1 and b.ndim == 1: + return dpnp_dot(a, b, out=out) + elif a.ndim == 2 and b.ndim == 2: + # NumPy does not allow casting even if it is safe + return dpnp.matmul(a, b, out=out, casting="no") + elif a.ndim == 1 or b.ndim == 1: + # NumPy does not allow casting even if it is safe + return dpnp.matmul(a, b, out=out, casting="no") else: - # get USM type and queue to copy scalar from the host memory into a USM allocation - usm_type, queue = ( - get_usm_allocations([x1, x2]) - if dpnp.isscalar(x1) or dpnp.isscalar(x2) - else (None, None) - ) - - x1_desc = dpnp.get_dpnp_descriptor( - x1, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - x2_desc = dpnp.get_dpnp_descriptor( - x2, - copy_when_strides=False, - copy_when_nondefault_queue=False, - alloc_usm_type=usm_type, - alloc_queue=queue, - ) - if x1_desc and x2_desc: - if out is not None: - if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): - raise TypeError( - "return array must be of supported array type" - ) - out_desc = ( - dpnp.get_dpnp_descriptor( - out, - copy_when_strides=False, - copy_when_nondefault_queue=False, - ) - or None - ) - else: - out_desc = None - return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj() - - return call_origin(numpy.dot, x1, x2, out=out, **kwargs) + # TODO: investigate usage of matmul for some possible + # use cases instead of dpnp.tensordot + result = dpnp.tensordot(a, b, axes=(-1, -2)) + # NumPy does not allow casting even if it is safe + return 
dpnp.get_result_array(result, out, casting="no") def einsum(*args, **kwargs): diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index d0add55eee3..65d97befa98 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -24,69 +24,46 @@ # ***************************************************************************** import dpctl +import dpctl.tensor as dpt import dpctl.tensor._tensor_impl as ti import numpy import dpnp import dpnp.backend.extensions.blas._blas_impl as bi +from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import get_usm_allocations -__all__ = ["dpnp_matmul"] +__all__ = ["dpnp_dot", "dpnp_matmul"] -def _gemm_res_dtype(*arrays, dtype, casting, sycl_queue): +def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None): """ - Determines the output array data type and the intermediate data type. - - If dtype is ``None``, the output array data type is determined based on - the Promotion Type Rule and device capabilities. Otherwise, `dtype` is - used as output array dtype if input arrays can cast to it according to - the casting rule determined. If casting cannot be done, a ``TypeError`` - is raised. - The intermediate data type is the data type used for performing matmul - operation calculations. If output array dtype is a floating-point data type, - it is also used for the intermediate data type. If output array dtype is an - integral data type, the default floating point data type of the device where - input arrays are allocated on are used for intermediate data type. + Creating a copy of input array if needed. - Parameters - ---------- - arrays : {dpnp.ndarray, usm_ndarray} - Input arrays. - dtype : dtype - If not ``None``, data type of the output array. - casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional - Controls what kind of data casting may occur. 
- sycl_queue : {SyclQueue} - A SYCL queue to use for determining default floating point datat type. - - Returns - ------- - gemm_dtype, res_dtype : - `gemm_dtype` is the data type used in performing matmul calculations. - The input arrays of matmul function are cast to `gemm_dtype` and then - the calculations are performed. - `res_dtype` is the output data type. When the result is obtained, it is cast - to `res_dtype`. + If `contig_copy` is ``True``, a C-contiguous copy of input array is returned. + In this case, the copy array has the input array data type unless `dtype` is + determined. + If `contig_copy` is ``False`` and input array data type is different than `dtype`, + a C-contiguous copy of input array with specified `dtype` is returned. """ - res_dtype = dpnp.result_type(*arrays) - default_dtype = dpnp.default_float_type(sycl_queue=sycl_queue) - - if dtype is not None: - if dpnp.can_cast(res_dtype, dtype, casting=casting): - res_dtype = dtype - else: - raise TypeError( - f"Cannot cast ufunc 'matmul' output from dtype({res_dtype}) to dtype({dtype}) with casting rule {casting}" - ) - - gemm_dtype = ( - res_dtype if dpnp.issubdtype(res_dtype, dpnp.inexact) else default_dtype - ) + if contig_copy: + copy = contig_copy + else: + copy = x.dtype != dtype if dtype is not None else False - return gemm_dtype, res_dtype + if copy: + x_copy = dpnp.empty_like(x, dtype=dtype, order="C") + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=dpnp.get_usm_ndarray(x), + dst=x_copy.get_array(), + sycl_queue=x.sycl_queue, + ) + dep_events.append(copy_ev) + host_events.append(ht_copy_ev) + return x_copy + return x def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list): @@ -95,8 +72,10 @@ def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list): # when the input array is F-contiguous, the data of 2D array # that needs to be called in mkl::gemm_batch are not contiguous. 
ht_tasks_list = [] - x1 = _get_gemm_contig_array(x1, dev_tasks_list, ht_tasks_list) - x2 = _get_gemm_contig_array(x2, dev_tasks_list, ht_tasks_list) + contig_copy = not x1.flags.c_contiguous + x1 = _copy_array(x1, dev_tasks_list, ht_tasks_list, contig_copy=contig_copy) + contig_copy = not x2.flags.c_contiguous + x2 = _copy_array(x2, dev_tasks_list, ht_tasks_list, contig_copy=contig_copy) x1_strides = x1.strides x2_strides = x2.strides @@ -149,41 +128,133 @@ def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list): return ht_blas_ev, ht_tasks_list, res -def _get_gemm_contig_array(x, dep_events, host_events, dtype=None): +def _op_res_dtype(*arrays, dtype, casting, sycl_queue): + """ + _op_res_dtype(*arrays, dtype, casting, sycl_queue) + + Determines the output array data type and an intermediate data type + used in performing calculations related to a specific math function. + If dtype is ``None``, the output array data type of the operation is + determined based on the Promotion Type Rule and device capabilities. + Otherwise, `dtype` is used as output array dtype, if input arrays + can cast to it according to the casting rule determined. If casting + cannot be done, a ``TypeError`` is raised. + The intermediate data type is the data type used for performing the math + function calculations. If output array dtype is a floating-point data type, + it is also used for the intermediate data type. If output array dtype is an + integral data type, the default floating point data type of the device where + input arrays are allocated on is used for intermediate data type. + + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + Input arrays. + dtype : dtype + If not ``None``, data type of the output array. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. + sycl_queue : {SyclQueue} + A SYCL queue to use for determining default floating point data type. +
+ + Returns + ------- + op_dtype, res_dtype : + `op_dtype` is the data type used in performing math function calculations. + The input arrays of the math function are cast to `op_dtype` and then + the calculations are performed. + `res_dtype` is the output data type. When the result is obtained, it is cast + to `res_dtype`. + """ - Creating a copy of input array if needed. - This function has two use cases. In the first use case, which is more general, - if the input array is not c-contiguous or f-contiguous, we ensure it becomes - c-contiguous. Additionally, if the input array has an integral dtype, we - convert it to an appropriate floating-point data type specified by `dtype`. - In the second use case, which is for N-dimensional arrays with N>2, we need - to ensure c-contiguity. This is crucial because the implementation of the - `gemm_batch` function in dpnp only works for C-contiguous arrays. This use case - is essential when the input array is f-contiguous with floating point dtype for - which the array is not modified in the first use case. + res_dtype = dpnp.result_type(*arrays) + default_dtype = dpnp.default_float_type(sycl_queue=sycl_queue) + if dtype is not None: + if dpnp.can_cast(res_dtype, dtype, casting=casting): + res_dtype = dtype + else: + raise TypeError( + f"Cannot cast ufunc 'matmul' output from dtype({res_dtype}) to dtype({dtype}) with casting rule {casting}" + ) + + op_dtype = ( + res_dtype if dpnp.issubdtype(res_dtype, dpnp.inexact) else default_dtype + ) + + return op_dtype, res_dtype + + +def dpnp_dot(a, b, /, out=None): """ + Return the dot product of two arrays. 
- if dtype is None: - copy = not x.flags.c_contiguous - else: - copy = ( - not (x.flags.c_contiguous or x.flags.f_contiguous) - or x.dtype != dtype - ) + The routine that is used to perform the main calculation + depends on input array data types: 1) For integer and boolean data types, + `dpctl.tensor.vecdot` from the Data Parallel Control library is used, + 2) For floating point real-valued data types, `dot` routines from + BLAS library of OneMKL is used, and 3) For complex data types, + `dotu` routines from BLAS library of OneMKL is used. - if copy: - x_copy = dpnp.empty_like(x, dtype=dtype, order="C") - ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=dpnp.get_usm_ndarray(x), - dst=x_copy.get_array(), - sycl_queue=x.sycl_queue, + """ + + if a.size != b.size: + raise ValueError( + "Input arrays have a mismatch in their size. " + f"(size {a.size} is different from {b.size})" ) - dep_events.append(copy_ev) - host_events.append(ht_copy_ev) - return x_copy - return x + + res_usm_type, exec_q = get_usm_allocations([a, b]) + + # Determine the appropriate data types + # casting is irrelevant here since dtype is `None` + dot_dtype, res_dtype = _op_res_dtype( + a, b, dtype=None, casting="no", sycl_queue=exec_q + ) + + # create result array + result = dpnp.empty( + (), + dtype=dot_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + ) + + # input arrays should have the proper data type + dep_events_list = [] + host_tasks_list = [] + if dpnp.issubdtype(res_dtype, dpnp.inexact): + # copying is needed if dtypes of input arrays are different + a = _copy_array(a, dep_events_list, host_tasks_list, dtype=dot_dtype) + b = _copy_array(b, dep_events_list, host_tasks_list, dtype=dot_dtype) + if dpnp.issubdtype(res_dtype, dpnp.complexfloating): + ht_ev, _ = bi._dotu( + exec_q, + dpnp.get_usm_ndarray(a), + dpnp.get_usm_ndarray(b), + dpnp.get_usm_ndarray(result), + dep_events_list, + ) + else: + ht_ev, _ = bi._dot( + exec_q, + dpnp.get_usm_ndarray(a), + 
dpnp.get_usm_ndarray(b), + dpnp.get_usm_ndarray(result), + dep_events_list, + ) + host_tasks_list.append(ht_ev) + dpctl.SyclEvent.wait_for(host_tasks_list) + else: + dpt_a = dpnp.get_usm_ndarray(a) + dpt_b = dpnp.get_usm_ndarray(b) + result = dpnp_array._create_from_usm_ndarray(dpt.vecdot(dpt_a, dpt_b)) + + if dot_dtype != res_dtype: + result = result.astype(res_dtype, copy=False) + + # NumPy does not allow casting even if it is safe + return dpnp.get_result_array(result, out, casting="no") def dpnp_matmul( @@ -197,8 +268,6 @@ def dpnp_matmul( dtype=None, ): """ - dpnp_matmul(x1, x2, out=None, casting="same_kind", order="K", dtype=None) - Return the matrix product of two arrays. The main calculation is done by calling an extension function @@ -222,14 +291,16 @@ def dpnp_matmul( res_usm_type, exec_q = get_usm_allocations([x1, x2]) - squeeze_flag = x1_ndim == 1 or x2_ndim == 1 + appended_axes = [] if x1_ndim == 1: x1 = x1[dpnp.newaxis, :] x1_ndim = x1.ndim + appended_axes.append(-2) if x2_ndim == 1: x2 = x2[:, dpnp.newaxis] x2_ndim = x2.ndim + appended_axes.append(-1) x1_shape = x1.shape x2_shape = x2.shape @@ -241,7 +312,7 @@ def dpnp_matmul( ) # Determine the appropriate data types - gemm_dtype, res_dtype = _gemm_res_dtype( + gemm_dtype, res_dtype = _op_res_dtype( x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q ) @@ -306,13 +377,28 @@ def dpnp_matmul( # and be C_CONTIGUOUS or F_CONTIGUOUS dep_events_list = [] host_tasks_list = [] - x1 = _get_gemm_contig_array( - x1, dep_events_list, host_tasks_list, gemm_dtype + contig_copy = not (x1.flags.c_contiguous or x1.flags.f_contiguous) + x1 = _copy_array( + x1, + dep_events_list, + host_tasks_list, + contig_copy=contig_copy, + dtype=gemm_dtype, ) - x2 = _get_gemm_contig_array( - x2, dep_events_list, host_tasks_list, gemm_dtype + contig_copy = not (x2.flags.c_contiguous or x2.flags.f_contiguous) + x2 = _copy_array( + x2, + dep_events_list, + host_tasks_list, + contig_copy=contig_copy, + dtype=gemm_dtype, ) + # TODO: 
investigate usage of gemv (gemv_batch) function + # from BLAS when one of the inputs is a vector to + # gain performance. + # TODO: investigate usage of syrk function from BLAS in + # case of a.T @ a and a @ a.T to gain performance. if x1_is_2D and x2_is_2D: ht_blas_ev, _ = bi._gemm( exec_q, @@ -340,8 +426,8 @@ def dpnp_matmul( host_tasks_list.append(ht_blas_ev) dpctl.SyclEvent.wait_for(host_tasks_list) - if squeeze_flag: - result = dpnp.squeeze(result) + if appended_axes: + result = dpnp.squeeze(result, tuple(appended_axes)) if x1_is_2D and x2_is_2D: # add new axes only if one of the input arrays diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 018255c1e40..f91a4f23289 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -331,13 +331,12 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWith tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3 + tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_dot_vec2 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot 
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index fe3671ecf7f..c3464096085 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -151,8 +151,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumError::test_too_ma tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3 -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot - tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_0_{d=2, shape=(4, 3, 2)}::test_normal tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_1_{d=2, shape=(3, 2)}::test_normal tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal @@ -435,17 +433,17 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 + tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_dot_vec2 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot 
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot +tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal diff --git a/tests/skipped_tests_gpu_no_fp64.tbl b/tests/skipped_tests_gpu_no_fp64.tbl index 26e11a70062..d724a6043e5 100644 --- a/tests/skipped_tests_gpu_no_fp64.tbl +++ b/tests/skipped_tests_gpu_no_fp64.tbl @@ -30,91 +30,6 @@ tests/test_umath.py::test_umaths[('floor_divide', 'ff')] tests/third_party/cupy/linalg_tests/test_eigenvalue.py::TestEigenvalue_param_0_{UPLO='U'}::test_eigh_batched tests/third_party/cupy/linalg_tests/test_eigenvalue.py::TestEigenvalue_param_1_{UPLO='L'}::test_eigh_batched -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_0_{shape=((2, 3, 4), (3, 4, 2)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_1_{shape=((2, 3, 4), (3, 4, 2)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_2_{shape=((2, 3, 4), (3, 4, 2)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_3_{shape=((2, 3, 4), (3, 4, 2)), trans_a=False, trans_b=False}::test_dot 
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_4_{shape=((1, 1), (1, 1)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_5_{shape=((1, 1), (1, 1)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_6_{shape=((1, 1), (1, 1)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_7_{shape=((1, 1), (1, 1)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_8_{shape=((1, 1), (1, 2)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_9_{shape=((1, 1), (1, 2)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_10_{shape=((1, 1), (1, 2)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_11_{shape=((1, 1), (1, 2)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_12_{shape=((1, 2), (2, 1)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_13_{shape=((1, 2), (2, 1)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_14_{shape=((1, 2), (2, 1)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_15_{shape=((1, 2), (2, 1)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_16_{shape=((2, 1), (1, 1)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_17_{shape=((2, 1), (1, 1)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_18_{shape=((2, 1), (1, 1)), trans_a=False, 
trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_19_{shape=((2, 1), (1, 1)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_20_{shape=((1, 2), (2, 3)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_21_{shape=((1, 2), (2, 3)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_22_{shape=((1, 2), (2, 3)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_23_{shape=((1, 2), (2, 3)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_24_{shape=((2, 1), (1, 3)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_25_{shape=((2, 1), (1, 3)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_26_{shape=((2, 1), (1, 3)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_27_{shape=((2, 1), (1, 3)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_28_{shape=((2, 3), (3, 1)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_29_{shape=((2, 3), (3, 1)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_30_{shape=((2, 3), (3, 1)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_31_{shape=((2, 3), (3, 1)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_32_{shape=((2, 3), (3, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_33_{shape=((2, 
3), (3, 4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_34_{shape=((2, 3), (3, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_35_{shape=((2, 3), (3, 4)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_36_{shape=((0, 3), (3, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_37_{shape=((0, 3), (3, 4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_38_{shape=((0, 3), (3, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_39_{shape=((0, 3), (3, 4)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_40_{shape=((2, 3), (3, 0)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_41_{shape=((2, 3), (3, 0)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_42_{shape=((2, 3), (3, 0)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_43_{shape=((2, 3), (3, 0)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_44_{shape=((0, 3), (3, 0)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_45_{shape=((0, 3), (3, 0)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_46_{shape=((0, 3), (3, 0)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_47_{shape=((0, 3), (3, 0)), trans_a=False, trans_b=False}::test_dot 
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_48_{shape=((3, 0), (0, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_49_{shape=((3, 0), (0, 4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_50_{shape=((3, 0), (0, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_51_{shape=((3, 0), (0, 4)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_52_{shape=((2, 3, 0), (3, 0, 2)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_53_{shape=((2, 3, 0), (3, 0, 2)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_54_{shape=((2, 3, 0), (3, 0, 2)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_55_{shape=((2, 3, 0), (3, 0, 2)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_56_{shape=((0, 0), (0, 0)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_57_{shape=((0, 0), (0, 0)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_58_{shape=((0, 0), (0, 0)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_59_{shape=((0, 0), (0, 0)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_60_{shape=((3,), (3,)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_61_{shape=((3,), (3,)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_62_{shape=((3,), (3,)), 
trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_63_{shape=((3,), (3,)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_64_{shape=((2,), (2, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_65_{shape=((2,), (2, 4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_66_{shape=((2,), (2, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_67_{shape=((2,), (2, 4)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_68_{shape=((4, 2), (2,)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_69_{shape=((4, 2), (2,)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_70_{shape=((4, 2), (2,)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_71_{shape=((4, 2), (2,)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_0_{shape=((), ()), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_1_{shape=((), ()), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_2_{shape=((), ()), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_3_{shape=((), ()), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_4_{shape=((), (2, 4)), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_5_{shape=((), (2, 
4)), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_6_{shape=((), (2, 4)), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_7_{shape=((), (2, 4)), trans_a=False, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_8_{shape=((4, 2), ()), trans_a=True, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_9_{shape=((4, 2), ()), trans_a=True, trans_b=False}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_10_{shape=((4, 2), ()), trans_a=False, trans_b=True}::test_dot -tests/third_party/cupy/linalg_tests/test_product.py::TestDotFor0Dim_param_11_{shape=((4, 2), ()), trans_a=False, trans_b=False}::test_dot - tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsBeta_param_6_{a_shape=(3, 2), b_shape=(3, 2), shape=(4, 3, 2)}::test_beta tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsBeta_param_7_{a_shape=(3, 2), b_shape=(3, 2), shape=(3, 2)}::test_beta tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsChisquare_param_0_{df_shape=(), shape=(4, 3, 2)}::test_chisquare diff --git a/tests/test_dot.py b/tests/test_dot.py index 80da5090e1b..55884b00cd3 100644 --- a/tests/test_dot.py +++ b/tests/test_dot.py @@ -1,52 +1,373 @@ +import dpctl import numpy import pytest from numpy.testing import assert_allclose, assert_array_equal -import dpnp as inp +import dpnp -from .helper import get_all_dtypes +from .helper import assert_dtype_allclose, get_all_dtypes, get_complex_dtypes -@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) -def test_dot_ones(type): - n = 10**5 - a = numpy.ones(n, dtype=type) - b = numpy.ones(n, dtype=type) - ia = inp.array(a) - ib = inp.array(b) - - result = inp.dot(ia, ib) - expected = numpy.dot(a, b) - 
assert_array_equal(expected, result) +class Testdot: + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_dot_ones(self, dtype): + n = 10**5 + a = numpy.ones(n, dtype=dtype) + b = numpy.ones(n, dtype=dtype) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.dot(ia, ib) + expected = numpy.dot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_dot_arange(self, dtype): + n = 10**2 + m = 10**3 if dtype is not dpnp.float32 else 10**2 + a = numpy.hstack((numpy.arange(n, dtype=dtype),) * m) + b = numpy.flipud(a) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.dot(ia, ib) + expected = numpy.dot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_dot_scalar(self, dtype): + a = 2 + b = numpy.array(numpy.random.uniform(-5, 5, 10), dtype=dtype) + ib = dpnp.array(b) + + result = dpnp.dot(a, ib) + expected = numpy.dot(a, b) + assert_allclose(result, expected) + + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + @pytest.mark.parametrize( + "array_info", + [ + (1, 10, (), (10,)), + (10, 1, (10,), ()), + (1, 1, (), ()), + (10, 10, (10,), (10,)), + (12, 6, (4, 3), (3, 2)), + (12, 3, (4, 3), (3,)), + (60, 3, (5, 4, 3), (3,)), + (4, 8, (4,), (4, 2)), + (60, 48, (5, 3, 4), (6, 4, 2)), + ], + ids=[ + "0d_1d", + "1d_0d", + "0d_0d", + "1d_1d", + "2d_2d", + "2d_1d", + "3d_1d", + "1d_2d", + "3d_3d", + ], + ) + def test_dot(self, dtype, array_info): + size1, size2, shape1, shape2 = array_info + a = numpy.array( + numpy.random.uniform(-5, 5, size1), dtype=dtype + ).reshape(shape1) + b = numpy.array( + numpy.random.uniform(-5, 5, size2), dtype=dtype + ).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.dot(ia, ib) + expected = numpy.dot(a, 
b) + assert_dtype_allclose(result, expected) + + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize( + "array_info", + [ + (1, 10, (), (10,)), + (10, 1, (10,), ()), + (1, 1, (), ()), + (10, 10, (10,), (10,)), + (12, 6, (4, 3), (3, 2)), + (12, 3, (4, 3), (3,)), + (60, 3, (5, 4, 3), (3,)), + (4, 8, (4,), (4, 2)), + (60, 48, (5, 3, 4), (6, 4, 2)), + ], + ids=[ + "0d_1d", + "1d_0d", + "0d_0d", + "1d_1d", + "2d_2d", + "2d_1d", + "3d_1d", + "1d_2d", + "3d_3d", + ], + ) + def test_dot_complex(self, dtype, array_info): + size1, size2, shape1, shape2 = array_info + x11 = numpy.random.uniform(-5, 5, size1) + x12 = numpy.random.uniform(-5, 5, size1) + x21 = numpy.random.uniform(-5, 5, size2) + x22 = numpy.random.uniform(-5, 5, size2) + a = numpy.array(x11 + 1j * x12, dtype=dtype).reshape(shape1) + b = numpy.array(x21 + 1j * x22, dtype=dtype).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.dot(ia, ib) + expected = numpy.dot(a, b) + assert_dtype_allclose(result, expected) + + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize( + "array_info", + [ + (1, 10, (), (10,)), + (10, 1, (10,), ()), + (1, 1, (), ()), + (10, 10, (10,), (10,)), + (12, 6, (4, 3), (3, 2)), + (12, 3, (4, 3), (3,)), + (60, 3, (5, 4, 3), (3,)), + (4, 8, (4,), (4, 2)), + (60, 48, (5, 3, 4), (6, 4, 2)), + ], + ids=[ + "0d_1d", + "1d_0d", + "0d_0d", + "1d_1d", + "2d_2d", + "2d_1d", + "3d_1d", + "1d_2d", + "3d_3d", + ], + ) + def test_dot_ndarray(self, dtype, array_info): + size1, size2, shape1, shape2 = array_info + a = numpy.array( + numpy.random.uniform(-5, 5, size1), dtype=dtype + ).reshape(shape1) + b = numpy.array( + numpy.random.uniform(-5, 
5, size2), dtype=dtype + ).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = ia.dot(ib) + expected = a.dot(b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_dot_strided(self, dtype): + a = numpy.arange(25, dtype=dtype) + b = numpy.arange(25, dtype=dtype) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.dot(ia[::3], ib[::3]) + expected = numpy.dot(a[::3], b[::3]) + assert_dtype_allclose(result, expected) + + result = dpnp.dot(ia, ib[::-1]) + expected = numpy.dot(a, b[::-1]) + assert_dtype_allclose(result, expected) + + result = dpnp.dot(ia[::-2], ib[::-2]) + expected = numpy.dot(a[::-2], b[::-2]) + assert_dtype_allclose(result, expected) + + result = dpnp.dot(ia[::-5], ib[::-5]) + expected = numpy.dot(a[::-5], b[::-5]) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_dot_out_scalar(self, dtype): + size = 10 + a = 2 + b = numpy.array(numpy.random.uniform(-5, 5, size), dtype=dtype) + ia = 2 + ib = dpnp.array(b) + + dp_out = dpnp.empty((size,), dtype=dtype) + result = dpnp.dot(ia, ib, out=dp_out) + expected = numpy.dot(a, b) + + assert result is dp_out + assert_allclose(result, expected) + + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize( + "array_info", + [ + (1, 10, (), (10,), (10,)), + (10, 1, (10,), (), (10,)), + (1, 1, (), (), ()), + (10, 10, (10,), (10,), ()), + (12, 6, (4, 3), (3, 2), (4, 2)), + (12, 3, (4, 3), (3,), (4,)), + (60, 3, (5, 4, 3), (3,), (5, 4)), + (4, 8, (4,), (4, 2), (2,)), + (60, 48, (5, 3, 4), (6, 4, 2), (5, 3, 6, 2)), + ], + ids=[ + "0d_1d", + "1d_0d", + "0d_0d", + "1d_1d", + "2d_2d", + "2d_1d", + "3d_1d", + "1d_2d", + "3d_3d", + ], + ) + def test_dot_out(self, dtype, array_info): + size1, size2, 
shape1, shape2, out_shape = array_info + a = numpy.array( + numpy.random.uniform(-5, 5, size1), dtype=dtype + ).reshape(shape1) + b = numpy.array( + numpy.random.uniform(-5, 5, size2), dtype=dtype + ).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + dp_out = dpnp.empty(out_shape, dtype=dtype) + result = dpnp.dot(ia, ib, out=dp_out) + expected = numpy.dot(a, b) + + assert result is dp_out + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype1", get_all_dtypes()) + @pytest.mark.parametrize("dtype2", get_all_dtypes()) + def test_dot_input_dtype_matrix(self, dtype1, dtype2): + a = numpy.array(numpy.random.uniform(-5, 5, 10), dtype=dtype1) + b = numpy.array(numpy.random.uniform(-5, 5, 10), dtype=dtype2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.dot(ia, ib) + expected = numpy.dot(a, b) + assert_dtype_allclose(result, expected) + + def test_dot_1d_error(self): + a = dpnp.ones(25) + b = dpnp.ones(24) + # size of input arrays differ + with pytest.raises(ValueError): + dpnp.dot(a, b) + + def test_dot_sycl_queue_error(self): + a = dpnp.ones((5,), sycl_queue=dpctl.SyclQueue()) + b = dpnp.ones((5,), sycl_queue=dpctl.SyclQueue()) + with pytest.raises(ValueError): + dpnp.dot(a, b) + + # NumPy does not raise an error for the following test. 
+ # it just does not update the out keyword if it is not properly defined + @pytest.mark.parametrize("ia", [1, dpnp.ones((), dtype=dpnp.int32)]) + def test_dot_out_error_scalar(self, ia): + ib = dpnp.ones(10, dtype=dpnp.int32) + + # output data type is incorrect + dp_out = dpnp.empty((10,), dtype=dpnp.int64) + # TODO: change it to ValueError, when updated + # dpctl is being used in internal CI + with pytest.raises((ValueError, TypeError)): + dpnp.dot(ia, ib, out=dp_out) + + # output shape is incorrect + dp_out = dpnp.empty((2,), dtype=dpnp.int32) + # TODO: change it to ValueError, when updated + # dpctl is being used in internal CI + with pytest.raises((ValueError, TypeError)): + dpnp.dot(ia, ib, out=dp_out) + + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize( + "shape_pair", + [ + ((10,), (10,), ()), + ((3, 4), (4, 2), (3, 2)), + ((3, 4), (4,), (3,)), + ((5, 4, 3), (3,), (5, 4)), + ((4,), (3, 4, 2), (3, 2)), + ((5, 3, 4), (6, 4, 2), (5, 3, 6, 2)), + ], + ids=["1d_1d", "2d_2d", "2d_1d", "3d_1d", "1d_3d", "3d_3d"], + ) + def test_dot_out_error(self, shape_pair): + shape1, shape2, shape_out = shape_pair + a = numpy.ones(shape1, dtype=numpy.int32) + b = numpy.ones(shape2, dtype=numpy.int32) + ia = dpnp.array(a) + ib = dpnp.array(b) + # output data type is incorrect + np_out = numpy.empty(shape_out, dtype=numpy.int64) + dp_out = dpnp.empty(shape_out, dtype=dpnp.int64) + with pytest.raises(TypeError): + dpnp.dot(ia, ib, out=dp_out) + with pytest.raises(ValueError): + numpy.dot(a, b, out=np_out) -@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) -def test_dot_arange(dtype): - n = 10**2 - m = 10**3 if dtype is not inp.float32 else 10**2 - a = numpy.hstack((numpy.arange(n, dtype=dtype),) * m) - b = numpy.flipud(a) - ia = inp.array(a) - ib = inp.array(b) + # output shape is incorrect + np_out = numpy.empty((2, 3), 
dtype=numpy.int32) + dp_out = dpnp.empty((2, 3), dtype=dpnp.int32) + with pytest.raises(ValueError): + dpnp.dot(ia, ib, out=dp_out) + with pytest.raises(ValueError): + numpy.dot(a, b, out=np_out) - result = inp.dot(ia, ib) - expected = numpy.dot(a, b) - assert_allclose(expected, result) + # "F" or "C" is irrelevant for 0d or 1d arrays + if not (len(shape_out) in [0, 1]): + # output should be C-contiguous + np_out = numpy.empty(shape_out, dtype=numpy.int32, order="F") + dp_out = dpnp.empty(shape_out, dtype=dpnp.int32, order="F") + with pytest.raises(ValueError): + dpnp.dot(ia, ib, out=dp_out) + with pytest.raises(ValueError): + numpy.dot(a, b, out=np_out) @pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_multi_dot(type): n = 16 - a = inp.reshape(inp.arange(n, dtype=type), (4, 4)) - b = inp.reshape(inp.arange(n, dtype=type), (4, 4)) - c = inp.reshape(inp.arange(n, dtype=type), (4, 4)) - d = inp.reshape(inp.arange(n, dtype=type), (4, 4)) + a = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4)) + b = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4)) + c = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4)) + d = dpnp.reshape(dpnp.arange(n, dtype=type), (4, 4)) a1 = numpy.arange(n, dtype=type).reshape((4, 4)) b1 = numpy.arange(n, dtype=type).reshape((4, 4)) c1 = numpy.arange(n, dtype=type).reshape((4, 4)) d1 = numpy.arange(n, dtype=type).reshape((4, 4)) - result = inp.linalg.multi_dot([a, b, c, d]) + result = dpnp.linalg.multi_dot([a, b, c, d]) expected = numpy.linalg.multi_dot([a1, b1, c1, d1]) assert_array_equal(expected, result) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 1faa0620f7d..56be3db6d92 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -2517,6 +2517,7 @@ class TestMatmul: ((4,), (4,)), ((4,), (4, 2)), ((2, 4), (4,)), + ((1, 4), (4,)), # output should be 1-d not 0-d ((2, 4), (4, 3)), ((1, 2, 3), (1, 3, 5)), ((4, 2, 3), (4, 3, 5)), @@ -2672,7 +2673,7 @@ def 
test_matmul_dtype(self, dtype, shape_pair): "((6, 7, 4, 3), (6, 7, 3, 5))", ], ) - def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): + def test_matmul_dtype_matrix_inout(self, dtype1, dtype2, shape_pair): shape1, shape2 = shape_pair a1 = numpy.arange(numpy.prod(shape1), dtype=dtype1).reshape(shape1) a2 = numpy.arange(numpy.prod(shape2), dtype=dtype1).reshape(shape2) @@ -2703,7 +2704,7 @@ def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): "((6, 7, 4, 3), (6, 7, 3, 5))", ], ) - def test_matmul_dtype_matrix_inout(self, dtype1, dtype2, shape_pair): + def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): shape1, shape2 = shape_pair a1 = numpy.arange(numpy.prod(shape1), dtype=dtype1).reshape(shape1) a2 = numpy.arange(numpy.prod(shape2), dtype=dtype2).reshape(shape2) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 78a869fac9d..a8b8be52009 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -534,8 +534,8 @@ def test_reduce_hypot(device): ), pytest.param( "dot", - [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], - [[4.0, 4.0], [4.0, 4.0], [4.0, 4.0]], + [3.0, 4.0, 5.0], + [1.0, 2.0, 3.0], ), pytest.param( "floor_divide", [1.0, 2.0, 3.0, 4.0], [2.5, 2.5, 2.5, 2.5] @@ -842,8 +842,8 @@ def test_out_1in_1out(func, data, device): ), pytest.param( "dot", - [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], - [[4.0, 4.0], [4.0, 4.0], [4.0, 4.0]], + [3.0, 4.0, 5.0], + [1.0, 2.0, 3.0], ), pytest.param( "floor_divide", [1.0, 2.0, 3.0, 4.0], [2.5, 2.5, 2.5, 2.5] diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 5a29e677747..171e979facf 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -494,8 +494,8 @@ def test_1in_1out(func, data, usm_type): pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]), pytest.param( "dot", - [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], - [[4.0, 4.0], [4.0, 4.0], [4.0, 4.0]], + [3.0, 4.0, 5.0], + [1.0, 2.0, 3.0], ), pytest.param("fmax", [[0.0, 1.0, 2.0]], 
[[3.0, 4.0, 5.0]]), pytest.param("fmin", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), diff --git a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py index 99dcfb2127c..b620bd39e98 100644 --- a/tests/third_party/cupy/linalg_tests/test_eigenvalue.py +++ b/tests/third_party/cupy/linalg_tests/test_eigenvalue.py @@ -15,12 +15,6 @@ def _get_hermitian(xp, a, UPLO): return xp.tril(a) + xp.tril(a, k=-1).swapaxes(-2, -1).conj() -# TODO: -# remove once dpnp.dot and dpnp.matmul support complex types -def _wrap_as_numpy_array(xp, a): - return a.asnumpy() if xp is cupy else a - - @testing.parameterize( *testing.product( { @@ -57,20 +51,12 @@ def test_eigh(self, xp, dtype): else: tol = 1e-5 - # TODO: remove _wrap_as_numpy_array() once @ support complex types - testing.assert_allclose( - _wrap_as_numpy_array(xp, A) @ _wrap_as_numpy_array(xp, v), - _wrap_as_numpy_array(xp, v) - @ numpy.diag(_wrap_as_numpy_array(xp, w)), - atol=tol, - rtol=tol, - ) + testing.assert_allclose(A @ v, v @ xp.diag(w), atol=tol, rtol=tol) # Check if v @ vt is an identity matrix testing.assert_allclose( - _wrap_as_numpy_array(xp, v) - @ _wrap_as_numpy_array(xp, v).swapaxes(-2, -1).conj(), - numpy.identity(_wrap_as_numpy_array(xp, A).shape[-1], _dtype), + v @ v.swapaxes(-2, -1).conj(), + xp.identity(A.shape[-1], _dtype), atol=tol, rtol=tol, ) @@ -121,11 +107,6 @@ def test_eigh_complex_batched(self, xp, dtype): # them through the eigen equation A*v=w*v. 
A = _get_hermitian(xp, a, self.UPLO) - # TODO: remove _wrap_as_numpy_array() once dpnp.dot() support complex types - A = _wrap_as_numpy_array(xp, A) - v = _wrap_as_numpy_array(xp, v) - w = _wrap_as_numpy_array(xp, w) - for i in range(a.shape[0]): testing.assert_allclose( A[i].dot(v[i]), w[i] * v[i], rtol=1e-5, atol=1e-5 diff --git a/tests/third_party/cupy/linalg_tests/test_product.py b/tests/third_party/cupy/linalg_tests/test_product.py index 93b13c93e87..1fd048356b4 100644 --- a/tests/third_party/cupy/linalg_tests/test_product.py +++ b/tests/third_party/cupy/linalg_tests/test_product.py @@ -36,10 +36,12 @@ } ) ) -@testing.gpu +# TODO: get rid of falls back on NumPy when tensordot +# is implemented using OneMKL +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestDot(unittest.TestCase): @testing.for_all_dtypes_combination(["dtype_a", "dtype_b"]) - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_dot(self, xp, dtype_a, dtype_b): shape_a, shape_b = self.shape if self.trans_a: @@ -71,8 +73,13 @@ def test_dot_with_out(self, xp, dtype_a, dtype_b, dtype_c): else: shape_c = shape_a[:-1] + shape_b[:-2] + shape_b[-1:] c = xp.empty(shape_c, dtype=dtype_c) - out = xp.dot(a, b, out=c) - self.assertIs(out, c) + try: + out = xp.dot(a, b, out=c) + except TypeError: + # When output dtype is incorrect, NumPy raises ValueError + # While DPNP raises TypeError, so we change it to ValueError + raise ValueError + assert out is c return c @@ -128,10 +135,11 @@ def test_cross(self, xp, dtype_a, dtype_b): } ) ) -@testing.gpu class TestDotFor0Dim(unittest.TestCase): @testing.for_all_dtypes_combination(["dtype_a", "dtype_b"]) - @testing.numpy_cupy_allclose(contiguous_check=False) + @testing.numpy_cupy_allclose( + type_check=has_support_aspect64(), contiguous_check=False + ) def test_dot(self, xp, dtype_a, dtype_b): shape_a, shape_b = self.shape if self.trans_a: @@ -145,8 +153,7 @@ def test_dot(self, xp, dtype_a, dtype_b): 
return xp.dot(a, b) -@testing.gpu -class TestProduct(unittest.TestCase): +class TestProduct: @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_dot_vec1(self, xp, dtype): @@ -154,6 +161,9 @@ def test_dot_vec1(self, xp, dtype): b = testing.shaped_arange((2,), xp, dtype) return xp.dot(a, b) + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_dot_vec2(self, xp, dtype): @@ -168,6 +178,9 @@ def test_dot_vec3(self, xp, dtype): b = testing.shaped_arange((2,), xp, dtype) return xp.dot(a, b) + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_transposed_dot(self, xp, dtype): @@ -175,6 +188,9 @@ def test_transposed_dot(self, xp, dtype): b = testing.shaped_arange((2, 3, 4), xp, dtype).transpose(0, 2, 1) return xp.dot(a, b) + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_transposed_dot_with_out(self, xp, dtype): @@ -184,6 +200,9 @@ def test_transposed_dot_with_out(self, xp, dtype): xp.dot(a, b, out=c) return c + # TODO: get rid of falls back on NumPy when tensordot + # is implemented using OneMKL + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() def test_transposed_dot_with_out_f_contiguous(self, dtype): for xp in (numpy, cupy): diff --git a/tests/third_party/cupy/math_tests/test_matmul.py b/tests/third_party/cupy/math_tests/test_matmul.py index d21ec7a2d68..887ed9ae1b9 100644 --- a/tests/third_party/cupy/math_tests/test_matmul.py +++ b/tests/third_party/cupy/math_tests/test_matmul.py @@ -73,6 +73,61 @@ def test_cupy_matmul(self, xp, dtype1): return xp.matmul(x1, 
x2) +@testing.parameterize( + *testing.product( + { + "shape_pair": [ + # dot test + ((2, 3), (3, 4), (2, 4)), + # ((0,), (0,), (0,)), + # matmul test + ((5, 3, 2), (5, 2, 4), (5, 3, 4)), + ((0, 3, 2), (0, 2, 4), (0, 3, 4)), + ], + } + ) +) +class TestMatmulOut(unittest.TestCase): + @testing.for_all_dtypes(name="dtype1") + @testing.for_all_dtypes(name="dtype2") + @testing.numpy_cupy_allclose( + rtol=1e-3, atol=1e-3, accept_error=TypeError # required for uint8 + ) + def test_cupy_matmul_noncontiguous(self, xp, dtype1, dtype2): + x1 = testing.shaped_arange(self.shape_pair[0], xp, dtype1) + x2 = testing.shaped_arange(self.shape_pair[1], xp, dtype2) + out = xp.zeros(self.shape_pair[2], dtype=dtype1)[::-1] + ret = xp.matmul(x1, x2, out=out) + assert ret is out + return ret + + @testing.for_all_dtypes(name="dtype1") + @testing.for_all_dtypes(name="dtype2") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-3) # required for uint8 + def test_cupy_matmul_out_cast(self, xp, dtype1, dtype2): + x1 = testing.shaped_arange(self.shape_pair[0], xp, dtype1) + x2 = testing.shaped_arange(self.shape_pair[1], xp, dtype2) + out = xp.zeros(self.shape_pair[2], dtype=bool) + ret = xp.matmul(x1, x2, out=out, casting="unsafe") + assert ret is out + return ret + + +class TestMatmulOutOverlap: + @pytest.mark.parametrize( + "shape", + [ + (900, 900), + (2, 600, 600), + ], + ) + @testing.for_dtypes([numpy.int32, numpy.float64]) + @testing.numpy_cupy_allclose(rtol=1e-5, atol=1e-5) + def test_overlap_both(self, xp, dtype, shape): + a = xp.ones(shape, dtype=dtype) + return xp.matmul(a, a, out=a) + + @testing.parameterize( *testing.product( { From b03261258eabf2d688decd16b874e22cebbf4de3 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 7 Feb 2024 17:18:45 +0100 Subject: [PATCH 19/29] Add support of numpy 1.26.3 (#1690) --- conda-recipe/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda-recipe/meta.yaml 
b/conda-recipe/meta.yaml index 99e50c706c0..7c9a10c4ff7 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -13,7 +13,7 @@ requirements: host: - python - setuptools - - numpy >=1.19,<1.25a0 + - numpy >=1.19,<1.27a0 - cython - cmake >=3.21 - ninja From 666486f5ae168b60ee6fe668cbe0977759b10557 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:38:29 +0100 Subject: [PATCH 20/29] Pin version of packages installed with pip command (#1696) --- .github/workflows/build-sphinx.yml | 2 +- .github/workflows/generate_coverage.yaml | 6 +++--- scripts/install_python_deps.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 9de0097e120..e1719d01ae1 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -103,7 +103,7 @@ jobs: - name: Install sphinx dependencies run: | conda install sphinx sphinx_rtd_theme - pip install sphinxcontrib-googleanalytics + pip install sphinxcontrib-googleanalytics==0.4 - name: Install dpnp dependencies run: | diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 432377ce10c..e0faec5b567 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -79,7 +79,7 @@ jobs: - name: Install coverall dependencies run: | sudo gem install coveralls-lcov - pip install coveralls==3.2.0 + pip install coveralls==3.3.1 - name: Upload coverage data to coveralls.io run: | @@ -102,7 +102,7 @@ jobs: steps: - name: Finished run: | - pip3 install --upgrade coveralls - coveralls --finish + pip3 install --upgrade coveralls==3.3.1 + coveralls --service=github --finish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/scripts/install_python_deps.sh b/scripts/install_python_deps.sh index e40d9a5b34a..f538e28446c 100755 --- a/scripts/install_python_deps.sh +++ 
b/scripts/install_python_deps.sh @@ -11,7 +11,7 @@ echo ========================= Conda: remove mkl =============================== conda remove mkl --force -y || true echo ========================= PIP3: install prerequisites ========================== -pip3 install pytest-valgrind +pip3 install pytest-valgrind==0.2.0 echo ========================= SW versions ========================================== conda list From 1a3866e494bc48ff060eae5328e640699d9082a6 Mon Sep 17 00:00:00 2001 From: vtavana <120411540+vtavana@users.noreply.github.com> Date: Wed, 7 Feb 2024 17:38:32 -0600 Subject: [PATCH 21/29] update `dpnp.vdot` implementation (#1692) * update dpnp_vdot * address comments * address more comments --- dpnp/backend/extensions/blas/CMakeLists.txt | 1 + dpnp/backend/extensions/blas/blas_py.cpp | 10 + dpnp/backend/extensions/blas/dot.hpp | 8 + dpnp/backend/extensions/blas/dotc.cpp | 241 ++++++++++++++++++ dpnp/backend/extensions/blas/types_matrix.hpp | 24 ++ dpnp/dpnp_iface.py | 17 +- dpnp/dpnp_iface_linearalgebra.py | 67 ++++- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 22 +- tests/skipped_tests.tbl | 3 - tests/skipped_tests_gpu.tbl | 3 - tests/test_dot.py | 144 ++++++++++- tests/test_sycl_queue.py | 25 +- tests/test_usm_type.py | 15 +- 13 files changed, 537 insertions(+), 43 deletions(-) create mode 100644 dpnp/backend/extensions/blas/dotc.cpp diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index fe3a92d2181..692c1c0ec59 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -28,6 +28,7 @@ set(python_module_name _blas_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/blas_py.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dotc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/dotu.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gemm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gemm_batch.cpp diff --git a/dpnp/backend/extensions/blas/blas_py.cpp 
b/dpnp/backend/extensions/blas/blas_py.cpp index 7d5237381b1..d091923e63e 100644 --- a/dpnp/backend/extensions/blas/blas_py.cpp +++ b/dpnp/backend/extensions/blas/blas_py.cpp @@ -40,6 +40,7 @@ namespace py = pybind11; void init_dispatch_tables(void) { blas_ext::init_dot_dispatch_table(); + blas_ext::init_dotc_dispatch_table(); blas_ext::init_dotu_dispatch_table(); blas_ext::init_gemm_batch_dispatch_table(); blas_ext::init_gemm_dispatch_table(); @@ -57,6 +58,15 @@ PYBIND11_MODULE(_blas_impl, m) py::arg("result"), py::arg("depends") = py::list()); } + { + m.def("_dotc", &blas_ext::dotc, + "Call `dotc` from OneMKL LAPACK library to return " + "the dot product of two complex vectors, " + "conjugating the first vector.", + py::arg("sycl_queue"), py::arg("vectorA"), py::arg("vectorB"), + py::arg("result"), py::arg("depends") = py::list()); + } + { m.def("_dotu", &blas_ext::dotu, "Call `dotu` from OneMKL LAPACK library to return " diff --git a/dpnp/backend/extensions/blas/dot.hpp b/dpnp/backend/extensions/blas/dot.hpp index 3468196f760..914355b7f1e 100644 --- a/dpnp/backend/extensions/blas/dot.hpp +++ b/dpnp/backend/extensions/blas/dot.hpp @@ -45,6 +45,13 @@ extern std::pair dpctl::tensor::usm_ndarray result, const std::vector &depends); +extern std::pair + dotc(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray vectorA, + dpctl::tensor::usm_ndarray vectorB, + dpctl::tensor::usm_ndarray result, + const std::vector &depends); + extern std::pair dotu(sycl::queue &exec_q, dpctl::tensor::usm_ndarray vectorA, @@ -53,6 +60,7 @@ extern std::pair const std::vector &depends); extern void init_dot_dispatch_table(void); +extern void init_dotc_dispatch_table(void); extern void init_dotu_dispatch_table(void); } // namespace blas } // namespace ext diff --git a/dpnp/backend/extensions/blas/dotc.cpp b/dpnp/backend/extensions/blas/dotc.cpp new file mode 100644 index 00000000000..3f9e7e17b4c --- /dev/null +++ b/dpnp/backend/extensions/blas/dotc.cpp @@ -0,0 +1,241 @@ 
+//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "dot.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +namespace mkl_blas = oneapi::mkl::blas; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*dotc_impl_fn_ptr_t)(sycl::queue &, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::vector &); + +static dotc_impl_fn_ptr_t dotc_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + +template +static sycl::event dotc_impl(sycl::queue &exec_q, + const std::int64_t n, + char *vectorA, + const std::int64_t stride_a, + char *vectorB, + const std::int64_t stride_b, + char *result, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + Tab *a = reinterpret_cast(vectorA); + Tab *b = reinterpret_cast(vectorB); + Tc *res = reinterpret_cast(result); + + std::stringstream error_msg; + bool is_exception_caught = false; + + sycl::event dotc_event; + try { + dotc_event = mkl_blas::row_major::dotc(exec_q, + n, // size of the input vectors + a, // Pointer to vector a. + stride_a, // Stride of vector a. + b, // Pointer to vector b. + stride_b, // Stride of vector b. + res, // Pointer to result. 
+ depends); + } catch (oneapi::mkl::exception const &e) { + error_msg + << "Unexpected MKL exception caught during dotc() call:\nreason: " + << e.what(); + is_exception_caught = true; + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during dotc() call:\n" + << e.what(); + is_exception_caught = true; + } + + if (is_exception_caught) // an unexpected error occurs + { + throw std::runtime_error(error_msg.str()); + } + + return dotc_event; +} + +std::pair + dotc(sycl::queue &exec_q, + dpctl::tensor::usm_ndarray vectorA, + dpctl::tensor::usm_ndarray vectorB, + dpctl::tensor::usm_ndarray result, + const std::vector &depends) +{ + const int vectorA_nd = vectorA.get_ndim(); + const int vectorB_nd = vectorB.get_ndim(); + const int result_nd = result.get_ndim(); + + if ((vectorA_nd != 1)) { + throw py::value_error( + "The first input array has ndim=" + std::to_string(vectorA_nd) + + ", but a 1-dimensional array is expected."); + } + + if ((vectorB_nd != 1)) { + throw py::value_error( + "The second input array has ndim=" + std::to_string(vectorB_nd) + + ", but a 1-dimensional array is expected."); + } + + if ((result_nd != 0)) { + throw py::value_error( + "The output array has ndim=" + std::to_string(result_nd) + + ", but a 0-dimensional array is expected."); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(vectorA, result)) { + throw py::value_error( + "The first input array and output array are overlapping " + "segments of memory"); + } + if (overlap(vectorB, result)) { + throw py::value_error( + "The second input array and output array are overlapping " + "segments of memory"); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, + {vectorA.get_queue(), vectorB.get_queue(), result.get_queue()})) + { + throw py::value_error( + "USM allocations are not compatible with the execution queue."); + } + + py::ssize_t a_size = 
vectorA.get_size(); + py::ssize_t b_size = vectorB.get_size(); + if (a_size != b_size) { + throw py::value_error("The size of the first input array must be " + "equal to the size of the second input array."); + } + + std::vector a_stride = vectorA.get_strides_vector(); + std::vector b_stride = vectorB.get_strides_vector(); + + const std::int64_t n = a_size; + const std::int64_t str_a = a_stride[0]; + const std::int64_t str_b = b_stride[0]; + + int vectorA_typenum = vectorA.get_typenum(); + int vectorB_typenum = vectorB.get_typenum(); + int result_typenum = result.get_typenum(); + + if (vectorA_typenum != vectorB_typenum) { + throw py::value_error( + "Input arrays must be of must be of the same type."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int vectorAB_type_id = array_types.typenum_to_lookup_id(vectorA_typenum); + int result_type_id = array_types.typenum_to_lookup_id(result_typenum); + + dotc_impl_fn_ptr_t dotc_fn = + dotc_dispatch_table[vectorAB_type_id][result_type_id]; + if (dotc_fn == nullptr) { + throw py::value_error( + "Types of input vectors and result array are mismatched."); + } + + char *a_typeless_ptr = vectorA.get_data(); + char *b_typeless_ptr = vectorB.get_data(); + char *r_typeless_ptr = result.get_data(); + + const int a_elemsize = vectorA.get_elemsize(); + const int b_elemsize = vectorB.get_elemsize(); + if (str_a < 0) { + a_typeless_ptr -= (n - 1) * std::abs(str_a) * a_elemsize; + } + if (str_b < 0) { + b_typeless_ptr -= (n - 1) * std::abs(str_b) * b_elemsize; + } + + sycl::event dotc_ev = + dotc_fn(exec_q, n, a_typeless_ptr, str_a, b_typeless_ptr, str_b, + r_typeless_ptr, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {vectorA, vectorB, result}, {dotc_ev}); + + return std::make_pair(args_ev, dotc_ev); +} + +template +struct DotcContigFactory +{ + fnT get() + { + if constexpr (types::DotcTypePairSupportFactory::is_defined) { + return dotc_impl; + } + else { + return nullptr; + } + } +}; + 
+void init_dotc_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(dotc_dispatch_table); +} +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/types_matrix.hpp b/dpnp/backend/extensions/blas/types_matrix.hpp index c36ae0e2045..44e297d47e7 100644 --- a/dpnp/backend/extensions/blas/types_matrix.hpp +++ b/dpnp/backend/extensions/blas/types_matrix.hpp @@ -62,6 +62,30 @@ struct DotTypePairSupportFactory dpctl_td_ns::NotDefinedEntry>::is_defined; }; +/** + * @brief A factory to define pairs of supported types for which + * MKL BLAS library provides support in oneapi::mkl::blas::dotc + * function. + * + * @tparam Tab Type of arrays containing input vectors A and B. + * @tparam Tc Type of array containing output. + */ +template +struct DotcTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL BLAS library provides support in oneapi::mkl::blas::dotu diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 9aee27b73bc..e37c2e090a6 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -205,7 +205,7 @@ def astype(x1, dtype, order="K", casting="unsafe", copy=True): return dpnp_array._create_from_usm_ndarray(array_obj) -def check_supported_arrays_type(*arrays, scalar_type=False): +def check_supported_arrays_type(*arrays, scalar_type=False, all_scalars=False): """ Return ``True`` if each array has either type of scalar, :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. @@ -216,7 +216,9 @@ def check_supported_arrays_type(*arrays, scalar_type=False): arrays : {dpnp_array, usm_ndarray} Input arrays to check for supported types. 
scalar_type : {bool}, optional - A scalar type is also considered as supported if flag is True. + A scalar type is also considered as supported if flag is ``True``. + all_scalars : {bool}, optional + All the input arrays can be scalar if flag is ``True``. Returns ------- @@ -231,13 +233,22 @@ def check_supported_arrays_type(*arrays, scalar_type=False): """ + any_is_array = False for a in arrays: - if scalar_type and dpnp.isscalar(a) or is_supported_array_type(a): + if is_supported_array_type(a): + any_is_array = True + continue + elif scalar_type and dpnp.isscalar(a): continue raise TypeError( "An array must be any of supported type, but got {}".format(type(a)) ) + + if len(arrays) > 1 and not (all_scalars or any_is_array): + raise TypeError( + "At least one input must be of supported array type, but got all scalars." + ) return True diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 9d63f7f8c3d..bffe881b626 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -121,8 +121,7 @@ def dot(a, b, out=None): """ - dpnp.check_supported_arrays_type(a, scalar_type=True) - dpnp.check_supported_arrays_type(b, scalar_type=True) + dpnp.check_supported_arrays_type(a, b, scalar_type=True) if out is not None: dpnp.check_supported_arrays_type(out) @@ -333,8 +332,7 @@ def matmul( """ - dpnp.check_supported_arrays_type(x1) - dpnp.check_supported_arrays_type(x2) + dpnp.check_supported_arrays_type(x1, x2) if subok is False: raise NotImplementedError( "subok keyword argument is only supported by its default value." @@ -444,19 +442,68 @@ def tensordot(x1, x2, axes=2): return call_origin(numpy.tensordot, x1, x2, axes) -def vdot(*args, **kwargs): +def vdot(a, b): """ Return the dot product of two vectors. - For full documentation refer to :obj:`numpy.vdot`. + For full documentation refer to :obj:`numpy.vdot`. + + Parameters + ---------- + a : {dpnp_array, usm_ndarray, scalar} + First input array. 
Both inputs `a` and `b` can not be + scalars at the same time. If `a` is complex, the complex + conjugate is taken before the calculation of the dot product. + b : {dpnp_array, usm_ndarray, scalar} + Second input array. Both inputs `a` and `b` can not be + scalars at the same time. + + Returns + ------- + out : dpnp.ndarray + Returns the dot product of `a` and `b`. See Also -------- :obj:`dpnp.dot` : Returns the dot product. + :obj:`dpnp.matmul` : Returns the matrix product. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([1+2j,3+4j]) + >>> b = np.array([5+6j,7+8j]) + >>> np.vdot(a, b) + array(70-8j) + >>> np.vdot(b, a) + array(70+8j) - Notes - ----- - This function works the same as :obj:`dpnp.dot`. + Note that higher-dimensional arrays are flattened! + + >>> a = np.array([[1, 4], [5, 6]]) + >>> b = np.array([[4, 1], [2, 2]]) + >>> np.vdot(a, b) + array(30) + >>> np.vdot(b, a) + array(30) + >>> 1*4 + 4*1 + 5*2 + 6*2 + 30 """ - return dpnp.dot(*args, **kwargs) + + dpnp.check_supported_arrays_type(a, b, scalar_type=True) + + if dpnp.isscalar(a) or dpnp.isscalar(b): + if dpnp.isscalar(b) and a.size != 1: + raise ValueError("The first array should be of size one.") + if dpnp.isscalar(a) and b.size != 1: + raise ValueError("The second array should be of size one.") + a_conj = numpy.conj(a) if dpnp.isscalar(a) else dpnp.conj(a) + # TODO: investigate usage of axpy (axpy_batch) or scal + # functions from BLAS here instead of dpnp.multiply + return dpnp.multiply(a_conj, b) + elif a.ndim == 1 and b.ndim == 1: + return dpnp_dot(a, b, out=None, conjugate=True) + else: + # dot product of flatten arrays + return dpnp_dot(dpnp.ravel(a), dpnp.ravel(b), out=None, conjugate=True) diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index 65d97befa98..bf1a3417704 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -175,7 +175,7 @@ def _op_res_dtype(*arrays, 
dtype, casting, sycl_queue): res_dtype = dtype else: raise TypeError( - f"Cannot cast ufunc 'matmul' output from dtype({res_dtype}) to dtype({dtype}) with casting rule {casting}" + f"Cannot cast from dtype({res_dtype}) to dtype({dtype}) with casting rule {casting}" ) op_dtype = ( @@ -185,16 +185,18 @@ def _op_res_dtype(*arrays, dtype, casting, sycl_queue): return op_dtype, res_dtype -def dpnp_dot(a, b, /, out=None): +def dpnp_dot(a, b, /, out=None, *, conjugate=False): """ Return the dot product of two arrays. The routine that is used to perform the main calculation - depends on input array data types: 1) For integer and boolean data types, + depends on input arrays data type: 1) For integer and boolean data types, `dpctl.tensor.vecdot` form the Data Parallel Control library is used, - 2) For floating point real-valued data types, `dot` routines from - BLAS library of OneMKL is used, and 3) For complex data types, - `dotu` routines from BLAS library of OneMKL is used. + 2) For real-valued floating point data types, `dot` routines from + BLAS library of OneMKL are used, and 3) For complex data types, + `dotu` or `dotc` routines from BLAS library of OneMKL are used. + If `conjugate` is ``False``, `dotu` is used. Otherwise, `dotc` is used, + for which the first array is conjugated before calculating the dot product. 
""" @@ -228,7 +230,11 @@ def dpnp_dot(a, b, /, out=None): a = _copy_array(a, dep_events_list, host_tasks_list, dtype=dot_dtype) b = _copy_array(b, dep_events_list, host_tasks_list, dtype=dot_dtype) if dpnp.issubdtype(res_dtype, dpnp.complexfloating): - ht_ev, _ = bi._dotu( + if conjugate: + dot_func = "_dotc" + else: + dot_func = "_dotu" + ht_ev, _ = getattr(bi, dot_func)( exec_q, dpnp.get_usm_ndarray(a), dpnp.get_usm_ndarray(b), @@ -253,7 +259,7 @@ def dpnp_dot(a, b, /, out=None): if dot_dtype != res_dtype: result = result.astype(res_dtype, copy=False) - # NumPy does not allow casting even if it is safe + # numpy.dot does not allow casting even if it is safe return dpnp.get_result_array(result, out, casting="no") diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index f91a4f23289..a38624e3757 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -335,13 +335,10 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot 
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index c3464096085..ce6f6aef984 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -437,13 +437,10 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal diff --git a/tests/test_dot.py b/tests/test_dot.py index 55884b00cd3..42478db9634 100644 --- a/tests/test_dot.py +++ b/tests/test_dot.py @@ -8,7 +8,7 @@ from .helper import assert_dtype_allclose, get_all_dtypes, get_complex_dtypes -class Testdot: +class TestDot: @pytest.mark.parametrize("dtype", get_all_dtypes()) def 
test_dot_ones(self, dtype): n = 10**5 @@ -371,3 +371,145 @@ def test_multi_dot(type): result = dpnp.linalg.multi_dot([a, b, c, d]) expected = numpy.linalg.multi_dot([a1, b1, c1, d1]) assert_array_equal(expected, result) + + +class TestVdot: + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_vdot_scalar(self, dtype): + a = numpy.array([3.5], dtype=dtype) + ia = dpnp.array(a) + b = 2 + 3j + + result = dpnp.vdot(ia, b) + expected = numpy.vdot(a, b) + assert_allclose(result, expected) + + result = dpnp.vdot(b, ia) + expected = numpy.vdot(b, a) + assert_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + @pytest.mark.parametrize( + "array_info", + [ + (1, 1, (), ()), + (10, 10, (10,), (10,)), + (12, 12, (4, 3), (3, 4)), + (12, 12, (4, 3), (12,)), + (60, 60, (5, 4, 3), (60,)), + (8, 8, (8,), (4, 2)), + (60, 60, (5, 3, 4), (3, 4, 5)), + ], + ids=[ + "0d_0d", + "1d_1d", + "2d_2d", + "2d_1d", + "3d_1d", + "1d_2d", + "3d_3d", + ], + ) + def test_vdot(self, dtype, array_info): + size1, size2, shape1, shape2 = array_info + a = numpy.array( + numpy.random.uniform(-5, 5, size1), dtype=dtype + ).reshape(shape1) + b = numpy.array( + numpy.random.uniform(-5, 5, size2), dtype=dtype + ).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vdot(ia, ib) + expected = numpy.vdot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize( + "array_info", + [ + (1, 1, (), ()), + (10, 10, (10,), (10,)), + (12, 12, (4, 3), (3, 4)), + (12, 12, (4, 3), (12,)), + (60, 60, (5, 4, 3), (60,)), + (8, 8, (8,), (4, 2)), + (60, 60, (5, 3, 4), (3, 4, 5)), + ], + ids=[ + "0d_0d", + "1d_1d", + "2d_2d", + "2d_1d", + "3d_1d", + "1d_2d", + "3d_3d", + ], + ) + def test_vdot_complex(self, dtype, array_info): + size1, size2, shape1, shape2 = array_info + x11 = numpy.random.uniform(-5, 5, size1) + x12 = numpy.random.uniform(-5, 5, size1) + x21 = 
numpy.random.uniform(-5, 5, size2) + x22 = numpy.random.uniform(-5, 5, size2) + a = numpy.array(x11 + 1j * x12, dtype=dtype).reshape(shape1) + b = numpy.array(x21 + 1j * x22, dtype=dtype).reshape(shape2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vdot(ia, ib) + expected = numpy.vdot(a, b) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_vdot_strided(self, dtype): + a = numpy.arange(25, dtype=dtype) + b = numpy.arange(25, dtype=dtype) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vdot(ia[::3], ib[::3]) + expected = numpy.vdot(a[::3], b[::3]) + assert_dtype_allclose(result, expected) + + result = dpnp.vdot(ia, ib[::-1]) + expected = numpy.vdot(a, b[::-1]) + assert_dtype_allclose(result, expected) + + result = dpnp.vdot(ia[::-2], ib[::-2]) + expected = numpy.vdot(a[::-2], b[::-2]) + assert_dtype_allclose(result, expected) + + result = dpnp.vdot(ia[::-5], ib[::-5]) + expected = numpy.vdot(a[::-5], b[::-5]) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype1", get_all_dtypes()) + @pytest.mark.parametrize("dtype2", get_all_dtypes()) + def test_vdot_input_dtype_matrix(self, dtype1, dtype2): + a = numpy.array(numpy.random.uniform(-5, 5, 10), dtype=dtype1) + b = numpy.array(numpy.random.uniform(-5, 5, 10), dtype=dtype2) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.vdot(ia, ib) + expected = numpy.vdot(a, b) + assert_dtype_allclose(result, expected) + + def test_vdot_error(self): + a = dpnp.ones(25) + b = dpnp.ones(24) + # size of input arrays differ + with pytest.raises(ValueError): + dpnp.vdot(a, b) + + a = dpnp.ones(25) + b = 2 + # The first array should be of size one + with pytest.raises(ValueError): + dpnp.vdot(a, b) + + # The second array should be of size one + with pytest.raises(ValueError): + dpnp.vdot(b, a) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index a8b8be52009..f6329d8f216 100644 --- 
a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -532,11 +532,11 @@ def test_reduce_hypot(device): pytest.param( "divide", [0.0, 1.0, 2.0, 3.0, 4.0], [4.0, 4.0, 4.0, 4.0, 4.0] ), - pytest.param( - "dot", - [3.0, 4.0, 5.0], - [1.0, 2.0, 3.0], - ), + # dpnp.dot has 3 different implementations based on input arrays dtype + # checking all of them + pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), + pytest.param("dot", [3, 4, 5], [1, 2, 3]), + pytest.param("dot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), pytest.param( "floor_divide", [1.0, 2.0, 3.0, 4.0], [2.5, 2.5, 2.5, 2.5] ), @@ -579,6 +579,11 @@ def test_reduce_hypot(device): [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0], ), + # dpnp.vdot has 3 different implementations based on input arrays dtype + # checking all of them + pytest.param("vdot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), + pytest.param("vdot", [3, 4, 5], [1, 2, 3]), + pytest.param("vdot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), ], ) @pytest.mark.parametrize( @@ -840,11 +845,11 @@ def test_out_1in_1out(func, data, device): pytest.param( "divide", [0.0, 1.0, 2.0, 3.0, 4.0], [4.0, 4.0, 4.0, 4.0, 4.0] ), - pytest.param( - "dot", - [3.0, 4.0, 5.0], - [1.0, 2.0, 3.0], - ), + # dpnp.dot has 3 different implementations based on input arrays dtype + # checking all of them + pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), + pytest.param("dot", [3, 4, 5], [1, 2, 3]), + pytest.param("dot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), pytest.param( "floor_divide", [1.0, 2.0, 3.0, 4.0], [2.5, 2.5, 2.5, 2.5] ), diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 171e979facf..29101cf9f48 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -492,11 +492,11 @@ def test_1in_1out(func, data, usm_type): ), pytest.param("arctan2", [[-1, +1, +1, -1]], [[-1, -1, +1, +1]]), pytest.param("copysign", [0.0, 1.0, 2.0], [-1.0, 0.0, 1.0]), - pytest.param( - "dot", - [3.0, 4.0, 5.0], - [1.0, 2.0, 3.0], 
- ), + # dpnp.dot has 3 different implementations based on input arrays dtype + # checking all of them + pytest.param("dot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), + pytest.param("dot", [3, 4, 5], [1, 2, 3]), + pytest.param("dot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), pytest.param("fmax", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), pytest.param("fmin", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), pytest.param( @@ -505,6 +505,11 @@ def test_1in_1out(func, data, usm_type): pytest.param("logaddexp", [[-1, 2, 5, 9]], [[4, -3, 2, -8]]), pytest.param("maximum", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), pytest.param("minimum", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), + # dpnp.vdot has 3 different implementations based on input arrays dtype + # checking all of them + pytest.param("vdot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), + pytest.param("vdot", [3, 4, 5], [1, 2, 3]), + pytest.param("vdot", [3 + 2j, 4 + 1j, 5], [1, 2 + 3j, 3]), ], ) @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) From d45bb24d58dcbdb7229a776bc4d5b7768293d824 Mon Sep 17 00:00:00 2001 From: vtavana <120411540+vtavana@users.noreply.github.com> Date: Wed, 7 Feb 2024 22:58:58 -0600 Subject: [PATCH 22/29] Improve performance of `dpnp.matmul` and `dpnp.dot` with `out` keyword (#1694) * use out keyword for result * fix strided or overlapping out * address comments * fix typo * remove additional check --- dpnp/dpnp_iface.py | 21 ++++---- dpnp/dpnp_utils/dpnp_utils_linearalgebra.py | 53 ++++++++++++++++----- 2 files changed, 52 insertions(+), 22 deletions(-) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index e37c2e090a6..d8838e67c8d 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -495,17 +495,20 @@ def get_result_array(a, out=None, casting="safe"): if out is None: return a else: - dpnp.check_supported_arrays_type(out) - if out.shape != a.shape: - raise ValueError( - f"Output array of shape {a.shape} is needed, got {out.shape}." 
- ) - elif isinstance(out, dpt.usm_ndarray): - out = dpnp_array._create_from_usm_ndarray(out) + if a is out: + return out + else: + dpnp.check_supported_arrays_type(out) + if out.shape != a.shape: + raise ValueError( + f"Output array of shape {a.shape} is needed, got {out.shape}." + ) + elif isinstance(out, dpt.usm_ndarray): + out = dpnp_array._create_from_usm_ndarray(out) - dpnp.copyto(out, a, casting=casting) + dpnp.copyto(out, a, casting=casting) - return out + return out def get_usm_ndarray(a): diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index bf1a3417704..3c36eda042d 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -36,6 +36,41 @@ __all__ = ["dpnp_dot", "dpnp_matmul"] +def _create_result_array(x1, x2, out, shape, dtype, usm_type, sycl_queue): + """ + Create the result array. + + If `out` is not ``None`` and its features match the specified `shape`, `dtype`, + `usm_type`, and `sycl_queue` and it is C-contiguous or F-contiguous and + does not have any memory overlap with `x1` and `x2`, `out` itself is returned. + If these conditions are not satisfied, an empty array is returned with the + specified `shape`, `dtype`, `usm_type`, and `sycl_queue`. + """ + + if out is not None: + x1_usm = dpnp.get_usm_ndarray(x1) + x2_usm = dpnp.get_usm_ndarray(x2) + out_usm = dpnp.get_usm_ndarray(out) + + if ( + out.dtype == dtype + and out.shape == shape + and out.usm_type == usm_type + and out.sycl_queue == sycl_queue + and (out.flags.c_contiguous or out.flags.f_contiguous) + and not ti._array_overlap(x1_usm, out_usm) + and not ti._array_overlap(x2_usm, out_usm) + ): + return out + + return dpnp.empty( + shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + + def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None): """ Creating a copy of input array if needed. 
@@ -214,14 +249,9 @@ def dpnp_dot(a, b, /, out=None, *, conjugate=False): a, b, dtype=None, casting="no", sycl_queue=exec_q ) - # create result array - result = dpnp.empty( - (), - dtype=dot_dtype, - usm_type=res_usm_type, - sycl_queue=exec_q, + result = _create_result_array( + a, b, out, (), dot_dtype, res_usm_type, exec_q ) - # input arrays should have the proper data type dep_events_list = [] host_tasks_list = [] @@ -367,13 +397,10 @@ def dpnp_matmul( x2_shape = x2.shape res_shape = tuple(tmp_shape) + (x1_shape[-2], x2_shape[-1]) - # calculate results - result = dpnp.empty( - res_shape, - dtype=gemm_dtype, - usm_type=res_usm_type, - sycl_queue=exec_q, + result = _create_result_array( + x1, x2, out, res_shape, gemm_dtype, res_usm_type, exec_q ) + # calculate result if result.size == 0: pass elif x1.size == 0 or x2.size == 0: From 1e8675368d598faf34d120ef5260a386159cd810 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Thu, 8 Feb 2024 13:52:36 +0100 Subject: [PATCH 23/29] Update dpnp.linalg.qr() function (#1673) * Impl dpnp.linalg.qr for 2d array * Add cupy tests for dpnp.linalg.qr * Add batch implementation of dpnp.linalg.qr * Remove an old impl of dpnp_qr * Update test_qr in test_sycl_queue * Add test_qr in test_usm_type * Use _real_type for _orgqr * Use _real_type for _orgqr_batch * Update dpnp tests for dpnp.linalg.qr * Pass scratchpad_size to the error message test * Add additional checks * Extend error handler for mkl batch funcs * Add ungqr mkl extension to support complex dtype * Update tau array size check for orgqr * Add ungqr_batch mkl extension to support complex dtype * Add arrays type check * Fix test_det_singular_matrix * Expand tests for dpnp.linalg.qr with complex types * Update examples * Remove astype for output arrays * Use empty_like instead of empty * Use ht_list_ev with dpctl.SyclEvent.wait_for * Add _triu_inplace func * Use copy_usm for a_t array overwritten by geqrf/geqrf_batch --------- Co-authored-by: Anton 
<100830759+antonwolfy@users.noreply.github.com> --- dpnp/backend/extensions/lapack/CMakeLists.txt | 6 + dpnp/backend/extensions/lapack/geqrf.cpp | 262 +++++++++++ dpnp/backend/extensions/lapack/geqrf.hpp | 63 +++ .../backend/extensions/lapack/geqrf_batch.cpp | 273 +++++++++++ dpnp/backend/extensions/lapack/lapack_py.cpp | 55 +++ dpnp/backend/extensions/lapack/orgqr.cpp | 263 +++++++++++ dpnp/backend/extensions/lapack/orgqr.hpp | 67 +++ .../backend/extensions/lapack/orgqr_batch.cpp | 278 ++++++++++++ .../extensions/lapack/types_matrix.hpp | 147 ++++++ dpnp/backend/extensions/lapack/ungqr.cpp | 263 +++++++++++ dpnp/backend/extensions/lapack/ungqr.hpp | 67 +++ .../backend/extensions/lapack/ungqr_batch.cpp | 278 ++++++++++++ dpnp/backend/include/dpnp_iface_fptr.hpp | 2 - dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 34 -- dpnp/dpnp_algo/dpnp_algo.pxd | 2 - dpnp/linalg/dpnp_algo_linalg.pyx | 56 --- dpnp/linalg/dpnp_iface_linalg.py | 70 ++- dpnp/linalg/dpnp_utils_linalg.py | 427 +++++++++++++++++- tests/test_linalg.py | 210 +++++---- tests/test_sycl_queue.py | 56 ++- tests/test_usm_type.py | 37 ++ .../cupy/linalg_tests/test_decomposition.py | 97 +++- 22 files changed, 2767 insertions(+), 246 deletions(-) create mode 100644 dpnp/backend/extensions/lapack/geqrf.cpp create mode 100644 dpnp/backend/extensions/lapack/geqrf.hpp create mode 100644 dpnp/backend/extensions/lapack/geqrf_batch.cpp create mode 100644 dpnp/backend/extensions/lapack/orgqr.cpp create mode 100644 dpnp/backend/extensions/lapack/orgqr.hpp create mode 100644 dpnp/backend/extensions/lapack/orgqr_batch.cpp create mode 100644 dpnp/backend/extensions/lapack/ungqr.cpp create mode 100644 dpnp/backend/extensions/lapack/ungqr.hpp create mode 100644 dpnp/backend/extensions/lapack/ungqr_batch.cpp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 28fa2072d7d..8f4b35f20ed 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ 
b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -27,15 +27,21 @@ set(python_module_name _lapack_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/lapack_py.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/geqrf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/geqrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gesvd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/orgqr.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/orgqr_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ungqr.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ungqr_batch.cpp ) pybind11_add_module(${python_module_name} MODULE ${_module_src}) diff --git a/dpnp/backend/extensions/lapack/geqrf.cpp b/dpnp/backend/extensions/lapack/geqrf.cpp new file mode 100644 index 00000000000..a91f689d503 --- /dev/null +++ b/dpnp/backend/extensions/lapack/geqrf.cpp @@ -0,0 +1,262 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "geqrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*geqrf_impl_fn_ptr_t)(sycl::queue, + const std::int64_t, + const std::int64_t, + char *, + std::int64_t, + char *, + std::vector &, + const std::vector &); + +static geqrf_impl_fn_ptr_t geqrf_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event geqrf_impl(sycl::queue exec_q, + const std::int64_t m, + const std::int64_t n, + char *in_a, + std::int64_t lda, + char *in_tau, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + T *tau = reinterpret_cast(in_tau); + + const std::int64_t scratchpad_size = + mkl_lapack::geqrf_scratchpad_size(exec_q, m, n, lda); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event geqrf_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + geqrf_event = mkl_lapack::geqrf( + exec_q, + m, // The 
number of rows in the matrix; (0 ≤ m). + n, // The number of columns in the matrix; (0 ≤ n). + a, // Pointer to the m-by-n matrix. + lda, // The leading dimension of `a`; (1 ≤ m). + tau, // Pointer to the array of scalar factors of the + // elementary reflectors. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail() << ", but current size is " << scratchpad_size + << "."; + } + else { + error_msg << "Unexpected MKL exception caught during geqrf() " + "call:\nreason: " + << e.what() << "\ninfo: " << info; + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg << "Unexpected SYCL exception caught during geqrf() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(geqrf_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + + return geqrf_event; +} + +std::pair + geqrf(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int tau_array_nd = tau_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (tau_array_nd 
!= 1) { + throw py::value_error("The array of Householder scalars has ndim=" + + std::to_string(tau_array_nd) + + ", but a 1-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, tau_array)) { + throw py::value_error( + "The input array and the array of Householder scalars " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + + bool is_tau_array_c_contig = tau_array.is_c_contiguous(); + bool is_tau_array_f_contig = tau_array.is_f_contiguous(); + + if (!is_tau_array_c_contig || !is_tau_array_f_contig) { + throw py::value_error("The array of Householder scalars " + "must be contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int tau_array_type_id = + array_types.typenum_to_lookup_id(tau_array.get_typenum()); + + if (a_array_type_id != tau_array_type_id) { + throw py::value_error( + "The types of the input array and " + "the array of Householder scalars are mismatched"); + } + + geqrf_impl_fn_ptr_t geqrf_fn = geqrf_dispatch_vector[a_array_type_id]; + if (geqrf_fn == nullptr) { + throw py::value_error( + "No geqrf implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + char *tau_array_data = tau_array.get_data(); + + const py::ssize_t *a_array_shape = a_array.get_shape_raw(); + + // The input array is transposed + // Change the order of getting m, n + const std::int64_t m = a_array_shape[1]; + const std::int64_t n = a_array_shape[0]; + const 
std::int64_t lda = std::max(1UL, m); + + const size_t tau_array_size = tau_array.get_size(); + const size_t min_m_n = std::max(1UL, std::min(m, n)); + + if (tau_array_size != min_m_n) { + throw py::value_error("The array of Householder scalars has size=" + + std::to_string(tau_array_size) + ", but a size=" + + std::to_string(min_m_n) + " array is expected."); + } + + std::vector host_task_events; + sycl::event geqrf_ev = geqrf_fn(q, m, n, a_array_data, lda, tau_array_data, + host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array}, + host_task_events); + + return std::make_pair(args_ev, geqrf_ev); +} + +template +struct GeqrfContigFactory +{ + fnT get() + { + if constexpr (types::GeqrfTypePairSupportFactory::is_defined) { + return geqrf_impl; + } + else { + return nullptr; + } + } +}; + +void init_geqrf_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(geqrf_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/geqrf.hpp b/dpnp/backend/extensions/lapack/geqrf.hpp new file mode 100644 index 00000000000..4ab65286b29 --- /dev/null +++ b/dpnp/backend/extensions/lapack/geqrf.hpp @@ -0,0 +1,63 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + geqrf(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + const std::vector &depends = {}); + +extern std::pair + geqrf_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + std::int64_t m, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_geqrf_batch_dispatch_vector(void); +extern void init_geqrf_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/geqrf_batch.cpp b/dpnp/backend/extensions/lapack/geqrf_batch.cpp new file mode 100644 index 00000000000..a4fe980a539 --- /dev/null +++ b/dpnp/backend/extensions/lapack/geqrf_batch.cpp @@ -0,0 +1,273 @@ +//***************************************************************************** +// Copyright (c) 
2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "geqrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*geqrf_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::vector &, + const std::vector &); + +static geqrf_batch_impl_fn_ptr_t + geqrf_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event geqrf_batch_impl(sycl::queue exec_q, + std::int64_t m, + std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + char *in_tau, + std::int64_t stride_tau, + std::int64_t batch_size, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + T *tau = reinterpret_cast(in_tau); + + const std::int64_t scratchpad_size = + mkl_lapack::geqrf_batch_scratchpad_size(exec_q, m, n, lda, stride_a, + stride_tau, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event geqrf_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + geqrf_batch_event = mkl_lapack::geqrf_batch( + exec_q, + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. 
+ // For row major layout, lda ≥ max(1, m). + stride_a, // Stride between consecutive matrices in the batch. + tau, // Pointer to the array of scalar factors of the elementary + // reflectors for each matrix in the batch. + stride_tau, // Stride between arrays of scalar factors in the batch. + batch_size, // The number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail() << ", but current size is " << scratchpad_size + << "."; + } + else if (info != 0 && e.detail() == 0) { + error_msg << "Error in batch processing. " + "Number of failed calculations: " + << info; + } + else { + error_msg << "Unexpected MKL exception caught during geqrf_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during geqrf_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(geqrf_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return geqrf_batch_event; +} + +std::pair + geqrf_batch(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + std::int64_t m, + std::int64_t n, + std::int64_t 
stride_a, + std::int64_t stride_tau, + std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int tau_array_nd = tau_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (tau_array_nd != 2) { + throw py::value_error("The array of Householder scalars has ndim=" + + std::to_string(tau_array_nd) + + ", but a 2-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, tau_array)) { + throw py::value_error( + "The input array and the array of Householder scalars " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_tau_array_c_contig = tau_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_tau_array_c_contig) { + throw py::value_error("The array of Householder scalars " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int tau_array_type_id = + array_types.typenum_to_lookup_id(tau_array.get_typenum()); + + if (a_array_type_id != tau_array_type_id) { + throw py::value_error( + "The types of the input array and " + "the array of Householder scalars are mismatched"); + } + + geqrf_batch_impl_fn_ptr_t geqrf_batch_fn = + geqrf_batch_dispatch_vector[a_array_type_id]; + if (geqrf_batch_fn == nullptr) { + throw py::value_error( + "No geqrf_batch implementation defined for the provided type " + "of the input matrix."); + } + + 
char *a_array_data = a_array.get_data(); + char *tau_array_data = tau_array.get_data(); + + const std::int64_t lda = std::max(1UL, m); + + std::vector host_task_events; + sycl::event geqrf_batch_ev = + geqrf_batch_fn(q, m, n, a_array_data, lda, stride_a, tau_array_data, + stride_tau, batch_size, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array}, + host_task_events); + + return std::make_pair(args_ev, geqrf_batch_ev); +} + +template +struct GeqrfBatchContigFactory +{ + fnT get() + { + if constexpr (types::GeqrfBatchTypePairSupportFactory::is_defined) { + return geqrf_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_geqrf_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(geqrf_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index 0c76d0fc096..eb815ac9f6b 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -30,14 +30,17 @@ #include #include +#include "geqrf.hpp" #include "gesv.hpp" #include "gesvd.hpp" #include "getrf.hpp" #include "getri.hpp" #include "heevd.hpp" #include "linalg_exceptions.hpp" +#include "orgqr.hpp" #include "potrf.hpp" #include "syevd.hpp" +#include "ungqr.hpp" namespace lapack_ext = dpnp::backend::ext::lapack; namespace py = pybind11; @@ -45,13 +48,19 @@ namespace py = pybind11; // populate dispatch vectors void init_dispatch_vectors(void) { + lapack_ext::init_geqrf_batch_dispatch_vector(); + lapack_ext::init_geqrf_dispatch_vector(); lapack_ext::init_gesv_dispatch_vector(); lapack_ext::init_getrf_batch_dispatch_vector(); lapack_ext::init_getrf_dispatch_vector(); lapack_ext::init_getri_batch_dispatch_vector(); + lapack_ext::init_orgqr_batch_dispatch_vector(); + 
lapack_ext::init_orgqr_dispatch_vector(); lapack_ext::init_potrf_batch_dispatch_vector(); lapack_ext::init_potrf_dispatch_vector(); lapack_ext::init_syevd_dispatch_vector(); + lapack_ext::init_ungqr_batch_dispatch_vector(); + lapack_ext::init_ungqr_dispatch_vector(); } // populate dispatch tables @@ -71,6 +80,20 @@ PYBIND11_MODULE(_lapack_impl, m) init_dispatch_vectors(); init_dispatch_tables(); + m.def("_geqrf_batch", &lapack_ext::geqrf_batch, + "Call `geqrf_batch` from OneMKL LAPACK library to return " + "the QR factorization of a batch general matrix ", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("tau_array"), + py::arg("m"), py::arg("n"), py::arg("stride_a"), + py::arg("stride_tau"), py::arg("batch_size"), + py::arg("depends") = py::list()); + + m.def("_geqrf", &lapack_ext::geqrf, + "Call `geqrf` from OneMKL LAPACK library to return " + "the QR factorization of a general m x n matrix ", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("tau_array"), + py::arg("depends") = py::list()); + m.def("_gesv", &lapack_ext::gesv, "Call `gesv` from OneMKL LAPACK library to return " "the solution of a system of linear equations with " @@ -114,6 +137,22 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("eig_vecs"), py::arg("eig_vals"), py::arg("depends") = py::list()); + m.def("_orgqr_batch", &lapack_ext::orgqr_batch, + "Call `_orgqr_batch` from OneMKL LAPACK library to return " + "the real orthogonal matrix Qi of the QR factorization " + "for a batch of general matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("tau_array"), + py::arg("m"), py::arg("n"), py::arg("k"), py::arg("stride_a"), + py::arg("stride_tau"), py::arg("batch_size"), + py::arg("depends") = py::list()); + + m.def("_orgqr", &lapack_ext::orgqr, + "Call `orgqr` from OneMKL LAPACK library to return " + "the real orthogonal matrix Q of the QR factorization", + py::arg("sycl_queue"), py::arg("m"), py::arg("n"), py::arg("k"), + py::arg("a_array"), py::arg("tau_array"), + py::arg("depends") = 
py::list()); + m.def("_potrf", &lapack_ext::potrf, "Call `potrf` from OneMKL LAPACK library to return " "the Cholesky factorization of a symmetric positive-definite matrix", @@ -134,4 +173,20 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("sycl_queue"), py::arg("jobz"), py::arg("upper_lower"), py::arg("eig_vecs"), py::arg("eig_vals"), py::arg("depends") = py::list()); + + m.def("_ungqr_batch", &lapack_ext::ungqr_batch, + "Call `_ungqr_batch` from OneMKL LAPACK library to return " + "the complex unitary matrices matrix Qi of the QR factorization " + "for a batch of general matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("tau_array"), + py::arg("m"), py::arg("n"), py::arg("k"), py::arg("stride_a"), + py::arg("stride_tau"), py::arg("batch_size"), + py::arg("depends") = py::list()); + + m.def("_ungqr", &lapack_ext::ungqr, + "Call `ungqr` from OneMKL LAPACK library to return " + "the complex unitary matrix Q of the QR factorization", + py::arg("sycl_queue"), py::arg("m"), py::arg("n"), py::arg("k"), + py::arg("a_array"), py::arg("tau_array"), + py::arg("depends") = py::list()); } diff --git a/dpnp/backend/extensions/lapack/orgqr.cpp b/dpnp/backend/extensions/lapack/orgqr.cpp new file mode 100644 index 00000000000..22cbbe05bee --- /dev/null +++ b/dpnp/backend/extensions/lapack/orgqr.cpp @@ -0,0 +1,263 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "orgqr.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*orgqr_impl_fn_ptr_t)(sycl::queue, + const std::int64_t, + const std::int64_t, + const std::int64_t, + char *, + std::int64_t, + char *, + std::vector &, + const std::vector &); + +static orgqr_impl_fn_ptr_t orgqr_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event orgqr_impl(sycl::queue exec_q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + char *in_a, + std::int64_t lda, + char *in_tau, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + T *tau = reinterpret_cast(in_tau); + + const std::int64_t scratchpad_size = + 
mkl_lapack::orgqr_scratchpad_size(exec_q, m, n, k, lda); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event orgqr_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + orgqr_event = mkl_lapack::orgqr( + exec_q, + m, // The number of rows in the matrix; (0 ≤ m). + n, // The number of columns in the matrix; (0 ≤ n). + k, // The number of elementary reflectors + // whose product defines the matrix Q; (0 ≤ k ≤ n). + a, // Pointer to the m-by-n matrix. + lda, // The leading dimension of `a`; (1 ≤ m). + tau, // Pointer to the array of scalar factors of the + // elementary reflectors. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. 
Required size is at least " + << e.detail() << ", but current size is " << scratchpad_size + << "."; + } + else { + error_msg << "Unexpected MKL exception caught during orgqr() " + "call:\nreason: " + << e.what() << "\ninfo: " << info; + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg << "Unexpected SYCL exception caught during orfqr() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(orgqr_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + + return orgqr_event; +} + +std::pair + orgqr(sycl::queue q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int tau_array_nd = tau_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (tau_array_nd != 1) { + throw py::value_error("The array of Householder scalars has ndim=" + + std::to_string(tau_array_nd) + + ", but a 1-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, tau_array)) { + throw py::value_error( + "The input array and the array of Householder scalars " + "are overlapping segments of memory"); + } + + 
bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + + bool is_tau_array_c_contig = tau_array.is_c_contiguous(); + bool is_tau_array_f_contig = tau_array.is_f_contiguous(); + + if (!is_tau_array_c_contig || !is_tau_array_f_contig) { + throw py::value_error("The array of Householder scalars " + "must be contiguous"); + } + + const size_t tau_array_size = tau_array.get_size(); + + if (static_cast(tau_array_size) != k) { + throw py::value_error("The array of Householder scalars has size=" + + std::to_string(tau_array_size) + + ", but an array of size=" + std::to_string(k) + + " is expected."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int tau_array_type_id = + array_types.typenum_to_lookup_id(tau_array.get_typenum()); + + if (a_array_type_id != tau_array_type_id) { + throw py::value_error( + "The types of the input array and " + "the array of Householder scalars are mismatched"); + } + + orgqr_impl_fn_ptr_t orgqr_fn = orgqr_dispatch_vector[a_array_type_id]; + if (orgqr_fn == nullptr) { + throw py::value_error( + "No orgqr implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, m); + + char *tau_array_data = tau_array.get_data(); + + std::vector host_task_events; + sycl::event orgqr_ev = orgqr_fn(q, m, n, k, a_array_data, lda, + tau_array_data, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array}, + host_task_events); + + return std::make_pair(args_ev, orgqr_ev); +} + +template +struct OrgqrContigFactory +{ + fnT get() + { + if constexpr (types::OrgqrTypePairSupportFactory::is_defined) { + return orgqr_impl; + } + else { + return nullptr; + } + } +}; + +void init_orgqr_dispatch_vector(void) +{ + 
dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(orgqr_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/orgqr.hpp b/dpnp/backend/extensions/lapack/orgqr.hpp new file mode 100644 index 00000000000..9cc4f530d03 --- /dev/null +++ b/dpnp/backend/extensions/lapack/orgqr.hpp @@ -0,0 +1,67 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + orgqr(sycl::queue exec_q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + const std::vector &depends = {}); + +extern std::pair + orgqr_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + std::int64_t m, + std::int64_t n, + std::int64_t k, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_orgqr_batch_dispatch_vector(void); +extern void init_orgqr_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/orgqr_batch.cpp b/dpnp/backend/extensions/lapack/orgqr_batch.cpp new file mode 100644 index 00000000000..dfa9932a8e0 --- /dev/null +++ b/dpnp/backend/extensions/lapack/orgqr_batch.cpp @@ -0,0 +1,278 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "orgqr.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*orgqr_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + std::int64_t, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::vector &, + const std::vector &); + +static orgqr_batch_impl_fn_ptr_t + orgqr_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event orgqr_batch_impl(sycl::queue exec_q, + std::int64_t m, + std::int64_t n, + std::int64_t k, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + char *in_tau, + std::int64_t stride_tau, + std::int64_t batch_size, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = 
reinterpret_cast(in_a); + T *tau = reinterpret_cast(in_tau); + + const std::int64_t scratchpad_size = + mkl_lapack::orgqr_batch_scratchpad_size( + exec_q, m, n, k, lda, stride_a, stride_tau, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event orgqr_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + orgqr_batch_event = mkl_lapack::orgqr_batch( + exec_q, + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + k, // The number of elementary reflectors + // whose product defines the matrices Qi; (0 ≤ k ≤ n). + a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). + stride_a, // Stride between consecutive matrices in the batch. + tau, // Pointer to the array of scalar factors of the elementary + // reflectors for each matrix in the batch. + stride_tau, // Stride between arrays of scalar factors in the batch. + batch_size, // The number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail() << ", but current size is " << scratchpad_size + << "."; + } + else if (info != 0 && e.detail() == 0) { + error_msg << "Error in batch processing. 
" + "Number of failed calculations: " + << info; + } + else { + error_msg << "Unexpected MKL exception caught during orgqr_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during orgqr_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(orgqr_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return orgqr_batch_event; +} + +std::pair + orgqr_batch(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + std::int64_t m, + std::int64_t n, + std::int64_t k, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int tau_array_nd = tau_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (tau_array_nd != 2) { + throw py::value_error("The array of Householder scalars has ndim=" + + std::to_string(tau_array_nd) + + ", but a 2-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, tau_array)) { + throw py::value_error( + "The input array and the array of Householder scalars " + 
"are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_tau_array_c_contig = tau_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_tau_array_c_contig) { + throw py::value_error("The array of Householder scalars " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int tau_array_type_id = + array_types.typenum_to_lookup_id(tau_array.get_typenum()); + + if (a_array_type_id != tau_array_type_id) { + throw py::value_error( + "The types of the input array and " + "the array of Householder scalars are mismatched"); + } + + orgqr_batch_impl_fn_ptr_t orgqr_batch_fn = + orgqr_batch_dispatch_vector[a_array_type_id]; + if (orgqr_batch_fn == nullptr) { + throw py::value_error( + "No orgqr_batch implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + char *tau_array_data = tau_array.get_data(); + + const std::int64_t lda = std::max(1UL, m); + + std::vector host_task_events; + sycl::event orgqr_batch_ev = + orgqr_batch_fn(q, m, n, k, a_array_data, lda, stride_a, tau_array_data, + stride_tau, batch_size, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array}, + host_task_events); + + return std::make_pair(args_ev, orgqr_batch_ev); +} + +template +struct OrgqrBatchContigFactory +{ + fnT get() + { + if constexpr (types::OrgqrBatchTypePairSupportFactory::is_defined) { + return orgqr_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_orgqr_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(orgqr_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git 
a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index 893619e6afb..9a0ab36c8a4 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -43,6 +43,61 @@ namespace lapack { namespace types { +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::geqrf_batch + * function. + * + * @tparam T Type of array containing the input matrices to be QR factorized in + * batch mode. Upon execution, each matrix in the batch is transformed to output + * arrays representing their respective orthogonal matrix Q and upper triangular + * matrix R. + */ +template +struct GeqrfBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::geqrf + * function. + * + * @tparam T Type of array containing the input matrix to be QR factorized. + * Upon execution, this matrix is transformed to output arrays representing + * the orthogonal matrix Q and the upper triangular matrix R. 
+ */ +template +struct GeqrfTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL LAPACK library provides support in oneapi::mkl::lapack::gesv @@ -190,6 +245,46 @@ struct HeevdTypePairSupportFactory dpctl_td_ns::NotDefinedEntry>::is_defined; }; +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::orgqr_batch + * function. + * + * @tparam T Type of array containing the matrix A, + * each from a separate instance in the batch, from which the + * elementary reflectors were generated (as in QR factorization). + * Upon execution, each array in the batch is overwritten with + * its respective orthonormal matrix Q. + */ +template +struct OrgqrBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::orgqr + * function. + * + * @tparam T Type of array containing the matrix A from which the + * elementary reflectors were generated (as in QR factorization). + * Upon execution, the array is overwritten with the orthonormal matrix Q. 
+ */ +template +struct OrgqrTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL LAPACK library provides support in oneapi::mkl::lapack::potrf @@ -259,6 +354,58 @@ struct SyevdTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::ungqr_batch + * function. + * + * @tparam T Type of array containing the matrix A, + * each from a separate instance in the batch, from which the + * elementary reflectors were generated (as in QR factorization). + * Upon execution, each array in the batch is overwritten with + * its respective complex unitary matrix Q. + */ +template +struct UngqrBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::ungqr + * function. + * + * @tparam T Type of array containing the matrix A from which the + * elementary reflectors were generated (as in QR factorization). + * Upon execution, the array is overwritten with the complex unitary matrix Q. 
+ */ +template +struct UngqrTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; } // namespace types } // namespace lapack } // namespace ext diff --git a/dpnp/backend/extensions/lapack/ungqr.cpp b/dpnp/backend/extensions/lapack/ungqr.cpp new file mode 100644 index 00000000000..7c8dea4e950 --- /dev/null +++ b/dpnp/backend/extensions/lapack/ungqr.cpp @@ -0,0 +1,263 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "types_matrix.hpp" +#include "ungqr.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*ungqr_impl_fn_ptr_t)(sycl::queue, + const std::int64_t, + const std::int64_t, + const std::int64_t, + char *, + std::int64_t, + char *, + std::vector &, + const std::vector &); + +static ungqr_impl_fn_ptr_t ungqr_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event ungqr_impl(sycl::queue exec_q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + char *in_a, + std::int64_t lda, + char *in_tau, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + T *tau = reinterpret_cast(in_tau); + + const std::int64_t scratchpad_size = + mkl_lapack::ungqr_scratchpad_size(exec_q, m, n, k, lda); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event ungqr_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + 
ungqr_event = mkl_lapack::ungqr( + exec_q, + m, // The number of rows in the matrix; (0 ≤ m). + n, // The number of columns in the matrix; (0 ≤ n). + k, // The number of elementary reflectors + // whose product defines the matrix Q; (0 ≤ k ≤ n). + a, // Pointer to the m-by-n matrix. + lda, // The leading dimension of `a`; (1 ≤ m). + tau, // Pointer to the array of scalar factors of the + // elementary reflectors. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail() << ", but current size is " << scratchpad_size + << "."; + } + else { + error_msg << "Unexpected MKL exception caught during ungqr() " + "call:\nreason: " + << e.what() << "\ninfo: " << info; + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg << "Unexpected SYCL exception caught during orfqr() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(ungqr_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + + return ungqr_event; +} + +std::pair + ungqr(sycl::queue q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const 
int tau_array_nd = tau_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (tau_array_nd != 1) { + throw py::value_error("The array of Householder scalars has ndim=" + + std::to_string(tau_array_nd) + + ", but a 1-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, tau_array)) { + throw py::value_error( + "The input array and the array of Householder scalars " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + + bool is_tau_array_c_contig = tau_array.is_c_contiguous(); + bool is_tau_array_f_contig = tau_array.is_f_contiguous(); + + if (!is_tau_array_c_contig || !is_tau_array_f_contig) { + throw py::value_error("The array of Householder scalars " + "must be contiguous"); + } + + const size_t tau_array_size = tau_array.get_size(); + + if (static_cast(tau_array_size) != k) { + throw py::value_error("The array of Householder scalars has size=" + + std::to_string(tau_array_size) + + ", but an array of size=" + std::to_string(k) + + " is expected."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int tau_array_type_id = + array_types.typenum_to_lookup_id(tau_array.get_typenum()); + + if (a_array_type_id != tau_array_type_id) { + throw py::value_error( + "The types of the input array and " + "the array of Householder scalars are mismatched"); + } + + ungqr_impl_fn_ptr_t ungqr_fn = 
ungqr_dispatch_vector[a_array_type_id]; + if (ungqr_fn == nullptr) { + throw py::value_error( + "No ungqr implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, m); + + char *tau_array_data = tau_array.get_data(); + + std::vector host_task_events; + sycl::event ungqr_ev = ungqr_fn(q, m, n, k, a_array_data, lda, + tau_array_data, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array}, + host_task_events); + + return std::make_pair(args_ev, ungqr_ev); +} + +template +struct UngqrContigFactory +{ + fnT get() + { + if constexpr (types::UngqrTypePairSupportFactory::is_defined) { + return ungqr_impl; + } + else { + return nullptr; + } + } +}; + +void init_ungqr_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(ungqr_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/ungqr.hpp b/dpnp/backend/extensions/lapack/ungqr.hpp new file mode 100644 index 00000000000..1a9b68e94f9 --- /dev/null +++ b/dpnp/backend/extensions/lapack/ungqr.hpp @@ -0,0 +1,67 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + ungqr(sycl::queue exec_q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + const std::vector &depends = {}); + +extern std::pair + ungqr_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + std::int64_t m, + std::int64_t n, + std::int64_t k, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_ungqr_batch_dispatch_vector(void); +extern void init_ungqr_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/ungqr_batch.cpp b/dpnp/backend/extensions/lapack/ungqr_batch.cpp new file mode 100644 index 00000000000..c07eaf150fc --- /dev/null +++ b/dpnp/backend/extensions/lapack/ungqr_batch.cpp @@ -0,0 +1,278 @@ 
+//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "types_matrix.hpp" +#include "ungqr.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*ungqr_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + std::int64_t, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::vector &, + const std::vector &); + +static ungqr_batch_impl_fn_ptr_t + ungqr_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event ungqr_batch_impl(sycl::queue exec_q, + std::int64_t m, + std::int64_t n, + std::int64_t k, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + char *in_tau, + std::int64_t stride_tau, + std::int64_t batch_size, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + T *tau = reinterpret_cast(in_tau); + + const std::int64_t scratchpad_size = + mkl_lapack::ungqr_batch_scratchpad_size( + exec_q, m, n, k, lda, stride_a, stride_tau, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event ungqr_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + ungqr_batch_event = mkl_lapack::ungqr_batch( + exec_q, + m, // The number of rows in each matrix in the batch; (0 ≤ m). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + k, // The number of elementary reflectors + // whose product defines the matrices Qi; (0 ≤ k ≤ n). 
+ a, // Pointer to the batch of matrices, each of size (m x n). + lda, // The leading dimension of each matrix in the batch. + // For row major layout, lda ≥ max(1, m). + stride_a, // Stride between consecutive matrices in the batch. + tau, // Pointer to the array of scalar factors of the elementary + // reflectors for each matrix in the batch. + stride_tau, // Stride between arrays of scalar factors in the batch. + batch_size, // The number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail() << ", but current size is " << scratchpad_size + << "."; + } + else if (info != 0 && e.detail() == 0) { + error_msg << "Error in batch processing. 
" + "Number of failed calculations: " + << info; + } + else { + error_msg << "Unexpected MKL exception caught during ungqr_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during ungqr_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(ungqr_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return ungqr_batch_event; +} + +std::pair + ungqr_batch(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray tau_array, + std::int64_t m, + std::int64_t n, + std::int64_t k, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int tau_array_nd = tau_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (tau_array_nd != 2) { + throw py::value_error("The array of Householder scalars has ndim=" + + std::to_string(tau_array_nd) + + ", but a 2-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(q, {a_array, tau_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, tau_array)) { + throw py::value_error( + "The input array and the array of Householder scalars " + 
"are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_tau_array_c_contig = tau_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_tau_array_c_contig) { + throw py::value_error("The array of Householder scalars " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + int tau_array_type_id = + array_types.typenum_to_lookup_id(tau_array.get_typenum()); + + if (a_array_type_id != tau_array_type_id) { + throw py::value_error( + "The types of the input array and " + "the array of Householder scalars are mismatched"); + } + + ungqr_batch_impl_fn_ptr_t ungqr_batch_fn = + ungqr_batch_dispatch_vector[a_array_type_id]; + if (ungqr_batch_fn == nullptr) { + throw py::value_error( + "No ungqr_batch implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + char *tau_array_data = tau_array.get_data(); + + const std::int64_t lda = std::max(1UL, m); + + std::vector host_task_events; + sycl::event ungqr_batch_ev = + ungqr_batch_fn(q, m, n, k, a_array_data, lda, stride_a, tau_array_data, + stride_tau, batch_size, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive(q, {a_array, tau_array}, + host_task_events); + + return std::make_pair(args_ev, ungqr_batch_ev); +} + +template +struct UngqrBatchContigFactory +{ + fnT get() + { + if constexpr (types::UngqrBatchTypePairSupportFactory::is_defined) { + return ungqr_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_ungqr_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(ungqr_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git 
a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 3061bb01f29..e9a3458f84a 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -220,8 +220,6 @@ enum class DPNPFuncName : size_t DPNP_FN_PUT, /**< Used in numpy.put() impl */ DPNP_FN_PUT_ALONG_AXIS, /**< Used in numpy.put_along_axis() impl */ DPNP_FN_QR, /**< Used in numpy.linalg.qr() impl */ - DPNP_FN_QR_EXT, /**< Used in numpy.linalg.qr() impl, requires extra - parameters */ DPNP_FN_RADIANS, /**< Used in numpy.radians() impl */ DPNP_FN_RADIANS_EXT, /**< Used in numpy.radians() impl, requires extra parameters */ diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 610da8fda3c..d74c593115e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -722,17 +722,6 @@ template void (*dpnp_qr_default_c)(void *, void *, void *, void *, size_t, size_t) = dpnp_qr_c<_InputDT, _ComputeDT>; -template -DPCTLSyclEventRef (*dpnp_qr_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - void *, - void *, - size_t, - size_t, - const DPCTLEventVectorRef) = - dpnp_qr_c<_InputDT, _ComputeDT>; - template DPCTLSyclEventRef dpnp_svd_c(DPCTLSyclQueueRef q_ref, void *array1_in, @@ -1000,29 +989,6 @@ void func_map_init_linalg_func(func_map_t &fmap) // fmap[DPNPFuncName::DPNP_FN_QR][eft_C128][eft_C128] = { // eft_C128, (void*)dpnp_qr_c, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_INT][eft_INT] = { - get_default_floating_type(), - (void *)dpnp_qr_ext_c< - int32_t, func_type_map_t::find_type>, - get_default_floating_type(), - (void *)dpnp_qr_ext_c< - int32_t, func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_LNG][eft_LNG] = { - get_default_floating_type(), - (void *)dpnp_qr_ext_c< - int64_t, func_type_map_t::find_type>, - get_default_floating_type(), - (void *)dpnp_qr_ext_c< - int64_t, 
func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_qr_ext_c}; - fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_qr_ext_c}; - // fmap[DPNPFuncName::DPNP_FN_QR_EXT][eft_C128][eft_C128] = { - // eft_C128, (void*)dpnp_qr_c, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_svd_default_c}; fmap[DPNPFuncName::DPNP_FN_SVD][eft_LNG][eft_LNG] = { diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 2fc7e1b4a3b..71382d38f26 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -94,8 +94,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_PARTITION DPNP_FN_PARTITION_EXT DPNP_FN_PLACE - DPNP_FN_QR - DPNP_FN_QR_EXT DPNP_FN_RADIANS DPNP_FN_RADIANS_EXT DPNP_FN_RNG_BETA diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index 3bf6dad3ee8..67cd5d93034 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -50,7 +50,6 @@ __all__ = [ "dpnp_eigvals", "dpnp_matrix_rank", "dpnp_norm", - "dpnp_qr", ] @@ -323,58 +322,3 @@ cpdef object dpnp_norm(object input, ord=None, axis=None): return ret else: raise ValueError("Improper number of dimensions to norm.") - - -cpdef tuple dpnp_qr(utils.dpnp_descriptor x1, str mode): - cdef size_t size_m = x1.shape[0] - cdef size_t size_n = x1.shape[1] - cdef size_t min_m_n = min(size_m, size_n) - cdef size_t size_tau = min_m_n - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_QR_EXT, param1_type, param1_type) - - x1_obj = x1.get_array() - - cdef (DPNPFuncType, void *) ret_type_and_func = utils.get_ret_type_and_func(kernel_data, - x1_obj.sycl_device.has_aspect_fp64) - cdef DPNPFuncType return_type = ret_type_and_func[0] - cdef 
custom_linalg_1in_3out_shape_t func = < custom_linalg_1in_3out_shape_t > ret_type_and_func[1] - - cdef utils.dpnp_descriptor res_q = utils.create_output_descriptor((size_m, min_m_n), - return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - cdef utils.dpnp_descriptor res_r = utils.create_output_descriptor((min_m_n, size_n), - return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - cdef utils.dpnp_descriptor tau = utils.create_output_descriptor((size_tau, ), - return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - - result_sycl_queue = res_q.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - x1.get_data(), - res_q.get_data(), - res_r.get_data(), - tau.get_data(), - size_m, - size_n, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return (res_q.get_pyobj(), res_r.get_pyobj()) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index 2b8506130ad..88a904b3c3c 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -51,6 +51,7 @@ dpnp_det, dpnp_eigh, dpnp_inv, + dpnp_qr, dpnp_slogdet, dpnp_solve, dpnp_svd, @@ -529,7 +530,7 @@ def norm(x1, ord=None, axis=None, keepdims=False): return call_origin(numpy.linalg.norm, x1, ord, axis, keepdims) -def qr(x1, mode="reduced"): +def qr(a, mode="reduced"): """ Compute the qr factorization of a matrix. @@ -538,25 +539,64 @@ def qr(x1, mode="reduced"): For full documentation refer to :obj:`numpy.linalg.qr`. - Limitations - ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Parameter mode='reduced' is supported. 
+ Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + The input array with the dimensionality of at least 2. + mode : {"reduced", "complete", "r", "raw"}, optional + If K = min(M, N), then + - "reduced" : returns Q, R with dimensions (…, M, K), (…, K, N) + - "complete" : returns Q, R with dimensions (…, M, M), (…, M, N) + - "r" : returns R only with dimensions (…, K, N) + - "raw" : returns h, tau with dimensions (…, N, M), (…, K,) + Default: "reduced". + + Returns + ------- + When mode is "reduced" or "complete", the result will be a namedtuple with + the attributes Q and R. + Q : dpnp.ndarray + A matrix with orthonormal columns. + When mode = "complete" the result is an orthogonal/unitary matrix + depending on whether or not a is real/complex. + The determinant may be either +/- 1 in that case. + In case the number of dimensions in the input array is greater + than 2 then a stack of the matrices with above properties is returned. + R : dpnp.ndarray + The upper-triangular matrix or a stack of upper-triangular matrices + if the number of dimensions in the input array is greater than 2. + (h, tau) : tuple of dpnp.ndarray + The h array contains the Householder reflectors that generate Q along with R. + The tau array contains scaling factors for the reflectors. 
+ + Examples + -------- + >>> import dpnp as np + >>> a = np.random.randn(9, 6) + >>> Q, R = np.linalg.qr(a) + >>> np.allclose(a, np.dot(Q, R)) # a does equal QR + array([ True]) + >>> R2 = np.linalg.qr(a, mode='r') + >>> np.allclose(R, R2) # mode='r' returns the same R as mode='reduced' + array([ True]) + >>> a = np.random.normal(size=(3, 2, 2)) # Stack of 2 x 2 matrices as input + >>> Q, R = np.linalg.qr(a) + >>> Q.shape + (3, 2, 2) + >>> R.shape + (3, 2, 2) + >>> np.allclose(a, np.matmul(Q, R)) + array([ True]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if x1_desc.ndim != 2: - pass - elif mode != "reduced": - pass - else: - result_tup = dpnp_qr(x1_desc, mode) + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) - return result_tup + if mode not in ("reduced", "complete", "r", "raw"): + raise ValueError(f"Unrecognized mode {mode}") - return call_origin(numpy.linalg.qr, x1, mode) + return dpnp_qr(a, mode) def solve(a, b): diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index 93f41883133..a6dcfbf0c2b 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -39,6 +39,7 @@ "dpnp_det", "dpnp_eigh", "dpnp_inv", + "dpnp_qr", "dpnp_slogdet", "dpnp_solve", "dpnp_svd", @@ -126,29 +127,6 @@ def _check_lapack_dev_info(dev_info, error_msg=None): raise dpnp.linalg.LinAlgError(error_msg) -def _real_type(dtype, device=None): - """ - Returns the real data type corresponding to a given dpnp data type. - - Parameters - ---------- - dtype : dpnp.dtype - The dtype for which to find the corresponding real data type. - device : {None, string, SyclDevice, SyclQueue}, optional - An array API concept of device where an array of default floating type might be created. - - Returns - ------- - out : str - The name of the real data type. 
- - """ - - default = dpnp.default_float_type(device) - real_type = _real_types_map.get(dtype.name, default) - return dpnp.dtype(real_type) - - def _common_type(*arrays): """ Common type for linear algebra operations. @@ -403,6 +381,29 @@ def _lu_factor(a, res_type): return (a_h, ipiv_h, dev_info_array) +def _real_type(dtype, device=None): + """ + Returns the real data type corresponding to a given dpnp data type. + + Parameters + ---------- + dtype : dpnp.dtype + The dtype for which to find the corresponding real data type. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array of default floating type might be created. + + Returns + ------- + out : str + The name of the real data type. + + """ + + default = dpnp.default_float_type(device) + real_type = _real_types_map.get(dtype.name, default) + return dpnp.dtype(real_type) + + def _stacked_identity( batch_shape, n, dtype, usm_type="device", sycl_queue=None ): @@ -447,6 +448,48 @@ def _stacked_identity( return x +def _triu_inplace(a, host_tasks, depends=None): + """ + _triu_inplace(a, host_tasks, depends=None) + + Computes the upper triangular part of an array in-place, + but currently allocates extra memory for the result. + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array from which the upper triangular part is to be extracted. + host_tasks : list + A list to which the function appends the host event corresponding to the computation. + This allows for dependency management and synchronization with other tasks. + depends : list, optional + A list of events that the triangular operation depends on. + These tasks are completed before the triangular computation starts. + If ``None``, defaults to an empty list. + + Returns + ------- + out : dpnp.ndarray + A new array containing the upper triangular part of the input array `a`. 
+ + """ + + # TODO: implement a dedicated kernel for in-place triu instead of + # extra memory allocation for result + if depends is None: + depends = [] + out = dpnp.empty_like(a, order="C") + ht_triu_ev, _ = ti._triu( + src=a.get_array(), + dst=out.get_array(), + k=0, + sycl_queue=a.sycl_queue, + depends=depends, + ) + host_tasks.append(ht_triu_ev) + return out + + def check_stacked_2d(*arrays): """ Return ``True`` if each array in `arrays` has at least two dimensions. @@ -955,6 +998,344 @@ def dpnp_inv(a): return b_f +def dpnp_qr_batch(a, mode="reduced"): + """ + dpnp_qr_batch(a, mode="reduced") + + Return the batched qr factorization of `a` matrix. + + """ + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + m, n = a.shape[-2:] + k = min(m, n) + + batch_shape = a.shape[:-2] + batch_size = prod(batch_shape) + + res_type = _common_type(a) + + if batch_size == 0 or k == 0: + if mode == "reduced": + return ( + dpnp.empty_like( + a, + shape=batch_shape + (m, k), + dtype=res_type, + ), + dpnp.empty_like( + a, + shape=batch_shape + (k, n), + dtype=res_type, + ), + ) + elif mode == "complete": + q = _stacked_identity( + batch_shape, + m, + dtype=res_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return ( + q, + dpnp.empty_like( + a, + shape=batch_shape + (m, n), + dtype=res_type, + ), + ) + elif mode == "r": + return dpnp.empty_like( + a, + shape=batch_shape + (k, n), + dtype=res_type, + ) + else: # mode=="raw" + return ( + dpnp.empty_like( + a, + shape=batch_shape + (n, m), + dtype=res_type, + ), + dpnp.empty_like( + a, + shape=batch_shape + (k,), + dtype=res_type, + ), + ) + + # get 3d input arrays by reshape + a = a.reshape(-1, m, n) + + a = a.swapaxes(-2, -1) + a_usm_arr = dpnp.get_usm_ndarray(a) + + a_t = dpnp.empty_like(a, order="C", dtype=res_type) + + # use DPCTL tensor function to fill the matrix array + # with content from the input array `a` + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, 
dst=a_t.get_array(), sycl_queue=a_sycl_queue + ) + + tau_h = dpnp.empty_like( + a_t, + shape=(batch_size, k), + dtype=res_type, + ) + + a_stride = a_t.strides[0] + tau_stride = tau_h.strides[0] + + # Call the LAPACK extension function _geqrf_batch to compute the QR factorization + # of a general m x n matrix. + ht_geqrf_batch_ev, geqrf_batch_ev = li._geqrf_batch( + a_sycl_queue, + a_t.get_array(), + tau_h.get_array(), + m, + n, + a_stride, + tau_stride, + batch_size, + [a_copy_ev], + ) + + ht_list_ev = [ht_geqrf_batch_ev, a_ht_copy_ev] + + if mode in ["r", "raw"]: + if mode == "r": + r = a_t[..., :k].swapaxes(-2, -1) + r = _triu_inplace(r, ht_list_ev, [geqrf_batch_ev]) + dpctl.SyclEvent.wait_for(ht_list_ev) + return r.reshape(batch_shape + r.shape[-2:]) + + # mode=="raw" + dpctl.SyclEvent.wait_for(ht_list_ev) + q = a_t.reshape(batch_shape + a_t.shape[-2:]) + r = tau_h.reshape(batch_shape + tau_h.shape[-1:]) + return (q, r) + + if mode == "complete" and m > n: + mc = m + q = dpnp.empty_like( + a_t, + shape=(batch_size, m, m), + dtype=res_type, + ) + else: + mc = k + q = dpnp.empty_like( + a_t, + shape=(batch_size, n, m), + dtype=res_type, + ) + + # use DPCTL tensor function to fill the matrix array `q[..., :n, :]` + # with content from the array `a_t` overwritten by geqrf_batch + a_t_ht_copy_ev, a_t_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_t.get_array(), + dst=q[..., :n, :].get_array(), + sycl_queue=a_sycl_queue, + depends=[geqrf_batch_ev], + ) + + ht_list_ev.append(a_t_ht_copy_ev) + + q_stride = q.strides[0] + tau_stride = tau_h.strides[0] + + # Get LAPACK function (_orgqr_batch for real or _ungqr_batch for complex data types) + # for QR factorization + lapack_func = ( + "_ungqr_batch" + if dpnp.issubdtype(res_type, dpnp.complexfloating) + else "_orgqr_batch" + ) + + # Call the LAPACK extension function _orgqr_batch/_ungqr_batch to generate the real orthogonal/ + # complex unitary matrices `Qi` of the QR factorization + # for a batch of general matrices. 
+ ht_lapack_ev, lapack_ev = getattr(li, lapack_func)( + a_sycl_queue, + q.get_array(), + tau_h.get_array(), + m, + mc, + k, + q_stride, + tau_stride, + batch_size, + [a_t_copy_ev], + ) + + ht_list_ev.append(ht_lapack_ev) + + q = q[..., :mc, :].swapaxes(-2, -1) + r = a_t[..., :mc].swapaxes(-2, -1) + + ht_list_ev.append(ht_lapack_ev) + + r = _triu_inplace(r, ht_list_ev, [lapack_ev]) + dpctl.SyclEvent.wait_for(ht_list_ev) + + return ( + q.reshape(batch_shape + q.shape[-2:]), + r.reshape(batch_shape + r.shape[-2:]), + ) + + +def dpnp_qr(a, mode="reduced"): + """ + dpnp_qr(a, mode="reduced") + + Return the qr factorization of `a` matrix. + + """ + + if a.ndim > 2: + return dpnp_qr_batch(a, mode=mode) + + a_usm_arr = dpnp.get_usm_ndarray(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + res_type = _common_type(a) + + m, n = a.shape + k = min(m, n) + if k == 0: + if mode == "reduced": + return dpnp.empty_like( + a, + shape=(m, 0), + dtype=res_type, + ), dpnp.empty_like( + a, + shape=(0, n), + dtype=res_type, + ) + elif mode == "complete": + return dpnp.identity( + m, dtype=res_type, sycl_queue=a_sycl_queue, usm_type=a_usm_type + ), dpnp.empty_like( + a, + shape=(m, n), + dtype=res_type, + ) + elif mode == "r": + return dpnp.empty_like( + a, + shape=(0, n), + dtype=res_type, + ) + else: # mode == "raw" + return dpnp.empty_like( + a, + shape=(n, m), + dtype=res_type, + ), dpnp.empty_like( + a, + shape=(0,), + dtype=res_type, + ) + + # Transpose the input matrix to convert from row-major to column-major order. + # This adjustment is necessary for compatibility with OneMKL LAPACK routines, + # which expect matrices in column-major format. + # This allows data to be handled efficiently without the need for additional conversion. 
+ a = a.T + a_usm_arr = dpnp.get_usm_ndarray(a) + a_t = dpnp.empty_like(a, order="C", dtype=res_type) + + # use DPCTL tensor function to fill the matrix array + # with content from the input array `a` + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_t.get_array(), sycl_queue=a_sycl_queue + ) + + tau_h = dpnp.empty_like( + a, + shape=(k,), + dtype=res_type, + ) + + # Call the LAPACK extension function _geqrf to compute the QR factorization + # of a general m x n matrix. + ht_geqrf_ev, geqrf_ev = li._geqrf( + a_sycl_queue, a_t.get_array(), tau_h.get_array(), [a_copy_ev] + ) + + ht_list_ev = [ht_geqrf_ev, a_ht_copy_ev] + + if mode in ["r", "raw"]: + if mode == "r": + r = a_t[:, :k].transpose() + r = _triu_inplace(r, ht_list_ev, [geqrf_ev]) + dpctl.SyclEvent.wait_for(ht_list_ev) + return r + + # mode == "raw": + dpctl.SyclEvent.wait_for(ht_list_ev) + return (a_t, tau_h) + + # mc is the total number of columns in the q matrix. + # In `complete` mode, mc equals the number of rows. + # In `reduced` mode, mc is the lesser of the row count or column count. 
+ if mode == "complete" and m > n: + mc = m + q = dpnp.empty_like( + a_t, + shape=(m, m), + dtype=res_type, + ) + else: + mc = k + q = dpnp.empty_like( + a_t, + shape=(n, m), + dtype=res_type, + ) + + # use DPCTL tensor function to fill the matrix array `q[:n]` + # with content from the array `a_t` overwritten by geqrf + a_t_ht_copy_ev, a_t_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_t.get_array(), + dst=q[:n].get_array(), + sycl_queue=a_sycl_queue, + depends=[geqrf_ev], + ) + + ht_list_ev.append(a_t_ht_copy_ev) + + # Get LAPACK function (_orgqr for real or _ungqr for complex data types) + # for QR factorization + lapack_func = ( + "_ungqr" + if dpnp.issubdtype(res_type, dpnp.complexfloating) + else "_orgqr" + ) + + # Call the LAPACK extension function _orgqr/_ungqr to generate the real orthogonal/ + # complex unitary matrix `Q` of the QR factorization + ht_lapack_ev, lapack_ev = getattr(li, lapack_func)( + a_sycl_queue, m, mc, k, q.get_array(), tau_h.get_array(), [a_t_copy_ev] + ) + + q = q[:mc].transpose() + r = a_t[:, :mc].transpose() + + ht_list_ev.append(ht_lapack_ev) + + r = _triu_inplace(r, ht_list_ev, [lapack_ev]) + dpctl.SyclEvent.wait_for(ht_list_ev) + + return (q, r) + + def dpnp_solve(a, b): """ dpnp_solve(a, b) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 85206bad5ba..8e32b867b85 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,7 +1,12 @@ import dpctl import numpy import pytest -from numpy.testing import assert_allclose, assert_array_equal, assert_raises +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_equal, + assert_raises, +) import dpnp as inp from tests.third_party.cupy import testing @@ -308,8 +313,8 @@ def test_det_singular_matrix(self, matrix): a_np = numpy.array(matrix, dtype="float32") a_dp = inp.array(a_np) - expected = numpy.linalg.slogdet(a_np) - result = inp.linalg.slogdet(a_dp) + expected = numpy.linalg.det(a_np) + result = inp.linalg.det(a_dp) 
assert_allclose(expected, result, rtol=1e-3, atol=1e-4) @@ -672,88 +677,141 @@ def test_norm3(array, ord, axis): assert_allclose(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) -@pytest.mark.parametrize( - "shape", - [(2, 2), (3, 4), (5, 3), (16, 16), (0, 0), (0, 2), (2, 0)], - ids=["(2,2)", "(3,4)", "(5,3)", "(16,16)", "(0,0)", "(0,2)", "(2,0)"], -) -@pytest.mark.parametrize( - "mode", ["complete", "reduced"], ids=["complete", "reduced"] -) -def test_qr(type, shape, mode): - a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape) - ia = inp.array(a) +class TestQr: + # TODO: New packages that fix issue CMPLRLLVM-53771 are only available in internal CI. + # Skip the tests on cpu until these packages are available for the external CI. + # Specifically dpcpp_linux-64>=2024.1.0 + @pytest.mark.skipif(is_cpu_device(), reason="CMPLRLLVM-53771") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "shape", + [(2, 2), (3, 4), (5, 3), (16, 16), (2, 2, 2), (2, 4, 2), (2, 2, 4)], + ids=[ + "(2, 2)", + "(3, 4)", + "(5, 3)", + "(16, 16)", + "(2, 2, 2)", + "(2, 4, 2)", + "(2, 2, 4)", + ], + ) + @pytest.mark.parametrize( + "mode", + ["r", "raw", "complete", "reduced"], + ids=["r", "raw", "complete", "reduced"], + ) + def test_qr(self, dtype, shape, mode): + a = numpy.random.rand(*shape).astype(dtype) + ia = inp.array(a) + + if mode == "r": + np_r = numpy.linalg.qr(a, mode) + dpnp_r = inp.linalg.qr(ia, mode) + else: + np_q, np_r = numpy.linalg.qr(a, mode) + dpnp_q, dpnp_r = inp.linalg.qr(ia, mode) + + # check decomposition + if mode in ("complete", "reduced"): + if a.ndim == 2: + assert_almost_equal( + inp.dot(dpnp_q, dpnp_r), + a, + decimal=5, + ) + else: # a.ndim > 2 + assert_almost_equal( + inp.matmul(dpnp_q, dpnp_r), + a, + decimal=5, + ) + else: # mode=="raw" + assert_dtype_allclose(dpnp_q, np_q) - np_q, np_r = 
numpy.linalg.qr(a, mode) - dpnp_q, dpnp_r = inp.linalg.qr(ia, mode) - - support_aspect64 = has_support_aspect64() - - if support_aspect64: - assert dpnp_q.dtype == np_q.dtype - assert dpnp_r.dtype == np_r.dtype - assert dpnp_q.shape == np_q.shape - assert dpnp_r.shape == np_r.shape - - tol = 1e-6 - if type == inp.float32: - tol = 1e-02 - elif not support_aspect64 and type in (inp.int32, inp.int64, None): - tol = 1e-02 - - # check decomposition - assert_allclose( - ia, - inp.dot(dpnp_q, dpnp_r), - rtol=tol, - atol=tol, + if mode in ("raw", "r"): + assert_dtype_allclose(dpnp_r, np_r) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "shape", + [(0, 0), (0, 2), (2, 0), (2, 0, 3), (2, 3, 0), (0, 2, 3)], + ids=[ + "(0, 0)", + "(0, 2)", + "(2 ,0)", + "(2, 0, 3)", + "(2, 3, 0)", + "(0, 2, 3)", + ], + ) + @pytest.mark.parametrize( + "mode", + ["r", "raw", "complete", "reduced"], + ids=["r", "raw", "complete", "reduced"], ) + def test_qr_empty(self, dtype, shape, mode): + a = numpy.empty(shape, dtype=dtype) + ia = inp.array(a) - # NP change sign for comparison - ncols = min(a.shape[0], a.shape[1]) - for i in range(ncols): - j = numpy.where(numpy.abs(np_q[:, i]) > tol)[0][0] - if np_q[j, i] * dpnp_q[j, i] < 0: - np_q[:, i] = -np_q[:, i] - np_r[i, :] = -np_r[i, :] - - if numpy.any(numpy.abs(np_r[i, :]) > tol): - assert_allclose( - inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol - ) + if mode == "r": + np_r = numpy.linalg.qr(a, mode) + dpnp_r = inp.linalg.qr(ia, mode) + else: + np_q, np_r = numpy.linalg.qr(a, mode) + dpnp_q, dpnp_r = inp.linalg.qr(ia, mode) - assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) + assert_dtype_allclose(dpnp_q, np_q) + assert_dtype_allclose(dpnp_r, np_r) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -def test_qr_not_2D(): - a = numpy.arange(12, dtype=numpy.float32).reshape((3, 2, 2)) - ia = inp.array(a) + @pytest.mark.skipif(is_cpu_device(), reason="CMPLRLLVM-53771") + 
@pytest.mark.parametrize( + "mode", + ["r", "raw", "complete", "reduced"], + ids=["r", "raw", "complete", "reduced"], + ) + def test_qr_strides(self, mode): + a = numpy.random.rand(5, 5) + ia = inp.array(a) - np_q, np_r = numpy.linalg.qr(a) - dpnp_q, dpnp_r = inp.linalg.qr(ia) + # positive strides + if mode == "r": + np_r = numpy.linalg.qr(a[::2, ::2], mode) + dpnp_r = inp.linalg.qr(ia[::2, ::2], mode) + else: + np_q, np_r = numpy.linalg.qr(a[::2, ::2], mode) + dpnp_q, dpnp_r = inp.linalg.qr(ia[::2, ::2], mode) - assert dpnp_q.dtype == np_q.dtype - assert dpnp_r.dtype == np_r.dtype - assert dpnp_q.shape == np_q.shape - assert dpnp_r.shape == np_r.shape + assert_dtype_allclose(dpnp_q, np_q) - assert_allclose(ia, inp.matmul(dpnp_q, dpnp_r)) + assert_dtype_allclose(dpnp_r, np_r) - a = numpy.empty((0, 3, 2), dtype=numpy.float32) - ia = inp.array(a) + # negative strides + if mode == "r": + np_r = numpy.linalg.qr(a[::-2, ::-2], mode) + dpnp_r = inp.linalg.qr(ia[::-2, ::-2], mode) + else: + np_q, np_r = numpy.linalg.qr(a[::-2, ::-2], mode) + dpnp_q, dpnp_r = inp.linalg.qr(ia[::-2, ::-2], mode) - np_q, np_r = numpy.linalg.qr(a) - dpnp_q, dpnp_r = inp.linalg.qr(ia) + assert_dtype_allclose(dpnp_q, np_q) - assert dpnp_q.dtype == np_q.dtype - assert dpnp_r.dtype == np_r.dtype - assert dpnp_q.shape == np_q.shape - assert dpnp_r.shape == np_r.shape + assert_dtype_allclose(dpnp_r, np_r) - assert_allclose(ia, inp.matmul(dpnp_q, dpnp_r)) + def test_qr_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.qr, a_np) + + # a.ndim < 2 + a_dp_ndim_1 = a_dp.flatten() + assert_raises(inp.linalg.LinAlgError, inp.linalg.qr, a_dp_ndim_1) + + # invalid mode + assert_raises(ValueError, inp.linalg.qr, a_dp, "c") class TestSolve: @@ -1018,14 +1076,6 @@ def check_decomposition( dpnp_diag_s = inp.zeros_like(dp_a, dtype=dp_s.dtype) for i in range(min(dp_a.shape[-2], dp_a.shape[-1])): 
dpnp_diag_s[..., i, i] = dp_s[..., i] - # TODO: remove it when dpnp.dot is updated - # dpnp.dot does not support complex type - if inp.issubdtype(dp_a.dtype, inp.complexfloating): - reconstructed = numpy.dot( - inp.asnumpy(dp_u), - numpy.dot(inp.asnumpy(dpnp_diag_s), inp.asnumpy(dp_vt)), - ) - else: reconstructed = inp.dot(dp_u, inp.dot(dpnp_diag_s, dp_vt)) # TODO: use assert dpnp.allclose() inside check_decomposition() # when it will support complex dtypes diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index f6329d8f216..de243744403 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1202,34 +1202,52 @@ def test_matrix_rank(device): assert_array_equal(expected, result) +@pytest.mark.parametrize( + "shape", + [ + (4, 4), + (2, 0), + (2, 2, 3), + (0, 2, 3), + (1, 0, 3), + ], + ids=[ + "(4, 4)", + "(2, 0)", + "(2, 2, 3)", + "(0, 2, 3)", + "(1, 0, 3)", + ], +) +@pytest.mark.parametrize( + "mode", + ["r", "raw", "complete", "reduced"], + ids=["r", "raw", "complete", "reduced"], +) @pytest.mark.parametrize( "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_qr(device): - data = [[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]] - dpnp_data = dpnp.array(data, device=device) - numpy_data = numpy.array(data, dtype=dpnp_data.dtype) - - np_q, np_r = numpy.linalg.qr(numpy_data, "reduced") - dpnp_q, dpnp_r = dpnp.linalg.qr(dpnp_data, "reduced") +def test_qr(shape, mode, device): + dtype = dpnp.default_float_type(device) + count_elems = numpy.prod(shape) + a = dpnp.arange(count_elems, dtype=dtype, device=device).reshape(shape) - assert dpnp_q.dtype == np_q.dtype - assert dpnp_r.dtype == np_r.dtype - assert dpnp_q.shape == np_q.shape - assert dpnp_r.shape == np_r.shape + expected_queue = a.get_array().sycl_queue - assert_dtype_allclose(dpnp_q, np_q) - assert_dtype_allclose(dpnp_r, np_r) + if mode == "r": + dp_r = dpnp.linalg.qr(a, mode=mode) + dp_r_queue = dp_r.get_array().sycl_queue + 
assert_sycl_queue_equal(dp_r_queue, expected_queue) + else: + dp_q, dp_r = dpnp.linalg.qr(a, mode=mode) - expected_queue = dpnp_data.get_array().sycl_queue - dpnp_q_queue = dpnp_q.get_array().sycl_queue - dpnp_r_queue = dpnp_r.get_array().sycl_queue + dp_q_queue = dp_q.get_array().sycl_queue + dp_r_queue = dp_r.get_array().sycl_queue - # compare queue and device - assert_sycl_queue_equal(dpnp_q_queue, expected_queue) - assert_sycl_queue_equal(dpnp_r_queue, expected_queue) + assert_sycl_queue_equal(dp_q_queue, expected_queue) + assert_sycl_queue_equal(dp_r_queue, expected_queue) @pytest.mark.parametrize( diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 29101cf9f48..56e2a68756a 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -796,3 +796,40 @@ def test_svd(usm_type, shape, full_matrices_param, compute_uv_param): ) assert x.usm_type == s.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape", + [ + (4, 4), + (2, 0), + (2, 2, 3), + (0, 2, 3), + (1, 0, 3), + ], + ids=[ + "(4, 4)", + "(2, 0)", + "(2, 2, 3)", + "(0, 2, 3)", + "(1, 0, 3)", + ], +) +@pytest.mark.parametrize( + "mode", + ["r", "raw", "complete", "reduced"], + ids=["r", "raw", "complete", "reduced"], +) +def test_qr(shape, mode, usm_type): + count_elems = numpy.prod(shape) + a = dp.arange(count_elems, usm_type=usm_type).reshape(shape) + + if mode == "r": + dp_r = dp.linalg.qr(a, mode=mode) + assert a.usm_type == dp_r.usm_type + else: + dp_q, dp_r = dp.linalg.qr(a, mode=mode) + + assert a.usm_type == dp_q.usm_type + assert a.usm_type == dp_r.usm_type diff --git a/tests/third_party/cupy/linalg_tests/test_decomposition.py b/tests/third_party/cupy/linalg_tests/test_decomposition.py index fd887c16e6c..234a2e0e381 100644 --- a/tests/third_party/cupy/linalg_tests/test_decomposition.py +++ b/tests/third_party/cupy/linalg_tests/test_decomposition.py @@ -201,38 +201,31 @@ def check_usv(self, shape, dtype): # 
reconstruct the matrix k = s_cpu.shape[-1] - # dpnp.dot/matmul does not support complex type and unstable on cpu - # TODO: remove it and use xp.dot/matmul when dpnp.dot/matmul is updated - u_gpu = u_gpu.asnumpy() - vh_gpu = vh_gpu.asnumpy() - s_gpu = s_gpu.asnumpy() - xp = numpy - if len(shape) == 2: if self.full_matrices: - a_gpu_usv = numpy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :]) + a_gpu_usv = cupy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :]) else: - a_gpu_usv = numpy.dot(u_gpu * s_gpu, vh_gpu) + a_gpu_usv = cupy.dot(u_gpu * s_gpu, vh_gpu) else: if self.full_matrices: - a_gpu_usv = numpy.matmul( + a_gpu_usv = cupy.matmul( u_gpu[..., :k] * s_gpu[..., None, :], vh_gpu[..., :k, :] ) else: - a_gpu_usv = numpy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu) + a_gpu_usv = cupy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu) testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4) # assert unitary u_len = u_gpu.shape[-1] vh_len = vh_gpu.shape[-2] testing.assert_allclose( - xp.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu), - stacked_identity(xp, shape[:-2], u_len, dtype), + cupy.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu), + stacked_identity(cupy, shape[:-2], u_len, dtype), atol=1e-4, ) testing.assert_allclose( - xp.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()), - stacked_identity(xp, shape[:-2], vh_len, dtype), + cupy.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()), + stacked_identity(cupy, shape[:-2], vh_len, dtype), atol=1e-4, ) @@ -385,3 +378,77 @@ def test_svd_rank4_empty_array(self): self.check_usv((0, 2, 3, 4)) self.check_usv((1, 2, 0, 4)) self.check_usv((1, 2, 3, 0)) + + +@testing.parameterize( + *testing.product( + { + "mode": ["r", "raw", "complete", "reduced"], + } + ) +) +class TestQRDecomposition(unittest.TestCase): + @testing.for_dtypes("fdFD") + def check_mode(self, array, mode, dtype): + a_cpu = numpy.asarray(array, dtype=dtype) + a_gpu = cupy.asarray(array, dtype=dtype) + result_gpu = cupy.linalg.qr(a_gpu, mode=mode) + if ( + mode != "raw" + or 
numpy.lib.NumpyVersion(numpy.__version__) >= "1.22.0rc1" + ): + result_cpu = numpy.linalg.qr(a_cpu, mode=mode) + self._check_result(result_cpu, result_gpu) + + def _check_result(self, result_cpu, result_gpu): + if isinstance(result_cpu, tuple): + for b_cpu, b_gpu in zip(result_cpu, result_gpu): + assert b_cpu.dtype == b_gpu.dtype + testing.assert_allclose(b_cpu, b_gpu, atol=1e-4) + else: + assert result_cpu.dtype == result_gpu.dtype + testing.assert_allclose(result_cpu, result_gpu, atol=1e-4) + + # TODO: New packages that fix issue CMPLRLLVM-53771 are only available in internal CI. + # Skip the tests on cpu until these packages are available for the external CI. + # Specifically dpcpp_linux-64>=2024.1.0 + @pytest.mark.skipif(is_cpu_device(), reason="CMPLRLLVM-53771") + @testing.fix_random() + @_condition.repeat(3, 10) + def test_mode(self): + self.check_mode(numpy.random.randn(2, 4), mode=self.mode) + self.check_mode(numpy.random.randn(3, 3), mode=self.mode) + self.check_mode(numpy.random.randn(5, 4), mode=self.mode) + + @pytest.mark.skipif(is_cpu_device(), reason="CMPLRLLVM-53771") + @testing.with_requires("numpy>=1.22") + @testing.fix_random() + def test_mode_rank3(self): + self.check_mode(numpy.random.randn(3, 2, 4), mode=self.mode) + self.check_mode(numpy.random.randn(4, 3, 3), mode=self.mode) + self.check_mode(numpy.random.randn(2, 5, 4), mode=self.mode) + + @pytest.mark.skipif(is_cpu_device(), reason="CMPLRLLVM-53771") + @testing.with_requires("numpy>=1.22") + @testing.fix_random() + def test_mode_rank4(self): + self.check_mode(numpy.random.randn(2, 3, 2, 4), mode=self.mode) + self.check_mode(numpy.random.randn(2, 4, 3, 3), mode=self.mode) + self.check_mode(numpy.random.randn(2, 2, 5, 4), mode=self.mode) + + @testing.with_requires("numpy>=1.16") + def test_empty_array(self): + self.check_mode(numpy.empty((0, 3)), mode=self.mode) + self.check_mode(numpy.empty((3, 0)), mode=self.mode) + + @testing.with_requires("numpy>=1.22") + def test_empty_array_rank3(self): 
+ self.check_mode(numpy.empty((0, 3, 2)), mode=self.mode) + self.check_mode(numpy.empty((3, 0, 2)), mode=self.mode) + self.check_mode(numpy.empty((3, 2, 0)), mode=self.mode) + self.check_mode(numpy.empty((0, 3, 3)), mode=self.mode) + self.check_mode(numpy.empty((3, 0, 3)), mode=self.mode) + self.check_mode(numpy.empty((3, 3, 0)), mode=self.mode) + self.check_mode(numpy.empty((0, 2, 3)), mode=self.mode) + self.check_mode(numpy.empty((2, 0, 3)), mode=self.mode) + self.check_mode(numpy.empty((2, 3, 0)), mode=self.mode) From 6c99e65187e43c8ec072eca179413baf57fa195e Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 8 Feb 2024 19:05:54 +0100 Subject: [PATCH 24/29] Implement a helper alias template for complex types (#1644) --- dpnp/backend/kernels/dpnp_krnl_fft.cpp | 4 ++-- dpnp/backend/src/dpnp_fptr.hpp | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp index 027f3343178..aec669a8699 100644 --- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp @@ -414,7 +414,7 @@ DPCTLSyclEventRef dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref, const size_t norm, const DPCTLEventVectorRef dep_event_vec_ref) { - static_assert(sycl::detail::is_complex<_DataType_output>::value, + static_assert(is_complex<_DataType_output>::value, "Output data type must be a complex type."); DPCTLSyclEventRef event_ref = nullptr; @@ -584,7 +584,7 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref, const size_t norm, const DPCTLEventVectorRef dep_event_vec_ref) { - static_assert(sycl::detail::is_complex<_DataType_output>::value, + static_assert(is_complex<_DataType_output>::value, "Output data type must be a complex type."); DPCTLSyclEventRef event_ref = nullptr; diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index a46f3a7d35d..022e844319d 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ 
b/dpnp/backend/src/dpnp_fptr.hpp @@ -219,6 +219,18 @@ template using dpnp_remove_cvref_t = typename std::remove_cv_t>; +/** + * A helper alias template to return true value for complex types and false + * otherwise. + */ +template +struct is_complex : public std::integral_constant< + bool, + std::is_same_v<_Tp, std::complex> || + std::is_same_v<_Tp, std::complex>> +{ +}; + /** * @brief "<" comparison with complex types support. * From 2ce997db0b4bbd9d28f0f224a4f59a78e34451f8 Mon Sep 17 00:00:00 2001 From: vtavana <120411540+vtavana@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:40:54 -0600 Subject: [PATCH 25/29] Unmute result type tests and modify TypeError/ValueError tests (#1663) * unmute result type tests * update TypeError and ValueError tests * update TestResultType --- tests/test_manipulation.py | 3 -- tests/test_mathematical.py | 44 +++++--------------- tests/test_umath.py | 28 ++++--------- tests/third_party/cupy/test_type_routines.py | 29 +++++++++++-- 4 files changed, 44 insertions(+), 60 deletions(-) diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index bb5533b0e62..0c830950197 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -72,9 +72,6 @@ def test_repeat(arr): assert_array_equal(expected, result) -# TODO: Temporary skipping the test, until Internal CI is updated with -# recent changed in dpctl regarding dpt.result_type function -@pytest.mark.skip("Temporary skipping the test") def test_result_type(): X = [dpnp.ones((2), dtype=dpnp.int64), dpnp.int32, "float32"] X_np = [numpy.ones((2), dtype=numpy.int64), numpy.int32, "float32"] diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 56be3db6d92..80fe09c61b8 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1199,9 +1199,7 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in 
internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.ceil(dp_array, out=dp_out) @pytest.mark.parametrize("dtype", get_float_dtypes()) @@ -1241,9 +1239,7 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.floor(dp_array, out=dp_out) @pytest.mark.parametrize("dtype", get_float_dtypes()) @@ -1283,9 +1279,7 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.trunc(dp_array, out=dp_out) @pytest.mark.parametrize("dtype", get_float_dtypes()) @@ -1336,9 +1330,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.add(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1435,9 +1427,7 @@ def test_out_dtypes(self, dtype): check_dtype = True if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.divide(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1538,9 +1528,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - 
# is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.floor_divide(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1800,9 +1788,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.float32) if dtype != dpnp.float32: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.hypot(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1970,9 +1956,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.maximum(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -2053,9 +2037,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.minimum(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -2136,9 +2118,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.multiply(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -2233,9 +2213,7 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, 
dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.power(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type diff --git a/tests/test_umath.py b/tests/test_umath.py index 8e04a439bc9..2f792c0ab3c 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -209,9 +209,7 @@ def test_invalid_dtype(self, func_params, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): getattr(dpnp, func_name)(dp_array, out=dp_out) @pytest.mark.parametrize( @@ -256,9 +254,7 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.cbrt(dp_array, out=dp_out) @pytest.mark.parametrize( @@ -295,9 +291,7 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.rsqrt(dp_array, out=dp_out) @pytest.mark.parametrize( @@ -338,9 +332,7 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.square(dp_array, out=dp_out) @pytest.mark.parametrize( @@ -423,9 +415,7 @@ def test_invalid_dtype(self, 
dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.arctan2(dp_array, dp_array, out=dp_out) @pytest.mark.parametrize( @@ -461,9 +451,7 @@ def test_invalid_dtype(self, dtype): dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.copysign(dp_array, dp_array, out=dp_out) @pytest.mark.parametrize( @@ -499,9 +487,7 @@ def test_invalid_dtype(self, dtype): dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - # TODO: change it to ValueError, when dpctl - # is being used in internal CI - with pytest.raises((TypeError, ValueError)): + with pytest.raises(ValueError): dpnp.logaddexp(dp_array, dp_array, out=dp_out) @pytest.mark.parametrize( diff --git a/tests/third_party/cupy/test_type_routines.py b/tests/third_party/cupy/test_type_routines.py index e6fd09c7419..ebfe56d6d42 100644 --- a/tests/third_party/cupy/test_type_routines.py +++ b/tests/third_party/cupy/test_type_routines.py @@ -4,6 +4,7 @@ import pytest import dpnp as cupy +from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing @@ -87,9 +88,6 @@ def test_common_type_bool(self, dtype): } ) ) -# TODO: Temporary skipping the test, until Internal CI is updated with -# recent changed in dpctl regarding dpt.result_type function -@pytest.mark.skip("Temporary skipping the test") class TestResultType(unittest.TestCase): @testing.for_all_dtypes_combination(names=("dtype1", "dtype2")) @testing.numpy_cupy_equal() @@ -100,6 +98,31 @@ def 
test_result_type(self, xp, dtype1, dtype2): input1 = _generate_type_routines_input(xp, dtype1, self.obj_type1) input2 = _generate_type_routines_input(xp, dtype2, self.obj_type2) + + flag1 = isinstance(input1, (numpy.ndarray, cupy.ndarray)) + flag2 = isinstance(input2, (numpy.ndarray, cupy.ndarray)) + dt1 = cupy.dtype(input1) if not flag1 else None + dt2 = cupy.dtype(input2) if not flag2 else None + # dpnp takes into account device capabilities only if one of the + # inputs is an array, for such a case, if the other dtype is not + # supported by device, dpnp raises ValueError. So, we skip the test. + if flag1 or flag2: + if ( + dt1 in [cupy.float64, cupy.complex128] + or dt2 in [cupy.float64, cupy.complex128] + ) and not has_support_aspect64(): + pytest.skip("No fp64 support by device.") ret = xp.result_type(input1, input2) + + # dpnp takes into account device capabilities if one of the inputs + # is an array, for such a case, we have to modify the results for + # NumPy to align it with device capabilities. 
+ if (flag1 or flag2) and xp == numpy and not has_support_aspect64(): + ret = numpy.dtype(numpy.float32) if ret == numpy.float64 else ret + ret = ( + numpy.dtype(numpy.complex64) if ret == numpy.complex128 else ret + ) + assert isinstance(ret, numpy.dtype) return ret From 0957dddc19ff819cfe3bd686308c5a6107adec00 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Fri, 9 Feb 2024 13:15:54 +0100 Subject: [PATCH 26/29] Fix memory leak in dpnp_algo_random (#1700) Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/random/dpnp_algo_random.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx index 432d3a3294c..3d2a10c51a9 100644 --- a/dpnp/random/dpnp_algo_random.pyx +++ b/dpnp/random/dpnp_algo_random.pyx @@ -442,7 +442,7 @@ cdef class MT19937(_Engine): try: for i in range(vector_seed_len): vector_seed[i] = seed[i] - except (ValueError, TypeError) as e: + except Exception as e: free(vector_seed) raise e else: From 4c7859b5dae41600377eea665ccf763218d0a226 Mon Sep 17 00:00:00 2001 From: vlad-perevezentsev Date: Fri, 9 Feb 2024 18:36:22 +0100 Subject: [PATCH 27/29] Fix ExecutionPlacementError for dpnp.take_along_axis (#1702) * Follow compute follows data to fill fancy_index * Update take_along_axis tests to cover the issue * Update test_take_along_axis --- dpnp/dpnp_iface_indexing.py | 7 ++++++- tests/test_sycl_queue.py | 36 ++++++++++++++++++++++++++++++++---- tests/test_usm_type.py | 26 ++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 8f973ed1f1a..a930b3c56d9 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -111,7 +111,12 @@ def _build_along_axis_index(a, indices, axis): else: ind_shape = shape_ones[:dim] + (-1,) + shape_ones[dim + 1 :] fancy_index.append( - dpnp.arange(n, dtype=indices.dtype).reshape(ind_shape) + dpnp.arange( 
+ n, + dtype=indices.dtype, + usm_type=indices.usm_type, + sycl_queue=indices.sycl_queue, + ).reshape(ind_shape) ) return tuple(fancy_index) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index de243744403..6bc24af6c7d 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1520,21 +1520,20 @@ def test_clip(device): assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue) -@pytest.mark.parametrize("func", ["take", "take_along_axis"]) @pytest.mark.parametrize( "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_take(func, device): +def test_take(device): numpy_data = numpy.arange(5) dpnp_data = dpnp.array(numpy_data, device=device) dpnp_ind = dpnp.array([0, 2, 4], device=device) np_ind = dpnp_ind.asnumpy() - result = getattr(dpnp, func)(dpnp_data, dpnp_ind, axis=None) - expected = getattr(numpy, func)(numpy_data, np_ind, axis=None) + result = dpnp.take(dpnp_data, dpnp_ind, axis=None) + expected = numpy.take(numpy_data, np_ind, axis=None) assert_allclose(expected, result) expected_queue = dpnp_data.get_array().sycl_queue @@ -1542,6 +1541,35 @@ def test_take(func, device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "data, ind, axis", + [ + (numpy.arange(6), numpy.array([0, 2, 4]), None), + ( + numpy.arange(6).reshape((2, 3)), + numpy.array([0, 1]).reshape((2, 1)), + 1, + ), + ], +) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_take_along_axis(data, ind, axis, device): + dp_data = dpnp.array(data, device=device) + dp_ind = dpnp.array(ind, device=device) + + result = dpnp.take_along_axis(dp_data, dp_ind, axis=axis) + expected = numpy.take_along_axis(data, ind, axis=axis) + assert_allclose(expected, result) + + expected_queue = dp_data.get_array().sycl_queue + result_queue = result.get_array().sycl_queue + assert_sycl_queue_equal(result_queue, expected_queue) + + 
@pytest.mark.parametrize( "device", valid_devices, diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 56e2a68756a..e188cdb1c47 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -570,6 +570,32 @@ def test_take(func, usm_type_x, usm_type_ind): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) +@pytest.mark.parametrize( + "data, ind, axis", + [ + (numpy.arange(6), numpy.array([0, 2, 4]), None), + ( + numpy.arange(6).reshape((2, 3)), + numpy.array([0, 1]).reshape((2, 1)), + 1, + ), + ], +) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "usm_type_ind", list_of_usm_types, ids=list_of_usm_types +) +def test_take_along_axis(data, ind, axis, usm_type_x, usm_type_ind): + x = dp.array(data, usm_type=usm_type_x) + ind = dp.array(ind, usm_type=usm_type_ind) + + z = dp.take_along_axis(x, ind, axis=axis) + + assert x.usm_type == usm_type_x + assert ind.usm_type == usm_type_ind + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) + + @pytest.mark.parametrize( "data, is_empty", [ From 0457fe174dbfe760bfe5633e8e7d12183a4d6166 Mon Sep 17 00:00:00 2001 From: vtavana <120411540+vtavana@users.noreply.github.com> Date: Fri, 9 Feb 2024 13:48:00 -0600 Subject: [PATCH 28/29] implement `dpnp.tensordot` (#1699) * implement dpnp.tensordot * update doc string * address comments * fix doc string * update scaling factor * add TODO comment --------- Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com> --- dpnp/dpnp_iface_linearalgebra.py | 153 +++++++++++++--- dpnp/dpnp_iface_sorting.py | 2 - tests/helper.py | 9 +- tests/skipped_tests.tbl | 4 - tests/skipped_tests_gpu.tbl | 4 - tests/test_dot.py | 168 +++++++++++++++--- tests/test_mathematical.py | 4 +- tests/test_sycl_queue.py | 5 + tests/test_usm_type.py | 5 + .../cupy/linalg_tests/test_product.py | 20 --- 10 files changed, 298 insertions(+), 76 deletions(-) diff --git 
a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index bffe881b626..7baca14c93b 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -39,6 +39,7 @@ import numpy +from numpy.core.numeric import normalize_axis_tuple import dpnp from dpnp.dpnp_algo import * @@ -66,9 +67,9 @@ def dot(a, b, out=None): Parameters ---------- - a : {dpnp_array, usm_ndarray, scalar} + a : {dpnp.ndarray, usm_ndarray, scalar} First input array. Both inputs `a` and `b` can not be scalars at the same time. - b : {dpnp_array, usm_ndarray, scalar} + b : {dpnp.ndarray, usm_ndarray, scalar} Second input array. Both inputs `a` and `b` can not be scalars at the same time. out : {dpnp.ndarray, usm_ndarray}, optional Alternative output array in which to place the result. It must have @@ -404,42 +405,152 @@ def outer(x1, x2, out=None): return call_origin(numpy.outer, x1, x2, out=out) -def tensordot(x1, x2, axes=2): - """ +def tensordot(a, b, axes=2): + r""" Compute tensor dot product along specified axes. For full documentation refer to :obj:`numpy.tensordot`. - Limitations - ----------- - Parameters `x1` and `x2` are supported as :obj:`dpnp.ndarray`. - Keyword argument `kwargs` is currently unsupported. - Parameter `axes` is supported only with value ``1``. - Otherwise the functions will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray, scalar} + First input array. Both inputs `a` and `b` can not be scalars at the same time. + b : {dpnp.ndarray, usm_ndarray, scalar} + Second input array. Both inputs `a` and `b` can not be scalars at the same time. + axes : int or (2,) array_like + * integer_like + If an int `N`, sum over the last `N` axes of `a` and the first `N` axes + of `b` in order. The sizes of the corresponding axes must match. 
+ * (2,) array_like + Or, a list of axes to be summed over, first sequence applying to `a`, + second to `b`. Both elements array_like must be of the same length. + + Returns + ------- + out : dpnp.ndarray + Returns the tensordot product of `a` and `b`. See Also -------- :obj:`dpnp.dot` : Returns the dot product. :obj:`dpnp.einsum` : Evaluates the Einstein summation convention on the operands. + Notes + ----- + Three common use cases are: + * ``axes = 0`` : tensor product :math:`a \otimes b` + * ``axes = 1`` : tensor dot product :math:`a \cdot b` + * ``axes = 2`` : (default) tensor double contraction :math:`a:b` + + When `axes` is integer, the sequence for evaluation will be: first + the -Nth axis in `a` and 0th axis in `b`, and the -1th axis in `a` and + Nth axis in `b` last. + + When there is more than one axis to sum over - and they are not the last + (first) axes of `a` (`b`) - the argument `axes` should consist of + two sequences of the same length, with the first axis to sum over given + first in both sequences, the second axis second, and so forth. + + The shape of the result consists of the non-contracted axes of the + first tensor, followed by the non-contracted axes of the second. + Examples -------- >>> import dpnp as np >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) >>> b = np.array([1, 2, 3]) - >>> result = np.tensordot(a, b, 1) - >>> [x for x in result] - [14, 32, 50] + >>> np.tensordot(a, b, 1) + array([14, 32, 50]) + + >>> a = np.arange(60.).reshape(3,4,5) + >>> b = np.arange(24.).reshape(4,3,2) + >>> c = np.tensordot(a,b, axes=([1,0],[0,1])) + >>> c.shape + (5, 2) + >>> c + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + + A slower but equivalent way of computing the same... + + >>> d = np.zeros((5,2)) + >>> for i in range(5): + ... for j in range(2): + ... for k in range(3): + ... for n in range(4): + ... 
d[i,j] += a[k,n,i] * b[n,k,j] + >>> c == d + array([[ True, True], + [ True, True], + [ True, True], + [ True, True], + [ True, True]]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) - if x1_desc and x2_desc and (axes == 1): - return dpnp_tensordot_not_implemented(x1_desc, x2_desc) # dpnp_matmul + dpnp.check_supported_arrays_type(a, b, scalar_type=True) - return call_origin(numpy.tensordot, x1, x2, axes) + if dpnp.isscalar(a): + a = dpnp.array(a, sycl_queue=b.sycl_queue, usm_type=b.usm_type) + elif dpnp.isscalar(b): + b = dpnp.array(b, sycl_queue=a.sycl_queue, usm_type=a.usm_type) + + try: + iter(axes) + except Exception: + if not isinstance(axes, int): + raise TypeError("Axes must be an integer.") + axes_a = tuple(range(-axes, 0)) + axes_b = tuple(range(0, axes)) + else: + if len(axes) != 2: + raise ValueError("Axes must consist of two sequences.") + + axes_a, axes_b = axes + axes_a = (axes_a,) if dpnp.isscalar(axes_a) else axes_a + axes_b = (axes_b,) if dpnp.isscalar(axes_b) else axes_b + + if len(axes_a) != len(axes_b): + raise ValueError("Axes length mismatch.") + + a_shape = a.shape + b_shape = b.shape + for axis_a, axis_b in zip(axes_a, axes_b): + if a_shape[axis_a] != b_shape[axis_b]: + raise ValueError( + "shape of input arrays is not similar at requested axes." 
+ ) + + # Make the axes non-negative + a_ndim = a.ndim + b_ndim = b.ndim + axes_a = normalize_axis_tuple(axes_a, a_ndim, "axis") + axes_b = normalize_axis_tuple(axes_b, b_ndim, "axis") + + # Move the axes to sum over, to the end of "a" + notin = tuple(k for k in range(a_ndim) if k not in axes_a) + newaxes_a = notin + axes_a + N1 = int(numpy.prod([a_shape[ax] for ax in notin])) + N2 = int(numpy.prod([a_shape[ax] for ax in axes_a])) + newshape_a = (N1, N2) + olda = [a_shape[axis] for axis in notin] + + # Move the axes to sum over, to the front of "b" + notin = tuple(k for k in range(b_ndim) if k not in axes_b) + newaxes_b = tuple(axes_b + notin) + N1 = int(numpy.prod([b_shape[ax] for ax in axes_b])) + N2 = int(numpy.prod([b_shape[ax] for ax in notin])) + newshape_b = (N1, N2) + oldb = [b_shape[axis] for axis in notin] + + at = a.transpose(newaxes_a).reshape(newshape_a) + bt = b.transpose(newaxes_b).reshape(newshape_b) + res = dpnp.matmul(at, bt) + + return res.reshape(olda + oldb) def vdot(a, b): @@ -450,11 +561,11 @@ def vdot(a, b): Parameters ---------- - a : {dpnp_array, usm_ndarray, scalar} + a : {dpnp.ndarray, usm_ndarray, scalar} First input array. Both inputs `a` and `b` can not be scalars at the same time. If `a` is complex, the complex conjugate is taken before the calculation of the dot product. - b : {dpnp_array, usm_ndarray, scalar} + b : {dpnp.ndarray, usm_ndarray, scalar} Second input array. Both inputs `a` and `b` can not be scalars at the same time. 
diff --git a/dpnp/dpnp_iface_sorting.py b/dpnp/dpnp_iface_sorting.py index 6a3db20e74c..93e8db2172b 100644 --- a/dpnp/dpnp_iface_sorting.py +++ b/dpnp/dpnp_iface_sorting.py @@ -1,5 +1,3 @@ -# cython: language_level=3 -# distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** # Copyright (c) 2016-2024, Intel Corporation diff --git a/tests/helper.py b/tests/helper.py index aac6b51a1c6..2a2873afdce 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -8,7 +8,11 @@ def assert_dtype_allclose( - dpnp_arr, numpy_arr, check_type=True, check_only_type_kind=False + dpnp_arr, + numpy_arr, + check_type=True, + check_only_type_kind=False, + factor=8, ): """ Assert DPNP and NumPy array based on maximum dtype resolution of input arrays @@ -28,6 +32,7 @@ def assert_dtype_allclose( The 'check_only_type_kind' parameter (False by default) asserts only equal type kinds for all data types supported by DPNP when set to True. It is effective only when 'check_type' is also set to True. + The parameter `factor` scales the resolution used for comparing the arrays. 
""" @@ -44,7 +49,7 @@ def assert_dtype_allclose( if is_inexact(numpy_arr) else -dpnp.inf ) - tol = 8 * max(tol_dpnp, tol_numpy) + tol = factor * max(tol_dpnp, tol_numpy) assert_allclose(dpnp_arr.asnumpy(), numpy_arr, atol=tol, rtol=tol) if check_type: numpy_arr_dtype = numpy_arr.dtype diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index a38624e3757..182eaf8877a 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -335,10 +335,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index ce6f6aef984..d6fd43e1887 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -437,10 +437,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large 
tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes -tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal diff --git a/tests/test_dot.py b/tests/test_dot.py index 42478db9634..03045f002a8 100644 --- a/tests/test_dot.py +++ b/tests/test_dot.py @@ -44,9 +44,6 @@ def test_dot_scalar(self, dtype): expected = numpy.dot(a, b) assert_allclose(result, expected) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize( "array_info", @@ -88,9 +85,6 @@ def test_dot(self, dtype, array_info): expected = numpy.dot(a, b) assert_dtype_allclose(result, expected) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_complex_dtypes()) @pytest.mark.parametrize( "array_info", @@ -132,9 +126,6 @@ def test_dot_complex(self, dtype, array_info): expected = numpy.dot(a, b) assert_dtype_allclose(result, expected) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize( "array_info", @@ -214,9 +205,6 @@ def test_dot_out_scalar(self, 
dtype): assert result is dp_out assert_allclose(result, expected) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize( "array_info", @@ -294,21 +282,14 @@ def test_dot_out_error_scalar(self, ia): # output data type is incorrect dp_out = dpnp.empty((10,), dtype=dpnp.int64) - # TODO: change it to ValueError, when updated - # dpctl is being used in internal CI - with pytest.raises((ValueError, TypeError)): + with pytest.raises(ValueError): dpnp.dot(ia, ib, out=dp_out) # output shape is incorrect dp_out = dpnp.empty((2,), dtype=dpnp.int32) - # TODO: change it to ValueError, when updated - # dpctl is being used in internal CI - with pytest.raises((ValueError, TypeError)): + with pytest.raises(ValueError): dpnp.dot(ia, ib, out=dp_out) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "shape_pair", [ @@ -373,6 +354,151 @@ def test_multi_dot(type): assert_array_equal(expected, result) +class TestTensordot: + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_tensordot_scalar(self, dtype): + a = 2 + b = numpy.array(numpy.random.uniform(-5, 5, 10), dtype=dtype) + ib = dpnp.array(b) + + result = dpnp.tensordot(a, ib, axes=0) + expected = numpy.tensordot(a, b, axes=0) + assert_allclose(result, expected) + + result = dpnp.tensordot(ib, a, axes=0) + expected = numpy.tensordot(b, a, axes=0) + assert_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + @pytest.mark.parametrize("axes", [-3, -2, -1, 0, 1, 2]) + def test_tensordot(self, dtype, axes): + a = numpy.array(numpy.random.uniform(-10, 10, 64), dtype=dtype).reshape( + 4, 4, 4 + ) + b = numpy.array(numpy.random.uniform(-10, 10, 64), dtype=dtype).reshape( + 4, 4, 4 + ) + ia = 
dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.tensordot(ia, ib, axes=axes) + expected = numpy.tensordot(a, b, axes=axes) + # TODO: investigate the effect of factor, see SAT-6700 + assert_dtype_allclose(result, expected, factor=24) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize("axes", [-3, -2, -1, 0, 1, 2]) + def test_tensordot_complex(self, dtype, axes): + x11 = numpy.random.uniform(-10, 10, 64) + x12 = numpy.random.uniform(-10, 10, 64) + x21 = numpy.random.uniform(-10, 10, 64) + x22 = numpy.random.uniform(-10, 10, 64) + a = numpy.array(x11 + 1j * x12, dtype=dtype).reshape(4, 4, 4) + b = numpy.array(x21 + 1j * x22, dtype=dtype).reshape(4, 4, 4) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.tensordot(ia, ib, axes=axes) + expected = numpy.tensordot(a, b, axes=axes) + # TODO: investigate the effect of factor, see SAT-6700 + assert_dtype_allclose(result, expected, factor=24) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "axes", + [ + ([0, 1]), + ([0, 1], [1, 2]), + (2, 3), + ([-2, -3], [3, 2]), + ((3, 1), (0, 2)), + ], + ) + def test_tensordot_axes(self, dtype, axes): + a = numpy.array( + numpy.random.uniform(-10, 10, 120), dtype=dtype + ).reshape(2, 5, 3, 4) + b = numpy.array( + numpy.random.uniform(-10, 10, 120), dtype=dtype + ).reshape(4, 2, 5, 3) + ia = dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.tensordot(ia, ib, axes=axes) + expected = numpy.tensordot(a, b, axes=axes) + # TODO: investigate the effect of factor, see SAT-6700 + assert_dtype_allclose(result, expected, factor=24) + + @pytest.mark.parametrize("dtype1", get_all_dtypes()) + @pytest.mark.parametrize("dtype2", get_all_dtypes()) + def test_tensordot_input_dtype_matrix(self, dtype1, dtype2): + a = numpy.array( + numpy.random.uniform(-10, 10, 60), dtype=dtype1 + ).reshape(3, 4, 5) + b = numpy.array( + numpy.random.uniform(-10, 10, 40), dtype=dtype2 + ).reshape(4, 5, 2) + ia = 
dpnp.array(a) + ib = dpnp.array(b) + + result = dpnp.tensordot(ia, ib) + expected = numpy.tensordot(a, b) + # TODO: investigate the effect of factor, see SAT-6700 + assert_dtype_allclose(result, expected, factor=24) + + def test_tensordot_strided(self): + for dim in [1, 2, 3, 4]: + axes = 1 if dim == 1 else 2 + A = numpy.random.rand(*([10] * dim)) + B = dpnp.asarray(A) + # positive stride + slices = tuple(slice(None, None, 2) for _ in range(dim)) + a = A[slices] + b = B[slices] + + result = dpnp.tensordot(b, b, axes=axes) + expected = numpy.tensordot(a, a, axes=axes) + assert_dtype_allclose(result, expected) + + # negative stride + slices = tuple(slice(None, None, -2) for _ in range(dim)) + a = A[slices] + b = B[slices] + + result = dpnp.tensordot(b, b, axes=axes) + expected = numpy.tensordot(a, a, axes=axes) + assert_dtype_allclose(result, expected) + + def test_tensordot_error(self): + a = 5 + b = 2 + # both inputs are scalar + with pytest.raises(TypeError): + dpnp.tensordot(a, b, axes=0) + + a = dpnp.arange(24).reshape(2, 3, 4) + b = dpnp.arange(24).reshape(3, 4, 2) + # axes should be an integer + with pytest.raises(TypeError): + dpnp.tensordot(a, b, axes=2.0) + + # Axes must consist of two sequences + with pytest.raises(ValueError): + dpnp.tensordot(a, b, axes=([0, 2],)) + + # Axes length mismatch + with pytest.raises(ValueError): + dpnp.tensordot(a, b, axes=([0, 2], [2])) + + # shape of input arrays is not similar at requested axes + with pytest.raises(ValueError): + dpnp.tensordot(a, b, axes=([0, 2], [2, 0])) + + # out of range index + with pytest.raises(IndexError): + dpnp.tensordot(a, b, axes=([0, 3], [2, 0])) + + class TestVdot: @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_vdot_scalar(self, dtype): diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 80fe09c61b8..12115b5256c 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -2726,7 +2726,7 @@ def test_matmul_strided(self): for dim in [1, 2, 3, 
4]: A = numpy.random.rand(*([20] * dim)) B = dpnp.asarray(A) - # positive strides + # positive stride slices = tuple(slice(None, None, 2) for _ in range(dim)) a = A[slices] b = B[slices] @@ -2735,7 +2735,7 @@ def test_matmul_strided(self): expected = numpy.matmul(a, a) assert_dtype_allclose(result, expected) - # negative strides + # negative stride slices = tuple(slice(None, None, -2) for _ in range(dim)) a = A[slices] b = B[slices] diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 6bc24af6c7d..479e96e0229 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -579,6 +579,11 @@ def test_reduce_hypot(device): [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], [0.0, 1.0, 2.0, 0.0, 1.0, 2.0, 0.0, 1.0, 2.0], ), + pytest.param( + "tensordot", + [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], + [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]], + ), # dpnp.vdot has 3 different implementations based on input arrays dtype # checking all of them pytest.param("vdot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index e188cdb1c47..21dfb3cde67 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -505,6 +505,11 @@ def test_1in_1out(func, data, usm_type): pytest.param("logaddexp", [[-1, 2, 5, 9]], [[4, -3, 2, -8]]), pytest.param("maximum", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), pytest.param("minimum", [[0.0, 1.0, 2.0]], [[3.0, 4.0, 5.0]]), + pytest.param( + "tensordot", + [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]], + [[4.0, 4.0, 4.0], [4.0, 4.0, 4.0]], + ), # dpnp.vdot has 3 different implementations based on input arrays dtype # checking all of them pytest.param("vdot", [3.0, 4.0, 5.0], [1.0, 2.0, 3.0]), diff --git a/tests/third_party/cupy/linalg_tests/test_product.py b/tests/third_party/cupy/linalg_tests/test_product.py index 1fd048356b4..e59b30dcd6e 100644 --- a/tests/third_party/cupy/linalg_tests/test_product.py +++ b/tests/third_party/cupy/linalg_tests/test_product.py @@ -36,9 +36,6 @@ } ) ) -# TODO: get rid 
of falls back on NumPy when tensordot -# is implemented using OneMKL -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestDot(unittest.TestCase): @testing.for_all_dtypes_combination(["dtype_a", "dtype_b"]) @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) @@ -161,9 +158,6 @@ def test_dot_vec1(self, xp, dtype): b = testing.shaped_arange((2,), xp, dtype) return xp.dot(a, b) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_dot_vec2(self, xp, dtype): @@ -178,9 +172,6 @@ def test_dot_vec3(self, xp, dtype): b = testing.shaped_arange((2,), xp, dtype) return xp.dot(a, b) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_transposed_dot(self, xp, dtype): @@ -188,9 +179,6 @@ def test_transposed_dot(self, xp, dtype): b = testing.shaped_arange((2, 3, 4), xp, dtype).transpose(0, 2, 1) return xp.dot(a, b) - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_transposed_dot_with_out(self, xp, dtype): @@ -200,9 +188,6 @@ def test_transposed_dot_with_out(self, xp, dtype): xp.dot(a, b, out=c) return c - # TODO: get rid of falls back on NumPy when tensordot - # is implemented using OneMKL - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() def test_transposed_dot_with_out_f_contiguous(self, dtype): for xp in (numpy, cupy): @@ -307,7 +292,6 @@ def test_multidim_outer(self, xp, dtype): b = testing.shaped_arange((4, 5), xp, dtype) return xp.outer(a, b) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def 
test_tensordot(self, xp, dtype): @@ -322,7 +306,6 @@ def test_transposed_tensordot(self, xp, dtype): b = testing.shaped_arange((4, 3, 2), xp, dtype).transpose(2, 0, 1) return xp.tensordot(a, b) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_tensordot_with_int_axes(self, xp, dtype): @@ -352,7 +335,6 @@ def test_transposed_tensordot_with_int_axes(self, xp, dtype): ) return xp.tensordot(a, b, axes=3) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_tensordot_with_list_axes(self, xp, dtype): @@ -433,8 +415,6 @@ def test_zerodim_kron(self, xp, dtype): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu class TestProductZeroLength(unittest.TestCase): @testing.for_all_dtypes() @testing.numpy_cupy_allclose() From a27200b0cee911cc657d3b580c81341f9fed5deb Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 10 Feb 2024 12:37:52 +0100 Subject: [PATCH 29/29] Updated CHANGELOG.md for 0.14.0 release (#1703) Co-authored-by: vtavana <120411540+vtavana@users.noreply.github.com> --- CHANGELOG.md | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a15f81807cc..97278d7c719 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,85 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [0.13.0] - TBA +## [0.14.0] - MM/DD/2024 ### Added +* Added implementation of `dpnp.nanmean` and `dpnp.nanstd` functions [#1654](https://github.com/IntelPython/dpnp/pull/1654) +* Added implementation of `dpnp.angle` function [#1650](https://github.com/IntelPython/dpnp/pull/1650) +* Added implementation of `dpnp.logsumexp` and `dpnp.reduce_hypot` functions [#1648](https://github.com/IntelPython/dpnp/pull/1648) +* Added implementation of `dpnp.column_stack`, `dpnp.dstack` and `dpnp.row_stack` functions [#1647](https://github.com/IntelPython/dpnp/pull/1647) +* Added implementation of `dpnp.nanargmax`, `dpnp.nanargmin`, `dpnp.nanmax` and `dpnp.nanmin` functions [#1646](https://github.com/IntelPython/dpnp/pull/1646) +* Added implementation of `dpnp.clip` function, available as well as a method of dpnp array [#1645](https://github.com/IntelPython/dpnp/pull/1645) +* Added implementation of `dpnp.copysign` and `dpnp.rsqrt` functions [#1624](https://github.com/IntelPython/dpnp/pull/1624) +* Added implementation of `dpnp.linalg.slogdet` function [#1607](https://github.com/IntelPython/dpnp/pull/1607) +* Added implementation of `dpnp.can_cast` function [#1600](https://github.com/IntelPython/dpnp/pull/1600) +* Added implementation of `dpnp.linalg.solve` function [#1598](https://github.com/IntelPython/dpnp/pull/1598) +* Added implementation of `dpnp.broadcast_arrays` function [#1594](https://github.com/IntelPython/dpnp/pull/1594) +* Added implementation of `dpnp.tile` function [#1586](https://github.com/IntelPython/dpnp/pull/1586) +* Added implementation of `dpnp.iinfo` and `dpnp.finfo` functions [#1582](https://github.com/IntelPython/dpnp/pull/1582) +* Added implementation of `dpnp.logaddexp` function [#1561](https://github.com/IntelPython/dpnp/pull/1561) +* Added implementation of `dpnp.positive` function [#1559](https://github.com/IntelPython/dpnp/pull/1559) + +### Changed + +* Enabled compatibility support against numpy `1.26.4` 
[#1690](https://github.com/IntelPython/dpnp/pull/1690) +* Implemented `dpnp.true_divide` as an alias on `dpnp.divide` function [#1641](https://github.com/IntelPython/dpnp/pull/1641) +* Added support of more number of data types and dimensions for input array in `dpnp.vdot` function [#1692](https://github.com/IntelPython/dpnp/pull/1692) +* Added support of more number of data types and dimensions for input array in `dpnp.linalg.qr` function [#1673](https://github.com/IntelPython/dpnp/pull/1673) +* Added support of more number of data types and dimensions for input array in `dpnp.dot` function [#1669](https://github.com/IntelPython/dpnp/pull/1669) +* Added support of more number of data types and dimensions for input array in `dpnp.linalg.inv` function [#1665](https://github.com/IntelPython/dpnp/pull/1665) +* Added support of more number of data types for input array in `dpnp.sort` and `dpnp.argsort` functions, as well as implementing support of `axis` keyword [#1660](https://github.com/IntelPython/dpnp/pull/1660) +* Added support of more number of data types and dimensions for input array in `dpnp.linalg.cholesky` function, as well as implementing support of `upper` keyword [#1638](https://github.com/IntelPython/dpnp/pull/1638) +* Added support of more number of data types and dimensions for input array in `dpnp.diff`, as well as implementing support of `prepend` and `append` keywords [#1637](https://github.com/IntelPython/dpnp/pull/1637) +* Added support of more number of data types and dimensions for input array in `dpnp.matmul` function [#1616](https://github.com/IntelPython/dpnp/pull/1616) +* Added support of more number of data types and dimensions for input array in `dpnp.linalg.det` function [#1607](https://github.com/IntelPython/dpnp/pull/1607) +* Added support of more number of data types and dimensions for input array in `dpnp.linalg.svd` function, as well as implementing support of `full_matrices`, `compute_uv` and `hermitian` keywords 
[#1604](https://github.com/IntelPython/dpnp/pull/1604) +* Accepted different data types and dimensions of input arrays in `dpnp.put_along_axis` and `dpnp.take_along_axis` functions, as well as available values of `axis` keyword [#1636](https://github.com/IntelPython/dpnp/pull/1636) +* Added `keepdims`, `initial` and `where` keywords to `dpnp.amax` and `dpnp.amin` functions [#1639](https://github.com/IntelPython/dpnp/pull/1639) +* Extended `dpnp.meshgrid` function to support `sparse` and `copy` keyword arguments [#1675](https://github.com/IntelPython/dpnp/pull/1675) +* Extended `dpnp.average` function to support `axis`, `weights`, `returned` and `keepdims` keywords and `dpnp.nansum` function with `axis`, `dtype`, `keepdims` and `out` keyword arguments [#1654](https://github.com/IntelPython/dpnp/pull/1654) +* Extended `dpnp.std`, `dpnp.var` and `dpnp.nanvar` functions to support `axis`, `dtype`, `out` and `keepdims` keyword arguments [#1635](https://github.com/IntelPython/dpnp/pull/1635) +* Extended `dpnp.ogrid` and `dpnp.mgrid` functions with support of device-aware keywords of compute follows data paradigm [#1622](https://github.com/IntelPython/dpnp/pull/1622) +* Extended `dpnp.indices` function to support `dtype` and `sparse` keyword arguments, as well as device-aware keywords of compute follows data paradigm [#1622](https://github.com/IntelPython/dpnp/pull/1622) +* Extended `dpnp.count_nonzero` function to support `axis` and `keepdims` keyword arguments [#1615](https://github.com/IntelPython/dpnp/pull/1615) +* Extended `dpnp.put_along_axis` and `dpnp.take_along_axis` functions to support `out`, `dtype` and `casting` keyword arguments [#1608](https://github.com/IntelPython/dpnp/pull/1608) +* Extended `dpnp.stack` and `dpnp.concatenate` functions to support `out`, `dtype` and `casting` keyword arguments [#1608](https://github.com/IntelPython/dpnp/pull/1608) +* Extended `dpnp.vstack` function to support `dtype` and `casting` keyword arguments
[#1595](https://github.com/IntelPython/dpnp/pull/1595) +* Extended `dpnp.diag`, `dpnp.diagflat`, `dpnp.ptp` and `dpnp.vander` functions with support of extra keywords to align with compute follows data paradigm [#1579](https://github.com/IntelPython/dpnp/pull/1579) +* Extended `dpnp.tri` and `dpnp.identity` functions with support of device-aware keywords of compute follows data paradigm [#1577](https://github.com/IntelPython/dpnp/pull/1577) +* Added dedicated in-place kernels to `dpnp.divide` and `dpnp.floor_divide` functions [#1587](https://github.com/IntelPython/dpnp/pull/1587) +* Redesigned `dpnp.cbrt` and `dpnp.exp2` functions through pybind11 extension of OneMKL call where possible or leveraging on `dpctl.tensor` implementation [#1624](https://github.com/IntelPython/dpnp/pull/1624) +* Redesigned `dpnp.exp`, `dpnp.expm1`, `dpnp.log10`, `dpnp.log1p` and `dpnp.log2` functions through pybind11 extension of OneMKL call where possible or leveraging on `dpctl.tensor` implementation [#1576](https://github.com/IntelPython/dpnp/pull/1576) +* Redesigned `dpnp.abs` function through pybind11 extension of OneMKL call where possible or leveraging on `dpctl.tensor` implementation [#1575](https://github.com/IntelPython/dpnp/pull/1575) +* Redesigned `dpnp.hypot` function through pybind11 extension of OneMKL call where possible or leveraging on `dpctl.tensor` implementation [#1560](https://github.com/IntelPython/dpnp/pull/1560) +* Leveraged `dpctl.tensor` implementation for `dpnp.reciprocal` function [#1650](https://github.com/IntelPython/dpnp/pull/1650) +* Leveraged `dpctl.tensor` implementation for `dpnp.mean` function [#1632](https://github.com/IntelPython/dpnp/pull/1632) +* Leveraged `dpctl.tensor` implementation for `dpnp.repeat` function [#1614](https://github.com/IntelPython/dpnp/pull/1614) +* Leveraged `dpctl.tensor` implementation for `dpnp.argmax` and `dpnp.argmin` functions [#1610](https://github.com/IntelPython/dpnp/pull/1610) +* Leveraged `dpctl.tensor` 
implementation for `dpnp.geomspace` and `dpnp.logspace` functions [#1603](https://github.com/IntelPython/dpnp/pull/1603) +* Leveraged `dpctl.tensor` implementation for `dpnp.max` and `dpnp.min` functions [#1602](https://github.com/IntelPython/dpnp/pull/1602) +* Leveraged `dpctl.tensor` implementation for `dpnp.astype` function [#1597](https://github.com/IntelPython/dpnp/pull/1597) +* Leveraged `dpctl.tensor` implementation for `dpnp.maximum` and `dpnp.minimum` functions [#1558](https://github.com/IntelPython/dpnp/pull/1558) + +### Fixed + +* Resolved potential raising of execution placement error from `dpnp.take_along_axis` and `dpnp.put_along_axis` functions [#1702](https://github.com/IntelPython/dpnp/pull/1702) +* Improved performance of `dpnp.matmul` and `dpnp.dot` functions when `out` keyword is passed [#1694](https://github.com/IntelPython/dpnp/pull/1694) +* Completed documentation for each array creation function [#1674](https://github.com/IntelPython/dpnp/pull/1674) +* Aligned `dpnp.clip` where both `min` and `max` keywords have `None` value with NumPy implementation [#1670](https://github.com/IntelPython/dpnp/pull/1670) +* Fixed a bug related to `out` keyword in elementwise functions [#1656](https://github.com/IntelPython/dpnp/pull/1656) +* Resolved compilation warnings due to `-Wvla-extension` option enabled by default [#1651](https://github.com/IntelPython/dpnp/pull/1651) +* Replaced deprecated `IntelDPCPPConfig.cmake` script with vendored `IntelSYCLConfig.cmake` [#1611](https://github.com/IntelPython/dpnp/pull/1611) +* Improved coverage report to include code of pybind11 extensions [#1609](https://github.com/IntelPython/dpnp/pull/1609) +* Improved performance of `dpnp.atleast_2d` and `dpnp.atleast_3d` functions and fixed to return a correct shape of resulting array [#1560](https://github.com/IntelPython/dpnp/pull/1560) + + +## [0.13.0] - 09/29/2023 + +### Added + +* Added implementation of `dpnp.imag` and `dpnp.real` functions, as well as the 
corresponding properties and setters of dpnp array [#1557](https://github.com/IntelPython/dpnp/pull/1557) * Added implementation of flipping functions: `dpnp.flip`, `dpnp.fliplr` and `dpnp.flipud` [#1543](https://github.com/IntelPython/dpnp/pull/1543) * Added implementation of `dpnp.rint` function through `dpnp.round` call [#1537](https://github.com/IntelPython/dpnp/pull/1537) * Added in-place support for arithmetic operators [#1530](https://github.com/IntelPython/dpnp/pull/1530) @@ -64,6 +139,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Resolved issues with running random functions on a device without fp64 support [#1498](https://github.com/IntelPython/dpnp/pull/1498) * Resolved issues with running statistics functions on a device without fp64 support [#1494](https://github.com/IntelPython/dpnp/pull/1494) + ## [0.12.1] - 07/18/2023 ### Added