# Patch summary and review notes. Everything after this comment header is the
# unmodified diff. NOTE: the diff's original line breaks have been collapsed;
# the counts in the `@@` hunk headers refer to the original multi-line layout.
#
# dpctl/tensor/_clip.py
#   * _resolve_one_strong_one_weak_types: two `return` statements that produced
#     tuples (`st_dtype, dpt.complex64` and a 1-tuple wrapping
#     `_to_device_supported_dtype(dpt.float64, dev)`) now return a single dtype.
#   * _clip_none: the fallback that replaces an unrecognised `order` with "K"
#     is removed from this helper and added to `clip`, right after the type
#     check on `x`.
#   * clip: `x` becomes positional-only (`x, /`). The docstring now lists
#     bool/int/float/complex scalars for `min` and `max` and drops the
#     "Only one of `min` and `max` can be `None`" sentences.
#   * clip with `min is None and max is None` no longer raises ValueError.
#     It copies `x` into the result instead:
#       - a supplied `out` is validated: TypeError if it is not a
#         usm_ndarray, ValueError on shape mismatch, ValueError on dtype
#         mismatch, ExecutionPlacementError if no common execution queue;
#       - if `out` overlaps `x` but is not the same logical tensor, the copy
#         goes through a temporary (`dpt.empty_like(out)`) that is then copied
#         into the original `out`;
#       - if `out` is the same logical tensor as `x`, it is returned without
#         any copy;
#       - otherwise (the branch appears to be the `out is None` case;
#         indentation is lost here, confirm) the result is allocated with
#         `_empty_like_orderK` for order "K", or `dpt.empty_like(x, order=...)`
#         with "A" resolved to "F" when `x.flags.f_contiguous`, else "C".
#
# dpctl/tensor/_copy_utils.py
#   * copy: `return TypeError(...)` becomes `raise TypeError(...)`; the old
#     code handed the exception object back to the caller instead of raising.
#
# dpctl/tests/test_tensor_clip.py
#   * Imports `_get_dtype`, `_strong_dtype_num_kind`, `_weak_type_num_kind`.
#   * test_clip_out_need_temporary: adds `clip(x[:6], out=x[-6:])` and
#     `clip(x, out=x)` cases.
#   * test_clip_arg_validation: adds ValueError/TypeError cases for invalid
#     `min` and invalid `out`.
#   * test_clip_minmax_weak_types: parametrized over "?", "i4", "f4", "c8";
#     also exercises min-only and max-only calls.
#   * test_clip_max_weak_types is renamed test_clip_max_weak_type_errors and
#     gains three single-bound ValueError cases.
#   * New tests: test_clip_none_args, test_clip_shape_errors,
#     test_clip_compute_follows_data.
#
# NOTE(review): an unrecognised `order` is silently replaced by "K" rather
#   than rejected; confirm this leniency is intended for the public `clip`.
# NOTE(review): in test_clip_minmax_weak_types the passing branch calls
#   `dpt.clip(x, None, min_v)` while the failing branch calls
#   `dpt.clip(x, None, max_v)`, both guarded by the `min_v` kind (`wk_dt1`).
#   Each `min_v`/`max_v` pair has the same Python type, so the outcome is
#   presumably the same; consider using `max_v`/`wk_dt2` consistently.
# NOTE(review): `st_dt` in that test does not depend on the loop variables
#   but is computed inside the loop.
# NOTE(review): test_clip_compute_follows_data obtains `q1` and `q2` from two
#   `get_queue_or_skip()` calls and expects ExecutionPlacementError;
#   presumably the two calls yield incompatible queues; verify.
#
diff --git a/dpctl/tensor/_clip.py b/dpctl/tensor/_clip.py index f2bc326e82..d95c0fa764 100644 --- a/dpctl/tensor/_clip.py +++ b/dpctl/tensor/_clip.py @@ -168,9 +168,9 @@ def _resolve_one_strong_one_weak_types(st_dtype, dtype, dev): return dpt.dtype(ti.default_device_int_type(dev)) if isinstance(dtype, WeakComplexType): if st_dtype is dpt.float16 or st_dtype is dpt.float32: - return st_dtype, dpt.complex64 + return dpt.complex64 return _to_device_supported_dtype(dpt.complex128, dev) - return (_to_device_supported_dtype(dpt.float64, dev),) + return _to_device_supported_dtype(dpt.float64, dev) else: return st_dtype else: @@ -197,8 +197,6 @@ def _check_clip_dtypes(res_dtype, arg1_dtype, arg2_dtype, sycl_dev): def _clip_none(x, val, out, order, _binary_fn): - if order not in ["K", "C", "F", "A"]: - order = "K" q1, x_usm_type = x.sycl_queue, x.usm_type q2, val_usm_type = _get_queue_usm_type(val) if q2 is None: @@ -391,9 +389,8 @@ def _clip_none(x, val, out, order, _binary_fn): return out -# need to handle logic for min or max being None -def clip(x, min=None, max=None, out=None, order="K"): - """clip(x, min, max, out=None, order="K") +def clip(x, /, min=None, max=None, out=None, order="K"): + """clip(x, min=None, max=None, out=None, order="K") Clips to the range [`min_i`, `max_i`] for each element `x_i` in `x`. @@ -402,14 +399,14 @@ def clip(x, min=None, max=None, out=None, order="K"): x (usm_ndarray): Array containing elements to clip. Must be compatible with `min` and `max` according to broadcasting rules. - min ({None, usm_ndarray}, optional): Array containing minimum values. + min ({None, Union[usm_ndarray, bool, int, float, complex]}, optional): + Array containing minimum values. Must be compatible with `x` and `max` according to broadcasting rules. - Only one of `min` and `max` can be `None`. - max ({None, usm_ndarray}, optional): Array containing maximum values. 
+ max ({None, Union[usm_ndarray, bool, int, float, complex]}, optional): + Array containing maximum values. Must be compatible with `x` and `min` according to broadcasting rules. - Only one of `min` and `max` can be `None`. out ({None, usm_ndarray}, optional): Output array to populate. Array must have the correct shape and the expected data type. @@ -428,10 +425,67 @@ def clip(x, min=None, max=None, out=None, order="K"): "Expected `x` to be of dpctl.tensor.usm_ndarray type, got " f"{type(x)}" ) + if order not in ["K", "C", "F", "A"]: + order = "K" if min is None and max is None: - raise ValueError( - "only one of `min` and `max` is permitted to be `None`" + exec_q = x.sycl_queue + orig_out = out + if out is not None: + if not isinstance(out, dpt.usm_ndarray): + raise TypeError( + "output array must be of usm_ndarray type, got " + f"{type(out)}" + ) + + if out.shape != x.shape: + raise ValueError( + "The shape of input and output arrays are " + f"inconsistent. Expected output shape is {x.shape}, " + f"got {out.shape}" + ) + + if x.dtype != out.dtype: + raise ValueError( + f"Output array of type {x.dtype} is needed, " + f"got {out.dtype}" + ) + + if ( + dpctl.utils.get_execution_queue((exec_q, out.sycl_queue)) + is None + ): + raise ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + if ti._array_overlap(x, out): + if not ti._same_logical_tensors(x, out): + out = dpt.empty_like(out) + else: + return out + else: + if order == "K": + out = _empty_like_orderK(x, x.dtype) + else: + if order == "A": + order = "F" if x.flags.f_contiguous else "C" + out = dpt.empty_like(x, order=order) + + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=x, dst=out, sycl_queue=exec_q ) + if not (orig_out is None or orig_out is out): + # Copy the out data from temporary buffer to original memory + ht_copy_out_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=out, + dst=orig_out, + sycl_queue=exec_q, + depends=[copy_ev], + ) + 
ht_copy_out_ev.wait() + out = orig_out + ht_copy_ev.wait() + return out elif max is None: return _clip_none(x, min, out, order, tei._maximum) elif min is None: diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index ae261c50c1..972ff3e7ae 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -531,7 +531,7 @@ def copy(usm_ary, order="K"): ) order = order[0].upper() if not isinstance(usm_ary, dpt.usm_ndarray): - return TypeError( + raise TypeError( f"Expected object of type dpt.usm_ndarray, got {type(usm_ary)}" ) copy_order = "C" diff --git a/dpctl/tests/test_tensor_clip.py b/dpctl/tests/test_tensor_clip.py index 39ba35a4a1..11c93ecf1f 100644 --- a/dpctl/tests/test_tensor_clip.py +++ b/dpctl/tests/test_tensor_clip.py @@ -21,7 +21,12 @@ import dpctl import dpctl.tensor as dpt -from dpctl.tensor._type_utils import _can_cast +from dpctl.tensor._elementwise_common import _get_dtype +from dpctl.tensor._type_utils import ( + _can_cast, + _strong_dtype_num_kind, + _weak_type_num_kind, +) from dpctl.utils import ExecutionPlacementError _all_dtypes = [ @@ -194,6 +199,15 @@ def test_clip_out_need_temporary(): dpt.clip(x[:6], 2, 3, out=x[-6:]) assert dpt.all(x[:-6] == 1) and dpt.all(x[-6:] == 2) + x = dpt.arange(12, dtype="i4") + dpt.clip(x[:6], out=x[-6:]) + expected = dpt.arange(6, dtype="i4") + assert dpt.all(x[:-6] == expected) and dpt.all(x[-6:] == expected) + + x = dpt.ones(10, dtype="i4") + dpt.clip(x, out=x) + assert dpt.all(x == 1) + x = dpt.full(6, 3, dtype="i4") a_min = dpt.full(10, 2, dtype="i4") a_max = dpt.asarray(4, dtype="i4") @@ -227,6 +241,21 @@ def test_clip_arg_validation(): with pytest.raises(TypeError): dpt.clip(check, x1, x2) + with pytest.raises(ValueError): + dpt.clip(x1, check, x2) + + with pytest.raises(ValueError): + dpt.clip(x1, check) + + with pytest.raises(TypeError): + dpt.clip(x1, x1, x2, out=check) + + with pytest.raises(TypeError): + dpt.clip(x1, x2, out=check) + + with pytest.raises(TypeError): + 
dpt.clip(x1, out=check) + @pytest.mark.parametrize( "dt1,dt2", [("i4", "i4"), ("i4", "i2"), ("i2", "i4"), ("i1", "i2")] @@ -599,22 +628,40 @@ def test_clip_max_less_than_min(): assert dpt.all(res == 0) -def test_clip_minmax_weak_types(): +@pytest.mark.parametrize("dt", ["?", "i4", "f4", "c8"]) +def test_clip_minmax_weak_types(dt): get_queue_or_skip() - x = dpt.zeros(10, dtype=dpt.bool) + x = dpt.zeros(10, dtype=dt) min_list = [False, 0, 0.0, 0.0 + 0.0j] max_list = [True, 1, 1.0, 1.0 + 0.0j] + for min_v, max_v in zip(min_list, max_list): - if isinstance(min_v, bool) and isinstance(max_v, bool): - y = dpt.clip(x, min_v, max_v) - assert isinstance(y, dpt.usm_ndarray) + st_dt = _strong_dtype_num_kind(dpt.dtype(dt)) + wk_dt1 = _weak_type_num_kind(_get_dtype(min_v, x.sycl_device)) + wk_dt2 = _weak_type_num_kind(_get_dtype(max_v, x.sycl_device)) + + if st_dt >= wk_dt1 and st_dt >= wk_dt2: + r = dpt.clip(x, min_v, max_v) + assert isinstance(r, dpt.usm_ndarray) else: with pytest.raises(ValueError): dpt.clip(x, min_v, max_v) + if st_dt >= wk_dt1: + r = dpt.clip(x, min_v) + assert isinstance(r, dpt.usm_ndarray) + + r = dpt.clip(x, None, min_v) + assert isinstance(r, dpt.usm_ndarray) + else: + with pytest.raises(ValueError): + dpt.clip(x, min_v) + with pytest.raises(ValueError): + dpt.clip(x, None, max_v) + -def test_clip_max_weak_types(): +def test_clip_max_weak_type_errors(): get_queue_or_skip() x = dpt.zeros(10, dtype="i4") @@ -626,6 +673,15 @@ def test_clip_max_weak_types(): with pytest.raises(ValueError): dpt.clip(x, 2.5, m) + with pytest.raises(ValueError): + dpt.clip(x, 2.5) + + with pytest.raises(ValueError): + dpt.clip(dpt.astype(x, "?"), 2) + + with pytest.raises(ValueError): + dpt.clip(dpt.astype(x, "f4"), complex(2)) + def test_clip_unaligned(): get_queue_or_skip() @@ -636,3 +692,59 @@ def test_clip_unaligned(): expected = dpt.full(512, 2, dtype="i4") assert dpt.all(dpt.clip(x[1:], a_min, a_max) == expected) + + +def test_clip_none_args(): + get_queue_or_skip() + + 
x = dpt.arange(10, dtype="i4") + r = dpt.clip(x) + assert dpt.all(x == r) + + +def test_clip_shape_errors(): + get_queue_or_skip() + + x = dpt.ones((4, 4), dtype="i4") + a_min = dpt.ones(5, dtype="i4") + a_max = dpt.ones(5, dtype="i4") + + with pytest.raises(ValueError): + dpt.clip(x, a_min, a_max) + + with pytest.raises(ValueError): + dpt.clip(x, a_min) + + with pytest.raises(ValueError): + dpt.clip(x, 0, 1, out=a_min) + + with pytest.raises(ValueError): + dpt.clip(x, 0, out=a_min) + + with pytest.raises(ValueError): + dpt.clip(x, out=a_min) + + +def test_clip_compute_follows_data(): + q1 = get_queue_or_skip() + q2 = get_queue_or_skip() + + x = dpt.ones(10, dtype="i4", sycl_queue=q1) + a_min = dpt.ones(10, dtype="i4", sycl_queue=q2) + a_max = dpt.ones(10, dtype="i4", sycl_queue=q1) + res = dpt.empty_like(x, sycl_queue=q2) + + with pytest.raises(ExecutionPlacementError): + dpt.clip(x, a_min, a_max) + + with pytest.raises(ExecutionPlacementError): + dpt.clip(x, dpt.ones_like(x), a_max, out=res) + + with pytest.raises(ExecutionPlacementError): + dpt.clip(x, a_min) + + with pytest.raises(ExecutionPlacementError): + dpt.clip(x, None, a_max, out=res) + + with pytest.raises(ExecutionPlacementError): + dpt.clip(x, out=res)