From f795658559d61f1fd394c828e95608c9de8ee027 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 11 Nov 2021 18:10:55 -0600 Subject: [PATCH] C++ refactoring: NumPy ufuncs for v2. (#1143) * C++ refactoring: NumPy ufuncs for v2. * All ufunc-related tests pass (skipping Python 2.7). --- src/awkward/_v2/_broadcasting.py | 4 +- src/awkward/_v2/_connect/numpy.py | 369 ++++++++---------- src/awkward/_v2/_util.py | 77 ++-- src/awkward/_v2/contents/regulararray.py | 16 + src/awkward/_v2/forms/form.py | 39 +- src/awkward/_v2/highlevel.py | 119 +++--- .../_v2/operations/convert/ak_from_numpy.py | 246 ++++++------ src/awkward/_v2/types/arraytype.py | 4 +- src/awkward/_v2/types/listtype.py | 4 +- src/awkward/_v2/types/numpytype.py | 4 +- src/awkward/_v2/types/optiontype.py | 4 +- src/awkward/_v2/types/recordtype.py | 2 +- src/awkward/_v2/types/regulartype.py | 4 +- src/awkward/_v2/types/uniontype.py | 4 +- src/awkward/_v2/types/unknowntype.py | 2 +- tests/v2/test_0086-nep13-ufunc.py | 151 +++++++ .../v2/test_0504-block-ufuncs-for-strings.py | 46 +++ ...narray-ufuncs-and-parameters-in-merging.py | 336 ++++++++++++++++ 18 files changed, 999 insertions(+), 432 deletions(-) create mode 100644 tests/v2/test_0086-nep13-ufunc.py create mode 100644 tests/v2/test_0504-block-ufuncs-for-strings.py create mode 100644 tests/v2/test_0527-fix-unionarray-ufuncs-and-parameters-in-merging.py diff --git a/src/awkward/_v2/_broadcasting.py b/src/awkward/_v2/_broadcasting.py index 024b274ac0..5ef8f7d5ed 100644 --- a/src/awkward/_v2/_broadcasting.py +++ b/src/awkward/_v2/_broadcasting.py @@ -62,12 +62,12 @@ def broadcast_pack(inputs, isscalar): def broadcast_unpack(x, isscalar): if all(isscalar): if len(x) == 0: - return x.getitem_nothing().getitem_nothing() + return x._getitem_nothing()._getitem_nothing() else: return x[0][0] else: if len(x) == 0: - return x.getitem_nothing() + return x._getitem_nothing() else: return x[0] diff --git a/src/awkward/_v2/_connect/numpy.py b/src/awkward/_v2/_connect/numpy.py index 02b5580d2d..0f216b45d6 100644 --- a/src/awkward/_v2/_connect/numpy.py +++ b/src/awkward/_v2/_connect/numpy.py @@ -6,7 +6,8 @@ import numpy -# import awkward as ak +import awkward as ak +from awkward._v2.contents.numpyarray import NumpyArray # def convert_to_array(layout, args, kwargs): @@ -63,207 +64,165 @@ # return decorator -# def array_ufunc(ufunc, method, inputs, kwargs): -# if method != "__call__" or len(inputs) == 0 or "out" in kwargs: -# return NotImplemented - -# behavior = ak._v2._util.behaviorof(*inputs) - -# nextinputs = [] -# for x in inputs: -# cast_fcn = ak._v2._util.custom_cast(x, behavior) -# if cast_fcn is not None: -# x = cast_fcn(x) -# nextinputs.append( -# ak._v2.operations.convert.to_layout(x, allow_record=True, allow_other=True) -# ) -# inputs = nextinputs - -# def adjust(custom, inputs, kwargs): -# args = [ -# ak._v2._util.wrap(x, behavior) -# if isinstance(x, (ak._v2.contents.Content, ak._v2.record.Record)) -# else x -# for x in inputs -# ] -# out = custom(*args, **kwargs) -# if not isinstance(out, tuple): -# out = (out,) - -# return tuple( -# x.layout if isinstance(x, (ak._v2.highlevel.Array, ak._v2.highlevel.Record)) else x -# for x in out -# ) - -# def adjust_apply_ufunc(apply_ufunc, ufunc, method, inputs, kwargs): -# nextinputs = [ -# ak._v2._util.wrap(x, behavior) -# if isinstance(x, (ak._v2.contents.Content, ak._v2.record.Record)) -# else x -# for x in inputs -# ] - -# out = apply_ufunc(ufunc, method, nextinputs, kwargs) - -# if out is NotImplemented: -# return None -# else: -# if not isinstance(out, tuple): -# out = (out,) -# out = tuple( -# x.layout -# if isinstance(x, (ak._v2.highlevel.Array, ak._v2.highlevel.Record)) -# else x -# for x in out -# ) -# return lambda: out - -# def is_fully_regular(layout): -# if ( -# isinstance(layout, ak._v2.contents.RegularArray) -# and layout.parameter("__record__") is None -# and layout.parameter("__array__") is None -# ): -# if isinstance(layout.content, ak._v2.contents.NumpyArray): -# return True -# elif isinstance(layout.content, ak._v2.contents.RegularArray): -# return is_fully_regular(layout.content) -# else: -# return False -# else: -# return False - -# def deregulate(layout): -# if not is_fully_regular(layout): -# return layout -# else: -# shape = [len(layout)] -# node = layout -# while isinstance(node, ak._v2.contents.RegularArray): -# shape.append(node.size) -# node = node.content -# if node.format.upper().startswith("M"): -# nparray = ak.nplike.of(node).asarray(node.view_int64).view(node.format) -# nparray = nparray.reshape(tuple(shape) + nparray.shape[1:]) -# return ak._v2.contents.NumpyArray( -# nparray, -# node.identities, -# node.parameters, -# ) -# else: -# nparray = ak.nplike.of(node).asarray(node) -# nparray = nparray.reshape(tuple(shape) + nparray.shape[1:]) -# return ak._v2.contents.NumpyArray( -# nparray, -# node.identities, -# node.parameters, -# ) - -# def getfunction(inputs): -# signature = [ufunc] -# for x in inputs: -# if isinstance(x, ak._v2.contents.Content): -# record = x.parameter("__record__") -# array = x.parameter("__array__") -# if record is not None: -# signature.append(record) -# elif array is not None: -# signature.append(array) -# elif isinstance(x, ak._v2.contents.NumpyArray): -# if x.format.upper().startswith("M"): -# signature.append( -# ak.nplike.of(x) -# .asarray(x.view_int64) -# .view(x.format) -# .dtype.type -# ) -# else: -# signature.append(ak.nplike.of(x).asarray(x).dtype.type) -# else: -# signature.append(None) -# else: -# signature.append(type(x)) - -# custom = ak._v2._util.overload(behavior, signature) -# if custom is not None: -# return lambda: adjust(custom, inputs, kwargs) - -# if ufunc is numpy.matmul: -# custom_matmul = getfunction_matmul(inputs) -# if custom_matmul is not None: -# return custom_matmul - -# inputs = [deregulate(x) for x in inputs] - -# if all( -# ( -# isinstance(x, ak._v2.contents.NumpyArray) -# and not (x.format.upper().startswith("M")) -# ) -# or not isinstance(x, (ak._v2.contents.Content, ak.partition.PartitionedArray)) # NO PARTITIONED ARRAY -# for x in inputs -# ): -# nplike = ak.nplike.of(*inputs) -# result = getattr(ufunc, method)( -# *[nplike.asarray(x) for x in inputs], **kwargs -# ) -# return lambda: (ak._v2.operations.convert.from_numpy(result, highlevel=False),) -# elif all( -# isinstance(x, ak._v2.contents.NumpyArray) and (x.format.upper().startswith("M")) -# for x in inputs -# ): -# nplike = ak.nplike.of(*inputs) -# result = getattr(ufunc, method)( -# *[nplike.asarray(x.view_int64).view(x.format) for x in inputs], **kwargs -# ) -# return lambda: (ak._v2.operations.convert.from_numpy(result, highlevel=False),) - -# for x in inputs: -# if isinstance(x, ak._v2.contents.Content): -# chained_behavior = ak._v2._util.Behavior(ak.behavior, behavior) -# apply_ufunc = chained_behavior[numpy.ufunc, x.parameter("__array__")] -# if apply_ufunc is not None: -# out = adjust_apply_ufunc(apply_ufunc, ufunc, method, inputs, kwargs) -# if out is not None: -# return out -# apply_ufunc = chained_behavior[numpy.ufunc, x.parameter("__record__")] -# if apply_ufunc is not None: -# out = adjust_apply_ufunc(apply_ufunc, ufunc, method, inputs, kwargs) -# if out is not None: -# return out - -# if all( -# x.parameter("__array__") is not None -# or x.parameter("__record__") is not None -# for x in inputs -# if isinstance(x, ak._v2.contents.Content) -# ): -# custom_types = [] -# for x in inputs: -# if isinstance(x, ak._v2.contents.Content): -# if x.parameter("__array__") is not None: -# custom_types.append(x.parameter("__array__")) -# elif x.parameter("__record__") is not None: -# custom_types.append(x.parameter("__record__")) -# else: -# custom_types.append(type(x).__name__) -# else: -# custom_types.append(type(x).__name__) -# raise ValueError( -# "no overloads for custom types: {0}({1})".format( -# ufunc.__name__, -# ", ".join(custom_types), -# ) -# -# ) +def _array_ufunc_custom_cast(inputs, behavior): + nextinputs = [] + for x in inputs: + cast_fcn = ak._v2._util.custom_cast(x, behavior) + if cast_fcn is not None: + x = cast_fcn(x) + nextinputs.append( + ak._v2.operations.convert.to_layout(x, allow_record=True, allow_other=True) + ) + return nextinputs + + +def _array_ufunc_adjust(custom, inputs, kwargs, behavior): + args = [ + ak._v2._util.wrap(x, behavior) + if isinstance(x, (ak._v2.contents.Content, ak._v2.record.Record)) + else x + for x in inputs + ] + out = custom(*args, **kwargs) + if not isinstance(out, tuple): + out = (out,) + + return tuple( + x.layout + if isinstance(x, (ak._v2.highlevel.Array, ak._v2.highlevel.Record)) + else x + for x in out + ) + + +def _array_ufunc_adjust_apply(apply_ufunc, ufunc, method, inputs, kwargs, behavior): + nextinputs = [ + ak._v2._util.wrap(x, behavior) + if isinstance(x, (ak._v2.contents.Content, ak._v2.record.Record)) + else x + for x in inputs + ] + + out = apply_ufunc(ufunc, method, nextinputs, kwargs) + + if out is NotImplemented: + return None + else: + if not isinstance(out, tuple): + out = (out,) + return tuple( + x.layout + if isinstance(x, (ak._v2.highlevel.Array, ak._v2.highlevel.Record)) + else x + for x in out + ) -# return None -# out = ak._v2._util.broadcast_and_apply( -# inputs, getfunction, behavior, allow_records=False, pass_depth=False -# ) -# assert isinstance(out, tuple) and len(out) == 1 -# return ak._v2._util.wrap(out[0], behavior) +def _array_ufunc_signature(ufunc, inputs): + signature = [ufunc] + for x in inputs: + if isinstance(x, ak._v2.contents.Content): + record, array = x.parameter("__record__"), x.parameter("__array__") + if record is not None: + signature.append(record) + elif array is not None: + signature.append(array) + elif isinstance(x, NumpyArray): + signature.append(x.dtype.type) + else: + signature.append(None) + else: + signature.append(type(x)) + + return signature + + +def _array_ufunc_deregulate(inputs): + nextinputs = [] + for x in inputs: + if isinstance(x, ak._v2.contents.RegularArray): + y = x.maybe_toNumpyArray() + if y is not None: + nextinputs.append(y) + else: + nextinputs.append(x) + else: + nextinputs.append(x) + + return nextinputs + + +def array_ufunc(ufunc, method, inputs, kwargs): + if method != "__call__" or len(inputs) == 0 or "out" in kwargs: + return NotImplemented + + behavior = ak._v2._util.behavior_of(*inputs) + + inputs = _array_ufunc_custom_cast(inputs, behavior) + + def action(inputs, **ignore): + signature = _array_ufunc_signature(ufunc, inputs) + + custom = ak._v2._util.overload(behavior, signature) + if custom is not None: + return _array_ufunc_adjust(custom, inputs, kwargs, behavior) + + if ufunc is numpy.matmul: + custom_matmul = action_for_matmul(inputs) + if custom_matmul is not None: + return custom_matmul() + + inputs = _array_ufunc_deregulate(inputs) + + if all( + isinstance(x, NumpyArray) or not isinstance(x, ak._v2.contents.Content) + for x in inputs + ): + nplike = ak.nplike.of(*inputs) + args = [x.to(nplike) if isinstance(x, NumpyArray) else x for x in inputs] + result = getattr(ufunc, method)(*args, **kwargs) + return (NumpyArray(result, nplike=nplike),) + + for x in inputs: + if isinstance(x, ak._v2.contents.Content): + apply_ufunc = ak._v2._util.custom_ufunc(ufunc, x, behavior) + if apply_ufunc is not None: + out = _array_ufunc_adjust_apply( + apply_ufunc, ufunc, method, inputs, kwargs, behavior + ) + if out is not None: + return out + + if all( + x.parameter("__array__") is not None + or x.parameter("__record__") is not None + for x in inputs + if isinstance(x, ak._v2.contents.Content) + ): + error_message = [] + for x in inputs: + if isinstance(x, ak._v2.contents.Content): + if x.parameter("__array__") is not None: + error_message.append(x.parameter("__array__")) + elif x.parameter("__record__") is not None: + error_message.append(x.parameter("__record__")) + else: + error_message.append(type(x).__name__) + else: + error_message.append(type(x).__name__) + raise TypeError( + "no {0}.{1} overloads for custom types: {2}".format( + type(ufunc).__module__, ufunc.__name__, ", ".join(error_message) + ) + ) + + return None + + out = ak._v2._broadcasting.broadcast_and_apply( + inputs, action, behavior, allow_records=False, function_name=ufunc.__name__ + ) + assert isinstance(out, tuple) and len(out) == 1 + return ak._v2._util.wrap(out[0], behavior) # def matmul_for_numba(lefts, rights, dtype): @@ -345,7 +304,11 @@ # matmul_for_numba.numbafied = None -# def getfunction_matmul(inputs): +def action_for_matmul(inputs): + raise NotImplementedError + + +# def action_for_matmul(inputs): # inputs = [ # ak._v2._util.recursively_apply( # x, (lambda _: _), pass_depth=False, numpy_to_regular=True @@ -358,7 +321,7 @@ # if len(inputs) == 2 and all( # isinstance(x, ak._v2._util.listtypes) # and isinstance(x.content, ak._v2._util.listtypes) -# and isinstance(x.content.content, ak._v2.contents.NumpyArray) +# and isinstance(x.content.content, NumpyArray) # for x in inputs # ): # ak._v2._connect.numba.register_and_check() @@ -378,7 +341,7 @@ # ak._v2.index.Index64(outer), # ak._v2.contents.ListOffsetArray64( # ak._v2.index.Index64(inner), -# ak._v2.contents.NumpyArray(content), +# NumpyArray(content), # ), # ), # ) diff --git a/src/awkward/_v2/_util.py b/src/awkward/_v2/_util.py index 665c04a320..a601685526 100644 --- a/src/awkward/_v2/_util.py +++ b/src/awkward/_v2/_util.py @@ -239,17 +239,17 @@ def arrayclass(layout, behavior): return ak._v2.highlevel.Array -# def custom_cast(obj, behavior): -# behavior = Behavior(ak._v2.behavior, behavior) -# for key, fcn in behavior.items(): -# if ( -# isinstance(key, tuple) -# and len(key) == 2 -# and key[0] == "__cast__" -# and isinstance(obj, key[1]) -# ): -# return fcn -# return None +def custom_cast(obj, behavior): + behavior = Behavior(ak._v2.behavior, behavior) + for key, fcn in behavior.items(): + if ( + isinstance(key, tuple) + and len(key) == 2 + and key[0] == "__cast__" + and isinstance(obj, key[1]) + ): + return fcn + return None def custom_broadcast(layout, behavior): @@ -271,6 +271,27 @@ def custom_broadcast(layout, behavior): return None +def custom_ufunc(ufunc, layout, behavior): + import numpy + + behavior = Behavior(ak._v2.behavior, behavior) + custom = layout.parameter("__array__") + if not isstr(custom): + custom = layout.parameter("__record__") + if not isstr(custom): + custom = layout.purelist_parameter("__record__") + if isstr(custom): + for key, fcn in behavior.items(): + if ( + isinstance(key, tuple) + and len(key) == 2 + and (key[0] is ufunc or key[0] is numpy.ufunc) + and key[1] == custom + ): + return fcn + return None + + # def numba_array_typer(layouttype, behavior): # behavior = Behavior(ak._v2.behavior, behavior) # arr = layouttype.parameters.get("__array__") @@ -371,23 +392,23 @@ def recordclass(layout, behavior): # return None -# def overload(behavior, signature): -# if not any(s is None for s in signature): -# behavior = Behavior(ak._v2.behavior, behavior) -# for key, custom in behavior.items(): -# if ( -# isinstance(key, tuple) -# and len(key) == len(signature) -# and key[0] == signature[0] -# and all( -# k == s -# or ( -# isinstance(k, type) and isinstance(s, type) and issubclass(s, k) -# ) -# for k, s in zip(key[1:], signature[1:]) -# ) -# ): -# return custom +def overload(behavior, signature): + if not any(s is None for s in signature): + behavior = Behavior(ak._v2.behavior, behavior) + for key, custom in behavior.items(): + if ( + isinstance(key, tuple) + and len(key) == len(signature) + and key[0] == signature[0] + and all( + k == s + or ( + isinstance(k, type) and isinstance(s, type) and issubclass(s, k) + ) + for k, s in zip(key[1:], signature[1:]) + ) + ): + return custom # def numba_attrs(layouttype, behavior): diff --git a/src/awkward/_v2/contents/regulararray.py b/src/awkward/_v2/contents/regulararray.py index 839df87d1c..3ccb445057 100644 --- a/src/awkward/_v2/contents/regulararray.py +++ b/src/awkward/_v2/contents/regulararray.py @@ -119,6 +119,22 @@ def toListOffsetArray64(self, start_at_zero=False): def toRegularArray(self): return self + def maybe_toNumpyArray(self): + content = None + if isinstance(self._content, ak._v2.contents.NumpyArray): + content = self._content + elif isinstance(self._content, RegularArray): + content = self._content.maybe_toNumpyArray() + + if isinstance(content, ak._v2.contents.NumpyArray): + shape = (self._length, self._size) + content.data.shape[1:] + return ak._v2.contents.NumpyArray( + content.data.reshape(shape), + None, + ak._v2._util.merge_parameters(self._parameters, content.parameters), + content.nplike, + ) + def _getitem_nothing(self): return self._content._getitem_range(slice(0, 0)) diff --git a/src/awkward/_v2/forms/form.py b/src/awkward/_v2/forms/form.py index 1910d8ca6c..df67e7c138 100644 --- a/src/awkward/_v2/forms/form.py +++ b/src/awkward/_v2/forms/form.py @@ -166,23 +166,42 @@ def from_json(input): return from_iter(json.loads(input)) -def _parameters_equal(one, two): +def _parameters_equal(one, two, only_array_record=False): if one is None and two is None: return True elif one is None: - for value in two.values(): - if value is not None: - return False + if only_array_record: + for key in ("__array__", "__record__"): + if two.get(key) is not None: + return False + else: + return True else: - return True + for value in two.values(): + if value is not None: + return False + else: + return True + elif two is None: - for value in one.values(): - if value is not None: - return False + if only_array_record: + for key in ("__array__", "__record__"): + if one.get(key) is not None: + return False + else: + return True else: - return True + for value in one.values(): + if value is not None: + return False + else: + return True + else: - keys = set(one.keys()).union(two.keys()) + if only_array_record: + keys = ("__array__", "__record__") + else: + keys = set(one.keys()).union(two.keys()) for key in keys: if one.get(key) != two.get(key): return False diff --git a/src/awkward/_v2/highlevel.py b/src/awkward/_v2/highlevel.py index ee42bc9b0e..76af3f7b15 100644 --- a/src/awkward/_v2/highlevel.py +++ b/src/awkward/_v2/highlevel.py @@ -1316,69 +1316,64 @@ def show(self, limit_rows=20, limit_cols=80, type=False, stream=sys.stdout): # """ # return ak._v2._connect.numpy.convert_to_array(self._layout, args, kwargs) - # def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # """ - # Intercepts attempts to pass this Array to a NumPy - # [universal functions](https://docs.scipy.org/doc/numpy/reference/ufuncs.html) - # (ufuncs) and passes it through the Array's structure. + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + """ + Intercepts attempts to pass this Array to a NumPy + [universal functions](https://docs.scipy.org/doc/numpy/reference/ufuncs.html) + (ufuncs) and passes it through the Array's structure. - # This method conforms to NumPy's - # [NEP 13](https://numpy.org/neps/nep-0013-ufunc-overrides.html) - # for overriding ufuncs, which has been - # [available since NumPy 1.13](https://numpy.org/devdocs/release/1.13.0-notes.html#array-ufunc-added) - # (and thus NumPy 1.13 is the minimum allowed version). + This method conforms to NumPy's + [NEP 13](https://numpy.org/neps/nep-0013-ufunc-overrides.html) + for overriding ufuncs, which has been + [available since NumPy 1.13](https://numpy.org/devdocs/release/1.13.0-notes.html#array-ufunc-added) + (and thus NumPy 1.13 is the minimum allowed version). - # When any ufunc is applied to an Awkward Array, it applies to the - # innermost level of structure and preserves the structure through the - # operation. + When any ufunc is applied to an Awkward Array, it applies to the + innermost level of structure and preserves the structure through the + operation. - # For example, with an `array` like + For example, with an `array` like - # ak.Array([[{"x": 0.0, "y": []}, {"x": 1.1, "y": [1]}], [], [{"x": 2.2, "y": [2, 2]}]]) + ak.Array([[{"x": 0.0, "y": []}, {"x": 1.1, "y": [1]}], [], [{"x": 2.2, "y": [2, 2]}]]) - # applying `np.sqrt` would yield + applying `np.sqrt` would yield - # >>> print(np.sqrt(array)) - # [[{x: 0, y: []}, {x: 1.05, y: [1]}], [], [{x: 1.48, y: [1.41, 1.41]}]] + >>> print(np.sqrt(array)) + [[{x: 0, y: []}, {x: 1.05, y: [1]}], [], [{x: 1.48, y: [1.41, 1.41]}]] - # In addition, many unary and binary operators implicitly call ufuncs, - # such as `np.power` in + In addition, many unary and binary operators implicitly call ufuncs, + such as `np.power` in - # >>> print(array**2) - # [[{x: 0, y: []}, {x: 1.21, y: [1]}], [], [{x: 4.84, y: [4, 4]}]] + >>> print(array**2) + [[{x: 0, y: []}, {x: 1.21, y: [1]}], [], [{x: 4.84, y: [4, 4]}]] - # In the above example, `array` is a nested list of records and `2` is - # a scalar. Awkward Array applies the same broadcasting rules as NumPy - # plus a few more to deal with nested structures. In addition to - # broadcasting a scalar, as above, it is possible to broadcast - # arrays with less depth into arrays with more depth, such as + In the above example, `array` is a nested list of records and `2` is + a scalar. Awkward Array applies the same broadcasting rules as NumPy + plus a few more to deal with nested structures. In addition to + broadcasting a scalar, as above, it is possible to broadcast + arrays with less depth into arrays with more depth, such as - # >>> print(array + ak.Array([10, 20, 30])) - # [[{x: 10, y: []}, {x: 11.1, y: [11]}], [], [{x: 32.2, y: [32, 32]}]] + >>> print(array + ak.Array([10, 20, 30])) + [[{x: 10, y: []}, {x: 11.1, y: [11]}], [], [{x: 32.2, y: [32, 32]}]] - # See #ak.broadcast_arrays for details about broadcasting and the - # generalized set of broadcasting rules. + See #ak.broadcast_arrays for details about broadcasting and the + generalized set of broadcasting rules. - # Third party libraries can create ufuncs, not just NumPy, so any library - # that "plays well" with the NumPy ecosystem can be used with Awkward - # Arrays: + Third party libraries can create ufuncs, not just NumPy, so any library + that "plays well" with the NumPy ecosystem can be used with Awkward + Arrays: - # >>> import numba as nb - # >>> @nb.vectorize([nb.float64(nb.float64)]) - # ... def sqr(x): - # ... return x * x - # ... - # >>> print(sqr(array)) - # [[{x: 0, y: []}, {x: 1.21, y: [1]}], [], [{x: 4.84, y: [4, 4]}]] + >>> import numba as nb + >>> @nb.vectorize([nb.float64(nb.float64)]) + ... def sqr(x): + ... return x * x + ... + >>> print(sqr(array)) + [[{x: 0, y: []}, {x: 1.21, y: [1]}], [], [{x: 4.84, y: [4, 4]}]] - # See also #__array_function__. - # """ - # if not hasattr(self, "_tracers"): - # return ak._v2._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs) - # else: - # return ak._v2._connect.jax.jax_utils.array_ufunc( - # self, ufunc, method, inputs, kwargs - # ) + See also #__array_function__. + """ + return ak._v2._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs) # def __array_function__(self, func, types, args, kwargs): # """ @@ -1989,21 +1984,21 @@ def show(self, limit_rows=20, limit_cols=80, type=False, stream=sys.stdout): else: stream.write(out + "\n") - # def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # """ - # Intercepts attempts to pass this Record to a NumPy - # [universal functions](https://docs.scipy.org/doc/numpy/reference/ufuncs.html) - # (ufuncs) and passes it through the Record's structure. + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + """ + Intercepts attempts to pass this Record to a NumPy + [universal functions](https://docs.scipy.org/doc/numpy/reference/ufuncs.html) + (ufuncs) and passes it through the Record's structure. - # This method conforms to NumPy's - # [NEP 13](https://numpy.org/neps/nep-0013-ufunc-overrides.html) - # for overriding ufuncs, which has been - # [available since NumPy 1.13](https://numpy.org/devdocs/release/1.13.0-notes.html#array-ufunc-added) - # (and thus NumPy 1.13 is the minimum allowed version). + This method conforms to NumPy's + [NEP 13](https://numpy.org/neps/nep-0013-ufunc-overrides.html) + for overriding ufuncs, which has been + [available since NumPy 1.13](https://numpy.org/devdocs/release/1.13.0-notes.html#array-ufunc-added) + (and thus NumPy 1.13 is the minimum allowed version). - # See #ak.Array.__array_ufunc__ for a more complete description. - # """ - # return ak._v2._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs) + See #ak.Array.__array_ufunc__ for a more complete description. + """ + return ak._v2._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs) # @property # def numba_type(self): diff --git a/src/awkward/_v2/operations/convert/ak_from_numpy.py b/src/awkward/_v2/operations/convert/ak_from_numpy.py index d3f5958af8..572d680f21 100644 --- a/src/awkward/_v2/operations/convert/ak_from_numpy.py +++ b/src/awkward/_v2/operations/convert/ak_from_numpy.py @@ -5,127 +5,135 @@ import awkward as ak np = ak.nplike.NumpyMetadata.instance() +numpy = ak.nplike.Numpy.instance() def from_numpy( array, regulararray=False, recordarray=True, highlevel=True, behavior=None ): - raise NotImplementedError - - -# """ -# Args: -# array (np.ndarray): The NumPy array to convert into an Awkward Array. -# This array can be a np.ma.MaskedArray. -# regulararray (bool): If True and the array is multidimensional, -# the dimensions are represented by nested #ak.layout.RegularArray -# nodes; if False and the array is multidimensional, the dimensions -# are represented by a multivalued #ak.layout.NumpyArray.shape. -# If the array is one-dimensional, this has no effect. -# recordarray (bool): If True and the array is a NumPy structured array -# (dtype.names is not None), the fields are represented by an -# #ak.layout.RecordArray; if False and the array is a structured -# array, the structure is left in the #ak.layout.NumpyArray `format`, -# which some functions do not recognize. -# highlevel (bool): If True, return an #ak.Array; otherwise, return -# a low-level #ak.layout.Content subclass. -# behavior (None or dict): Custom #ak.behavior for the output array, if -# high-level. - -# Converts a NumPy array into an Awkward Array. - -# The resulting layout may involve the following #ak.layout.Content types -# (only): - -# * #ak.layout.NumpyArray -# * #ak.layout.ByteMaskedArray or #ak.layout.UnmaskedArray if the -# `array` is an np.ma.MaskedArray. -# * #ak.layout.RegularArray if `regulararray=True`. -# * #ak.layout.RecordArray if `recordarray=True`. - -# See also #ak.to_numpy and #ak.from_cupy. -# """ - -# def recurse(array, mask): -# if regulararray and len(array.shape) > 1: -# return ak._v2.contents.RegularArray( -# recurse(array.reshape((-1,) + array.shape[2:]), mask), -# array.shape[1], -# array.shape[0], -# ) - -# if len(array.shape) == 0: -# array = array.reshape(1) - -# if array.dtype.kind == "S": -# asbytes = array.reshape(-1) -# itemsize = asbytes.dtype.itemsize -# starts = numpy.arange(0, len(asbytes) * itemsize, itemsize, dtype=np.int64) -# stops = starts + numpy.char.str_len(asbytes) -# data = ak._v2.contents.ListArray64( -# ak._v2.index.Index64(starts), -# ak._v2.index.Index64(stops), -# ak._v2.contents.NumpyArray( -# asbytes.view("u1"), parameters={"__array__": "byte"} -# ), -# parameters={"__array__": "bytestring"}, -# ) -# for i in range(len(array.shape) - 1, 0, -1): -# data = ak._v2.contents.RegularArray(data, array.shape[i], array.shape[i - 1]) -# elif array.dtype.kind == "U": -# asbytes = numpy.char.encode(array.reshape(-1), "utf-8", "surrogateescape") -# itemsize = asbytes.dtype.itemsize -# starts = numpy.arange(0, len(asbytes) * itemsize, itemsize, dtype=np.int64) -# stops = starts + numpy.char.str_len(asbytes) -# data = ak._v2.contents.ListArray64( -# ak._v2.index.Index64(starts), -# ak._v2.index.Index64(stops), -# ak._v2.contents.NumpyArray( -# asbytes.view("u1"), parameters={"__array__": "char"} -# ), -# parameters={"__array__": "string"}, -# ) -# for i in range(len(array.shape) - 1, 0, -1): -# data = ak._v2.contents.RegularArray(data, array.shape[i], array.shape[i - 1]) -# else: -# data = ak._v2.contents.NumpyArray(array) - -# if mask is None: -# return data -# elif mask is False or (isinstance(mask, np.bool_) and not mask): -# # NumPy's MaskedArray with mask == False is an UnmaskedArray -# if len(array.shape) == 1: -# return ak._v2.contents.UnmaskedArray(data) -# else: - -# def attach(x): -# if isinstance(x, ak._v2.contents.NumpyArray): -# return ak._v2.contents.UnmaskedArray(x) -# else: -# return ak._v2.contents.RegularArray(attach(x.content), x.size, len(x)) - -# return attach(data.toRegularArray()) -# else: -# # NumPy's MaskedArray is a ByteMaskedArray with valid_when=False -# return ak._v2.contents.ByteMaskedArray( -# ak._v2.index.Index8(mask), data, valid_when=False -# ) - -# if isinstance(array, numpy.ma.MaskedArray): -# mask = numpy.ma.getmask(array) -# array = numpy.ma.getdata(array) -# if isinstance(mask, np.ndarray) and len(mask.shape) > 1: -# regulararray = True -# mask = mask.reshape(-1) -# else: -# mask = None - -# if not recordarray or array.dtype.names is None: -# layout = recurse(array, mask) -# else: -# contents = [] -# for name in array.dtype.names: -# contents.append(recurse(array[name], mask)) -# layout = ak._v2.contents.RecordArray(contents, array.dtype.names) - -# return ak._v2._util.maybe_wrap(layout, behavior, highlevel) + """ + Args: + array (np.ndarray): The NumPy array to convert into an Awkward Array. + This array can be a np.ma.MaskedArray. + regulararray (bool): If True and the array is multidimensional, + the dimensions are represented by nested #ak.layout.RegularArray + nodes; if False and the array is multidimensional, the dimensions + are represented by a multivalued #ak.layout.NumpyArray.shape. + If the array is one-dimensional, this has no effect. + recordarray (bool): If True and the array is a NumPy structured array + (dtype.names is not None), the fields are represented by an + #ak.layout.RecordArray; if False and the array is a structured + array, the structure is left in the #ak.layout.NumpyArray `format`, + which some functions do not recognize. + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.layout.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + + Converts a NumPy array into an Awkward Array. + + The resulting layout can only involve the following #ak.layout.Content types: + + * #ak.layout.NumpyArray + * #ak.layout.ByteMaskedArray or #ak.layout.UnmaskedArray if the + `array` is an np.ma.MaskedArray. + * #ak.layout.RegularArray if `regulararray=True`. + * #ak.layout.RecordArray if `recordarray=True`. + + See also #ak.to_numpy and #ak.from_cupy. + """ + + def recurse(array, mask): + if regulararray and len(array.shape) > 1: + return ak._v2.contents.RegularArray( + recurse(array.reshape((-1,) + array.shape[2:]), mask), + array.shape[1], + array.shape[0], + ) + + if len(array.shape) == 0: + array = array.reshape(1) + + if array.dtype.kind == "S": + asbytes = array.reshape(-1) + itemsize = asbytes.dtype.itemsize + starts = numpy.arange(0, len(asbytes) * itemsize, itemsize, dtype=np.int64) + stops = starts + numpy.char.str_len(asbytes) + data = ak._v2.contents.ListArray64( + ak._v2.index.Index64(starts), + ak._v2.index.Index64(stops), + ak._v2.contents.NumpyArray( + asbytes.view("u1"), parameters={"__array__": "byte"}, nplike=numpy + ), + parameters={"__array__": "bytestring"}, + ) + for i in range(len(array.shape) - 1, 0, -1): + data = ak._v2.contents.RegularArray( + data, array.shape[i], array.shape[i - 1] + ) + + elif array.dtype.kind == "U": + asbytes = numpy.char.encode(array.reshape(-1), "utf-8", "surrogateescape") + itemsize = asbytes.dtype.itemsize + starts = numpy.arange(0, len(asbytes) * itemsize, itemsize, dtype=np.int64) + stops = starts + numpy.char.str_len(asbytes) + data = ak._v2.contents.ListArray64( + ak._v2.index.Index64(starts), + ak._v2.index.Index64(stops), + ak._v2.contents.NumpyArray( + asbytes.view("u1"), parameters={"__array__": "char"}, nplike=numpy + ), + parameters={"__array__": "string"}, + ) + for i in range(len(array.shape) - 1, 0, -1): + data = ak._v2.contents.RegularArray( + data, array.shape[i], array.shape[i - 1] + ) + + else: + data = ak._v2.contents.NumpyArray(array) + + if mask is None: + return data + + elif mask is False or (isinstance(mask, np.bool_) and not mask): + # NumPy's MaskedArray with mask == False is an UnmaskedArray + if len(array.shape) == 1: + return ak._v2.contents.UnmaskedArray(data) + else: + + def attach(x): + if isinstance(x, ak._v2.contents.NumpyArray): + return ak._v2.contents.UnmaskedArray(x) + else: + return ak._v2.contents.RegularArray( + attach(x.content), x.size, len(x) + ) + + return attach(data.toRegularArray()) + + else: + # NumPy's MaskedArray is a ByteMaskedArray with valid_when=False + return ak._v2.contents.ByteMaskedArray( + ak._v2.index.Index8(mask), data, valid_when=False + ) + + if isinstance(array, numpy.ma.MaskedArray): + mask = numpy.ma.getmask(array) + array = numpy.ma.getdata(array) + if isinstance(mask, np.ndarray) and len(mask.shape) > 1: + regulararray = True + mask = mask.reshape(-1) + else: + mask = None + + if not recordarray or array.dtype.names is None: + layout = recurse(array, mask) + + else: + contents = [] + for name in array.dtype.names: + contents.append(recurse(array[name], mask)) + layout = ak._v2.contents.RecordArray(contents, array.dtype.names) + + return ak._v2._util.wrap(layout, behavior, highlevel) diff --git a/src/awkward/_v2/types/arraytype.py b/src/awkward/_v2/types/arraytype.py index 3e3799307b..4b70d57a5f 100644 --- a/src/awkward/_v2/types/arraytype.py +++ b/src/awkward/_v2/types/arraytype.py @@ -43,7 +43,9 @@ def __eq__(self, other): if isinstance(other, ArrayType): return ( self._length == other._length - and _parameters_equal(self._parameters, other._parameters) + and _parameters_equal( + self._parameters, other._parameters, only_array_record=True + ) and self._content == other._content ) else: diff --git a/src/awkward/_v2/types/listtype.py b/src/awkward/_v2/types/listtype.py index 0932a2a399..4fda46f49d 100644 --- a/src/awkward/_v2/types/listtype.py +++ b/src/awkward/_v2/types/listtype.py @@ -62,7 +62,9 @@ def __eq__(self, other): if isinstance(other, ListType): return ( self._typestr == other._typestr - and _parameters_equal(self._parameters, other._parameters) + and _parameters_equal( + self._parameters, other._parameters, only_array_record=True + ) and self._content == other._content ) else: diff --git a/src/awkward/_v2/types/numpytype.py b/src/awkward/_v2/types/numpytype.py index 7bf478ff60..c373efa8b7 100644 --- a/src/awkward/_v2/types/numpytype.py +++ b/src/awkward/_v2/types/numpytype.py @@ -143,7 +143,9 @@ def __eq__(self, other): return ( self._typestr == other._typestr and self._primitive == other._primitive - and _parameters_equal(self._parameters, other._parameters) + and _parameters_equal( + self._parameters, other._parameters, only_array_record=True + ) ) else: return False diff --git a/src/awkward/_v2/types/optiontype.py b/src/awkward/_v2/types/optiontype.py index 9962652c97..31710fe426 100644 --- a/src/awkward/_v2/types/optiontype.py +++ b/src/awkward/_v2/types/optiontype.py @@ -62,7 +62,9 @@ def __eq__(self, other): if isinstance(other, OptionType): return ( self._typestr == other._typestr - and _parameters_equal(self._parameters, other._parameters) + and _parameters_equal( + self._parameters, other._parameters, only_array_record=True + ) and self._content == other._content ) else: diff --git a/src/awkward/_v2/types/recordtype.py b/src/awkward/_v2/types/recordtype.py index 901bed0680..e88a007860 100644 --- a/src/awkward/_v2/types/recordtype.py +++ b/src/awkward/_v2/types/recordtype.py @@ -115,7 +115,7 @@ def __repr__(self): def __eq__(self, other): if isinstance(other, RecordType): if self._typestr != other._typestr or not _parameters_equal( - self._parameters, other._parameters + self._parameters, other._parameters, only_array_record=True ): return False diff --git a/src/awkward/_v2/types/regulartype.py b/src/awkward/_v2/types/regulartype.py index 265bd7b4a0..84ea162786 100644 --- a/src/awkward/_v2/types/regulartype.py +++ b/src/awkward/_v2/types/regulartype.py @@ -74,7 +74,9 @@ def __eq__(self, other): return ( self._typestr == other._typestr and self._size == other._size - and _parameters_equal(self._parameters, other._parameters) + and _parameters_equal( + self._parameters, other._parameters, only_array_record=True + ) and self._content == other._content ) else: diff --git a/src/awkward/_v2/types/uniontype.py b/src/awkward/_v2/types/uniontype.py index 1ad85a6aee..235edf5e37 100644 --- a/src/awkward/_v2/types/uniontype.py +++ b/src/awkward/_v2/types/uniontype.py @@ -72,7 +72,9 @@ def __eq__(self, other): if isinstance(other, UnionType): return ( self._typestr == other._typestr - and _parameters_equal(self._parameters, other._parameters) + and _parameters_equal( + self._parameters, other._parameters, only_array_record=True + ) and self._contents == other._contents ) else: diff --git a/src/awkward/_v2/types/unknowntype.py b/src/awkward/_v2/types/unknowntype.py index a0249e499d..1cb981e331 100644 --- a/src/awkward/_v2/types/unknowntype.py +++ b/src/awkward/_v2/types/unknowntype.py @@ -44,7 +44,7 @@ def __repr__(self): def __eq__(self, other): if isinstance(other, UnknownType): return self._typestr == other._typestr and _parameters_equal( - self._parameters, other._parameters + self._parameters, other._parameters, only_array_record=True ) else: return False diff --git a/tests/v2/test_0086-nep13-ufunc.py b/tests/v2/test_0086-nep13-ufunc.py new file mode 100644 index 0000000000..0905d35eb1 --- /dev/null +++ b/tests/v2/test_0086-nep13-ufunc.py @@ -0,0 +1,151 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +from __future__ import absolute_import + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + +pytestmark = pytest.mark.skipif( + ak._util.py27, reason="No Python 2.7 support in Awkward 2.x" +) + + +def test_basic(): + array = ak._v2.highlevel.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]]) + assert ak.to_list(array + array) == [[2.2, 4.4, 6.6], [], [8.8, 11.0]] + assert ak.to_list(array * 2) == [[2.2, 4.4, 6.6], [], [8.8, 11.0]] + + +def test_emptyarray(): + one = ak._v2.highlevel.Array(ak._v2.contents.NumpyArray(np.array([]))) + two = ak._v2.highlevel.Array(ak._v2.contents.EmptyArray()) + assert ak.to_list(one + one) == [] + assert ak.to_list(two + two) == [] + assert ak.to_list(one + two) == [] + + +def test_indexedarray(): + content = ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + ) + index1 = ak._v2.index.Index64(np.array([2, 4, 4, 0, 8], dtype=np.int64)) + index2 = ak._v2.index.Index64(np.array([6, 4, 4, 8, 0], dtype=np.int64)) + one = ak._v2.highlevel.Array(ak._v2.contents.IndexedArray(index1, content)) + two = ak._v2.highlevel.Array(ak._v2.contents.IndexedArray(index2, content)) + assert ak.to_list(one + two) == [8.8, 8.8, 8.8, 8.8, 8.8] + + +def test_indexedoptionarray(): + content = ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) + ) + index1 = ak._v2.index.Index64(np.array([2, -1, 4, 0, 8], dtype=np.int64)) + index2 = ak._v2.index.Index64(np.array([-1, 4, 4, -1, 0], dtype=np.int64)) + one = ak._v2.highlevel.Array(ak._v2.contents.IndexedOptionArray(index1, content)) + two = ak._v2.highlevel.Array(ak._v2.contents.IndexedOptionArray(index2, content)) + assert ak.to_list(one + two) == [None, None, 8.8, None, 8.8] + + uno = ak._v2.contents.NumpyArray(np.array([2.2, 4.4, 4.4, 0.0, 8.8])) + dos = ak._v2.contents.NumpyArray(np.array([6.6, 4.4, 4.4, 8.8, 0.0])) + assert ak.to_list(uno + two) == [None, 8.8, 8.8, None, 8.8] + assert ak.to_list(one + dos) == [8.8, None, 8.8, 8.8, 8.8] + + +def test_regularize_shape(): + array = ak._v2.contents.NumpyArray(np.arange(2 * 3 * 5).reshape(2, 3, 5)) + assert isinstance(array.toRegularArray(), ak._v2.contents.RegularArray) + assert ak.to_list(array.toRegularArray()) == ak.to_list(array) + + +def test_regulararray(): + array = ak._v2.highlevel.Array(np.arange(2 * 3 * 5).reshape(2, 3, 5)) + assert ( + ak.to_list(array + array) + == (np.arange(2 * 3 * 5).reshape(2, 3, 5) * 2).tolist() + ) + assert ak.to_list(array * 2) == (np.arange(2 * 3 * 5).reshape(2, 3, 5) * 2).tolist() + array2 = ak._v2.highlevel.Array(np.arange(2 * 1 * 5).reshape(2, 1, 5)) + assert ak.to_list(array + array2) == ak.to_list( + np.arange(2 * 3 * 5).reshape(2, 3, 5) + np.arange(2 * 1 * 5).reshape(2, 1, 5) + ) + array3 = ak._v2.highlevel.Array(np.arange(2 * 3 * 5).reshape(2, 3, 5).tolist()) + assert ak.to_list(array + array3) == ak.to_list( + np.arange(2 * 3 * 5).reshape(2, 3, 5) + np.arange(2 * 3 * 5).reshape(2, 3, 5) + ) + assert ak.to_list(array3 + array) == ak.to_list( + np.arange(2 * 3 * 5).reshape(2, 3, 5) + np.arange(2 * 3 * 5).reshape(2, 3, 5) + ) + + +def test_listarray(): + content = ak._v2.contents.NumpyArray(np.arange(12, dtype=np.int64)) + starts = ak._v2.index.Index64(np.array([3, 0, 999, 2, 6, 10], dtype=np.int64)) + stops = ak._v2.index.Index64(np.array([7, 3, 999, 4, 6, 12], dtype=np.int64)) + one = ak._v2.highlevel.Array(ak._v2.contents.ListArray(starts, stops, content)) + two = ak._v2.highlevel.Array( + [[100, 100, 100, 100], [200, 200, 200], [], [300, 300], [], [400, 400]] + ) + assert ak.to_list(one) == [[3, 4, 5, 6], [0, 1, 2], [], [2, 3], [], [10, 11]] + assert ak.to_list(one + 100) == [ + [103, 104, 105, 106], + [100, 101, 102], + [], + [102, 103], + [], + [110, 111], + ] + assert ak.to_list(one + two) == [ + [103, 104, 105, 106], + [200, 201, 202], + [], + [302, 303], + [], + [410, 411], + ] + assert ak.to_list(two + one) == [ + [103, 104, 105, 106], + [200, 201, 202], + [], + [302, 303], + [], + [410, 411], + ] + assert ak.to_list( + one + np.array([100, 200, 300, 400, 500, 600])[:, np.newaxis] + ) == [[103, 104, 105, 106], [200, 201, 202], [], [402, 403], [], [610, 611]] + assert ak.to_list( + np.array([100, 200, 300, 400, 500, 600])[:, np.newaxis] + one + ) == [[103, 104, 105, 106], [200, 201, 202], [], [402, 403], [], [610, 611]] + assert ak.to_list(one + 100) == [ + [103, 104, 105, 106], + [100, 101, 102], + [], + [102, 103], + [], + [110, 111], + ] + + +def test_unionarray(): + one0 = ak._v2.contents.NumpyArray(np.array([0.0, 1.1, 2.2, 3.3], dtype=np.float64)) + one1 = ak._v2.contents.NumpyArray(np.array([4, 5], dtype=np.int64)) + onetags = ak._v2.index.Index8(np.array([0, 0, 0, 0, 1, 1], dtype=np.int8)) + oneindex = ak._v2.index.Index64(np.array([0, 1, 2, 3, 0, 1], dtype=np.int64)) + one = ak._v2.highlevel.Array( + ak._v2.contents.UnionArray(onetags, oneindex, [one0, one1]) + ) + + two0 = ak._v2.contents.NumpyArray(np.array([0, 100], dtype=np.int64)) + two1 = ak._v2.contents.NumpyArray( + np.array([200.3, 300.3, 400.4, 500.5], dtype=np.float64) + ) + twotags = ak._v2.index.Index8(np.array([0, 0, 1, 1, 1, 1], dtype=np.int8)) + twoindex = ak._v2.index.Index64(np.array([0, 1, 0, 1, 2, 3], dtype=np.int64)) + two = ak._v2.highlevel.Array( + ak._v2.contents.UnionArray(twotags, twoindex, [two0, two1]) + ) + + assert ak.to_list(one) == [0.0, 1.1, 2.2, 3.3, 4, 5] + assert ak.to_list(two) == [0, 100, 200.3, 300.3, 400.4, 500.5] + assert ak.to_list(one + two) == [0.0, 101.1, 202.5, 303.6, 404.4, 505.5] diff --git a/tests/v2/test_0504-block-ufuncs-for-strings.py b/tests/v2/test_0504-block-ufuncs-for-strings.py new file mode 100644 index 0000000000..a075cc059c --- /dev/null +++ b/tests/v2/test_0504-block-ufuncs-for-strings.py @@ -0,0 +1,46 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +from __future__ import absolute_import + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + +pytestmark = pytest.mark.skipif( + ak._util.py27, reason="No Python 2.7 support in Awkward 2.x" +) + + +def test(): + def _apply_ufunc(ufunc, method, inputs, kwargs): + nextinputs = [] + for x in inputs: + if ( + isinstance(x, ak._v2.highlevel.Array) + and x.layout.is_IndexedType + and not x.layout.is_OptionType + ): + nextinputs.append( + ak._v2.highlevel.Array( + x.layout.project(), behavior=ak._v2._util.behavior_of(x) + ) + ) + else: + nextinputs.append(x) + + return getattr(ufunc, method)(*nextinputs, **kwargs) + + ak._v2.behavior[np.ufunc, "categorical"] = _apply_ufunc + + array = ak._v2.highlevel.Array( + ak._v2.contents.IndexedArray( + ak._v2.index.Index64(np.array([0, 1, 2, 1, 3, 1, 4])), + ak._v2.contents.NumpyArray(np.array([321, 1.1, 123, 999, 2])), + parameters={"__array__": "categorical"}, + ) + ) + assert ak.to_list(array * 10) == [3210, 11, 1230, 11, 9990, 11, 20] + + array = ak.Array(["HAL"]) + with pytest.raises(ValueError): + array + 1 diff --git a/tests/v2/test_0527-fix-unionarray-ufuncs-and-parameters-in-merging.py b/tests/v2/test_0527-fix-unionarray-ufuncs-and-parameters-in-merging.py new file mode 100644 index 0000000000..04afb16857 --- /dev/null +++ b/tests/v2/test_0527-fix-unionarray-ufuncs-and-parameters-in-merging.py @@ -0,0 +1,336 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +from __future__ import absolute_import + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + +pytestmark = pytest.mark.skipif( + ak._util.py27, reason="No Python 2.7 support in Awkward 2.x" +) + + +# https://github.com/scikit-hep/awkward-1.0/issues/459#issuecomment-694941328 +# +# So the rules would be, +# * if arrays have different `__array__` or `__record__` parameters, they are not equal; +# * if they otherwise have different parameters, the types can be equal, but merging +# (concatenation, option-simplify, or union-simplify) removes parameters other than +# `__array__` and `__record__`. + + +def test_0459_types(): + plain_plain = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray(np.array([0.0, 1.1, 2.2, 3.3, 4.4]), parameters={}) + ) + array_plain = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4]), parameters={"__array__": "zoinks"} + ) + ) + plain_isdoc = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4]), + parameters={"__doc__": "This is a zoink."}, + ) + ) + array_isdoc = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4]), + parameters={"__doc__": "This is a zoink.", "__array__": "zoinks"}, + ) + ) + + assert plain_plain.layout.parameters == {} + assert array_plain.layout.parameters == {"__array__": "zoinks"} + assert plain_isdoc.layout.parameters == {"__doc__": "This is a zoink."} + assert array_isdoc.layout.parameters == { + "__array__": "zoinks", + "__doc__": "This is a zoink.", + } + + assert plain_plain.layout.form.type == plain_plain.layout.form.type + assert array_plain.layout.form.type == array_plain.layout.form.type + assert plain_isdoc.layout.form.type == plain_isdoc.layout.form.type + assert array_isdoc.layout.form.type == array_isdoc.layout.form.type + + assert plain_plain.layout.form.type != array_plain.layout.form.type + assert array_plain.layout.form.type != plain_plain.layout.form.type + + assert plain_plain.layout.form.type == plain_isdoc.layout.form.type + assert plain_isdoc.layout.form.type == plain_plain.layout.form.type + + assert array_plain.layout.form.type == array_isdoc.layout.form.type + assert array_isdoc.layout.form.type == array_plain.layout.form.type + + assert plain_isdoc.layout.form.type != array_isdoc.layout.form.type + assert array_isdoc.layout.form.type != plain_isdoc.layout.form.type + + +@pytest.mark.skip(reason="FIXME: need an implementation of v2 concatenate") +def test_0459(): + plain_plain = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray(np.array([0.0, 1.1, 2.2, 3.3, 4.4]), parameters={}) + ) + array_plain = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4]), parameters={"__array__": "zoinks"} + ) + ) + plain_isdoc = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4]), + parameters={"__doc__": "This is a zoink."}, + ) + ) + array_isdoc = ak._v2.highlevel.Array( + ak._v2.contents.NumpyArray( + np.array([0.0, 1.1, 2.2, 3.3, 4.4]), + parameters={"__doc__": "This is a zoink.", "__array__": "zoinks"}, + ) + ) + + assert plain_plain.layout.parameters == {} + assert array_plain.layout.parameters == {"__array__": "zoinks"} + assert plain_isdoc.layout.parameters == {"__doc__": "This is a zoink."} + assert array_isdoc.layout.parameters == { + "__array__": "zoinks", + "__doc__": "This is a zoink.", + } + + assert ( + ak._v2.operations.structure.concatenate( + [plain_plain, plain_plain] + ).layout.parameters + == {} + ) + assert ak._v2.operations.structure.concatenate( + [array_plain, array_plain] + ).layout.parameters == {"__array__": "zoinks"} + assert ak._v2.operations.structure.concatenate( + [plain_isdoc, plain_isdoc] + ).layout.parameters == {"__doc__": "This is a zoink."} + assert ak._v2.operations.structure.concatenate( + [array_isdoc, array_isdoc] + ).layout.parameters == { + "__array__": "zoinks", + "__doc__": "This is a zoink.", + } + + assert isinstance( + ak._v2.operations.structure.concatenate([plain_plain, plain_plain]).layout, + ak._v2.contents.NumpyArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([array_plain, array_plain]).layout, + ak._v2.contents.NumpyArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([plain_isdoc, plain_isdoc]).layout, + ak._v2.contents.NumpyArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([array_isdoc, array_isdoc]).layout, + ak._v2.contents.NumpyArray, + ) + + assert ( + ak._v2.operations.structure.concatenate( + [plain_plain, array_plain] + ).layout.parameters + == {} + ) + assert ( + ak._v2.operations.structure.concatenate( + [plain_isdoc, array_isdoc] + ).layout.parameters + == {} + ) + assert ( + ak._v2.operations.structure.concatenate( + [array_plain, plain_plain] + ).layout.parameters + == {} + ) + assert ( + ak._v2.operations.structure.concatenate( + [array_isdoc, plain_isdoc] + ).layout.parameters + == {} + ) + + assert isinstance( + ak._v2.operations.structure.concatenate([plain_plain, array_plain]).layout, + ak._v2.contents.UnionArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([plain_isdoc, array_isdoc]).layout, + ak._v2.contents.UnionArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([array_plain, plain_plain]).layout, + ak._v2.contents.UnionArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([array_isdoc, plain_isdoc]).layout, + ak._v2.contents.UnionArray, + ) + + assert ( + ak._v2.operations.structure.concatenate( + [plain_plain, plain_isdoc] + ).layout.parameters + == {} + ) + assert ak._v2.operations.structure.concatenate( + [array_plain, array_isdoc] + ).layout.parameters == {"__array__": "zoinks"} + assert ( + ak._v2.operations.structure.concatenate( + [plain_isdoc, plain_plain] + ).layout.parameters + == {} + ) + assert ak._v2.operations.structure.concatenate( + [array_isdoc, array_plain] + ).layout.parameters == {"__array__": "zoinks"} + + assert isinstance( + ak._v2.operations.structure.concatenate([plain_plain, plain_isdoc]).layout, + ak._v2.contents.NumpyArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([array_plain, array_isdoc]).layout, + ak._v2.contents.NumpyArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([plain_isdoc, plain_plain]).layout, + ak._v2.contents.NumpyArray, + ) + assert isinstance( + ak._v2.operations.structure.concatenate([array_isdoc, array_plain]).layout, + ak._v2.contents.NumpyArray, + ) + + +def test_0522(): + content1 = ak._v2.highlevel.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout + content2 = ak._v2.highlevel.Array([0, 100, 200, 300, 400]).layout + tags = ak._v2.index.Index8(np.array([0, 0, 0, 1, 1, 0, 0, 1, 1, 1], np.int8)) + index = ak._v2.index.Index64(np.array([0, 1, 2, 0, 1, 3, 4, 2, 3, 4], np.int64)) + unionarray = ak.Array(ak._v2.contents.UnionArray(tags, index, [content1, content2])) + assert unionarray.tolist() == [0.0, 1.1, 2.2, 0, 100, 3.3, 4.4, 200, 300, 400] + + assert (unionarray + 10).tolist() == [ + 10.0, + 11.1, + 12.2, + 10, + 110, + 13.3, + 14.4, + 210, + 310, + 410, + ] + assert (10 + unionarray).tolist() == [ + 10.0, + 11.1, + 12.2, + 10, + 110, + 13.3, + 14.4, + 210, + 310, + 410, + ] + + assert (unionarray + range(0, 100, 10)).tolist() == [ + 0.0, + 11.1, + 22.2, + 30, + 140, + 53.3, + 64.4, + 270, + 380, + 490, + ] + assert (range(0, 100, 10) + unionarray).tolist() == [ + 0.0, + 11.1, + 22.2, + 30, + 140, + 53.3, + 64.4, + 270, + 380, + 490, + ] + + assert (unionarray + np.arange(0, 100, 10)).tolist() == [ + 0.0, + 11.1, + 22.2, + 30, + 140, + 53.3, + 64.4, + 270, + 380, + 490, + ] + assert (np.arange(0, 100, 10) + unionarray).tolist() == [ + 0.0, + 11.1, + 22.2, + 30, + 140, + 53.3, + 64.4, + 270, + 380, + 490, + ] + + assert (unionarray + ak.Array(np.arange(0, 100, 10))).tolist() == [ + 0.0, + 11.1, + 22.2, + 30, + 140, + 53.3, + 64.4, + 270, + 380, + 490, + ] + assert (ak.Array(np.arange(0, 100, 10)) + unionarray).tolist() == [ + 0.0, + 11.1, + 22.2, + 30, + 140, + 53.3, + 64.4, + 270, + 380, + 490, + ] + + assert (unionarray + unionarray).tolist() == [ + 0.0, + 2.2, + 4.4, + 0, + 200, + 6.6, + 8.8, + 400, + 600, + 800, + ]