From c9425048786e4f7e2c659a11722b8fadbabcdfab Mon Sep 17 00:00:00 2001 From: ioanaif Date: Mon, 21 Feb 2022 19:41:23 +0200 Subject: [PATCH 1/3] ak.argsort and testing --- .../_v2/operations/structure/ak_argsort.py | 84 ++++---- tests/v2/test_0074-argsort-and-sort.py | 194 ++++++++++++++---- .../test_0339-highlevel-sorting-function.py | 19 ++ ...ure-that-jagged-slice-fits-array-length.py | 49 +++++ ...test_0736-implement-argsort-for-strings.py | 75 +++++++ tests/v2/test_0803-argsort-fix-type.py | 26 +++ tests/v2/test_0850-argsort-mask-array.py | 22 ++ tests/v2/test_0945-argsort-sort-nan-array.py | 66 ++++++ 8 files changed, 451 insertions(+), 84 deletions(-) create mode 100644 tests/v2/test_0339-highlevel-sorting-function.py create mode 100644 tests/v2/test_0723-ensure-that-jagged-slice-fits-array-length.py create mode 100644 tests/v2/test_0736-implement-argsort-for-strings.py create mode 100644 tests/v2/test_0803-argsort-fix-type.py create mode 100644 tests/v2/test_0850-argsort-mask-array.py create mode 100644 tests/v2/test_0945-argsort-sort-nan-array.py diff --git a/src/awkward/_v2/operations/structure/ak_argsort.py b/src/awkward/_v2/operations/structure/ak_argsort.py index 4485584b15..91a8f48af4 100644 --- a/src/awkward/_v2/operations/structure/ak_argsort.py +++ b/src/awkward/_v2/operations/structure/ak_argsort.py @@ -7,46 +7,44 @@ # @ak._v2._connect.numpy.implements("argsort") def argsort(array, axis=-1, ascending=True, stable=True, highlevel=True, behavior=None): - raise NotImplementedError - - -# """ -# Args: -# array: Data for which to get a sorting index, possibly within nested -# lists. -# axis (int): The dimension at which this operation is applied. The -# outermost dimension is `0`, followed by `1`, etc., and negative -# values count backward from the innermost: `-1` is the innermost -# dimension, `-2` is the next level up, etc. -# ascending (bool): If True, the first value in each sorted group -# will be smallest, the last value largest; if False, the order -# is from largest to smallest. -# stable (bool): If True, use a stable sorting algorithm (introsort: -# a hybrid of quicksort, heapsort, and insertion sort); if False, -# use a sorting algorithm that is not guaranteed to be stable -# (heapsort). -# highlevel (bool): If True, return an #ak.Array; otherwise, return -# a low-level #ak.layout.Content subclass. -# behavior (None or dict): Custom #ak.behavior for the output array, if -# high-level. - -# For example, - -# >>> ak.argsort(ak.Array([[7.7, 5.5, 7.7], [], [2.2], [8.8, 2.2]])) -# - -# The result of this function can be used to index other arrays with the -# same shape: - -# >>> data = ak.Array([[7, 5, 7], [], [2], [8, 2]]) -# >>> index = ak.argsort(index) -# >>> index -# -# >>> data[index] -# -# """ -# layout = ak._v2.operations.convert.to_layout( -# array, allow_record=False, allow_other=False -# ) -# out = layout.argsort(axis, ascending, stable) -# return ak._v2._util.maybe_wrap_like(out, array, behavior, highlevel) + + """ + Args: + array: Data for which to get a sorting index, possibly within nested + lists. + axis (int): The dimension at which this operation is applied. The + outermost dimension is `0`, followed by `1`, etc., and negative + values count backward from the innermost: `-1` is the innermost + dimension, `-2` is the next level up, etc. + ascending (bool): If True, the first value in each sorted group + will be smallest, the last value largest; if False, the order + is from largest to smallest. + stable (bool): If True, use a stable sorting algorithm (introsort: + a hybrid of quicksort, heapsort, and insertion sort); if False, + use a sorting algorithm that is not guaranteed to be stable + (heapsort). + highlevel (bool): If True, return an #ak.Array; otherwise, return + a low-level #ak.layout.Content subclass. + behavior (None or dict): Custom #ak.behavior for the output array, if + high-level. + + For example, + + >>> ak.argsort(ak.Array([[7.7, 5.5, 7.7], [], [2.2], [8.8, 2.2]])) + + + The result of this function can be used to index other arrays with the + same shape: + + >>> data = ak.Array([[7, 5, 7], [], [2], [8, 2]]) + >>> index = ak.argsort(index) + >>> index + + >>> data[index] + + """ + layout = ak._v2.operations.convert.to_layout( + array, allow_record=False, allow_other=False + ) + out = layout.argsort(axis, ascending, stable) + return ak._v2._util.wrap(out, behavior, highlevel) diff --git a/tests/v2/test_0074-argsort-and-sort.py b/tests/v2/test_0074-argsort-and-sort.py index 5a4ba903d8..8bfecb96e9 100644 --- a/tests/v2/test_0074-argsort-and-sort.py +++ b/tests/v2/test_0074-argsort-and-sort.py @@ -22,7 +22,7 @@ def test_bool_sort(): def test_keep_None_in_place_test(): array = ak._v2.highlevel.Array([[3, 2, 1], [], None, [4, 5]]).layout - assert to_list(array.argsort(axis=1)) == [ + assert to_list(ak._v2.operations.structure.argsort(array, axis=1)) == [ [2, 1, 0], [], None, @@ -41,41 +41,64 @@ def test_slicing_FIXME(): # awkward/_v2/_slicing.py:218: array = ak._v2.highlevel.Array([[3, 2, 1], [], None, [4, 5]]).layout - assert to_list(array[array.argsort(axis=1)]) == to_list(array.sort(axis=1)) + assert to_list( + array[ak._v2.operations.structure.argsort(array, axis=1)] + ) == to_list(array.sort(axis=1)) def test_EmptyArray(): array = ak._v2.contents.EmptyArray() assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) array2 = ak._v2.highlevel.Array([[], [], []]).layout - assert to_list(array2.argsort()) == [[], [], []] + assert to_list(ak._v2.operations.structure.argsort(array2)) == [[], [], []] -def test_EmptyArray_type_FIXME(): +def test_EmptyArray_type(): array = ak._v2.contents.EmptyArray() assert str(array.sort().form.type) == "unknown" - assert str(array.argsort().form.type) == "int64" + assert ( + str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array))) + == "0 * int64" + ) array2 = ak._v2.highlevel.Array([[], [], []]).layout - assert str(array2.argsort().form.type) == "var * int64" + assert ( + str( + ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array2)) + ) + == "3 * var * int64" + ) def test_NumpyArray(): array = ak._v2.contents.NumpyArray(np.array([3.3, 2.2, 1.1, 5.5, 4.4])) - assert to_list(array.argsort(axis=0, ascending=True, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=0, ascending=True, stable=False) + ) == [ 2, 1, 0, 4, 3, ] - assert to_list(array.argsort(axis=0, ascending=False, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort( + array, axis=0, ascending=False, stable=False + ) + ) == [ 3, 4, 0, @@ -107,16 +130,22 @@ def test_NumpyArray(): np.sort(np.asarray(array2), axis=0) ) - assert to_list(array2.argsort(axis=1, ascending=True, stable=False)) == to_list( - np.argsort(np.asarray(array2), 1) - ) + assert to_list( + ak._v2.operations.structure.argsort( + array2, axis=1, ascending=True, stable=False + ) + ) == to_list(np.argsort(np.asarray(array2), 1)) - assert to_list(array2.argsort(axis=0, ascending=True, stable=False)) == to_list( - np.argsort(np.asarray(array2), 0) - ) + assert to_list( + ak._v2.operations.structure.argsort( + array2, axis=0, ascending=True, stable=False + ) + ) == to_list(np.argsort(np.asarray(array2), 0)) with pytest.raises(ValueError) as err: - array2.sort(axis=2, ascending=True, stable=False) + ak._v2.operations.structure.argsort( + array2, axis=2, ascending=True, stable=False + ) assert str(err.value).startswith( "axis=2 exceeds the depth of the nested list structure (which is 2)" ) @@ -165,7 +194,9 @@ def test_IndexedOptionArray(): [-4.4, -5.5, -6.6], ] - assert to_list(array.argsort(axis=0, ascending=True, stable=True)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=0, ascending=True, stable=True) + ) == [ [4, 4, 4, 0, 0], [2, 0, 0], [3, 1, 2], @@ -173,7 +204,9 @@ def test_IndexedOptionArray(): [1, 3, 3], ] - assert to_list(array.argsort(axis=0, ascending=True, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=0, ascending=True, stable=False) + ) == [ [4, 4, 4, 0, 0], [2, 0, 0], [3, 1, 2], @@ -181,14 +214,20 @@ def test_IndexedOptionArray(): [1, 3, 3], ] - assert to_list(array.argsort(axis=0, ascending=False, stable=True)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=0, ascending=False, stable=True) + ) == [ [3, 4, 2, 0, 0], [2, 0, 0], [4, 1, 4], [0, 2, 1], [1, 3, 3], ] - assert to_list(array.argsort(axis=0, ascending=False, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort( + array, axis=0, ascending=False, stable=False + ) + ) == [ [3, 4, 2, 0, 0], [2, 0, 0], [4, 1, 4], @@ -196,7 +235,9 @@ def test_IndexedOptionArray(): [1, 3, 3], ] - assert to_list(array.argsort(axis=1, ascending=True, stable=True)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=1, ascending=True, stable=True) + ) == [ [3, 2, 4, 0, 1], [0, 1, 2], [0, 2, 1], @@ -204,7 +245,9 @@ def test_IndexedOptionArray(): [2, 1, 0], ] - assert to_list(array.argsort(axis=1, ascending=True, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=1, ascending=True, stable=False) + ) == [ [3, 2, 4, 0, 1], [0, 1, 2], [0, 2, 1], @@ -212,7 +255,9 @@ def test_IndexedOptionArray(): [2, 1, 0], ] - assert to_list(array.argsort(axis=1, ascending=False, stable=True)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=1, ascending=False, stable=True) + ) == [ [4, 2, 3, 0, 1], [0, 1, 2], [2, 0, 1], @@ -222,7 +267,9 @@ def test_IndexedOptionArray(): array2 = ak._v2.highlevel.Array([None, None, 1, -1, 30]).layout - assert to_list(array2.argsort(axis=0, ascending=True, stable=True)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array2, axis=0, ascending=True, stable=True) + ) == [ 3, 2, 4, @@ -256,7 +303,11 @@ def test_IndexedArray(): index1 = ak._v2.index.Index32(np.array([1, 2, 3, 4], dtype=np.int32)) indexedarray1 = ak._v2.contents.IndexedArray(index1, content) - assert to_list(indexedarray1.argsort(axis=0, ascending=True, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort( + indexedarray1, axis=0, ascending=True, stable=False + ) + ) == [ 0, 1, 2, @@ -300,16 +351,22 @@ def test_3d(): ) ) # 5 - sorted = array.argsort(axis=1, ascending=True, stable=False) + sorted = ak._v2.operations.structure.argsort( + array, axis=1, ascending=True, stable=False + ) assert to_list(sorted) == to_list(np.argsort(np.asarray(array), 1)) - sorted = array.argsort(axis=2, ascending=True, stable=False) + sorted = ak._v2.operations.structure.argsort( + array, axis=2, ascending=True, stable=False + ) assert to_list(sorted) == to_list(np.argsort(np.asarray(array), 2)) sorted = array.sort(axis=2, ascending=True, stable=False) assert to_list(sorted) == to_list(np.sort(np.asarray(array), 2)) - sorted = array.argsort(axis=1, ascending=True, stable=False) + sorted = ak._v2.operations.structure.argsort( + array, axis=1, ascending=True, stable=False + ) assert to_list(sorted) == to_list(np.argsort(np.asarray(array), 1)) @@ -333,9 +390,9 @@ def test_3d(): sorted = array.sort(axis=0, ascending=True, stable=False) assert to_list(sorted) == to_list(np.sort(np.asarray(array), 0)) - assert to_list(array.argsort(axis=0, ascending=True, stable=False)) == to_list( - np.argsort(np.asarray(array), 0) - ) + assert to_list( + ak._v2.operations.structure.argsort(array, axis=0, ascending=True, stable=False) + ) == to_list(np.argsort(np.asarray(array), 0)) def test_ByteMaskedArray(): @@ -344,7 +401,9 @@ def test_ByteMaskedArray(): ) mask = ak._v2.index.Index8(np.array([0, 0, 1, 1, 0], dtype=np.int8)) array = ak._v2.contents.ByteMaskedArray(mask, content, valid_when=False) - sorted = array.argsort(axis=0, ascending=True, stable=False) + sorted = ak._v2.operations.structure.argsort( + array, axis=0, ascending=True, stable=False + ) assert to_list(sorted) == [ [0, 0, 0], [], @@ -370,7 +429,9 @@ def test_ByteMaskedArray(): None, ] - assert to_list(array.argsort(axis=1, ascending=True, stable=False)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=1, ascending=True, stable=False) + ) == [ [0, 1, 2], [], None, @@ -447,7 +508,9 @@ def test_sort_bytestrings(): b"two", ] - assert to_list(array.argsort(axis=0, ascending=True, stable=True)) == [ + assert to_list( + ak._v2.operations.structure.argsort(array, axis=0, ascending=True, stable=True) + ) == [ 0, 5, 2, @@ -464,7 +527,14 @@ def test_sort_zero_length_arrays(): ) assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) content = ak._v2.operations.convert.from_iter( [[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]], highlevel=False @@ -473,17 +543,38 @@ def test_sort_zero_length_arrays(): array = ak._v2.contents.ByteMaskedArray(mask, content, valid_when=False) assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) array = ak._v2.contents.NumpyArray([]) assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) array = ak._v2.contents.RecordArray([], None, length=0) assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) content = ak._v2.contents.NumpyArray( np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) @@ -494,12 +585,26 @@ def test_sort_zero_length_arrays(): array = ak._v2.contents.ListArray(starts1, stops1, content) assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) array = ak._v2.contents.ListOffsetArray(offsets1, content) assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) def test_UnionArray_FIXME(): @@ -515,4 +620,11 @@ def test_UnionArray_FIXME(): assert to_list(array) == [] assert to_list(array.sort()) == [] - assert to_list(array.argsort()) == [] + assert ( + to_list( + ak._v2.operations.structure.argsort( + array, + ) + ) + == [] + ) diff --git a/tests/v2/test_0339-highlevel-sorting-function.py b/tests/v2/test_0339-highlevel-sorting-function.py new file mode 100644 index 0000000000..ccb1906d13 --- /dev/null +++ b/tests/v2/test_0339-highlevel-sorting-function.py @@ -0,0 +1,19 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + +to_list = ak._v2.operations.convert.to_list + +@pytest.mark.skip(reason="FIXME: ak._v2.operations.structure.sort not implemented") +def test_sort(): + data = ak._v2.Array([[7, 5, 7], [], [2], [8, 2]]) + assert ak.to_list(ak._v2.operations.structure.sort(data)) == [[5, 7, 7], [], [2], [2, 8]] + + +def test_argsort(): + data = ak._v2.Array([[7, 5, 7], [], [2], [8, 2]]) + index = ak._v2.operations.structure.argsort(data) + assert to_list(data[index]) == [[5, 7, 7], [], [2], [2, 8]] diff --git a/tests/v2/test_0723-ensure-that-jagged-slice-fits-array-length.py b/tests/v2/test_0723-ensure-that-jagged-slice-fits-array-length.py new file mode 100644 index 0000000000..37f90d6cc9 --- /dev/null +++ b/tests/v2/test_0723-ensure-that-jagged-slice-fits-array-length.py @@ -0,0 +1,49 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_first_issue(): + a = ak._v2.contents.NumpyArray(np.arange(122)) + idx = ak._v2.index.Index64([0, 2, 4, 6, 8, 10, 12]) + a = ak._v2.contents.ListOffsetArray(idx, a) + idx = ak._v2.index.Index64([0, -1, 1, 2, -1, 3, 4, 5]) + a = ak._v2.contents.IndexedOptionArray(idx, a) + a = ak._v2.Array(a) + with pytest.raises(IndexError): + a[[[0], None]] + assert a[[[0], None, [], [], [], [], [], []]].tolist() == [ + [0], + None, + [], + [], + None, + [], + [], + [], + ] + + +def test_second_issue(): + a = ak._v2.contents.NumpyArray(np.arange(122)) + idx = ak._v2.index.Index64([0, 2, 4, 6, 8, 10, 12]) + a = ak._v2.contents.ListOffsetArray(idx, a) + idx = ak._v2.index.Index64([0, -1, 1, 2, -1, 3, 4, 5]) + a = ak._v2.contents.IndexedOptionArray(idx, a) + a = ak._v2.Array(a) + assert ak._v2.operations.describe.is_valid(a) + + assert ak._v2.operations.describe.is_valid(ak._v2.operations.structure.argsort(a)) + assert a[ak._v2.operations.structure.argsort(a)].tolist() == [ + [0, 1], + None, + [2, 3], + [4, 5], + None, + [6, 7], + [8, 9], + [10, 11], + ] diff --git a/tests/v2/test_0736-implement-argsort-for-strings.py b/tests/v2/test_0736-implement-argsort-for-strings.py new file mode 100644 index 0000000000..48a85f776d --- /dev/null +++ b/tests/v2/test_0736-implement-argsort-for-strings.py @@ -0,0 +1,75 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_but_first_fix_sort(): + assert ak._v2.operations.describe.is_valid(ak._v2.Array(["one", "two", "three"]).layout.sort(axis=-1)) + + +@pytest.mark.skip(reason="FIXME: issue in ak._v2.operations.structure.argsort") +def test_argsort(): + array = ak._v2.Array(["one", "two", "three", "four", "five", "six", "seven", "eight"]) + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [7, 4, 3, 0, 6, 5, 2, 1] + + array = ak._v2.Array( + [["twotwo", "two", "three"], ["four", "five"], [], ["six", "seven", "eight"]] + ) + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [[2, 1, 0], [1, 0], [], [2, 1, 0]] + + array = ak._v2.Array( + [ + [["twotwo", "two"], ["three"]], + [["four", "five"]], + [], + [["six"], ["seven", "eight"]], + ] + ) + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [ + [[1, 0], [0]], + [[1, 0]], + [], + [[0], [1, 0]], + ] + +@pytest.mark.skip(reason="FIXME: ak._v2.operations.structure.sort not implemented") +def test_sort(): + array = ak._v2.Array(["one", "two", "three", "four", "five", "six", "seven", "eight"]) + assert ak._v2.operations.structure.sort(array, axis=-1).tolist() == [ + "eight", + "five", + "four", + "one", + "seven", + "six", + "three", + "two", + ] + + array = ak._v2.Array( + [["twotwo", "two", "three"], ["four", "five"], [], ["six", "seven", "eight"]] + ) + assert ak._v2.operations.structure.sort(array, axis=-1).tolist() == [ + ["three", "two", "twotwo"], + ["five", "four"], + [], + ["eight", "seven", "six"], + ] + + array = ak._v2.Array( + [ + [["twotwo", "two"], ["three"]], + [["four", "five"]], + [], + [["six"], ["seven", "eight"]], + ] + ) + assert ak._v2.operations.structure.sort(array, axis=-1).tolist() == [ + [["two", "twotwo"], ["three"]], + [["five", "four"]], + [], + [["six"], ["eight", "seven"]], + ] diff --git a/tests/v2/test_0803-argsort-fix-type.py b/tests/v2/test_0803-argsort-fix-type.py new file mode 100644 index 0000000000..8d7648cb2a --- /dev/null +++ b/tests/v2/test_0803-argsort-fix-type.py @@ -0,0 +1,26 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_argsort(): + array = ak._v2.Array([[1.1, 2.2], [3.3, 3.1]]) + assert ak._v2.operations.structure.argsort(array).tolist() == [[0, 1], [1, 0]] + assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array))) == "2 * var * int64" + + empty_array = ak._v2.Array([[], []]) + assert ak._v2.operations.structure.argsort(empty_array).tolist() == [[], []] + assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(empty_array))) == "2 * var * int64" + + select_array = array[array > 5] + assert select_array.tolist() == [[], []] + assert str(ak._v2.operations.describe.type(select_array)) == "2 * var * float64" + + assert ak._v2.operations.structure.argsort(select_array).tolist() == [[], []] + assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(select_array))) == "2 * var * int64" + + assert ak._v2.operations.structure.argsort(array[array > 5]).tolist() == [[], []] + assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array[array > 5]))) == "2 * var * int64" diff --git a/tests/v2/test_0850-argsort-mask-array.py b/tests/v2/test_0850-argsort-mask-array.py new file mode 100644 index 0000000000..121fdf4619 --- /dev/null +++ b/tests/v2/test_0850-argsort-mask-array.py @@ -0,0 +1,22 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + + +def test(): + array = ak._v2.Array([[0, 1, 2, 3], [3, 3, 3, 2, 1]]) + is_valid = array != 3 + + assert ak._v2.operations.structure.mask(array, is_valid).tolist() == [[0, 1, 2, None], [None, None, None, 2, 1]] + + assert ak._v2.operations.structure.mask(array, is_valid).layout.sort().tolist() == [ + [0, 1, 2, None], + [1, 2, None, None, None], + ] + assert ak._v2.operations.structure.argsort(ak._v2.operations.structure.mask(array, is_valid)).tolist() == [ + [0, 1, 2, 3], + [4, 3, 0, 1, 2], + ] diff --git a/tests/v2/test_0945-argsort-sort-nan-array.py b/tests/v2/test_0945-argsort-sort-nan-array.py new file mode 100644 index 0000000000..b14fbb8d5a --- /dev/null +++ b/tests/v2/test_0945-argsort-sort-nan-array.py @@ -0,0 +1,66 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward as ak # noqa: F401 + +to_list = ak._v2.operations.convert.to_list + + +def test_nan(): + array = ak._v2.Array([1, 2, np.nan, 3, 0, np.nan]) + + assert ak._v2.operations.structure.argsort(array).tolist() == [ + 2, + 5, + 4, + 0, + 1, + 3, + ] + # Note, `nan` comparison with `nan` returns False + assert str(to_list(array.layout.sort())) == "[nan, nan, 0.0, 1.0, 2.0, 3.0]" + + +def test_bool(): + array = ak._v2.Array([True, False, False, True, True, True]) + + assert ak._v2.operations.structure.argsort(array).tolist() == [1, 2, 0, 3, 4, 5] + assert to_list(array.layout.sort()) == [False, False, True, True, True, True] + + +def test_argsort(): + array = ak._v2.Array([1, 2, None, 3, 0, None]) + assert ak._v2.operations.structure.argsort(array).tolist() == [4, 0, 1, 3, 2, 5] + assert array[ak._v2.operations.structure.argsort(array)].tolist() == [ + 0, + 1, + 2, + 3, + None, + None, + ] + + +def test_argsort_2d(): + array = ak._v2.Array([[1, 2, None, 3, 0, None], [1, 2, None, 3, 0, None]]) + assert ak._v2.operations.structure.argsort(array).tolist() == [[4, 0, 1, 3, 2, 5], [4, 0, 1, 3, 2, 5]] + assert array[ak._v2.operations.structure.argsort(array)].tolist() == [ + [ + 0, + 1, + 2, + 3, + None, + None, + ], + [ + 0, + 1, + 2, + 3, + None, + None, + ], + ] From b41f399d784a831214353ce4684d0bcfb5822f49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Feb 2022 17:43:49 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../test_0339-highlevel-sorting-function.py | 8 ++++- ...test_0736-implement-argsort-for-strings.py | 31 +++++++++++++++--- tests/v2/test_0803-argsort-fix-type.py | 32 ++++++++++++++++--- tests/v2/test_0850-argsort-mask-array.py | 9 ++++-- tests/v2/test_0945-argsort-sort-nan-array.py | 5 ++- 5 files changed, 72 insertions(+), 13 deletions(-) diff --git a/tests/v2/test_0339-highlevel-sorting-function.py b/tests/v2/test_0339-highlevel-sorting-function.py index ccb1906d13..d430a512f1 100644 --- a/tests/v2/test_0339-highlevel-sorting-function.py +++ b/tests/v2/test_0339-highlevel-sorting-function.py @@ -7,10 +7,16 @@ to_list = ak._v2.operations.convert.to_list + @pytest.mark.skip(reason="FIXME: ak._v2.operations.structure.sort not implemented") def test_sort(): data = ak._v2.Array([[7, 5, 7], [], [2], [8, 2]]) - assert ak.to_list(ak._v2.operations.structure.sort(data)) == [[5, 7, 7], [], [2], [2, 8]] + assert ak.to_list(ak._v2.operations.structure.sort(data)) == [ + [5, 7, 7], + [], + [2], + [2, 8], + ] def test_argsort(): diff --git a/tests/v2/test_0736-implement-argsort-for-strings.py b/tests/v2/test_0736-implement-argsort-for-strings.py index 48a85f776d..3c9a6d752d 100644 --- a/tests/v2/test_0736-implement-argsort-for-strings.py +++ b/tests/v2/test_0736-implement-argsort-for-strings.py @@ -7,18 +7,36 @@ def test_but_first_fix_sort(): - assert ak._v2.operations.describe.is_valid(ak._v2.Array(["one", "two", "three"]).layout.sort(axis=-1)) + assert ak._v2.operations.describe.is_valid( + ak._v2.Array(["one", "two", "three"]).layout.sort(axis=-1) + ) @pytest.mark.skip(reason="FIXME: issue in ak._v2.operations.structure.argsort") def test_argsort(): - array = ak._v2.Array(["one", "two", "three", "four", "five", "six", "seven", "eight"]) - assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [7, 4, 3, 0, 6, 5, 2, 1] + array = ak._v2.Array( + ["one", "two", "three", "four", "five", "six", "seven", "eight"] + ) + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [ + 7, + 4, + 3, + 0, + 6, + 5, + 2, + 1, + ] array = ak._v2.Array( [["twotwo", "two", "three"], ["four", "five"], [], ["six", "seven", "eight"]] ) - assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [[2, 1, 0], [1, 0], [], [2, 1, 0]] + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [ + [2, 1, 0], + [1, 0], + [], + [2, 1, 0], + ] array = ak._v2.Array( [ @@ -35,9 +53,12 @@ def test_argsort(): [[0], [1, 0]], ] + @pytest.mark.skip(reason="FIXME: ak._v2.operations.structure.sort not implemented") def test_sort(): - array = ak._v2.Array(["one", "two", "three", "four", "five", "six", "seven", "eight"]) + array = ak._v2.Array( + ["one", "two", "three", "four", "five", "six", "seven", "eight"] + ) assert ak._v2.operations.structure.sort(array, axis=-1).tolist() == [ "eight", "five", diff --git a/tests/v2/test_0803-argsort-fix-type.py b/tests/v2/test_0803-argsort-fix-type.py index 8d7648cb2a..e81b92d926 100644 --- a/tests/v2/test_0803-argsort-fix-type.py +++ b/tests/v2/test_0803-argsort-fix-type.py @@ -9,18 +9,42 @@ def test_argsort(): array = ak._v2.Array([[1.1, 2.2], [3.3, 3.1]]) assert ak._v2.operations.structure.argsort(array).tolist() == [[0, 1], [1, 0]] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array))) == "2 * var * int64" + assert ( + str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array))) + == "2 * var * int64" + ) empty_array = ak._v2.Array([[], []]) assert ak._v2.operations.structure.argsort(empty_array).tolist() == [[], []] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(empty_array))) == "2 * var * int64" + assert ( + str( + ak._v2.operations.describe.type( + ak._v2.operations.structure.argsort(empty_array) + ) + ) + == "2 * var * int64" + ) select_array = array[array > 5] assert select_array.tolist() == [[], []] assert str(ak._v2.operations.describe.type(select_array)) == "2 * var * float64" assert ak._v2.operations.structure.argsort(select_array).tolist() == [[], []] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(select_array))) == "2 * var * int64" + assert ( + str( + ak._v2.operations.describe.type( + ak._v2.operations.structure.argsort(select_array) + ) + ) + == "2 * var * int64" + ) assert ak._v2.operations.structure.argsort(array[array > 5]).tolist() == [[], []] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array[array > 5]))) == "2 * var * int64" + assert ( + str( + ak._v2.operations.describe.type( + ak._v2.operations.structure.argsort(array[array > 5]) + ) + ) + == "2 * var * int64" + ) diff --git a/tests/v2/test_0850-argsort-mask-array.py b/tests/v2/test_0850-argsort-mask-array.py index 121fdf4619..6298b41c8b 100644 --- a/tests/v2/test_0850-argsort-mask-array.py +++ b/tests/v2/test_0850-argsort-mask-array.py @@ -10,13 +10,18 @@ def test(): array = ak._v2.Array([[0, 1, 2, 3], [3, 3, 3, 2, 1]]) is_valid = array != 3 - assert ak._v2.operations.structure.mask(array, is_valid).tolist() == [[0, 1, 2, None], [None, None, None, 2, 1]] + assert ak._v2.operations.structure.mask(array, is_valid).tolist() == [ + [0, 1, 2, None], + [None, None, None, 2, 1], + ] assert ak._v2.operations.structure.mask(array, is_valid).layout.sort().tolist() == [ [0, 1, 2, None], [1, 2, None, None, None], ] - assert ak._v2.operations.structure.argsort(ak._v2.operations.structure.mask(array, is_valid)).tolist() == [ + assert ak._v2.operations.structure.argsort( + ak._v2.operations.structure.mask(array, is_valid) + ).tolist() == [ [0, 1, 2, 3], [4, 3, 0, 1, 2], ] diff --git a/tests/v2/test_0945-argsort-sort-nan-array.py b/tests/v2/test_0945-argsort-sort-nan-array.py index b14fbb8d5a..1fb2ff4ce0 100644 --- a/tests/v2/test_0945-argsort-sort-nan-array.py +++ b/tests/v2/test_0945-argsort-sort-nan-array.py @@ -45,7 +45,10 @@ def test_argsort(): def test_argsort_2d(): array = ak._v2.Array([[1, 2, None, 3, 0, None], [1, 2, None, 3, 0, None]]) - assert ak._v2.operations.structure.argsort(array).tolist() == [[4, 0, 1, 3, 2, 5], [4, 0, 1, 3, 2, 5]] + assert ak._v2.operations.structure.argsort(array).tolist() == [ + [4, 0, 1, 3, 2, 5], + [4, 0, 1, 3, 2, 5], + ] assert array[ak._v2.operations.structure.argsort(array)].tolist() == [ [ 0, From a29bd903d682217b14ebf77598a1f21cfd58dde2 Mon Sep 17 00:00:00 2001 From: ioanaif Date: Tue, 22 Feb 2022 00:46:19 +0200 Subject: [PATCH 3/3] Fixed issue in argsort --- src/awkward/_v2/contents/listoffsetarray.py | 2 +- .../test_0339-highlevel-sorting-function.py | 8 ++++- ...test_0736-implement-argsort-for-strings.py | 32 +++++++++++++++---- tests/v2/test_0803-argsort-fix-type.py | 32 ++++++++++++++++--- tests/v2/test_0850-argsort-mask-array.py | 9 ++++-- tests/v2/test_0945-argsort-sort-nan-array.py | 5 ++- 6 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/awkward/_v2/contents/listoffsetarray.py b/src/awkward/_v2/contents/listoffsetarray.py index 290d186fc1..420e1d05ec 100644 --- a/src/awkward/_v2/contents/listoffsetarray.py +++ b/src/awkward/_v2/contents/listoffsetarray.py @@ -1084,7 +1084,7 @@ def _argsort_next( self_stops.data, stable, ascending, - False, + True, ) ) return ak._v2.contents.NumpyArray(nextcarry, None, None, self._nplike) diff --git a/tests/v2/test_0339-highlevel-sorting-function.py b/tests/v2/test_0339-highlevel-sorting-function.py index ccb1906d13..d430a512f1 100644 --- a/tests/v2/test_0339-highlevel-sorting-function.py +++ b/tests/v2/test_0339-highlevel-sorting-function.py @@ -7,10 +7,16 @@ to_list = ak._v2.operations.convert.to_list + @pytest.mark.skip(reason="FIXME: ak._v2.operations.structure.sort not implemented") def test_sort(): data = ak._v2.Array([[7, 5, 7], [], [2], [8, 2]]) - assert ak.to_list(ak._v2.operations.structure.sort(data)) == [[5, 7, 7], [], [2], [2, 8]] + assert ak.to_list(ak._v2.operations.structure.sort(data)) == [ + [5, 7, 7], + [], + [2], + [2, 8], + ] def test_argsort(): diff --git a/tests/v2/test_0736-implement-argsort-for-strings.py b/tests/v2/test_0736-implement-argsort-for-strings.py index 48a85f776d..c6afb43971 100644 --- a/tests/v2/test_0736-implement-argsort-for-strings.py +++ b/tests/v2/test_0736-implement-argsort-for-strings.py @@ -7,18 +7,35 @@ def test_but_first_fix_sort(): - assert ak._v2.operations.describe.is_valid(ak._v2.Array(["one", "two", "three"]).layout.sort(axis=-1)) + assert ak._v2.operations.describe.is_valid( + ak._v2.Array(["one", "two", "three"]).layout.sort(axis=-1) + ) -@pytest.mark.skip(reason="FIXME: issue in ak._v2.operations.structure.argsort") def test_argsort(): - array = ak._v2.Array(["one", "two", "three", "four", "five", "six", "seven", "eight"]) - assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [7, 4, 3, 0, 6, 5, 2, 1] + array = ak._v2.Array( + ["one", "two", "three", "four", "five", "six", "seven", "eight"] + ) + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [ + 7, + 4, + 3, + 0, + 6, + 5, + 2, + 1, + ] array = ak._v2.Array( [["twotwo", "two", "three"], ["four", "five"], [], ["six", "seven", "eight"]] ) - assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [[2, 1, 0], [1, 0], [], [2, 1, 0]] + assert ak._v2.operations.structure.argsort(array, axis=-1).tolist() == [ + [2, 1, 0], + [1, 0], + [], + [2, 1, 0], + ] array = ak._v2.Array( [ @@ -35,9 +52,12 @@ def test_argsort(): [[0], [1, 0]], ] + @pytest.mark.skip(reason="FIXME: ak._v2.operations.structure.sort not implemented") def test_sort(): - array = ak._v2.Array(["one", "two", "three", "four", "five", "six", "seven", "eight"]) + array = ak._v2.Array( + ["one", "two", "three", "four", "five", "six", "seven", "eight"] + ) assert ak._v2.operations.structure.sort(array, axis=-1).tolist() == [ "eight", "five", diff --git a/tests/v2/test_0803-argsort-fix-type.py b/tests/v2/test_0803-argsort-fix-type.py index 8d7648cb2a..e81b92d926 100644 --- a/tests/v2/test_0803-argsort-fix-type.py +++ b/tests/v2/test_0803-argsort-fix-type.py @@ -9,18 +9,42 @@ def test_argsort(): array = ak._v2.Array([[1.1, 2.2], [3.3, 3.1]]) assert ak._v2.operations.structure.argsort(array).tolist() == [[0, 1], [1, 0]] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array))) == "2 * var * int64" + assert ( + str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array))) + == "2 * var * int64" + ) empty_array = ak._v2.Array([[], []]) assert ak._v2.operations.structure.argsort(empty_array).tolist() == [[], []] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(empty_array))) == "2 * var * int64" + assert ( + str( + ak._v2.operations.describe.type( + ak._v2.operations.structure.argsort(empty_array) + ) + ) + == "2 * var * int64" + ) select_array = array[array > 5] assert select_array.tolist() == [[], []] assert str(ak._v2.operations.describe.type(select_array)) == "2 * var * float64" assert ak._v2.operations.structure.argsort(select_array).tolist() == [[], []] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(select_array))) == "2 * var * int64" + assert ( + str( + ak._v2.operations.describe.type( + ak._v2.operations.structure.argsort(select_array) + ) + ) + == "2 * var * int64" + ) assert ak._v2.operations.structure.argsort(array[array > 5]).tolist() == [[], []] - assert str(ak._v2.operations.describe.type(ak._v2.operations.structure.argsort(array[array > 5]))) == "2 * var * int64" + assert ( + str( + ak._v2.operations.describe.type( + ak._v2.operations.structure.argsort(array[array > 5]) + ) + ) + == "2 * var * int64" + ) diff --git a/tests/v2/test_0850-argsort-mask-array.py b/tests/v2/test_0850-argsort-mask-array.py index 121fdf4619..6298b41c8b 100644 --- a/tests/v2/test_0850-argsort-mask-array.py +++ b/tests/v2/test_0850-argsort-mask-array.py @@ -10,13 +10,18 @@ def test(): array = ak._v2.Array([[0, 1, 2, 3], [3, 3, 3, 2, 1]]) is_valid = array != 3 - assert ak._v2.operations.structure.mask(array, is_valid).tolist() == [[0, 1, 2, None], [None, None, None, 2, 1]] + assert ak._v2.operations.structure.mask(array, is_valid).tolist() == [ + [0, 1, 2, None], + [None, None, None, 2, 1], + ] assert ak._v2.operations.structure.mask(array, is_valid).layout.sort().tolist() == [ [0, 1, 2, None], [1, 2, None, None, None], ] - assert ak._v2.operations.structure.argsort(ak._v2.operations.structure.mask(array, is_valid)).tolist() == [ + assert ak._v2.operations.structure.argsort( + ak._v2.operations.structure.mask(array, is_valid) + ).tolist() == [ [0, 1, 2, 3], [4, 3, 0, 1, 2], ] diff --git a/tests/v2/test_0945-argsort-sort-nan-array.py b/tests/v2/test_0945-argsort-sort-nan-array.py index b14fbb8d5a..1fb2ff4ce0 100644 --- a/tests/v2/test_0945-argsort-sort-nan-array.py +++ b/tests/v2/test_0945-argsort-sort-nan-array.py @@ -45,7 +45,10 @@ def test_argsort(): def test_argsort_2d(): array = ak._v2.Array([[1, 2, None, 3, 0, None], [1, 2, None, 3, 0, None]]) - assert ak._v2.operations.structure.argsort(array).tolist() == [[4, 0, 1, 3, 2, 5], [4, 0, 1, 3, 2, 5]] + assert ak._v2.operations.structure.argsort(array).tolist() == [ + [4, 0, 1, 3, 2, 5], + [4, 0, 1, 3, 2, 5], + ] assert array[ak._v2.operations.structure.argsort(array)].tolist() == [ [ 0,