Skip to content

Commit

Permalink
C++ refactoring: ak.ravel (#1222)
Browse files Browse the repository at this point in the history
* ak.ravel + testing

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ioanaif and pre-commit-ci[bot] authored Jan 13, 2022
1 parent 06e1ca8 commit c471a04
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 51 deletions.
2 changes: 1 addition & 1 deletion pybind11
Submodule pybind11 updated 182 files
98 changes: 48 additions & 50 deletions src/awkward/_v2/operations/structure/ak_ravel.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,54 +9,52 @@

# @ak._v2._connect.numpy.implements("ravel")
def ravel(array, highlevel=True, behavior=None):
    """
    Args:
        array: Data containing nested lists to flatten
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.layout.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.

    Returns an array with all levels of nesting removed by erasing the
    boundaries between consecutive lists.

    This is the equivalent of NumPy's `np.ravel` for Awkward Arrays.

    Consider the following doubly nested `array`.

        ak.Array([[
            [1.1, 2.2, 3.3],
            [],
            [4.4, 5.5],
            [6.6]],
            [],
            [
            [7.7],
            [8.8, 9.9]
            ]])

    Ravelling the array produces a flat array

        >>> print(ak.ravel(array))
        [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]

    Missing values are eliminated by flattening: there is no distinction
    between an empty list and a value of None at the level of flattening.
    """
    # Records and arbitrary Python objects are rejected at conversion time.
    layout = ak._v2.operations.convert.to_layout(
        array, allow_record=False, allow_other=False
    )
    nplike = ak.nplike.of(layout)

    # Wrap the flattened result in a 1-tuple so the concatenation step below
    # can treat it as a sequence of buffers.
    out = (layout.completely_flatten(flatten_records=True),)
    # Internal sanity check: every piece must be a plain NumPy array.
    assert isinstance(out, tuple) and all(isinstance(x, np.ndarray) for x in out)

    # Use the masked-array concatenate if any piece carries a mask.
    if any(isinstance(x, nplike.ma.MaskedArray) for x in out):
        out = ak._v2.contents.NumpyArray(nplike.ma.concatenate(out))
    else:
        out = ak._v2.contents.NumpyArray(nplike.concatenate(out))

    return ak._v2._util.wrap(out, array, behavior, highlevel)
88 changes: 88 additions & 0 deletions tests/v2/test_0984-ravel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import pytest # noqa: F401
import numpy as np # noqa: F401
import awkward as ak # noqa: F401

# Shorthand for the v2 list-conversion function used in the assertions below.
to_list = ak._v2.operations.convert.to_list

# Ten-element integer buffer shared by the tests as their innermost content.
content = ak._v2.contents.NumpyArray(np.array([0, 1, 2, 3, 4, 3, 6, 5, 2, 2]))


def test_one_level():
    """Ravel a singly nested list array and expect the flat values back."""
    offsets = ak._v2.index.Index64(np.array([0, 3, 6, 6, 8, 10], dtype=np.int64))
    nested = ak._v2.contents.ListOffsetArray(offsets, content)

    # The single level of nesting must be removed.
    flattened = ak._v2.operations.structure.ravel(nested)
    expected = [0, 1, 2, 3, 4, 3, 6, 5, 2, 2]
    assert to_list(flattened) == expected


def test_two_levels():
    """Ravel a doubly nested list array and expect the flat values back."""
    inner_offsets = ak._v2.index.Index64(
        np.array([0, 3, 6, 6, 8, 10], dtype=np.int64)
    )
    outer_offsets = ak._v2.index.Index64(np.array([0, 2, 4, 5], dtype=np.int64))
    inner_lists = ak._v2.contents.ListOffsetArray(inner_offsets, content)
    nested = ak._v2.contents.ListOffsetArray(outer_offsets, inner_lists)

    # Both levels of nesting must be removed.
    flattened = ak._v2.operations.structure.ravel(nested)
    expected = [0, 1, 2, 3, 4, 3, 6, 5, 2, 2]
    assert to_list(flattened) == expected


def test_record():
    """Ravel a record array whose two fields are list arrays over `content`."""
    x_offsets = ak._v2.index.Index64(np.array([0, 3, 5], dtype=np.int64))
    y_offsets = ak._v2.index.Index64(np.array([5, 7, 10], dtype=np.int64))
    field_x = ak._v2.contents.ListOffsetArray(x_offsets, content)
    field_y = ak._v2.contents.ListOffsetArray(y_offsets, content)
    records = ak._v2.contents.RecordArray((field_x, field_y), ("x", "y"))

    # Expected output is the values of field "x" followed by those of "y".
    flattened = ak._v2.operations.structure.ravel(records)
    expected = [0, 1, 2, 3, 4, 3, 6, 5, 2, 2]
    assert to_list(flattened) == expected


def test_option():
    """Ravel an option-type array of lists and expect Nones to be dropped."""
    offsets = ak._v2.index.Index64(np.array([0, 3, 6, 6, 8, 10], dtype=np.int64))
    lists = ak._v2.contents.ListOffsetArray(offsets, content)

    # Negative index entries mark missing values; they must not appear in
    # the ravelled output.
    option_index = ak._v2.index.Index64(np.array([0, -1, 2, -1, 4]))
    optional_lists = ak._v2.contents.IndexedOptionArray(option_index, lists)

    flattened = ak._v2.operations.structure.ravel(optional_lists)
    assert to_list(flattened) == [0, 1, 2, 2, 2]

0 comments on commit c471a04

Please sign in to comment.