Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ak.drop_none() #1904

Merged
merged 25 commits into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6a49e53
feat: add ak.drop_none()
ioanaif Nov 21, 2022
76e72c9
style: pre-commit fixes
pre-commit-ci[bot] Nov 21, 2022
899825c
Moved cpp kernel to awkward-cpp
ioanaif Nov 21, 2022
ae8006b
Fix renamed var names
ioanaif Nov 22, 2022
c671c46
IndexedOptionArray is not part of the roles dict, changed to IndexedA…
ioanaif Nov 22, 2022
d1761b6
Merge branch 'main' into ioanaif/add-drop-none-feature
jpivarski Nov 22, 2022
3fea24e
Add `_drop_none` to the Content interface/protocol.
jpivarski Nov 22, 2022
e13ecb5
style: pre-commit fixes
pre-commit-ci[bot] Nov 22, 2022
e72ebaa
fix: correct whitespace & add return annotation
agoose77 Nov 22, 2022
e02c796
Merge branch 'main' into ioanaif/add-drop-none-feature
ioanaif Nov 22, 2022
77fdba7
Fix method signature, fix highlevel issues
ioanaif Nov 22, 2022
4643a3d
style: pre-commit fixes
pre-commit-ci[bot] Nov 22, 2022
c351c96
Add max_axis limit for ak.drop_none
ioanaif Nov 23, 2022
f13325a
Fix recursion and offsets
ioanaif Dec 13, 2022
99726db
Merge branch 'main' into ioanaif/add-drop-none-feature
ioanaif Dec 13, 2022
e888c7a
Changes for refactorings
ioanaif Dec 13, 2022
b55fe5f
style: pre-commit fixes
pre-commit-ci[bot] Dec 13, 2022
ae2a84b
Updates for refactorings
ioanaif Dec 13, 2022
b87e15f
docs: include `drop_none` in rendered docs
agoose77 Dec 13, 2022
aa59da7
Remove unused vars
ioanaif Dec 14, 2022
345d804
style: pre-commit fixes
pre-commit-ci[bot] Dec 14, 2022
846ef18
Store none_indexes in queue to deal with multiple branches
ioanaif Dec 16, 2022
ea09638
Merge branch 'main' into ioanaif/add-drop-none-feature
ioanaif Dec 16, 2022
32880ee
Add the review examples as tests.
jpivarski Dec 16, 2022
b19bed8
This should resolve the last test failures: make them raise np.AxisEr…
jpivarski Dec 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

#define FILENAME(line) FILENAME_FOR_EXCEPTIONS_C("src/cpu-kernels/awkward_ListOffsetArray_drop_none_indexes.cpp", line)

#include "awkward/kernels.h"

// Recomputes list offsets for a ListOffsetArray whose option-type content
// has had its None entries removed.
//
// For every i in [0, length_offsets), tooffsets[i] is fromoffsets[i] minus
// the number of negative entries found in noneindexes[0 .. fromoffsets[i]).
// The count is carried across iterations, so each element of noneindexes is
// visited at most once as long as fromoffsets is non-decreasing.
//
//   tooffsets       output buffer with room for length_offsets values
//   noneindexes     option index; a negative value marks a None
//   fromoffsets     original offsets, length_offsets values
//   length_offsets  number of entries in fromoffsets / tooffsets
//   length_indexes  number of entries in noneindexes; it is not consulted
//                   here, so the caller must guarantee that no offset
//                   exceeds it
template <typename T, typename C>
ERROR awkward_ListOffsetArray_drop_none_indexes(
  T* tooffsets,
  const C* noneindexes,
  const T* fromoffsets,
  int64_t length_offsets,
  int64_t length_indexes) {
  T nr_of_nones = 0;
  int64_t offset1 = 0;

  for (int64_t i = 0; i < length_offsets; i++) {
    const int64_t offset2 = fromoffsets[i];
    // The index must be 64-bit: offsets are 64-bit, and a plain `int`
    // would truncate or overflow once the content exceeds INT_MAX entries.
    for (int64_t j = offset1; j < offset2; j++) {
      if (noneindexes[j] < 0) {
        nr_of_nones++;
      }
    }
    tooffsets[i] = fromoffsets[i] - nr_of_nones;
    offset1 = offset2;
  }

  return success();
}
// Exported specialization for a 64-bit option index: forwards every
// argument unchanged to the templated kernel with T = C = int64_t.
ERROR awkward_ListOffsetArray_drop_none_indexes_64(
  int64_t* tooffsets,
  const int64_t* noneindexes,
  const int64_t* fromoffsets,
  int64_t length_offsets,
  int64_t length_indexes) {
  return awkward_ListOffsetArray_drop_none_indexes<int64_t, int64_t>(
    tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes);
}

// Exported specialization for a 32-bit option index: the offsets stay
// 64-bit (T = int64_t) while the index is read as int32_t (C = int32_t).
ERROR awkward_ListOffsetArray_drop_none_indexes_32(
  int64_t* tooffsets,
  const int32_t* noneindexes,
  const int64_t* fromoffsets,
  int64_t length_offsets,
  int64_t length_indexes) {
  return awkward_ListOffsetArray_drop_none_indexes<int64_t, int32_t>(
    tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes);
}
30 changes: 30 additions & 0 deletions kernel-specification.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2236,6 +2236,36 @@ kernels:
automatic-tests: true
manual-tests: []

# Kernel: rewrites list offsets after None entries (negative values in
# noneindexes) have been dropped from the list's content.
- name: awkward_ListOffsetArray_drop_none_indexes
# One specialization per index width (int32 / int64); offsets are int64 in both.
specializations:
- name: awkward_ListOffsetArray_drop_none_indexes_32
args:
- {name: tooffsets, type: "List[int64_t]", dir: out}
- {name: noneindexes, type: "Const[List[int32_t]]", dir: in, role: IndexedArray-index}
- {name: fromoffsets, type: "Const[List[int64_t]]", dir: in, role: ListOffsetArray-offsets}
- {name: length_offsets, type: "int64_t", dir: in, role: default}
- {name: length_indexes, type: "int64_t", dir: in, role: default}
- name: awkward_ListOffsetArray_drop_none_indexes_64
args:
- {name: tooffsets, type: "List[int64_t]", dir: out}
- {name: noneindexes, type: "Const[List[int64_t]]", dir: in, role: IndexedArray-index}
- {name: fromoffsets, type: "Const[List[int64_t]]", dir: in, role: ListOffsetArray-offsets}
- {name: length_offsets, type: "int64_t", dir: in, role: default}
- {name: length_indexes, type: "int64_t", dir: in, role: default}
description: null
# Reference implementation: tooffsets[i] = fromoffsets[i] minus the running
# count of negative noneindexes seen before fromoffsets[i]. length_indexes is
# accepted but never read by the definition below.
definition: |
def awkward_ListOffsetArray_drop_none_indexes(tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes):
nr_of_nones, offset1, offset2 = 0, 0, 0
for i in range(length_offsets):
offset2 = fromoffsets[i]
for j in range(offset1, offset2):
if (noneindexes[j] < 0):
nr_of_nones+=1
tooffsets[i] = fromoffsets[i] - nr_of_nones
offset1 = offset2
automatic-tests: true
manual-tests: []

- name: awkward_ListOffsetArray_flatten_offsets
specializations:
- name: awkward_ListOffsetArray32_flatten_offsets_64
Expand Down
3 changes: 3 additions & 0 deletions src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,9 @@ def _completely_flatten(self, nplike, options):
else:
return [self.simplify_optiontype()]

def _drop_none(self):
    """
    Drop missing values by converting this array with `toByteMaskedArray()`
    and returning whatever that array's `_drop_none()` returns.
    """
    as_bytemasked = self.toByteMaskedArray()
    return as_bytemasked._drop_none()

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
5 changes: 5 additions & 0 deletions src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,11 @@ def _completely_flatten(self, nplike, options):
else:
return [self.simplify_optiontype()]

def _drop_none(self):
    """
    Return a 2-tuple `(outindex, projected)`.

    `outindex` is the second value returned by `_nextcarry_outindex`, and
    `projected` is the result of `self.project()`.
    """
    # One-element scratch index handed to _nextcarry_outindex
    # (presumably receives the number of missing values; verify there).
    null_count = ak.index.Index64.empty(1, self._nplike)
    _unused_carry, out_index = self._nextcarry_outindex(null_count)
    projected = self.project()
    return (out_index, projected)

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
3 changes: 3 additions & 0 deletions src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,9 @@ def _completely_flatten(
) -> list:
raise ak._util.error(NotImplementedError)

def drop_none(self):
    """
    Public entry point: returns the result of this layout's `_drop_none()`
    unchanged.
    """
    result = self._drop_none()
    return result

jpivarski marked this conversation as resolved.
Show resolved Hide resolved
def recursively_apply(
self,
action: ActionType,
Expand Down
4 changes: 4 additions & 0 deletions src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1575,6 +1575,10 @@ def _completely_flatten(self, nplike, options):
else:
return [self.simplify_optiontype()]

def _drop_none(self):
    """
    Return a 2-tuple `(outindex, projected)`.

    `outindex` is the third value returned by `_nextcarry_outindex`, and
    `projected` is the result of `self.project()`.
    """
    _first, _second, out_index = self._nextcarry_outindex(self._nplike)
    projected = self.project()
    return (out_index, projected)
ioanaif marked this conversation as resolved.
Show resolved Hide resolved

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
3 changes: 3 additions & 0 deletions src/awkward/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1377,6 +1377,9 @@ def _completely_flatten(self, nplike, options):
flat = next.content[next.offsets[0] : next.offsets[-1]]
return flat._completely_flatten(nplike, options)

def _drop_none(self):
    """
    Convert this array with `toListOffsetArray64()` and return the result
    of that array's `_drop_none()`.
    """
    as_listoffset = self.toListOffsetArray64()
    return as_listoffset._drop_none()

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
29 changes: 29 additions & 0 deletions src/awkward/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1975,6 +1975,35 @@ def _completely_flatten(self, nplike, options):
flat = self._content[self._offsets[0] : self._offsets[-1]]
return flat._completely_flatten(nplike, options)

def _drop_none(self):
    """
    Remove missing values from the immediate content of this list array.

    When the content is not option-type, `self` is returned unchanged.
    Otherwise the content's `_drop_none()` supplies an index and the
    None-free content, the `awkward_ListOffsetArray_drop_none_indexes`
    kernel fills a new Index64 of offsets, and a new ListOffsetArray is
    built from the two.
    """
    # Only an option-type content can hold Nones at this level.
    if not self._content.is_option:
        return self

    none_index, projected = self._content._drop_none()
    shrunk_offsets = ak.index.Index64.empty(self._offsets.length, self._nplike)

    assert (
        shrunk_offsets.nplike is self._nplike
        and self._offsets.nplike is self._nplike
        and none_index.nplike is self._nplike
    )

    # The kernel specialization is keyed on the dtypes of
    # (tooffsets, noneindexes, fromoffsets), in that order.
    kernel = self._nplike[
        "awkward_ListOffsetArray_drop_none_indexes",
        shrunk_offsets.dtype.type,
        none_index.dtype.type,
        self._offsets.dtype.type,
    ]
    self._handle_error(
        kernel(
            shrunk_offsets.data,
            none_index.data,
            self._offsets.data,
            self._offsets.length,
            none_index.length,
        )
    )
    return ak.contents.ListOffsetArray(shrunk_offsets, projected)

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
3 changes: 3 additions & 0 deletions src/awkward/contents/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,9 @@ def _completely_flatten(self, nplike, options):
flat = self._content[: self._length * self._size]
return flat._completely_flatten(nplike, options)

def _drop_none(self):
    """
    Convert this array with `toListOffsetArray64()` and return the result
    of that array's `_drop_none()`.
    """
    as_listoffset = self.toListOffsetArray64()
    return as_listoffset._drop_none()

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
3 changes: 3 additions & 0 deletions src/awkward/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,9 @@ def _completely_flatten(self, nplike, options):
else:
return [self.simplify_optiontype()]

def _drop_none(self):
    """
    Convert this array with `toByteMaskedArray()` and return whatever that
    array's `_drop_none()` returns.
    """
    as_bytemasked = self.toByteMaskedArray()
    return as_bytemasked._drop_none()

def _recursively_apply(
self, action, behavior, depth, depth_context, lateral_context, options
):
Expand Down
1 change: 1 addition & 0 deletions src/awkward/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from awkward.operations.ak_count import count
from awkward.operations.ak_count_nonzero import count_nonzero
from awkward.operations.ak_covar import covar
from awkward.operations.ak_drop_none import drop_none
from awkward.operations.ak_fields import fields
from awkward.operations.ak_fill_none import fill_none
from awkward.operations.ak_firsts import firsts
Expand Down
82 changes: 82 additions & 0 deletions src/awkward/operations/ak_drop_none.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import awkward as ak

np = ak.nplikes.NumpyMetadata.instance()


def drop_none(array, axis=None, highlevel=True, behavior=None):
    """
    Args:
        array: Data from which to remove Nones.
        axis (None or int): If None, Nones are dropped at every level of
            nesting, and the result has the same number of dimensions as
            the input. Otherwise, Nones are dropped only at the given depth.
            The outermost dimension is `0`, followed by `1`, etc.,
            and negative values count backward from the innermost: `-1` is the
            innermost dimension, `-2` is the next level up, etc.
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.contents.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.

    Removes missing values (None) from a given array.

    For example, given the following `array`,

        a = ak.Array([[[0]], [[None]], [[1], None], [[2, None]]])

    all of the None values are removed, resulting in

        >>> ak.drop_none(a)
        <Array [[[0]], [[]], [[1]], [[2]]] type='4 * var * var * int64'>

    The default axis is None, but a specific axis can be requested:

        >>> ak.drop_none(a,axis=1)
        <Array [[[0]], [[None]], [[1]], [[2, None]]] type='4 * var * var * ?int64'>

    """
    # Arguments recorded by the error context for this operation.
    call_arguments = {
        "array": array,
        "axis": axis,
        "highlevel": highlevel,
        "behavior": behavior,
    }
    with ak._errors.OperationErrorContext("ak.drop_none", call_arguments):
        return _impl(array, axis, highlevel, behavior)


def _impl(array, axis, highlevel, behavior):
    """
    Implementation of `drop_none`.

    Converts `array` to a layout, removes missing values either at every
    level (`axis=None`) or only at the depth selected by `axis`, and wraps
    the result according to `highlevel` and `behavior`.
    """
    layout = ak.operations.to_layout(array, allow_record=False, allow_other=False)

    if layout.is_numpy:
        # Nothing to drop, but still honor `highlevel` and `behavior`
        # instead of handing back the caller's object untouched.
        return ak._util.wrap(layout, behavior, highlevel)

    def maybe_drop_none(layout):
        if layout.is_list:
            return layout.drop_none()
        else:
            return layout

    if axis is None:
        # Strip an outermost option, then keep going: the recursion below
        # still has to drop nested Nones, and the result still has to be
        # wrapped like every other return path.
        if layout.is_option:
            layout = layout.project()

        def action(layout, continuation, **kwargs):
            return maybe_drop_none(continuation())

    else:

        def action(layout, depth, depth_context, **kwargs):
            posaxis = layout.axis_wrap_if_negative(depth_context["posaxis"])

            if posaxis == depth and layout.is_option:
                return layout.project()
            elif posaxis == depth and layout.is_list:
                if layout.content.is_option:
                    return layout.drop_none()

            # Pass the resolved axis on to deeper levels of the recursion.
            depth_context["posaxis"] = posaxis

    depth_context = {"posaxis": axis}
    out = layout.recursively_apply(action, behavior, depth_context)

    return ak._util.wrap(out, behavior, highlevel)
ioanaif marked this conversation as resolved.
Show resolved Hide resolved
Loading