scikit-hep · jpivarski · Dec 16, 2022 · Nov 21, 2022 · Nov 21, 2022 · Nov 21, 2022
diff --git a/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_drop_none_indexes.cpp b/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_drop_none_indexes.cpp
@@ -0,0 +1,57 @@
+// BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+#define FILENAME(line) FILENAME_FOR_EXCEPTIONS_C("src/cpu-kernels/awkward_ListOffsetArray_drop_none_indexes.cpp", line)
+
+#include "awkward/kernels.h"
+
+// Computes the offsets of a list array after its missing entries have been
+// removed from the content.
+//
+//   tooffsets      - output; receives length_offsets values
+//   noneindexes    - option-type index; an entry < 0 is counted as a None
+//   fromoffsets    - input offsets, length_offsets values
+//   length_offsets - number of entries in fromoffsets and tooffsets
+//   length_indexes - accepted for the kernel signature; not read here, so the
+//                    caller must guarantee every offset is a valid upper bound
+//                    into noneindexes
+//
+// For each i, tooffsets[i] is fromoffsets[i] minus the running count of
+// negative noneindexes entries found in the ranges scanned so far.
+template <typename T, typename C>
+ERROR awkward_ListOffsetArray_drop_none_indexes(
+  T* tooffsets,
+  const C* noneindexes,
+  const T* fromoffsets,
+  int64_t length_offsets,
+  int64_t length_indexes) {
+  T nr_of_nones = 0;
+  int64_t offset1 = 0;
+  int64_t offset2 = 0;
+
+  for (int64_t i = 0; i < length_offsets; i++) {
+    offset2 = fromoffsets[i];
+    // The index must be 64-bit: offsets are int64_t, and a plain `int` would
+    // truncate the start value and overflow for content longer than INT_MAX.
+    for (int64_t j = offset1; j < offset2; j++) {
+        if (noneindexes[j] < 0) {
+            nr_of_nones++;
+        }
+    }
+    tooffsets[i] = fromoffsets[i] - nr_of_nones;
+    // The next range starts where this one ended, so each entry is scanned
+    // at most once when the offsets are non-decreasing.
+    offset1 = offset2;
+  }
+
+  return success();
+}
+// Specialization for a 64-bit noneindexes array; forwards every argument to
+// the templated implementation with T = int64_t and C = int64_t.
+ERROR awkward_ListOffsetArray_drop_none_indexes_64(
+  int64_t* tooffsets,
+  const int64_t* noneindexes,
+  const int64_t* fromoffsets,
+  int64_t length_offsets,
+  int64_t length_indexes) {
+  return awkward_ListOffsetArray_drop_none_indexes<int64_t, int64_t>(
+    tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes);
+}
+
+// Specialization for a 32-bit noneindexes array; forwards every argument to
+// the templated implementation with T = int64_t and C = int32_t.
+ERROR awkward_ListOffsetArray_drop_none_indexes_32(
+  int64_t* tooffsets,
+  const int32_t* noneindexes,
+  const int64_t* fromoffsets,
+  int64_t length_offsets,
+  int64_t length_indexes) {
+  return awkward_ListOffsetArray_drop_none_indexes<int64_t, int32_t>(
+    tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes);
+}
diff --git a/kernel-specification.yml b/kernel-specification.yml
@@ -2236,6 +2236,36 @@ kernels:
     automatic-tests: true
     manual-tests: []
 
+  # Kernel that rewrites list offsets after Nones are removed from the content:
+  # each output offset is the input offset minus the running count of negative
+  # entries in noneindexes encountered so far. Two specializations are listed,
+  # differing only in the integer width of noneindexes (int32_t vs int64_t).
+  - name: awkward_ListOffsetArray_drop_none_indexes
+    specializations:
+      - name: awkward_ListOffsetArray_drop_none_indexes_32
+        args:
+          - {name: tooffsets, type: "List[int64_t]", dir: out}
+          - {name: noneindexes, type: "Const[List[int32_t]]", dir: in, role: IndexedArray-index}
+          - {name: fromoffsets, type: "Const[List[int64_t]]", dir: in, role: ListOffsetArray-offsets}
+          - {name: length_offsets, type: "int64_t", dir: in, role: default}
+          - {name: length_indexes, type: "int64_t", dir: in, role: default}
+      - name: awkward_ListOffsetArray_drop_none_indexes_64
+        args:
+          - {name: tooffsets, type: "List[int64_t]", dir: out}
+          - {name: noneindexes, type: "Const[List[int64_t]]", dir: in, role: IndexedArray-index}
+          - {name: fromoffsets, type: "Const[List[int64_t]]", dir: in, role: ListOffsetArray-offsets}
+          - {name: length_offsets, type: "int64_t", dir: in, role: default}
+          - {name: length_indexes, type: "int64_t", dir: in, role: default}
+    description: null
+    # Reference implementation in Python; length_indexes is accepted but not
+    # read by the definition below.
+    definition: |
+      def awkward_ListOffsetArray_drop_none_indexes(tooffsets, noneindexes, fromoffsets, length_offsets, length_indexes):
+          nr_of_nones, offset1, offset2 = 0, 0, 0
+          for i in range(length_offsets):
+            offset2 = fromoffsets[i]
+            for j in range(offset1, offset2):
+              if (noneindexes[j] < 0):
+                nr_of_nones+=1
+            tooffsets[i] = fromoffsets[i] - nr_of_nones
+            offset1 = offset2
+    automatic-tests: true
+    manual-tests: []
+
   - name: awkward_ListOffsetArray_flatten_offsets
     specializations:
       - name: awkward_ListOffsetArray32_flatten_offsets_64

diff --git a/src/awkward/contents/bitmaskedarray.py b/src/awkward/contents/bitmaskedarray.py
@@ -597,6 +597,9 @@ def _completely_flatten(self, nplike, options):
         else:
             return [self.simplify_optiontype()]
 
+    def _drop_none(self):
+        return self.toByteMaskedArray()._drop_none()
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py
@@ -995,6 +995,11 @@ def _completely_flatten(self, nplike, options):
         else:
             return [self.simplify_optiontype()]
 
+    def _drop_none(self):
+        numnull = ak.index.Index64.empty(1, self._nplike)
+        _, outindex = self._nextcarry_outindex(numnull)
+        return (outindex, self.project())
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py
@@ -1573,6 +1573,9 @@ def _completely_flatten(
     ) -> list:
         raise ak._util.error(NotImplementedError)
 
+    def drop_none(self):
+        return self._drop_none()
+
     def recursively_apply(
         self,
         action: ActionType,

diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py
@@ -1575,6 +1575,10 @@ def _completely_flatten(self, nplike, options):
         else:
             return [self.simplify_optiontype()]
 
+    def _drop_none(self):
+        _, _, outindex = self._nextcarry_outindex(self._nplike)
+        return (outindex, self.project())
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/contents/listarray.py b/src/awkward/contents/listarray.py
@@ -1377,6 +1377,9 @@ def _completely_flatten(self, nplike, options):
             flat = next.content[next.offsets[0] : next.offsets[-1]]
             return flat._completely_flatten(nplike, options)
 
+    def _drop_none(self):
+        return self.toListOffsetArray64()._drop_none()
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/contents/listoffsetarray.py b/src/awkward/contents/listoffsetarray.py
@@ -1975,6 +1975,35 @@ def _completely_flatten(self, nplike, options):
             flat = self._content[self._offsets[0] : self._offsets[-1]]
             return flat._completely_flatten(nplike, options)
 
+    def _drop_none(self):
+        if self._content.is_option:
+
+            index, new_content = self._content._drop_none()
+            new_offsets = ak.index.Index64.empty(self._offsets.length, self._nplike)
+
+            assert (
+                new_offsets.nplike is self._nplike
+                and self._offsets.nplike is self._nplike
+                and index.nplike is self._nplike
+            )
+            self._handle_error(
+                self._nplike[
+                    "awkward_ListOffsetArray_drop_none_indexes",
+                    new_offsets.dtype.type,
+                    index.dtype.type,
+                    self._offsets.dtype.type,
+                ](
+                    new_offsets.data,
+                    index.data,
+                    self._offsets.data,
+                    self._offsets.length,
+                    index.length,
+                )
+            )
+            return ak.contents.ListOffsetArray(new_offsets, new_content)
+        else:
+            return self
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/contents/regulararray.py b/src/awkward/contents/regulararray.py
@@ -1270,6 +1270,9 @@ def _completely_flatten(self, nplike, options):
             flat = self._content[: self._length * self._size]
             return flat._completely_flatten(nplike, options)
 
+    def _drop_none(self):
+        return self.toListOffsetArray64()._drop_none()
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py
@@ -520,6 +520,9 @@ def _completely_flatten(self, nplike, options):
         else:
             return [self.simplify_optiontype()]
 
+    def _drop_none(self):
+        return self.toByteMaskedArray()._drop_none()
+
     def _recursively_apply(
         self, action, behavior, depth, depth_context, lateral_context, options
     ):

diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
@@ -18,6 +18,7 @@
 from awkward.operations.ak_count import count
 from awkward.operations.ak_count_nonzero import count_nonzero
 from awkward.operations.ak_covar import covar
+from awkward.operations.ak_drop_none import drop_none
 from awkward.operations.ak_fields import fields
 from awkward.operations.ak_fill_none import fill_none
 from awkward.operations.ak_firsts import firsts

diff --git a/src/awkward/operations/ak_drop_none.py b/src/awkward/operations/ak_drop_none.py
@@ -0,0 +1,82 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
+
+import awkward as ak
+
+np = ak.nplikes.NumpyMetadata.instance()
+
+
+def drop_none(array, axis=None, highlevel=True, behavior=None):
+    """
+    Args:
+        array: Data in which to remove Nones.
+        axis (None or int): If None, the operation drops Nones at all levels of
+            nesting, returning an array of the same dimension, but without Nones.
+            Otherwise, it drops Nones at a specified depth.
+            The outermost dimension is `0`, followed by `1`, etc.,
+            and negative values count backward from the innermost: `-1` is the
+            innermost dimension, `-2` is the next level up, etc.
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+
+    Removes missing values (None) from a given array.
+
+    For example, in the following `array`,
+
+       a = ak.Array([[[0]], [[None]], [[1], None], [[2, None]]])
+
+    The None value will be removed, resulting in
+
+        >>> ak.drop_none(a)
+        <Array [[[0]], [[]], [[1]], [[2]]] type='4 * var * var * int64'>
+
+    The default axis is None, however an axis can be specified:
+
+        >>> ak.drop_none(a,axis=1)
+        <Array [[[0]], [[None]], [[1]], [[2, None]]] type='4 * var * var * ?int64'>
+
+    """
+    with ak._errors.OperationErrorContext(
+        "ak.drop_none",
+        dict(array=array, axis=axis, highlevel=highlevel, behavior=behavior),
+    ):
+        return _impl(array, axis, highlevel, behavior)
+
+
+def _impl(array, axis, highlevel, behavior):
+    layout = ak.operations.to_layout(array, allow_record=False, allow_other=False)
+
+    if layout.is_numpy:
+        return array
+
+    def maybe_drop_none(layout):
+        if layout.is_list:
+            return layout.drop_none()
+        else:
+            return layout
+
+    if axis is None:
+        if layout.is_option:
+            return layout.project()
+
+        def action(layout, continuation, **kwargs):
+            return maybe_drop_none(continuation())
+
+    else:
+
+        def action(layout, depth, depth_context, **kwargs):
+            posaxis = layout.axis_wrap_if_negative(depth_context["posaxis"])
+
+            if posaxis == depth and layout.is_option:
+                return layout.project()
+            elif posaxis == depth and layout.is_list:
+                if layout.content.is_option:
+                    return layout.drop_none()
+
+            depth_context["posaxis"] = posaxis
+
+    depth_context = {"posaxis": axis}
+    out = layout.recursively_apply(action, behavior, depth_context)
+
+    return ak._util.wrap(out, behavior, highlevel)