Skip to content

Commit

Permalink
refactor: remove to_arraylib (#2128)
Browse files Browse the repository at this point in the history
  • Loading branch information
agoose77 authored Jan 16, 2023
1 parent dacc54b commit 1e22929
Show file tree
Hide file tree
Showing 23 changed files with 188 additions and 217 deletions.
4 changes: 3 additions & 1 deletion src/awkward/_connect/rdataframe/from_rdataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,9 @@ def cpp_fill_function(depth):

if col == "awkward_index_":
contents_index = ak.index.Index64(
array.layout.to_numpy(allow_missing=True)
array.layout.to_backend_array(
allow_missing=True, backend=ak._backends.NumpyBackend.instance()
)
)
else:
contents[col] = array.layout
Expand Down
3 changes: 3 additions & 0 deletions src/awkward/_nplikes.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,9 @@ def datetime_as_string(self, *args, **kwargs):
def can_cast(self, *args, **kwargs):
    """
    Forward a casting query to the wrapped array module.

    Every positional and keyword argument is handed unchanged to
    `self._module.can_cast`, and whatever that call returns is returned.
    """
    delegate = self._module.can_cast
    return delegate(*args, **kwargs)

def raw(self, array, nplike):
    """
    Unimplemented hook: neither `array` nor `nplike` is inspected.

    Raises:
        The result of `ak._errors.wrap_error(NotImplementedError)`, always.
    """
    unsupported = ak._errors.wrap_error(NotImplementedError)
    raise unsupported

@classmethod
def is_own_array(cls, obj) -> bool:
"""
Expand Down
23 changes: 21 additions & 2 deletions src/awkward/_reducers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any as AnyType

import awkward as ak
Expand All @@ -12,11 +13,19 @@
DTypeLike = AnyType


class Reducer:
class Reducer(ABC):
name: str

# Does the output correspond to array positions?
needs_position: Final = False
@property
@abstractmethod
def needs_position(self) -> bool:
    """Abstract property with no default; the getter body itself returns None."""

@property
@abstractmethod
def preferred_dtype(self) -> DTypeLike:
    """Abstract property with no default; the getter body itself returns None."""

@classmethod
def highlevel_function(cls):
Expand Down Expand Up @@ -46,9 +55,11 @@ def maybe_other_type(cls, dtype: DTypeLike) -> DTypeLike:
type = np.float32
return type

@abstractmethod
def identity_for(self, dtype: DTypeLike | None):
    """
    Abstract hook; this base body does not look at `dtype`.

    Raises:
        The result of `ak._errors.wrap_error(NotImplementedError)` if the
        base body is ever executed.
    """
    not_implemented = ak._errors.wrap_error(NotImplementedError)
    raise not_implemented

@abstractmethod
def apply(self, array, parents, outlength: int):
    """
    Abstract hook; this base body ignores all of its arguments.

    Raises:
        The result of `ak._errors.wrap_error(NotImplementedError)` if the
        base body is ever executed.
    """
    not_implemented = ak._errors.wrap_error(NotImplementedError)
    raise not_implemented

Expand Down Expand Up @@ -158,6 +169,7 @@ def apply(self, array, parents, outlength):
class Count(Reducer):
name: Final = "count"
preferred_dtype: Final = np.int64
needs_position: Final = False

@classmethod
def return_dtype(cls, given_dtype):
Expand Down Expand Up @@ -186,6 +198,7 @@ def identity_for(self, dtype: np.dtype | None):
class CountNonzero(Reducer):
name: Final = "count_nonzero"
preferred_dtype: Final = np.float64
needs_position: Final = False

@classmethod
def return_dtype(cls, given_dtype):
Expand Down Expand Up @@ -236,6 +249,7 @@ def identity_for(self, dtype: np.dtype | None):
class Sum(Reducer):
name: Final = "sum"
preferred_dtype: Final = np.float64
needs_position: Final = False

def apply(self, array, parents, outlength):
assert isinstance(array, ak.contents.NumpyArray)
Expand Down Expand Up @@ -340,6 +354,7 @@ def identity_for(self, dtype: np.dtype | None):
class Prod(Reducer):
name: Final = "prod"
preferred_dtype: Final = np.int64
needs_position: Final = False

def apply(self, array, parents, outlength):
assert isinstance(array, ak.contents.NumpyArray)
Expand Down Expand Up @@ -426,6 +441,7 @@ def identity_for(self, dtype: np.dtype | None):
class Any(Reducer):
name: Final = "any"
preferred_dtype: Final = np.bool_
needs_position: Final = False

@classmethod
def return_dtype(cls, given_dtype):
Expand Down Expand Up @@ -476,6 +492,7 @@ def identity_for(self, dtype: DTypeLike | None) -> float:
class All(Reducer):
name: Final = "all"
preferred_dtype: Final = np.bool_
needs_position: Final = False

@classmethod
def return_dtype(cls, given_dtype):
Expand Down Expand Up @@ -526,6 +543,7 @@ def identity_for(self, dtype: DTypeLike | None) -> float:
class Min(Reducer):
name: Final = "min"
preferred_dtype: Final = np.float64
needs_position: Final = False

def __init__(self, initial: float | None):
self._initial = initial
Expand Down Expand Up @@ -626,6 +644,7 @@ def apply(self, array, parents, outlength):
class Max(Reducer):
name: Final = "max"
preferred_dtype: Final = np.float64
needs_position: Final = False

def __init__(self, initial):
self._initial = initial
Expand Down
112 changes: 0 additions & 112 deletions src/awkward/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

import numpy
import packaging.version
from awkward_cpp.lib import _ext

import awkward as ak

Expand Down Expand Up @@ -738,117 +737,6 @@ def attach(x):
return ak._util.wrap(layout, behavior, highlevel)


def to_arraylib(module, array, allow_missing):
    """
    Convert `array` into an array of the array library `module`.

    Dispatch is on `module.__name__`:

    * "jax.numpy" / "cupy": the nested `_impl` walks the input recursively.
      `allow_missing` is not consulted on this path; an option-type layout
      with any missing value raises instead.
    * "numpy": the input is normalized with `ak.operations.to_layout` and,
      if that yields a layout or record, converted with
      `layout.to_numpy(allow_missing=allow_missing)`; anything else goes
      through `module.asarray`.
    * any other module name: an error is raised.

    Raises (all wrapped by `ak._errors.wrap_error`):
        ValueError: for records, strings/bytestrings, masked values,
            unconvertible objects, or an unsupported `module`.
        AssertionError: for a `Content` subclass no branch recognizes.
    """

    def _impl(array):
        # Branch order matters: specific checks must precede the generic
        # `Content` and `Iterable` fallbacks at the bottom of the chain.
        if isinstance(array, (bool, numbers.Number)):
            return module.array(array)

        # Already an array of the target library: returned unchanged.
        elif isinstance(array, module.ndarray):
            return array

        elif isinstance(array, np.ndarray):
            return module.asarray(array)

        # High-level wrappers are unwrapped to their layouts and re-dispatched.
        elif isinstance(array, ak.highlevel.Array):
            return _impl(array.layout)

        elif isinstance(array, ak.highlevel.Record):
            raise ak._errors.wrap_error(
                ValueError(f"{module.__name__} does not support record structures")
            )

        elif isinstance(array, ak.highlevel.ArrayBuilder):
            return _impl(array.snapshot().layout)

        elif isinstance(array, _ext.ArrayBuilder):
            return _impl(array.snapshot())

        # String-like data is identified by its "__array__" parameter and is
        # rejected before any layout-type branch can match it.
        elif ak.operations.parameters(array).get("__array__") in (
            "bytestring",
            "string",
        ):
            raise ak._errors.wrap_error(
                ValueError(f"{module.__name__} does not support arrays of strings")
            )

        elif isinstance(array, ak.contents.EmptyArray):
            return module.array([])

        elif isinstance(array, ak.contents.IndexedArray):
            return _impl(array.project())

        elif isinstance(array, ak.contents.UnionArray):
            # Convert every projection, allocate the output by concatenating
            # them, then write each projection back at the positions whose
            # tag selects it.
            contents = [_impl(array.project(i)) for i in range(len(array.contents))]
            out = module.concatenate(contents)

            tags = module.asarray(array.tags)
            for tag, content in enumerate(contents):
                mask = tags == tag
                # For arrays Jax recognizes as its own, the update goes
                # through `.at[...].set(...)` and the result is rebound;
                # otherwise the output is assigned in place.
                if ak._nplikes.Jax.is_own_array(out):
                    out = out.at[mask].set(content)
                else:
                    out[mask] = content
            return out

        elif isinstance(array, ak.contents.UnmaskedArray):
            return _impl(array.content)

        elif isinstance(array, ak.contents.IndexedOptionArray):
            content = _impl(array.project())

            # Missing values are only tolerated when there are none.
            mask0 = array.mask_as_bool(valid_when=False)
            if mask0.any():
                raise ak._errors.wrap_error(
                    ValueError(f"{module.__name__} does not support masked arrays")
                )
            else:
                return content

        elif isinstance(array, ak.contents.RegularArray):
            out = _impl(array.content)
            # Drop trailing items that do not fill a whole row, then fold the
            # leading axis into (rows, array.size).
            # NOTE(review): `array.size == 0` would divide by zero here —
            # confirm callers never reach this branch with zero-size rows.
            head, tail = out.shape[0], out.shape[1:]
            shape = (head // array.size, array.size) + tail
            return out[: shape[0] * array.size].reshape(shape)

        # Variable-length lists are first converted with `to_RegularArray()`.
        elif isinstance(array, (ak.contents.ListArray, ak.contents.ListOffsetArray)):
            return _impl(array.to_RegularArray())

        elif isinstance(array, ak.contents.RecordArray):
            raise ak._errors.wrap_error(
                ValueError(f"{module.__name__} does not support record structures")
            )

        elif isinstance(array, ak.contents.NumpyArray):
            return module.asarray(array.data)

        # Any Content subclass not handled above is treated as an internal error.
        elif isinstance(array, ak.contents.Content):
            raise ak._errors.wrap_error(
                AssertionError(f"unrecognized Content type: {type(array)}")
            )

        elif isinstance(array, Iterable):
            return module.asarray(array)

        else:
            raise ak._errors.wrap_error(
                ValueError(f"cannot convert {array} into {type(module.array([]))}")
            )

    if module.__name__ in ("jax.numpy", "cupy"):
        return _impl(array)
    elif module.__name__ == "numpy":
        layout = ak.operations.to_layout(array, allow_record=True, allow_other=True)

        if isinstance(layout, (ak.contents.Content, ak.record.Record)):
            return layout.to_numpy(allow_missing=allow_missing)
        else:
            return module.asarray(array)
    else:
        raise ak._errors.wrap_error(
            ValueError(f"{module.__name__} is not supported by to_arraylib")
        )


def maybe_posaxis(layout, axis, depth):
if isinstance(layout, ak.record.Record):
if axis == 0:
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,8 +575,8 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options):
pyarrow, mask_node, validbytes, length, options
)

def _to_numpy(self, allow_missing):
    """
    Delegate `_to_numpy` to the byte-masked form of this array.

    The array is converted with `to_ByteMaskedArray()` and that result's
    `_to_numpy` is called with the same `allow_missing` value.
    """
    byte_masked = self.to_ByteMaskedArray()
    return byte_masked._to_numpy(allow_missing)
def _to_backend_array(self, allow_missing, backend):
    """
    Delegate `_to_backend_array` to the byte-masked form of this array.

    The array is converted with `to_ByteMaskedArray()` and that result's
    `_to_backend_array` receives `allow_missing` and `backend` unchanged.
    """
    byte_masked = self.to_ByteMaskedArray()
    return byte_masked._to_backend_array(allow_missing, backend)

def _completely_flatten(self, backend, options):
branch, depth = self.branch_depth
Expand Down Expand Up @@ -697,7 +697,7 @@ def _to_list(self, behavior, json_conversions):

return out

def to_backend(self, backend: ak._backends.Backend) -> Self:
def _to_backend(self, backend: ak._backends.Backend) -> Self:
content = self._content.to_backend(backend)
mask = self._mask.to_nplike(backend.index_nplike)
return BitMaskedArray(
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,8 +958,8 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options):
options,
)

def _to_numpy(self, allow_missing):
    """
    Delegate `_to_numpy` to the indexed-option form of this array.

    The array is converted with `to_IndexedOptionArray64()` and that
    result's `_to_numpy` is called with the same `allow_missing` value.
    """
    indexed_option = self.to_IndexedOptionArray64()
    return indexed_option._to_numpy(allow_missing)
def _to_backend_array(self, allow_missing, backend):
    """
    Delegate `_to_backend_array` to the indexed-option form of this array.

    The array is converted with `to_IndexedOptionArray64()` and that
    result's `_to_backend_array` receives `allow_missing` and `backend`
    unchanged.
    """
    indexed_option = self.to_IndexedOptionArray64()
    return indexed_option._to_backend_array(allow_missing, backend)

def _completely_flatten(self, backend, options):
branch, depth = self.branch_depth
Expand Down Expand Up @@ -1066,7 +1066,7 @@ def _to_list(self, behavior, json_conversions):

return out

def to_backend(self, backend: ak._backends.Backend) -> Self:
def _to_backend(self, backend: ak._backends.Backend) -> Self:
content = self._content.to_backend(backend)
mask = self._mask.to_nplike(backend.index_nplike)
return ByteMaskedArray(
Expand Down
29 changes: 26 additions & 3 deletions src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -1019,9 +1019,25 @@ def _to_arrow(
raise ak._errors.wrap_error(NotImplementedError)

def to_numpy(self, allow_missing: bool = True):
    """
    Deprecated entry point for converting this layout to a NumPy array.

    Emits a deprecation notice through `ak._errors.deprecate`, moves the
    layout to the NumPy backend, and converts it with `_to_backend_array`.

    Args:
        allow_missing: forwarded unchanged to `_to_backend_array`.

    Returns:
        Whatever `_to_backend_array` returns for the NumPy backend.
    """
    ak._errors.deprecate(
        "`Content.to_numpy` is deprecated. Please replace calls to "
        "`Content.to_numpy(...)` with `Content.to_backend_array(..., backend='cpu')`.",
        "2.2.0",
    )
    numpy_backend = ak._backends.NumpyBackend.instance()
    # `_to_backend_array` takes (allow_missing, backend); the backend must be
    # passed explicitly, otherwise the call fails with a TypeError.
    return self.to_backend(numpy_backend)._to_backend_array(
        allow_missing, numpy_backend
    )

def _to_numpy(self, allow_missing: bool):
def to_backend_array(
    self, allow_missing: bool = True, *, backend: Backend | str | None = None
):
    """
    Convert this layout to an array of the requested backend.

    Args:
        allow_missing: forwarded unchanged to `_to_backend_array`.
        backend: target backend. `None` selects this layout's own
            `_backend`; any other value is first passed through
            `ak._backends.regularize_backend`.

    Returns:
        The result of `self._to_backend_array(allow_missing, <backend>)`.
    """
    target = (
        self._backend
        if backend is None
        else ak._backends.regularize_backend(backend)
    )
    return self._to_backend_array(allow_missing, target)

def _to_backend_array(self, allow_missing: bool, backend: ak._backends.Backend):
    """
    Base implementation: neither argument is used.

    Raises:
        The result of `ak._errors.wrap_error(NotImplementedError)`, always.
    """
    not_implemented = ak._errors.wrap_error(NotImplementedError)
    raise not_implemented

def drop_none(self):
Expand Down Expand Up @@ -1195,7 +1211,14 @@ def _offsets_and_flattened(
) -> tuple[ak.index.Index, Content]:
raise ak._errors.wrap_error(NotImplementedError)

def to_backend(self, backend: Backend) -> Self:
def to_backend(self, backend: Backend | str | None = None) -> Self:
    """
    Move this layout to another backend.

    Args:
        backend: target backend. `None` selects this layout's own
            `_backend`; any other value is first passed through
            `ak._backends.regularize_backend`.

    Returns:
        The result of `self._to_backend(<backend>)`.
    """
    target = (
        self._backend
        if backend is None
        else ak._backends.regularize_backend(backend)
    )
    return self._to_backend(target)

def _to_backend(self, backend: Backend) -> Self:
    """
    Base implementation: `backend` is not used.

    Raises:
        The result of `ak._errors.wrap_error(NotImplementedError)`, always.
    """
    not_implemented = ak._errors.wrap_error(NotImplementedError)
    raise not_implemented

def with_parameter(self, key: str, value: Any) -> Self:
Expand Down
10 changes: 6 additions & 4 deletions src/awkward/contents/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,14 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options):
else:
dtype = np.dtype(options["emptyarray_to"])
next = ak.contents.NumpyArray(
numpy.empty(length, dtype), self._parameters, backend=self._backend
numpy.empty(length, dtype),
parameters=self._parameters,
backend=self._backend,
)
return next._to_arrow(pyarrow, mask_node, validbytes, length, options)

def _to_numpy(self, allow_missing):
    """
    Return a zero-length `np.float64` array from this layout's own nplike.

    `allow_missing` is accepted but not used.
    """
    nplike = self._backend.nplike
    return nplike.empty(0, dtype=np.float64)
def _to_backend_array(self, allow_missing, backend):
    """
    Return a zero-length `np.float64` array from `backend.nplike`.

    `allow_missing` is accepted but not used.
    """
    nplike = backend.nplike
    return nplike.empty(0, dtype=np.float64)

def _completely_flatten(self, backend, options):
    """Return a new empty list; `backend` and `options` are not used."""
    flattened = []
    return flattened
Expand Down Expand Up @@ -345,7 +347,7 @@ def to_packed(self) -> Self:
def _to_list(self, behavior, json_conversions):
    """Return a new empty list; `behavior` and `json_conversions` are not used."""
    items = []
    return items

def to_backend(self, backend: ak._backends.Backend) -> Self:
def _to_backend(self, backend: ak._backends.Backend) -> Self:
    """
    Build a new `EmptyArray` on `backend`.

    The new array receives this array's `_parameters` unchanged.
    """
    params = self._parameters
    return EmptyArray(parameters=params, backend=backend)

def _is_equal_to(self, other, index_dtype, numpyarray):
Expand Down
6 changes: 3 additions & 3 deletions src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,8 +958,8 @@ def _to_arrow(self, pyarrow, mask_node, validbytes, length, options):
)
return next2._to_arrow(pyarrow, mask_node, validbytes, length, options)

def _to_numpy(self, allow_missing):
    """
    Delegate `_to_numpy` to the projected form of this array.

    The array is resolved with `project()` and that result's `_to_numpy`
    is called with the same `allow_missing` value.
    """
    projected = self.project()
    return projected._to_numpy(allow_missing)
def _to_backend_array(self, allow_missing, backend):
    """
    Delegate `_to_backend_array` to the projected form of this array.

    The array is resolved with `project()` and that result's
    `_to_backend_array` receives `allow_missing` and `backend` unchanged.
    """
    projected = self.project()
    return projected._to_backend_array(allow_missing, backend)

def _completely_flatten(self, backend, options):
    """
    Delegate `_completely_flatten` to the projected form of this array.

    `backend` and `options` are forwarded unchanged to the result of
    `project()`.
    """
    projected = self.project()
    return projected._completely_flatten(backend, options)
Expand Down Expand Up @@ -1053,7 +1053,7 @@ def _to_list(self, behavior, json_conversions):
nextcontent = self._content._carry(ak.index.Index(index), False)
return nextcontent._to_list(behavior, json_conversions)

def to_backend(self, backend: ak._backends.Backend) -> Self:
def _to_backend(self, backend: ak._backends.Backend) -> Self:
    """
    Rebuild this `IndexedArray` on `backend`.

    The content is moved with `to_backend(backend)` and the index with
    `to_nplike(backend.index_nplike)`; this array's `_parameters` are
    passed to the new array unchanged.
    """
    moved_content = self._content.to_backend(backend)
    moved_index = self._index.to_nplike(backend.index_nplike)
    return IndexedArray(moved_index, moved_content, parameters=self._parameters)
Expand Down
Loading

0 comments on commit 1e22929

Please sign in to comment.