Skip to content

Commit

Permalink
feat: add byteorder argument to to_buffers (#2095)
Browse files Browse the repository at this point in the history
  • Loading branch information
agoose77 authored Jan 10, 2023
1 parent f88fa8e commit 4db99b6
Show file tree
Hide file tree
Showing 27 changed files with 260 additions and 85 deletions.
4 changes: 1 addition & 3 deletions src/awkward/_connect/rdataframe/from_rdataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,7 @@ def cpp_fill_function(depth):
cpp_buffers_self.to_char_buffers[builder_type](builder)

array = ak.from_buffers(
form,
builder.length(),
buffers,
form, builder.length(), buffers, byteorder=ak._util.native_byteorder
)

if col == "awkward_index_":
Expand Down
5 changes: 3 additions & 2 deletions src/awkward/_do.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from awkward.contents.content import ActionType, Content
from awkward.forms import form
from awkward.record import Record
from awkward.typing import Any, AxisMaybeNone
from awkward.typing import Any, AxisMaybeNone, Literal

np = ak._nplikes.NumpyMetadata.instance()

Expand Down Expand Up @@ -76,6 +76,7 @@ def to_buffers(
form_key: str | None = "node{id}",
id_start: Integral = 0,
backend: Backend = None,
byteorder: Literal["<", ">"] = "<",
) -> tuple[form.Form, int, Mapping[str, Any]]:
if container is None:
container = {}
Expand Down Expand Up @@ -119,7 +120,7 @@ def getkey(layout, form, attribute):

form = content.form_with_key(form_key=form_key, id_start=id_start)

content._to_buffers(form, getkey, container, backend)
content._to_buffers(form, getkey, container, backend, byteorder)

return form, len(content), container

Expand Down
21 changes: 19 additions & 2 deletions src/awkward/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import numbers
import os
import re
import sys
from collections.abc import Iterable, Mapping, Sized

import packaging.version
Expand Down Expand Up @@ -69,8 +70,24 @@ def tobytes(array):
return array.tostring()


def little_endian(array):
return array.astype(array.dtype.newbyteorder("<"), copy=False)
native_byteorder = "<" if sys.byteorder == "little" else ">"


def native_to_byteorder(array, byteorder: str):
    """
    Args:
        array: nplike array, assumed to hold data in native byteorder
        byteorder (`"<"` or `">"`): desired byteorder

    Return `array` with its bytes arranged in the requested `byteorder`.

    If `byteorder` does not match `ak._util.native_byteorder`, the result of
    `array.byteswap(inplace=False)` is returned and `array` is left untouched.
    If it does match, `array` itself is returned (no copy is made).

    This function is _not_ idempotent; no metadata from `array` exists to
    determine its current byteorder, so calling it twice with a non-native
    `byteorder` swaps the bytes back again.
    """
    # Membership is tested against a tuple on purpose: `x in "<>"` is a
    # substring test, which would also accept "" and "<>" and then silently
    # byteswap the data because neither equals `native_byteorder`.
    assert byteorder in ("<", ">"), f"byteorder must be '<' or '>', not {byteorder!r}"
    if byteorder == native_byteorder:
        return array
    return array.byteswap(inplace=False)


def identifier_hash(str):
Expand Down
8 changes: 5 additions & 3 deletions src/awkward/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,13 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key = getkey(self, form, "mask")
container[key] = ak._util.little_endian(self._mask.raw(backend.index_nplike))
self._content._to_buffers(form.content, getkey, container, backend)
container[key] = ak._util.native_to_byteorder(
self._mask.raw(backend.index_nplike), byteorder
)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
tt = ak._typetracer.TypeTracer.instance()
Expand Down
8 changes: 5 additions & 3 deletions src/awkward/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,13 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key = getkey(self, form, "mask")
container[key] = ak._util.little_endian(self._mask.raw(backend.index_nplike))
self._content._to_buffers(form.content, getkey, container, backend)
container[key] = ak._util.native_to_byteorder(
self._mask.raw(backend.index_nplike), byteorder
)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
tt = ak._typetracer.TypeTracer.instance()
Expand Down
3 changes: 2 additions & 1 deletion src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import awkward as ak
from awkward._backends import Backend
from awkward.forms.form import Form, _parameters_equal
from awkward.typing import Any, AxisMaybeNone, Self, TypeAlias, TypedDict
from awkward.typing import Any, AxisMaybeNone, Literal, Self, TypeAlias, TypedDict

np = ak._nplikes.NumpyMetadata.instance()
numpy = ak._nplikes.Numpy.instance()
Expand Down Expand Up @@ -197,6 +197,7 @@ def _to_buffers(
getkey: Callable[[Content, Form, str], str],
container: MutableMapping[str, Any] | None,
backend: Backend,
byteorder: Literal["<", ">"],
) -> tuple[Form, int, Mapping[str, Any]]:
raise ak._errors.wrap_error(NotImplementedError)

Expand Down
2 changes: 1 addition & 1 deletion src/awkward/contents/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def simplified(cls, *, parameters=None, backend=None):
def _form_with_key(self, getkey):
return self.form_cls(parameters=self._parameters, form_key=getkey(self))

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)

def _to_typetracer(self, forget_length: bool) -> Self:
Expand Down
8 changes: 5 additions & 3 deletions src/awkward/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,13 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key = getkey(self, form, "index")
container[key] = ak._util.little_endian(self._index.raw(backend.index_nplike))
self._content._to_buffers(form.content, getkey, container, backend)
container[key] = ak._util.native_to_byteorder(
self._index.raw(backend.index_nplike), byteorder
)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
index = self._index.to_nplike(ak._typetracer.TypeTracer.instance())
Expand Down
8 changes: 5 additions & 3 deletions src/awkward/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,13 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key = getkey(self, form, "index")
container[key] = ak._util.little_endian(self._index.raw(backend.index_nplike))
self._content._to_buffers(form.content, getkey, container, backend)
container[key] = ak._util.native_to_byteorder(
self._index.raw(backend.index_nplike), byteorder
)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
index = self._index.to_nplike(ak._typetracer.TypeTracer.instance())
Expand Down
12 changes: 8 additions & 4 deletions src/awkward/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,17 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key1 = getkey(self, form, "starts")
key2 = getkey(self, form, "stops")
container[key1] = ak._util.little_endian(self._starts.raw(backend.index_nplike))
container[key2] = ak._util.little_endian(self._stops.raw(backend.index_nplike))
self._content._to_buffers(form.content, getkey, container, backend)
container[key1] = ak._util.native_to_byteorder(
self._starts.raw(backend.index_nplike), byteorder
)
container[key2] = ak._util.native_to_byteorder(
self._stops.raw(backend.index_nplike), byteorder
)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
tt = ak._typetracer.TypeTracer.instance()
Expand Down
8 changes: 5 additions & 3 deletions src/awkward/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,13 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key = getkey(self, form, "offsets")
container[key] = ak._util.little_endian(self._offsets.raw(backend.index_nplike))
self._content._to_buffers(form.content, getkey, container, backend)
container[key] = ak._util.native_to_byteorder(
self._offsets.raw(backend.index_nplike), byteorder
)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
offsets = self._offsets.to_nplike(ak._typetracer.TypeTracer.instance())
Expand Down
6 changes: 4 additions & 2 deletions src/awkward/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,12 @@ def _form_with_key(self, getkey):
form_key=getkey(self),
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key = getkey(self, form, "data")
container[key] = ak._util.little_endian(self._raw(backend.nplike))
container[key] = ak._util.native_to_byteorder(
self._raw(backend.nplike), byteorder
)

def _to_typetracer(self, forget_length: bool) -> Self:
backend = ak._backends.TypeTracerBackend.instance()
Expand Down
10 changes: 7 additions & 3 deletions src/awkward/contents/recordarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,18 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
if self._fields is None:
for i, content in enumerate(self._contents):
content._to_buffers(form.content(i), getkey, container, backend)
content._to_buffers(
form.content(i), getkey, container, backend, byteorder
)
else:
for field, content in zip(self._fields, self._contents):
content._to_buffers(form.content(field), getkey, container, backend)
content._to_buffers(
form.content(field), getkey, container, backend, byteorder
)

def _to_typetracer(self, forget_length: bool) -> Self:
backend = ak._backends.TypeTracerBackend.instance()
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
self._content._to_buffers(form.content, getkey, container, backend)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
return RegularArray(
Expand Down
12 changes: 8 additions & 4 deletions src/awkward/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,14 +391,18 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
key1 = getkey(self, form, "tags")
key2 = getkey(self, form, "index")
container[key1] = ak._util.little_endian(self._tags.raw(backend.index_nplike))
container[key2] = ak._util.little_endian(self._index.raw(backend.index_nplike))
container[key1] = ak._util.native_to_byteorder(
self._tags.raw(backend.index_nplike), byteorder
)
container[key2] = ak._util.native_to_byteorder(
self._index.raw(backend.index_nplike), byteorder
)
for i, content in enumerate(self._contents):
content._to_buffers(form.content(i), getkey, container, backend)
content._to_buffers(form.content(i), getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
tt = ak._typetracer.TypeTracer.instance()
Expand Down
4 changes: 2 additions & 2 deletions src/awkward/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ def _form_with_key(self, getkey):
form_key=form_key,
)

def _to_buffers(self, form, getkey, container, backend):
def _to_buffers(self, form, getkey, container, backend, byteorder):
assert isinstance(form, self.form_cls)
self._content._to_buffers(form.content, getkey, container, backend)
self._content._to_buffers(form.content, getkey, container, backend, byteorder)

def _to_typetracer(self, forget_length: bool) -> Self:
return UnmaskedArray(
Expand Down
2 changes: 2 additions & 0 deletions src/awkward/forms/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ def length_zero_array(
container={"": b"\x00\x00\x00\x00\x00\x00\x00\x00"},
buffer_key="",
backend=backend,
byteorder=ak._util.native_byteorder,
highlevel=highlevel,
behavior=behavior,
simplify=False,
Expand All @@ -446,6 +447,7 @@ def length_one_array(self, *, backend=numpy_backend, highlevel=True, behavior=No
},
buffer_key="",
backend=backend,
byteorder=ak._util.native_byteorder,
highlevel=highlevel,
behavior=behavior,
simplify=False,
Expand Down
13 changes: 11 additions & 2 deletions src/awkward/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1433,7 +1433,10 @@ def numba_type(self):
def __getstate__(self):
packed = ak.operations.to_packed(self._layout, highlevel=False)
form, length, container = ak.operations.to_buffers(
packed, buffer_key="{form_key}-{attribute}", form_key="node{id}"
packed,
buffer_key="{form_key}-{attribute}",
form_key="node{id}",
byteorder="<",
)
if self._behavior is ak.behavior:
behavior = None
Expand All @@ -1449,6 +1452,7 @@ def __setstate__(self, state):
container,
highlevel=False,
buffer_key="{form_key}-{attribute}",
byteorder="<",
)
self.layout = layout
self.behavior = behavior
Expand Down Expand Up @@ -2067,7 +2071,10 @@ def numba_type(self):
def __getstate__(self):
packed = ak.operations.to_packed(self._layout, highlevel=False)
form, length, container = ak.operations.to_buffers(
packed.array, buffer_key="{form_key}-{attribute}", form_key="node{id}"
packed.array,
buffer_key="{form_key}-{attribute}",
form_key="node{id}",
byteorder="<",
)
if self._behavior is ak.behavior:
behavior = None
Expand All @@ -2083,6 +2090,7 @@ def __setstate__(self, state):
container,
highlevel=False,
buffer_key="{form_key}-{attribute}",
byteorder="<",
)
layout = ak.record.Record(layout, at)
self.layout = layout
Expand Down Expand Up @@ -2435,6 +2443,7 @@ def snapshot(self):
container,
buffer_key="{form_key}-{attribute}",
backend="cpu",
byteorder=ak._util.native_byteorder,
highlevel=True,
behavior=self._behavior,
simplify=True,
Expand Down
1 change: 1 addition & 0 deletions src/awkward/operations/ak_from_avro_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def _impl(form, length, container, highlevel, behavior):
container=container,
buffer_key="{form_key}-{attribute}",
backend="cpu",
byteorder=ak._util.native_byteorder,
highlevel=highlevel,
behavior=behavior,
simplify=True,
Expand Down
Loading

0 comments on commit 4db99b6

Please sign in to comment.