Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++ refactoring: nbytes #1138

Merged
merged 8 commits into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,12 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Add this node's buffers to the ``largest`` accumulator.

    ``largest`` is the dict that the ``nbytes`` property later sums over.
    It is forwarded, unchanged, to the mask, then to the content, and
    finally to the identifier when one is set; those objects update it in
    place. Nothing is returned.
    """
    self.mask._nbytes_part(largest)
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is None:
        return
    ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
    """
    Pad by delegating to the byte-masked form of this array.

    Calls ``self.toByteMaskedArray()`` and returns whatever that object's
    ``_rpad`` returns for the same ``target``, ``axis``, ``depth`` and
    ``clip`` arguments.
    """
    as_bytemasked = self.toByteMaskedArray()
    return as_bytemasked._rpad(target, axis, depth, clip)

Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,12 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Register the mask, content and optional identifier in ``largest``.

    ``largest`` is the shared accumulator dict built by the ``nbytes``
    property; it is passed as-is to each child, which records its own
    buffer sizes in it. The identifier is visited last and only when it is
    not ``None``. Returns ``None``.
    """
    self.mask._nbytes_part(largest)
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is not None:
        ident._nbytes_part(largest)

def bytemask(self):
if not self._valid_when:
return self._mask
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,6 +1013,12 @@ def validityerror(self, path="layout"):
return paramcheck
return self._validityerror(path)

@property
def nbytes(self):
    """
    Total byte count reported by this layout tree.

    Seeds an accumulator dict with ``{0: 0}``, lets ``_nbytes_part`` fill
    it in place, and returns the sum of the dict's values.
    """
    buffer_sizes = {0: 0}
    self._nbytes_part(buffer_sizes)
    return sum(buffer_sizes.values())

def purelist_parameter(self, key):
    """
    Look up ``key`` through ``self.Form.purelist_parameter``.

    This object is passed explicitly as the first argument of that call,
    followed by ``key``; the call's result is returned unchanged.
    """
    lookup = self.Form.purelist_parameter
    return lookup(self, key)

Expand Down
4 changes: 4 additions & 0 deletions src/awkward/_v2/contents/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ def _reduce_next(
def _validityerror(self, path):
    """Return the empty string for every ``path``; ``path`` is not used."""
    return str()

def _nbytes_part(self, largest):
    """
    Register the identifier, if any, in ``largest``.

    This node contributes nothing of its own: ``largest`` (the accumulator
    dict summed by ``nbytes``) is only forwarded to ``self.identifier``
    when that attribute is not ``None``. Returns ``None``.
    """
    ident = self.identifier
    if ident is None:
        return
    ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis != depth:
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,6 +927,12 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Add the index, the content and the optional identifier to ``largest``.

    ``largest`` is the accumulator dict that ``nbytes`` sums afterwards.
    Each child receives the same dict and records its buffers in it; the
    identifier is skipped when it is ``None``. Returns ``None``.
    """
    self.index._nbytes_part(largest)
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is None:
        return
    ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,12 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Record this node's buffers in the ``largest`` accumulator dict.

    Visits, in order, ``self.index``, ``self.content`` and, when it is not
    ``None``, ``self.identifier``, handing each the same ``largest`` dict
    to update in place. Returns ``None``.
    """
    self.index._nbytes_part(largest)
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is not None:
        ident._nbytes_part(largest)

def bytemask(self):
out = ak._v2.index.Index8.empty(len(self.index), self.nplike)
self._handle_error(
Expand Down
7 changes: 7 additions & 0 deletions src/awkward/_v2/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,13 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Add starts, stops, content and the optional identifier to ``largest``.

    ``largest`` is the accumulator dict summed by ``nbytes``. It is handed
    to ``self.starts``, ``self.stops`` and ``self.content`` in that order,
    then to ``self.identifier`` if one is set. Returns ``None``.
    """
    self.starts._nbytes_part(largest)
    self.stops._nbytes_part(largest)
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is None:
        return
    ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
if not clip:
posaxis = self.axis_wrap_if_negative(axis)
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,12 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Add the offsets, the content and the optional identifier to ``largest``.

    ``largest`` is the accumulator dict that ``nbytes`` sums; every child
    visited here receives the same dict and updates it in place. The
    identifier is only visited when it is not ``None``. Returns ``None``.
    """
    self.offsets._nbytes_part(largest)
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is not None:
        ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
Expand Down
7 changes: 7 additions & 0 deletions src/awkward/_v2/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,13 @@ def _rpad(self, target, axis, depth, clip):
else:
return self.rpad_axis0(target, clip=True)

def _nbytes_part(self, largest):
    """
    Record this array's data buffer (and identifier, if any) in ``largest``.

    ``largest`` is the accumulator dict summed by ``nbytes``. For the key
    that identifies this array's buffer, it keeps the largest
    ``self.data.nbytes`` seen so far, so repeated views of one buffer are
    not double-counted. The identifier, when not ``None``, is then asked to
    record itself in the same dict. Returns ``None``.

    The key is the value of ``self.ptr`` itself, not ``id(self.ptr)``.
    NOTE(review): this assumes ``self.ptr`` is a hashable value (presumably
    the integer address of the underlying buffer) that is equal for views
    of the same buffer -- confirm against the ``ptr`` property. If ``ptr``
    builds a new int on every access, ``id()`` of that temporary is an
    address the interpreter is free to reuse immediately, so unrelated
    buffers could collapse onto one key and the total would be
    under-counted.
    """
    key = self.ptr
    size = self.data.nbytes
    if key not in largest or largest[key] < size:
        largest[key] = size

    ident = self.identifier
    if ident is not None:
        ident._nbytes_part(largest)

def _to_arrow(self, pyarrow, mask_node, validbytes, length, options):
if self._data.ndim != 1:
return self.toRegularArray()._to_arrow(
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/recordarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,12 @@ def _validityerror(self, path):
return sub
return ""

def _nbytes_part(self, largest):
    """
    Add every field's buffers, and the optional identifier, to ``largest``.

    Iterates ``self.contents`` in order and hands each entry the same
    ``largest`` accumulator dict (the one ``nbytes`` sums); afterwards the
    identifier is visited if it is not ``None``. Returns ``None``.
    """
    for field in self.contents:
        field._nbytes_part(largest)

    ident = self.identifier
    if ident is None:
        return
    ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
Expand Down
5 changes: 5 additions & 0 deletions src/awkward/_v2/contents/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,11 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Add the content and the optional identifier to ``largest``.

    ``largest`` is the accumulator dict summed by ``nbytes``; it is passed
    to ``self.content`` and then, when set, to ``self.identifier``.
    Returns ``None``.
    """
    self.content._nbytes_part(largest)

    ident = self.identifier
    if ident is not None:
        ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,12 @@ def _validityerror(self, path):
return sub
return ""

def _nbytes_part(self, largest):
    """
    Add every alternative's buffers, and the identifier, to ``largest``.

    Each entry of ``self.contents`` is given the same ``largest``
    accumulator dict (later summed by ``nbytes``), in iteration order; the
    identifier follows when it is not ``None``. Returns ``None``.
    """
    for alternative in self.contents:
        alternative._nbytes_part(largest)

    ident = self.identifier
    if ident is not None:
        ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
Expand Down
6 changes: 6 additions & 0 deletions src/awkward/_v2/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,12 @@ def _validityerror(self, path):
else:
return self._content.validityerror(path + ".content")

def _nbytes_part(self, largest):
    """
    Add the content and the optional identifier to ``largest``.

    ``largest`` is the accumulator dict summed by ``nbytes``; it is handed
    to ``self.content`` first and to ``self.identifier`` afterwards when
    one is set. Returns ``None``.
    """
    self.content._nbytes_part(largest)

    # FIXME: identifier is not counted in C++ code?
    ident = self.identifier
    if ident is None:
        return
    ident._nbytes_part(largest)

def _rpad(self, target, axis, depth, clip):
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
Expand Down
34 changes: 17 additions & 17 deletions src/awkward/_v2/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,25 +455,25 @@ def to_numpy(self, allow_missing=True):
"""
return ak._v2.operations.convert.to_numpy(self, allow_missing=allow_missing)

@property
def nbytes(self):
    """
    The total number of bytes in all the #ak.layout.Index,
    #ak.layout.Identities, and #ak.layout.NumpyArray buffers in this
    array tree.

    Note: this calculation takes overlapping buffers into account, to the
    extent that overlaps are not double-counted, but overlaps are currently
    assumed to be complete subsets of one another, and so it is
    theoretically possible (though unlikely) that this number is an
    underestimate of the true usage.

    It also does not count buffers that must be kept in memory because
    of ownership, but are not directly used in the array. Nor does it count
    the (small) C++ nodes or Python objects that reference the (large)
    array buffers.
    """
    return self._layout.nbytes

@property
def ndim(self):
Expand Down
5 changes: 5 additions & 0 deletions src/awkward/_v2/identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,8 @@ def referentially_equal(self, other):
and self._data.strides == other._data.strides
and self._data.dtype == other._data.dtype
)

def _nbytes_part(self, largest):
    """
    Record this identifier's buffer size in ``largest``.

    The entry is keyed by ``id(self.ref)``. It is created when missing and
    raised to ``self.data.nbytes`` when the stored value is smaller; an
    equal or larger stored value is left untouched. ``largest`` is updated
    in place and nothing is returned.
    """
    key = id(self.ref)
    size = self.data.nbytes
    if key in largest and largest[key] >= size:
        return
    largest[key] = size
5 changes: 5 additions & 0 deletions src/awkward/_v2/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ def to64(self):
def __copy__(self):
    """Return a new ``Index`` built from ``self._data.copy()``."""
    duplicated = self._data.copy()
    return Index(duplicated)

def _nbytes_part(self, largest):
    """
    Record this index's buffer size in ``largest``.

    The entry is keyed by ``id(self.data)``. It is created when missing and
    raised to ``self.data.nbytes`` when the stored value is smaller; an
    equal or larger stored value is left untouched. ``largest`` is updated
    in place and nothing is returned.
    """
    key = id(self.data)
    size = self.data.nbytes
    if key in largest and largest[key] >= size:
        return
    largest[key] = size


class Index8(Index):
_expected_dtype = np.dtype(np.int8)
Expand Down
Loading