Skip to content

Commit

Permalink
Implemented Content.to_list and using that instead of v2_to_v1.
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Nov 10, 2021
1 parent 544ad94 commit ee0c2cb
Show file tree
Hide file tree
Showing 22 changed files with 342 additions and 113 deletions.
1 change: 1 addition & 0 deletions src/awkward/_v2/_connect/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def popbuffers(paarray, awkwardarrow_type, storage_type, buffers):
paarray.indices, None, storage_type.index_type, buffers
)
index = masked_index.content.data

if not isinstance(masked_index, ak._v2.contents.UnmaskedArray):
mask = masked_index.mask_as_bool(valid_when=False)
if mask.any():
Expand Down
16 changes: 15 additions & 1 deletion src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from awkward._v2.forms.form import _parameters_equal

np = ak.nplike.NumpyMetadata.instance()
numpy = ak.nplike.Numpy.instance()


class BitMaskedArray(Content):
Expand Down Expand Up @@ -228,7 +229,7 @@ def mask_as_bool(self, valid_when=None, nplike=None):
bytemask.to(nplike),
self._mask.to(nplike),
len(self._mask),
0 if valid_when else 1,
0 if valid_when == self._valid_when else 1,
self._lsb_order,
)
)
Expand Down Expand Up @@ -568,3 +569,16 @@ def packed(self):
self._identifier,
self._parameters,
)

def _to_list(self, behavior):
    """Convert this bit-masked layout into a Python list.

    A custom conversion from ``self._to_list_custom`` takes precedence when
    it yields a result. Otherwise every position whose mask entry is valid
    receives the converted content value and all other positions stay
    ``None``.
    """
    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    # The boolean mask is cut down to the logical length of the array
    # (presumably because the bit mask is padded to whole bytes).
    valid = self.mask_as_bool(valid_when=True, nplike=numpy)[: self._length]
    values = self._content._to_list(behavior)

    result = [None] * self._length
    for position, keep in enumerate(valid):
        if keep:
            result[position] = values[position]
    return result
13 changes: 13 additions & 0 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,3 +844,16 @@ def packed(self):
self._identifier,
self._parameters,
)

def _to_list(self, behavior):
    """Convert this byte-masked layout into a Python list.

    A custom conversion from ``self._to_list_custom`` takes precedence when
    it yields a result. Otherwise the output has one slot per mask entry:
    valid slots hold the converted content value, the rest hold ``None``.
    """
    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    valid = self.mask_as_bool(valid_when=True, nplike=numpy)
    values = self._content._to_list(behavior)

    result = [None] * len(self._mask)
    for position, keep in enumerate(valid):
        if keep:
            result[position] = values[position]
    return result
15 changes: 15 additions & 0 deletions src/awkward/_v2/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,3 +1179,18 @@ def recursively_apply(
"function_name": function_name,
},
)

def tolist(self, behavior=None):
    """Alias of ``to_list``; forwards ``behavior`` unchanged."""
    converted = self.to_list(behavior)
    return converted

def to_list(self, behavior=None):
    """Return the contents of this layout as nested Python objects.

    The layout is packed first (``self.packed()``) and the conversion is
    then performed by the packed layout's ``_to_list``.
    """
    compact = self.packed()
    return compact._to_list(behavior)

def _to_list_custom(self, behavior):
    """Convert through a user-defined array class, if one applies.

    The array class for this layout is looked up with
    ``ak._v2._util.arrayclass``. When that class overrides ``__getitem__``
    relative to ``ak._v2.highlevel.Array``, the layout is wrapped in it and
    each element is fetched through the overridden ``__getitem__``.

    Returns ``None`` when no override is present, signalling that the caller
    should fall back to its built-in conversion.
    """
    array_cls = ak._v2._util.arrayclass(self, behavior)
    if array_cls.__getitem__ is ak._v2.highlevel.Array.__getitem__:
        return None

    wrapped = array_cls(self)
    return [wrapped[i] for i in range(len(self))]
3 changes: 3 additions & 0 deletions src/awkward/_v2/contents/emptyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,6 @@ def continuation():

def packed(self):
    """Return this object itself; no repacking is performed here."""
    return self

def _to_list(self, behavior):
    """Return a new empty list; ``behavior`` is not consulted."""
    return list()
12 changes: 12 additions & 0 deletions src/awkward/_v2/contents/indexedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1040,3 +1040,15 @@ def continuation():

def packed(self):
    """Project this layout, then return the packed form of the projection."""
    projected = self.project()
    return projected.packed()

def _to_list(self, behavior):
    """Convert this indexed layout into a Python list.

    A custom conversion from ``self._to_list_custom`` takes precedence when
    it yields a result. Otherwise each output element is the converted
    content value selected by the corresponding index entry.
    """
    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    positions = self._index.to(numpy)
    values = self._content._to_list(behavior)
    return [values[position] for position in positions]
13 changes: 13 additions & 0 deletions src/awkward/_v2/contents/indexedoptionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1537,3 +1537,16 @@ def packed(self):
self._identifier,
self._parameters,
)

def _to_list(self, behavior):
    """Convert this option-type indexed layout into a Python list.

    A custom conversion from ``self._to_list_custom`` takes precedence when
    it yields a result. Otherwise non-negative index entries select the
    converted content value at that position, and negative entries produce
    ``None``.
    """
    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    positions = self._index.to(numpy)
    values = self._content._to_list(behavior)
    return [
        values[position] if position >= 0 else None for position in positions
    ]
3 changes: 3 additions & 0 deletions src/awkward/_v2/contents/listarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,3 +1137,6 @@ def continuation():

def packed(self):
    """Convert via ``toListOffsetArray64(True)`` and pack the result."""
    as_offsets = self.toListOffsetArray64(True)
    return as_offsets.packed()

def _to_list(self, behavior):
    """Convert to a Python list by reusing ``ListOffsetArray._to_list``.

    The ``ListOffsetArray`` implementation is invoked as a plain function
    with this object as ``self``.
    """
    convert = ListOffsetArray._to_list
    return convert(self, behavior)
30 changes: 30 additions & 0 deletions src/awkward/_v2/contents/listoffsetarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1817,3 +1817,33 @@ def packed(self):
return ListOffsetArray(
next._offsets, content, next._identifier, next._parameters
)

def _to_list(self, behavior):
    """Convert this list layout into a list of Python objects.

    The ``"__array__"`` parameter selects the element representation:

    * ``"bytestring"``: each element is a ``bytes`` slice of the content.
    * ``"string"``: as above, decoded with ``errors="surrogateescape"``.
    * anything else: a custom conversion from ``self._to_list_custom`` is
      used if it yields a result; otherwise each element is a slice of the
      converted content list.

    Element boundaries come from ``self.starts`` and ``self.stops``.
    """
    array_kind = self.parameter("__array__")

    if array_kind == "bytestring" or array_kind == "string":
        raw = ak._v2._util.tobytes(self._content.data)
        starts, stops = self.starts, self.stops
        # Lazy, so slicing and decoding stay interleaved element by element.
        chunks = (raw[starts[i] : stops[i]] for i in range(len(starts)))
        if array_kind == "bytestring":
            return list(chunks)
        return [chunk.decode(errors="surrogateescape") for chunk in chunks]

    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    items = self._content._to_list(behavior)
    starts, stops = self.starts, self.stops
    return [items[starts[i] : stops[i]] for i in range(len(starts))]
14 changes: 14 additions & 0 deletions src/awkward/_v2/contents/numpyarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,3 +1071,17 @@ def continuation():

def packed(self):
    """Return ``self.contiguous()`` converted with ``toRegularArray()``."""
    compact = self.contiguous()
    return compact.toRegularArray()

def _to_list(self, behavior):
    """Convert the underlying buffer into Python objects.

    The ``"__array__"`` parameter selects the representation:

    * ``"byte"``: the whole buffer as a single ``bytes`` object.
    * ``"char"``: the whole buffer decoded to ``str`` with
      ``errors="surrogateescape"``.
    * anything else: a custom conversion from ``self._to_list_custom`` is
      used if it yields a result; otherwise ``self._data.tolist()``.
    """
    array_kind = self.parameter("__array__")

    if array_kind == "byte":
        return ak._v2._util.tobytes(self._data)

    if array_kind == "char":
        raw = ak._v2._util.tobytes(self._data)
        return raw.decode(errors="surrogateescape")

    custom = self._to_list_custom(behavior)
    if custom is None:
        return self._data.tolist()
    return custom
30 changes: 30 additions & 0 deletions src/awkward/_v2/contents/recordarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,3 +777,33 @@ def packed(self):
self._identifier,
self._parameters,
)

def _to_list(self, behavior):
    """Convert this record layout into a list of Python objects.

    Resolution order:

    1. A custom conversion from ``self._to_list_custom``, if it yields a
       result.
    2. If ``ak._v2._util.recordclass`` resolves to something other than
       ``ak._v2.highlevel.Record``, each record ``self[i]`` is wrapped in
       that class.
    3. Otherwise each record becomes a ``tuple`` (when ``self.is_tuple``)
       or a ``dict`` keyed by ``self._fields``.
    """
    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    record_cls = ak._v2._util.recordclass(self, behavior)
    if record_cls is not ak._v2.highlevel.Record:
        return [record_cls(self[i]) for i in range(self._length)]

    as_tuple = self.is_tuple
    names = None if as_tuple else self._fields

    # Convert each field once, then assemble the records row by row.
    columns = [x._to_list(behavior) for x in self._contents]
    count = self._length

    if as_tuple:
        return [tuple(column[i] for column in columns) for i in range(count)]

    return [
        dict(zip(names, [column[i] for column in columns])) for i in range(count)
    ]
31 changes: 31 additions & 0 deletions src/awkward/_v2/contents/regulararray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,3 +1013,34 @@ def packed(self):
return RegularArray(
content, self._size, self._length, self._identifier, self._parameters
)

def _to_list(self, behavior):
    """Convert this fixed-size list layout into a list of Python objects.

    Element ``i`` covers content positions ``i * size`` up to (not
    including) ``(i + 1) * size``. The ``"__array__"`` parameter selects the
    element representation:

    * ``"bytestring"``: each element is a ``bytes`` slice of the content.
    * ``"string"``: as above, decoded with ``errors="surrogateescape"``.
    * anything else: a custom conversion from ``self._to_list_custom`` is
      used if it yields a result; otherwise each element is a slice of the
      converted content list.
    """
    array_kind = self.parameter("__array__")

    if array_kind == "bytestring" or array_kind == "string":
        raw = ak._v2._util.tobytes(self._content.data)
        length, size = self._length, self._size
        # Lazy, so slicing and decoding stay interleaved element by element.
        chunks = (raw[i * size : (i + 1) * size] for i in range(length))
        if array_kind == "bytestring":
            return list(chunks)
        return [chunk.decode(errors="surrogateescape") for chunk in chunks]

    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    items = self._content._to_list(behavior)
    length, size = self._length, self._size
    return [items[i * size : (i + 1) * size] for i in range(length)]
21 changes: 18 additions & 3 deletions src/awkward/_v2/contents/unionarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def _carry(self, carry, allow_lazy, exception):
def project(self, index):
nplike = self.nplike
lentags = len(self._tags)
assert len(self._index) == lentags
assert len(self._index) >= lentags
lenout = ak._v2.index.Index64.empty(1, nplike)
tmpcarry = ak._v2.index.Index64.empty(lentags, nplike)
self._handle_error(
Expand Down Expand Up @@ -958,17 +958,18 @@ def packed(self):
nplike = self._tags.nplike

tags = self._tags.to(nplike)
original_index = index = self._index.to(nplike)
original_index = index = self._index.to(nplike)[: len(tags)]

contents = list(self._contents)

for tag in range(len(self._contents)):
is_tag = tags == tag
num_tag = nplike.count_nonzero(is_tag)

if len(contents[tag]) > num_tag:
if original_index is index:
index = index.copy()
index[is_tag] = nplike.arange(num_tag)
index[is_tag] = nplike.arange(num_tag, dtype=index.dtype)
contents[tag] = self.project(tag)

contents[tag] = contents[tag].packed()
Expand All @@ -980,3 +981,17 @@ def packed(self):
self._identifier,
self._parameters,
)

def _to_list(self, behavior):
    """Convert this union layout into a Python list.

    A custom conversion from ``self._to_list_custom`` takes precedence when
    it yields a result. Otherwise, for each position ``i``, the tag picks
    which converted content to read from and ``index[i]`` picks the element
    within it.
    """
    custom = self._to_list_custom(behavior)
    if custom is not None:
        return custom

    tags = self._tags.to(numpy)
    index = self._index.to(numpy)
    converted = [x._to_list(behavior) for x in self._contents]

    return [converted[tag][index[i]] for i, tag in enumerate(tags)]
7 changes: 7 additions & 0 deletions src/awkward/_v2/contents/unmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,10 @@ def continuation():

def packed(self):
    """Return a new ``UnmaskedArray`` wrapping the packed content.

    The identifier and parameters are passed through unchanged.
    """
    packed_content = self._content.packed()
    return UnmaskedArray(packed_content, self._identifier, self._parameters)

def _to_list(self, behavior):
    """Convert to a Python list, deferring to the wrapped content.

    A custom conversion from ``self._to_list_custom`` takes precedence when
    it yields a result; otherwise the content's own ``_to_list`` output is
    returned as-is.
    """
    custom = self._to_list_custom(behavior)
    if custom is None:
        return self._content._to_list(behavior)
    return custom
Loading

0 comments on commit ee0c2cb

Please sign in to comment.