Skip to content

Commit

Permalink
C++ refactoring: Merge and Simplify Types (#1082)
Browse files Browse the repository at this point in the history
* Draft/start work on implementing simplify uniontype

* WIP Implementing mergemany

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Implementing mergemany for all arrays

* Testing addition on Simplify Types

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Implementation of mergeable

* Simplify implementations

* Implementation of merge_as_union

* Clean up - simplify/mergeable/merging_strategy/reverse_merge/mergemany

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fixing all mergemany tests

* Adding concatenate for typetracer, fixing tests, using form's parameters comparison

* Updating typetracer concatenate

* Adding tests for typetracer in mergemany and simplify

* Fixing typetracer tests

* Fixing typetracer tests

* Removing conflict on typetracer concatenate

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ioanaif and pre-commit-ci[bot] authored Oct 15, 2021
1 parent 17dcc99 commit 7d059bb
Show file tree
Hide file tree
Showing 21 changed files with 5,247 additions and 68 deletions.
15 changes: 11 additions & 4 deletions src/awkward/_v2/_typetracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,6 @@ def __getitem__(self, where):
length2 = None

shape = (Interval(length1, length2),) + self._shape[1:]

return TypeTracerArray(
self._dtype, shape, self._fill_zero, self._fill_other
)
Expand Down Expand Up @@ -633,9 +632,17 @@ def unique(self, *args, **kwargs):
# array
raise NotImplementedError

def concatenate(self, *args, **kwargs):
    """Unimplemented placeholder; the expected argument is ``arrays``.

    Raises:
        NotImplementedError: always, regardless of the arguments given.
    """
    raise NotImplementedError
def concatenate(self, arrays):
    """Trace the dtype and shape produced by concatenating ``arrays``.

    No element data is combined; only zero-length NumPy arrays are
    concatenated so that NumPy itself decides the resulting dtype.

    Args:
        arrays: Non-empty sequence of objects exposing ``shape`` and
            ``dtype``. Adjacent entries must agree on ``shape[1:]``.

    Returns:
        A ``TypeTracerArray`` whose dtype is the one ``numpy.concatenate``
        yields for *all* input dtypes together, and whose shape is the sum
        of the leading lengths followed by the first array's trailing
        dimensions.

    Raises:
        IndexError: if ``arrays`` is empty.
        AssertionError: if two adjacent arrays differ in ``shape[1:]``.
    """
    first = arrays[0]
    length = first.shape[0]
    for previous, current in zip(arrays, arrays[1:]):
        assert previous.shape[1:] == current.shape[1:]
        length += current.shape[0]

    # Promote across every input, not just the last adjacent pair; otherwise
    # e.g. [float64, int32, int32] would wrongly be traced as int32.
    dtype = numpy.concatenate(
        [numpy.empty(0, array.dtype) for array in arrays]
    ).dtype
    return TypeTracerArray(dtype, (length,) + first.shape[1:])

def repeat(self, *args, **kwargs):
# array, int
Expand Down
55 changes: 55 additions & 0 deletions src/awkward/_v2/contents/bitmaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from awkward._v2.contents.content import Content
from awkward._v2.contents.bytemaskedarray import ByteMaskedArray
from awkward._v2.forms.bitmaskedform import BitMaskedForm
from awkward._v2.forms.form import _parameters_equal

np = ak.nplike.NumpyMetadata.instance()

Expand Down Expand Up @@ -293,6 +294,60 @@ def _getitem_next(self, head, tail, advanced):
else:
raise AssertionError(repr(head))

def simplify_optiontype(self):
    """Simplify this array when its content is itself indexed/option-type.

    Returns:
        When ``self.content`` is an ``IndexedArray``, ``IndexedOptionArray``,
        ``ByteMaskedArray``, ``BitMaskedArray`` or ``UnmaskedArray``: the
        result of converting this array with ``toIndexedOptionArray64()``
        and calling ``simplify_optiontype()`` on the converted array.
        Otherwise ``self`` unchanged.
    """
    contents = ak._v2.contents
    # Module names are all lowercase with a single "d" ("bitmaskedarray",
    # "unmaskedarray"); a misspelling here raises AttributeError at call time.
    nested_types = (
        contents.indexedarray.IndexedArray,
        contents.indexedoptionarray.IndexedOptionArray,
        contents.bytemaskedarray.ByteMaskedArray,
        contents.bitmaskedarray.BitMaskedArray,
        contents.unmaskedarray.UnmaskedArray,
    )
    if isinstance(self.content, nested_types):
        # Both steps must be *called*; returning the bare attributes would
        # hand the caller a bound method instead of an array.
        return self.toIndexedOptionArray64().simplify_optiontype()
    return self

def mergeable(self, other, mergebool):
    """Report whether ``other`` may be merged with this array.

    Args:
        other: The candidate array.
        mergebool: Forwarded unchanged to the nested ``mergeable`` calls.

    Returns:
        ``False`` when the two arrays' parameters are not equal according to
        ``_parameters_equal``; ``True`` when ``other`` is an ``EmptyArray``
        or a ``UnionArray``; otherwise whatever ``self.content.mergeable``
        answers, asked about ``other.content`` if ``other`` is an
        indexed/option-type array and about ``other`` itself if not.
    """
    contents = ak._v2.contents

    # A virtual array is judged by the array it stands for.
    if isinstance(other, contents.virtualarray.VirtualArray):
        return self.mergeable(other.array, mergebool)

    if not _parameters_equal(self._parameters, other._parameters):
        return False

    # The module is "emptyarray" (lowercase), like every other contents module.
    always_mergeable = (
        contents.emptyarray.EmptyArray,
        contents.unionarray.UnionArray,
    )
    if isinstance(other, always_mergeable):
        return True

    option_like = (
        contents.indexedarray.IndexedArray,
        contents.indexedoptionarray.IndexedOptionArray,
        contents.bytemaskedarray.ByteMaskedArray,
        contents.bitmaskedarray.BitMaskedArray,
        contents.unmaskedarray.UnmaskedArray,
    )
    if isinstance(other, option_like):
        # The answer must be returned; without the "return" this branch
        # falls through and yields None (falsy) for every option-type input.
        return self.content.mergeable(other.content, mergebool)

    return self.content.mergeable(other, mergebool)

def _reverse_merge(self, other):
    """Delegate the reverse merge to this array's IndexedOptionArray64 form.

    Args:
        other: Passed through unchanged to the converted array's
            ``_reverse_merge``.

    Returns:
        Whatever the converted array's ``_reverse_merge`` returns.
    """
    as_indexed_option = self.toIndexedOptionArray64()
    return as_indexed_option._reverse_merge(other)

def mergemany(self, others):
    """Merge this array with every array in ``others``.

    Args:
        others: Sized collection of arrays to merge with.

    Returns:
        ``self`` when ``others`` is empty; otherwise the result of
        ``mergemany(others)`` on this array's IndexedOptionArray64 form.
    """
    if len(others) != 0:
        as_indexed_option = self.toIndexedOptionArray64()
        return as_indexed_option.mergemany(others)
    # Nothing to merge with: hand back this very object, unconverted.
    return self

def _localindex(self, axis, depth):
    """Delegate ``_localindex`` to this array's ByteMaskedArray form.

    Args:
        axis: Passed through unchanged.
        depth: Passed through unchanged.

    Returns:
        Whatever the converted array's ``_localindex`` returns.
    """
    as_bytemasked = self.toByteMaskedArray()
    return as_bytemasked._localindex(axis, depth)

Expand Down
69 changes: 62 additions & 7 deletions src/awkward/_v2/contents/bytemaskedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from awkward._v2._slicing import NestedIndexError
from awkward._v2.contents.content import Content
from awkward._v2.forms.bytemaskedform import ByteMaskedForm
from awkward._v2.forms.form import _parameters_equal

np = ak.nplike.NumpyMetadata.instance()

Expand Down Expand Up @@ -270,7 +271,7 @@ def _getitem_next_jagged_generic(self, slicestarts, slicestops, slicecontent, ta
out2 = ak._v2.contents.indexedoptionarray.IndexedOptionArray(
outindex, out, self._identifier, self._parameters
)
return out2._simplify_optiontype()
return out2.simplify_optiontype()

def _getitem_next_jagged(self, slicestarts, slicestops, slicecontent, tail):
return self._getitem_next_jagged_generic(
Expand All @@ -294,7 +295,7 @@ def _getitem_next(self, head, tail, advanced):
self._identifier,
self._parameters,
)
return out2._simplify_optiontype()
return out2.simplify_optiontype()

elif ak._util.isstr(head):
return self._getitem_next_field(head, tail, advanced)
Expand All @@ -317,8 +318,62 @@ def _getitem_next(self, head, tail, advanced):
else:
raise AssertionError(repr(head))

def simplify_optiontype(self):
    """Simplify this array when its content is itself indexed/option-type.

    Returns:
        When ``self.content`` is an ``IndexedArray``, ``IndexedOptionArray``,
        ``ByteMaskedArray``, ``BitMaskedArray`` or ``UnmaskedArray``: the
        result of converting this array with ``toIndexedOptionArray64()``
        and calling ``simplify_optiontype()`` on the converted array.
        Otherwise ``self`` unchanged.
    """
    contents = ak._v2.contents
    # Module names are all lowercase with a single "d" ("bitmaskedarray",
    # "unmaskedarray"); a misspelling here raises AttributeError at call time.
    nested_types = (
        contents.indexedarray.IndexedArray,
        contents.indexedoptionarray.IndexedOptionArray,
        contents.bytemaskedarray.ByteMaskedArray,
        contents.bitmaskedarray.BitMaskedArray,
        contents.unmaskedarray.UnmaskedArray,
    )
    if isinstance(self.content, nested_types):
        # Both steps must be *called*; returning the bare attributes would
        # hand the caller a bound method instead of an array.
        return self.toIndexedOptionArray64().simplify_optiontype()
    return self

def mergeable(self, other, mergebool):
    """Report whether ``other`` may be merged with this array.

    Args:
        other: The candidate array.
        mergebool: Forwarded unchanged to the nested ``mergeable`` calls.

    Returns:
        ``False`` when the two arrays' parameters are not equal according to
        ``_parameters_equal``; ``True`` when ``other`` is an ``EmptyArray``
        or a ``UnionArray``; otherwise whatever ``self.content.mergeable``
        answers, asked about ``other.content`` if ``other`` is an
        indexed/option-type array and about ``other`` itself if not.
    """
    contents = ak._v2.contents

    # A virtual array is judged by the array it stands for.
    if isinstance(other, contents.virtualarray.VirtualArray):
        return self.mergeable(other.array, mergebool)

    if not _parameters_equal(self._parameters, other._parameters):
        return False

    # The module is "emptyarray" (lowercase), like every other contents module.
    always_mergeable = (
        contents.emptyarray.EmptyArray,
        contents.unionarray.UnionArray,
    )
    if isinstance(other, always_mergeable):
        return True

    option_like = (
        contents.indexedarray.IndexedArray,
        contents.indexedoptionarray.IndexedOptionArray,
        contents.bytemaskedarray.ByteMaskedArray,
        contents.bitmaskedarray.BitMaskedArray,
        contents.unmaskedarray.UnmaskedArray,
    )
    if isinstance(other, option_like):
        # The answer must be returned; without the "return" this branch
        # falls through and yields None (falsy) for every option-type input.
        return self.content.mergeable(other.content, mergebool)

    return self.content.mergeable(other, mergebool)

def _reverse_merge(self, other):
    """Delegate the reverse merge to this array's IndexedOptionArray64 form.

    Args:
        other: Passed through unchanged to the converted array's
            ``_reverse_merge``.

    Returns:
        Whatever the converted array's ``_reverse_merge`` returns.
    """
    as_indexed_option = self.toIndexedOptionArray64()
    return as_indexed_option._reverse_merge(other)

def mergemany(self, others):
    """Merge this array with every array in ``others``.

    Args:
        others: Sized collection of arrays to merge with.

    Returns:
        ``self`` when ``others`` is empty; otherwise the result of
        ``mergemany(others)`` on this array's IndexedOptionArray64 form.
    """
    if len(others) != 0:
        as_indexed_option = self.toIndexedOptionArray64()
        return as_indexed_option.mergemany(others)
    # Nothing to merge with: hand back this very object, unconverted.
    return self

def _localindex(self, axis, depth):
posaxis = self._axis_wrap_if_negative(axis)
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
return self._localindex_axis0()
else:
Expand All @@ -333,7 +388,7 @@ def _localindex(self, axis, depth):
self._identifier,
self._parameters,
)
return out2._simplify_optiontype()
return out2.simplify_optiontype()

def _argsort_next(
self,
Expand Down Expand Up @@ -379,7 +434,7 @@ def _sort_next(
def _combinations(self, n, replacement, recordlookup, parameters, axis, depth):
if n < 1:
raise ValueError("in combinations, 'n' must be at least 1")
posaxis = self._axis_wrap_if_negative(axis)
posaxis = self.axis_wrap_if_negative(axis)
if posaxis == depth:
return self._combinations_axis0(n, replacement, recordlookup, parameters)
else:
Expand All @@ -393,7 +448,7 @@ def _combinations(self, n, replacement, recordlookup, parameters, axis, depth):
out2 = ak._v2.contents.indexedoptionarray.IndexedOptionArray(
outindex, out, parameters=parameters
)
return out2._simplify_optiontype()
return out2.simplify_optiontype()

def _reduce_next(
self,
Expand Down Expand Up @@ -523,7 +578,7 @@ def _reduce_next(
out.content,
None,
None,
)._simplify_optiontype()
).simplify_optiontype()

return ak._v2.contents.ListOffsetArray(
outoffsets,
Expand Down
Loading

0 comments on commit 7d059bb

Please sign in to comment.