Skip to content

Commit

Permalink
Merge pull request modin-project#52 from intel-go/ienkovich/concat-su…
Browse files Browse the repository at this point in the history
…b-execute

run sub-queries when rowid is accessed
  • Loading branch information
ienkovich authored Jun 23, 2020
2 parents e368399 + 102eee2 commit 0998044
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
18 changes: 15 additions & 3 deletions modin/experimental/engines/omnisci_on_ray/frame/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
dtypes=None,
op=None,
index_cols=None,
uses_rowid=False,
):
assert dtypes is not None

Expand Down Expand Up @@ -93,6 +94,8 @@ def __init__(
if partitions is not None:
self._filter_empties()

self._uses_rowid = uses_rowid

def id_str(self):
return f"frame${self.id}"

Expand Down Expand Up @@ -412,6 +415,7 @@ def _union_all(
for frame in [self] + other_modin_frames:
aligned_index = None
exprs = OrderedDict()
uses_rowid = False

if not ignore_index:
if frame._index_cols:
Expand All @@ -425,6 +429,7 @@ def _union_all(
aligned_index = ["__index__"]
exprs["__index__"] = frame.ref("__rowid__")
aligned_index_dtypes = [_get_dtype(int)]
uses_rowid = True
aligned_dtypes = aligned_index_dtypes + new_dtypes
else:
aligned_dtypes = new_dtypes
Expand All @@ -442,6 +447,7 @@ def _union_all(
dtypes=aligned_dtypes,
op=aligned_frame_op,
index_cols=aligned_index,
uses_rowid=uses_rowid,
)
)

Expand Down Expand Up @@ -628,7 +634,7 @@ def _execute(self):
if isinstance(self._op, FrameNode):
return

# MaskNode requires rowid which is available for executed frames only.
# Some frames require rowid which is available for executed frames only.
# Also there is a common pattern when MaskNode is executed to print
# frame. If we run the whole tree then any following frame usage will
# require re-compute. So we just execute MaskNode's operands.
Expand All @@ -640,12 +646,18 @@ def _execute(self):
self._partitions = new_partitions
self._op = FrameNode(self)

def _require_executed_base(self):
if isinstance(self._op, MaskNode):
return True
return self._uses_rowid

def _run_sub_queries(self):
if isinstance(self._op, FrameNode):
return

if isinstance(self._op, MaskNode):
self._op.input[0]._execute()
if self._require_executed_base():
for op in self._op.input:
op._execute()
else:
for frame in self._op.input:
frame._run_sub_queries()
Expand Down
14 changes: 14 additions & 0 deletions modin/experimental/engines/omnisci_on_ray/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,20 @@ def test_concat_many(self):

df_equals(ref, exp)

def test_concat_agg(self):
def concat(lib, df1, df2):
df1 = df1.groupby("a", as_index=False).agg(
{"b": "sum", "d": "sum", "e": "sum"}
)
df2 = df2.groupby("a", as_index=False).agg(
{"c": "sum", "b": "sum", "f": "sum"}
)
return lib.concat([df1, df2])

run_and_compare(
concat, data=self.data, data2=self.data2,
)


class TestGroupby:
data = {
Expand Down

0 comments on commit 0998044

Please sign in to comment.