Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TensorIR][M2a] Reorder #8767

Merged
merged 18 commits into from
Aug 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions include/tvm/tir/schedule/schedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,19 @@ class ScheduleNode : public runtime::Object {
* \return The new loops after split
*/
virtual Array<LoopRV> Split(const LoopRV& loop_rv, const Array<Optional<ExprRV>>& factors) = 0;
/*!
 * \brief Reorder a list of loops. It doesn't require the loops to be consecutive.
 * It requires:
 * 1) The loops are in the same chain. That means: the loops can be ordered to [l_1, l_2, ... ,
 * l_n] where l_i is an ancestor of l_{i+1} and there are only single-branch loops between
 * l_1 and l_n (which also indicates they are under the same scope).
 * 2) After reordering, the domain of an outer loop cannot depend on any of the inner loops.
 * 3) For every block under the loop nests, its block binding must be affine, and the block
 * variables must be either data parallel or reduction.
 * 4) No duplicated loops are allowed in the arguments.
 * \param ordered_loop_rvs The loops in the new order, given from outermost to innermost
 */
virtual void Reorder(const Array<LoopRV>& ordered_loop_rvs) = 0;
/******** Schedule: Manipulate ForKind ********/
/*!
* \brief Parallelize the input loop. It requires:
Expand Down
59 changes: 59 additions & 0 deletions python/tvm/tir/schedule/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,65 @@ def after_split(a: ty.handle, b: ty.handle) -> None:
# that there is at most one None in `factors`
return _ffi_api.ScheduleSplit(self, loop, factors) # type: ignore # pylint: disable=no-member

def reorder(self, *ordered_loops: LoopRV) -> None:
    """
    Reorder a list of loops. It doesn't require the loops to be consecutive.
    It requires:
    1) The loops are in the same chain. That means: the loops can be ordered to [l_1, l_2, ... ,
    l_n] where l_i is an ancestor of l_{i+1} and there are only single-branch loops between
    l_1 and l_n (which also indicates they are under the same scope).
    2) After reordering, the domain of an outer loop cannot depend on any of the inner loops.
    3) For every block under the loop nests, its block binding must be affine, and the block
    variables must be either data parallel or reduction.
    4) No duplicated loops are allowed in the arguments.

    Parameters
    ----------
    *ordered_loops : LoopRV
        The loops in the new order, listed from the outermost to the innermost.
        (Note: each positional argument is a single LoopRV, hence the
        element-wise annotation on the variadic parameter.)

    Examples
    --------

    Before reorder, in TensorIR, the IR is:

    .. code-block:: python

        @tvm.script.tir
        def before_reorder(a: ty.handle, b: ty.handle) -> None:
            A = tir.match_buffer(a, (128, 128))
            B = tir.match_buffer(b, (128, 128))
            for i, j in tir.grid(128, 128):
                with tir.block([128, 128], "B") as [vi, vj]:
                    B[vi, vj] = A[vi, vj] * 2.0

    Create the schedule and do reorder:

    .. code-block:: python

        sch = tir.Schedule(before_reorder)
        i, j = sch.get_loops(sch.get_block("B"))
        sch.reorder(j, i)
        print(tvm.script.asscript(sch.mod["main"]))

    After applying reorder, the IR becomes:

    .. code-block:: python

        @tvm.script.tir
        def after_reorder(a: ty.handle, b: ty.handle) -> None:
            A = tir.match_buffer(a, (128, 128))
            B = tir.match_buffer(b, (128, 128))
            # Here j and i are reordered
            for j, i in tir.grid(128, 128):
                with tir.block([128, 128], "B") as [vi, vj]:
                    tir.bind(vi, i)
                    tir.bind(vj, j)
                    B[vi, vj] = A[vi, vj] * 2.0

    """
    # `ordered_loops` is collected as a tuple of LoopRV handles; the FFI
    # boundary converts it to an Array<LoopRV> for ScheduleNode::Reorder.
    _ffi_api.ScheduleReorder(self, ordered_loops)  # type: ignore # pylint: disable=no-member

########## Schedule: Manipulate ForKind ##########

def parallel(self, loop: LoopRV) -> None:
Expand Down
7 changes: 7 additions & 0 deletions src/tir/schedule/concrete_schedule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,13 @@ Array<LoopRV> ConcreteScheduleNode::Split(const LoopRV& loop_rv,
return CreateRV<LoopRV>(results);
}

// Translate the loop random variables to their StmtSRefs and delegate to the
// tir::Reorder primitive. The TVM_TIR_SCHEDULE_BEGIN/END pair brackets the call
// so that schedule errors raised inside are reported for the "reorder"
// primitive according to this schedule's error rendering level.
void ConcreteScheduleNode::Reorder(const Array<LoopRV>& ordered_loop_rvs) {
  TVM_TIR_SCHEDULE_BEGIN();
  tir::Reorder(state_, GetSRefs(ordered_loop_rvs));
  TVM_TIR_SCHEDULE_END("reorder", this->error_render_level_);
  // Self-check the schedule state after the transformation (debug verification).
  this->state_->DebugVerify();
}

/******** Schedule: Manipulate ForKind ********/

void ConcreteScheduleNode::Parallel(const LoopRV& loop_rv) {
Expand Down
1 change: 1 addition & 0 deletions src/tir/schedule/concrete_schedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class ConcreteScheduleNode : public ScheduleNode {
/******** Schedule: Transform loops ********/
LoopRV Fuse(const Array<LoopRV>& loop_rvs) override;
Array<LoopRV> Split(const LoopRV& loop_rv, const Array<Optional<ExprRV>>& factors) override;
// See ScheduleNode::Reorder for the full contract of this primitive.
void Reorder(const Array<LoopRV>& ordered_loop_rvs) override;
/******** Schedule: Manipulate ForKind ********/
void Parallel(const LoopRV& loop_rv) override;
void Vectorize(const LoopRV& loop_rv) override;
Expand Down
15 changes: 15 additions & 0 deletions src/tir/schedule/primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ TVM_DLL Array<StmtSRef> Split(ScheduleState self, const StmtSRef& loop_sref,
* \return The sref to the fused loop
*/
TVM_DLL StmtSRef Fuse(ScheduleState self, const Array<StmtSRef>& loop_srefs);
/*!
 * \brief Reorder a list of loops. It doesn't require the loops to be consecutive.
 * It requires:
 * 1) The loops are in the same chain. That means: the loops can be ordered to [l_1, l_2, ... ,
 * l_n] where l_i is an ancestor of l_{i+1} and there are only single-branch loops between
 * l_1 and l_n (which also indicates they are under the same scope).
 * 2) After reordering, the domain of an outer loop cannot depend on any of the inner loops.
 * 3) For every block under the loop nests, its block binding must be affine, and the block
 * variables must be either data parallel or reduction.
 * 4) No duplicated loops are allowed in the arguments.
 * \param self The state of the schedule
 * \param ordered_loop_srefs An array of srefs which indicates the new order of loops,
 * given from the outermost loop to the innermost
 */
TVM_DLL void Reorder(ScheduleState self, const Array<StmtSRef>& ordered_loop_srefs);

/******** Schedule: Manipulate ForKind ********/
/*!
* \brief Parallelize the input loop. It requires:
Expand Down
Loading