From 60b27e207924e096d0c3ccafc5cc489a5f64dbed Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Thu, 19 Sep 2024 11:39:50 +0200 Subject: [PATCH 1/3] Added a custom SDFG inline implementation. --- .../transformations/__init__.py | 3 +- .../transformations/auto_opt.py | 74 ++++++++++++++++++- 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py index 53fa1eee05..7c5b21d5dc 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py @@ -12,7 +12,7 @@ that explains the general structure and requirements on the SDFGs. """ -from .auto_opt import gt_auto_optimize, gt_set_iteration_order, gt_simplify +from .auto_opt import gt_auto_optimize, gt_inline_nested_sdfg, gt_set_iteration_order, gt_simplify from .gpu_utils import GPUSetBlockSize, gt_gpu_transformation, gt_set_gpu_blocksize from .loop_blocking import LoopBlocking from .map_orderer import MapIterationOrder @@ -29,6 +29,7 @@ "SerialMapPromoterGPU", "gt_auto_optimize", "gt_gpu_transformation", + "gt_inline_nested_sdfg", "gt_set_iteration_order", "gt_set_gpu_blocksize", "gt_simplify", diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py index 3895f7f5e8..f7979c5af0 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py @@ -11,9 +11,8 @@ from typing import Any, Final, Iterable, Optional, Sequence import dace -from dace.transformation import dataflow as dace_dataflow +from dace.transformation import dataflow as dace_dataflow, passes as dace_passes from 
dace.transformation.auto import auto_optimize as dace_aoptimize -from dace.transformation.passes import simplify as dace_passes_simplify from gt4py.next import common as gtx_common from gt4py.next.program_processors.runners.dace_fieldview import ( @@ -48,6 +47,9 @@ def gt_simplify( will be forwarded to DaCe, i.e. `GT_SIMPLIFY_DEFAULT_SKIP_SET` are not added automatically. + Passes that are replaced: + - Instead of `InlineSDFGs` the function will run `gt_inline_nested_sdfg()`. + Args: sdfg: The SDFG to optimize. validate: Perform validation after the pass has run. @@ -55,11 +57,23 @@ def gt_simplify( skip: List of simplify passes that should not be applied, defaults to `GT_SIMPLIFY_DEFAULT_SKIP_SET`. """ - return dace_passes_simplify.SimplifyPass( + if skip is None: + skip = set() + + if "InlineSDFGs" not in skip: + gt_inline_nested_sdfg( + sdfg=sdfg, + multistate=True, + permissive=False, + validate=validate, + validate_all=validate_all, + ) + + return dace_passes.SimplifyPass( validate=validate, validate_all=validate_all, verbose=False, - skip=set(skip) if skip is not None else skip, + skip=set(skip) | {"InlineSDFGs"}, ).apply_pass(sdfg, {}) @@ -91,6 +105,58 @@ def gt_set_iteration_order( ) +def gt_inline_nested_sdfg( + sdfg: dace.SDFG, + multistate: bool = True, + permissive: bool = False, + validate: bool = True, + validate_all: bool = False, +) -> dace.SDFG: + """Perform inlining of nested SDFG into their parent SDFG. + + The function uses DaCe's `InlineSDFG` transformation, the same used in simplify. + However, before the inline transformation is run the function will run some + cleaning passes that allows inlining nested SDFG. + As a side effect, the function will split stages into more states. + + Args: + sdfg: The SDFG that should be processed, will be modified in place and returned. + multistate: Allow inlining of multistate nested SDFG, defaults to `True`. + permissive: Be less strict on the accepted SDFGs. 
+ validate: Perform validation after the transformation has finished. + validate_all: Performs extensive validation. + """ + first_iteration = True + i = 0 + while True: + print(f"ITERATION: {i}") + nb_preproccess = sdfg.apply_transformations_repeated( + [dace_dataflow.PruneSymbols, dace_dataflow.PruneConnectors], + validate=False, + validate_all=validate_all, + ) + if (nb_preproccess == 0) and (not first_iteration): + break + + # Create and configure the inline pass + inline_sdfg = dace_passes.InlineSDFGs() + inline_sdfg.progress = False + inline_sdfg.permissive = permissive + inline_sdfg.multistate = multistate + + # Apply the inline pass + nb_inlines = inline_sdfg.apply_pass(sdfg, {}) + + # Check result, if needed and test if we can stop + if validate_all or validate: + sdfg.validate() + if nb_inlines == 0: + break + first_iteration = False + + return sdfg + + def gt_auto_optimize( sdfg: dace.SDFG, gpu: bool, From bdf7a3c0dd462f039ea3fb475ac0d709e4550b8f Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Thu, 19 Sep 2024 12:28:46 +0200 Subject: [PATCH 2/3] Added review comments. --- .../dace_fieldview/transformations/auto_opt.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py index f7979c5af0..a629fd504a 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py @@ -35,7 +35,7 @@ def gt_simplify( sdfg: dace.SDFG, validate: bool = True, validate_all: bool = False, - skip: Optional[Iterable[str]] = GT_SIMPLIFY_DEFAULT_SKIP_SET, + skip: Iterable[str] = GT_SIMPLIFY_DEFAULT_SKIP_SET, ) -> Any: """Performs simplifications on the SDFG in place. @@ -43,12 +43,11 @@ def gt_simplify( as it is specially tuned for GridTool based SDFGs. 
By default this function will run the normal DaCe simplify pass, but skip - passes listed in `GT_SIMPLIFY_DEFAULT_SKIP_SET`. If `skip` is passed it - will be forwarded to DaCe, i.e. `GT_SIMPLIFY_DEFAULT_SKIP_SET` are not - added automatically. + passes listed in `GT_SIMPLIFY_DEFAULT_SKIP_SET`. If `skip` is given it will + not be modified, i.e. `GT_SIMPLIFY_DEFAULT_SKIP_SET` is not added by default. Passes that are replaced: - - Instead of `InlineSDFGs` the function will run `gt_inline_nested_sdfg()`. + - `InlineSDFGs`: Instead the `gt_inline_nested_sdfg()` will be used. Args: sdfg: The SDFG to optimize. @@ -57,8 +56,8 @@ def gt_simplify( skip: List of simplify passes that should not be applied, defaults to `GT_SIMPLIFY_DEFAULT_SKIP_SET`. """ - if skip is None: - skip = set() + # Ensure that `skip` is a `set` + skip = set(skip) if "InlineSDFGs" not in skip: gt_inline_nested_sdfg( @@ -116,7 +115,7 @@ def gt_inline_nested_sdfg( The function uses DaCe's `InlineSDFG` transformation, the same used in simplify. However, before the inline transformation is run the function will run some - cleaning passes that allows inlining nested SDFG. + cleaning passes that allows inlining nested SDFGs. As a side effect, the function will split stages into more states. Args: From 1fae90dc0d4c8b5e7f2edcf5e8bd6ace60ded6c3 Mon Sep 17 00:00:00 2001 From: Philip Mueller Date: Thu, 19 Sep 2024 14:43:50 +0200 Subject: [PATCH 3/3] Applied Edoardo's latest suggestions. 
--- .../dace_fieldview/transformations/__init__.py | 9 ++++++++- .../dace_fieldview/transformations/auto_opt.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py index 7c5b21d5dc..8852dd6d2d 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/__init__.py @@ -12,7 +12,13 @@ that explains the general structure and requirements on the SDFGs. """ -from .auto_opt import gt_auto_optimize, gt_inline_nested_sdfg, gt_set_iteration_order, gt_simplify +from .auto_opt import ( + GT_SIMPLIFY_DEFAULT_SKIP_SET, + gt_auto_optimize, + gt_inline_nested_sdfg, + gt_set_iteration_order, + gt_simplify, +) from .gpu_utils import GPUSetBlockSize, gt_gpu_transformation, gt_set_gpu_blocksize from .loop_blocking import LoopBlocking from .map_orderer import MapIterationOrder @@ -21,6 +27,7 @@ __all__ = [ + "GT_SIMPLIFY_DEFAULT_SKIP_SET", "GPUSetBlockSize", "LoopBlocking", "MapIterationOrder", diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py index a629fd504a..37cc89aa2b 100644 --- a/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py +++ b/src/gt4py/next/program_processors/runners/dace_fieldview/transformations/auto_opt.py @@ -35,19 +35,19 @@ def gt_simplify( sdfg: dace.SDFG, validate: bool = True, validate_all: bool = False, - skip: Iterable[str] = GT_SIMPLIFY_DEFAULT_SKIP_SET, + skip: Optional[Iterable[str]] = None, ) -> Any: """Performs simplifications on the SDFG in place. Instead of calling `sdfg.simplify()` directly, you should use this function, as it is specially tuned for GridTool based SDFGs. 
- By default this function will run the normal DaCe simplify pass, but skip - passes listed in `GT_SIMPLIFY_DEFAULT_SKIP_SET`. If `skip` is given it will - not be modified, i.e. `GT_SIMPLIFY_DEFAULT_SKIP_SET` is not added by default. + This function runs the DaCe simplification pass, but the following passes are + replaced: + - `InlineSDFGs`: Instead `gt_inline_nested_sdfg()` will be called. - Passes that are replaced: - - `InlineSDFGs`: Instead the `gt_inline_nested_sdfg()` will be used. + Furthermore, by default, or if `None` is passed for `skip` the passes listed in + `GT_SIMPLIFY_DEFAULT_SKIP_SET` will be skipped. Args: sdfg: The SDFG to optimize. @@ -57,7 +57,7 @@ def gt_simplify( to `GT_SIMPLIFY_DEFAULT_SKIP_SET`. """ # Ensure that `skip` is a `set` - skip = set(skip) + skip = GT_SIMPLIFY_DEFAULT_SKIP_SET if skip is None else set(skip) if "InlineSDFGs" not in skip: gt_inline_nested_sdfg( @@ -72,7 +72,7 @@ def gt_simplify( validate=validate, validate_all=validate_all, verbose=False, - skip=set(skip) | {"InlineSDFGs"}, + skip=(skip | {"InlineSDFGs"}), ).apply_pass(sdfg, {})