From ab347ebadea4912fd3c2dc6ef37e6694a95df5dc Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Wed, 29 Jan 2020 02:03:18 +0000 Subject: [PATCH 1/3] [TOPI][x86] Injective Schedule Improvement. --- topi/python/topi/x86/injective.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/topi/python/topi/x86/injective.py b/topi/python/topi/x86/injective.py index 8c97214ea4bb..349b5d72dfa4 100644 --- a/topi/python/topi/x86/injective.py +++ b/topi/python/topi/x86/injective.py @@ -45,6 +45,8 @@ def schedule_injective_from_existing(sch, out): sch[out].parallel(fused) elif len(sch[out].op.axis) >= 1: sch[out].parallel(sch[out].op.axis[0]) + # Vectorize the inner most for loop + sch[out].vectorize(sch[out].op.axis[-1]) return sch @generic.schedule_injective.register(["cpu"]) From bc74ceb06172392bf31b1ddc393e93829195a3d8 Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Tue, 4 Feb 2020 20:49:33 +0000 Subject: [PATCH 2/3] Add tiling. --- topi/python/topi/x86/injective.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/topi/python/topi/x86/injective.py b/topi/python/topi/x86/injective.py index 349b5d72dfa4..36dbd6fcab1d 100644 --- a/topi/python/topi/x86/injective.py +++ b/topi/python/topi/x86/injective.py @@ -45,8 +45,10 @@ def schedule_injective_from_existing(sch, out): sch[out].parallel(fused) elif len(sch[out].op.axis) >= 1: sch[out].parallel(sch[out].op.axis[0]) - # Vectorize the inner most for loop - sch[out].vectorize(sch[out].op.axis[-1]) + # Vectorize the inner most for loop. Tiling first to get a const extent + l = sch[out].op.axis[-1] + _, li = sch[out].split(l, factor=16) + sch[out].vectorize(li) return sch @generic.schedule_injective.register(["cpu"]) From 861ac14d453bccc777c80a316917483ba0d3fef5 Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Tue, 4 Feb 2020 21:10:41 +0000 Subject: [PATCH 3/3] Vectorize when there is an axis. --- topi/python/topi/x86/injective.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/topi/python/topi/x86/injective.py b/topi/python/topi/x86/injective.py index 36dbd6fcab1d..d6bb7622d640 100644 --- a/topi/python/topi/x86/injective.py +++ b/topi/python/topi/x86/injective.py @@ -45,10 +45,12 @@ def schedule_injective_from_existing(sch, out): sch[out].parallel(fused) elif len(sch[out].op.axis) >= 1: sch[out].parallel(sch[out].op.axis[0]) + # Vectorize the inner most for loop. Tiling first to get a const extent - l = sch[out].op.axis[-1] - _, li = sch[out].split(l, factor=16) - sch[out].vectorize(li) + if len(sch[out].op.axis) >= 1: + l = sch[out].op.axis[-1] + _, li = sch[out].split(l, factor=16) + sch[out].vectorize(li) return sch @generic.schedule_injective.register(["cpu"])