From 78dc3735c6a3ba49eba0d42c73032b2d86a46abb Mon Sep 17 00:00:00 2001 From: Yuan-Chuan-YUE Date: Mon, 16 Aug 2021 14:12:04 +0800 Subject: [PATCH 1/3] register tir.erf to lower opencl directly --- src/target/source/intrin_rule_opencl.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/target/source/intrin_rule_opencl.cc b/src/target/source/intrin_rule_opencl.cc index 288bb2cfc069..64a50c3c84b1 100644 --- a/src/target/source/intrin_rule_opencl.cc +++ b/src/target/source/intrin_rule_opencl.cc @@ -49,6 +49,9 @@ TVM_REGISTER_OP("tir.round") TVM_REGISTER_OP("tir.exp").set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); +TVM_REGISTER_OP("tir.erf").set_attr("opencl.FLowerIntrinsic", + DispatchPureExtern); + TVM_REGISTER_OP("tir.exp2") .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); From 1a434fdb5ba0db13407c869fceac0b4cb3afc3b4 Mon Sep 17 00:00:00 2001 From: Yuan-Chuan-YUE Date: Wed, 18 Aug 2021 10:27:59 +0800 Subject: [PATCH 2/3] add opencl codegen unit test --- .../unittest/test_target_codegen_opencl.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/python/unittest/test_target_codegen_opencl.py b/tests/python/unittest/test_target_codegen_opencl.py index 98340f0e6ac5..0acf61209196 100644 --- a/tests/python/unittest/test_target_codegen_opencl.py +++ b/tests/python/unittest/test_target_codegen_opencl.py @@ -119,7 +119,27 @@ def check_max(dev, n, dtype): check_max(dev, 1, "float32") check_max(dev, 1, "float64") +@tvm.testing.requires_gpu +@tvm.testing.requires_opencl +def test_opencl_erf(): + def check_erf(dev, n, dtype): + A = te.placeholder((n,), name="A", dtype=dtype) + C = te.compute(A.shape, lambda *i: te.erf(A(*i)), name="C") + s = te.create_schedule(C.op) + s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) + fun = tvm.build(s, [A, C], target) + a = tvm.nd.empty((n,), A.dtype, dev) + c = tvm.nd.empty((n,), A.dtype, dev) + # Only need to test compiling here + fun(a, c) + + dev = tvm.device(target, 0) + + check_erf(dev, 1, "float32") + check_erf(dev, 1, "float64") + if __name__ == "__main__": test_opencl_ternary_expression() test_opencl_inf_nan() + test_opencl_erf() From b258ec6676a5db3dff39576c7f435229a1d03740 Mon Sep 17 00:00:00 2001 From: Yuan-Chuan-YUE Date: Fri, 20 Aug 2021 15:48:27 +0800 Subject: [PATCH 3/3] change erf opencl codegen unit test for checking there is erf in the source not erff --- tests/python/unittest/test_target_codegen_opencl.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/python/unittest/test_target_codegen_opencl.py b/tests/python/unittest/test_target_codegen_opencl.py index 0acf61209196..56392ec8cccc 100644 --- a/tests/python/unittest/test_target_codegen_opencl.py +++ b/tests/python/unittest/test_target_codegen_opencl.py @@ -17,6 +17,7 @@ import tvm from tvm import te import tvm.testing +import re target = "opencl" @@ -119,8 +120,7 @@ def check_max(dev, n, dtype): check_max(dev, 1, "float32") check_max(dev, 1, "float64") -@tvm.testing.requires_gpu -@tvm.testing.requires_opencl + def test_opencl_erf(): def check_erf(dev, n, dtype): A = te.placeholder((n,), name="A", dtype=dtype) @@ -128,10 +128,10 @@ def check_erf(dev, n, dtype): s = te.create_schedule(C.op) s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x")) fun = tvm.build(s, [A, C], target) - a = tvm.nd.empty((n,), A.dtype, dev) - c = tvm.nd.empty((n,), A.dtype, dev) - # Only need to test compiling here - fun(a, c) + source_str = fun.imported_modules[0].get_source() + matches = re.findall("erf", source_str) + error_matches = re.findall("erff", source_str) + assert len(matches) == 1 and len(error_matches) == 0 dev = tvm.device(target, 0)