Halide's python bindings as new framework #12

Draft · wants to merge 8 commits into main
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
+# Custom build caches
+**/.halidecache/
+**/.dacecache/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
8 changes: 4 additions & 4 deletions bench_info/conv2d_bias.json
@@ -17,10 +17,10 @@
         "init": {
             "func_name": "initialize",
             "input_args": ["C_in", "C_out", "H", "K", "N", "W"],
-            "output_args": ["input", "weights", "bias"]
+            "output_args": ["input", "weights", "bias", "output"]
         },
-        "input_args": ["input", "weights", "bias"],
-        "array_args": ["input", "weights", "bias"],
-        "output_args": []
+        "input_args": ["input", "weights", "bias", "output"],
+        "array_args": ["input", "weights", "bias", "output"],
+        "output_args": ["output"]
     }
 }
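This and the matching bench_info changes below all follow one pattern: the output array is now allocated in initialize(), threaded through input_args and array_args, and named in output_args for validation. Presumably this lets out-parameter frameworks such as Halide share the harness with kernels that return their results.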
8 changes: 4 additions & 4 deletions bench_info/mlp.json
@@ -17,10 +17,10 @@
         "init": {
             "func_name": "initialize",
             "input_args": ["C_in", "N", "S0", "S1", "S2"],
-            "output_args": ["input", "w1", "b1", "w2", "b2", "w3", "b3"]
+            "output_args": ["input", "w1", "b1", "w2", "b2", "w3", "b3", "output"]
         },
-        "input_args": ["input", "w1", "b1", "w2", "b2", "w3", "b3"],
-        "array_args": ["input", "w1", "b1", "w2", "b2", "w3", "b3"],
-        "output_args": []
+        "input_args": ["input", "w1", "b1", "w2", "b2", "w3", "b3", "output"],
+        "array_args": ["input", "w1", "b1", "w2", "b2", "w3", "b3", "output"],
+        "output_args": ["output"]
     }
 }
8 changes: 4 additions & 4 deletions bench_info/softmax.json
@@ -17,10 +17,10 @@
         "init": {
             "func_name": "initialize",
             "input_args": ["N", "H", "SM"],
-            "output_args": ["x"]
+            "output_args": ["x", "output"]
         },
-        "input_args": ["x"],
-        "array_args": ["x"],
-        "output_args": []
+        "input_args": ["x", "output"],
+        "array_args": ["x", "output"],
+        "output_args": ["output"]
     }
 }
10 changes: 10 additions & 0 deletions framework_info/halide.json
@@ -0,0 +1,10 @@
{
    "framework": {
        "simple_name": "halide",
        "full_name": "Halide",
        "prefix": "hl",
        "postfix": "halide",
        "class": "HalideFramework",
        "arch": "cpu"
    }
}
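For context, a minimal sketch of what the referenced HalideFramework might look like; halide_framework.py itself is not shown in this diff, so the base-class import and constructor signature are assumptions patterned on npbench's other *_framework.py modules:

# Hypothetical skeleton only; the real implementation is not part of this view.
from npbench.infrastructure import Framework  # assumed re-export of the base class


class HalideFramework(Framework):
    """Ties the "halide" simple_name above to the *_halide.py implementations."""

    def __init__(self, fname: str):
        # fname is presumably the framework's simple_name, i.e. "halide"
        super().__init__(fname)

The `from .halide_framework import *` line at the bottom of this diff is what makes the class discoverable through npbench.infrastructure.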
6 changes: 5 additions & 1 deletion npbench/benchmarks/deep_learning/conv2d_bias/conv2d.py
@@ -11,4 +11,8 @@ def initialize(C_in, C_out, H, K, N, W):
     # Weights
     weights = rng.random((K, K, C_in, C_out), dtype=np.float32)
     bias = rng.random((C_out, ), dtype=np.float32)
-    return input, weights, bias
+
+    H_out = input.shape[1] - K + 1
+    W_out = input.shape[2] - K + 1
+    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)
+    return input, weights, bias, output
41 changes: 41 additions & 0 deletions npbench/benchmarks/deep_learning/conv2d_bias/conv2d_halide.py
@@ -0,0 +1,41 @@
import numpy as np
import halide as hl

input_buffers = {
    "input": hl.ImageParam(hl.Float(32), 4, "input"),
    "weights": hl.ImageParam(hl.Float(32), 4, "weights"),
    "bias": hl.ImageParam(hl.Float(32), 1, "bias")
}

x = hl.Var("x")
y = hl.Var("y")
c = hl.Var("c")
n = hl.Var("n")


def set_estimates(input, weights, bias, output, N, C_in, C_out, W, H, K):
    input.dim(0).set_estimate(0, C_in)
    input.dim(1).set_estimate(0, W)
    input.dim(2).set_estimate(0, H)
    input.dim(3).set_estimate(0, N)

    output.set_estimate(c, 0, C_out)
    output.set_estimate(x, 0, W - (K - 1))
    output.set_estimate(y, 0, H - (K - 1))
    output.set_estimate(n, 0, N)

    weights.dim(0).set_estimate(0, C_in)
    weights.dim(1).set_estimate(0, K)
    weights.dim(2).set_estimate(0, K)
    weights.dim(3).set_estimate(0, C_out)

    bias.dim(0).set_estimate(0, C_out)


# Deep learning convolutional operator (stride = 1)
def conv2d_bias(input, weights, bias):
    r = hl.RDom([(0, weights.width()), (0, weights.height()), (0, weights.channels())])
    output = hl.Func("output")
    output[c, x, y, n] = bias[c]
    output[c, x, y, n] += weights[r.x, r.y, r.z, c] * input[r.x, x + r.y, y + r.z, n]

    return {"output": output}
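For orientation, a hedged sketch of how this pipeline could be driven from NumPy; the function name run_conv2d, the direct import, and the realize-based flow are assumptions, since the actual harness (halide_framework.py) is not part of this view:

# Illustrative sketch only; not the PR's harness code.
import numpy as np
import halide as hl

from conv2d_halide import input_buffers, conv2d_bias  # assumed import


def run_conv2d(input_np, weights_np, bias_np):
    # NumPy arrays are row-major while Halide's dim(0) is the innermost
    # (stride-1) dimension, so an (N, H, W, C) array binds as dims (C, W, H, N),
    # matching the [c, x, y, n] indexing used by the pipeline above.
    input_buffers["input"].set(hl.Buffer(input_np))
    input_buffers["weights"].set(hl.Buffer(weights_np))
    input_buffers["bias"].set(hl.Buffer(bias_np))

    output = conv2d_bias(*input_buffers.values())["output"]

    N, H, W, _ = input_np.shape
    K, C_out = weights_np.shape[0], weights_np.shape[3]
    # Realize with extents in Halide dimension order: (C_out, W_out, H_out, N).
    result = output.realize([C_out, W - K + 1, H - K + 1, N])
    return np.asarray(result)  # (N, H_out, W_out, C_out) on the NumPy side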
13 changes: 4 additions & 9 deletions npbench/benchmarks/deep_learning/conv2d_bias/conv2d_numpy.py
@@ -2,13 +2,10 @@


 # Deep learning convolutional operator (stride = 1)
-def conv2d(input, weights):
+def conv2d(input, weights, output):
     K = weights.shape[0]  # Assuming square kernel
     N = input.shape[0]
     H_out = input.shape[1] - K + 1
     W_out = input.shape[2] - K + 1
-    C_out = weights.shape[3]
-    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)
-
     # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
     for i in range(H_out):
@@ -19,8 +16,6 @@ def conv2d(input, weights):
                 axis=(1, 2, 3),
             )

-    return output
-
-
-def conv2d_bias(input, weights, bias):
-    return conv2d(input, weights) + bias
+def conv2d_bias(input, weights, bias, output):
+    conv2d(input, weights, output)
+    output += bias  # in-place, so the caller-provided output array is updated
3 changes: 2 additions & 1 deletion npbench/benchmarks/deep_learning/mlp/mlp.py
@@ -18,4 +18,5 @@ def initialize(C_in, N, S0, S1, S2):
     w3 = rng.random((mlp_sizes[1], mlp_sizes[2]), dtype=np.float32)
     b3 = rng.random((mlp_sizes[2], ), dtype=np.float32)

-    return input, w1, b1, w2, b2, w3, b3
+    output = np.empty((N, S2), dtype=np.float32)
+    return input, w1, b1, w2, b2, w3, b3, output
81 changes: 81 additions & 0 deletions npbench/benchmarks/deep_learning/mlp/mlp_halide.py
@@ -0,0 +1,81 @@
import numpy as np
import halide as hl

input_buffers = {
    "input": hl.ImageParam(hl.Float(32), 2, "input"),
    "w1": hl.ImageParam(hl.Float(32), 2, "w1"),
    "b1": hl.ImageParam(hl.Float(32), 1, "b1"),
    "w2": hl.ImageParam(hl.Float(32), 2, "w2"),
    "b2": hl.ImageParam(hl.Float(32), 1, "b2"),
    "w3": hl.ImageParam(hl.Float(32), 2, "w3"),
    "b3": hl.ImageParam(hl.Float(32), 1, "b3")
}

n = hl.Var("n")
h1 = hl.Var("h1")
h2 = hl.Var("h2")
h3 = hl.Var("h3")


def set_estimates(input, w1, b1, w2, b2, w3, b3, output, N, C_in, S0, S1, S2):
    input.dim(0).set_estimate(0, C_in)
    input.dim(1).set_estimate(0, N)

    w1.dim(0).set_estimate(0, C_in)
    w1.dim(1).set_estimate(0, S0)
    b1.dim(0).set_estimate(0, S0)

    w2.dim(0).set_estimate(0, S0)
    w2.dim(1).set_estimate(0, S1)
    b2.dim(0).set_estimate(0, S1)

    w3.dim(0).set_estimate(0, S1)
    w3.dim(1).set_estimate(0, S2)
    b3.dim(0).set_estimate(0, S2)

    output.set_estimate(h3, 0, S2)
    output.set_estimate(n, 0, N)


# 3-layer MLP
def mlp(input, w1, b1, w2, b2, w3, b3):
    # Layer 1
    r1 = hl.RDom([(0, input.width())])
    layer1 = hl.Func("layer1")
    layer1[h1, n] = b1[h1]
    layer1[h1, n] += input[r1.x, n] * w1[r1.x, h1]

    relu1 = hl.Func("relu1")
    relu1[h1, n] = hl.max(0.0, layer1[h1, n])

    # Layer 2 (feeds on relu1, not the raw layer1, matching the NumPy reference)
    r2 = hl.RDom([(0, b1.width())])
    layer2 = hl.Func("layer2")
    layer2[h2, n] = b2[h2]
    layer2[h2, n] += relu1[r2.x, n] * w2[r2.x, h2]

    relu2 = hl.Func("relu2")
    relu2[h2, n] = hl.max(0.0, layer2[h2, n])

    # Layer 3 (feeds on relu2)
    r3 = hl.RDom([(0, b2.width())])
    layer3 = hl.Func("layer3")
    layer3[h3, n] = b3[h3]
    layer3[h3, n] += relu2[r3.x, n] * w3[r3.x, h3]

    # Numerically-stable softmax over the last layer
    a = hl.RDom([(0, b3.width())])
    maxi = hl.Func("maxi")
    maxi[n] = hl.maximum(layer3[a.x, n])

    expo = hl.Func("expo")
    expo[h3, n] = hl.exp(layer3[h3, n] - maxi[n])

    b = hl.RDom([(0, b3.width())])
    norm = hl.Func("norm")
    norm[n] = 0.0
    norm[n] += expo[b.x, n]

    output = hl.Func("output")
    output[h3, n] = expo[h3, n] / norm[n]

    return {"output": output}
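Each matrix-vector product above uses Halide's two-stage reduction idiom: a pure definition initializes the Func, then an update definition accumulates over an RDom. A minimal, self-contained illustration of the pattern (the names f, g, i, r are illustrative, not from this PR):

import halide as hl

i = hl.Var("i")
g = hl.ImageParam(hl.Float(32), 2, "g")

r = hl.RDom([(0, g.width())])  # reduce over g's innermost dimension
f = hl.Func("f")
f[i] = 0.0                     # pure definition: initialize the accumulator
f[i] += g[r.x, i]              # update definition: accumulate the sum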
5 changes: 2 additions & 3 deletions npbench/benchmarks/deep_learning/mlp/mlp_numpy.py
@@ -14,8 +14,7 @@ def softmax(x):


 # 3-layer MLP
-def mlp(input, w1, b1, w2, b2, w3, b3):
+def mlp(input, w1, b1, w2, b2, w3, b3, output):
     x = relu(input @ w1 + b1)
     x = relu(x @ w2 + b2)
-    x = softmax(x @ w3 + b3)  # Softmax call can be omitted if necessary
-    return x
+    output[:, :] = softmax(x @ w3 + b3)  # Softmax call can be omitted if necessary
3 changes: 2 additions & 1 deletion npbench/benchmarks/deep_learning/softmax/softmax.py
@@ -7,4 +7,5 @@ def initialize(N, H, SM):
     from numpy.random import default_rng
     rng = default_rng(42)
     x = rng.random((N, H, SM, SM), dtype=np.float32)
-    return x
+    output = np.empty((N, H, SM, SM), dtype=np.float32)
+    return x, output
41 changes: 41 additions & 0 deletions npbench/benchmarks/deep_learning/softmax/softmax_halide.py
@@ -0,0 +1,41 @@
import numpy as np
import halide as hl

input_buffers = {
    "x": hl.ImageParam(hl.Float(32), 4, "x")
}

s4 = hl.Var()
s3 = hl.Var()
s2 = hl.Var()
s1 = hl.Var()


def softmax(x):
    a = hl.RDom([(0, x.width())])
    maxi = hl.Func("maxi")
    maxi[s3, s2, s1] = hl.maximum(x[a.x, s3, s2, s1])

    expo = hl.Func("expo")
    expo[s4, s3, s2, s1] = hl.exp(x[s4, s3, s2, s1] - maxi[s3, s2, s1])

    b = hl.RDom([(0, x.width())])
    nm = hl.Func("nm")
    nm[s3, s2, s1] = 0.0
    nm[s3, s2, s1] += expo[b.x, s3, s2, s1]

    output = hl.Func("output")
    output[s4, s3, s2, s1] = expo[s4, s3, s2, s1] / nm[s3, s2, s1]

    return {"output": output}


def set_estimates(x, output, N, H, SM):
    x.dim(0).set_estimate(0, SM)
    x.dim(1).set_estimate(0, SM)
    x.dim(2).set_estimate(0, H)
    x.dim(3).set_estimate(0, N)

    output.set_estimate(s4, 0, SM)
    output.set_estimate(s3, 0, SM)
    output.set_estimate(s2, 0, H)
    output.set_estimate(s1, 0, N)

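Note the axis correspondence here: the two RDoms reduce over x's dim(0), which for a C-contiguous NumPy array is the last axis, so the Halide pipeline normalizes over the same axis=-1 that the NumPy version below uses.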
4 changes: 2 additions & 2 deletions npbench/benchmarks/deep_learning/softmax/softmax_numpy.py
@@ -2,8 +2,8 @@


 # Numerically-stable version of softmax
-def softmax(x):
+def softmax(x, output):
     tmp_max = np.max(x, axis=-1, keepdims=True)
     tmp_out = np.exp(x - tmp_max)
     tmp_sum = np.sum(tmp_out, axis=-1, keepdims=True)
-    return tmp_out / tmp_sum
+    output[:, :, :, :] = tmp_out / tmp_sum
62 changes: 62 additions & 0 deletions npbench/benchmarks/weather_stencils/hdiff/hdiff_halide.py
@@ -0,0 +1,62 @@
import numpy as np
import halide as hl


def hdiff_params():
    in_field = hl.ImageParam(hl.Float(32), 3, "in_field")
    coeff = hl.ImageParam(hl.Float(32), 3, "coeff")
    return in_field, coeff


def hdiff(in_field, coeff):
    I = 256
    J = 256
    K = 160

    i, j, k = hl.Var("i"), hl.Var("j"), hl.Var("k")

    lap_field = hl.Func("lap_field")
    lap_field[k, j, i] = 4.0 * in_field[k, j + 1, i + 1] - (in_field[k, j + 1, i + 2] + in_field[k, j + 1, i] + in_field[k, j + 2, i + 1] + in_field[k, j, i + 1])

    res_flx = hl.Func("res_flx")
    res_flx[k, j, i] = lap_field[k, j + 1, i + 1] - lap_field[k, j + 1, i]

    condition_flx = hl.Func("condition_flx")
    condition_flx[k, j, i] = res_flx[k, j, i] * (in_field[k, j + 2, i + 2] - in_field[k, j + 2, i + 1])

    flx_field = hl.Func("flx_field")
    flx_field[k, j, i] = hl.select(condition_flx[k, j, i] > 0, 0, res_flx[k, j, i])

    res_fly = hl.Func("res_fly")
    res_fly[k, j, i] = lap_field[k, j + 1, i + 1] - lap_field[k, j, i + 1]

    condition_fly = hl.Func("condition_fly")
    # Limit the y-flux against its own residual (res_fly, not res_flx),
    # matching the NumPy reference implementation.
    condition_fly[k, j, i] = res_fly[k, j, i] * (in_field[k, j + 2, i + 2] - in_field[k, j + 1, i + 2])

    fly_field = hl.Func("fly_field")
    fly_field[k, j, i] = hl.select(condition_fly[k, j, i] > 0, 0, res_fly[k, j, i])

    out_field = hl.Func("out_field")
    out_field[k, j, i] = in_field[k, j + 2, i + 2] - coeff[k, j, i] * ((flx_field[k, j, i + 1] - flx_field[k, j, i]) + (fly_field[k, j + 1, i] - fly_field[k, j, i]))

    # Set bounds: pin the inputs to a dense layout with dim(0) innermost
    in_field.dim(0).set_bounds(0, K).set_stride(1)
    in_field.dim(1).set_bounds(0, J + 4).set_stride(K)
    in_field.dim(2).set_bounds(0, I + 4).set_stride(K * (J + 4))

    coeff.dim(0).set_bounds(0, K).set_stride(1)
    coeff.dim(1).set_bounds(0, J).set_stride(K)
    coeff.dim(2).set_bounds(0, I).set_stride(K * J)

    in_field.dim(0).set_estimate(0, K)
    in_field.dim(1).set_estimate(0, J + 4)
    in_field.dim(2).set_estimate(0, I + 4)

    coeff.dim(0).set_estimate(0, K)
    coeff.dim(1).set_estimate(0, J)
    coeff.dim(2).set_estimate(0, I)

    out_field.set_estimate(k, 0, K)
    out_field.set_estimate(j, 0, J)
    out_field.set_estimate(i, 0, I)

    return out_field
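The set_bounds/set_stride calls pin both inputs to a dense layout with Halide's dim(0) innermost, i.e. ordinary C-contiguous NumPy arrays with K as the last axis. A sketch of matching allocations, assuming the benchmark's usual float32 arrays (the initializer itself is not part of this diff):

import numpy as np

I, J, K = 256, 256, 160  # domain sizes hard-coded in hdiff() above

# dim(0) has stride 1, dim(1) stride K, dim(2) stride K * (J + 4):
# for a C-contiguous array that makes K the last (fastest) axis.
in_field = np.empty((I + 4, J + 4, K), dtype=np.float32)  # 2-cell halo in i and j
coeff = np.empty((I, J, K), dtype=np.float32)
# out_field spans the un-haloed (I, J, K) interior, per the estimates above.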
1 change: 1 addition & 0 deletions npbench/infrastructure/__init__.py
@@ -10,3 +10,4 @@
 from .legate_framework import *
 from .numba_framework import *
 from .pythran_framework import *
+from .halide_framework import *