Commit da84126 (1 parent: 0edf870). Showing 1 changed file with 164 additions and 0 deletions.
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Modifications Copyright 2024 The DAPHNE Consortium
#
#-------------------------------------------------------------

# This script has been manually translated from Apache SystemDS.

/*
 * 2D (Spatial) Batch Normalization layer.
 */

import "../util.daph" as "util";

# Original Apache SystemDS signature:
#forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
#    int C, int Hin, int Win, string mode,
#    matrix[double] ema_mean, matrix[double] ema_var,
#    double mu, double epsilon)
#  return (matrix[double] out, matrix[double] ema_mean_upd, matrix[double] ema_var_upd,
#          matrix[double] cache_mean, matrix[double] cache_inv_var) {
# Note: the W parameter is currently not used by this layer.
def forward(X:matrix, W:matrix, gamma:matrix, beta:matrix, C, Hin, Win, mode, ema_mean:matrix, ema_var:matrix, mu, epsilon)
  -> matrix {
  #, matrix, matrix, matrix, matrix {
  /*
   * Computes the forward pass for a 2D (spatial) batch normalization
   * layer. The input data has N examples, each represented as a 3D
   * volume unrolled into a single vector.
   *
   * A spatial batch normalization layer uses the per-channel sample
   * mean and per-channel uncorrected sample variance during training
   * to normalize each channel of the input data. Additionally, it
   * introduces learnable parameters (gamma, beta) to control the
   * amount of normalization.
   *
   *   `y = ((x-mean) / sqrt(var+eps)) * gamma + beta`
   *
   * This implementation maintains exponential moving averages of the
   * mean and variance during training for use during testing.
   *
   * Reference:
   *  - Batch Normalization: Accelerating Deep Network Training by
   *    Reducing Internal Covariate Shift, S. Ioffe & C. Szegedy, 2015
   *    - https://arxiv.org/abs/1502.03167
   *
   * Inputs:
   *  - X: Inputs, of shape (N, C*Hin*Win).
   *  - gamma: Scale parameters, of shape (C, 1).
   *  - beta: Shift parameters, of shape (C, 1).
   *  - C: Number of input channels (dimensionality of input depth).
   *  - Hin: Input height.
   *  - Win: Input width.
   *  - mode: 'train' or 'test' to indicate if the model is currently
   *      being trained or tested. During training, the current batch
   *      mean and variance will be used to normalize the inputs, while
   *      during testing, the exponential average of the mean and
   *      variance over all previous batches will be used.
   *  - ema_mean: Exponential moving average of the mean, of
   *      shape (C, 1).
   *  - ema_var: Exponential moving average of the variance, of
   *      shape (C, 1).
   *  - mu: Momentum value for moving averages.
   *      Typical values are in the range of [0.9, 0.999].
   *  - epsilon: Smoothing term to avoid divide by zero errors.
   *      Typical values are in the range of [1e-5, 1e-3].
   *
   * Outputs:
   *  - out: Outputs, of shape (N, C*Hin*Win).
   *  - ema_mean_upd: Updated exponential moving average of the mean,
   *      of shape (C, 1).
   *  - ema_var_upd: Updated exponential moving average of the variance,
   *      of shape (C, 1).
   *  - cache_mean: Cache of the batch mean, of shape (C, 1).
   *      Note: This is used for performance during training.
   *  - cache_inv_var: Cache of the inverse variance, of shape (C, 1).
   *      Note: This is used for performance during training.
   */
  #out = X; ema_mean_upd = ema_mean; ema_var_upd = ema_var; cache_mean = ema_mean; cache_inv_var = ema_var
  #[out, ema_mean_upd, ema_var_upd, cache_mean, cache_inv_var] = batch_norm2d(X, gamma, beta, ema_mean, ema_var, mode, epsilon, mu)
  return batch_norm2d(X, gamma, beta, ema_mean, ema_var, mode, epsilon, mu);
}
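
# A minimal usage sketch (illustration only; the sizes, values, and seed below are
# assumptions, not part of the translated layer). The unused W argument is given a
# 1x1 placeholder:
#
#   N = 2; C = 3; Hin = 4; Win = 4;
#   X = rand(N, C * Hin * Win, 0.0, 1.0, 1.0, 42);
#   gamma, beta, ema_mean, ema_var = init(C);
#   out = forward(X, fill(0.0, 1, 1), gamma, beta, C, Hin, Win, "train", ema_mean, ema_var, 0.9, 1e-5);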

# Original Apache SystemDS signature:
#backward = function(matrix[double] dout,
#    matrix[double] cache_mean, matrix[double] cache_inv_var,
#    matrix[double] X, matrix[double] gamma,
#    int C, int Hin, int Win, double epsilon)
#  return (matrix[double] dX, matrix[double] dgamma, matrix[double] dbeta) {
def backward(dout:matrix, cache_mean:matrix, cache_inv_var:matrix, X:matrix, gamma:matrix, C, Hin, Win, epsilon) -> matrix, matrix, matrix {
  /*
   * Computes the backward pass for a 2D (spatial) batch normalization
   * layer.
   *
   * Inputs:
   *  - dout: Gradient wrt `out` from upstream, of shape (N, C*Hin*Win).
   *  - cache_mean: Cache of the batch mean from the forward pass, of
   *      shape (C, 1). Note: This is used for performance during
   *      training.
   *  - cache_inv_var: Cache of the inverse variance from the forward pass,
   *      of shape (C, 1). Note: This is used for performance during
   *      training.
   *  - X: Input data matrix to the forward pass, of
   *      shape (N, C*Hin*Win).
   *  - gamma: Scale parameters, of shape (C, 1).
   *  - C: Number of input channels (dimensionality of input depth).
   *  - Hin: Input height.
   *  - Win: Input width.
   *  - epsilon: Smoothing term to avoid divide by zero errors.
   *      Typical values are in the range of [1e-5, 1e-3].
   *
   * Outputs:
   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
   *  - dgamma: Gradient wrt `gamma`, of shape (C, 1).
   *  - dbeta: Gradient wrt `beta`, of shape (C, 1).
   */
  # Compute gradients during training.
  # Placeholder values; the dedicated backward kernel below is not wired up yet:
  dX = X; dgamma = gamma; dbeta = gamma;
  # [dX, dgamma, dbeta] = batch_norm2d_backward(X, dout, gamma, epsilon, cache_mean, cache_inv_var)

  return dX, dgamma, dbeta;
}
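
# A minimal invocation sketch (illustration only; shapes and the seed are assumptions).
# cache_mean and cache_inv_var would normally come from the forward pass, which this
# translation does not yet return:
#
#   dout = rand(N, C * Hin * Win, 0.0, 1.0, 1.0, 7);
#   dX, dgamma, dbeta = backward(dout, cache_mean, cache_inv_var, X, gamma, C, Hin, Win, 1e-5);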

# Original Apache SystemDS signature:
#init = function(int C)
#  return (matrix[double] gamma, matrix[double] beta,
#          matrix[double] ema_mean, matrix[double] ema_var) {
def init(C) -> matrix, matrix, matrix, matrix {
  /*
   * Initialize the parameters of this layer.
   *
   * Note: This is just a convenience function, and parameters
   * may be initialized manually if needed.
   *
   * Inputs:
   *  - C: Number of input channels (dimensionality of input depth).
   *
   * Outputs:
   *  - gamma: Scale parameters, of shape (C, 1).
   *  - beta: Shift parameters, of shape (C, 1).
   *  - ema_mean: Exponential moving average of the mean, of
   *      shape (C, 1).
   *  - ema_var: Exponential moving average of the variance, of
   *      shape (C, 1).
   */
  gamma = fill(1.0, C, 1);
  beta = fill(0.0, C, 1);
  ema_mean = fill(0.0, C, 1);
  ema_var = fill(1.0, C, 1);
  return gamma, beta, ema_mean, ema_var;
}
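
# A minimal usage sketch (illustration only; C = 3 is an assumption). init() produces
# unit scale, zero shift, zero running mean, and unit running variance per channel:
#
#   gamma, beta, ema_mean, ema_var = init(3);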