Commit da84126 (1 parent: 0edf870). Showing 1 changed file with 164 additions and 0 deletions.
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Modifications Copyright 2024 The DAPHNE Consortium
#
#-------------------------------------------------------------

# This script has been manually translated from Apache SystemDS.

/*
 * 2D (Spatial) Batch Normalization layer.
 */

import "../util.daph" as "util";

# Original Apache SystemDS signature:
#forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
#    int C, int Hin, int Win, string mode,
#    matrix[double] ema_mean, matrix[double] ema_var,
#    double mu, double epsilon)
#  return (matrix[double] out, matrix[double] ema_mean_upd, matrix[double] ema_var_upd,
#          matrix[double] cache_mean, matrix[double] cache_inv_var) {
# Note: the W parameter is currently not used by this layer.
def forward(X:matrix, W:matrix, gamma:matrix, beta:matrix, C, Hin, Win, mode, ema_mean:matrix, ema_var:matrix, mu, epsilon)
  -> matrix {
  #, matrix, matrix, matrix, matrix {
  /*
   * Computes the forward pass for a 2D (spatial) batch normalization
   * layer. The input data has N examples, each represented as a 3D
   * volume unrolled into a single vector.
   *
   * A spatial batch normalization layer uses the per-channel sample
   * mean and per-channel uncorrected sample variance during training
   * to normalize each channel of the input data. Additionally, it
   * introduces learnable parameters (gamma, beta) to control the
   * amount of normalization.
   *
   *   `y = ((x-mean) / sqrt(var+eps)) * gamma + beta`
   *
   * This implementation maintains exponential moving averages of the
   * mean and variance during training for use during testing.
   *
   * Reference:
   *  - Batch Normalization: Accelerating Deep Network Training by
   *    Reducing Internal Covariate Shift, S. Ioffe & C. Szegedy, 2015
   *    - https://arxiv.org/abs/1502.03167
   *
   * Inputs:
   *  - X: Inputs, of shape (N, C*Hin*Win).
   *  - gamma: Scale parameters, of shape (C, 1).
   *  - beta: Shift parameters, of shape (C, 1).
   *  - C: Number of input channels (dimensionality of input depth).
   *  - Hin: Input height.
   *  - Win: Input width.
   *  - mode: 'train' or 'test' to indicate if the model is currently
   *      being trained or tested. During training, the current batch
   *      mean and variance will be used to normalize the inputs, while
   *      during testing, the exponential average of the mean and
   *      variance over all previous batches will be used.
   *  - ema_mean: Exponential moving average of the mean, of
   *      shape (C, 1).
   *  - ema_var: Exponential moving average of the variance, of
   *      shape (C, 1).
   *  - mu: Momentum value for moving averages.
   *      Typical values are in the range of [0.9, 0.999].
   *  - epsilon: Smoothing term to avoid divide by zero errors.
   *      Typical values are in the range of [1e-5, 1e-3].
   *
   * Outputs:
   *  - out: Outputs, of shape (N, C*Hin*Win).
   *  - ema_mean_upd: Updated exponential moving average of the mean,
   *      of shape (C, 1).
   *  - ema_var_upd: Updated exponential moving average of the variance,
   *      of shape (C, 1).
   *  - cache_mean: Cache of the batch mean, of shape (C, 1).
   *      Note: This is used for performance during training.
   *  - cache_inv_var: Cache of the inverse variance, of shape (C, 1).
   *      Note: This is used for performance during training.
   */
  #out = X; ema_mean_upd = ema_mean; ema_var_upd = ema_var; cache_mean = ema_mean; cache_inv_var = ema_var
  #[out, ema_mean_upd, ema_var_upd, cache_mean, cache_inv_var] = batch_norm2d(X, gamma, beta, ema_mean, ema_var, mode, epsilon, mu)
  return batch_norm2d(X, gamma, beta, ema_mean, ema_var, mode, epsilon, mu);
}
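
# A minimal usage sketch (illustration only; the sizes, values, and seed below are
# assumptions, not part of the translated layer). The unused W argument is given a
# 1x1 placeholder:
#
#   N = 2; C = 3; Hin = 4; Win = 4;
#   X = rand(N, C * Hin * Win, 0.0, 1.0, 1.0, 42);
#   gamma, beta, ema_mean, ema_var = init(C);
#   out = forward(X, fill(0.0, 1, 1), gamma, beta, C, Hin, Win, "train", ema_mean, ema_var, 0.9, 1e-5);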

# Original Apache SystemDS signature:
#backward = function(matrix[double] dout,
#    matrix[double] cache_mean, matrix[double] cache_inv_var,
#    matrix[double] X, matrix[double] gamma,
#    int C, int Hin, int Win, double epsilon)
#  return (matrix[double] dX, matrix[double] dgamma, matrix[double] dbeta) {
def backward(dout:matrix, cache_mean:matrix, cache_inv_var:matrix, X:matrix, gamma:matrix, C, Hin, Win, epsilon) -> matrix, matrix, matrix {
  /*
   * Computes the backward pass for a 2D (spatial) batch normalization
   * layer.
   *
   * Inputs:
   *  - dout: Gradient wrt `out` from upstream, of shape (N, C*Hin*Win).
   *  - cache_mean: Cache of the batch mean from the forward pass, of
   *      shape (C, 1). Note: This is used for performance during
   *      training.
   *  - cache_inv_var: Cache of the inverse variance from the forward pass,
   *      of shape (C, 1). Note: This is used for performance during
   *      training.
   *  - X: Input data matrix to the forward pass, of
   *      shape (N, C*Hin*Win).
   *  - gamma: Scale parameters, of shape (C, 1).
   *  - C: Number of input channels (dimensionality of input depth).
   *  - Hin: Input height.
   *  - Win: Input width.
   *  - epsilon: Smoothing term to avoid divide by zero errors.
   *      Typical values are in the range of [1e-5, 1e-3].
   *
   * Outputs:
   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
   *  - dgamma: Gradient wrt `gamma`, of shape (C, 1).
   *  - dbeta: Gradient wrt `beta`, of shape (C, 1).
   */
  # Compute gradients during training.
  # Placeholder values; the dedicated backward kernel below is not wired up yet:
  dX = X; dgamma = gamma; dbeta = gamma;
  # [dX, dgamma, dbeta] = batch_norm2d_backward(X, dout, gamma, epsilon, cache_mean, cache_inv_var)

  return dX, dgamma, dbeta;
}
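
# A minimal invocation sketch (illustration only; shapes and the seed are assumptions).
# cache_mean and cache_inv_var would normally come from the forward pass, which this
# translation does not yet return:
#
#   dout = rand(N, C * Hin * Win, 0.0, 1.0, 1.0, 7);
#   dX, dgamma, dbeta = backward(dout, cache_mean, cache_inv_var, X, gamma, C, Hin, Win, 1e-5);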

# Original Apache SystemDS signature:
#init = function(int C)
#  return (matrix[double] gamma, matrix[double] beta,
#          matrix[double] ema_mean, matrix[double] ema_var) {
def init(C) -> matrix, matrix, matrix, matrix {
  /*
   * Initialize the parameters of this layer.
   *
   * Note: This is just a convenience function, and parameters
   * may be initialized manually if needed.
   *
   * Inputs:
   *  - C: Number of input channels (dimensionality of input depth).
   *
   * Outputs:
   *  - gamma: Scale parameters, of shape (C, 1).
   *  - beta: Shift parameters, of shape (C, 1).
   *  - ema_mean: Exponential moving average of the mean, of
   *      shape (C, 1).
   *  - ema_var: Exponential moving average of the variance, of
   *      shape (C, 1).
   */
  gamma = fill(1.0, C, 1);
  beta = fill(0.0, C, 1);
  ema_mean = fill(0.0, C, 1);
  ema_var = fill(1.0, C, 1);
  return gamma, beta, ema_mean, ema_var;
}
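
# A minimal usage sketch (illustration only; C = 3 is an assumption). init() produces
# unit scale, zero shift, zero running mean, and unit running variance per channel:
#
#   gamma, beta, ema_mean, ema_var = init(3);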