From dc12757074bf07f75594625a51327a2b5682f7bd Mon Sep 17 00:00:00 2001
From: Eric Tzeng
Date: Fri, 20 Feb 2015 12:42:28 -0800
Subject: [PATCH 1/2] Add failing tests for LRNLayer due to large local region

---
 src/caffe/test/test_lrn_layer.cpp | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/src/caffe/test/test_lrn_layer.cpp b/src/caffe/test/test_lrn_layer.cpp
index 07425df9b3a..c4e2f8ea7f2 100644
--- a/src/caffe/test/test_lrn_layer.cpp
+++ b/src/caffe/test/test_lrn_layer.cpp
@@ -138,6 +138,22 @@ TYPED_TEST(LRNLayerTest, TestForwardAcrossChannels) {
   }
 }
 
+TYPED_TEST(LRNLayerTest, TestForwardAcrossChannelsLargeRegion) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_lrn_param()->set_local_size(15);
+  LRNLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  Blob<Dtype> top_reference;
+  this->ReferenceLRNForward(*(this->blob_bottom_), layer_param,
+      &top_reference);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i],
+                this->epsilon_);
+  }
+}
+
 TYPED_TEST(LRNLayerTest, TestGradientAcrossChannels) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
@@ -159,6 +175,28 @@ TYPED_TEST(LRNLayerTest, TestGradientAcrossChannels) {
       this->blob_top_vec_);
 }
 
+TYPED_TEST(LRNLayerTest, TestGradientAcrossChannelsLargeRegion) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  layer_param.mutable_lrn_param()->set_local_size(15);
+  LRNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-2);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    this->blob_top_->mutable_cpu_diff()[i] = 1.;
+  }
+  vector<bool> propagate_down(this->blob_bottom_vec_.size(), true);
+  layer.Backward(this->blob_top_vec_, propagate_down,
+      this->blob_bottom_vec_);
+  // for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+  //   std::cout << "CPU diff " << this->blob_bottom_->cpu_diff()[i]
+  //       << std::endl;
+  // }
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
 TYPED_TEST(LRNLayerTest, TestSetupWithinChannel) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;

From e27c369d3fc06e122a786230af4bb7859c990e86 Mon Sep 17 00:00:00 2001
From: Eric Tzeng
Date: Fri, 20 Feb 2015 13:07:32 -0800
Subject: [PATCH 2/2] Bounds checks for cross-channel LRN.
---
 src/caffe/layers/lrn_layer.cu | 39 ++++++++++++++---------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu
index 58c39926c72..24aa6a30130 100644
--- a/src/caffe/layers/lrn_layer.cu
+++ b/src/caffe/layers/lrn_layer.cu
@@ -26,26 +26,24 @@ __global__ void LRNFillScale(const int nthreads, const Dtype* in,
     Dtype accum_scale = 0;
     // fill the scale at [n, :, h, w]
     // accumulate values
-    while (head < post_pad) {
+    while (head < post_pad && head < channels) {
       accum_scale += in[head * step] * in[head * step];
       ++head;
     }
-    // until we reach size, nothing needs to be subtracted
-    while (head < size) {
-      accum_scale += in[head * step] * in[head * step];
-      scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
-      ++head;
-    }
     // both add and subtract
     while (head < channels) {
       accum_scale += in[head * step] * in[head * step];
-      accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      if (head - size >= 0) {
+        accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      }
       scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
       ++head;
     }
     // subtract only
     while (head < channels + post_pad) {
-      accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      if (head - size >= 0) {
+        accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      }
       scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
       ++head;
     }
@@ -143,26 +141,19 @@ __global__ void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
     int post_pad = size - pre_pad - 1;
     Dtype accum_ratio = 0;
     // accumulate values
-    while (head < post_pad) {
+    while (head < post_pad && head < channels) {
       accum_ratio += top_diff[head * step] * top_data[head * step] /
           scale[head * step];
       ++head;
     }
-    // until we reach size, nothing needs to be subtracted
-    while (head < size) {
-      accum_ratio += top_diff[head * step] * top_data[head * step] /
-          scale[head * step];
-      bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
-          * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
-          bottom_data[(head - post_pad) * step] * accum_ratio;
-      ++head;
-    }
     // both add and subtract
     while (head < channels) {
       accum_ratio += top_diff[head * step] * top_data[head * step] /
          scale[head * step];
-      accum_ratio -= top_diff[(head - size) * step] *
-          top_data[(head - size) * step] / scale[(head - size) * step];
+      if (head - size >= 0) {
+        accum_ratio -= top_diff[(head - size) * step] *
+            top_data[(head - size) * step] / scale[(head - size) * step];
+      }
       bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
           * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
           bottom_data[(head - post_pad) * step] * accum_ratio;
@@ -170,8 +161,10 @@ __global__ void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
     }
     // subtract only
     while (head < channels + post_pad) {
-      accum_ratio -= top_diff[(head - size) * step] *
-          top_data[(head - size) * step] / scale[(head - size) * step];
+      if (head - size >= 0) {
+        accum_ratio -= top_diff[(head - size) * step] *
+            top_data[(head - size) * step] / scale[(head - size) * step];
+      }
       bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
          * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
          bottom_data[(head - post_pad) * step] * accum_ratio;
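
Note (not part of the patches): below is a minimal, CPU-side C++ sketch of the
bounds-checked sliding-window sum that the patched LRNFillScale kernel relies
on, for readers who want to see the fix outside of CUDA. The function name
fill_scale_1d and the example values (7 channels, local_size 15, mirroring the
setting used by the new tests) are illustrative assumptions, not Caffe API.

#include <cstdio>
#include <vector>

// Computes scale[c] = k + alpha_over_size * sum of in[j]^2 over a window of
// `size` channels centred on c, for a single column of `channels` values.
// Mirrors the patched kernel: the leading accumulation never runs past the
// last channel, and the window tail is only subtracted once head - size is a
// valid index, so a local region larger than the channel count no longer
// touches out-of-bounds memory. Assumes channels >= post_pad, as in the
// Caffe test blobs.
void fill_scale_1d(const std::vector<float>& in, std::vector<float>* scale,
                   int size, float k, float alpha_over_size) {
  const int channels = static_cast<int>(in.size());
  const int pre_pad = (size - 1) / 2;
  const int post_pad = size - pre_pad - 1;
  scale->assign(channels, 0.f);
  float accum = 0.f;
  int head = 0;
  // accumulate values, but never past the last channel
  while (head < post_pad && head < channels) {
    accum += in[head] * in[head];
    ++head;
  }
  // both add and subtract; the subtraction is guarded until the window tail
  // actually exists
  while (head < channels) {
    accum += in[head] * in[head];
    if (head - size >= 0) {
      accum -= in[head - size] * in[head - size];
    }
    (*scale)[head - post_pad] = k + accum * alpha_over_size;
    ++head;
  }
  // subtract only, with the same guard
  while (head < channels + post_pad) {
    if (head - size >= 0) {
      accum -= in[head - size] * in[head - size];
    }
    (*scale)[head - post_pad] = k + accum * alpha_over_size;
    ++head;
  }
}

int main() {
  // 7 channels with local_size 15: the window always covers every channel,
  // so every scale entry equals k + alpha_over_size * (sum of all squares).
  std::vector<float> in = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
  std::vector<float> scale;
  fill_scale_1d(in, &scale, /*size=*/15, /*k=*/1.f, /*alpha_over_size=*/1.f / 15.f);
  for (float s : scale) {
    std::printf("%f\n", s);
  }
  return 0;
}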