BVLC · jeffdonahue · Mar 18, 2015 · Feb 20, 2015 · Feb 20, 2015
diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu
@@ -26,26 +26,24 @@ __global__ void LRNFillScale(const int nthreads, const Dtype* in,
     Dtype accum_scale = 0;
     // fill the scale at [n, :, h, w]
     // accumulate values
-    while (head < post_pad) {
+    while (head < post_pad && head < channels) {
       accum_scale += in[head * step] * in[head * step];
       ++head;
     }
-    // until we reach size, nothing needs to be subtracted
-    while (head < size) {
-      accum_scale += in[head * step] * in[head * step];
-      scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
-      ++head;
-    }
     // both add and subtract
     while (head < channels) {
       accum_scale += in[head * step] * in[head * step];
-      accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      if (head - size >= 0) {
+        accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      }
       scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
       ++head;
     }
     // subtract only
     while (head < channels + post_pad) {
-      accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      if (head - size >= 0) {
+        accum_scale -= in[(head - size) * step] * in[(head - size) * step];
+      }
       scale[(head - post_pad) * step] = k + accum_scale * alpha_over_size;
       ++head;
     }
@@ -143,35 +141,30 @@ __global__ void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
     int post_pad = size - pre_pad - 1;
     Dtype accum_ratio = 0;
     // accumulate values
-    while (head < post_pad) {
+    while (head < post_pad && head < channels) {
       accum_ratio += top_diff[head * step] * top_data[head * step] /
           scale[head * step];
       ++head;
     }
-    // until we reach size, nothing needs to be subtracted
-    while (head < size) {
-      accum_ratio += top_diff[head * step] * top_data[head * step] /
-          scale[head * step];
-      bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
-          * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
-          bottom_data[(head - post_pad) * step] * accum_ratio;
-      ++head;
-    }
     // both add and subtract
     while (head < channels) {
       accum_ratio += top_diff[head * step] * top_data[head * step] /
           scale[head * step];
-      accum_ratio -= top_diff[(head - size) * step] *
-          top_data[(head - size) * step] / scale[(head - size) * step];
+      if (head - size >= 0) {
+        accum_ratio -= top_diff[(head - size) * step] *
+            top_data[(head - size) * step] / scale[(head - size) * step];
+      }
       bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
           * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
           bottom_data[(head - post_pad) * step] * accum_ratio;
       ++head;
     }
     // subtract only
     while (head < channels + post_pad) {
-      accum_ratio -= top_diff[(head - size) * step] *
-          top_data[(head - size) * step] / scale[(head - size) * step];
+      if (head - size >= 0) {
+        accum_ratio -= top_diff[(head - size) * step] *
+            top_data[(head - size) * step] / scale[(head - size) * step];
+      }
       bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
           * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
           bottom_data[(head - post_pad) * step] * accum_ratio;

diff --git a/src/caffe/test/test_lrn_layer.cpp b/src/caffe/test/test_lrn_layer.cpp
@@ -138,6 +138,22 @@ TYPED_TEST(LRNLayerTest, TestForwardAcrossChannels) {
   }
 }
 
+TYPED_TEST(LRNLayerTest, TestForwardAcrossChannelsLargeRegion) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;  // only local_size is set explicitly
+  layer_param.mutable_lrn_param()->set_local_size(15);
+  LRNLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  Blob<Dtype> top_reference;  // receives the reference forward result
+  this->ReferenceLRNForward(*(this->blob_bottom_), layer_param,
+      &top_reference);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {  // per-element check
+    EXPECT_NEAR(this->blob_top_->cpu_data()[i], top_reference.cpu_data()[i],
+                this->epsilon_);  // absolute tolerance from the fixture
+  }
+}
+
 TYPED_TEST(LRNLayerTest, TestGradientAcrossChannels) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;
@@ -159,6 +175,28 @@ TYPED_TEST(LRNLayerTest, TestGradientAcrossChannels) {
       this->blob_top_vec_);
 }
 
+TYPED_TEST(LRNLayerTest, TestGradientAcrossChannelsLargeRegion) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;  // only local_size is set explicitly
+  layer_param.mutable_lrn_param()->set_local_size(15);
+  LRNLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-2);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    this->blob_top_->mutable_cpu_diff()[i] = 1.;  // unit top gradient
+  }
+  vector<bool> propagate_down(this->blob_bottom_vec_.size(), true);
+  layer.Backward(this->blob_top_vec_, propagate_down,
+                 this->blob_bottom_vec_);  // result is not asserted directly
+  // Debug aid (disabled): dump the bottom diffs from the manual Backward.
+  // for (int i = 0; i < this->blob_bottom_->count(); ++i)
+  //   std::cout << "CPU diff " << this->blob_bottom_->cpu_diff()[i]
+  //       << std::endl;
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
 TYPED_TEST(LRNLayerTest, TestSetupWithinChannel) {
   typedef typename TypeParam::Dtype Dtype;
   LayerParameter layer_param;