From 761c81505e86ef9e263820d38448cac5bf81f838 Mon Sep 17 00:00:00 2001
From: to3i <tobias.dencker@gmail.com>
Date: Fri, 11 Jul 2014 13:19:23 +0200
Subject: [PATCH 1/2] Implemented elementwise max layer

---
 include/caffe/common_layers.hpp       |  1 +
 src/caffe/layers/eltwise_layer.cpp    | 50 ++++++++++++++++++++
 src/caffe/layers/eltwise_layer.cu     | 67 ++++++++++++++++++++++++---
 src/caffe/proto/caffe.proto           |  1 +
 src/caffe/test/test_eltwise_layer.cpp | 32 +++++++++++++
 5 files changed, 145 insertions(+), 6 deletions(-)
diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp
index c170742308c..190b5c24b05 100644
--- a/include/caffe/common_layers.hpp
+++ b/include/caffe/common_layers.hpp
@@ -178,6 +178,7 @@ class EltwiseLayer : public Layer<Dtype> {
 
   EltwiseParameter_EltwiseOp op_;
   vector<Dtype> coeffs_;
+  shared_ptr<Blob<int> > max_idx_;
 
   bool stable_prod_grad_;
 };
diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp
index 56a1920e32f..b25611aa6c3 100644
--- a/src/caffe/layers/eltwise_layer.cpp
+++ b/src/caffe/layers/eltwise_layer.cpp
@@ -1,4 +1,5 @@
 #include <vector>
+#include <cfloat>
 
 #include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -36,11 +37,20 @@ void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     }
   }
   stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
+  // If max operation, we will initialize the vector index part.
+  if (this->layer_param_.eltwise_param().operation() ==
+      EltwiseParameter_EltwiseOp_MAX && top->size() == 1) {
+    max_idx_.reset(new Blob<int>(bottom[0]->num(), channels,
+                                 height, width));
+  }
 }
 
 template <typename Dtype>
 void EltwiseLayer<Dtype>::Forward_cpu(
     const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
+  int* mask = NULL;
+  const Dtype* bottom_data_a = NULL;
+  const Dtype* bottom_data_b = NULL;
   const int count = (*top)[0]->count();
   Dtype* top_data = (*top)[0]->mutable_cpu_data();
   switch (op_) {
@@ -57,6 +67,35 @@ void EltwiseLayer<Dtype>::Forward_cpu(
       caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
     }
     break;
+  case EltwiseParameter_EltwiseOp_MAX:
+    // Initialize
+    mask = max_idx_->mutable_cpu_data();
+    caffe_set(count, -1, mask);
+    caffe_set(count, Dtype(-FLT_MAX), top_data);
+    // bottom 0 & 1
+    bottom_data_a = bottom[0]->cpu_data();
+    bottom_data_b = bottom[1]->cpu_data();
+    for (int idx = 0; idx < count; ++idx) {
+      if (bottom_data_a[idx] > bottom_data_b[idx]) {
+        top_data[idx] = bottom_data_a[idx]; // maxval
+        mask[idx] = 0; // maxid
+      } else {
+        top_data[idx] = bottom_data_b[idx]; // maxval
+        mask[idx] = 1; // maxid
+      }
+    }
+    // bottom 2++
+    bottom_data_a = top_data;
+    for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
+      bottom_data_b = bottom[blob_idx]->cpu_data();
+      for (int idx = 0; idx < count; ++idx) {
+        if (bottom_data_a[idx] < bottom_data_b[idx]) {
+          top_data[idx] = bottom_data_b[idx];  // maxval
+          mask[idx] = blob_idx;  // maxid
+        }
+      }
+    }
+    break;
   default:
     LOG(FATAL) << "Unknown elementwise operation.";
   }
@@ -65,6 +104,7 @@ void EltwiseLayer<Dtype>::Forward_cpu(
 template <typename Dtype>
 void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const int* mask = NULL;
   const int count = top[0]->count();
   const Dtype* top_data = top[0]->cpu_data();
   const Dtype* top_diff = top[0]->cpu_diff();
@@ -98,6 +138,16 @@ void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
           caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
         }
         break;
+      case EltwiseParameter_EltwiseOp_MAX:
+        mask = max_idx_->cpu_data();
+        for (int index = 0; index < count; ++index) {
+          Dtype gradient = 0;
+          if (mask[index] == i) {
+            gradient += top_diff[index];
+          }
+          bottom_diff[index] = gradient;
+        }
+        break;
       default:
         LOG(FATAL) << "Unknown elementwise operation.";
       }
diff --git a/src/caffe/layers/eltwise_layer.cu b/src/caffe/layers/eltwise_layer.cu
index e005cb91122..78f911ba263 100644
--- a/src/caffe/layers/eltwise_layer.cu
+++ b/src/caffe/layers/eltwise_layer.cu
@@ -1,4 +1,5 @@
 #include <vector>
+#include <cfloat>
 
 #include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -7,14 +8,39 @@
 namespace caffe {
 
 template <typename Dtype>
-void EltwiseLayer<Dtype>::Forward_gpu(
-    const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
+__global__ void MaxForward(const int nthreads, const Dtype* bottom_data_a,
+    const Dtype* bottom_data_b, const int blob_idx, Dtype* top_data,
+    int* mask) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    Dtype maxval = -FLT_MAX;
+    int maxidx = -1;
+    if (bottom_data_a[index] > bottom_data_b[index]) {
+      // only update for very first bottom_data blob (blob_idx == 0)
+      if (blob_idx == 0) {
+        maxval = bottom_data_a[index];
+        top_data[index] = maxval;
+        maxidx = blob_idx;
+        mask[index] = maxidx;
+      }
+    } else {
+      maxval = bottom_data_b[index];
+      top_data[index] = maxval;
+      maxidx = blob_idx + 1;
+      mask[index] = maxidx;
+    }
+  }
+}
+
+template<typename Dtype>
+void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  int* mask = NULL;
   const int count = (*top)[0]->count();
   Dtype* top_data = (*top)[0]->mutable_gpu_data();
   switch (op_) {
   case EltwiseParameter_EltwiseOp_PROD:
-    caffe_gpu_mul(count, bottom[0]->gpu_data(),
-        bottom[1]->gpu_data(), top_data);
+    caffe_gpu_mul(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
+        top_data);
     for (int i = 2; i < bottom.size(); ++i) {
       caffe_gpu_mul(count, top_data, bottom[i]->gpu_data(), top_data);
     }
@@ -26,14 +52,38 @@ void EltwiseLayer<Dtype>::Forward_gpu(
       caffe_gpu_axpy(count, coeffs_[i], bottom[i]->gpu_data(), top_data);
     }
     break;
+  case EltwiseParameter_EltwiseOp_MAX:
+    mask = max_idx_->mutable_gpu_data();
+    // NOLINT_NEXT_LINE(whitespace/operators)
+    MaxForward<Dtype> <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+        count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 0, top_data, mask);
+    for (int i = 2; i < bottom.size(); ++i) {
+      // NOLINT_NEXT_LINE(whitespace/operators)
+      MaxForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+          count, top_data, bottom[i]->gpu_data(), i-1, top_data, mask);
+    }
+    break;
   default:
     LOG(FATAL) << "Unknown elementwise operation.";
   }
 }
 
-template <typename Dtype>
+template<typename Dtype>
+__global__ void MaxBackward(const int nthreads, const Dtype* top_diff,
+    const int blob_idx, const int* mask, Dtype* bottom_diff) {
+  CUDA_KERNEL_LOOP(index, nthreads) {
+    Dtype gradient = 0;
+    if (mask[index] == blob_idx) {
+      gradient += top_diff[index];
+    }
+    bottom_diff[index] = gradient;
+  }
+}
+
+template<typename Dtype>
 void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const int* mask = NULL;
   const int count = top[0]->count();
   const Dtype* top_data = top[0]->gpu_data();
   const Dtype* top_diff = top[0]->gpu_diff();
@@ -67,6 +117,12 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
           caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff);
         }
         break;
+      case EltwiseParameter_EltwiseOp_MAX:
+        mask = max_idx_->gpu_data();
+        // NOLINT_NEXT_LINE(whitespace/operators)
+        MaxBackward<Dtype> <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+            count, top_diff, i, mask, bottom_diff);
+        break;
       default:
         LOG(FATAL) << "Unknown elementwise operation.";
       }
@@ -76,5 +132,4 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
 
 INSTANTIATE_CLASS(EltwiseLayer);
 
-
 }  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 8cb82cebe22..0e9ecf72728 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -450,6 +450,7 @@ message EltwiseParameter {
   enum EltwiseOp {
     PROD = 0;
     SUM = 1;
+    MAX = 2;
   }
   optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
   repeated float coeff = 2; // blob-wise coefficient for SUM operation
diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp
index da5e3538131..3c617281504 100644
--- a/src/caffe/test/test_eltwise_layer.cpp
+++ b/src/caffe/test/test_eltwise_layer.cpp
@@ -173,4 +173,36 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeffGradient) {
       &(this->blob_top_vec_));
 }
 
+TYPED_TEST(EltwiseLayerTest, TestMax) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
+  shared_ptr<EltwiseLayer<Dtype> > layer(
+      new EltwiseLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  const Dtype* data = this->blob_top_->cpu_data();
+  const int count = this->blob_top_->count();
+  const Dtype* in_data_a = this->blob_bottom_a_->cpu_data();
+  const Dtype* in_data_b = this->blob_bottom_b_->cpu_data();
+  const Dtype* in_data_c = this->blob_bottom_c_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_GE(data[i], in_data_a[i]);
+    EXPECT_GE(data[i], in_data_b[i]);
+    EXPECT_GE(data[i], in_data_c[i]);
+  }
+}
+
+TYPED_TEST(EltwiseLayerTest, TestMaxGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
+  eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
+  EltwiseLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
 }  // namespace caffe

From 6bda40640bba5e3e4751ae5fa6161da7df941ebe Mon Sep 17 00:00:00 2001
From: Jeff Donahue <jeff.donahue@gmail.com>
Date: Mon, 8 Sep 2014 17:49:42 +0200
Subject: [PATCH 2/2] lint & reduce gradient check stepsize to pass checks

---
 src/caffe/layers/eltwise_layer.cpp    | 13 ++++++-------
 src/caffe/layers/eltwise_layer.cu     | 12 ++++++------
 src/caffe/test/test_eltwise_layer.cpp |  9 +++++----
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp
index b25611aa6c3..46034be4784 100644
--- a/src/caffe/layers/eltwise_layer.cpp
+++ b/src/caffe/layers/eltwise_layer.cpp
@@ -1,5 +1,5 @@
-#include <vector>
 #include <cfloat>
+#include <vector>
 
 #include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -77,19 +77,18 @@ void EltwiseLayer<Dtype>::Forward_cpu(
     bottom_data_b = bottom[1]->cpu_data();
     for (int idx = 0; idx < count; ++idx) {
       if (bottom_data_a[idx] > bottom_data_b[idx]) {
-        top_data[idx] = bottom_data_a[idx]; // maxval
-        mask[idx] = 0; // maxid
+        top_data[idx] = bottom_data_a[idx];  // maxval
+        mask[idx] = 0;  // maxid
       } else {
-        top_data[idx] = bottom_data_b[idx]; // maxval
-        mask[idx] = 1; // maxid
+        top_data[idx] = bottom_data_b[idx];  // maxval
+        mask[idx] = 1;  // maxid
       }
     }
     // bottom 2++
-    bottom_data_a = top_data;
     for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
       bottom_data_b = bottom[blob_idx]->cpu_data();
       for (int idx = 0; idx < count; ++idx) {
-        if (bottom_data_a[idx] < bottom_data_b[idx]) {
+        if (bottom_data_b[idx] > top_data[idx]) {
           top_data[idx] = bottom_data_b[idx];  // maxval
           mask[idx] = blob_idx;  // maxid
         }
diff --git a/src/caffe/layers/eltwise_layer.cu b/src/caffe/layers/eltwise_layer.cu
index 78f911ba263..c0d47fd413b 100644
--- a/src/caffe/layers/eltwise_layer.cu
+++ b/src/caffe/layers/eltwise_layer.cu
@@ -1,5 +1,5 @@
-#include <vector>
 #include <cfloat>
+#include <vector>
 
 #include "caffe/layer.hpp"
 #include "caffe/util/math_functions.hpp"
@@ -31,7 +31,7 @@ __global__ void MaxForward(const int nthreads, const Dtype* bottom_data_a,
   }
 }
 
-template<typename Dtype>
+template <typename Dtype>
 void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     vector<Blob<Dtype>*>* top) {
   int* mask = NULL;
@@ -68,7 +68,7 @@ void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   }
 }
 
-template<typename Dtype>
+template <typename Dtype>
 __global__ void MaxBackward(const int nthreads, const Dtype* top_diff,
     const int blob_idx, const int* mask, Dtype* bottom_diff) {
   CUDA_KERNEL_LOOP(index, nthreads) {
@@ -80,7 +80,7 @@ __global__ void MaxBackward(const int nthreads, const Dtype* top_diff,
   }
 }
 
-template<typename Dtype>
+template <typename Dtype>
 void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
   const int* mask = NULL;
@@ -119,8 +119,8 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
         break;
       case EltwiseParameter_EltwiseOp_MAX:
         mask = max_idx_->gpu_data();
-        // NOLINT_NEXT_LINE(whitespace/operators)
-        MaxBackward<Dtype> <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+        MaxBackward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+            <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
             count, top_diff, i, mask, bottom_diff);
         break;
       default:
diff --git a/src/caffe/test/test_eltwise_layer.cpp b/src/caffe/test/test_eltwise_layer.cpp
index 3c617281504..d5cf08229ab 100644
--- a/src/caffe/test/test_eltwise_layer.cpp
+++ b/src/caffe/test/test_eltwise_layer.cpp
@@ -1,3 +1,4 @@
+#include <algorithm>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -23,6 +24,7 @@ class EltwiseLayerTest : public MultiDeviceTest<TypeParam> {
         blob_bottom_c_(new Blob<Dtype>(2, 3, 4, 5)),
         blob_top_(new Blob<Dtype>()) {
     // fill the values
+    Caffe::set_random_seed(1701);
     FillerParameter filler_param;
     UniformFiller<Dtype> filler(filler_param);
     filler.Fill(this->blob_bottom_a_);
@@ -188,9 +190,8 @@ TYPED_TEST(EltwiseLayerTest, TestMax) {
   const Dtype* in_data_b = this->blob_bottom_b_->cpu_data();
   const Dtype* in_data_c = this->blob_bottom_c_->cpu_data();
   for (int i = 0; i < count; ++i) {
-    EXPECT_GE(data[i], in_data_a[i]);
-    EXPECT_GE(data[i], in_data_b[i]);
-    EXPECT_GE(data[i], in_data_c[i]);
+    EXPECT_EQ(data[i],
+              std::max(in_data_a[i], std::max(in_data_b[i], in_data_c[i])));
   }
 }
 
@@ -200,7 +201,7 @@ TYPED_TEST(EltwiseLayerTest, TestMaxGradient) {
   EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
   eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
   EltwiseLayer<Dtype> layer(layer_param);
-  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  GradientChecker<Dtype> checker(1e-4, 1e-3);
   checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
       &(this->blob_top_vec_));
 }