Merge pull request #1053 from jeffdonahue/to3i-elem_max_layer
rebase and fixup #688 from @to3i: elementwise max
jeffdonahue committed Sep 10, 2014
2 parents fc921bf + 6bda406 commit 133b4db
Showing 5 changed files with 144 additions and 5 deletions.
1 change: 1 addition & 0 deletions include/caffe/common_layers.hpp
@@ -178,6 +178,7 @@ class EltwiseLayer : public Layer<Dtype> {

EltwiseParameter_EltwiseOp op_;
vector<Dtype> coeffs_;
shared_ptr<Blob<int> > max_idx_;

bool stable_prod_grad_;
};
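The new max_idx_ member records, for every element of the output, the index of the bottom blob that supplied the maximum. LayerSetUp allocates it to match the bottom shape when the MAX operation is selected; the forward pass fills it, and the backward pass reads it to route each top gradient to exactly one bottom.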
49 changes: 49 additions & 0 deletions src/caffe/layers/eltwise_layer.cpp
@@ -1,3 +1,4 @@
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
@@ -36,11 +37,20 @@ void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
}
}
stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
// If max operation, initialize the blob that stores the elementwise argmax indices.
if (this->layer_param_.eltwise_param().operation() ==
EltwiseParameter_EltwiseOp_MAX && top->size() == 1) {
max_idx_.reset(new Blob<int>(bottom[0]->num(), channels,
height, width));
}
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>* top) {
int* mask = NULL;
const Dtype* bottom_data_a = NULL;
const Dtype* bottom_data_b = NULL;
const int count = (*top)[0]->count();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
switch (op_) {
@@ -57,6 +67,34 @@ void EltwiseLayer<Dtype>::Forward_cpu(
caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
// Initialize
mask = max_idx_->mutable_cpu_data();
caffe_set(count, -1, mask);
caffe_set(count, Dtype(-FLT_MAX), top_data);
// bottom 0 & 1
bottom_data_a = bottom[0]->cpu_data();
bottom_data_b = bottom[1]->cpu_data();
for (int idx = 0; idx < count; ++idx) {
if (bottom_data_a[idx] > bottom_data_b[idx]) {
top_data[idx] = bottom_data_a[idx]; // maxval
mask[idx] = 0; // maxid
} else {
top_data[idx] = bottom_data_b[idx]; // maxval
mask[idx] = 1; // maxid
}
}
// bottom 2++
for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) {
bottom_data_b = bottom[blob_idx]->cpu_data();
for (int idx = 0; idx < count; ++idx) {
if (bottom_data_b[idx] > top_data[idx]) {
top_data[idx] = bottom_data_b[idx]; // maxval
mask[idx] = blob_idx; // maxid
}
}
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
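// Worked example of the MAX forward pass: with two-element bottoms
// a = (1, 5), b = (4, 2), c = (3, 3), the first loop yields top = (4, 5)
// and mask = (1, 0); folding in c changes nothing, since no element of c
// strictly exceeds the running maximum. Note that ties between the first
// two bottoms resolve to bottom 1 (the else branch), while later bottoms
// win only on strict inequality.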
@@ -65,6 +103,7 @@ void EltwiseLayer<Dtype>::Forward_cpu(
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const int* mask = NULL;
const int count = top[0]->count();
const Dtype* top_data = top[0]->cpu_data();
const Dtype* top_diff = top[0]->cpu_diff();
Expand Down Expand Up @@ -98,6 +137,16 @@ void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
mask = max_idx_->cpu_data();
for (int index = 0; index < count; ++index) {
Dtype gradient = 0;
if (mask[index] == i) {
gradient += top_diff[index];
}
bottom_diff[index] = gradient;
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
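// Continuing the example: with top_diff = (0.1, 0.2) and mask = (1, 0),
// bottom 0 receives the gradient (0, 0.2), bottom 1 receives (0.1, 0), and
// bottom 2 receives (0, 0): each top gradient flows to the single bottom
// that won the elementwise max.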
65 changes: 60 additions & 5 deletions src/caffe/layers/eltwise_layer.cu
@@ -1,3 +1,4 @@
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
@@ -7,14 +8,39 @@
namespace caffe {

template <typename Dtype>
__global__ void MaxForward(const int nthreads, const Dtype* bottom_data_a,
const Dtype* bottom_data_b, const int blob_idx, Dtype* top_data,
int* mask) {
CUDA_KERNEL_LOOP(index, nthreads) {
Dtype maxval = -FLT_MAX;
int maxidx = -1;
if (bottom_data_a[index] > bottom_data_b[index]) {
// only update for very first bottom_data blob (blob_idx == 0)
if (blob_idx == 0) {
maxval = bottom_data_a[index];
top_data[index] = maxval;
maxidx = blob_idx;
mask[index] = maxidx;
}
} else {
maxval = bottom_data_b[index];
top_data[index] = maxval;
maxidx = blob_idx + 1;
mask[index] = maxidx;
}
}
}
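// Note on the blob_idx == 0 guard: the loop in Forward_gpu below passes
// top_data itself as bottom_data_a for every blob after the first, so when
// the running maximum wins the comparison, top_data and mask already hold
// the correct values and nothing needs to be written. The kernel therefore
// writes only when the newly considered blob wins.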

template <typename Dtype>
void EltwiseLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
int* mask = NULL;
const int count = (*top)[0]->count();
Dtype* top_data = (*top)[0]->mutable_gpu_data();
switch (op_) {
case EltwiseParameter_EltwiseOp_PROD:
caffe_gpu_mul(count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
top_data);
for (int i = 2; i < bottom.size(); ++i) {
caffe_gpu_mul(count, top_data, bottom[i]->gpu_data(), top_data);
}
@@ -26,14 +52,38 @@ void EltwiseLayer<Dtype>::Forward_gpu(
caffe_gpu_axpy(count, coeffs_[i], bottom[i]->gpu_data(), top_data);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
mask = max_idx_->mutable_gpu_data();
// NOLINT_NEXT_LINE(whitespace/operators)
MaxForward<Dtype> <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom[0]->gpu_data(), bottom[1]->gpu_data(), 0, top_data, mask);
for (int i = 2; i < bottom.size(); ++i) {
// NOLINT_NEXT_LINE(whitespace/operators)
MaxForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_data, bottom[i]->gpu_data(), i-1, top_data, mask);
}
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
}

template <typename Dtype>
__global__ void MaxBackward(const int nthreads, const Dtype* top_diff,
const int blob_idx, const int* mask, Dtype* bottom_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
Dtype gradient = 0;
if (mask[index] == blob_idx) {
gradient += top_diff[index];
}
bottom_diff[index] = gradient;
}
}

template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
const int* mask = NULL;
const int count = top[0]->count();
const Dtype* top_data = top[0]->gpu_data();
const Dtype* top_diff = top[0]->gpu_diff();
@@ -67,6 +117,12 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
caffe_gpu_scale(count, coeffs_[i], top_diff, bottom_diff);
}
break;
case EltwiseParameter_EltwiseOp_MAX:
mask = max_idx_->gpu_data();
MaxBackward<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, i, mask, bottom_diff);
break;
default:
LOG(FATAL) << "Unknown elementwise operation.";
}
@@ -76,5 +132,4 @@ void EltwiseLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,

INSTANTIATE_CLASS(EltwiseLayer);


} // namespace caffe
1 change: 1 addition & 0 deletions src/caffe/proto/caffe.proto
@@ -450,6 +450,7 @@ message EltwiseParameter {
enum EltwiseOp {
PROD = 0;
SUM = 1;
MAX = 2;
}
optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
repeated float coeff = 2; // blob-wise coefficient for SUM operation
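
With MAX added to the EltwiseOp enum, a network definition can select the elementwise max in its prototxt. A minimal sketch follows, assuming the V1 layers syntax in use at the time of this commit; the layer and blob names are illustrative:

layers {
  name: "elementwise_max"
  type: ELTWISE
  bottom: "feat_a"
  bottom: "feat_b"
  top: "feat_max"
  eltwise_param {
    operation: MAX
  }
}

Any number of bottoms (two or more) can be listed; the layer folds them in pairwise, as the CPU and GPU forward passes above show.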
33 changes: 33 additions & 0 deletions src/caffe/test/test_eltwise_layer.cpp
@@ -1,3 +1,4 @@
#include <algorithm>
#include <vector>

#include "gtest/gtest.h"
@@ -23,6 +24,7 @@ class EltwiseLayerTest : public MultiDeviceTest<TypeParam> {
blob_bottom_c_(new Blob<Dtype>(2, 3, 4, 5)),
blob_top_(new Blob<Dtype>()) {
// fill the values
Caffe::set_random_seed(1701);
FillerParameter filler_param;
UniformFiller<Dtype> filler(filler_param);
filler.Fill(this->blob_bottom_a_);
@@ -173,4 +175,35 @@ TYPED_TEST(EltwiseLayerTest, TestSumCoeffGradient) {
&(this->blob_top_vec_));
}

TYPED_TEST(EltwiseLayerTest, TestMax) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
shared_ptr<EltwiseLayer<Dtype> > layer(
new EltwiseLayer<Dtype>(layer_param));
layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
const Dtype* data = this->blob_top_->cpu_data();
const int count = this->blob_top_->count();
const Dtype* in_data_a = this->blob_bottom_a_->cpu_data();
const Dtype* in_data_b = this->blob_bottom_b_->cpu_data();
const Dtype* in_data_c = this->blob_bottom_c_->cpu_data();
for (int i = 0; i < count; ++i) {
EXPECT_EQ(data[i],
std::max(in_data_a[i], std::max(in_data_b[i], in_data_c[i])));
}
}

TYPED_TEST(EltwiseLayerTest, TestMaxGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
EltwiseParameter* eltwise_param = layer_param.mutable_eltwise_param();
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_MAX);
EltwiseLayer<Dtype> layer(layer_param);
GradientChecker<Dtype> checker(1e-4, 1e-3);
checker.CheckGradientEltwise(&layer, &(this->blob_bottom_vec_),
&(this->blob_top_vec_));
}

} // namespace caffe
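
TestMax checks the forward output elementwise against std::max over all three bottoms; TestMaxGradient runs the layer through GradientChecker, which compares the analytic, mask-routed gradients against finite-difference estimates for each bottom.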
