Merge pull request BVLC#742 from longjon/direct-blob-buffers
Use Blob directly instead of shared_ptr for internal layer buffers
jeffdonahue committed Jul 20, 2014
2 parents f142421 + 6d083ed commit 0731da5
Showing 17 changed files with 74 additions and 79 deletions.
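The change is mechanical but repeated across every touched layer: heap-allocated `shared_ptr<Blob<...>>` members become plain `Blob<...>` members, `reset(new Blob(...))` becomes `Reshape(...)`, and `->` accessors become `.`. Below is a minimal before/after sketch of that pattern, assuming the 2014-era Blob interface (`Reshape`, `mutable_cpu_data`, `caffe_set`); the `ExampleLayer` names are illustrative and not part of the patch.

```cpp
#include <boost/shared_ptr.hpp>
#include "caffe/blob.hpp"
#include "caffe/util/math_functions.hpp"

using caffe::Blob;

// Before: the buffer lives on the heap behind a shared_ptr.
template <typename Dtype>
struct ExampleLayerOld {
  boost::shared_ptr<Blob<Dtype> > buffer_;
  void SetUp(int n) {
    buffer_.reset(new Blob<Dtype>(1, 1, 1, n));
    caffe::caffe_set(n, Dtype(1), buffer_->mutable_cpu_data());
  }
};

// After: the buffer is a direct member; Reshape() allocates the storage.
template <typename Dtype>
struct ExampleLayerNew {
  Blob<Dtype> buffer_;
  void SetUp(int n) {
    buffer_.Reshape(1, 1, 1, n);
    caffe::caffe_set(n, Dtype(1), buffer_.mutable_cpu_data());
  }
};
```

The direct member avoids an extra heap allocation and a level of indirection, and it cannot be left unset: a default-constructed Blob simply has `count() == 0`, which is what the `CHECK`s in the data layers now test.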
12 changes: 6 additions & 6 deletions include/caffe/data_layers.hpp
@@ -81,8 +81,8 @@ class DataLayer : public Layer<Dtype> {
int datum_width_;
int datum_size_;
pthread_t thread_;
- shared_ptr<Blob<Dtype> > prefetch_data_;
- shared_ptr<Blob<Dtype> > prefetch_label_;
+ Blob<Dtype> prefetch_data_;
+ Blob<Dtype> prefetch_label_;
Blob<Dtype> data_mean_;
bool output_labels_;
Caffe::Phase phase_;
@@ -228,8 +228,8 @@ class ImageDataLayer : public Layer<Dtype> {
int datum_width_;
int datum_size_;
pthread_t thread_;
- shared_ptr<Blob<Dtype> > prefetch_data_;
- shared_ptr<Blob<Dtype> > prefetch_label_;
+ Blob<Dtype> prefetch_data_;
+ Blob<Dtype> prefetch_label_;
Blob<Dtype> data_mean_;
Caffe::Phase phase_;
};
@@ -315,8 +315,8 @@ class WindowDataLayer : public Layer<Dtype> {

shared_ptr<Caffe::RNG> prefetch_rng_;
pthread_t thread_;
- shared_ptr<Blob<Dtype> > prefetch_data_;
- shared_ptr<Blob<Dtype> > prefetch_label_;
+ Blob<Dtype> prefetch_data_;
+ Blob<Dtype> prefetch_label_;
Blob<Dtype> data_mean_;
vector<std::pair<std::string, vector<int> > > image_database_;
enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
2 changes: 1 addition & 1 deletion include/caffe/neuron_layers.hpp
@@ -100,7 +100,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

- shared_ptr<Blob<unsigned int> > rand_vec_;
+ Blob<unsigned int> rand_vec_;
Dtype threshold_;
Dtype scale_;
unsigned int uint_thres_;
6 changes: 3 additions & 3 deletions include/caffe/vision_layers.hpp
@@ -55,7 +55,7 @@ class ConvolutionLayer : public Layer<Dtype> {
int num_output_;
int group_;
Blob<Dtype> col_buffer_;
- shared_ptr<Blob<Dtype> > bias_multiplier_;
+ Blob<Dtype> bias_multiplier_;
bool bias_term_;
int M_;
int K_;
@@ -157,7 +157,7 @@ class InnerProductLayer : public Layer<Dtype> {
int K_;
int N_;
bool bias_term_;
- shared_ptr<Blob<Dtype> > bias_multiplier_;
+ Blob<Dtype> bias_multiplier_;
};

// Forward declare PoolingLayer and SplitLayer for use in LRNLayer.
@@ -273,7 +273,7 @@ class PoolingLayer : public Layer<Dtype> {
int pooled_height_;
int pooled_width_;
Blob<Dtype> rand_idx_;
- shared_ptr<Blob<int> > max_idx_;
+ Blob<int> max_idx_;
};

} // namespace caffe
10 changes: 5 additions & 5 deletions src/caffe/layers/conv_layer.cpp
@@ -76,10 +76,10 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
bias_filler->Fill(this->blobs_[1].get());
}
}
- // Set up the bias filler
+ // Set up the all ones "bias multiplier" for adding bias using blas
if (bias_term_) {
- bias_multiplier_.reset(new Blob<Dtype>(1, 1, 1, N_));
- caffe_set(N_, Dtype(1), bias_multiplier_->mutable_cpu_data());
+ bias_multiplier_.Reshape(1, 1, 1, N_);
+ caffe_set(N_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
@@ -110,7 +110,7 @@ Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (bias_term_) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
- bias_multiplier_->cpu_data(),
+ bias_multiplier_.cpu_data(),
(Dtype)1., top_data + (*top)[i]->offset(n));
}
}
@@ -144,7 +144,7 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
for (int n = 0; n < num_; ++n) {
caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
1., top_diff + top[0]->offset(n),
- bias_multiplier_->cpu_data(), 1.,
+ bias_multiplier_.cpu_data(), 1.,
bias_diff);
}
}
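The `bias_multiplier_` blob changed above is the usual all-ones-vector trick: adding a per-output-channel bias to every spatial position is a rank-1 update, `output += bias * ones^T`, so a single GEMM (and a single GEMV for the bias gradient) does the whole job on CPU or GPU. A self-contained sketch of the arithmetic that the `caffe_cpu_gemm` call above performs, with plain loops standing in for BLAS:

```cpp
#include <cstdio>
#include <vector>

// output is num_output x N (N = spatial positions of one image).
// Adding a per-channel bias is the rank-1 update output += bias * ones^T,
// which is what the GEMM call with bias_multiplier_ computes.
void add_bias_rank1(std::vector<float>* output, const std::vector<float>& bias,
                    const std::vector<float>& ones, int M, int N) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      (*output)[m * N + n] += bias[m] * ones[n];  // ones[n] == 1
    }
  }
}

int main() {
  const int M = 2, N = 3;
  std::vector<float> output(M * N, 0.0f);
  std::vector<float> bias;
  bias.push_back(0.5f);
  bias.push_back(-1.0f);
  std::vector<float> ones(N, 1.0f);
  add_bias_rank1(&output, bias, ones, M, N);
  for (int i = 0; i < M * N; ++i) std::printf("%g ", output[i]);  // 0.5 0.5 0.5 -1 -1 -1
  std::printf("\n");
  return 0;
}
```

The backward pass is the transpose of the same picture: multiplying the top gradient by the ones vector (the `caffe_cpu_gemv` call above) sums the gradient over the N spatial positions to give the bias gradient.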
4 changes: 2 additions & 2 deletions src/caffe/layers/conv_layer.cu
@@ -35,7 +35,7 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
if (bias_term_) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
- bias_multiplier_->gpu_data(),
+ bias_multiplier_.gpu_data(),
(Dtype)1., top_data + (*top)[i]->offset(n));
}
}
@@ -69,7 +69,7 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
for (int n = 0; n < num_; ++n) {
caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
1., top_diff + top[0]->offset(n),
- bias_multiplier_->gpu_data(), 1.,
+ bias_multiplier_.gpu_data(), 1.,
bias_diff);
}
}
28 changes: 13 additions & 15 deletions src/caffe/layers/data_layer.cpp
@@ -22,11 +22,11 @@ void* DataLayerPrefetch(void* layer_pointer) {
DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
CHECK(layer);
Datum datum;
- CHECK(layer->prefetch_data_);
- Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
+ CHECK(layer->prefetch_data_.count());
+ Dtype* top_data = layer->prefetch_data_.mutable_cpu_data();
Dtype* top_label = NULL; // suppress warnings about uninitialized variables
if (layer->output_labels_) {
- top_label = layer->prefetch_label_->mutable_cpu_data();
+ top_label = layer->prefetch_label_.mutable_cpu_data();
}
const Dtype scale = layer->layer_param_.data_param().scale();
const int batch_size = layer->layer_param_.data_param().batch_size();
@@ -257,25 +257,23 @@ void DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
if (crop_size > 0) {
(*top)[0]->Reshape(this->layer_param_.data_param().batch_size(),
datum.channels(), crop_size, crop_size);
- prefetch_data_.reset(new Blob<Dtype>(
- this->layer_param_.data_param().batch_size(), datum.channels(),
- crop_size, crop_size));
+ prefetch_data_.Reshape(this->layer_param_.data_param().batch_size(),
+ datum.channels(), crop_size, crop_size);
} else {
(*top)[0]->Reshape(
this->layer_param_.data_param().batch_size(), datum.channels(),
datum.height(), datum.width());
- prefetch_data_.reset(new Blob<Dtype>(
- this->layer_param_.data_param().batch_size(), datum.channels(),
- datum.height(), datum.width()));
+ prefetch_data_.Reshape(this->layer_param_.data_param().batch_size(),
+ datum.channels(), datum.height(), datum.width());
}
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
// label
if (output_labels_) {
(*top)[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1);
- prefetch_label_.reset(
- new Blob<Dtype>(this->layer_param_.data_param().batch_size(), 1, 1, 1));
+ prefetch_label_.Reshape(this->layer_param_.data_param().batch_size(),
+ 1, 1, 1);
}
// datum size
datum_channels_ = datum.channels();
@@ -303,9 +301,9 @@ void DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// cpu_data calls so that the prefetch thread does not accidentally make
// simultaneous cudaMalloc calls when the main thread is running. In some
// GPUs this seems to cause failures if we do not so.
- prefetch_data_->mutable_cpu_data();
+ prefetch_data_.mutable_cpu_data();
if (output_labels_) {
- prefetch_label_->mutable_cpu_data();
+ prefetch_label_.mutable_cpu_data();
}
data_mean_.cpu_data();
DLOG(INFO) << "Initializing prefetch";
@@ -349,10 +347,10 @@ Dtype DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_cpu_data());
if (output_labels_) {
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_cpu_data());
}
// Start a new prefetch thread
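`prefetch_data_` and `prefetch_label_` are held by the layer because a background thread fills them while the network works on the previous batch; `Forward` then joins the thread, copies the finished buffers into the top blobs, and immediately relaunches the prefetcher. A rough illustration of that join/copy/relaunch pattern using `std::thread` (Caffe itself used pthreads here, as the `pthread_t` members above show; the `PrefetchingSource` type is made up for this sketch):

```cpp
#include <algorithm>
#include <thread>
#include <vector>

// Toy stand-in for the prefetching data layers: a background thread fills
// prefetch_data_ while the caller consumes the previous batch.
struct PrefetchingSource {
  std::vector<float> prefetch_data_;  // owned directly, like the Blob members
  std::thread thread_;

  void FillBatch() {  // plays the role of DataLayerPrefetch()
    std::fill(prefetch_data_.begin(), prefetch_data_.end(), 1.0f);
  }
  void CreatePrefetchThread() {
    thread_ = std::thread(&PrefetchingSource::FillBatch, this);
  }
  void JoinPrefetchThread() {
    if (thread_.joinable()) thread_.join();
  }

  // Mirrors DataLayer::Forward_cpu: join, copy out, start the next batch.
  void Forward(std::vector<float>* top) {
    JoinPrefetchThread();
    std::copy(prefetch_data_.begin(), prefetch_data_.end(), top->begin());
    CreatePrefetchThread();
  }
};

int main() {
  PrefetchingSource src;
  src.prefetch_data_.resize(8);
  std::vector<float> top(8);
  src.CreatePrefetchThread();  // as in SetUp()
  src.Forward(&top);           // consume one batch
  src.JoinPrefetchThread();    // as in the layer destructor
  return 0;
}
```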
4 changes: 2 additions & 2 deletions src/caffe/layers/data_layer.cu
@@ -19,10 +19,10 @@ Dtype DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_gpu_data());
if (output_labels_) {
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_gpu_data());
}
// Start a new prefetch thread
8 changes: 4 additions & 4 deletions src/caffe/layers/dropout_layer.cpp
@@ -17,8 +17,8 @@ void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
NeuronLayer<Dtype>::SetUp(bottom, top);
// Set up the cache for random number generation
- rand_vec_.reset(new Blob<unsigned int>(bottom[0]->num(),
- bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()));
+ rand_vec_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+ bottom[0]->height(), bottom[0]->width());
threshold_ = this->layer_param_.dropout_param().dropout_ratio();
DCHECK(threshold_ > 0.);
DCHECK(threshold_ < 1.);
@@ -31,7 +31,7 @@ Dtype DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
- unsigned int* mask = rand_vec_->mutable_cpu_data();
+ unsigned int* mask = rand_vec_.mutable_cpu_data();
const int count = bottom[0]->count();
if (Caffe::phase() == Caffe::TRAIN) {
// Create random numbers
@@ -53,7 +53,7 @@ void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
if (Caffe::phase() == Caffe::TRAIN) {
- const unsigned int* mask = rand_vec_->cpu_data();
+ const unsigned int* mask = rand_vec_.cpu_data();
const int count = (*bottom)[0]->count();
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
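`rand_vec_` stores one unsigned int per activation. During training the forward pass drops each activation with probability `dropout_ratio` and rescales the survivors by `scale_ = 1 / (1 - ratio)`; backward reuses the same mask, exactly as in the loop above. A standalone sketch of that mask arithmetic (Caffe fills the mask from its own RNG helpers; `std::rand()` below is only a stand-in):

```cpp
#include <cstdlib>
#include <vector>

// Inverted dropout with an explicit mask buffer, mirroring DropoutLayer.
// threshold is the dropout ratio; survivors are rescaled by 1/(1-threshold).
void dropout_forward(const std::vector<float>& bottom, std::vector<float>* top,
                     std::vector<unsigned int>* mask, float threshold) {
  const float scale = 1.0f / (1.0f - threshold);
  for (size_t i = 0; i < bottom.size(); ++i) {
    const float u = static_cast<float>(std::rand()) / RAND_MAX;  // stand-in RNG
    (*mask)[i] = (u > threshold) ? 1u : 0u;  // keep with probability 1 - threshold
    (*top)[i] = bottom[i] * (*mask)[i] * scale;
  }
}

void dropout_backward(const std::vector<float>& top_diff,
                      const std::vector<unsigned int>& mask, float threshold,
                      std::vector<float>* bottom_diff) {
  const float scale = 1.0f / (1.0f - threshold);
  for (size_t i = 0; i < top_diff.size(); ++i) {
    (*bottom_diff)[i] = top_diff[i] * mask[i] * scale;  // same as the Backward_cpu loop
  }
}

int main() {
  std::vector<float> x(4, 2.0f), y(4), dy(4, 1.0f), dx(4);
  std::vector<unsigned int> mask(4);
  dropout_forward(x, &y, &mask, 0.5f);
  dropout_backward(dy, mask, 0.5f, &dx);
  return 0;
}
```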
4 changes: 2 additions & 2 deletions src/caffe/layers/dropout_layer.cu
@@ -32,7 +32,7 @@ Dtype DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const int count = bottom[0]->count();
if (Caffe::phase() == Caffe::TRAIN) {
unsigned int* mask =
- static_cast<unsigned int*>(rand_vec_->mutable_gpu_data());
+ static_cast<unsigned int*>(rand_vec_.mutable_gpu_data());
caffe_gpu_rng_uniform(count, mask);
// set thresholds
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -63,7 +63,7 @@ void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
if (Caffe::phase() == Caffe::TRAIN) {
const unsigned int* mask =
- static_cast<const unsigned int*>(rand_vec_->gpu_data());
+ static_cast<const unsigned int*>(rand_vec_.gpu_data());
const int count = (*bottom)[0]->count();
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
23 changes: 11 additions & 12 deletions src/caffe/layers/image_data_layer.cpp
@@ -28,9 +28,9 @@ void* ImageDataLayerPrefetch(void* layer_pointer) {
reinterpret_cast<ImageDataLayer<Dtype>*>(layer_pointer);
CHECK(layer);
Datum datum;
- CHECK(layer->prefetch_data_);
- Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
- Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
+ CHECK(layer->prefetch_data_.count());
+ Dtype* top_data = layer->prefetch_data_.mutable_cpu_data();
+ Dtype* top_label = layer->prefetch_label_.mutable_cpu_data();
ImageDataParameter image_data_param = layer->layer_param_.image_data_param();
const Dtype scale = image_data_param.scale();
const int batch_size = image_data_param.batch_size();
@@ -183,20 +183,19 @@ void ImageDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
const string& mean_file = this->layer_param_.image_data_param().mean_file();
if (crop_size > 0) {
(*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
- prefetch_data_.reset(new Blob<Dtype>(batch_size, datum.channels(),
- crop_size, crop_size));
+ prefetch_data_.Reshape(batch_size, datum.channels(), crop_size, crop_size);
} else {
(*top)[0]->Reshape(batch_size, datum.channels(), datum.height(),
datum.width());
- prefetch_data_.reset(new Blob<Dtype>(batch_size, datum.channels(),
- datum.height(), datum.width()));
+ prefetch_data_.Reshape(batch_size, datum.channels(), datum.height(),
+ datum.width());
}
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
// label
(*top)[1]->Reshape(batch_size, 1, 1, 1);
- prefetch_label_.reset(new Blob<Dtype>(batch_size, 1, 1, 1));
+ prefetch_label_.Reshape(batch_size, 1, 1, 1);
// datum size
datum_channels_ = datum.channels();
datum_height_ = datum.height();
@@ -222,8 +221,8 @@ void ImageDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// cpu_data calls so that the prefetch thread does not accidentally make
// simultaneous cudaMalloc calls when the main thread is running. In some
// GPUs this seems to cause failures if we do not so.
- prefetch_data_->mutable_cpu_data();
- prefetch_label_->mutable_cpu_data();
+ prefetch_data_.mutable_cpu_data();
+ prefetch_label_.mutable_cpu_data();
data_mean_.cpu_data();
DLOG(INFO) << "Initializing prefetch";
CreatePrefetchThread();
@@ -277,9 +276,9 @@ Dtype ImageDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_cpu_data());
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_cpu_data());
// Start a new prefetch thread
CreatePrefetchThread();
4 changes: 2 additions & 2 deletions src/caffe/layers/image_data_layer.cu
@@ -25,9 +25,9 @@ Dtype ImageDataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_gpu_data());
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_gpu_data());
// Start a new prefetch thread
CreatePrefetchThread();
8 changes: 4 additions & 4 deletions src/caffe/layers/inner_product_layer.cpp
@@ -47,8 +47,8 @@ void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
} // parameter initialization
// Setting up the bias multiplier
if (bias_term_) {
- bias_multiplier_.reset(new Blob<Dtype>(1, 1, 1, M_));
- caffe_set(M_, Dtype(1), bias_multiplier_->mutable_cpu_data());
+ bias_multiplier_.Reshape(1, 1, 1, M_);
+ caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
@@ -63,7 +63,7 @@ Dtype InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
bottom_data, weight, (Dtype)0., top_data);
if (bias_term_) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
- bias_multiplier_->cpu_data(),
+ bias_multiplier_.cpu_data(),
this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
}
return Dtype(0);
@@ -84,7 +84,7 @@ void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->cpu_diff();
// Gradient with respect to bias
caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
- bias_multiplier_->cpu_data(), (Dtype)0.,
+ bias_multiplier_.cpu_data(), (Dtype)0.,
this->blobs_[1]->mutable_cpu_diff());
}
if (propagate_down[0]) {
4 changes: 2 additions & 2 deletions src/caffe/layers/inner_product_layer.cu
@@ -21,7 +21,7 @@ Dtype InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
bottom_data, weight, (Dtype)0., top_data);
if (bias_term_) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
- bias_multiplier_->gpu_data(),
+ bias_multiplier_.gpu_data(),
this->blobs_[1]->gpu_data(), (Dtype)1., top_data);
}
return Dtype(0);
@@ -42,7 +42,7 @@ void InnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->gpu_diff();
// Gradient with respect to bias
caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
- bias_multiplier_->gpu_data(), (Dtype)0.,
+ bias_multiplier_.gpu_data(), (Dtype)0.,
this->blobs_[1]->mutable_gpu_diff());
}
if (propagate_down[0]) {
8 changes: 4 additions & 4 deletions src/caffe/layers/pooling_layer.cpp
@@ -99,8 +99,8 @@ void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// If max pooling, we will initialize the vector index part.
if (this->layer_param_.pooling_param().pool() ==
PoolingParameter_PoolMethod_MAX && top->size() == 1) {
- max_idx_.reset(new Blob<int>(bottom[0]->num(), channels_,
- pooled_height_, pooled_width_));
+ max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
+ pooled_width_);
}
// If stochastic pooling, we will initialize the random index part.
if (this->layer_param_.pooling_param().pool() ==
@@ -131,7 +131,7 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
top_mask = (*top)[1]->mutable_cpu_data();
caffe_set(top_count, Dtype(-1), top_mask);
} else {
- mask = max_idx_->mutable_cpu_data();
+ mask = max_idx_.mutable_cpu_data();
caffe_set(top_count, -1, mask);
}
caffe_set(top_count, Dtype(-FLT_MAX), top_data);
@@ -236,7 +236,7 @@ void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
if (use_top_mask) {
top_mask = top[1]->cpu_data();
} else {
- mask = max_idx_->cpu_data();
+ mask = max_idx_.cpu_data();
}
for (int n = 0; n < top[0]->num(); ++n) {
for (int c = 0; c < channels_; ++c) {
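`max_idx_` records, for each pooled output, the flat index of the winning bottom element, so Backward can route each gradient to exactly that element. A 1-D sketch of the argmax-recording forward and the index-driven backward (the real layer pools 2-D windows per channel; the names here are illustrative only):

```cpp
#include <algorithm>
#include <cfloat>
#include <vector>

// 1-D max pooling that records argmax indices, mirroring how PoolingLayer
// uses max_idx_: forward stores the winner, backward scatters through it.
void max_pool_forward(const std::vector<float>& bottom, int kernel, int stride,
                      std::vector<float>* top, std::vector<int>* max_idx) {
  for (size_t p = 0; p < top->size(); ++p) {
    const int start = static_cast<int>(p) * stride;
    const int end = std::min(start + kernel, static_cast<int>(bottom.size()));
    (*top)[p] = -FLT_MAX;
    for (int i = start; i < end; ++i) {
      if (bottom[i] > (*top)[p]) { (*top)[p] = bottom[i]; (*max_idx)[p] = i; }
    }
  }
}

void max_pool_backward(const std::vector<float>& top_diff,
                       const std::vector<int>& max_idx,
                       std::vector<float>* bottom_diff) {
  std::fill(bottom_diff->begin(), bottom_diff->end(), 0.0f);
  for (size_t p = 0; p < top_diff.size(); ++p) {
    (*bottom_diff)[max_idx[p]] += top_diff[p];  // gradient flows only to the argmax
  }
}

int main() {
  std::vector<float> bottom(6), top(3), top_diff(3, 1.0f), bottom_diff(6);
  std::vector<int> max_idx(3);
  for (int i = 0; i < 6; ++i) bottom[i] = static_cast<float>(i % 3);
  max_pool_forward(bottom, 2, 2, &top, &max_idx);
  max_pool_backward(top_diff, max_idx, &bottom_diff);
  return 0;
}
```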