Merge pull request BVLC#742 from longjon/direct-blob-buffers
Use Blob directly instead of shared_ptr for internal layer buffers
jeffdonahue committed Jul 20, 2014
2 parents f142421 + 6d083ed commit 0731da5
Showing 17 changed files with 74 additions and 79 deletions.
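The change is mechanical but repeated across every touched layer: heap-allocated `shared_ptr<Blob<...>>` members become plain `Blob<...>` members, `reset(new Blob(...))` becomes `Reshape(...)`, and `->` accessors become `.`. Below is a minimal before/after sketch of that pattern, assuming the 2014-era Blob interface (`Reshape`, `mutable_cpu_data`, `caffe_set`); the `ExampleLayer` names are illustrative and not part of the patch.

```cpp
#include <boost/shared_ptr.hpp>
#include "caffe/blob.hpp"
#include "caffe/util/math_functions.hpp"

using caffe::Blob;

// Before: the buffer lives on the heap behind a shared_ptr.
template <typename Dtype>
struct ExampleLayerOld {
  boost::shared_ptr<Blob<Dtype> > buffer_;
  void SetUp(int n) {
    buffer_.reset(new Blob<Dtype>(1, 1, 1, n));
    caffe::caffe_set(n, Dtype(1), buffer_->mutable_cpu_data());
  }
};

// After: the buffer is a direct member; Reshape() allocates the storage.
template <typename Dtype>
struct ExampleLayerNew {
  Blob<Dtype> buffer_;
  void SetUp(int n) {
    buffer_.Reshape(1, 1, 1, n);
    caffe::caffe_set(n, Dtype(1), buffer_.mutable_cpu_data());
  }
};
```

The direct member avoids an extra heap allocation and a level of indirection, and it cannot be left unset: a default-constructed Blob simply has `count() == 0`, which is what the `CHECK`s in the data layers now test.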
12 changes: 6 additions & 6 deletions include/caffe/data_layers.hpp
@@ -81,8 +81,8 @@ class DataLayer : public Layer<Dtype> {
int datum_width_;
int datum_size_;
pthread_t thread_;
- shared_ptr<Blob<Dtype> > prefetch_data_;
- shared_ptr<Blob<Dtype> > prefetch_label_;
+ Blob<Dtype> prefetch_data_;
+ Blob<Dtype> prefetch_label_;
Blob<Dtype> data_mean_;
bool output_labels_;
Caffe::Phase phase_;
@@ -228,8 +228,8 @@ class ImageDataLayer : public Layer<Dtype> {
int datum_width_;
int datum_size_;
pthread_t thread_;
- shared_ptr<Blob<Dtype> > prefetch_data_;
- shared_ptr<Blob<Dtype> > prefetch_label_;
+ Blob<Dtype> prefetch_data_;
+ Blob<Dtype> prefetch_label_;
Blob<Dtype> data_mean_;
Caffe::Phase phase_;
};
@@ -315,8 +315,8 @@ class WindowDataLayer : public Layer<Dtype> {

shared_ptr<Caffe::RNG> prefetch_rng_;
pthread_t thread_;
- shared_ptr<Blob<Dtype> > prefetch_data_;
- shared_ptr<Blob<Dtype> > prefetch_label_;
+ Blob<Dtype> prefetch_data_;
+ Blob<Dtype> prefetch_label_;
Blob<Dtype> data_mean_;
vector<std::pair<std::string, vector<int> > > image_database_;
enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
2 changes: 1 addition & 1 deletion include/caffe/neuron_layers.hpp
@@ -100,7 +100,7 @@ class DropoutLayer : public NeuronLayer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom);

- shared_ptr<Blob<unsigned int> > rand_vec_;
+ Blob<unsigned int> rand_vec_;
Dtype threshold_;
Dtype scale_;
unsigned int uint_thres_;
6 changes: 3 additions & 3 deletions include/caffe/vision_layers.hpp
@@ -55,7 +55,7 @@ class ConvolutionLayer : public Layer<Dtype> {
int num_output_;
int group_;
Blob<Dtype> col_buffer_;
- shared_ptr<Blob<Dtype> > bias_multiplier_;
+ Blob<Dtype> bias_multiplier_;
bool bias_term_;
int M_;
int K_;
@@ -157,7 +157,7 @@ class InnerProductLayer : public Layer<Dtype> {
int K_;
int N_;
bool bias_term_;
- shared_ptr<Blob<Dtype> > bias_multiplier_;
+ Blob<Dtype> bias_multiplier_;
};

// Forward declare PoolingLayer and SplitLayer for use in LRNLayer.
@@ -273,7 +273,7 @@ class PoolingLayer : public Layer<Dtype> {
int pooled_height_;
int pooled_width_;
Blob<Dtype> rand_idx_;
- shared_ptr<Blob<int> > max_idx_;
+ Blob<int> max_idx_;
};

} // namespace caffe
10 changes: 5 additions & 5 deletions src/caffe/layers/conv_layer.cpp
@@ -76,10 +76,10 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
bias_filler->Fill(this->blobs_[1].get());
}
}
- // Set up the bias filler
+ // Set up the all ones "bias multiplier" for adding bias using blas
if (bias_term_) {
- bias_multiplier_.reset(new Blob<Dtype>(1, 1, 1, N_));
- caffe_set(N_, Dtype(1), bias_multiplier_->mutable_cpu_data());
+ bias_multiplier_.Reshape(1, 1, 1, N_);
+ caffe_set(N_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
@@ -110,7 +110,7 @@ Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
if (bias_term_) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
- bias_multiplier_->cpu_data(),
+ bias_multiplier_.cpu_data(),
(Dtype)1., top_data + (*top)[i]->offset(n));
}
}
@@ -144,7 +144,7 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
for (int n = 0; n < num_; ++n) {
caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
1., top_diff + top[0]->offset(n),
- bias_multiplier_->cpu_data(), 1.,
+ bias_multiplier_.cpu_data(), 1.,
bias_diff);
}
}
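The `bias_multiplier_` blob changed above is the usual all-ones-vector trick: adding a per-output-channel bias to every spatial position is a rank-1 update, `output += bias * ones^T`, so a single GEMM (and a single GEMV for the bias gradient) does the whole job on CPU or GPU. A self-contained sketch of the arithmetic that the `caffe_cpu_gemm` call above performs, with plain loops standing in for BLAS:

```cpp
#include <cstdio>
#include <vector>

// output is num_output x N (N = spatial positions of one image).
// Adding a per-channel bias is the rank-1 update output += bias * ones^T,
// which is what the GEMM call with bias_multiplier_ computes.
void add_bias_rank1(std::vector<float>* output, const std::vector<float>& bias,
                    const std::vector<float>& ones, int M, int N) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      (*output)[m * N + n] += bias[m] * ones[n];  // ones[n] == 1
    }
  }
}

int main() {
  const int M = 2, N = 3;
  std::vector<float> output(M * N, 0.0f);
  std::vector<float> bias;
  bias.push_back(0.5f);
  bias.push_back(-1.0f);
  std::vector<float> ones(N, 1.0f);
  add_bias_rank1(&output, bias, ones, M, N);
  for (int i = 0; i < M * N; ++i) std::printf("%g ", output[i]);  // 0.5 0.5 0.5 -1 -1 -1
  std::printf("\n");
  return 0;
}
```

The backward pass is the transpose of the same picture: multiplying the top gradient by the ones vector (the `caffe_cpu_gemv` call above) sums the gradient over the N spatial positions to give the bias gradient.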
4 changes: 2 additions & 2 deletions src/caffe/layers/conv_layer.cu
@@ -35,7 +35,7 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
if (bias_term_) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
- bias_multiplier_->gpu_data(),
+ bias_multiplier_.gpu_data(),
(Dtype)1., top_data + (*top)[i]->offset(n));
}
}
@@ -69,7 +69,7 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
for (int n = 0; n < num_; ++n) {
caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
1., top_diff + top[0]->offset(n),
- bias_multiplier_->gpu_data(), 1.,
+ bias_multiplier_.gpu_data(), 1.,
bias_diff);
}
}
28 changes: 13 additions & 15 deletions src/caffe/layers/data_layer.cpp
@@ -22,11 +22,11 @@ void* DataLayerPrefetch(void* layer_pointer) {
DataLayer<Dtype>* layer = static_cast<DataLayer<Dtype>*>(layer_pointer);
CHECK(layer);
Datum datum;
- CHECK(layer->prefetch_data_);
- Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
+ CHECK(layer->prefetch_data_.count());
+ Dtype* top_data = layer->prefetch_data_.mutable_cpu_data();
Dtype* top_label = NULL; // suppress warnings about uninitialized variables
if (layer->output_labels_) {
- top_label = layer->prefetch_label_->mutable_cpu_data();
+ top_label = layer->prefetch_label_.mutable_cpu_data();
}
const Dtype scale = layer->layer_param_.data_param().scale();
const int batch_size = layer->layer_param_.data_param().batch_size();
@@ -257,25 +257,23 @@ void DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
if (crop_size > 0) {
(*top)[0]->Reshape(this->layer_param_.data_param().batch_size(),
datum.channels(), crop_size, crop_size);
- prefetch_data_.reset(new Blob<Dtype>(
- this->layer_param_.data_param().batch_size(), datum.channels(),
- crop_size, crop_size));
+ prefetch_data_.Reshape(this->layer_param_.data_param().batch_size(),
+ datum.channels(), crop_size, crop_size);
} else {
(*top)[0]->Reshape(
this->layer_param_.data_param().batch_size(), datum.channels(),
datum.height(), datum.width());
- prefetch_data_.reset(new Blob<Dtype>(
- this->layer_param_.data_param().batch_size(), datum.channels(),
- datum.height(), datum.width()));
+ prefetch_data_.Reshape(this->layer_param_.data_param().batch_size(),
+ datum.channels(), datum.height(), datum.width());
}
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
// label
if (output_labels_) {
(*top)[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1);
- prefetch_label_.reset(
- new Blob<Dtype>(this->layer_param_.data_param().batch_size(), 1, 1, 1));
+ prefetch_label_.Reshape(this->layer_param_.data_param().batch_size(),
+ 1, 1, 1);
}
// datum size
datum_channels_ = datum.channels();
@@ -303,9 +301,9 @@ void DataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// cpu_data calls so that the prefetch thread does not accidentally make
// simultaneous cudaMalloc calls when the main thread is running. In some
// GPUs this seems to cause failures if we do not so.
- prefetch_data_->mutable_cpu_data();
+ prefetch_data_.mutable_cpu_data();
if (output_labels_) {
- prefetch_label_->mutable_cpu_data();
+ prefetch_label_.mutable_cpu_data();
}
data_mean_.cpu_data();
DLOG(INFO) << "Initializing prefetch";
@@ -349,10 +347,10 @@ Dtype DataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_cpu_data());
if (output_labels_) {
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_cpu_data());
}
// Start a new prefetch thread
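`prefetch_data_` and `prefetch_label_` are held by the layer because a background thread fills them while the network works on the previous batch; `Forward` then joins the thread, copies the finished buffers into the top blobs, and immediately relaunches the prefetcher. A rough illustration of that join/copy/relaunch pattern using `std::thread` (Caffe itself used pthreads here, as the `pthread_t` members above show; the `PrefetchingSource` type is made up for this sketch):

```cpp
#include <algorithm>
#include <thread>
#include <vector>

// Toy stand-in for the prefetching data layers: a background thread fills
// prefetch_data_ while the caller consumes the previous batch.
struct PrefetchingSource {
  std::vector<float> prefetch_data_;  // owned directly, like the Blob members
  std::thread thread_;

  void FillBatch() {  // plays the role of DataLayerPrefetch()
    std::fill(prefetch_data_.begin(), prefetch_data_.end(), 1.0f);
  }
  void CreatePrefetchThread() {
    thread_ = std::thread(&PrefetchingSource::FillBatch, this);
  }
  void JoinPrefetchThread() {
    if (thread_.joinable()) thread_.join();
  }

  // Mirrors DataLayer::Forward_cpu: join, copy out, start the next batch.
  void Forward(std::vector<float>* top) {
    JoinPrefetchThread();
    std::copy(prefetch_data_.begin(), prefetch_data_.end(), top->begin());
    CreatePrefetchThread();
  }
};

int main() {
  PrefetchingSource src;
  src.prefetch_data_.resize(8);
  std::vector<float> top(8);
  src.CreatePrefetchThread();  // as in SetUp()
  src.Forward(&top);           // consume one batch
  src.JoinPrefetchThread();    // as in the layer destructor
  return 0;
}
```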
4 changes: 2 additions & 2 deletions src/caffe/layers/data_layer.cu
@@ -19,10 +19,10 @@ Dtype DataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_gpu_data());
if (output_labels_) {
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_gpu_data());
}
// Start a new prefetch thread
8 changes: 4 additions & 4 deletions src/caffe/layers/dropout_layer.cpp
@@ -17,8 +17,8 @@ void DropoutLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
NeuronLayer<Dtype>::SetUp(bottom, top);
// Set up the cache for random number generation
- rand_vec_.reset(new Blob<unsigned int>(bottom[0]->num(),
- bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()));
+ rand_vec_.Reshape(bottom[0]->num(), bottom[0]->channels(),
+ bottom[0]->height(), bottom[0]->width());
threshold_ = this->layer_param_.dropout_param().dropout_ratio();
DCHECK(threshold_ > 0.);
DCHECK(threshold_ < 1.);
@@ -31,7 +31,7 @@ Dtype DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
- unsigned int* mask = rand_vec_->mutable_cpu_data();
+ unsigned int* mask = rand_vec_.mutable_cpu_data();
const int count = bottom[0]->count();
if (Caffe::phase() == Caffe::TRAIN) {
// Create random numbers
@@ -53,7 +53,7 @@ void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
if (Caffe::phase() == Caffe::TRAIN) {
- const unsigned int* mask = rand_vec_->cpu_data();
+ const unsigned int* mask = rand_vec_.cpu_data();
const int count = (*bottom)[0]->count();
for (int i = 0; i < count; ++i) {
bottom_diff[i] = top_diff[i] * mask[i] * scale_;
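`rand_vec_` stores one unsigned int per activation. During training the forward pass drops each activation with probability `dropout_ratio` and rescales the survivors by `scale_ = 1 / (1 - ratio)`; backward reuses the same mask, exactly as in the loop above. A standalone sketch of that mask arithmetic (Caffe fills the mask from its own RNG helpers; `std::rand()` below is only a stand-in):

```cpp
#include <cstdlib>
#include <vector>

// Inverted dropout with an explicit mask buffer, mirroring DropoutLayer.
// threshold is the dropout ratio; survivors are rescaled by 1/(1-threshold).
void dropout_forward(const std::vector<float>& bottom, std::vector<float>* top,
                     std::vector<unsigned int>* mask, float threshold) {
  const float scale = 1.0f / (1.0f - threshold);
  for (size_t i = 0; i < bottom.size(); ++i) {
    const float u = static_cast<float>(std::rand()) / RAND_MAX;  // stand-in RNG
    (*mask)[i] = (u > threshold) ? 1u : 0u;  // keep with probability 1 - threshold
    (*top)[i] = bottom[i] * (*mask)[i] * scale;
  }
}

void dropout_backward(const std::vector<float>& top_diff,
                      const std::vector<unsigned int>& mask, float threshold,
                      std::vector<float>* bottom_diff) {
  const float scale = 1.0f / (1.0f - threshold);
  for (size_t i = 0; i < top_diff.size(); ++i) {
    (*bottom_diff)[i] = top_diff[i] * mask[i] * scale;  // same as the Backward_cpu loop
  }
}

int main() {
  std::vector<float> x(4, 2.0f), y(4), dy(4, 1.0f), dx(4);
  std::vector<unsigned int> mask(4);
  dropout_forward(x, &y, &mask, 0.5f);
  dropout_backward(dy, mask, 0.5f, &dx);
  return 0;
}
```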
4 changes: 2 additions & 2 deletions src/caffe/layers/dropout_layer.cu
@@ -32,7 +32,7 @@ Dtype DropoutLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const int count = bottom[0]->count();
if (Caffe::phase() == Caffe::TRAIN) {
unsigned int* mask =
- static_cast<unsigned int*>(rand_vec_->mutable_gpu_data());
+ static_cast<unsigned int*>(rand_vec_.mutable_gpu_data());
caffe_gpu_rng_uniform(count, mask);
// set thresholds
// NOLINT_NEXT_LINE(whitespace/operators)
@@ -63,7 +63,7 @@ void DropoutLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
if (Caffe::phase() == Caffe::TRAIN) {
const unsigned int* mask =
- static_cast<const unsigned int*>(rand_vec_->gpu_data());
+ static_cast<const unsigned int*>(rand_vec_.gpu_data());
const int count = (*bottom)[0]->count();
// NOLINT_NEXT_LINE(whitespace/operators)
DropoutBackward<Dtype><<<CAFFE_GET_BLOCKS(count),
23 changes: 11 additions & 12 deletions src/caffe/layers/image_data_layer.cpp
@@ -28,9 +28,9 @@ void* ImageDataLayerPrefetch(void* layer_pointer) {
reinterpret_cast<ImageDataLayer<Dtype>*>(layer_pointer);
CHECK(layer);
Datum datum;
- CHECK(layer->prefetch_data_);
- Dtype* top_data = layer->prefetch_data_->mutable_cpu_data();
- Dtype* top_label = layer->prefetch_label_->mutable_cpu_data();
+ CHECK(layer->prefetch_data_.count());
+ Dtype* top_data = layer->prefetch_data_.mutable_cpu_data();
+ Dtype* top_label = layer->prefetch_label_.mutable_cpu_data();
ImageDataParameter image_data_param = layer->layer_param_.image_data_param();
const Dtype scale = image_data_param.scale();
const int batch_size = image_data_param.batch_size();
@@ -183,20 +183,19 @@ void ImageDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
const string& mean_file = this->layer_param_.image_data_param().mean_file();
if (crop_size > 0) {
(*top)[0]->Reshape(batch_size, datum.channels(), crop_size, crop_size);
- prefetch_data_.reset(new Blob<Dtype>(batch_size, datum.channels(),
- crop_size, crop_size));
+ prefetch_data_.Reshape(batch_size, datum.channels(), crop_size, crop_size);
} else {
(*top)[0]->Reshape(batch_size, datum.channels(), datum.height(),
datum.width());
- prefetch_data_.reset(new Blob<Dtype>(batch_size, datum.channels(),
- datum.height(), datum.width()));
+ prefetch_data_.Reshape(batch_size, datum.channels(), datum.height(),
+ datum.width());
}
LOG(INFO) << "output data size: " << (*top)[0]->num() << ","
<< (*top)[0]->channels() << "," << (*top)[0]->height() << ","
<< (*top)[0]->width();
// label
(*top)[1]->Reshape(batch_size, 1, 1, 1);
- prefetch_label_.reset(new Blob<Dtype>(batch_size, 1, 1, 1));
+ prefetch_label_.Reshape(batch_size, 1, 1, 1);
// datum size
datum_channels_ = datum.channels();
datum_height_ = datum.height();
@@ -222,8 +221,8 @@ void ImageDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// cpu_data calls so that the prefetch thread does not accidentally make
// simultaneous cudaMalloc calls when the main thread is running. In some
// GPUs this seems to cause failures if we do not so.
- prefetch_data_->mutable_cpu_data();
- prefetch_label_->mutable_cpu_data();
+ prefetch_data_.mutable_cpu_data();
+ prefetch_label_.mutable_cpu_data();
data_mean_.cpu_data();
DLOG(INFO) << "Initializing prefetch";
CreatePrefetchThread();
@@ -277,9 +276,9 @@ Dtype ImageDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_cpu_data());
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_cpu_data());
// Start a new prefetch thread
CreatePrefetchThread();
4 changes: 2 additions & 2 deletions src/caffe/layers/image_data_layer.cu
@@ -25,9 +25,9 @@ Dtype ImageDataLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// First, join the thread
JoinPrefetchThread();
// Copy the data
- caffe_copy(prefetch_data_->count(), prefetch_data_->cpu_data(),
+ caffe_copy(prefetch_data_.count(), prefetch_data_.cpu_data(),
(*top)[0]->mutable_gpu_data());
- caffe_copy(prefetch_label_->count(), prefetch_label_->cpu_data(),
+ caffe_copy(prefetch_label_.count(), prefetch_label_.cpu_data(),
(*top)[1]->mutable_gpu_data());
// Start a new prefetch thread
CreatePrefetchThread();
8 changes: 4 additions & 4 deletions src/caffe/layers/inner_product_layer.cpp
@@ -47,8 +47,8 @@ void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
} // parameter initialization
// Setting up the bias multiplier
if (bias_term_) {
- bias_multiplier_.reset(new Blob<Dtype>(1, 1, 1, M_));
- caffe_set(M_, Dtype(1), bias_multiplier_->mutable_cpu_data());
+ bias_multiplier_.Reshape(1, 1, 1, M_);
+ caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
@@ -63,7 +63,7 @@ Dtype InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
bottom_data, weight, (Dtype)0., top_data);
if (bias_term_) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
- bias_multiplier_->cpu_data(),
+ bias_multiplier_.cpu_data(),
this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
}
return Dtype(0);
@@ -84,7 +84,7 @@ void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->cpu_diff();
// Gradient with respect to bias
caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
- bias_multiplier_->cpu_data(), (Dtype)0.,
+ bias_multiplier_.cpu_data(), (Dtype)0.,
this->blobs_[1]->mutable_cpu_diff());
}
if (propagate_down[0]) {
4 changes: 2 additions & 2 deletions src/caffe/layers/inner_product_layer.cu
@@ -21,7 +21,7 @@ Dtype InnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
bottom_data, weight, (Dtype)0., top_data);
if (bias_term_) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
- bias_multiplier_->gpu_data(),
+ bias_multiplier_.gpu_data(),
this->blobs_[1]->gpu_data(), (Dtype)1., top_data);
}
return Dtype(0);
@@ -42,7 +42,7 @@ void InnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->gpu_diff();
// Gradient with respect to bias
caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
- bias_multiplier_->gpu_data(), (Dtype)0.,
+ bias_multiplier_.gpu_data(), (Dtype)0.,
this->blobs_[1]->mutable_gpu_diff());
}
if (propagate_down[0]) {
8 changes: 4 additions & 4 deletions src/caffe/layers/pooling_layer.cpp
@@ -99,8 +99,8 @@ void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
// If max pooling, we will initialize the vector index part.
if (this->layer_param_.pooling_param().pool() ==
PoolingParameter_PoolMethod_MAX && top->size() == 1) {
- max_idx_.reset(new Blob<int>(bottom[0]->num(), channels_,
- pooled_height_, pooled_width_));
+ max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
+ pooled_width_);
}
// If stochastic pooling, we will initialize the random index part.
if (this->layer_param_.pooling_param().pool() ==
@@ -131,7 +131,7 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
top_mask = (*top)[1]->mutable_cpu_data();
caffe_set(top_count, Dtype(-1), top_mask);
} else {
- mask = max_idx_->mutable_cpu_data();
+ mask = max_idx_.mutable_cpu_data();
caffe_set(top_count, -1, mask);
}
caffe_set(top_count, Dtype(-FLT_MAX), top_data);
@@ -236,7 +236,7 @@ void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
if (use_top_mask) {
top_mask = top[1]->cpu_data();
} else {
- mask = max_idx_->cpu_data();
+ mask = max_idx_.cpu_data();
}
for (int n = 0; n < top[0]->num(); ++n) {
for (int c = 0; c < channels_; ++c) {
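`max_idx_` records, for each pooled output, the flat index of the winning bottom element, so Backward can route each gradient to exactly that element. A 1-D sketch of the argmax-recording forward and the index-driven backward (the real layer pools 2-D windows per channel; the names here are illustrative only):

```cpp
#include <algorithm>
#include <cfloat>
#include <vector>

// 1-D max pooling that records argmax indices, mirroring how PoolingLayer
// uses max_idx_: forward stores the winner, backward scatters through it.
void max_pool_forward(const std::vector<float>& bottom, int kernel, int stride,
                      std::vector<float>* top, std::vector<int>* max_idx) {
  for (size_t p = 0; p < top->size(); ++p) {
    const int start = static_cast<int>(p) * stride;
    const int end = std::min(start + kernel, static_cast<int>(bottom.size()));
    (*top)[p] = -FLT_MAX;
    for (int i = start; i < end; ++i) {
      if (bottom[i] > (*top)[p]) { (*top)[p] = bottom[i]; (*max_idx)[p] = i; }
    }
  }
}

void max_pool_backward(const std::vector<float>& top_diff,
                       const std::vector<int>& max_idx,
                       std::vector<float>* bottom_diff) {
  std::fill(bottom_diff->begin(), bottom_diff->end(), 0.0f);
  for (size_t p = 0; p < top_diff.size(); ++p) {
    (*bottom_diff)[max_idx[p]] += top_diff[p];  // gradient flows only to the argmax
  }
}

int main() {
  std::vector<float> bottom(6), top(3), top_diff(3, 1.0f), bottom_diff(6);
  std::vector<int> max_idx(3);
  for (int i = 0; i < 6; ++i) bottom[i] = static_cast<float>(i % 3);
  max_pool_forward(bottom, 2, 2, &top, &max_idx);
  max_pool_backward(top_diff, max_idx, &bottom_diff);
  return 0;
}
```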