diff --git a/include/caffe/layers/crop_layer.hpp b/include/caffe/layers/crop_layer.hpp
index bab290718e0..f84bab1ec49 100644
--- a/include/caffe/layers/crop_layer.hpp
+++ b/include/caffe/layers/crop_layer.hpp
@@ -41,9 +41,27 @@ class CropLayer : public Layer<Dtype> {
   virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

-  int crop_h_, crop_w_;
-};
+  vector<int> offsets;
+
+ private:
+  void crop_copy(const vector<Blob<Dtype>*>& bottom,
+               const vector<Blob<Dtype>*>& top,
+               const vector<int>& offsets,
+               vector<int> indices,
+               int cur_dim,
+               const Dtype* src_data,
+               Dtype* dest_data,
+               bool is_forward);
+  void crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
+               const vector<Blob<Dtype>*>& top,
+               const vector<int>& offsets,
+               vector<int> indices,
+               int cur_dim,
+               const Dtype* src_data,
+               Dtype* dest_data,
+               bool is_forward);
+};

 }  // namespace caffe

 #endif  // CAFFE_CROP_LAYER_HPP_
diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp
index 76409bd7408..82729f17c9f 100644
--- a/src/caffe/layers/crop_layer.cpp
+++ b/src/caffe/layers/crop_layer.cpp
@@ -1,8 +1,10 @@
 #include <algorithm>
+#include <functional>
 #include <map>
 #include <set>
 #include <vector>
+
 #include "caffe/layer.hpp"
 #include "caffe/layers/crop_layer.hpp"
 #include "caffe/net.hpp"

@@ -13,40 +15,108 @@ namespace caffe {
 template <typename Dtype>
 void CropLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  const CropParameter& param = this->layer_param_.crop_param();
   CHECK_EQ(bottom.size(), 2) << "Wrong number of bottom blobs.";
-  CHECK_EQ(bottom[0]->num_axes(), 4) << "Only works with 4D blobs.";
-  CHECK_EQ(bottom[1]->num_axes(), 4) << "Only works with 4D blobs.";
-  crop_h_ = param.offset_height();
-  crop_w_ = param.offset_width();
+  // Parameter setup is done in Reshape because it depends on the blob sizes.
 }

 template <typename Dtype>
 void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
-  // Check that the image we are cropping minus the margin is bigger than the
-  // destination image.
-  CHECK_GT(bottom[0]->height()-crop_h_, bottom[1]->height())
-      << "invalid offset";
-  CHECK_GT(bottom[0]->width()-crop_w_, bottom[1]->width()) << "invalid offset";
-  top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[1]->height(),
-      bottom[1]->width());
+  const CropParameter& param = this->layer_param_.crop_param();
+  // bottom[0] supplies the data
+  // bottom[1] supplies the size
+  int input_dim = bottom[0]->num_axes();
+  CHECK_LT(param.axis(), input_dim) << "crop axis bigger than input dim";
+  // initialize all offsets to 0
+  offsets = vector<int>(input_dim, 0);
+  // initialize new shape to bottom[0]
+  vector<int> new_shape(bottom[0]->shape());
+
+  if (param.offset_size() > 1) {
+    // the number of crop values specified must be equal to the number
+    // of dimensions following axis
+    CHECK_EQ(param.axis() + param.offset_size(), input_dim)
+      << "number of crop values specified must be equal to the number of "
+      << "dimensions following axis.";
+  }
+  // apply crops
+  for (int i = 0; i < input_dim; ++i) {
+    int crop_offset = 0;
+    int new_size = bottom[0]->shape(i);
+    if (i >= param.axis() && param.offset_size() == 1) {
+      // if only one crop value is supplied, crop all dimensions after axis
+      // by this crop value
+      crop_offset = param.offset(0);
+      new_size = bottom[1]->shape(i);
+    } else if (i >= param.axis() && param.offset_size() > 1) {
+      // crop values specified must be equal to the number of dimensions
+      // following axis
+      crop_offset = param.offset(i - param.axis());
+      new_size = bottom[1]->shape(i);
+    }
+    // Check that the source dimension minus the offset is at least as
+    // large as the destination dimension.
+    CHECK_GE(bottom[0]->shape(i) - crop_offset,
+             bottom[1]->shape(i))
+        << "invalid crop parameters in dimension: " << i;
+    // Now set new size and offsets
+    new_shape[i] = new_size;
+    offsets[i] = crop_offset;
+  }
+  top[0]->Reshape(new_shape);
+}
+
+// recursive copy function
+template <typename Dtype>
+void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top,
+             const vector<int>& offsets,
+             vector<int> indices,
+             int cur_dim,
+             const Dtype* src_data,
+             Dtype* dest_data,
+             bool is_forward) {
+  if (cur_dim + 1 < top[0]->num_axes()) {
+    // We are not yet at the final dimension, call copy recursively
+    for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
+      indices[cur_dim] = i;
+      crop_copy(bottom, top, offsets, indices, cur_dim+1,
+                src_data, dest_data, is_forward);
+    }
+  } else {
+    // We are at the last dimension, which is stored contiguously in memory
+    for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
+      // prepare index vector reduced(red) and with offsets(off)
+      std::vector<int> ind_red(cur_dim, 0);
+      std::vector<int> ind_off(cur_dim+1, 0);
+      for (int j = 0; j < cur_dim; ++j) {
+        ind_red[j] = indices[j];
+        ind_off[j] = indices[j] + offsets[j];
+      }
+      ind_off[cur_dim] = offsets[cur_dim];
+      // do the copy
+      if (is_forward) {
+        caffe_copy(top[0]->shape(cur_dim),
+            src_data + bottom[0]->offset(ind_off),
+            dest_data + top[0]->offset(ind_red));
+      } else {
+        // in the backwards pass the src_data is top_diff
+        // and the dest_data is bottom_diff
+        caffe_copy(top[0]->shape(cur_dim),
+            src_data + top[0]->offset(ind_red),
+            dest_data + bottom[0]->offset(ind_off));
+      }
+    }
+  }
 }

 template <typename Dtype>
 void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+  std::vector<int> indices(top[0]->num_axes(), 0);
   const Dtype* bottom_data = bottom[0]->cpu_data();
   Dtype* top_data = top[0]->mutable_cpu_data();
-  for (int n = 0; n < top[0]->num(); ++n) {
-    for (int c = 0; c < top[0]->channels(); ++c) {
-      for (int h = 0; h < top[0]->height(); ++h) {
-        caffe_copy(top[0]->width(),
-            bottom_data + bottom[0]->offset(n, c, crop_h_ + h, crop_w_),
-            top_data + top[0]->offset(n, c, h));
-      }
-    }
-  }
+  crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
 }

 template <typename Dtype>
@@ -54,17 +124,11 @@ void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
   const Dtype* top_diff = top[0]->cpu_diff();
   Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+
   if (propagate_down[0]) {
     caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
-    for (int n = 0; n < top[0]->num(); ++n) {
-      for (int c = 0; c < top[0]->channels(); ++c) {
-        for (int h = 0; h < top[0]->height(); ++h) {
-          caffe_copy(top[0]->width(),
-              top_diff + top[0]->offset(n, c, h),
-              bottom_diff + bottom[0]->offset(n, c, crop_h_ + h, crop_w_));
-        }
-      }
-    }
+    std::vector<int> indices(top[0]->num_axes(), 0);
+    crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false);
   }
 }

diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu
index 262f5fa8483..7b832c0a0dc 100644
--- a/src/caffe/layers/crop_layer.cu
+++ b/src/caffe/layers/crop_layer.cu
@@ -22,19 +22,90 @@ __global__ void copy_kernel(const int n, const int height, const int width,
   }
 }

+// Recursive copy function: similar to crop_copy, but loops over all but the
+// last two dimensions. It is implemented this way to allow for ND cropping
+// while still relying on a CUDA kernel for the innermost two dimensions,
+// for performance reasons.
+// An alternative that relies more on the kernel would require passing the
+// offsets to the kernel, which is problematic because they are of variable
+// length. Since in the standard (N,C,H,W) case N and C are usually not
+// cropped, a speedup could be achieved by not looping the application of
+// the copy_kernel around these dimensions.
+template <typename Dtype>
+void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
+             const vector<Blob<Dtype>*>& top,
+             const vector<int>& offsets,
+             vector<int> indices,
+             int cur_dim,
+             const Dtype* src_data,
+             Dtype* dest_data,
+             bool is_forward) {
+  if (cur_dim + 2 < top[0]->num_axes()) {
+    // We are not yet at the final dimension, call copy recursively
+    for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
+      indices[cur_dim] = i;
+      crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1,
+                src_data, dest_data, is_forward);
+    }
+  } else {
+    // We are at the last two dimensions, which are stored contiguously in memory
+    // With (N,C,H,W)
+    //      (0,1,2,3) cur_dim   -> H
+    //                cur_dim+1 -> W
+    const int lines = top[0]->shape(cur_dim);
+    const int height = top[0]->shape(cur_dim);
+    const int width = top[0]->shape(cur_dim+1);
+    std::vector<int> ind_off(cur_dim+2, 0);
+    for (int j = 0; j < cur_dim; ++j) {
+      ind_off[j] = indices[j] + offsets[j];
+    }
+    ind_off[cur_dim] = offsets[cur_dim];
+    ind_off[cur_dim+1] = offsets[cur_dim+1];
+    // Compute copy strides
+    const int src_outer_stride =
+        bottom[0]->shape(cur_dim)*bottom[0]->shape(cur_dim+1);
+    const int src_inner_stride = bottom[0]->shape(cur_dim+1);
+    const int dest_outer_stride =
+        top[0]->shape(cur_dim)*top[0]->shape(cur_dim+1);
+    const int dest_inner_stride = top[0]->shape(cur_dim+1);
+
+    if (is_forward) {
+      const Dtype* bottom_data = bottom[0]->gpu_data() +
+          bottom[0]->offset(ind_off);
+      Dtype* top_data = top[0]->mutable_gpu_data() +
+          top[0]->offset(indices);
+      // NOLINT_NEXT_LINE(whitespace/operators)
+      copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
+          lines, height, width,
+          src_outer_stride, src_inner_stride,
+          dest_outer_stride, dest_inner_stride,
+          bottom_data, top_data);
+
+    } else {
+      const Dtype* top_diff = top[0]->gpu_diff() +
+          top[0]->offset(indices);
+      Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() +
+          bottom[0]->offset(ind_off);
+      // NOLINT_NEXT_LINE(whitespace/operators)
+      copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
+          lines, height, width,
+          dest_outer_stride, dest_inner_stride,
+          src_outer_stride, src_inner_stride,
+          top_diff, bottom_diff);
+    }
+  }
+}
+
 template <typename Dtype>
 void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
+  std::vector<int> indices(top[0]->num_axes(), 0);
+  // crop_copy (which uses caffe_copy, i.e. cudaMemcpy) would also work here,
+  // but issuing thousands of small copies that way is probably less efficient
+  // than the custom kernel launched by crop_copy_gpu.
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
-  const int lines = top[0]->count() / top[0]->width();
-
-  // NOLINT_NEXT_LINE(whitespace/operators)
-  copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
-      lines, top[0]->height(), top[0]->width(),
-      bottom[0]->height() * bottom[0]->width(), bottom[0]->width(),
-      top[0]->height() * top[0]->width(), top[0]->width(),
-      bottom_data + bottom[0]->offset(0, 0, crop_h_, crop_w_), top_data);
+  crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
 }

 template <typename Dtype>
@@ -42,16 +113,12 @@ void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
   const Dtype* top_diff = top[0]->gpu_diff();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
-  const int lines = top[0]->count() / top[0]->width();

   if (propagate_down[0]) {
     caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
-    // NOLINT_NEXT_LINE(whitespace/operators)
-    copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
-        lines, top[0]->height(), top[0]->width(),
-        top[0]->height() * top[0]->width(), top[0]->width(),
-        bottom[0]->height() * bottom[0]->width(), bottom[0]->width(),
-        top_diff, bottom_diff + bottom[0]->offset(0, 0, crop_h_, crop_w_));
+    std::vector<int> indices(top[0]->num_axes(), 0);
+    crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff,
+                  false);
   }
 }

diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index fc27b00c827..a202c2ce1ec 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -564,11 +564,14 @@ message ConvolutionParameter {
   optional bool force_nd_im2col = 17 [default = false];
 }

-message CropParameter {
-  // Assumes standard dimensions: ( N,C,H,W )
-  // This could possibly be extended to use "optional BlobShape offsets"
-  optional uint32 offset_height = 1[default = 0];
-  optional uint32 offset_width = 2[default = 0];
+message CropParameter {
+  // If only one offset value is specified, all dimensions after the given
+  // axis are cropped by that amount; otherwise the number of offset values
+  // must equal the number of dimensions following axis, and the trailing
+  // dimensions are cropped by the corresponding values.
+  // Note: standard blob dimensions are (N,C,H,W).
+  optional uint32 axis = 1 [default = 2];
+  repeated uint32 offset = 2;
 }

 message DataParameter {
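
Usage sketch (not part of the patch): with the new CropParameter, a Crop layer takes two bottoms, crops bottom[0] to the shape of bottom[1] along every axis from axis onward, and shifts the cropped window by offset. The layer and blob names and the offset values below are hypothetical, and it is assumed the layer is registered under the type string "Crop" as in Caffe; only the crop_param fields (axis, offset) come from this change.

# Single offset: with the default axis = 2 on a 4D blob, only H and W are
# cropped, and both are shifted by the same offset.
layer {
  name: "crop_hw"          # hypothetical
  type: "Crop"
  bottom: "upscore"        # blob to crop (hypothetical)
  bottom: "data"           # shape reference (hypothetical)
  top: "score"
  crop_param {
    axis: 2
    offset: 19
  }
}

# Per-axis offsets: exactly one value per dimension from axis onward
# (here axis = 1 on a 4D blob, so three values: C, H, W).
layer {
  name: "crop_chw"         # hypothetical
  type: "Crop"
  bottom: "upscore"
  bottom: "data"
  top: "score_chw"
  crop_param {
    axis: 1
    offset: 0              # C
    offset: 19             # H
    offset: 19             # W
  }
}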