From 41685ac81b6bab42c4fa47cc6c8faa203a4d0c54 Mon Sep 17 00:00:00 2001
From: Jeff Donahue
Date: Sun, 8 Jun 2014 19:53:45 -0700
Subject: [PATCH 1/3] weight sharing

---
 include/caffe/net.hpp       |  10 +-
 src/caffe/net.cpp           | 111 ++++++++-
 src/caffe/proto/caffe.proto |  12 +
 src/caffe/test/test_net.cpp | 469 ++++++++++++++++++++++++++++++++++++
 4 files changed, 594 insertions(+), 8 deletions(-)

diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index d05ca09f20a..aa540edf27e 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -6,6 +6,7 @@
 #include <map>
 #include <set>
 #include <string>
+#include <utility>
 #include <vector>
 #include "caffe/blob.hpp"
@@ -14,9 +15,10 @@
 #include "caffe/proto/caffe.pb.h"
 using std::map;
-using std::vector;
+using std::pair;
 using std::set;
 using std::string;
+using std::vector;
 namespace caffe {
@@ -103,6 +105,7 @@ class Net {
   const shared_ptr<Blob<Dtype> > blob_by_name(const string& blob_name);
   bool has_layer(const string& layer_name);
   const shared_ptr<Layer<Dtype> > layer_by_name(const string& layer_name);
+  const map<string, int>& param_names_index() { return param_names_index_; }
 protected:
   // Helpers for Init.
@@ -114,6 +117,8 @@ class Net {
   int AppendBottom(const NetParameter& param, const int layer_id,
       const int bottom_id, set<string>* available_blobs,
       map<string, int>* blob_name_to_idx);
+  void AppendParam(const NetParameter& param, const int layer_id,
+      const int param_id);
   // Function to get misc parameters, e.g. the learning rate multiplier and
   // weight decay.
   void GetLearningRateAndWeightDecay();
@@ -138,6 +143,9 @@ class Net {
   // top_vecs stores the vectors containing the output for each layer
   vector<vector<Blob<Dtype>*> > top_vecs_;
   vector<vector<int> > top_id_vecs_;
+  vector<int> param_owners_;
+  vector<pair<int, int> > param_net_indices_;
+  map<string, int> param_names_index_;
   // blob indices for the input and the output of the net
   vector<int> net_input_blob_indices_;
   vector<int> net_output_blob_indices_;
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index a6537619a8b..fc532b7b376 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -11,10 +11,12 @@
 #include "caffe/net.hpp"
 #include "caffe/util/io.hpp"
 #include "caffe/util/insert_splits.hpp"
+#include "caffe/util/math_functions.hpp"
 #include "caffe/util/upgrade_proto.hpp"
-using std::pair;
+using std::make_pair;
 using std::map;
+using std::pair;
 using std::set;
 namespace caffe {
@@ -86,8 +88,9 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
     }
     DLOG(INFO) << "Memory required for data: " << memory_used_ * sizeof(Dtype);
     const int blobs_lr_size = layers_[layer_id]->layer_param().blobs_lr_size();
-    CHECK(blobs_lr_size == layers_[layer_id]->blobs().size() ||
-        blobs_lr_size == 0) << "Incorrect blobs lr size: should be either 0 "
+    const int num_param_blobs = layers_[layer_id]->blobs().size();
+    CHECK(blobs_lr_size == num_param_blobs || blobs_lr_size == 0)
+        << "Incorrect blobs lr size: should be either 0 "
        << "or the same as the number of the layer's parameter blobs.";
     if (blobs_lr_size) {
       // Check if this layer needs backward operation itself
@@ -100,6 +103,17 @@ void Net<Dtype>::Init(const NetParameter& in_param) {
       // learning rate to be 1. Thus we will need to perform backward.
       need_backward = true;
     }
+    const int blob_name_size = layer_param.blob_name_size();
+    CHECK(blob_name_size == num_param_blobs || blob_name_size == 0)
+        << "Incorrect blob_name size: should be either 0 or the same as "
+           "the number of the layer's parameter blobs: " << num_param_blobs;
+    const int blob_share_mode_size = layer_param.blob_share_mode_size();
+    CHECK(blob_share_mode_size == num_param_blobs || blob_share_mode_size == 0)
+        << "Incorrect blob_share_mode size: should be either 0 or the same as "
+           "the number of the layer's parameter blobs: " << num_param_blobs;
+    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
+      AppendParam(param, layer_id, param_id);
+    }
     // Finally, set the backward flag
     layer_need_backward_.push_back(need_backward);
     if (need_backward) {
@@ -217,14 +231,69 @@ int Net<Dtype>::AppendBottom(const NetParameter& param,
   return blob_id;
 }
+template <typename Dtype>
+void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
+    const int param_id) {
+  const LayerParameter& layer_param = layers_[layer_id]->layer_param();
+  const int blob_name_size = layer_param.blob_name_size();
+  string param_name;
+  if (blob_name_size) {
+    param_name = layer_param.blob_name(param_id);
+  }
+  const int net_param_id = params_.size();
+  params_.push_back(layers_[layer_id]->blobs()[param_id]);
+  param_net_indices_.push_back(make_pair(layer_id, param_id));
+  if (!blob_name_size || !param_name.size() || (param_name.size() &&
+      param_names_index_.find(param_name) == param_names_index_.end())) {
+    // This layer "owns" this parameter blob -- it is either anonymous
+    // (i.e., not given a param_name) or explicitly given a name that we
+    // haven't already seen.
+    param_owners_.push_back(-1);
+    if (blob_name_size) {
+      param_names_index_[param_name] = net_param_id;
+    }
+  } else {
+    // Named param blob with name we've seen before: share params
+    const int owner_net_param_id = param_names_index_[param_name];
+    param_owners_.push_back(owner_net_param_id);
+    const pair<int, int>& owner_index =
+        param_net_indices_[owner_net_param_id];
+    const int owner_layer_id = owner_index.first;
+    const int owner_param_id = owner_index.second;
+    LOG(INFO) << "Sharing parameters '" << param_name << "' owned by "
+        << "layer '" << layer_names_[owner_layer_id] << "', param "
+        << "index " << owner_param_id;
+    Blob<Dtype>* this_blob = layers_[layer_id]->blobs()[param_id].get();
+    Blob<Dtype>* owner_blob =
+        layers_[owner_layer_id]->blobs()[owner_param_id].get();
+    const int blob_share_mode_size = layer_param.blob_share_mode_size();
+    if (blob_share_mode_size > param_id &&
+        (layer_param.blob_share_mode(param_id) ==
+         LayerParameter_DimCheckMode_PERMISSIVE)) {
+      // Permissive dimension checking -- only check counts are the same.
+      CHECK_EQ(this_blob->count(), owner_blob->count())
+          << "Shared parameter blobs must have the same count.";
+    } else {
+      // Strict dimension checking -- all dims must be the same.
+      CHECK_EQ(this_blob->num(), owner_blob->num())
+          << "Shared parameter blobs must have the same num.";
+      CHECK_EQ(this_blob->channels(), owner_blob->channels())
+          << "Shared parameter blobs must have the same channels.";
+      CHECK_EQ(this_blob->height(), owner_blob->height())
+          << "Shared parameter blobs must have the same height.";
+      CHECK_EQ(this_blob->width(), owner_blob->width())
+          << "Shared parameter blobs must have the same width.";
+    }
+    layers_[layer_id]->blobs()[param_id]->ShareData(
+        *layers_[owner_layer_id]->blobs()[owner_param_id]);
+  }
+}
+
 template <typename Dtype>
 void Net<Dtype>::GetLearningRateAndWeightDecay() {
   LOG(INFO) << "Collecting Learning Rate and Weight Decay.";
   for (int i = 0; i < layers_.size(); ++i) {
     vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
-    for (int j = 0; j < layer_blobs.size(); ++j) {
-      params_.push_back(layer_blobs[j]);
-    }
     // push the learning rate multipliers
     if (layers_[i]->layer_param().blobs_lr_size()) {
       CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
@@ -403,8 +472,36 @@ void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
 template <typename Dtype>
 void Net<Dtype>::Update() {
+  // First, accumulate the diffs of any shared parameters into their owner's
+  // diff. (Assumes that the learning rate, weight decay, etc. have already been
+  // accounted for in the current diff.)
   for (int i = 0; i < params_.size(); ++i) {
-    params_[i]->Update();
+    if (param_owners_[i] < 0) {
+      continue;
+    }
+    const int count = params_[i]->count();
+    const Dtype* this_diff;
+    Dtype* owner_diff;
+    switch (Caffe::mode()) {
+    case Caffe::CPU:
+      this_diff = params_[i]->cpu_diff();
+      owner_diff = params_[param_owners_[i]]->mutable_cpu_diff();
+      caffe_add(count, this_diff, owner_diff, owner_diff);
+      break;
+    case Caffe::GPU:
+      this_diff = params_[i]->gpu_diff();
+      owner_diff = params_[param_owners_[i]]->mutable_gpu_diff();
+      caffe_gpu_add(count, this_diff, owner_diff, owner_diff);
+      break;
+    default:
+      LOG(FATAL) << "Unknown caffe mode: " << Caffe::mode();
+    }
+  }
+  // Now, update the owned parameters.
+  for (int i = 0; i < params_.size(); ++i) {
+    if (param_owners_[i] < 0) {
+      params_[i]->Update();
+    }
   }
 }
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 954f1033e97..76b643d5ff4 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -170,6 +170,18 @@ message LayerParameter {
   // The blobs containing the numeric parameters of the layer
   repeated BlobProto blobs = 6;
+  // The names of the parameter blobs -- useful for sharing parameters among
+  // layers (but never required).
+  repeated string blob_name = 1001;
+  // Whether to require shared weights to have the same shape, or just the same
+  // count -- defaults to STRICT if unspecified.
+  repeated DimCheckMode blob_share_mode = 1002;
+  enum DimCheckMode {
+    // STRICT (default) requires that num, channels, height, width each match.
+    STRICT = 0;
+    // PERMISSIVE requires only the count (num*channels*height*width) to match.
+    PERMISSIVE = 1;
+  }
   // The ratio that is multiplied on the global learning rate. If you want to
   // set the learning ratio for one blob, you need to set it for all blobs.
   repeated float blobs_lr = 7;
diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp
index 8fb02fcb7a7..1eb7b077cdf 100644
--- a/src/caffe/test/test_net.cpp
+++ b/src/caffe/test/test_net.cpp
@@ -1,6 +1,7 @@
 // Copyright 2014 BVLC and contributors.
#include +#include #include #include "google/protobuf/text_format.h" @@ -8,6 +9,7 @@ #include "gtest/gtest.h" #include "caffe/common.hpp" #include "caffe/net.hpp" +#include "caffe/util/math_functions.hpp" #include "caffe/test/test_gradient_check_util.hpp" #include "caffe/test/test_caffe_main.hpp" @@ -17,6 +19,8 @@ namespace caffe { template class NetTest : public ::testing::Test { protected: + NetTest() : seed_(1701) {} + virtual void InitNetFromProtoString(const string& proto) { NetParameter param; CHECK(google::protobuf::TextFormat::ParseFromString(proto, ¶m)); @@ -210,6 +214,245 @@ class NetTest : public ::testing::Test { InitNetFromProtoString(proto); } + virtual void InitUnsharedWeightsNet() { + const string& proto = + "name: 'UnsharedWeightsNetwork' " + "layers: { " + " name: 'data' " + " type: DUMMY_DATA " + " dummy_data_param { " + " num: 5 " + " channels: 2 " + " height: 3 " + " width: 4 " + " data_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " } " + " top: 'data' " + "} " + "layers: { " + " name: 'innerproduct1' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'gaussian' " + " std: 10 " + " } " + " } " + " blob_name: 'unsharedweights1' " + " bottom: 'data' " + " top: 'innerproduct1' " + "} " + "layers: { " + " name: 'innerproduct2' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'gaussian' " + " std: 10 " + " } " + " } " + " blob_name: 'unsharedweights2' " + " bottom: 'data' " + " top: 'innerproduct2' " + "} " + "layers: { " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " + " bottom: 'innerproduct1' " + " bottom: 'innerproduct2' " + "} "; + InitNetFromProtoString(proto); + } + + virtual void InitSharedWeightsNet() { + const string& proto = + "name: 'SharedWeightsNetwork' " + "layers: { " + " name: 'data' " + " type: DUMMY_DATA " + " dummy_data_param { " + " num: 5 " + " channels: 2 " + " height: 3 " + " width: 4 " + " data_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " } " + " top: 'data' " + "} " + "layers: { " + " name: 'innerproduct1' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'gaussian' " + " std: 10 " + " } " + " } " + " blob_name: 'sharedweights' " + " bottom: 'data' " + " top: 'innerproduct1' " + "} " + "layers: { " + " name: 'innerproduct2' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'gaussian' " + " std: 10 " + " } " + " } " + " blob_name: 'sharedweights' " + " bottom: 'data' " + " top: 'innerproduct2' " + "} " + "layers: { " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " + " bottom: 'innerproduct1' " + " bottom: 'innerproduct2' " + "} "; + InitNetFromProtoString(proto); + } + + virtual void InitDiffDataUnsharedWeightsNet() { + const string& proto = + "name: 'DiffDataUnsharedWeightsNetwork' " + "layers: { " + " name: 'data' " + " type: DUMMY_DATA " + " dummy_data_param { " + " num: 10 " + " channels: 10 " + " height: 1 " + " width: 1 " + " num: 10 " + " channels: 10 " + " height: 1 " + " width: 1 " + " data_filler { " + " type: 'gaussian' " + " std: 10 " + " } " + " } " + " top: 'data1' " + " top: 'data2' " + "} " + "layers: { " + " name: 'innerproduct1' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'constant' " + " value: 
0.5 " + " } " + " } " + " blob_name: 'unsharedweights1' " + " bottom: 'data1' " + " top: 'innerproduct1' " + "} " + "layers: { " + " name: 'innerproduct2' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'constant' " + " value: 0.5 " + " } " + " } " + " blob_name: 'unsharedweights2' " + " bottom: 'innerproduct1' " + " top: 'innerproduct2' " + "} " + "layers: { " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " + " bottom: 'data2' " + " bottom: 'innerproduct2' " + "} "; + InitNetFromProtoString(proto); + } + + virtual void InitDiffDataSharedWeightsNet() { + const string& proto = + "name: 'DiffDataSharedWeightsNetwork' " + "layers: { " + " name: 'data' " + " type: DUMMY_DATA " + " dummy_data_param { " + " num: 10 " + " channels: 10 " + " height: 1 " + " width: 1 " + " num: 10 " + " channels: 10 " + " height: 1 " + " width: 1 " + " data_filler { " + " type: 'gaussian' " + " std: 10 " + " } " + " } " + " top: 'data1' " + " top: 'data2' " + "} " + "layers: { " + " name: 'innerproduct1' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'constant' " + " value: 0.5 " + " } " + " } " + " blob_name: 'sharedweights' " + " bottom: 'data1' " + " top: 'innerproduct1' " + "} " + "layers: { " + " name: 'innerproduct2' " + " type: INNER_PRODUCT " + " inner_product_param { " + " num_output: 10 " + " bias_term: false " + " weight_filler { " + " type: 'constant' " + " value: 0.5 " + " } " + " } " + " blob_name: 'sharedweights' " + " bottom: 'innerproduct1' " + " top: 'innerproduct2' " + "} " + "layers: { " + " name: 'loss' " + " type: EUCLIDEAN_LOSS " + " bottom: 'data2' " + " bottom: 'innerproduct2' " + "} "; + InitNetFromProtoString(proto); + } + + int seed_; shared_ptr > net_; }; @@ -309,4 +552,230 @@ TYPED_TEST(NetTest, TestBottomNeedBackwardTricky) { EXPECT_EQ(true, bottom_need_backward[3][1]); } +TYPED_TEST(NetTest, TestUnsharedWeightsDataNet) { + this->InitUnsharedWeightsNet(); + vector*> bottom; + TypeParam loss; + this->net_->Forward(bottom, &loss); + EXPECT_GT(loss, 0); +} + +TYPED_TEST(NetTest, TestSharedWeightsDataNet) { + this->InitSharedWeightsNet(); + vector*> bottom; + TypeParam loss; + this->net_->Forward(bottom, &loss); + EXPECT_FLOAT_EQ(loss, 0); +} + +TYPED_TEST(NetTest, TestUnsharedWeightsDiffNet) { + this->InitUnsharedWeightsNet(); + vector*> bottom; + Net* net = this->net_.get(); + net->Forward(bottom); + net->Backward(); + Layer* ip1_layer = net->layer_by_name("innerproduct1").get(); + Layer* ip2_layer = net->layer_by_name("innerproduct2").get(); + const int count = ip1_layer->blobs()[0]->count(); + const TypeParam* grad1 = ip1_layer->blobs()[0]->cpu_diff(); + const TypeParam* grad2 = ip2_layer->blobs()[0]->cpu_diff(); + for (int i = 0; i < count; ++i) { + EXPECT_GT(fabs(grad1[i]), 0); + EXPECT_FLOAT_EQ(-1 * grad1[i], grad2[i]); + } +} + +TYPED_TEST(NetTest, TestSharedWeightsDiffNet) { + this->InitSharedWeightsNet(); + vector*> bottom; + Net* net = this->net_.get(); + TypeParam loss; + net->Forward(bottom, &loss); + net->Backward(); + EXPECT_FLOAT_EQ(loss, 0); + Layer* ip1_layer = net->layer_by_name("innerproduct1").get(); + Layer* ip2_layer = net->layer_by_name("innerproduct2").get(); + const int count = ip1_layer->blobs()[0]->count(); + const TypeParam* grad1 = ip1_layer->blobs()[0]->cpu_diff(); + const TypeParam* grad2 = ip2_layer->blobs()[0]->cpu_diff(); + for (int i = 0; i < count; ++i) { + EXPECT_FLOAT_EQ(0, grad1[i]); + 
EXPECT_FLOAT_EQ(0, grad2[i]); + } +} + +TYPED_TEST(NetTest, TestSharedWeightsUpdateCPU) { + Caffe::set_random_seed(this->seed_); + Caffe::set_mode(Caffe::CPU); + this->InitDiffDataSharedWeightsNet(); + vector*> bottom; + EXPECT_EQ(this->net_->layer_names()[1], "innerproduct1"); + EXPECT_EQ(this->net_->layer_names()[2], "innerproduct2"); + Blob* ip1_weights = this->net_->layers()[1]->blobs()[0].get(); + Blob* ip2_weights = this->net_->layers()[2]->blobs()[0].get(); + // Check that data blobs of shared weights share the same location in memory. + EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + // Check that diff blobs of shared weights are at different locations in + // locations. (The diffs should be accumulated at update time.) + EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); + this->net_->Forward(bottom); + this->net_->Backward(); + // Compute the expected update as the data minus the two diffs. + Blob shared_params; + const bool reshape = true; + const bool copy_diff = false; + shared_params.CopyFrom(*ip1_weights, copy_diff, reshape); + shared_params.CopyFrom(*ip1_weights, !copy_diff, reshape); + const int count = ip1_weights->count(); + // Make sure the diffs are non-trivial. + for (int i = 0; i < count; ++i) { + EXPECT_NE(0, ip1_weights->cpu_diff()[i]); + EXPECT_NE(0, ip2_weights->cpu_diff()[i]); + EXPECT_NE(ip1_weights->cpu_diff()[i], ip2_weights->cpu_diff()[i]); + } + caffe_axpy(count, TypeParam(1), ip2_weights->cpu_diff(), + shared_params.mutable_cpu_diff()); + caffe_axpy(count, TypeParam(-1), shared_params.cpu_diff(), + shared_params.mutable_cpu_data()); + const TypeParam* expected_updated_params = shared_params.cpu_data(); + this->net_->Update(); + const TypeParam* actual_updated_params = ip1_weights->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_EQ(expected_updated_params[i], actual_updated_params[i]); + } + // Check that data blobs of shared weights STILL point to the same memory + // location (because ... who knows). + EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + + Caffe::set_random_seed(this->seed_); + this->InitDiffDataUnsharedWeightsNet(); + EXPECT_EQ(this->net_->layer_names()[1], "innerproduct1"); + EXPECT_EQ(this->net_->layer_names()[2], "innerproduct2"); + ip1_weights = this->net_->layers()[1]->blobs()[0].get(); + ip2_weights = this->net_->layers()[2]->blobs()[0].get(); + // Check that data and diff blobs of unshared weights are at different + // locations in memory. + EXPECT_NE(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); + this->net_->Forward(bottom); + this->net_->Backward(); + // Compute the expected update. + Blob unshared_params1; + unshared_params1.CopyFrom(*ip1_weights, copy_diff, reshape); + unshared_params1.CopyFrom(*ip1_weights, !copy_diff, reshape); + Blob unshared_params2; + unshared_params2.CopyFrom(*ip2_weights, copy_diff, reshape); + unshared_params2.CopyFrom(*ip2_weights, !copy_diff, reshape); + // Make sure the diffs are non-trivial and sum to the diff in the shared net. 
+ for (int i = 0; i < count; ++i) { + EXPECT_NE(0, ip1_weights->cpu_diff()[i]); + EXPECT_NE(0, ip2_weights->cpu_diff()[i]); + EXPECT_NE(ip1_weights->cpu_diff()[i], ip2_weights->cpu_diff()[i]); + EXPECT_EQ(ip1_weights->cpu_diff()[i] + ip2_weights->cpu_diff()[i], + shared_params.cpu_diff()[i]); + } + caffe_axpy(count, TypeParam(-1), ip1_weights->cpu_diff(), + unshared_params1.mutable_cpu_data()); + caffe_axpy(count, TypeParam(-1), ip2_weights->cpu_diff(), + unshared_params2.mutable_cpu_data()); + const TypeParam* expected_updated_params1 = unshared_params1.cpu_data(); + const TypeParam* expected_updated_params2 = unshared_params2.cpu_data(); + this->net_->Update(); + const TypeParam* actual_updated_params1 = ip1_weights->cpu_data(); + const TypeParam* actual_updated_params2 = ip2_weights->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_EQ(expected_updated_params1[i], actual_updated_params1[i]); + EXPECT_EQ(expected_updated_params2[i], actual_updated_params2[i]); + EXPECT_NE(actual_updated_params1[i], actual_updated_params2[i]); + EXPECT_NE(expected_updated_params, expected_updated_params1); + } +} + +TYPED_TEST(NetTest, TestSharedWeightsUpdateGPU) { + Caffe::set_random_seed(this->seed_); + Caffe::set_mode(Caffe::GPU); + this->InitDiffDataSharedWeightsNet(); + vector*> bottom; + EXPECT_EQ(this->net_->layer_names()[1], "innerproduct1"); + EXPECT_EQ(this->net_->layer_names()[2], "innerproduct2"); + Blob* ip1_weights = this->net_->layers()[1]->blobs()[0].get(); + Blob* ip2_weights = this->net_->layers()[2]->blobs()[0].get(); + // Check that data blobs of shared weights share the same location in memory. + EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + // Check that diff blobs of shared weights are at different locations in + // locations. (The diffs should be accumulated at update time.) + EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); + this->net_->Forward(bottom); + this->net_->Backward(); + // Compute the expected update as the data minus the two diffs. + Blob shared_params; + const bool reshape = true; + const bool copy_diff = false; + shared_params.CopyFrom(*ip1_weights, copy_diff, reshape); + shared_params.CopyFrom(*ip1_weights, !copy_diff, reshape); + const int count = ip1_weights->count(); + // Make sure the diffs are non-trivial. + for (int i = 0; i < count; ++i) { + EXPECT_NE(0, ip1_weights->cpu_diff()[i]); + EXPECT_NE(0, ip2_weights->cpu_diff()[i]); + EXPECT_NE(ip1_weights->cpu_diff()[i], ip2_weights->cpu_diff()[i]); + } + caffe_axpy(count, TypeParam(1), ip2_weights->cpu_diff(), + shared_params.mutable_cpu_diff()); + caffe_axpy(count, TypeParam(-1), shared_params.cpu_diff(), + shared_params.mutable_cpu_data()); + const TypeParam* expected_updated_params = shared_params.cpu_data(); + this->net_->Update(); + const TypeParam* actual_updated_params = ip1_weights->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_EQ(expected_updated_params[i], actual_updated_params[i]); + } + // Check that data blobs of shared weights STILL point to the same memory + // location (because ... who knows). 
+ EXPECT_EQ(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + + Caffe::set_random_seed(this->seed_); + this->InitDiffDataUnsharedWeightsNet(); + EXPECT_EQ(this->net_->layer_names()[1], "innerproduct1"); + EXPECT_EQ(this->net_->layer_names()[2], "innerproduct2"); + ip1_weights = this->net_->layers()[1]->blobs()[0].get(); + ip2_weights = this->net_->layers()[2]->blobs()[0].get(); + // Check that data and diff blobs of unshared weights are at different + // locations in memory. + EXPECT_NE(ip1_weights->cpu_data(), ip2_weights->cpu_data()); + EXPECT_NE(ip1_weights->cpu_diff(), ip2_weights->cpu_diff()); + this->net_->Forward(bottom); + this->net_->Backward(); + // Compute the expected update. + Blob unshared_params1; + unshared_params1.CopyFrom(*ip1_weights, copy_diff, reshape); + unshared_params1.CopyFrom(*ip1_weights, !copy_diff, reshape); + Blob unshared_params2; + unshared_params2.CopyFrom(*ip2_weights, copy_diff, reshape); + unshared_params2.CopyFrom(*ip2_weights, !copy_diff, reshape); + // Make sure the diffs are non-trivial and sum to the diff in the shared net. + for (int i = 0; i < count; ++i) { + EXPECT_NE(0, ip1_weights->cpu_diff()[i]); + EXPECT_NE(0, ip2_weights->cpu_diff()[i]); + EXPECT_NE(ip1_weights->cpu_diff()[i], ip2_weights->cpu_diff()[i]); + EXPECT_EQ(ip1_weights->cpu_diff()[i] + ip2_weights->cpu_diff()[i], + shared_params.cpu_diff()[i]); + } + caffe_axpy(count, TypeParam(-1), ip1_weights->cpu_diff(), + unshared_params1.mutable_cpu_data()); + caffe_axpy(count, TypeParam(-1), ip2_weights->cpu_diff(), + unshared_params2.mutable_cpu_data()); + const TypeParam* expected_updated_params1 = unshared_params1.cpu_data(); + const TypeParam* expected_updated_params2 = unshared_params2.cpu_data(); + this->net_->Update(); + const TypeParam* actual_updated_params1 = ip1_weights->cpu_data(); + const TypeParam* actual_updated_params2 = ip2_weights->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_EQ(expected_updated_params1[i], actual_updated_params1[i]); + EXPECT_EQ(expected_updated_params2[i], actual_updated_params2[i]); + EXPECT_NE(actual_updated_params1[i], actual_updated_params2[i]); + EXPECT_NE(expected_updated_params, expected_updated_params1); + } +} + } // namespace caffe From 26e022acedcb53b2c45147b635fc9081b66396af Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 26 Jun 2014 12:49:00 -0700 Subject: [PATCH 2/3] change weight blob field name to param --- src/caffe/net.cpp | 16 ++++++++-------- src/caffe/proto/caffe.proto | 6 +++--- src/caffe/test/test_net.cpp | 16 ++++++++-------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index fc532b7b376..e25c754bda6 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -103,9 +103,9 @@ void Net::Init(const NetParameter& in_param) { // learning rate to be 1. Thus we will need to perform backward. 
       need_backward = true;
     }
-    const int blob_name_size = layer_param.blob_name_size();
-    CHECK(blob_name_size == num_param_blobs || blob_name_size == 0)
-        << "Incorrect blob_name size: should be either 0 or the same as "
+    const int param_size = layer_param.param_size();
+    CHECK(param_size == num_param_blobs || param_size == 0)
+        << "Incorrect param size: should be either 0 or the same as "
            "the number of the layer's parameter blobs: " << num_param_blobs;
     const int blob_share_mode_size = layer_param.blob_share_mode_size();
     CHECK(blob_share_mode_size == num_param_blobs || blob_share_mode_size == 0)
@@ -235,21 +235,21 @@ template <typename Dtype>
 void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
     const int param_id) {
   const LayerParameter& layer_param = layers_[layer_id]->layer_param();
-  const int blob_name_size = layer_param.blob_name_size();
+  const int param_size = layer_param.param_size();
   string param_name;
-  if (blob_name_size) {
-    param_name = layer_param.blob_name(param_id);
+  if (param_size) {
+    param_name = layer_param.param(param_id);
   }
   const int net_param_id = params_.size();
   params_.push_back(layers_[layer_id]->blobs()[param_id]);
   param_net_indices_.push_back(make_pair(layer_id, param_id));
-  if (!blob_name_size || !param_name.size() || (param_name.size() &&
+  if (!param_size || !param_name.size() || (param_name.size() &&
       param_names_index_.find(param_name) == param_names_index_.end())) {
     // This layer "owns" this parameter blob -- it is either anonymous
     // (i.e., not given a param_name) or explicitly given a name that we
     // haven't already seen.
     param_owners_.push_back(-1);
-    if (blob_name_size) {
+    if (param_size) {
       param_names_index_[param_name] = net_param_id;
     }
   } else {
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 76b643d5ff4..fd71f45fab4 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -172,7 +172,7 @@ message LayerParameter {
   repeated BlobProto blobs = 6;
   // The names of the parameter blobs -- useful for sharing parameters among
   // layers (but never required).
-  repeated string blob_name = 1001;
+  repeated string param = 1001;
   // Whether to require shared weights to have the same shape, or just the same
   // count -- defaults to STRICT if unspecified.
repeated DimCheckMode blob_share_mode = 1002; @@ -319,9 +319,9 @@ message HDF5OutputParameter { } message HingeLossParameter { - enum Norm { + enum Norm { L1 = 1; - L2 = 2; + L2 = 2; } // Specify the Norm to use L1 or L2 optional Norm norm = 1 [default = L1]; diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 1eb7b077cdf..18b03476996 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -243,7 +243,7 @@ class NetTest : public ::testing::Test { " std: 10 " " } " " } " - " blob_name: 'unsharedweights1' " + " param: 'unsharedweights1' " " bottom: 'data' " " top: 'innerproduct1' " "} " @@ -258,7 +258,7 @@ class NetTest : public ::testing::Test { " std: 10 " " } " " } " - " blob_name: 'unsharedweights2' " + " param: 'unsharedweights2' " " bottom: 'data' " " top: 'innerproduct2' " "} " @@ -300,7 +300,7 @@ class NetTest : public ::testing::Test { " std: 10 " " } " " } " - " blob_name: 'sharedweights' " + " param: 'sharedweights' " " bottom: 'data' " " top: 'innerproduct1' " "} " @@ -315,7 +315,7 @@ class NetTest : public ::testing::Test { " std: 10 " " } " " } " - " blob_name: 'sharedweights' " + " param: 'sharedweights' " " bottom: 'data' " " top: 'innerproduct2' " "} " @@ -362,7 +362,7 @@ class NetTest : public ::testing::Test { " value: 0.5 " " } " " } " - " blob_name: 'unsharedweights1' " + " param: 'unsharedweights1' " " bottom: 'data1' " " top: 'innerproduct1' " "} " @@ -377,7 +377,7 @@ class NetTest : public ::testing::Test { " value: 0.5 " " } " " } " - " blob_name: 'unsharedweights2' " + " param: 'unsharedweights2' " " bottom: 'innerproduct1' " " top: 'innerproduct2' " "} " @@ -424,7 +424,7 @@ class NetTest : public ::testing::Test { " value: 0.5 " " } " " } " - " blob_name: 'sharedweights' " + " param: 'sharedweights' " " bottom: 'data1' " " top: 'innerproduct1' " "} " @@ -439,7 +439,7 @@ class NetTest : public ::testing::Test { " value: 0.5 " " } " " } " - " blob_name: 'sharedweights' " + " param: 'sharedweights' " " bottom: 'innerproduct1' " " top: 'innerproduct2' " "} " From a71354f87d9b4cf205180e5339b958fc684e8a51 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 26 Jun 2014 13:05:44 -0700 Subject: [PATCH 3/3] rename layer -> param mapping for clarity --- include/caffe/net.hpp | 2 +- src/caffe/net.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index aa540edf27e..cbd5becde2d 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -144,7 +144,7 @@ class Net { vector*> > top_vecs_; vector > top_id_vecs_; vector param_owners_; - vector > param_net_indices_; + vector > layer_param_indices_; map param_names_index_; // blob indices for the input and the output of the net vector net_input_blob_indices_; diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index e25c754bda6..f364e6767c6 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -242,7 +242,7 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, } const int net_param_id = params_.size(); params_.push_back(layers_[layer_id]->blobs()[param_id]); - param_net_indices_.push_back(make_pair(layer_id, param_id)); + layer_param_indices_.push_back(make_pair(layer_id, param_id)); if (!param_size || !param_name.size() || (param_name.size() && param_names_index_.find(param_name) == param_names_index_.end())) { // This layer "owns" this parameter blob -- it is either anonymous @@ -257,7 +257,7 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, const int 
owner_net_param_id = param_names_index_[param_name];
     param_owners_.push_back(owner_net_param_id);
     const pair<int, int>& owner_index =
-        param_net_indices_[owner_net_param_id];
+        layer_param_indices_[owner_net_param_id];
     const int owner_layer_id = owner_index.first;
     const int owner_param_id = owner_index.second;
     LOG(INFO) << "Sharing parameters '" << param_name << "' owned by "
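
As the SharedWeightsNetwork test above illustrates, sharing is configured entirely in the net definition: layers whose parameter blobs carry the same name end up sharing one underlying data blob, with the first such layer acting as the owner. A minimal sketch using the field name after the rename in the second patch (layer and parameter names here are illustrative, not taken from the patches):

    layers: {
      name: 'ip1'
      type: INNER_PRODUCT
      inner_product_param { num_output: 10 bias_term: false }
      param: 'shared_ip_weights'
      bottom: 'data'
      top: 'ip1'
    }
    layers: {
      name: 'ip2'
      type: INNER_PRODUCT
      inner_product_param { num_output: 10 bias_term: false }
      param: 'shared_ip_weights'
      # blob_share_mode: PERMISSIVE would relax the default STRICT shape check
      # to a count-only check, per the DimCheckMode enum added in caffe.proto.
      bottom: 'data'
      top: 'ip2'
    }

Both layers see identical weight data during Forward (via ShareData), their diffs remain at separate memory locations, and Net::Update accumulates each shared diff into the owner's diff before applying the update, as exercised by TestSharedWeightsUpdateCPU/GPU.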