Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Yet another batch normalization PR #3229

Merged
merged 2 commits into from
Oct 23, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions examples/cifar10/cifar10_full_sigmoid_solver.prototxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# reduce learning rate after 120 epochs (60000 iters) by factor 0f 10
# then another factor of 10 after 10 more epochs (5000 iters)

# The train/test net protocol buffer definition
net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 10
# Carry out testing every 1000 training iterations.
test_interval: 1000
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
#weight_decay: 0.004
# The learning rate policy
lr_policy: "step"
gamma: 1
stepsize: 5000
# Display every 200 iterations
display: 100
# The maximum number of iterations
max_iter: 60000
# snapshot intermediate results
snapshot: 10000
snapshot_prefix: "examples/cifar10_full_sigmoid"
# solver mode: CPU or GPU
solver_mode: GPU
28 changes: 28 additions & 0 deletions examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# reduce learning rate after 120 epochs (60000 iters) by factor 0f 10
# then another factor of 10 after 10 more epochs (5000 iters)

# The train/test net protocol buffer definition
net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 10
# Carry out testing every 1000 training iterations.
test_interval: 1000
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.001
momentum: 0.9
#weight_decay: 0.004
# The learning rate policy
lr_policy: "step"
gamma: 1
stepsize: 5000
# Display every 200 iterations
display: 100
# The maximum number of iterations
max_iter: 60000
# snapshot intermediate results
snapshot: 10000
snapshot_prefix: "examples/cifar10_full_sigmoid_bn"
# solver mode: CPU or GPU
solver_mode: GPU
212 changes: 212 additions & 0 deletions examples/cifar10/cifar10_full_sigmoid_train_test.prototxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
name: "CIFAR10_full"
layer {
name: "cifar"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mean_file: "examples/cifar10/mean.binaryproto"
}
data_param {
source: "examples/cifar10/cifar10_train_lmdb"
batch_size: 111
backend: LMDB
}
}
layer {
name: "cifar"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mean_file: "examples/cifar10/mean.binaryproto"
}
data_param {
source: "examples/cifar10/cifar10_test_lmdb"
batch_size: 1000
backend: LMDB
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 32
pad: 2
kernel_size: 5
stride: 1
weight_filler {
type: "gaussian"
std: 0.0001
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}



layer {
name: "Sigmoid1"
type: "Sigmoid"
bottom: "pool1"
top: "Sigmoid1"
}

layer {
name: "conv2"
type: "Convolution"
bottom: "Sigmoid1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 32
pad: 2
kernel_size: 5
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
}
}
}


Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somewhat unrelated to batch normalization, but is it intentional to use conv -> pooling -> sigmoid in the first layer and conv -> sigmoid -> pooling in the second layer?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not. Intention was to reduce memory usage by conv -> pooling -> sigmoid, but missed it in 2nd layer.

layer {
name: "Sigmoid2"
type: "Sigmoid"
bottom: "conv2"
top: "Sigmoid2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "Sigmoid2"
top: "pool2"
pooling_param {
pool: AVE
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
convolution_param {
num_output: 64
pad: 2
kernel_size: 5
stride: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
}
}
param {
lr_mult: 1
}
param {
lr_mult: 1
}

}

layer {
name: "Sigmoid3"
type: "Sigmoid"
bottom: "conv3"
top: "Sigmoid3"
}

layer {
name: "pool3"
type: "Pooling"
bottom: "Sigmoid3"
top: "pool3"
pooling_param {
pool: AVE
kernel_size: 3
stride: 2
}
}

layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool3"
top: "ip1"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 10
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip1"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip1"
bottom: "label"
top: "loss"
}
Loading