This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Merge pull request #31 from antinucleon/master
Conv Op
antinucleon committed Aug 25, 2015
2 parents 7f93905 + 1045c18 commit c057b56
Showing 14 changed files with 549 additions and 483 deletions.
8 changes: 6 additions & 2 deletions Makefile
@@ -64,14 +64,14 @@ endif
#BIN = test/test_threaded_engine test/api_registry_test
OBJ = narray_function_cpu.o
# add threaded engine after it is done
OBJCXX11 = engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o
OBJCXX11 = flatten_cpu.o engine.o narray.o c_api.o operator.o symbol.o storage.o fully_connected_cpu.o static_graph.o activation_cpu.o graph_executor.o softmax_cpu.o elementwise_sum_cpu.o pooling_cpu.o convolution_cpu.o
CUOBJ =
SLIB = lib/libmxnet.so
ALIB = lib/libmxnet.a
LIB_DEP = $(DMLC_CORE)/libdmlc.a

ifeq ($(USE_CUDA), 1)
CUOBJ += narray_function_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o softmax_gpu.o
CUOBJ += flatten_gpu.o narray_function_gpu.o fully_connected_gpu.o activation_gpu.o elementwise_sum_gpu.o pooling_gpu.o softmax_gpu.o convolution_gpu.o
endif

.PHONY: clean all test lint doc
@@ -101,6 +101,10 @@ pooling_cpu.o: src/operator/pooling.cc
pooling_gpu.o: src/operator/pooling.cu
softmax_cpu.o: src/operator/softmax.cc
softmax_gpu.o: src/operator/softmax.cu
convolution_cpu.o: src/operator/convolution.cc
convolution_gpu.o: src/operator/convolution.cu
flatten_cpu.o: src/operator/flatten.cc
flatten_gpu.o: src/operator/flatten.cu

lib/libmxnet.a: $(OBJ) $(OBJCXX11) $(CUOBJ)
lib/libmxnet.so: $(OBJ) $(OBJCXX11) $(CUOBJ)
2 changes: 1 addition & 1 deletion include/mxnet/base.h
@@ -6,6 +6,7 @@
#ifndef MXNET_BASE_H_
#define MXNET_BASE_H_
#include <dmlc/base.h>
#include <dmlc/type_traits.h>
#include <mshadow/tensor.h>
#include <string>

@@ -45,6 +46,5 @@ typedef mshadow::default_real_t real_t;
typedef mshadow::TShape TShape;
/*! \brief storage container type */
typedef mshadow::TBlob TBlob;

} // namespace mxnet
#endif // MXNET_BASE_H_
49 changes: 22 additions & 27 deletions python/test_mnist.py
@@ -3,28 +3,14 @@
import numpy as np
import os, cPickle, gzip

def Softmax(x):
batch, nidden = x.shape
maxes = np.max(x, axis=1)
x -= maxes.reshape(batch, 1)
x = np.exp(x)
norm = np.sum(x, axis=1)
prob = x / norm.reshape((batch, 1))
return prob

def CalAcc(out, label):
pred = np.argmax(out, axis=1)
return np.sum(pred == label) * 1.0 / out.shape[0]

def SetGradient(out_grad, label):
assert(out_grad.shape[0] == label.shape[0])
for i in xrange(label.shape[0]):
k = label[i]
out_grad[i][k] -= 1.0

# load data
class MNISTIter(object):
def __init__(self, which_set, batch_size=100):
def __init__(self, which_set, batch_size=100, flatten=True):
if not os.path.exists('mnist.pkl.gz'):
os.system("wget http://deeplearning.net/data/mnist/mnist.pkl.gz")
f = gzip.open('mnist.pkl.gz', 'rb')
@@ -39,6 +25,7 @@ def __init__(self, which_set, batch_size=100):
else:
self.data = test_set[0]
self.data = np.asarray(test_set[1])
self.flatten = flatten
self.batch_size = batch_size
self.nbatch = self.data.shape[0] / batch_size
assert(self.data.shape[0] % batch_size == 0) # I am lazy
@@ -57,25 +44,34 @@ def Get(self):
raise Exception("Iterator is at end")
start = self.now_idx * self.batch_size
end = (self.now_idx + 1) * self.batch_size
return (self.data[start:end, :], self.label[start:end])
if self.flatten:
return (self.data[start:end, :], self.label[start:end])
else:
return (self.data[start:end, :].reshape(batch_size, 1, 28, 28),
self.label[start:end])



# symbol net
batch_size = 100
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=160)
fc1 = mx.symbol.Convolution(data = data, name='conv1', nb_filter=32, kernel=(7,7), stride=(2,2), nstep=10, no_bias=1)
act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data = act1, name='fc2', num_hidden=10)
args_list = fc2.list_arguments()
mp = mx.symbol.Pooling(data = act1, name = 'mp', kernel=(2,2), stride=(2,2), pool_type='avg')
fl = mx.symbol.Flatten(data = mp, name="flatten")
fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=10)
softmax = mx.symbol.Softmax(data = fc2, name = 'sm')
args_list = softmax.list_arguments()
# infer shape
data_shape = (batch_size, 784)
arg_shapes, out_shapes = fc2.infer_shape(data=data_shape)
#data_shape = (batch_size, 784)

data_shape = (batch_size, 1, 28, 28)
arg_shapes, out_shapes = softmax.infer_shape(data=data_shape)
arg_narrays = [mx.narray.create(shape) for shape in arg_shapes]
grad_narrays = [mx.narray.create(shape) for shape in arg_shapes]
mom_narrays = [mx.narray.create(shape) for shape in arg_shapes]
inputs = dict(zip(args_list, arg_narrays))

print zip(args_list, arg_shapes)
np.random.seed(0)
# set random weight
for name, narray in inputs.items():
@@ -87,7 +83,7 @@ def Get(self):
req = ['write_to' for i in range(len(arg_narrays))]
# bind executer
# TODO(bing): think of a better bind interface
executor = fc2.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req)
executor = softmax.bind(mx.Context('cpu'), arg_narrays, grad_narrays, req)
# update

out_narray = executor.heads()[0]
@@ -104,8 +100,8 @@ def Update(mom, grad, weight):
block = zip(mom_narrays, grad_narrays, arg_narrays)


train = MNISTIter("train", batch_size)
valid = MNISTIter("valid", batch_size)
train = MNISTIter("train", batch_size, False)
valid = MNISTIter("valid", batch_size, False)

for i in xrange(epoch):
# train
@@ -115,11 +111,10 @@ def Update(mom, grad, weight):
while train.Next():
data, label = train.Get()
inputs["data"].numpy[:] = data
inputs["sm_label"].numpy[:] = label
executor.forward()
out_narray.numpy[:] = Softmax(out_narray.numpy)
train_acc += CalAcc(out_narray.numpy, label)
grad_narray.numpy[:] = out_narray.numpy
SetGradient(grad_narray.numpy, label)
executor.backward([grad_narray])

for mom, grad, weight in block:
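For the new network in test_mnist.py, the shapes reported by softmax.infer_shape can be worked out by hand. Below is a minimal sketch of that arithmetic, assuming the Convolution and Pooling operators use no padding and floor division for the output size (the usual "valid" convention; the operators themselves are authoritative, and the out_dim helper is only illustrative):

def out_dim(size, kernel, stride):
    # output spatial size of a "valid" (no padding) window -- assumed convention
    return (size - kernel) // stride + 1

batch = 100
conv = out_dim(28, 7, 2)       # conv1: 7x7 kernel, stride 2   -> 11
pool = out_dim(conv, 2, 2)     # mp: 2x2 avg pooling, stride 2 -> 5
flat = 32 * pool * pool        # flatten: 32 channels * 5 * 5  -> 800

print (batch, 32, conv, conv)  # (100, 32, 11, 11) after conv1 / relu1
print (batch, 32, pool, pool)  # (100, 32, 5, 5)   after mp
print (batch, flat)            # (100, 800)        after flatten
print (batch, 10)              # (100, 10)         after fc2 / sm

The 800-wide flattened output is why the Flatten symbol sits between the pooling layer and fc2: the fully connected layer consumes a 2-D (batch, feature) matrix rather than the 4-D pooling output.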
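The training loop also no longer calls the numpy Softmax and SetGradient helpers; the label batch is copied into the new sm_label input instead, so the gradient of the softmax loss is presumably computed inside the graph rather than in Python. A standalone numpy sketch of what the removed helpers computed (row-wise softmax followed by the cross-entropy gradient, i.e. prob minus the one-hot label); the softmax_grad name is hypothetical:

import numpy as np

def softmax_grad(x, label):
    x = x - np.max(x, axis=1).reshape(x.shape[0], 1)     # shift rows for numerical stability
    prob = np.exp(x)
    prob /= np.sum(prob, axis=1).reshape(x.shape[0], 1)  # row-wise softmax
    grad = prob.copy()
    grad[np.arange(label.shape[0]), label] -= 1.0        # prob - one_hot(label)
    return prob, grad

prob, grad = softmax_grad(np.random.randn(4, 10), np.array([3, 0, 7, 1]))
print np.sum(prob, axis=1)                               # each row sums to 1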
