apache · ghost · Sep 8, 2015 · Sep 9, 2015
@@ -5,7 +5,7 @@
 import sys
 sys.path.append("../../tests/python")
 import get_data
-
+import time
 
 """
 CXXNET Result:
@@ -70,8 +70,8 @@ def ConvFactory(**kwargs):
     param = copy.copy(kwargs)
     act = param["act_type"]
     del param["act_type"]
+    param["workspace"] = 512
     param["name"] = "conv%d" % conv_cnt
-    param["nstep"] = 64
     conv = mx.symbol.Convolution(**param)
     bn = mx.symbol.BatchNorm(data = conv, name="bn%d" % conv_cnt)
     relu = mx.symbol.Activation(data = bn, name = "%s%d" % (act, conv_cnt), act_type=act)
@@ -89,13 +89,11 @@ def DownsampleFactory(data, ch_3x3, stride = 2):
     param["num_filter"] = ch_3x3
     param["act_type"] = "relu"
     param["data"] = data
-    param["nstep"] = 100
     param["pad"] = (1, 1)
     conv3x3 = ConvFactory(**param)
     # pool
     del param["num_filter"]
     del param["act_type"]
-    del param["nstep"]
     del param["pad"]
     param["pool_type"] = "max"
     param["name"] = "pool%d" % pool_cnt
@@ -117,7 +115,6 @@ def SimpleFactory(data, ch_1x1, ch_3x3):
     param["stride"] = (1, 1)
     param["act_type"] = "relu"
     param["data"] = data
-    param["nstep"] = 128
     conv1x1 = ConvFactory(**param)
 
     # 3x3
@@ -143,7 +140,7 @@ def RandomInit(narray):
 in3a = SimpleFactory(conv1, 32, 32)
 in3b = SimpleFactory(in3a, 32, 48)
 in3c = DownsampleFactory(in3b, 80)
-in4a = SimpleFactory(in3c, 112, 38)
+in4a = SimpleFactory(in3c, 112, 48)
 in4b = SimpleFactory(in4a, 96, 64)
 in4c = SimpleFactory(in4b, 80, 80)
 in4d = SimpleFactory(in4c, 48, 96)
@@ -155,27 +152,30 @@ def RandomInit(narray):
 fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1")
 loss = mx.symbol.Softmax(data=fc, name="sm")
 
-args_list = loss.list_arguments()
 
+epoch = 9
+lr = 0.05
+wd = 0.0001
+momentum = 0.9
 
 batch_size = 128
 data_shape = (batch_size, 3, 28, 28)
-arg_shapes, out_shapes, aux_shapes = loss.infer_shape(data=data_shape)
 
-arg_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
-grad_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
-mom_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in arg_shapes]
-aux_narrays = [mx.narray.zeros(shape, ctx=mx.Context("gpu")) for shape in aux_shapes]
+in_data = mx.narray.empty(data_shape, mx.Context('gpu'))
+executor, executor_data = loss.simple_bind(mx.Context('gpu'), {"data": in_data})
+out_narray = executor.heads()[0]
+pred = mx.narray.zeros(out_narray.shape)
 
-inputs = dict(zip(args_list, arg_narrays))
+inputs = dict(zip(loss.list_arguments(), executor_data["args"]))
+block = list(zip(executor_data["grad"],
+                 executor_data["args"],
+                 executor_data["momentum"]))
 
-name2shape = dict(zip(args_list, arg_shapes))
-pred = mx.narray.zeros(out_shapes[0])
 
 np.random.seed(0)
 # set random weight
 
-for name, narray in inputs.items():
+for name, narray in zip(loss.list_arguments(), executor_data["args"]):
     if "weight" in name:
         narray[:] = np.random.uniform(-0.1, 0.1, narray.shape)
     if "bias" in name:
@@ -185,25 +185,11 @@ def RandomInit(narray):
     if "beta" in name:
         narray[:] = 0.0
 
-# bind executer
-# TODO(bing): think of a better bind interface
-executor = loss.bind(mx.Context('gpu'), arg_narrays, grad_narrays, 'write', aux_narrays)
-# update
-
-out_narray = executor.heads()[0]
-
-epoch = 9
-lr = 0.05
-wd = 0.0001
-momentum = 0.9
-
 def Update(grad, weight, mom):
     mom[:] *= momentum
     mom[:] += -lr * (grad / batch_size + wd * weight)
     weight[:] += mom
 
-block = list(zip(grad_narrays, arg_narrays, mom_narrays))
-
 #check data
 get_data.GetCifar10()
 
@@ -224,15 +210,17 @@ def Update(grad, weight, mom):
         batch_size=batch_size,
         nthread=1)
 
-tmp_label = mx.narray.zeros(name2shape["sm_label"])
+tmp_label = mx.narray.zeros(inputs["sm_label"].shape)
 
-def progress(count, total, suffix=''):
-    bar_len = 80
+def progress(count, total, epoch, toc):
+    bar_len = 60
     filled_len = int(round(bar_len * count / float(total)))
 
     percents = round(100.0 * count / float(total), 1)
     bar = '=' * filled_len + '-' * (bar_len - filled_len)
-
+    tic = time.time()
+    speed = batch_size / float(tic - toc)
+    suffix = "Epoch %d, Speed: %.2f pic/sec" % (epoch, speed)
     sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', suffix))
 
 def test_cifar():
@@ -247,7 +235,7 @@ def test_cifar():
         val_nbatch = 0
         all_train_bacth = 50000 / float(batch_size)
         for data, label in train_dataiter:
-            progress(train_nbatch, all_train_bacth, "Epoch %d" % i)
+            toc = time.time()
             label = label.asnumpy().flatten()
             tmp_label[:] = label
             inputs["data"][:] = data
@@ -260,6 +248,7 @@ def test_cifar():
 
             for grad, weight, mom in block:
                 Update(grad, weight, mom)
+            progress(train_nbatch, all_train_bacth, i, toc)
 
         # evaluate
         for data, label in test_dataiter:

@@ -8,6 +8,7 @@
 #define MXNET_OPERATOR_H_
 
 #include <dmlc/base.h>
+#include <dmlc/json.h>
 #include <dmlc/logging.h>
 #include <dmlc/registry.h>
 #include <vector>
@@ -385,6 +386,9 @@ class OperatorProperty {
    * \return a new constructed OperatorProperty
    */
   static OperatorProperty *Create(const char* type_name);
+
+  virtual void Save(dmlc::JSONWriter *writer) const = 0;
+  virtual void Load(dmlc::JSONReader *reader) = 0;
 };
 
 /*! \brief typedef the factory function of operator property */

@@ -8,6 +8,8 @@
 #define MXNET_SYMBOLIC_H_
 
 #include <dmlc/base.h>
+#include <dmlc/json.h>
+#include <algorithm>
 #include <vector>
 #include <memory>
 #include <string>
@@ -64,6 +66,11 @@ class StaticGraph {
       if (source_id == other.source_id) return index < other.index;
       return source_id < other.source_id;
     }
+
+    /*! \brief interface for json serialization */
+    void Save(dmlc::JSONWriter *writer) const;
+    /*! \brief interface for json serialization */
+    void Load(dmlc::JSONReader *reader);
   };
   /*!
    * \brief Operation Node in static graphs.
@@ -95,6 +102,21 @@ class StaticGraph {
     int32_t backward_source_id;
     /*! \brief default constructor */
     Node() : backward_source_id(-1) {}
+
+    /*! \brief swap all fields of two nodes; used by the copy-and-swap assignment */
+    friend void swap(Node& lhs, Node& rhs) {
+      std::swap(lhs.op, rhs.op);
+      std::swap(lhs.name, rhs.name);
+      std::swap(lhs.inputs, rhs.inputs);
+      std::swap(lhs.backward_source_id, rhs.backward_source_id);
+    }
+    /*! \brief copy constructor in favor of serialization; op is cloned through Copy() */
+    Node(const Node& another) : op(another.op.get() ? another.op.get()->Copy() : nullptr),
+                                name(another.name), inputs(another.inputs),
+                                backward_source_id(another.backward_source_id) {}
+
+    /*! \brief copy-and-swap assignment: the by-value argument already is the copy */
+    inline Node& operator=(Node another) { swap(*this, another); return *this; }
     /*! \return whether the node is forward op node */
     inline bool is_forward() const {
       return op != nullptr;
@@ -107,13 +129,25 @@ class StaticGraph {
     inline bool is_variable() const {
       return op == nullptr && !is_backward();
     }
+    /*! \brief interface for json serialization */
+    void Save(dmlc::JSONWriter *writer) const;
+    /*! \brief interface for json serialization */
+    void Load(dmlc::JSONReader *reader);
   };
   /*! \brief all nodes in the graph */
   std::vector<Node> nodes;
   /*! \brief index of nodes that correspods to arguments */
   std::vector<uint32_t> arg_nodes;
   /*! \brief heads outputs of the graph */
   std::vector<DataEntry> heads;
+  /*! \brief load static graph from json. TODO: a static factory function would be better */
+  void Load(const std::string& json);
+  /*! \brief save static graph to json */
+  void Save(std::string* json) const;
+  /*! \brief interface for json serialization */
+  void Save(dmlc::JSONWriter *writer) const;
+  /*! \brief interface for json serialization */
+  void Load(dmlc::JSONReader *reader);
   // funtions to help inference in static graph
   /*!
    * \brief Perform a topological sort on the graph

diff --git a/lib/README.md b/lib/README.md
@@ -349,9 +349,7 @@ def zeros(shape, ctx=None):
     out: Array
         The created NArray.
     """
-    if ctx is None:
-        ctx = Context.default_ctx
-    arr = NArray(handle=_new_alloc_handle(shape, ctx, False))
+    arr = empty(shape, ctx)
     arr[:] = 0.0
     return arr
 
@@ -371,15 +369,11 @@ def ones(shape, ctx=None):
     out: Array
         The created NArray.
     """
-    if ctx is None:
-        ctx = Context.default_ctx
-    arr = NArray(handle=_new_alloc_handle(shape, ctx, False))
+    arr = empty(shape, ctx)
     arr[:] = 1.0
     return arr
 
 
-
-
 def array(source_array, ctx=None):
     """Create a new NArray that copies content from source_array.
 

@@ -10,7 +10,7 @@
 from .base import NArrayHandle, ExecutorHandle, SymbolHandle
 from .base import check_call
 from .context import Context
-from .narray import NArray
+from .narray import NArray, zeros
 from .executor import Executor
 
 
@@ -332,6 +332,49 @@ def _get_narray_handle(arg_key, args, arg_names, allow_missing):
             raise TypeError('Only Accept list of NArrays or dict of str->NArray')
         return c_array(NArrayHandle, arg_handles)
 
+    def simple_bind(self, ctx, args, grad_req='write'):
+        """Simply bind current symbol to get an executor
+        Parameters
+        ----------
+        ctx : Context
+            The device context the generated executor to run on.
+
+        args : list of NArray or dict of str->NArray
+            Input arguments to the symbol.
+            - type is dict of str->NArray, then it maps the name of arguments
+              to the corresponding NArray,
+            - Not all the arguments must be provided.
+        Returns
+        -------
+        executor : mxnet.Executor
+            The generated Executor
+        executor_data : dict of str -> list(NArray)
+            The data for the executor,
+            key is "args", "grad", "momentum", "auxiliary_states"
+            sequence is same to the list function
+        """
+        if not isinstance(args, dict):
+            raise TypeError("args must be dict of str->NArray")
+        input_shapes = dict((arr[0], arr[1].shape) for arr in args.items())
+        arg_shapes, out_shapes, aux_shapes = self.infer_shape(**input_shapes)
+        if arg_shapes == None:
+            raise ValueError("Input node is not complete")
+        # alloc space
+        arg_narrays = []
+        for name, shape in zip(self.list_arguments(), arg_shapes):
+            if name in args:
+                arg_narrays.append(args[name])
+            else:
+                arg_narrays.append(zeros(shape, ctx))
+        # TODO(bing): specail treat input data grad
+        grad_narrays = [zeros(shape, ctx) for shape in arg_shapes]
+        mom_narrays = [zeros(shape, ctx) for shape in arg_shapes]
+        aux_narrays = [zeros(shape, ctx) for shape in aux_shapes]
+        executor = self.bind(ctx, arg_narrays, grad_narrays, grad_req, aux_narrays)
+        executor_data = {"args" : arg_narrays, "grad" : grad_narrays,
+                         "momentum" : mom_narrays, "auxiliary_states" : aux_narrays}
+        return (executor, executor_data)
+
     def bind(self, ctx, args, args_grad=None, grad_req='write', aux_states=None):
         """Bind current symbol to get an executor.
 

diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h
@@ -35,6 +35,17 @@ struct ActivationParam : public dmlc::Parameter<ActivationParam> {
         .add_enum("tanh", kTanh)
         .describe("Activation function to be applied.");
   }
+
+  inline void Save(dmlc::JSONWriter *writer) const {  // write this param as one JSON object
+    writer->BeginObject();
+    writer->WriteObjectKeyValue("act_type", act_type);  // the only key emitted
+    writer->EndObject();
+  }
+  inline void Load(dmlc::JSONReader *reader) {  // read back the key that Save writes
+    dmlc::JSONObjectReadHelper helper;
+    helper.DeclareField("act_type", &act_type);  // bind key "act_type" to the member
+    helper.ReadAllFields(reader);  // parse from reader into the declared field
+  }
 };
 
 /**
@@ -84,7 +95,7 @@ template<typename xpu>
 Operator* CreateOp(ActivationParam type);
 
 #if DMLC_USE_CXX11
-class ActivationProp : public OperatorProperty {
+class ActivationProp : public ParamOperatorProperty<ActivationParam> {
  public:
   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
     param_.Init(kwargs);
@@ -135,12 +146,8 @@ class ActivationProp : public OperatorProperty {
   }
 
   Operator* CreateOperator(Context ctx) const;
-
- private:
-  ActivationParam param_;
 };
 #endif  // DMLC_USE_CXX11
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_OPERATOR_ACTIVATION_INL_H_
-