-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- added "add layer" - added a "model unit test" for add layer. Since 'model-level unit tests' haven't been run for a long time, I disabled some test cases that were causing issues when running the model unit test. Many people gave great feedback, so I've improved the structure accordingly. - An upper class called "OperationLayer" was added to reduce redundant code. - Based on the number of input tensors, the behavior of "OperationLayer" has been classified into two types: unary and binary operations. - Various additional code cleanups have also taken place. - There is an issue where committing compressed files containing golden data for unit tests prevents pushing changes to the remote server. (I've confirmed that all unit tests pass locally using that golden data.) **Self evaluation:** 1. Build test: [X]Passed [X]Failed [ ]Skipped 2. Run test: [X]Passed [X]Failed [ ]Skipped Signed-off-by: Seungbaek Hong <[email protected]>
- Loading branch information
Showing
12 changed files
with
700 additions
and
159 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 SeungBaek Hong <[email protected]> | ||
* | ||
* @file add_layer.cpp | ||
* @date 7 Oct 2024 | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author SeungBaek Hong <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* @brief This is add layer class (operation layer) | ||
* | ||
*/ | ||
|
||
#include <add_layer.h> | ||
#include <nntrainer_error.h> | ||
#include <nntrainer_log.h> | ||
#include <node_exporter.h> | ||
#include <util_func.h> | ||
|
||
#include <layer_context.h> | ||
|
||
namespace nntrainer { | ||
|
||
static constexpr size_t SINGLE_INOUT_IDX = 0; /**< index used when the layer has a single input/output tensor */
|
||
/**
 * @brief Finalize AddLayer: declare it a binary operation and set the output
 *        shape. Element-wise add preserves the input shape, so the single
 *        output mirrors the first input's dimensions.
 */
void AddLayer::finalize(InitLayerContext &context) {
  op_type = OperationType::BINARY;

  // A binary operation needs exactly two inputs; fail fast instead of
  // indexing [0] on an ill-formed graph.
  if (context.getInputDimensions().size() != 2) {
    throw std::invalid_argument(
      "[AddLayer] Add layer requires exactly 2 input tensors");
  }

  context.setOutputDimensions({context.getInputDimensions()[0]});
}
|
||
// Intentionally a no-op: AddLayer sets op_type to BINARY in finalize(), so
// OperationLayer::forwarding() never dispatches to this unary overload; it is
// implemented only to satisfy the pure-virtual interface of OperationLayer.
void AddLayer::forwarding_operation(const Tensor &input, Tensor &hidden) {}
|
||
/**
 * @brief Element-wise addition: writes input0 + input1 into the
 *        pre-allocated output tensor @p hidden via Tensor::add's
 *        out-parameter overload.
 */
void AddLayer::forwarding_operation(const Tensor &input0, const Tensor &input1,
                                    Tensor &hidden) {
  input0.add(input1, hidden);
}
|
||
/**
 * @brief Backward pass for add. Since d(a+b)/da = d(a+b)/db = 1, the
 *        incoming derivative is propagated unchanged to both inputs.
 */
void AddLayer::calcDerivative(RunLayerContext &context) {
  const Tensor &incoming_deriv =
    context.getIncomingDerivative(SINGLE_INOUT_IDX);

  context.getOutgoingDerivative(0).copy(incoming_deriv);
  context.getOutgoingDerivative(1).copy(incoming_deriv);
}
|
||
void AddLayer::setProperty(const std::vector<std::string> &values) { | ||
auto remain_props = loadProperties(values, add_props); | ||
if (!remain_props.empty()) { | ||
std::string msg = "[AddLayer] Unknown Layer Properties count " + | ||
std::to_string(values.size()); | ||
throw exception::not_supported(msg); | ||
} | ||
} | ||
} /* namespace nntrainer */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 SeungBaek Hong <[email protected]> | ||
* | ||
* @file add_layer.h | ||
* @date 2 August 2024 | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author SeungBaek Hong <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* @brief This is add layer class (operation layer) | ||
* | ||
*/ | ||
|
||
#ifndef __ADD_LAYER_H__ | ||
#define __ADD_LAYER_H__ | ||
#ifdef __cplusplus | ||
|
||
#include <common_properties.h> | ||
#include <layer_devel.h> | ||
#include <operation_layer.h> | ||
|
||
namespace nntrainer { | ||
|
||
/**
 * @class Add Layer
 * @brief Element-wise add layer; a binary OperationLayer that outputs the
 *        sum of its two input tensors.
 */
class AddLayer : public OperationLayer {
public:
  /**
   * @brief Destructor of Add Layer
   */
  ~AddLayer() {}

  /**
   * @brief Constructor of Add Layer
   */
  AddLayer() : OperationLayer(), add_props(props::Print()) {}

  /**
   * @brief Move constructor of Add Layer.
   * @param[in] AddLayer &&
   */
  AddLayer(AddLayer &&rhs) noexcept = default;

  /**
   * @brief Move assignment operator.
   * @param[in] rhs AddLayer to be moved.
   */
  AddLayer &operator=(AddLayer &&rhs) = default;

  /**
   * @copydoc Layer::finalize(InitLayerContext &context)
   */
  void finalize(InitLayerContext &context) final;

  /**
   * @brief unary forwarding operation; unused for add (a binary operation),
   *        implemented only to satisfy the OperationLayer interface
   * @param input input tensor
   * @param hidden tensor to store the result
   */
  void forwarding_operation(const Tensor &input, Tensor &hidden) final;

  /**
   * @brief forwarding operation for add
   *
   * @param input0 input tensor 0
   * @param input1 input tensor 1
   * @param hidden tensor to store the result of addition
   */
  void forwarding_operation(const Tensor &input0, const Tensor &input1,
                            Tensor &hidden) final;

  /**
   * @copydoc Layer::incremental_forwarding(RunLayerContext &context, unsigned
   * int from, unsigned int to, bool training)
   * @note intentionally empty; overrides OperationLayer's generic
   *       incremental forwarding with a no-op — TODO confirm this is the
   *       intended behavior rather than inheriting the base implementation
   */
  void incremental_forwarding(RunLayerContext &context, unsigned int from,
                              unsigned int to, bool training) final{};

  /**
   * @copydoc Layer::calcDerivative(RunLayerContext &context)
   */
  void calcDerivative(RunLayerContext &context) final;

  /**
   * @copydoc bool supportBackwarding() const
   */
  bool supportBackwarding() const final { return true; };

  /**
   * @copydoc Layer::exportTo(Exporter &exporter, ml::train::ExportMethods
   * method)
   */
  void exportTo(Exporter &exporter,
                const ml::train::ExportMethods &method) const final {}

  /**
   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
   */
  void setProperty(const std::vector<std::string> &values) final;

  /**
   * @copydoc Layer::getType()
   */
  const std::string getType() const final { return AddLayer::type; }

  std::tuple<props::Print> add_props; /**< properties of the add layer */

  inline static const std::string type = "add"; /**< layer type identifier */
};
|
||
} // namespace nntrainer | ||
|
||
#endif /* __cplusplus */ | ||
#endif /* __ADD_LAYER_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 SeungBaek Hong <[email protected]> | ||
* | ||
* @file operation_layer.h | ||
* @date 4 Oct 2024 | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author SeungBaek Hong <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* @brief This is common class for operation layers | ||
* | ||
*/ | ||
|
||
#include <layer_devel.h> | ||
#include <nntrainer_error.h> | ||
#include <nntrainer_log.h> | ||
#include <node_exporter.h> | ||
#include <tensor.h> | ||
#include <util_func.h> | ||
|
||
#include <layer_context.h> | ||
|
||
#include <fstream> | ||
#include <iostream> | ||
|
||
namespace nntrainer { | ||
|
||
/** @brief Arity of an operation layer: UNARY takes one input tensor, BINARY takes two. */
enum class OperationType { NONE, UNARY, BINARY };
|
||
class OperationLayer : public Layer { | ||
public: | ||
virtual void forwarding_operation(const Tensor &input, Tensor &hidden) = 0; | ||
|
||
virtual void forwarding_operation(const Tensor &input0, const Tensor &input1, | ||
Tensor &hidden) = 0; | ||
|
||
void forwarding(RunLayerContext &context, bool training) override { | ||
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX); | ||
|
||
if (op_type == OperationType::UNARY) { | ||
const Tensor input = context.getInput(0); | ||
forwarding_operation(input, hidden_); | ||
} else if (op_type == OperationType::BINARY) { | ||
const Tensor &input0 = context.getInput(0); | ||
const Tensor &input1 = context.getInput(1); | ||
forwarding_operation(input0, input1, hidden_); | ||
} else { | ||
throw std::invalid_argument("Operation type is not defined"); | ||
} | ||
} | ||
|
||
void incremental_forwarding(RunLayerContext &context, unsigned int from, | ||
unsigned int to, bool training) override { | ||
if (from) { | ||
NNTR_THROW_IF(to - from != 1, std::invalid_argument) | ||
<< "incremental step size is not 1"; | ||
from = 0; | ||
to = 1; | ||
} | ||
|
||
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX); | ||
TensorDim hidden_dim = hidden_.getDim(); | ||
TensorDim hidden_step_dim = hidden_dim; | ||
|
||
hidden_step_dim.batch(1); | ||
hidden_step_dim.height(to - from); | ||
|
||
if (op_type == OperationType::UNARY) { | ||
const Tensor &input = context.getInput(0); | ||
TensorDim input_dim = input.getDim(); | ||
TensorDim input_step_dim = input_dim; | ||
input_step_dim.batch(1); | ||
input_step_dim.height(to - from); | ||
|
||
for (unsigned int b = 0; b < hidden_.batch(); ++b) { | ||
Tensor hidden_step = hidden_.getSharedDataTensor( | ||
hidden_step_dim, b * hidden_dim.getFeatureLen(), true); | ||
|
||
Tensor input_step = input.getSharedDataTensor( | ||
input_step_dim, b * input_dim.getFeatureLen(), true); | ||
|
||
forwarding_operation(input_step, hidden_step); | ||
} | ||
} else if (op_type == OperationType::BINARY) { | ||
const Tensor &input0 = context.getInput(0); | ||
const Tensor &input1 = context.getInput(1); | ||
|
||
TensorDim input0_dim = input0.getDim(); | ||
TensorDim input1_dim = input1.getDim(); | ||
if (input0_dim != input1_dim) { | ||
throw std::invalid_argument( | ||
"If the two input dimensions are different, the incremental " | ||
"forwarding implementation must be overridden."); | ||
} | ||
|
||
TensorDim input_step_dim = input0_dim; | ||
input_step_dim.batch(1); | ||
input_step_dim.height(to - from); | ||
|
||
for (unsigned int b = 0; b < hidden_.batch(); ++b) { | ||
Tensor hidden_step = hidden_.getSharedDataTensor( | ||
hidden_step_dim, b * hidden_dim.getFeatureLen(), true); | ||
|
||
Tensor input0_step = input0.getSharedDataTensor( | ||
input_step_dim, b * input0_dim.getFeatureLen(), true); | ||
|
||
Tensor input1_step = input1.getSharedDataTensor( | ||
input_step_dim, b * input1_dim.getFeatureLen(), true); | ||
|
||
forwarding_operation(input0_step, input1_step, hidden_step); | ||
} | ||
} else { | ||
throw std::invalid_argument("Operation type is not defined"); | ||
} | ||
} | ||
|
||
OperationType op_type = OperationType::NONE; /**< type of operation */ | ||
static constexpr size_t SINGLE_INOUT_IDX = 0; | ||
}; | ||
} // namespace nntrainer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.