diff --git a/api/ccapi/include/model.h b/api/ccapi/include/model.h index e4d3a1bfe..cc94e34e3 100644 --- a/api/ccapi/include/model.h +++ b/api/ccapi/include/model.h @@ -307,6 +307,7 @@ class Model { * @param[in] init_seq_len initial sequence length * @param[in] from current working step index * @param[in] to next working step index + * @param[in] return_last_output_only if true, return only the last output; otherwise return all outputs * @retval list of output as float * * @note The output memory must not be freed by the caller */ @@ -314,7 +315,8 @@ class Model { incremental_inference(unsigned int batch, const std::vector &input, const std::vector &label, unsigned int init_seq_len, unsigned int from, - unsigned int to) = 0; + unsigned int to, + bool return_last_output_only = false) = 0; /** * @brief Summarize the model diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index d0e542825..e81656fb3 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -816,7 +816,7 @@ sharedConstTensors NeuralNetwork::incremental_inference( std::vector NeuralNetwork::incremental_inference( unsigned int batch_size, const std::vector &input, const std::vector &label, unsigned int init_seq_len, - unsigned int from, unsigned int to) { + unsigned int from, unsigned int to, bool return_last_output_only) { sharedConstTensors input_tensors, output_tensors; auto in_dim = getInputDimension(); @@ -849,27 +849,33 @@ std::vector NeuralNetwork::incremental_inference( unsigned int step = from ?
0 : to - 1; for (auto &out : output_tensors) { - const auto &out_t = *out.get(); - float *last_out_buf_data = new float[batch_size * out_t.width()]; + auto out_t = *out.get(); + float *last_out_buf_data; - for (unsigned int batch = 0; batch < batch_size; ++batch) { - if (out->getDataType() == ml::train::TensorDim::DataType::FP16) { + if (return_last_output_only) { + last_out_buf_data = out_t.getData(); + } else { + last_out_buf_data = new float[batch_size * out_t.width()]; + + for (unsigned int batch = 0; batch < batch_size; ++batch) { + if (out->getDataType() == ml::train::TensorDim::DataType::FP16) { #ifdef ENABLE_FP16 - const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() + - batch * out_t.getDim().getFeatureLen() + - step * out_t.getDim().width(); - scopy(out_t.getDim().width(), out_t_batch_ptr, 1, - last_out_buf_data + batch * out_t.width(), 1); + const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() + + batch * out_t.getDim().getFeatureLen() + + step * out_t.getDim().width(); + scopy(out_t.getDim().width(), out_t_batch_ptr, 1, + last_out_buf_data + batch * out_t.width(), 1); #else - throw std::invalid_argument("Error: enable-fp16 is not set"); + throw std::invalid_argument("Error: enable-fp16 is not set"); #endif - } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *out_t_batch_ptr = out_t.getData() + - batch * out_t.getDim().getFeatureLen() + - step * out_t.getDim().width(); - scopy(out_t.getDim().width(), out_t_batch_ptr, 1, - last_out_buf_data + batch * out_t.width(), 1); + } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) { + const float *out_t_batch_ptr = out_t.getData() + + batch * out_t.getDim().getFeatureLen() + + step * out_t.getDim().width(); + scopy(out_t.getDim().width(), out_t_batch_ptr, 1, + last_out_buf_data + batch * out_t.width(), 1); + } } } diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index 30d2288fd..45be0821b 100644 --- a/nntrainer/models/neuralnet.h +++ 
b/nntrainer/models/neuralnet.h @@ -408,7 +408,8 @@ s * @retval shared_ptr const std::vector &label, unsigned int init_seq_len, unsigned int from, - unsigned int to) override; + unsigned int to, + bool return_last_output_only = false) override; /** * @brief Run NeuralNetwork train with callback function by user