diff --git a/api/ccapi/include/model.h b/api/ccapi/include/model.h index e4d3a1bfe..cc94e34e3 100644 --- a/api/ccapi/include/model.h +++ b/api/ccapi/include/model.h @@ -307,6 +307,7 @@ class Model { * @param[in] init_seq_len initial sequence length * @param[in] from current working step index * @param[in] to next working step index + * @param[in] return_last_output_only if true, return only the last output; otherwise return all outputs * @retval list of output as float * * @note The output memory must not be freed by the caller */ @@ -314,7 +315,8 @@ class Model { incremental_inference(unsigned int batch, const std::vector &input, const std::vector &label, unsigned int init_seq_len, unsigned int from, - unsigned int to) = 0; + unsigned int to, + bool return_last_output_only = false) = 0; /** * @brief Summarize the model diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index d0e542825..e81656fb3 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -816,7 +816,7 @@ sharedConstTensors NeuralNetwork::incremental_inference( std::vector NeuralNetwork::incremental_inference( unsigned int batch_size, const std::vector &input, const std::vector &label, unsigned int init_seq_len, - unsigned int from, unsigned int to) { + unsigned int from, unsigned int to, bool return_last_output_only) { sharedConstTensors input_tensors, output_tensors; auto in_dim = getInputDimension(); @@ -849,27 +849,33 @@ std::vector NeuralNetwork::incremental_inference( unsigned int step = from ?
0 : to - 1; for (auto &out : output_tensors) { - const auto &out_t = *out.get(); - float *last_out_buf_data = new float[batch_size * out_t.width()]; + auto out_t = *out.get(); + float *last_out_buf_data; - for (unsigned int batch = 0; batch < batch_size; ++batch) { - if (out->getDataType() == ml::train::TensorDim::DataType::FP16) { + if (return_last_output_only) { + last_out_buf_data = out_t.getData(); + } else { + last_out_buf_data = new float[batch_size * out_t.width()]; + + for (unsigned int batch = 0; batch < batch_size; ++batch) { + if (out->getDataType() == ml::train::TensorDim::DataType::FP16) { #ifdef ENABLE_FP16 - const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() + - batch * out_t.getDim().getFeatureLen() + - step * out_t.getDim().width(); - scopy(out_t.getDim().width(), out_t_batch_ptr, 1, - last_out_buf_data + batch * out_t.width(), 1); + const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() + + batch * out_t.getDim().getFeatureLen() + + step * out_t.getDim().width(); + scopy(out_t.getDim().width(), out_t_batch_ptr, 1, + last_out_buf_data + batch * out_t.width(), 1); #else - throw std::invalid_argument("Error: enable-fp16 is not set"); + throw std::invalid_argument("Error: enable-fp16 is not set"); #endif - } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *out_t_batch_ptr = out_t.getData() + - batch * out_t.getDim().getFeatureLen() + - step * out_t.getDim().width(); - scopy(out_t.getDim().width(), out_t_batch_ptr, 1, - last_out_buf_data + batch * out_t.width(), 1); + } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) { + const float *out_t_batch_ptr = out_t.getData() + + batch * out_t.getDim().getFeatureLen() + + step * out_t.getDim().width(); + scopy(out_t.getDim().width(), out_t_batch_ptr, 1, + last_out_buf_data + batch * out_t.width(), 1); + } } } diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index 30d2288fd..45be0821b 100644 --- a/nntrainer/models/neuralnet.h +++ 
b/nntrainer/models/neuralnet.h @@ -408,7 +408,8 @@ s * @retval shared_ptr const std::vector &label, unsigned int init_seq_len, unsigned int from, - unsigned int to) override; + unsigned int to, + bool return_last_output_only = false) override; /** * @brief Run NeuralNetwork train with callback function by user