Skip to content

Commit

Permalink
[ccapi] add return last output option in incremental inference
Browse files Browse the repository at this point in the history
 - Added a return-last-output option to incremental inference.
   To preserve backward compatibility, the default value is set to false.

Signed-off-by: hyeonseok <[email protected]>
  • Loading branch information
lhs8928 committed Sep 26, 2024
1 parent 8104cbe commit fa8a3fe
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 19 deletions.
4 changes: 3 additions & 1 deletion api/ccapi/include/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,14 +307,16 @@ class Model {
* @param[in] init_seq_len initial sequence length
* @param[in] from current working step index
* @param[in] to next working step index
* @param[in] return_last_output_only return last output if true else return all outputs
* @retval list of output as float *
* @note The output memory must not be freed by the caller
*/
virtual std::vector<float *>
incremental_inference(unsigned int batch, const std::vector<float *> &input,
const std::vector<float *> &label,
unsigned int init_seq_len, unsigned int from,
unsigned int to) = 0;
unsigned int to,
bool return_last_output_only = false) = 0;

/**
* @brief Summarize the model
Expand Down
40 changes: 23 additions & 17 deletions nntrainer/models/neuralnet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,7 +816,7 @@ sharedConstTensors NeuralNetwork::incremental_inference(
std::vector<float *> NeuralNetwork::incremental_inference(
unsigned int batch_size, const std::vector<float *> &input,
const std::vector<float *> &label, unsigned int init_seq_len,
unsigned int from, unsigned int to) {
unsigned int from, unsigned int to, bool return_last_output_only) {
sharedConstTensors input_tensors, output_tensors;
auto in_dim = getInputDimension();

Expand Down Expand Up @@ -849,27 +849,33 @@ std::vector<float *> NeuralNetwork::incremental_inference(
unsigned int step = from ? 0 : to - 1;

for (auto &out : output_tensors) {
const auto &out_t = *out.get();
float *last_out_buf_data = new float[batch_size * out_t.width()];
auto out_t = *out.get();
float *last_out_buf_data;

for (unsigned int batch = 0; batch < batch_size; ++batch) {
if (out->getDataType() == ml::train::TensorDim::DataType::FP16) {
if (return_last_output_only) {
last_out_buf_data = out_t.getData();
} else {
last_out_buf_data = new float[batch_size * out_t.width()];

for (unsigned int batch = 0; batch < batch_size; ++batch) {
if (out->getDataType() == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() +
batch * out_t.getDim().getFeatureLen() +
step * out_t.getDim().width();
scopy(out_t.getDim().width(), out_t_batch_ptr, 1,
last_out_buf_data + batch * out_t.width(), 1);
const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() +
batch * out_t.getDim().getFeatureLen() +
step * out_t.getDim().width();
scopy(out_t.getDim().width(), out_t_batch_ptr, 1,
last_out_buf_data + batch * out_t.width(), 1);

#else
throw std::invalid_argument("Error: enable-fp16 is not set");
throw std::invalid_argument("Error: enable-fp16 is not set");
#endif
} else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) {
const float *out_t_batch_ptr = out_t.getData() +
batch * out_t.getDim().getFeatureLen() +
step * out_t.getDim().width();
scopy(out_t.getDim().width(), out_t_batch_ptr, 1,
last_out_buf_data + batch * out_t.width(), 1);
} else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) {
const float *out_t_batch_ptr = out_t.getData() +
batch * out_t.getDim().getFeatureLen() +
step * out_t.getDim().width();
scopy(out_t.getDim().width(), out_t_batch_ptr, 1,
last_out_buf_data + batch * out_t.width(), 1);
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion nntrainer/models/neuralnet.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,8 @@ s * @retval shared_ptr<const Tensor>
const std::vector<float *> &label,
unsigned int init_seq_len,
unsigned int from,
unsigned int to) override;
unsigned int to,
bool return_last_output_only = false) override;

/**
* @brief Run NeuralNetwork train with callback function by user
Expand Down

0 comments on commit fa8a3fe

Please sign in to comment.