[Unittest & Loss] Update KLD Loss & Fix Unittest
Update the KLD loss function to reflect review feedback.

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <[email protected]>
DonghakPark committed Oct 24, 2024
1 parent 4003d40 commit d9e182e
Showing 7 changed files with 65 additions and 93 deletions.
94 changes: 30 additions & 64 deletions nntrainer/layers/loss/kld_loss_layer.cpp
@@ -17,75 +17,41 @@
#include <vector>

namespace nntrainer {
KLDLossLayer::KLDLossLayer() {}

KLDLossLayer::~KLDLossLayer() {}

void KLDLossLayer::finalize(nntrainer::InitLayerContext &context) {
if (context.getNumInputs() != 2) {
throw std::invalid_argument("kld loss requires two input");
}
const auto &input_dims = context.getInputDimensions();

if (input_dims.front() != input_dims.back()) {
throw std::invalid_argument("dimension of mu and log_var is different");
}

auto &input_dim = input_dims.front();

temp_idx = context.requestTensor(input_dim, "temp");
before_sum_idx = context.requestTensor(
input_dim, "before_sum", nntrainer::Initializer::NONE, false,
nntrainer::TensorLifespan::FORWARD_FUNC_LIFESPAN);

/// output is a scalar-like tensor
context.setOutputDimensions({{input_dim.batch(), 1, 1, 1}});
}

void KLDLossLayer::setProperty(const std::vector<std::string> &values) {
if (values.size()) {
throw std::invalid_argument(
"kld loss does not take any properties, but values given");
}
}
static constexpr size_t SINGLE_INOUT_IDX = 0;

void KLDLossLayer::forwarding(RunLayerContext &context, bool training) {
// -0.5 * sum(1 + log_std - pow(mu, 2) - exp(log_std))
auto &mu = context.getInput(0);
auto &log_std = context.getInput(1);
auto &ret = context.getOutput(0);
auto &temp = context.getTensor(temp_idx);
auto &before_sum = context.getTensor(before_sum_idx);

mu.pow(2.0f, temp); // 1. temp = mu ^ 2
log_std.subtract(temp, before_sum); // 2. before_sum = log_std - temp
log_std.apply<float>(expf, temp); // 3. temp = exp(log_std) - 1
temp.subtract_i(1.0f);
before_sum.subtract_i(temp); // 4. before_sum = before_sum - temp
before_sum.sum({1, 2, 3}, ret, -0.5); // 5. sum * 0.5
// Result = (P * (P / Q).log()).sum()
// KL(P ∣∣ Q), where P denotes the distribution of the observations in the dataset
// and Q denotes the model output.

nntrainer::Tensor &predicted = context.getInput(SINGLE_INOUT_IDX);
nntrainer::Tensor &output = context.getOutput(SINGLE_INOUT_IDX);
if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
nntrainer::Tensor &label = context.getLabel(SINGLE_INOUT_IDX);
nntrainer::Tensor temp; // temp output
/**
* 1. Output = label / predicted
* 2. Output = log(Output)
* 3. Output = Output * label
* 4. Output = sum(output)
*/
label.divide(predicted, temp);
temp.apply<float>(logf, temp);
temp.multiply_i(label);
output.fill(temp.sum({0, 1, 2, 3}));
}
}
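
For reference, a minimal standalone sketch of the same forward computation, assuming plain std::vector<float> inputs as stand-ins for nntrainer::Tensor (kld_reference is a hypothetical helper, not part of nntrainer):

#include <cmath>
#include <cstddef>
#include <vector>

// KL(P || Q) = sum_i P_i * log(P_i / Q_i), with P = label and Q = predicted,
// mirroring steps 1-4 in forwarding() above.
float kld_reference(const std::vector<float> &label,
                    const std::vector<float> &predicted) {
  float sum = 0.0f;
  for (std::size_t i = 0; i < label.size(); ++i) {
    sum += label[i] * std::log(label[i] / predicted[i]);
  }
  return sum;
}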

void KLDLossLayer::calcDerivative(RunLayerContext &context) {
auto &d_incoming = context.getIncomingDerivative(0);
auto &mu = context.getInput(0);

auto &temp = context.getTensor(temp_idx);

auto &d_mu = context.getOutgoingDerivative(0);
auto &d_var = context.getOutgoingDerivative(1);

// d_mu = d_incoming * mu
mu.multiply(d_incoming, d_mu);

// temp is exp(log_std) - 1;
// d_var = d_incoming * (-0.5) * ( 1 - exp(log_std) )
// = d_incoming * (0.5) * ( temp )
temp.multiply(d_incoming.multiply(0.5), d_var);
/**
* d/dQ = -P/Q
*/
nntrainer::Tensor &predicted = context.getInput(SINGLE_INOUT_IDX); // Q
nntrainer::Tensor &label = context.getLabel(SINGLE_INOUT_IDX); // P
nntrainer::Tensor &deriv = context.getOutgoingDerivative(SINGLE_INOUT_IDX);

label.multiply_i(-1.0f);
label.divide(predicted, deriv);
}
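
The -P/Q factor follows from d/dQ [ P * log(P / Q) ] = d/dQ [ P*log(P) - P*log(Q) ] = -P / Q. A quick scalar finite-difference check of that identity, with hypothetical values p = 0.3 and q = 0.6 (not nntrainer code):

#include <cassert>
#include <cmath>

void kld_derivative_check() {
  double p = 0.3, q = 0.6, eps = 1e-6;
  double analytic = -p / q; // closed form: -P / Q
  double numeric = (p * std::log(p / (q + eps)) - p * std::log(p / (q - eps))) /
                   (2.0 * eps); // central difference of P * log(P / Q) w.r.t. Q
  assert(std::fabs(analytic - numeric) < 1e-6); // both are approximately -0.5
}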

void KLDLossLayer::setBatch(nntrainer::RunLayerContext &context,
unsigned int batch) {
context.updateTensor(temp_idx, batch);
context.updateTensor(before_sum_idx, batch);
}
} // namespace nntrainer
27 changes: 3 additions & 24 deletions nntrainer/layers/loss/kld_loss_layer.h
@@ -25,27 +25,17 @@ namespace nntrainer {
* @class KLD (Kullback-Leibler Divergence) Loss layer
* @brief kld loss layer
*/
class KLDLossLayer final : public LossLayer {
class KLDLossLayer : public LossLayer {
public:
/**
* @brief Constructor of KLD Loss Layer
*/
KLDLossLayer();
KLDLossLayer() : LossLayer() {}

/**
* @brief Destructor of KLD Loss Layer
*/
~KLDLossLayer();

/**
* @copydoc Layer::finalize(InitLayerContext &context)
*/
void finalize(nntrainer::InitLayerContext &context) override;

/**
* @copydoc Layer::setProperty(const std::vector<std::string> &values)
*/
void setProperty(const std::vector<std::string> &values) override;
~KLDLossLayer() = default;

/**
* @copydoc Layer::forwarding(RunLayerContext &context, bool training)
@@ -62,20 +52,9 @@ class KLDLossLayer final : public LossLayer {
*/
const std::string getType() const override { return KLDLossLayer::type; }

/**
* @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
*/
void setBatch(nntrainer::RunLayerContext &context,
unsigned int batch) override;

inline static const std::string type = "kld";

private:
unsigned before_sum_idx;
unsigned temp_idx;
};
} // namespace nntrainer

#endif /* __cplusplus */

#endif // __KLD_LOSS_LAYER_H__
3 changes: 2 additions & 1 deletion nntrainer/layers/loss/meson.build
@@ -3,7 +3,8 @@ loss_layer_sources = [
'mse_loss_layer.cpp',
'cross_entropy_sigmoid_loss_layer.cpp',
'cross_entropy_softmax_loss_layer.cpp',
'constant_derivative_loss_layer.cpp'
'constant_derivative_loss_layer.cpp',
'kld_loss_layer.cpp'
]

loss_layer_headers = []
3 changes: 2 additions & 1 deletion nntrainer/models/model_common_properties.cpp
@@ -20,7 +20,8 @@ Epochs::Epochs(unsigned int value) { set(value); }

bool LossType::isValid(const std::string &value) const {
ml_logw("Model loss property is deprecated, use loss layer directly instead");
return istrequal(value, "cross") || istrequal(value, "mse");
return istrequal(value, "cross") || istrequal(value, "mse") ||
istrequal(value, "kld");
}

TrainingBatchSize::TrainingBatchSize(unsigned int value) { set(value); }
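
With this check relaxed, the deprecated model-level loss property (see the warning logged above) now accepts "kld" as well. A hedged sketch, assuming the "key=value" property strings accepted by createModel (the same setting the new test below passes via its withKey helper):

// An explicit loss layer remains the recommended path per the deprecation warning.
auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET, {"loss=kld"});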
@@ -91,3 +91,28 @@ TEST(crossentropy_loss, model_fail_test) {
int status = model->compile();
EXPECT_FALSE(status == ML_ERROR_NONE);
}

TEST(kld_loss, compile_test) {

std::unique_ptr<ml::train::Model> model = ml::train::createModel(
ml::train::ModelType::NEURAL_NET, {withKey("loss", "kld")});

std::shared_ptr<ml::train::Layer> input_layer = ml::train::createLayer(
"input", {withKey("name", "input0"), withKey("input_shape", "3:32:32")});
std::shared_ptr<ml::train::Layer> fc_layer = ml::train::createLayer(
"fully_connected",
{withKey("unit", 100), withKey("activation", "softmax")});

model->addLayer(input_layer);
model->addLayer(fc_layer);

model->setProperty({withKey("batch_size", 16), withKey("epochs", 1)});

auto optimizer = ml::train::createOptimizer("adam", {"learning_rate=0.001"});
model->setOptimizer(std::move(optimizer));
int status = model->compile();
EXPECT_FALSE(status == ML_ERROR_NONE);

status = model->initialize();
EXPECT_FALSE(status == ML_ERROR_NONE);
}
4 changes: 2 additions & 2 deletions test/unittest/integration_tests/integration_tests.cpp
@@ -2,9 +2,9 @@
/**
* Copyright (C) 2024 Donghak Park <[email protected]>
*
* @file unittest_loss_crossentropy.cpp
* @file integration_tests.cpp
* @date 16 Oct 2024
* @brief CrossEntropy loss Layer Test
* @brief Layer Integration Test
* @see https://github.com/nnstreamer/nntrainer
* @author Donghak Park <[email protected]>
* @bug No known bugs except for NYI items
2 changes: 1 addition & 1 deletion test/unittest/integration_tests/meson.build
@@ -2,7 +2,7 @@ test_name = 'integration_tests'

test_target = [
'integration_tests.cpp',
'unittest_loss_crossentropy.cpp',
'integration_test_loss.cpp',
]

exe = executable(
