[ Weight ] Add Var32 Tensor in Weight.
We add a Var32 Tensor when the Variable Weight is not full
precision (FP32). This enables the Weight Update with full precision;
only the Apply Gradient process uses this Tensor. Therefore, the
lifespan of this tensor should be "ApplyGradient".
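
The reasoning can be illustrated with a small, self-contained sketch (only an
illustration with a made-up `to_low_precision` helper, not nntrainer code): when
a small gradient step is applied directly to a low-precision weight, the step can
be rounded away entirely, while accumulating the same steps in an FP32 master
copy and casting back afterwards preserves the update.

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// Round a float to bfloat16-like precision (8-bit mantissa, round to nearest even).
float to_low_precision(float x) {
  std::uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  const std::uint32_t lsb = (bits >> 16) & 1u;
  bits += 0x7FFFu + lsb;   // round to nearest even
  bits &= 0xFFFF0000u;     // drop the low 16 mantissa bits
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}

int main() {
  const float grad = 1e-4f; // a small gradient step
  const int steps = 1000;

  // (1) Update the low-precision weight directly: each step is rounded away.
  float w_low = to_low_precision(1.0f);
  for (int i = 0; i < steps; ++i)
    w_low = to_low_precision(w_low - grad);

  // (2) Keep a full-precision master copy (the "var32" idea), apply the
  //     gradient there, and cast back only after the update.
  float w_master = 1.0f;
  for (int i = 0; i < steps; ++i)
    w_master -= grad;
  const float w_from_master = to_low_precision(w_master);

  std::cout << "direct low-precision update : " << w_low << '\n';         // ~1.0 (stuck)
  std::cout << "update via FP32 master copy : " << w_from_master << '\n'; // ~0.9
  return 0;
}
```

Compiling and running this prints a weight stuck at 1.0 for the direct update
and roughly 0.9 when the FP32 master copy is used.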

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <[email protected]>
jijoongmoon committed May 2, 2024
1 parent cb0f83f commit 69f3534
Showing 2 changed files with 90 additions and 16 deletions.
85 changes: 85 additions & 0 deletions nntrainer/tensor/weight.cpp
@@ -34,6 +34,28 @@ Weight::Weight(const TensorDim &dim, const Tensor::Initializer init,
throw std::invalid_argument("Weight initializer cannot be none");
if (regularizer == WeightRegularizer::UNKNOWN)
throw std::invalid_argument("Weight regularizer unknown");

std::string var32_suffix = ":fp32";
std::string var32_name = name + var32_suffix;

/**
* @note We assume that if the Weight data type is not FP32, an FP32 copy of
* the Weight is necessary to maintain accuracy.
* Other data types could be used as well; if such a case needs to be
* supported, the code below should be updated.
*
* Also, loss_scale is not used in Weight yet, but it is kept as is for
* later use.
*/

if (dim.getDataType() != ml::train::TensorDim::DataType::FP32) {
TensorDim var32_dim(dim);
var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);

var32 = std::make_shared<Tensor>(var32_dim, alloc_now_, init, var32_name);
} else {
var32 = std::make_shared<Tensor>(var32_name);
}
}

Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g,
@@ -52,6 +74,69 @@ Weight::Weight(const TensorDim &dim_v,
throw std::invalid_argument("Weight initializer cannot be none");
if (regularizer == WeightRegularizer::UNKNOWN)
throw std::invalid_argument("Weight regularizer unknown");

std::string var32_suffix = ":fp32";
std::string var32_name = name + var32_suffix;

if (dim_v.getDataType() != ml::train::TensorDim::DataType::FP32) {
TensorDim var32_dim(dim_v);
var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);

var32 = std::make_shared<Tensor>(var32_dim, alloc_now_, init, var32_name);
} else {
var32 = std::make_shared<Tensor>(var32_name);
}
}

Weight::Weight(const Tensor &v, const Tensor &g, const std::string &n,
bool is_dependent, unsigned int output_axis_) :
Var_Grad(v, g, n, is_dependent),
regularizer(WeightRegularizer::NONE),
regularizer_constant(1.0f),
decay(0.0f),
clip_by_global_norm(0.0f),
output_axis(output_axis_),
loss_scale(0.0) {

std::string var32_suffix = ":fp32";
std::string var32_name = n + var32_suffix;

if (v.getDataType() != ml::train::TensorDim::DataType::FP32) {
TensorDim var32_dim(v.getDim());
var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);

var32 = std::make_shared<Tensor>(var32_dim, true, Tensor::Initializer::NONE,
var32_name);
} else {
var32 = std::make_shared<Tensor>(var32_name);
}
}

Weight::Weight(Tensor *v, Tensor *g, const WeightRegularizer reg,
const float reg_const, const float decay, bool is_dependent,
const float max_norm, unsigned int output_axis_,
float loss_scale_) :
Var_Grad(v, g, is_dependent),
regularizer(reg),
regularizer_constant(reg_const),
decay(decay),
clip_by_global_norm(max_norm),
output_axis(output_axis_),
loss_scale(loss_scale_) {
std::string var32_suffix = ":fp32";
std::string var32_name = "" + var32_suffix;

if (v->getDataType() != ml::train::TensorDim::DataType::FP32) {
TensorDim var32_dim(v->getDim());
var32_dim.setDataType(ml::train::TensorDim::DataType::FP32);

var32 = std::make_shared<Tensor>(var32_dim, true, Tensor::Initializer::NONE,
var32_name);
} else {
var32 = std::make_shared<Tensor>(var32_name);
}
}

} // namespace nntrainer
21 changes: 5 additions & 16 deletions nntrainer/tensor/weight.h
@@ -124,14 +124,7 @@ class Weight : public Var_Grad {
* if the owner of these tensors free the tensors.
*/
explicit Weight(const Tensor &v, const Tensor &g, const std::string &n = "",
bool is_dependent = false, unsigned int output_axis_ = 3) :
Var_Grad(v, g, n, is_dependent),
regularizer(WeightRegularizer::NONE),
regularizer_constant(1.0f),
decay(0.0f),
clip_by_global_norm(0.0f),
output_axis(output_axis_),
loss_scale(0.0) {}
bool is_dependent = false, unsigned int output_axis_ = 3);

/**
* @brief Construct a new Weight object
@@ -144,14 +137,7 @@ class Weight : public Var_Grad {
explicit Weight(Tensor *v, Tensor *g, const WeightRegularizer reg,
const float reg_const, const float decay,
bool is_dependent = false, const float max_norm = 0.0f,
unsigned int output_axis_ = 3, float loss_scale_ = 0.0f) :
Var_Grad(v, g, is_dependent),
regularizer(reg),
regularizer_constant(reg_const),
decay(decay),
clip_by_global_norm(max_norm),
output_axis(output_axis_),
loss_scale(loss_scale_) {}
unsigned int output_axis_ = 3, float loss_scale_ = 0.0f);

/**
* @brief Swap for weight
@@ -170,6 +156,7 @@ class Weight : public Var_Grad {
swap(lhs.output_axis, rhs.output_axis);
swap(lhs.opt_vars, rhs.opt_vars);
swap(lhs.loss_scale, rhs.loss_scale);
swap(lhs.var32, rhs.var32);
}

/**
@@ -213,6 +200,8 @@ class Weight : public Var_Grad {
w.var = std::make_shared<Tensor>(this->var->clone());
if (!this->grad->empty())
w.grad = std::make_shared<Tensor>(this->grad->clone());
if (!this->var32->empty())
w.var32 = std::make_shared<Tensor>(this->var32->clone());

return w;
}