Local response normalize. #4426
@@ -0,0 +1,140 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/lrn_op.h" | ||
|
||
namespace paddle { | ||
namespace operators { | ||
|
||
using framework::Tensor; | ||
|
||
class LRNOp : public framework::OperatorWithKernel { | ||
public: | ||
using framework::OperatorWithKernel::OperatorWithKernel; | ||
|
||
protected: | ||
void InferShape(const framework::InferShapeContext &ctx) const override { | ||
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), | ||
"Input(X) of LRNOp should not be null."); | ||
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), | ||
"Output(Out) of LRNOp should not be null."); | ||
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("mid_out"), | ||
"mid_out(Out) of LRNOp should not be null."); | ||
|
||
auto x_dim = ctx.Input<Tensor>("X")->dims(); | ||
PADDLE_ENFORCE_EQ(x_dim.size(), 4, "Input(X)'rank of LRNOp should be 4."); | ||
|
||
ctx.Output<Tensor>("Out")->Resize(x_dim); | ||
ctx.Output<Tensor>("mid_out")->Resize(x_dim); | ||
ctx.ShareLoD("X", /*->*/ "Out"); | ||
} | ||
}; | ||
|
||
class LRNOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LRNOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", R"DOC(
    (Tensor) Input of lrn op. It must be a rank-4 tensor with NCHW format.
    )DOC");

Review comment: (Tensor) The input of LRN operator. It must be a 4D tensor with NCHW format.
Reply: Done.

AddOutput("Out", "(Tensor)The output of lrn op"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Tensor) The output of LRN operator, which is also the 4D tensor with NCHW format. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
AddOutput("mid_out", R"Doc( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not follow the name convention. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
(Tensor)Middle result of lrn op.It's computed in forward process | ||
and also used in backward process. | ||
)Doc"); | ||
|
||
AddAttr<int>("n", R"DOC( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. n -> size There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这是公式里边的变量,是不是按照公式的来更好? |
||
(int, default 5)n is “adjacent” kernel maps at the same spatial position. | ||
)DOC") | ||
.SetDefault(5) | ||
.GreaterThan(0); | ||
|
||
AddAttr<float>("k", R"DOC( | ||
(float, default 2.0)k is the bias. | ||
)DOC") | ||
.SetDefault(2.0) | ||
.GreaterThan(0.0); | ||
|
||
AddAttr<float>("alpha", R"DOC( | ||
(float, default 0.0001)alpha is the scale number. | ||
)DOC") | ||
.SetDefault(0.0001) | ||
.GreaterThan(0.0); | ||
|
||
AddAttr<float>("beta", R"DOC( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
(float, default 0.75)beta is the power number. | ||
)DOC") | ||
.SetDefault(0.75) | ||
.GreaterThan(0.0); | ||
|
||
AddComment(R"DOC( | ||
Local Response Normalization. | ||
|
||
This Function comes from the paper | ||
"ImageNet Classification with Deep Convolutional Neural Networks". | ||
|
||
The original formula is: | ||
|
||
Input(i, x, y) | ||
Output(i, x, y) = ---------------------------------------------- | ||
-- upper | ||
(k + alpha * > (Input(j, x, y))^2) ^ (beta) | ||
-- j = lower | ||
|
||
upper is `min(C, c + n/2)` | ||
lower if `max(0, c - n/2)` | ||
|
||
Function implementation: | ||
|
||
inputs and outpus is NCHW format, while input.shape.ndims() is equal 4. | ||
And the meaning of each dimension(0-3) is respectively batch size, | ||
feature maps, rows and columns. | ||
|
||
Input and Output in the above formula is for each map(i) of one image, and | ||
Input(i, x, y), Output(i, x, y) represents an element in an image. | ||
|
||
C is the number of feature maps of one image, and n is a hyper-parameters | ||
is configured when Function is initialized. The sum in the denominator | ||
is the sum of the same position in the neighboring maps. | ||
)DOC"); | ||
} | ||
}; | ||
|
||
class LRNOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("mid_out")),
                            "Input(mid_out@GRAD) should not be null");
    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                            "Input(Out@GRAD) should not be null");

    auto x_dims = ctx.Input<Tensor>("X")->dims();
    auto *x_g = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
    x_g->Resize(x_dims);

Review comment: Need to update to the latest code.
Reply: Done.

  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(lrn, ops::LRNOp, ops::LRNOpMaker, lrn_grad, ops::LRNOpGrad);
REGISTER_OP_CPU_KERNEL(lrn, ops::LRNKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(lrn_grad,
                       ops::LRNGradKernel<paddle::platform::CPUPlace, float>);
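To make the formula in the operator comment above concrete, here is a minimal, standalone CPU sketch of the same forward computation over flat NCHW buffers, using plain std::vector instead of Paddle's Tensor/Eigen types. The function name lrn_forward_reference and the explicit loops are illustrative only and are not part of this PR; the channel window is the documented lower/upper range, clamped to valid channel indices.

// Reference-only sketch: computes Output = Input * (k + alpha * sum)^(-beta),
// where the sum runs over neighboring channels at the same spatial position.
#include <algorithm>
#include <cmath>
#include <vector>

// x is a flat NCHW buffer of size N*C*H*W; the return value has the same layout.
std::vector<float> lrn_forward_reference(const std::vector<float>& x, int N,
                                         int C, int H, int W, int n, float k,
                                         float alpha, float beta) {
  std::vector<float> out(x.size());
  auto idx = [&](int b, int c, int h, int w) {
    return ((b * C + c) * H + h) * W + w;
  };
  for (int b = 0; b < N; ++b) {
    for (int c = 0; c < C; ++c) {
      // Window of "adjacent" feature maps, clamped to [0, C-1]:
      // lower = max(0, c - n/2), upper = min(C - 1, c + n/2).
      const int lower = std::max(0, c - n / 2);
      const int upper = std::min(C - 1, c + n / 2);
      for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
          float sum = 0.0f;
          for (int j = lower; j <= upper; ++j) {
            const float v = x[idx(b, j, h, w)];
            sum += v * v;
          }
          // mid = k + alpha * sum corresponds to the "mid_out" tensor above.
          out[idx(b, c, h, w)] =
              x[idx(b, c, h, w)] * std::pow(k + alpha * sum, -beta);
        }
      }
    }
  }
  return out;
}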
@@ -0,0 +1,22 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/lrn_op.h"

namespace ops = paddle::operators;

REGISTER_OP_GPU_KERNEL(lrn, ops::LRNKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(lrn_grad,
                       ops::LRNGradKernel<paddle::platform::GPUPlace, float>);
@@ -0,0 +1,185 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"

namespace paddle {
namespace operators {

template <typename Place, typename T> | ||
class LRNKernel : public framework::OpKernel { | ||
public: | ||
using Tensor = framework::Tensor; | ||
|
||
// f(x) = x * ( k + alpha * SUM((x)^2) )^(-beta) | ||
// x represents inputs | ||
// f(x) represents outputs | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
// input | ||
const Tensor* x = ctx.Input<Tensor>("X"); | ||
auto x_dims = x->dims(); | ||
|
||
// NCHW | ||
int N = x_dims[0]; | ||
int C = x_dims[1]; | ||
int H = x_dims[2]; | ||
int W = x_dims[3]; | ||
|
||
Tensor* out = ctx.Output<Tensor>("Out"); | ||
out->mutable_data<T>(ctx.GetPlace()); | ||
|
||
// mid_out save the intermediate result for backward | ||
Tensor* mid_out = ctx.Output<Tensor>("mid_out"); | ||
mid_out->mutable_data<T>(ctx.GetPlace()); | ||
|
||
int n = ctx.Attr<int>("n"); | ||
float alpha = ctx.Attr<float>("alpha"); | ||
float beta = ctx.Attr<float>("beta"); | ||
float k = ctx.Attr<float>("k"); | ||
|
||
PADDLE_ENFORCE(n > 0, "n should >= 0"); | ||
PADDLE_ENFORCE(alpha >= 0.0, "alpha should >= 0.0"); | ||
PADDLE_ENFORCE(beta >= 0.0, "beta should >= 0.0"); | ||
PADDLE_ENFORCE(k >= 0.0, "k should >= 0.0"); | ||
|
||
auto x_v = framework::EigenVector<T>::Flatten(*x); | ||
|
||
const int start = -(n - 1) / 2; | ||
const int end = start + n; | ||
|
||
auto e_mid = framework::EigenTensor<T, 4>::From(*mid_out); | ||
e_mid.device(ctx.GetEigenDevice<Place>()) = e_mid.constant(k); | ||
|
||
auto e_x = framework::EigenTensor<T, 4>::From(*x); | ||
for (int m = 0; m < N; m++) { | ||
for (int i = 0; i < C; i++) { | ||
for (int c = start; c <= end; c++) { | ||
int ch = i + c; | ||
if (ch >= 0 && ch < C) { | ||
auto s = e_mid.slice(Eigen::array<int, 4>({{m, i, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
auto r = e_x.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
s.device(ctx.GetEigenDevice<Place>()) += alpha * r.square(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
auto out_e = framework::EigenVector<T>::Flatten(*out); | ||
out_e.device(ctx.GetEigenDevice<Place>()) = | ||
x_v * e_mid.reshape(Eigen::DSizes<int, 1>(e_mid.size())).pow(-beta); | ||
} | ||
}; | ||
|
||
/**
 * \brief Backward calculation for normalization with across maps.
 *
 * Function implementation:
 *
 * The implementation of this Function is derived from the
 * CrossMapNormalFunc implementation.
 *
 * InputGrad = OutputGrad * mid_out ^ (-beta)
 *             + sum_{j = lower}^{upper} (OutputGrad(j) * OutputValue(j)
 *               * (-2 * alpha * beta) / mid_out(j)) * InputValue
 *
 * The data format of inputs/outputs is the same as in the forward interface
 * and is NCHW.
 *
 * The upper and lower bounds are the same as in the forward pass. The logic
 * of the sum is also the same as in the forward pass.
 */
template <typename Place, typename T> | ||
class LRNGradKernel : public framework::OpKernel { | ||
public: | ||
using Tensor = framework::Tensor; | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
const Tensor* x = ctx.Input<Tensor>("X"); | ||
const Tensor* out = ctx.Input<Tensor>("Out"); | ||
const Tensor* out_g = ctx.Input<Tensor>(framework::GradVarName("Out")); | ||
const Tensor* mid = ctx.Input<Tensor>("mid_out"); | ||
|
||
auto x_g = ctx.Output<Tensor>(framework::GradVarName("X")); | ||
x_g->mutable_data<T>(ctx.GetPlace()); | ||
|
||
auto x_g_e = framework::EigenVector<T>::Flatten(*x_g); | ||
x_g_e.device(ctx.GetEigenDevice<Place>()) = x_g_e.constant(0.0); | ||
|
||
auto x_dims = x->dims(); | ||
int N = x_dims[0]; | ||
int C = x_dims[1]; | ||
int H = x_dims[2]; | ||
int W = x_dims[3]; | ||
|
||
int n = ctx.Attr<int>("n"); | ||
float alpha = ctx.Attr<float>("alpha"); | ||
float beta = ctx.Attr<float>("beta"); | ||
float ratio = -2 * alpha * beta; | ||
|
||
auto e_x = framework::EigenTensor<T, 4>::From(*x); | ||
auto e_x_g = framework::EigenTensor<T, 4>::From(*x_g); | ||
auto e_out = framework::EigenTensor<T, 4>::From(*out); | ||
auto e_out_g = framework::EigenTensor<T, 4>::From(*out_g); | ||
auto e_mid = framework::EigenTensor<T, 4>::From(*mid); | ||
|
||
const int start = -(n - 1) / 2; | ||
const int end = start + n; | ||
for (int m = 0; m < N; m++) { | ||
for (int i = 0; i < C; i++) { | ||
auto i_x = e_x.slice(Eigen::array<int, 4>({{m, i, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
auto i_x_g = e_x_g.slice(Eigen::array<int, 4>({{m, i, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
auto i_out_g = e_out_g.slice(Eigen::array<int, 4>({{m, i, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
auto i_mid = e_mid.slice(Eigen::array<int, 4>({{m, i, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
i_x_g.device(ctx.GetEigenDevice<Place>()) = i_mid.pow(-beta) * i_out_g; | ||
for (int c = start; c <= end; c++) { | ||
int ch = i + c; | ||
if (ch < 0 || ch >= C) { | ||
continue; | ||
} | ||
|
||
auto c_out = e_out.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
auto c_mid = e_mid.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
auto c_out_g = e_out_g.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}), | ||
Eigen::array<int, 4>({{1, 1, H, W}})); | ||
|
||
i_x_g.device(ctx.GetEigenDevice<Place>()) += | ||
ratio * c_out_g * c_out * i_x / c_mid; | ||
} | ||
} | ||
} | ||
} | ||
}; | ||

Review comment: For the GPU implementation, it would be better to reuse the kernel:
https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/function/CrossMapNormalOpGpu.cu
Loops like these are inefficient on the GPU.
Reply: I have opened an issue and noted it in the layer port list; let's resolve this in a new PR.

} // namespace operators
} // namespace paddle
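For reference, the backward formula quoted in the kernel's comment block can be sketched in the same style. The sketch below is not part of the PR: the helper name lrn_backward_reference, the plain flat buffers, and the symmetric clamped window are illustrative assumptions; the kernel in this PR expresses the same two-term accumulation with Eigen slices.

// Reference-only sketch of the LRN gradient:
//   InputGrad = OutputGrad * mid^(-beta)
//               + sum_j (OutputGrad_j * Output_j * (-2*alpha*beta) / mid_j) * Input
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> lrn_backward_reference(
    const std::vector<float>& x, const std::vector<float>& out,
    const std::vector<float>& out_g, const std::vector<float>& mid, int N,
    int C, int H, int W, int n, float alpha, float beta) {
  std::vector<float> x_g(x.size(), 0.0f);
  const float ratio = -2.0f * alpha * beta;
  auto idx = [&](int b, int c, int h, int w) {
    return ((b * C + c) * H + h) * W + w;
  };
  for (int b = 0; b < N; ++b) {
    for (int c = 0; c < C; ++c) {
      const int lower = std::max(0, c - n / 2);
      const int upper = std::min(C - 1, c + n / 2);
      for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
          const int i = idx(b, c, h, w);
          // First term: straight-through contribution from the same channel.
          x_g[i] = out_g[i] * std::pow(mid[i], -beta);
          // Second term: cross-channel contributions over the same window,
          // at the same spatial position.
          for (int j = lower; j <= upper; ++j) {
            const int jj = idx(b, j, h, w);
            x_g[i] += ratio * out_g[jj] * out[jj] * x[i] / mid[jj];
          }
        }
      }
    }
  }
  return x_g;
}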