
[Pten] Move expand_v2 to pten (#39471)
* move expand to pten

* move expand_v2 to pten

* move expand_v2 to pten

* fix grad register

* fix grad register

* fix tensorcopy

* fix tensorcopy

* fix tensorcopy

* fix tensorcopy

* fix tensorcopy

* fix ci

* fix tensorcopy
linjieccc authored Feb 15, 2022
1 parent ab86677 commit 2d16d69
Showing 14 changed files with 550 additions and 284 deletions.
33 changes: 3 additions & 30 deletions paddle/fluid/operators/expand_v2_op.cc
100755 → 100644
@@ -16,6 +16,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"

#define MAX_RANK_SUPPORTED 6

namespace paddle {
namespace operators {
@@ -296,33 +299,3 @@ REGISTER_OPERATOR(expand_v2_grad, ops::ExpandV2GradOp,
ops::ExpandV2DoubleGradOpMaker<paddle::framework::OpDesc>,
ops::ExpandV2DoubleGradOpMaker<paddle::imperative::OpBase>,
ops::ExpandV2GradNoNeedBufVarsInferer);
REGISTER_OP_CPU_KERNEL(
expand_v2, ops::ExpandV2Kernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandV2Kernel<paddle::platform::CPUDeviceContext, double>,
ops::ExpandV2Kernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandV2Kernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ExpandV2Kernel<paddle::platform::CPUDeviceContext, bool>);
REGISTER_OP_CPU_KERNEL(
expand_v2_grad,
ops::ExpandV2GradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandV2GradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ExpandV2GradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandV2GradKernel<paddle::platform::CPUDeviceContext, int64_t>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL(
expand_v2, ops::ExpandV2Kernel<paddle::platform::CUDADeviceContext, float>,
ops::ExpandV2Kernel<paddle::platform::CUDADeviceContext, double>,
ops::ExpandV2Kernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::ExpandV2Kernel<paddle::platform::CUDADeviceContext, int>,
ops::ExpandV2Kernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ExpandV2Kernel<paddle::platform::CUDADeviceContext, bool>);
REGISTER_OP_CUDA_KERNEL(
expand_v2_grad,
ops::ExpandV2GradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ExpandV2GradKernel<paddle::platform::CUDADeviceContext, double>,
ops::ExpandV2GradKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::ExpandV2GradKernel<paddle::platform::CUDADeviceContext, int>,
ops::ExpandV2GradKernel<paddle::platform::CUDADeviceContext, int64_t>);
#endif
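(The fluid kernel registrations deleted above are superseded by pten registrations added in this commit: the CPU ones appear below via PT_REGISTER_KERNEL in paddle/pten/kernels/cpu, and GPU counterparts are presumably among the changed files not shown in this excerpt.)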
254 changes: 0 additions & 254 deletions paddle/fluid/operators/expand_v2_op.h
@@ -91,259 +91,5 @@ inline std::vector<int> get_expand_shape(
return ctx.Attr<std::vector<int>>("shape");
}
}

using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
using framework::To32BitIndex;

template <typename DeviceContext, typename T>
class ExpandV2Kernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto rank = context.Input<Tensor>("X")->dims().size();
PADDLE_ENFORCE_GE(
rank, 1,
platform::errors::InvalidArgument(
"The rank of the input 'X' for expand_v2 op must be positive, "
"but the value received is %d.",
rank));
PADDLE_ENFORCE_LE(
rank, MAX_RANK_SUPPORTED,
platform::errors::InvalidArgument(
"The rank of the input 'X' for expand_v2 op must be less than "
"or equal to %d, but the value received is %d.",
MAX_RANK_SUPPORTED, rank));
auto expand_shape = get_expand_shape(context);
auto shape_size = expand_shape.size();
PADDLE_ENFORCE_GE(
shape_size, rank,
platform::errors::InvalidArgument(
"The number (%d) of elements of 'shape' for expand_v2 op must be "
"greater than or equal to the rank (%d) of the input 'X'.",
shape_size, rank));
PADDLE_ENFORCE_LE(
shape_size, MAX_RANK_SUPPORTED,
platform::errors::InvalidArgument(
"The number (%d) of elements of 'shape' for expand_v2 op must be "
"less than or equal to %d.",
shape_size, MAX_RANK_SUPPORTED));
rank = std::max(rank, static_cast<int>(shape_size));
switch (rank) {
case 1:
Expand<1>(context);
break;
case 2:
Expand<2>(context);
break;
case 3:
Expand<3>(context);
break;
case 4:
Expand<4>(context);
break;
case 5:
Expand<5>(context);
break;
case 6:
Expand<6>(context);
break;
}
}

protected:
template <int Rank>
void Expand(const framework::ExecutionContext& context) const {
auto* in0 = context.Input<Tensor>("X");

auto in_dims = in0->dims();
auto expand_shape = get_expand_shape(context);
auto vec_in_dims = framework::vectorize<int>(in_dims);
auto diff = expand_shape.size() - vec_in_dims.size();
vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
std::vector<int> repeat_times(vec_in_dims.size());
for (size_t i = 0; i < vec_in_dims.size(); ++i) {
PADDLE_ENFORCE_NE(expand_shape[i], 0,
platform::errors::InvalidArgument(
"The expanded size cannot be zero."));
if (i < diff) {
PADDLE_ENFORCE_GT(
expand_shape[i], 0,
platform::errors::InvalidArgument(
"The expanded size (%d) for non-existing dimensions must be "
"positive for expand_v2 op.",
expand_shape[i]));
repeat_times[i] = expand_shape[i];
} else if (expand_shape[i] > 0) {
if (vec_in_dims[i] != 1) {
PADDLE_ENFORCE_EQ(
vec_in_dims[i], expand_shape[i],
platform::errors::InvalidArgument(
"The value (%d) of the non-singleton dimension does not match"
" the corresponding value (%d) in shape for expand_v2 op.",
vec_in_dims[i], expand_shape[i]));
repeat_times[i] = 1;
} else {
repeat_times[i] = expand_shape[i];
}
} else {
PADDLE_ENFORCE_EQ(
expand_shape[i], -1,
platform::errors::InvalidArgument(
"When the value in shape is negative for expand_v2 op, "
"only -1 is supported, but the value received is %d.",
expand_shape[i]));
repeat_times[i] = 1;
}
}
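// Example: x with shape [3, 1] expanded by shape = [2, 3, 4] gives
// vec_in_dims = [1, 3, 1] (a leading 1 is inserted) and
// repeat_times = [2, 1, 4]: the new leading dimension is created with
// size 2, the matching size-3 dimension is kept, and the singleton
// trailing dimension is tiled 4 times, so out_dims = [2, 3, 4].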

auto* out0 = context.Output<Tensor>("Out");
Eigen::DSizes<Eigen::DenseIndex, Rank> bcast_dims;
for (size_t i = 0; i < repeat_times.size(); ++i) {
bcast_dims[i] = repeat_times[i];
}

framework::DDim new_in_dims = framework::make_ddim(vec_in_dims);
framework::DDim out_dims(new_in_dims);
for (size_t i = 0; i < repeat_times.size(); ++i) {
out_dims[i] *= repeat_times[i];
}

out0->Resize(out_dims);
auto x = EigenTensor<T, Rank>::From(*in0, new_in_dims);
out0->mutable_data<T>(context.GetPlace());
auto y = EigenTensor<T, Rank>::From(*out0, out_dims);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
// use 32-bit index to speed up
bool use_32bit_index = y.size() < Eigen::NumTraits<int>::highest();
if (use_32bit_index) {
EigenBroadcast<std::decay_t<decltype(place)>, T, Rank>::Eval(
place, To32BitIndex(y), To32BitIndex(x), bcast_dims);
} else {
EigenBroadcast<std::decay_t<decltype(place)>, T, Rank>::Eval(place, y, x,
bcast_dims);
}
}
};

template <typename DeviceContext, typename T>
class ExpandV2GradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in0 = context.Input<Tensor>("X");
auto expand_shape = get_expand_shape(context);
auto x_dims = in0->dims();
auto vec_in_dims = framework::vectorize<int>(x_dims);
auto diff = expand_shape.size() - vec_in_dims.size();
vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
// 1. reshape_dims_vec is the broadcast parameter.
// 2. reduce_dims_vec is the dimension parameter to compute gradients. For
// each dimension expanded, the gradients should be summed to original
// size.
std::vector<int> repeat_times(vec_in_dims.size());
for (size_t i = 0; i < vec_in_dims.size(); ++i) {
if (expand_shape[i] < 0) {
repeat_times[i] = 1;
} else {
repeat_times[i] = expand_shape[i] / vec_in_dims[i];
}
}
std::vector<int> reshape_dims_vec;
std::vector<int> reduce_dims_vec;
for (size_t i = 0; i < repeat_times.size(); ++i) {
reduce_dims_vec.push_back(reshape_dims_vec.size());
reshape_dims_vec.push_back(repeat_times[i]);
reshape_dims_vec.push_back(vec_in_dims[i]);
}
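// Example: x with shape [3, 1] expanded to [2, 3, 4] gives
// repeat_times = [2, 1, 4], reshape_dims_vec = [2, 1, 1, 3, 4, 1], and
// reduce_dims_vec = [0, 2, 4]; Out@GRAD is viewed as [2, 1, 1, 3, 4, 1]
// and summed over dimensions {0, 2, 4}, recovering the original [1, 3, 1]
// (i.e. [3, 1]) gradient shape.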

int dims = reduce_dims_vec.size();

bool just_copy = true;
for (size_t i = 0; i < repeat_times.size(); i++) {
if (repeat_times[i] != 1) {
just_copy = false;
break;
}
}
// no need reduce, just copy
if (just_copy) {
auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
out0->mutable_data<T>(context.GetPlace());
framework::TensorCopy(*in0, context.GetPlace(), context.device_context(),
out0);
} else {
PADDLE_ENFORCE_GE(dims, 1,
platform::errors::InvalidArgument(
"The rank of the input 'Out@GRAD' for "
"expand_v2_grad op must be greater than or "
"equal to 1, but the value received is %d.",
dims));
PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED,
platform::errors::InvalidArgument(
"The rank of the input 'Out@GRAD' for "
"expand_v2_grad op must be less than or equal "
"to %d, but the value received is %d.",
MAX_RANK_SUPPORTED, dims));
switch (dims) {
case 1:
ExpandBackward<1>(context, reshape_dims_vec, reduce_dims_vec);
break;
case 2:
ExpandBackward<2>(context, reshape_dims_vec, reduce_dims_vec);
break;
case 3:
ExpandBackward<3>(context, reshape_dims_vec, reduce_dims_vec);
break;
case 4:
ExpandBackward<4>(context, reshape_dims_vec, reduce_dims_vec);
break;
case 5:
ExpandBackward<5>(context, reshape_dims_vec, reduce_dims_vec);
break;
case 6:
ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec);
break;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Only support tensor with rank being between 1 and 6. But "
"received tensor's rank = %d.",
dims));
}
}
}

protected:
template <int Dims>
void ExpandBackward(const framework::ExecutionContext& context,
const std::vector<int>& reshape_dims_vec,
const std::vector<int>& reduce_dims_vec) const {
size_t reshape_size = reshape_dims_vec.size();
size_t reduce_size = reduce_dims_vec.size();
auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
out0->mutable_data<T>(context.GetPlace());
auto x_grad = EigenVector<T>::Flatten(*out0);
Eigen::DSizes<Eigen::DenseIndex, Dims * 2> reshape_dims;
for (size_t i = 0; i < reshape_size; ++i) {
reshape_dims[i] = reshape_dims_vec[i];
}
Eigen::DSizes<Eigen::DenseIndex, Dims> reduce_dims;
for (size_t i = 0; i < reduce_size; ++i) {
reduce_dims[i] = reduce_dims_vec[i];
}
auto out_grad = EigenVector<T>::Flatten(*in0);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenBroadcastGrad<std::decay_t<decltype(place)>, T, Dims>::Eval(
place, x_grad, out_grad, reduce_dims, reshape_dims);
}
};

} // namespace operators
} // namespace paddle
1 change: 1 addition & 0 deletions paddle/fluid/operators/expand_v2_op_npu.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/expand_v2_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"

namespace paddle {
1 change: 1 addition & 0 deletions paddle/fluid/operators/expand_v2_op_xpu.cc
@@ -14,6 +14,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU

#include "paddle/fluid/operators/expand_v2_op.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {
2 changes: 2 additions & 0 deletions paddle/pten/core/compat/op_utils.h
@@ -45,6 +45,8 @@ const std::unordered_set<std::string> deprecated_op_names({"flatten",
"mean",
"reshape",
"reshape_grad",
"expand",
"expand_grad",
"sum"});

class DefaultKernelSignatureMap {
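(Adding "expand" and "expand_grad" to deprecated_op_names marks the legacy fluid op names as deprecated; presumably this is needed because the migrated kernels are registered under the base names expand/expand_grad, which would otherwise clash with the old fluid expand op.)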
29 changes: 29 additions & 0 deletions paddle/pten/kernels/cpu/expand_grad_kernel.cc
@@ -0,0 +1,29 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/pten/kernels/expand_grad_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/expand_grad_kernel_impl.h"

PT_REGISTER_KERNEL(expand_grad,
CPU,
ALL_LAYOUT,
pten::ExpandGradKernel,
float,
double,
int,
int64_t) {}
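The registration above binds pten::ExpandGradKernel, whose declaration is not part of this excerpt. A minimal sketch of what that header plausibly contains, assuming the signature pattern of other migrated pten kernels (the ScalarArray shape parameter and the argument order are assumptions, not taken from this diff):

// Hypothetical sketch of paddle/pten/kernels/expand_grad_kernel.h;
// the real header is not shown in this excerpt.
#pragma once

#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"

namespace pten {

template <typename T, typename Context>
void ExpandGradKernel(const Context& ctx,
                      const DenseTensor& x,
                      const DenseTensor& out_grad,
                      const ScalarArray& shape,
                      DenseTensor* x_grad);

}  // namespace pten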
30 changes: 30 additions & 0 deletions paddle/pten/kernels/cpu/expand_kernel.cc
@@ -0,0 +1,30 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/pten/kernels/expand_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/expand_kernel_impl.h"

PT_REGISTER_KERNEL(expand,
CPU,
ALL_LAYOUT,
pten::ExpandKernel,
float,
double,
int,
int64_t,
bool) {}
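Likewise, a minimal sketch of the forward declaration this registration binds, under the same assumptions:

// Hypothetical sketch of paddle/pten/kernels/expand_kernel.h;
// the real header is not shown in this excerpt.
#pragma once

#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"

namespace pten {

template <typename T, typename Context>
void ExpandKernel(const Context& ctx,
                  const DenseTensor& x,
                  const ScalarArray& shape,
                  DenseTensor* out);

}  // namespace pten

Note that the dtype lists mirror the deleted fluid CPU registrations exactly: the forward kernel additionally covers bool, while the grad kernel does not.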
(The remaining seven changed files in this commit are not shown in this excerpt.)