
Commit

Merge pull request PaddlePaddle#56 from tiancaitzp/paddlebox

optimize fused_seqpool_cvm ops host time 2.

tiancaitzp authored Feb 29, 2024
2 parents 555d9c8 + 5bbc38e commit e8b4b99
Showing 3 changed files with 6 additions and 9 deletions.
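
The commit makes the same pair of host-side fixes in each of the three XPU kernels: every LoD level is now bound with auto& instead of auto, so the offsets vector is referenced rather than deep-copied for each input, and the grad kernels additionally drop an unused copy of the whole LoD. Below is a minimal sketch of the auto vs auto& distinction, with plain std::vector standing in for PaddlePaddle's actual LoD types (an assumption for illustration only):

// Minimal sketch of the change pattern; std::vector stands in for
// PaddlePaddle's LoD types (illustrative assumption, not the real API).
#include <cstddef>
#include <vector>

using Level = std::vector<std::size_t>;  // one LoD level: sequence offsets
using LoD = std::vector<Level>;

int main() {
  LoD lod = {{0, 3, 7, 12}};  // hypothetical level-0 offsets

  auto by_value = lod[0];  // auto deduces Level: allocates and deep-copies
  auto& by_ref = lod[0];   // auto& deduces Level&: no allocation, no copy

  // Reads look identical either way, so switching to auto& is safe as
  // long as the owning LoD outlives the reference.
  std::size_t n_copy = by_value.size();
  std::size_t n_ref = by_ref.size();
  return static_cast<int>(n_copy - n_ref);  // always 0
}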
5 changes: 2 additions & 3 deletions paddle/fluid/operators/fused/fused_seqpool_cvm_op_xpu.cc
@@ -131,7 +131,7 @@ class FusedSeqpoolCVMOpXPUKernel : public framework::OpKernel<T> {
       } else {
         cpu_y_addr_vec[i] = reinterpret_cast<T*>(out[i]->mutable_data<T>(place));
       }
-      auto x_lod = ins[i]->lod()[0];
+      auto& x_lod = ins[i]->lod()[0];
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif
@@ -225,10 +225,9 @@ class FusedSeqpoolCVMGradOpXPUKernel : public framework::OpKernel<T> {
       T* dx_data = dx->mutable_data<T>(place);
       // T* dx_data = dx->mutable_data<T>(place);
       T* dy_data = const_cast<T*>(dy->data<T>());
-      auto lod = dx->lod();
       cpu_dx_list[k] = dx_data;
       cpu_dy_list[k] = (const T*)dy_data;
-      auto lod_level_0 = dx->lod()[0];
+      auto& lod_level_0 = dx->lod()[0];
       int lod_size = lod_level_0.size();
       for (int i = 0; i < lod_size; i++) {
         cpu_lodx[i + start_index] = lod_level_0[i];
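
The grad-kernel hunk above also deletes "auto lod = dx->lod();", which deep-copied the entire multi-level LoD into a local that was never read; the surrounding code only needs level 0, which the next line now fetches by reference. A runnable sketch of the removed anti-pattern, again using plain std::vector in place of the LoD type (illustrative assumption):

// Sketch: an unused LoD copy costs a full deep copy at runtime even
// though only level 0 is ever read. std::vector stands in for the LoD type.
#include <cstddef>
#include <vector>

using LoD = std::vector<std::vector<std::size_t>>;

std::size_t sum_level0(const LoD& src) {
  // Removed pattern: `auto lod = src;` would deep-copy every level here,
  // then never be read again.
  auto& lod_level_0 = src[0];  // reference: no copy
  std::size_t total = 0;
  for (std::size_t v : lod_level_0) total += v;
  return total;
}

int main() {
  LoD lod = {{0, 2, 5}, {0, 1, 3, 4, 6, 9}};  // hypothetical two-level LoD
  return sum_level0(lod) == 7 ? 0 : 1;
}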
5 changes: 2 additions & 3 deletions paddle/fluid/operators/fused/fused_seqpool_cvm_with_conv_op_xpu.cc
@@ -103,7 +103,7 @@ class FusedSeqpoolCVMWithConvOpXPUKernel : public framework::OpKernel<T> {
       } else {
         cpu_y_addr_vec[i] = reinterpret_cast<T*>(out[i]->mutable_data<T>(place));
       }
-      auto x_lod = ins[i]->lod()[0];
+      auto& x_lod = ins[i]->lod()[0];
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif
@@ -193,10 +193,9 @@ class FusedSeqpoolCVMWithConvGradOpXPUKernel : public framework::OpKernel<T> {
       }
       T* dx_data = dx->mutable_data<T>(place);
       T* dy_data = const_cast<T*>(dy->data<T>());
-      auto lod = dx->lod();
       cpu_dx_list[k] = dx_data;
       cpu_dy_list[k] = (const T*)dy_data;
-      auto lod_level_0 = dx->lod()[0];
+      auto& lod_level_0 = dx->lod()[0];
       int lod_size = lod_level_0.size();
       for (int i = 0; i < lod_size; i++) {
         cpu_lodx[i + start_index] = lod_level_0[i];
5 changes: 2 additions & 3 deletions paddle/fluid/operators/fused/fused_seqpool_cvm_with_diff_thres_op_xpu.cc
@@ -120,7 +120,7 @@ class FusedSeqpoolCVMWithDiffThresOpXPUKernel : public framework::OpKernel<T> {
       } else {
         cpu_y_addr_vec[i] = reinterpret_cast<T*>(out[i]->mutable_data<T>(place));
       }
-      auto x_lod = ins[i]->lod()[0];
+      auto& x_lod = ins[i]->lod()[0];
 #ifdef PADDLE_WITH_MKLML
 #pragma omp parallel for
 #endif
@@ -213,10 +213,9 @@ class FusedSeqpoolCVMWithDiffThresGradOpXPUKernel : public framework::OpKernel<T> {
       T* dx_data = dx->mutable_data<T>(place);
       // T* dx_data = dx->mutable_data<T>(place);
       T* dy_data = const_cast<T*>(dy->data<T>());
-      auto lod = dx->lod();
       cpu_dx_list[k] = dx_data;
       cpu_dy_list[k] = (const T*)dy_data;
-      auto lod_level_0 = dx->lod()[0];
+      auto& lod_level_0 = dx->lod()[0];
       int lod_size = lod_level_0.size();
       for (int i = 0; i < lod_size; i++) {
         cpu_lodx[i + start_index] = lod_level_0[i];
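
The saving from these changes is purely host-side and grows with the number of sequences per batch. A hypothetical micro-benchmark, not from this PR (the vector size and iteration count are made up), illustrating the copy cost that the auto& binding avoids:

// Hypothetical micro-benchmark: deep-copying a large offsets vector per
// iteration vs. binding it by reference. Sizes and counts are illustrative.
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // One LoD level with about one million sequence offsets (made-up size).
  std::vector<std::vector<std::size_t>> lod(1, std::vector<std::size_t>(1 << 20, 7));
  std::size_t sink = 0;  // keeps the loops from being optimized away

  auto t0 = std::chrono::steady_clock::now();
  for (int i = 0; i < 100; ++i) {
    auto level = lod[0];   // copies all offsets on every iteration
    sink += level[i];
  }
  auto t1 = std::chrono::steady_clock::now();
  for (int i = 0; i < 100; ++i) {
    auto& level = lod[0];  // binds a reference: no copy
    sink += level[i];
  }
  auto t2 = std::chrono::steady_clock::now();

  using us = std::chrono::microseconds;
  std::printf("copy: %lld us, ref: %lld us (sink=%zu)\n",
              (long long)std::chrono::duration_cast<us>(t1 - t0).count(),
              (long long)std::chrono::duration_cast<us>(t2 - t1).count(),
              sink);
  return 0;
}

In typical optimized builds the copy loop still pays an allocation plus a full copy per iteration, while the reference loop only performs the reads.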
