Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Browse files Browse the repository at this point in the history
… cinn_instruction_run_op_test
  • Loading branch information
CtfGo committed Feb 16, 2022
2 parents 40e7577 + 7d53a28 commit 1c5aecd
Show file tree
Hide file tree
Showing 27 changed files with 1,023 additions and 648 deletions.
131 changes: 29 additions & 102 deletions paddle/fluid/distributed/fleet_executor/dist_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ bool LoadDataFromDistModelTensor(const DistModelTensor &input_data,
} else if (input_data.dtype == DistModelDataType::INT32) {
input_tensor_ptr = input_tensor->mutable_data<int32_t>(dims, place);
} else {
// Q(fleet exe dev): for input/output, should we support fp16
LOG(ERROR) << "unsupported feed type " << input_data.dtype;
return false;
}
Expand Down Expand Up @@ -113,14 +112,6 @@ std::string DistModelDTypeToString(DistModelDataType dtype) {
return "NOT SUPPORT DTYPE";
}

// Returns true when this rank belongs to the first pipeline-parallel stage.
// Ranks are laid out with model-parallel groups on the inner dimension, so
// the first pp stage is exactly the first mp group of ranks.
bool IsPPFirstStage(const DistModelConfig &config) {
  const int64_t stage_offset = config.local_rank - config.mp_degree;
  return stage_offset < 0;
}

// Returns true when this rank belongs to the last pipeline-parallel stage.
// Stepping forward by one mp group from a last-stage rank would run past
// the total number of ranks.
bool IsPPLastStage(const DistModelConfig &config) {
  const int64_t next_stage_rank = config.local_rank + config.mp_degree;
  return next_stage_rank >= config.nranks;
}

class DistModelTimer {
public:
void tic() { tic_time = std::chrono::high_resolution_clock::now(); }
Expand Down Expand Up @@ -197,65 +188,34 @@ bool DistModel::PreparePlace() {
}

bool DistModel::CommInit() {
// NOTE (Yuang Liu): The peer endpoints will be obtained with the assumption
// that mp part is always on inner side and pp part is always on outer side.
// TODO(fleet exe dev): The peer endpoints could be configured by users.
PADDLE_ENFORCE_EQ(
config_.pp_degree * config_.mp_degree, config_.nranks,
platform::errors::InvalidArgument(
"The mp_degree multiplies pp_degree is not equal with nranks"));
std::unique_ptr<framework::ProgramDesc> comm_init_program(
new framework::ProgramDesc());
framework::BlockDesc *comm_init_block = comm_init_program->MutableBlock(0);
if (config_.mp_degree > 1) {
PADDLE_ENFORCE_GE(
config_.mp_ring_id, 0,
platform::errors::InvalidArgument(
"mp ring id must be provided for inference under mp."));
VLOG(3) << "Init comm group for mp.";
std::vector<int64_t> &ring_ids =
config_.rank_to_ring_ids_[config_.local_rank];
int64_t order = 0;
std::string var_name_base = "comm_init_";
for (int64_t ring_id : ring_ids) {
VLOG(3) << "Init comm for ring id: " << ring_id;
int64_t ranks_in_group = config_.ring_id_to_ranks_[ring_id].size();
int64_t rank_in_group = 0;
std::vector<int64_t> &ranks = config_.ring_id_to_ranks_[ring_id];
for (int64_t rank : ranks) {
if (config_.local_rank == rank) {
break;
}
rank_in_group += 1;
}
std::vector<std::string> peer_endpoints;
for (int64_t
idx = (config_.local_rank / config_.mp_degree) * config_.mp_degree,
i = 0;
i < config_.mp_degree; ++idx, ++i) {
if (config_.trainer_endpoints[idx] == config_.current_endpoint) {
for (int64_t rank : ranks) {
if (config_.local_rank == rank) {
continue;
}
peer_endpoints.emplace_back(config_.trainer_endpoints[idx]);
}
// get nranks in a mp group and inner group rank for local rank
int64_t mp_group_nranks = config_.nranks / config_.pp_degree;
int64_t mp_group_rank = config_.local_rank % config_.mp_degree;
InsertCommOp("mp_comm_id", mp_group_nranks, mp_group_rank, peer_endpoints,
comm_init_block, config_.mp_ring_id);
}
if (config_.pp_degree > 1) {
VLOG(3) << "Init comm group for pp.";
if (!IsPPFirstStage(config_)) {
PADDLE_ENFORCE_EQ(config_.pp_upstream_ring_id >= 0, true,
platform::errors::InvalidArgument(
"pp upstream ring id must be provided for "
"non-first pp stage if inference under pp."));
// not the first pp stage, has upstream
std::vector<std::string> upstream_peer_endpoints;
upstream_peer_endpoints.emplace_back(
config_.trainer_endpoints[config_.local_rank - config_.mp_degree]);
InsertCommOp("pp_upstream_comm_id", 2, 1, upstream_peer_endpoints,
comm_init_block, config_.pp_upstream_ring_id);
}

if (!IsPPLastStage(config_)) {
PADDLE_ENFORCE_EQ(config_.pp_downstream_ring_id >= 0, true,
platform::errors::InvalidArgument(
"pp downstream ring id must be provided for "
"non-last pp stage if inference under pp."));
// not the last pp stage, has downstream
std::vector<std::string> downstream_peer_endpoints;
downstream_peer_endpoints.emplace_back(
config_.trainer_endpoints[config_.local_rank + config_.mp_degree]);
InsertCommOp("pp_downstream_comm_id", 2, 0, downstream_peer_endpoints,
comm_init_block, config_.pp_downstream_ring_id);
peer_endpoints.emplace_back(config_.trainer_endpoints[rank]);
}
InsertCommOp(var_name_base + std::to_string(order), ranks_in_group,
rank_in_group, peer_endpoints, comm_init_block, ring_id);
order += 1;
}
framework::NaiveExecutor e(place_);
e.CreateVariables(*comm_init_program, 0, true, scope_.get());
Expand Down Expand Up @@ -409,12 +369,7 @@ bool DistModel::LoadParameters() {

bool DistModel::PrepareFleetExe() {
task_node_.reset(new TaskNode(program_.get(), config_.local_rank));
if (config_.local_rank - config_.mp_degree >= 0) {
task_node_->AddUpstreamTask(config_.local_rank - config_.mp_degree);
}
if (config_.local_rank + config_.mp_degree < config_.nranks) {
task_node_->AddDownstreamTask(config_.local_rank + config_.mp_degree);
}
// With auto cut, there is no concept of pp, no need to add dependency.
task_node_->SetType("Compute");
task_node_->Init();
executor_desc_ = FleetExecutorDesc();
Expand Down Expand Up @@ -473,40 +428,13 @@ bool DistModel::PrepareFeedAndFetch() {
}
}

if (config_.pp_degree == 1) {
if (feeds_.size() == 0) {
LOG(ERROR) << "No feed ops in the inf program, please check the program.";
return false;
}
if (fetches_.size() == 0) {
LOG(ERROR) << "No fetch op in the inf program, please check the program.";
return false;
}
} else {
if (IsPPFirstStage(config_)) {
if (feeds_.size() == 0) {
LOG(ERROR) << "Feed ops are needed for the first pp stage.";
return false;
}
} else {
if (feeds_.size() > 0) {
LOG(WARNING) << "Feed op is found in the non-first stage of pp.";
} else {
LOG(INFO) << "No feed ops in non-first pp stage.";
}
}
if (IsPPLastStage(config_)) {
if (fetches_.size() == 0) {
LOG(WARNING) << "No fetch op was found in the last pp stage. Make sure "
"the result has been sent to frist pp stage.";
}
} else {
if (fetches_.size() > 0) {
LOG(WARNING) << "Fetch op is found in the non-last stage of pp.";
} else {
LOG(INFO) << "No fetch op in non-last pp stage.";
}
}
if (feeds_.size() == 0) {
LOG(ERROR) << "No feed ops in the inf program, please check the program.";
return false;
}
if (fetches_.size() == 0) {
LOG(ERROR) << "No fetch op in the inf program, please check the program.";
return false;
}
return true;
}
Expand Down Expand Up @@ -606,7 +534,6 @@ bool DistModel::FetchResult(const framework::LoDTensor &fetch,

bool DistModel::Run(const std::vector<DistModelTensor> &input_data,
std::vector<DistModelTensor> *output_data) {
// TODO(fleet exe dev): support pipeline inf mode
VLOG(3) << "DistModel run for once.";

DistModelTimer timer;
Expand Down
8 changes: 3 additions & 5 deletions paddle/fluid/distributed/fleet_executor/dist_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
Expand Down Expand Up @@ -47,12 +48,9 @@ struct DistModelConfig {
std::string current_endpoint{};
int64_t nranks{1};
int64_t local_rank{0};
int64_t mp_degree{1};
int64_t pp_degree{1};
int64_t mp_ring_id{-1};
int64_t pp_upstream_ring_id{-1};
int64_t pp_downstream_ring_id{-1};
bool enable_timer{false};
std::map<int64_t, std::vector<int64_t>> ring_id_to_ranks_{};
std::map<int64_t, std::vector<int64_t>> rank_to_ring_ids_{};
};

class DistModel {
Expand Down
17 changes: 7 additions & 10 deletions paddle/fluid/operators/detection/prior_box_op_xpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU

#include "paddle/fluid/operators/detection/prior_box_op.h"
#include "paddle/fluid/platform/device/device_wrapper.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -81,21 +82,17 @@ class PriorBoxOpXPUKernel : public framework::OpKernel<T> {
dev_ctx.x_context(), boxes_data, aspect_ratios_param, min_sizes_param,
max_sizes_param, feature_height, feature_width, img_height, img_width,
offset, step_height, step_width, clip, min_max_aspect_ratios_order);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
platform::errors::External(
"XPU gen_prior_box kernel return wrong value[%d %s]",
ret, XPUAPIErrorMsg[ret]));
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gen_prior_box");

int box_num = feature_height * feature_width * num_priors;
int vlen = variances.size();
std::vector<K> var_cpu(vlen * box_num);
for (int i = 0; i < box_num; ++i) {
ret = xpu_memcpy(vars_data + i * vlen, variances.data(), vlen * sizeof(K),
XPUMemcpyKind::XPU_HOST_TO_DEVICE);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External(
"XPU xpu_memcpy return wrong "
"value[%d %s] in prior_box.",
ret, XPUAPIErrorMsg[ret]));
std::copy(variances.begin(), variances.end(), var_cpu.begin() + i * vlen);
}
ret = xpu_memcpy(vars_data, var_cpu.data(), var_cpu.size() * sizeof(K),
XPUMemcpyKind::XPU_HOST_TO_DEVICE);
PADDLE_ENFORCE_XPU_SUCCESS(ret);
}
};

Expand Down
14 changes: 1 addition & 13 deletions paddle/fluid/operators/lerp_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/lerp_op.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -132,15 +132,3 @@ REGISTER_OPERATOR(
paddle::operators::LerpInplaceInferer);

REGISTER_OPERATOR(lerp_grad, paddle::operators::LerpGradOp);

REGISTER_OP_CPU_KERNEL(
lerp,
paddle::operators::LerpKernel<paddle::platform::CPUDeviceContext, float>,
paddle::operators::LerpKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_CPU_KERNEL(
lerp_grad,
paddle::operators::LerpGradKernel<paddle::platform::CPUDeviceContext,
float>,
paddle::operators::LerpGradKernel<paddle::platform::CPUDeviceContext,
double>);
Loading

1 comment on commit 1c5aecd

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulations! Your pull request passed all required CI checks. You can now ask the reviewer(s) to approve and merge. 🎉

Please sign in to comment.