Merge pull request #1783 from TeslaZhao/v0.9.0
[cherry-pick] some Updates
TeslaZhao committed May 18, 2022
2 parents b0bd791 + 4d035eb commit f93957f
Showing 44 changed files with 3,824 additions and 191 deletions.
17 changes: 15 additions & 2 deletions cmake/paddlepaddle.cmake
@@ -171,14 +171,27 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)

#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)

#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)

if (NOT WITH_MKLML)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
endif()

#ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
#SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)

#ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
#SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)

ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
-if (WITH_ASCEND_CL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
+if (WITH_ASCEND_CL OR WITH_XPU)
+SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
endif()

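With this change the imported `paddle_inference` target points at the static `libpaddle_inference.a` by default, and only Ascend CL and XPU builds fall back to the shared `libpaddle_inference.so`. As a rough sketch (the `serving_demo` target name is hypothetical), a consumer elsewhere in the build would link the imported target as usual:

```cmake
# Hypothetical consumer target; paddle_inference is the imported
# library defined in cmake/paddlepaddle.cmake above.
add_executable(serving_demo demo.cpp)
target_link_libraries(serving_demo paddle_inference)
```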
7 changes: 4 additions & 3 deletions core/configure/proto/general_model_service.proto
@@ -90,11 +90,12 @@ message Request {
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
+bool profile_server = 3;
+uint64 log_id = 4;
// Error code
-int32 err_no = 3;
-
+int32 err_no = 5;
// Error messages
-string err_msg = 4;
+string err_msg = 6;
};

message ModelOutput {
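Renumbering `err_no` and `err_msg` from fields 3/4 to 5/6 changes the wire format, so clients and servers built from this proto must be regenerated together. A minimal sketch of client-side error handling against the regenerated stubs (the `CheckResponse` helper and the generated header path are illustrative):

```cpp
// Illustrative only; assumes the C++ stubs regenerated from this proto.
// #include "general_model_service.pb.h"  // generated header, path assumed
#include <iostream>

using baidu::paddle_serving::predictor::general_model::Response;

void CheckResponse(const Response& res) {
  if (res.err_no() != 0) {  // err_no is now field 5, err_msg field 6
    std::cerr << "log_id=" << res.log_id() << " failed: " << res.err_msg()
              << std::endl;
  }
}
```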
14 changes: 14 additions & 0 deletions core/configure/proto/server_configure.proto
@@ -49,6 +49,20 @@ message EngineDesc {
optional bool gpu_multi_stream = 20;
optional bool use_ascend_cl = 21;

/*
* "gpu_memory_mb": allocate GPU memory via config.EnableUseGpu()
* "cpu_math_thread_num": set the number of CPU math threads via
* config.SetCpuMathLibraryNumThreads()
* "trt_workspace_size": set the TensorRT workspace size via
* config.EnableTensorRtEngine(), default 1 << 25
* "trt_use_static": if true, serialize the TensorRT optimization
* information to disk and reload it from disk later.
*/
optional int32 gpu_memory_mb = 22 [ default = 100 ];
optional int32 cpu_math_thread_num = 23 [ default = 1 ];
optional int32 trt_workspace_size = 24 [ default = 33554432 ];
optional bool trt_use_static = 25 [ default = false ];

/*
* "runtime_thread_num": n == 0 means don't use asynchronous task scheduling
* mode.
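The four new options correspond to the Paddle Inference `Config` calls named in the comment. A sketch of how an engine might apply an `EngineDesc` (the `ApplyEngineDesc` helper and its call site are illustrative; the `paddle_infer::Config` methods are the public inference API):

```cpp
// Illustrative mapping from the new EngineDesc fields to paddle_infer::Config.
#include "paddle_inference_api.h"

void ApplyEngineDesc(paddle_infer::Config* config, int gpu_memory_mb,
                     int cpu_math_thread_num, int trt_workspace_size,
                     bool trt_use_static) {
  // gpu_memory_mb: initial GPU memory pool size, here for device 0.
  config->EnableUseGpu(gpu_memory_mb, 0);
  // cpu_math_thread_num: thread count for the CPU math library.
  config->SetCpuMathLibraryNumThreads(cpu_math_thread_num);
  // trt_workspace_size (default 1 << 25) and trt_use_static, which saves
  // the serialized TensorRT engine to disk and reloads it on restart.
  config->EnableTensorRtEngine(trt_workspace_size, /*max_batch_size=*/1,
                               /*min_subgraph_size=*/3,
                               paddle_infer::PrecisionType::kFloat32,
                               /*use_static=*/trt_use_static,
                               /*use_calib_mode=*/false);
}
```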
126 changes: 126 additions & 0 deletions core/general-server/op/general_remote_op.cpp
@@ -0,0 +1,126 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "core/general-server/op/general_remote_op.h"
#include <iostream>
#include <sstream>
#include "core/util/include/timer.h"

// paddle inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8
// FLOAT16 support is planned.
// 2 GB cap for the brpc body; the unsigned literal keeps the
// multiplication from overflowing a signed int.
#define BRPC_MAX_BODY_SIZE (2ULL * 1024 * 1024 * 1024)
const std::string LODABALANCE = "";  // empty string selects brpc's default load balancer

namespace baidu {
namespace paddle_serving {
namespace serving {

using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

brpc::Channel BRPCStub::brpc_channels[MAX_MP_NUM];

brpc::ChannelOptions BRPCStub::options;
std::atomic<int> BRPCStub::inited(0);

int GeneralRemoteOp::inference() {
LOG(INFO) << "Enter GeneralRemoteOp::inference()";
int expected = 0;
std::vector<std::string> op_address = address();
if (BRPCStub::inited.compare_exchange_strong(expected, 1)) {
BRPCStub::options.protocol = "baidu_std";
BRPCStub::options.connection_type = "short";
BRPCStub::options.timeout_ms = 80000 /*milliseconds*/;
BRPCStub::options.max_retry = 100;
brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;

LOG(INFO) << "address size: " << op_address.size();
for (size_t i = 0; i < op_address.size(); ++i) {
LOG(INFO) << i + 1 << " address is " << op_address[i].c_str();
BRPCStub::brpc_channels[i].Init(
op_address[i].c_str(), LODABALANCE.c_str(), &BRPCStub::options);
}

BRPCStub::inited++;
}
// Spin until the initializing thread publishes state 2.
while (BRPCStub::inited < 2) {
}

Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
VLOG(2) << "Going to run Remote inference";

Request* req = (Request*)(get_request_message());
Response* res = mutable_data<Response>();
uint64_t log_id = req->log_id();

brpc::Controller brpc_controllers[MAX_MP_NUM];
brpc::CallId brpc_callids[MAX_MP_NUM];
Response brpc_response_tmp;

size_t i = 0;
// Init BRPC controllers, callids and stubs
for (i = 0; i < op_address.size(); ++i) {
brpc_controllers[i].set_log_id(log_id);
brpc_callids[i] = brpc_controllers[i].call_id();
}
for (i = 0; i < op_address.size(); ++i) {
baidu::paddle_serving::predictor::general_model::GeneralModelService_Stub
stub(&BRPCStub::brpc_channels[i]);
LOG(INFO) << "Sent 1 request to slave server " << i;
if (0 == i) {
stub.inference(&brpc_controllers[i], req, res, brpc::DoNothing());
continue;
}
stub.inference(
&brpc_controllers[i], req, &brpc_response_tmp, brpc::DoNothing());
}

LOG(INFO) << "All requests are sent, waiting for all responses.";

// Wait RPC done.
for (i = 0; i < op_address.size(); ++i) {
brpc::Join(brpc_callids[i]);
}

// Print RPC Results
for (i = 0; i < op_address.size(); ++i) {
LOG(INFO) << "brpc_controller_" << i
<< " status:" << brpc_controllers[i].Failed();
if (!brpc_controllers[i].Failed()) {
LOG(INFO) << "Received response from "
<< brpc_controllers[i].remote_side()
<< " Latency=" << brpc_controllers[i].latency_us() << "us";
} else {
LOG(ERROR) << brpc_controllers[i].ErrorText();
}
}
LOG(INFO) << "All brpc remote calls joined.";

res->set_log_id(log_id);
res->set_profile_server(req->profile_server());
int64_t end = timeline.TimeStampUS();
res->add_profile_time(start);
res->add_profile_time(end);

return 0;
}

DEFINE_OP(GeneralRemoteOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
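The channel setup above uses a three-state atomic guard: 0 means uninitialized, 1 means a thread is initializing, and 2 means ready. The thread that wins the compare-and-swap performs the one-time brpc channel initialization and then increments the flag; everyone else spins until state 2 becomes visible. A standalone sketch of the pattern:

```cpp
// Minimal sketch of the once-only init guard used by GeneralRemoteOp.
#include <atomic>

std::atomic<int> inited(0);  // 0 = uninitialized, 1 = initializing, 2 = ready

void EnsureInitedOnce() {
  int expected = 0;
  if (inited.compare_exchange_strong(expected, 1)) {
    // Exactly one thread takes 0 -> 1 and performs the setup.
    // ... initialize channels here ...
    inited++;  // publish readiness: 1 -> 2
  }
  while (inited < 2) {
    // Late arrivals busy-wait until the winner finishes.
  }
}
```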
58 changes: 58 additions & 0 deletions core/general-server/op/general_remote_op.h
@@ -0,0 +1,58 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <brpc/channel.h>
#include <butil/logging.h>
#include <butil/time.h>
#include <gflags/gflags.h>
#include <atomic>
#include <memory>
#include <string>
#include <vector>

#include "core/general-server/general_model_service.pb.h"

#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"

#define MAX_MP_NUM 16

namespace baidu {
namespace paddle_serving {
namespace serving {

using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

class GeneralRemoteOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
DECLARE_OP(GeneralRemoteOp);
int inference();
};

class BRPCStub {
public:
static brpc::Channel brpc_channels[MAX_MP_NUM];
static brpc::ChannelOptions options;
static std::atomic<int> inited;
};

} // namespace serving
} // namespace paddle_serving
} // namespace baidu
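For context, a server op like this one is activated through the workflow graph rather than called directly. A hypothetical `workflow.prototxt` entry routing requests through the new op might look like the following (the workflow and node names are illustrative, and the exact schema should be checked against the server configuration protos):

```protobuf
workflows {
  name: "workflow1"
  workflow_type: "Sequence"
  nodes {
    name: "general_remote_0"
    type: "GeneralRemoteOp"
  }
}
```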
2 changes: 1 addition & 1 deletion core/general-server/proto/general_model_service.proto
@@ -94,9 +94,9 @@ message Response {
repeated int64 profile_time = 2;
bool profile_server = 3;
uint64 log_id = 4;

// Error code
int32 err_no = 5;

// Error messages
string err_msg = 6;
};
2 changes: 1 addition & 1 deletion core/predictor/common/constant.cpp
@@ -20,7 +20,7 @@ namespace predictor {

DEFINE_bool(use_parallel_infer_service, false, "");
DEFINE_int32(el_log_level, 16, "");
-DEFINE_int32(idle_timeout_s, 16, "");
+DEFINE_int32(idle_timeout_s, 80, "");
DEFINE_int32(port, 8010, "");
DEFINE_string(workflow_path, "./conf", "");
DEFINE_string(workflow_file, "workflow.prototxt", "");
2 changes: 1 addition & 1 deletion core/predictor/framework/bsf-inl.h
@@ -341,7 +341,7 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
LOG(INFO) << "Hit auto padding, merge " << padding_task_count
<< " tasks into 1 batch.";
}
-LOG(INFO) << "Number of tasks remaining in _task_queue is"
+LOG(INFO) << "Number of tasks remaining in _task_queue is "
<< _task_queue.size();
return true;
}
2 changes: 1 addition & 1 deletion core/sdk-cpp/proto/general_model_service.proto
@@ -94,9 +94,9 @@ message Response {
repeated int64 profile_time = 2;
bool profile_server = 3;
uint64 log_id = 4;

// Error code
int32 err_no = 5;

// Error messages
string err_msg = 6;
};
1 change: 1 addition & 0 deletions core/sdk-cpp/proto/load_general_model_service.proto
@@ -21,6 +21,7 @@ option cc_generic_services = true;
message RequestAndResponse {
required int32 a = 1;
required float b = 2;
required uint64 log_id = 3 [ default = 0 ];
};

service LoadGeneralModelService {
1 change: 1 addition & 0 deletions doc/Offical_Docs/10-0_Terminology.md
@@ -0,0 +1 @@
# Terminology (名词术语解释)