Merge pull request #1783 from TeslaZhao/v0.9.0
[cherry-pick] some Updates
TeslaZhao committed May 18, 2022
2 parents b0bd791 + 4d035eb commit f93957f
Showing 44 changed files with 3,824 additions and 191 deletions.
17 changes: 15 additions & 2 deletions cmake/paddlepaddle.cmake
@@ -171,14 +171,27 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)

#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)

#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)

if (NOT WITH_MKLML)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
endif()

#ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
#SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)

#ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
#SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)

ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
-if (WITH_ASCEND_CL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
+if (WITH_ASCEND_CL OR WITH_XPU)
+SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
endif()

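With this change the imported `paddle_inference` target points at the static `libpaddle_inference.a` by default, and only Ascend CL and XPU builds fall back to the shared `libpaddle_inference.so`. As a rough sketch (the `serving_demo` target name is hypothetical), a consumer elsewhere in the build would link the imported target as usual:

```cmake
# Hypothetical consumer target; paddle_inference is the imported
# library defined in cmake/paddlepaddle.cmake above.
add_executable(serving_demo demo.cpp)
target_link_libraries(serving_demo paddle_inference)
```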
7 changes: 4 additions & 3 deletions core/configure/proto/general_model_service.proto
@@ -90,11 +90,12 @@ message Request {
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
+bool profile_server = 3;
+uint64 log_id = 4;
// Error code
-int32 err_no = 3;
-
+int32 err_no = 5;
// Error messages
-string err_msg = 4;
+string err_msg = 6;
};

message ModelOutput {
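Renumbering `err_no` and `err_msg` from fields 3/4 to 5/6 changes the wire format, so clients and servers built from this proto must be regenerated together. A minimal sketch of client-side error handling against the regenerated stubs (the `CheckResponse` helper and the generated header path are illustrative):

```cpp
// Illustrative only; assumes the C++ stubs regenerated from this proto.
// #include "general_model_service.pb.h"  // generated header, path assumed
#include <iostream>

using baidu::paddle_serving::predictor::general_model::Response;

void CheckResponse(const Response& res) {
  if (res.err_no() != 0) {  // err_no is now field 5, err_msg field 6
    std::cerr << "log_id=" << res.log_id() << " failed: " << res.err_msg()
              << std::endl;
  }
}
```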
14 changes: 14 additions & 0 deletions core/configure/proto/server_configure.proto
@@ -49,6 +49,20 @@ message EngineDesc {
optional bool gpu_multi_stream = 20;
optional bool use_ascend_cl = 21;

/*
* "gpu_memory_mb": allocate GPU memory via config.EnableUseGpu()
* "cpu_math_thread_num": set the number of CPU math threads via
* config.SetCpuMathLibraryNumThreads()
* "trt_workspace_size": set the TensorRT workspace size via
* config.EnableTensorRtEngine(), default 1 << 25
* "trt_use_static": if true, serialize the TensorRT optimization
* information to disk and reload it from disk later.
*/
optional int32 gpu_memory_mb = 22 [ default = 100 ];
optional int32 cpu_math_thread_num = 23 [ default = 1 ];
optional int32 trt_workspace_size = 24 [ default = 33554432 ];
optional bool trt_use_static = 25 [ default = false ];

/*
* "runtime_thread_num": n == 0 means don't use asynchronous task scheduling
* mode.
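The four new options correspond to the Paddle Inference `Config` calls named in the comment. A sketch of how an engine might apply an `EngineDesc` (the `ApplyEngineDesc` helper and its call site are illustrative; the `paddle_infer::Config` methods are the public inference API):

```cpp
// Illustrative mapping from the new EngineDesc fields to paddle_infer::Config.
#include "paddle_inference_api.h"

void ApplyEngineDesc(paddle_infer::Config* config, int gpu_memory_mb,
                     int cpu_math_thread_num, int trt_workspace_size,
                     bool trt_use_static) {
  // gpu_memory_mb: initial GPU memory pool size, here for device 0.
  config->EnableUseGpu(gpu_memory_mb, 0);
  // cpu_math_thread_num: thread count for the CPU math library.
  config->SetCpuMathLibraryNumThreads(cpu_math_thread_num);
  // trt_workspace_size (default 1 << 25) and trt_use_static, which saves
  // the serialized TensorRT engine to disk and reloads it on restart.
  config->EnableTensorRtEngine(trt_workspace_size, /*max_batch_size=*/1,
                               /*min_subgraph_size=*/3,
                               paddle_infer::PrecisionType::kFloat32,
                               /*use_static=*/trt_use_static,
                               /*use_calib_mode=*/false);
}
```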
126 changes: 126 additions & 0 deletions core/general-server/op/general_remote_op.cpp
@@ -0,0 +1,126 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "core/general-server/op/general_remote_op.h"
#include <iostream>
#include <sstream>
#include "core/util/include/timer.h"

// paddle inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8
// FLOAT16 support is planned.
// 2 GB cap for the brpc body; the unsigned literal keeps the
// multiplication from overflowing a signed int.
#define BRPC_MAX_BODY_SIZE (2ULL * 1024 * 1024 * 1024)
const std::string LODABALANCE = "";  // empty string selects brpc's default load balancer

namespace baidu {
namespace paddle_serving {
namespace serving {

using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

brpc::Channel BRPCStub::brpc_channels[MAX_MP_NUM];

brpc::ChannelOptions BRPCStub::options;
std::atomic<int> BRPCStub::inited(0);

int GeneralRemoteOp::inference() {
LOG(INFO) << "Enter GeneralRemoteOp::inference()";
int expected = 0;
std::vector<std::string> op_address = address();
if (BRPCStub::inited.compare_exchange_strong(expected, 1)) {
BRPCStub::options.protocol = "baidu_std";
BRPCStub::options.connection_type = "short";
BRPCStub::options.timeout_ms = 80000 /*milliseconds*/;
BRPCStub::options.max_retry = 100;
brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;

LOG(INFO) << "address size: " << op_address.size();
for (size_t i = 0; i < op_address.size(); ++i) {
LOG(INFO) << i + 1 << " address is " << op_address[i].c_str();
BRPCStub::brpc_channels[i].Init(
op_address[i].c_str(), LODABALANCE.c_str(), &BRPCStub::options);
}

BRPCStub::inited++;
}
// Spin until the initializing thread publishes state 2.
while (BRPCStub::inited < 2) {
}

Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
VLOG(2) << "Going to run Remote inference";

Request* req = (Request*)(get_request_message());
Response* res = mutable_data<Response>();
uint64_t log_id = req->log_id();

brpc::Controller brpc_controllers[MAX_MP_NUM];
brpc::CallId brpc_callids[MAX_MP_NUM];
Response brpc_response_tmp;

size_t i = 0;
// Init BRPC controllers, callids and stubs
for (i = 0; i < op_address.size(); ++i) {
brpc_controllers[i].set_log_id(log_id);
brpc_callids[i] = brpc_controllers[i].call_id();
}
for (i = 0; i < op_address.size(); ++i) {
baidu::paddle_serving::predictor::general_model::GeneralModelService_Stub
stub(&BRPCStub::brpc_channels[i]);
LOG(INFO) << "Sent 1 request to slave server " << i;
if (0 == i) {
stub.inference(&brpc_controllers[i], req, res, brpc::DoNothing());
continue;
}
stub.inference(
&brpc_controllers[i], req, &brpc_response_tmp, brpc::DoNothing());
}

LOG(INFO) << "All requests are sent, waiting for all responses.";

// Wait RPC done.
for (i = 0; i < op_address.size(); ++i) {
brpc::Join(brpc_callids[i]);
}

// Print RPC Results
for (i = 0; i < op_address.size(); ++i) {
LOG(INFO) << "brpc_controller_" << i
<< " status:" << brpc_controllers[i].Failed();
if (!brpc_controllers[i].Failed()) {
LOG(INFO) << "Received response from "
<< brpc_controllers[i].remote_side()
<< " Latency=" << brpc_controllers[i].latency_us() << "us";
} else {
LOG(ERROR) << brpc_controllers[i].ErrorText();
}
}
LOG(INFO) << "All brpc remote calls joined.";

res->set_log_id(log_id);
res->set_profile_server(req->profile_server());
int64_t end = timeline.TimeStampUS();
res->add_profile_time(start);
res->add_profile_time(end);

return 0;
}

DEFINE_OP(GeneralRemoteOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
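The channel setup above uses a three-state atomic guard: 0 means uninitialized, 1 means a thread is initializing, and 2 means ready. The thread that wins the compare-and-swap performs the one-time brpc channel initialization and then increments the flag; everyone else spins until state 2 becomes visible. A standalone sketch of the pattern:

```cpp
// Minimal sketch of the once-only init guard used by GeneralRemoteOp.
#include <atomic>

std::atomic<int> inited(0);  // 0 = uninitialized, 1 = initializing, 2 = ready

void EnsureInitedOnce() {
  int expected = 0;
  if (inited.compare_exchange_strong(expected, 1)) {
    // Exactly one thread takes 0 -> 1 and performs the setup.
    // ... initialize channels here ...
    inited++;  // publish readiness: 1 -> 2
  }
  while (inited < 2) {
    // Late arrivals busy-wait until the winner finishes.
  }
}
```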
58 changes: 58 additions & 0 deletions core/general-server/op/general_remote_op.h
@@ -0,0 +1,58 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <brpc/channel.h>
#include <butil/logging.h>
#include <butil/time.h>
#include <gflags/gflags.h>
#include <atomic>
#include <memory>
#include <string>
#include <vector>

#include "core/general-server/general_model_service.pb.h"

#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"

#define MAX_MP_NUM 16

namespace baidu {
namespace paddle_serving {
namespace serving {

using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

class GeneralRemoteOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
DECLARE_OP(GeneralRemoteOp);
int inference();
};

class BRPCStub {
public:
static brpc::Channel brpc_channels[MAX_MP_NUM];
static brpc::ChannelOptions options;
static std::atomic<int> inited;
};

} // namespace serving
} // namespace paddle_serving
} // namespace baidu
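For context, a server op like this one is activated through the workflow graph rather than called directly. A hypothetical `workflow.prototxt` entry routing requests through the new op might look like the following (the workflow and node names are illustrative, and the exact schema should be checked against the server configuration protos):

```protobuf
workflows {
  name: "workflow1"
  workflow_type: "Sequence"
  nodes {
    name: "general_remote_0"
    type: "GeneralRemoteOp"
  }
}
```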
2 changes: 1 addition & 1 deletion core/general-server/proto/general_model_service.proto
@@ -94,9 +94,9 @@ message Response {
repeated int64 profile_time = 2;
bool profile_server = 3;
uint64 log_id = 4;

// Error code
int32 err_no = 5;

// Error messages
string err_msg = 6;
};
2 changes: 1 addition & 1 deletion core/predictor/common/constant.cpp
@@ -20,7 +20,7 @@ namespace predictor {

DEFINE_bool(use_parallel_infer_service, false, "");
DEFINE_int32(el_log_level, 16, "");
-DEFINE_int32(idle_timeout_s, 16, "");
+DEFINE_int32(idle_timeout_s, 80, "");
DEFINE_int32(port, 8010, "");
DEFINE_string(workflow_path, "./conf", "");
DEFINE_string(workflow_file, "workflow.prototxt", "");
2 changes: 1 addition & 1 deletion core/predictor/framework/bsf-inl.h
@@ -341,7 +341,7 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
LOG(INFO) << "Hit auto padding, merge " << padding_task_count
<< " tasks into 1 batch.";
}
-LOG(INFO) << "Number of tasks remaining in _task_queue is"
+LOG(INFO) << "Number of tasks remaining in _task_queue is "
<< _task_queue.size();
return true;
}
2 changes: 1 addition & 1 deletion core/sdk-cpp/proto/general_model_service.proto
@@ -94,9 +94,9 @@ message Response {
repeated int64 profile_time = 2;
bool profile_server = 3;
uint64 log_id = 4;

// Error code
int32 err_no = 5;

// Error messages
string err_msg = 6;
};
1 change: 1 addition & 0 deletions core/sdk-cpp/proto/load_general_model_service.proto
@@ -21,6 +21,7 @@ option cc_generic_services = true;
message RequestAndResponse {
required int32 a = 1;
required float b = 2;
required uint64 log_id = 3 [ default = 0 ];
};

service LoadGeneralModelService {
1 change: 1 addition & 0 deletions doc/Offical_Docs/10-0_Terminology.md
@@ -0,0 +1 @@
# Terminology (名词术语解释)