Merge pull request #1759 from ShiningZhang/trt_dynamic_shape
set trt dynamic shape for ocr
ShiningZhang authored Apr 24, 2022
2 parents 37840af + 55d8936 commit 89f24dd
Showing 16 changed files with 181 additions and 42 deletions.
4 changes: 4 additions & 0 deletions core/configure/proto/server_configure.proto
@@ -65,6 +65,10 @@ message EngineDesc {
optional int32 batch_infer_size = 31 [ default = 32 ];
optional bool enable_overrun = 32 [ default = false ];
optional bool allow_split_request = 33 [ default = true ];
+optional int32 min_subgraph_size = 34 [ default = 3 ];
+map<string,string> min_input_shape = 35;
+map<string,string> max_input_shape = 36;
+map<string,string> opt_input_shape = 37;
};

// model_toolkit conf
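The three new `map<string,string>` fields carry each input tensor's shape as a whitespace-separated integer string (for example `"1 3 224 224"`), which the engine later splits with `std::istringstream` and converts with `std::stoi` (see the `paddle_engine.h` change below). A minimal, self-contained sketch of that parsing convention; the helper name and the sample shape values are illustrative only, not taken from this commit:

```cpp
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical helper: turn a whitespace-separated shape string such as
// "1 3 224 224" into a vector of ints, the format expected by the new
// min_input_shape / max_input_shape / opt_input_shape map fields.
std::vector<int> ParseShapeString(const std::string& value) {
  std::istringstream ss(value);
  std::vector<int> shape;
  std::string word;
  while (ss >> word) {
    shape.push_back(std::stoi(word));
  }
  return shape;
}

int main() {
  // Example entry: input tensor "x" with an assumed minimum shape of 1x3x50x50.
  std::map<std::string, std::string> min_input_shape_conf{{"x", "1 3 50 50"}};
  std::map<std::string, std::vector<int>> min_input_shape;
  for (const auto& kv : min_input_shape_conf) {
    min_input_shape[kv.first] = ParseShapeString(kv.second);
  }
  std::cout << "parsed " << min_input_shape["x"].size() << " dims" << std::endl;  // prints: parsed 4 dims
  return 0;
}
```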
2 changes: 1 addition & 1 deletion core/general-server/op/general_detection_op.cpp
@@ -244,7 +244,7 @@ int GeneralDetectionOp::inference() {
databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
paddle::PaddleTensor tensor_out;
-tensor_out.name = "image";
+tensor_out.name = "x";
tensor_out.dtype = paddle::PaddleDType::FLOAT32;
tensor_out.shape = output_shape;
tensor_out.data = paddleBuf;
6 changes: 3 additions & 3 deletions examples/C++/PaddleOCR/ocr/README.md
@@ -4,9 +4,9 @@

## Get Model
```
-wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz
+python3 -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
-wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz
+python3 -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz
```

@@ -108,7 +108,7 @@ python3 rec_web_client.py
When one service starts two models in series, you only need to pass the relative paths of the model folders, in order, after `--model`, and the custom C++ OP class names after `--op`. The order of the models after `--model` must correspond to the order of the class names after `--op`. Here it is assumed that the two OPs have been defined as GeneralDetectionOp and GeneralRecOp respectively; the script is as follows:
```python
#One service starts the concatenation of two models
-python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293
+python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293
#ocr_det_model corresponds to GeneralDetectionOp, ocr_rec_model corresponds to GeneralRecOp
```

6 changes: 3 additions & 3 deletions examples/C++/PaddleOCR/ocr/README_CN.md
@@ -4,9 +4,9 @@

## Get Model
```
-wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz
+python3 -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
-wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz
+python3 -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz
```
## Get Dataset (Optional)
@@ -106,7 +106,7 @@ python3 rec_web_client.py
To have one service start two models in series, you only need to pass the relative paths of the model folders, in order, after `--model`, and the custom C++ OP class names, in order, after `--op`; the order of the models after `--model` must correspond to the order of the class names after `--op`. Here it is assumed that the two OPs have already been defined as GeneralDetectionOp and GeneralRecOp, and the script is as follows:
```python
#One service starts multiple models in series
-python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293
+python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293
#Multi-model pipeline: ocr_det_model corresponds to GeneralDetectionOp, ocr_rec_model corresponds to GeneralRecOp
```

8 changes: 4 additions & 4 deletions examples/C++/PaddleOCR/ocr/det_debugger_server.py
@@ -47,18 +47,18 @@ def init_det(self):
})

def preprocess(self, feed=[], fetch=[]):
-data = base64.b64decode(feed[0]["image"].encode('utf8'))
+data = base64.b64decode(feed[0]["x"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = im.shape
det_img = self.det_preprocess(im)
_, self.new_h, self.new_w = det_img.shape
return {
-"image": det_img[np.newaxis, :].copy()
-}, ["concat_1.tmp_0"], True
+"x": det_img[np.newaxis, :].copy()
+}, ["save_infer_model/scale_0.tmp_1"], True

def postprocess(self, feed={}, fetch=[], fetch_map=None):
-det_out = fetch_map["concat_1.tmp_0"]
+det_out = fetch_map["save_infer_model/scale_0.tmp_1"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
6 changes: 3 additions & 3 deletions examples/C++/PaddleOCR/ocr/det_web_server.py
@@ -47,17 +47,17 @@ def init_det(self):
})

def preprocess(self, feed=[], fetch=[]):
-data = base64.b64decode(feed[0]["image"].encode('utf8'))
+data = base64.b64decode(feed[0]["x"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = im.shape
det_img = self.det_preprocess(im)
_, self.new_h, self.new_w = det_img.shape
print(det_img)
-return {"image": det_img}, ["concat_1.tmp_0"], False
+return {"x": det_img}, ["save_infer_model/scale_0.tmp_1"], False

def postprocess(self, feed={}, fetch=[], fetch_map=None):
-det_out = fetch_map["concat_1.tmp_0"]
+det_out = fetch_map["save_infer_model/scale_0.tmp_1"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
8 changes: 3 additions & 5 deletions examples/C++/PaddleOCR/ocr/ocr_cpp_client.py
@@ -42,13 +42,11 @@ def cv2_to_base64(image):
image_data = file.read()
image = cv2_to_base64(image_data)
fetch_map = client.predict(
-feed={"image": image},
-fetch=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
+feed={"x": image},
+fetch=["save_infer_model/scale_0.tmp_1"],
batch=True)
result = {}
-result["score"] = fetch_map["softmax_0.tmp_0"]
-del fetch_map["softmax_0.tmp_0"]
-rec_res = OCRReader().postprocess(fetch_map, with_score=False)
+rec_res = OCRReader().postprocess_ocrv2(fetch_map, with_score=False)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
12 changes: 6 additions & 6 deletions examples/C++/PaddleOCR/ocr/ocr_debugger_server.py
@@ -48,7 +48,7 @@ def init_det_debugger(self, det_model_config):
self.ocr_reader = OCRReader()

def preprocess(self, feed=[], fetch=[]):
-data = base64.b64decode(feed[0]["image"].encode('utf8'))
+data = base64.b64decode(feed[0]["x"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
ori_h, ori_w, _ = im.shape
@@ -57,7 +57,7 @@ def preprocess(self, feed=[], fetch=[]):
det_img = det_img[np.newaxis, :]
det_img = det_img.copy()
det_out = self.det_client.predict(
-feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=True)
+feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True)
filter_func = FilterBoxes(10, 10)
post_func = DBPostProcess({
"thresh": 0.3,
@@ -68,7 +68,7 @@ def preprocess(self, feed=[], fetch=[]):
})
sorted_boxes = SortedBoxes()
ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
-dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list])
+dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list])
dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
dt_boxes = sorted_boxes(dt_boxes)
get_rotate_crop_image = GetRotateCropImage()
@@ -88,12 +88,12 @@ def preprocess(self, feed=[], fetch=[]):
for id, img in enumerate(img_list):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
imgs[id] = norm_img
-feed = {"image": imgs.copy()}
-fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
+feed = {"x": imgs.copy()}
+fetch = ["save_infer_model/scale_0.tmp_1"]
return feed, fetch, True

def postprocess(self, feed={}, fetch=[], fetch_map=None):
-rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
+rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
2 changes: 1 addition & 1 deletion examples/C++/PaddleOCR/ocr/ocr_web_client.py
@@ -34,7 +34,7 @@ def cv2_to_base64(image):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
-data = {"feed": [{"image": image}], "fetch": ["res"]}
+data = {"feed": [{"x": image}], "fetch": ["res"]}
r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r)
print(r.json())
12 changes: 6 additions & 6 deletions examples/C++/PaddleOCR/ocr/ocr_web_server.py
@@ -44,13 +44,13 @@ def init_det_client(self, det_port, det_client_config):
self.ocr_reader = OCRReader()

def preprocess(self, feed=[], fetch=[]):
-data = base64.b64decode(feed[0]["image"].encode('utf8'))
+data = base64.b64decode(feed[0]["x"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
ori_h, ori_w, _ = im.shape
det_img = self.det_preprocess(im)
det_out = self.det_client.predict(
-feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=False)
+feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=False)
_, new_h, new_w = det_img.shape
filter_func = FilterBoxes(10, 10)
post_func = DBPostProcess({
@@ -62,7 +62,7 @@ def preprocess(self, feed=[], fetch=[]):
})
sorted_boxes = SortedBoxes()
ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w]
-dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list])
+dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list])
dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w])
dt_boxes = sorted_boxes(dt_boxes)
get_rotate_crop_image = GetRotateCropImage()
@@ -78,12 +78,12 @@ def preprocess(self, feed=[], fetch=[]):
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed_list.append(norm_img[np.newaxis, :])
-feed_batch = {"image": np.concatenate(feed_list, axis=0)}
-fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
+feed_batch = {"x": np.concatenate(feed_list, axis=0)}
+fetch = ["save_infer_model/scale_0.tmp_1"]
return feed_batch, fetch, True

def postprocess(self, feed={}, fetch=[], fetch_map=None):
-rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
+rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
8 changes: 4 additions & 4 deletions examples/C++/PaddleOCR/ocr/rec_debugger_server.py
@@ -38,7 +38,7 @@ def init_rec(self):
def preprocess(self, feed=[], fetch=[]):
img_list = []
for feed_data in feed:
-data = base64.b64decode(feed_data["image"].encode('utf8'))
+data = base64.b64decode(feed_data["x"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img_list.append(im)
@@ -53,12 +53,12 @@ def preprocess(self, feed=[], fetch=[]):
for i, img in enumerate(img_list):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
imgs[i] = norm_img
-feed = {"image": imgs.copy()}
-fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
+feed = {"x": imgs.copy()}
+fetch = ["save_infer_model/scale_0.tmp_1"]
return feed, fetch, True

def postprocess(self, feed={}, fetch=[], fetch_map=None):
-rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
+rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
2 changes: 1 addition & 1 deletion examples/C++/PaddleOCR/ocr/rec_web_client.py
@@ -36,6 +36,6 @@ def cv2_to_base64(image):
image_data1 = file.read()
image = cv2_to_base64(image_data1)
#data = {"feed": [{"image": image}], "fetch": ["res"]}
-data = {"feed": [{"image": image}] * 3, "fetch": ["res"]}
+data = {"feed": [{"x": image}] * 3, "fetch": ["res"]}
r = requests.post(url=url, headers=headers, data=json.dumps(data))
print(r.json())
8 changes: 4 additions & 4 deletions examples/C++/PaddleOCR/ocr/rec_web_server.py
@@ -39,7 +39,7 @@ def preprocess(self, feed=[], fetch=[]):
# TODO: to handle batch rec images
img_list = []
for feed_data in feed:
-data = base64.b64decode(feed_data["image"].encode('utf8'))
+data = base64.b64decode(feed_data["x"].encode('utf8'))
data = np.fromstring(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img_list.append(im)
@@ -55,12 +55,12 @@ def preprocess(self, feed=[], fetch=[]):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
imgs[i] = norm_img

-feed = {"image": imgs.copy()}
-fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
+feed = {"x": imgs.copy()}
+fetch = ["save_infer_model/scale_0.tmp_1"]
return feed, fetch, True

def postprocess(self, feed={}, fetch=[], fetch_map=None):
-rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True)
+rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
52 changes: 51 additions & 1 deletion paddle_inference/paddle/include/paddle_engine.h
@@ -225,6 +225,11 @@ class PaddleInferenceEngine : public EngineCore {
config.SwitchIrOptim(true);
}

+int local_min_subgraph_size = min_subgraph_size;
+if (engine_conf.has_min_subgraph_size()) {
+local_min_subgraph_size = engine_conf.min_subgraph_size();
+}
+
if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
config.SwitchIrOptim(true);
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
@@ -236,10 +241,55 @@
}
config.EnableTensorRtEngine(1 << 20,
max_batch,
-min_subgraph_size,
+local_min_subgraph_size,
precision_type,
false,
FLAGS_use_calib);
+std::map<std::string, std::vector<int>> min_input_shape;
+std::map<std::string, std::vector<int>> max_input_shape;
+std::map<std::string, std::vector<int>> optim_input_shape;
+if (engine_conf.min_input_shape_size() > 0) {
+for (auto& iter : engine_conf.min_input_shape()) {
+std::string key = iter.first;
+std::string value = iter.second;
+std::istringstream ss(value);
+std::string word;
+std::vector<int> arr;
+while(ss >> word) {
+arr.push_back(std::stoi(word));
+}
+min_input_shape[key] = arr;
+}
+}
+if (engine_conf.max_input_shape_size() > 0) {
+for (auto& iter : engine_conf.max_input_shape()) {
+std::string key = iter.first;
+std::string value = iter.second;
+std::istringstream ss(value);
+std::string word;
+std::vector<int> arr;
+while(ss >> word) {
+arr.push_back(std::stoi(word));
+}
+max_input_shape[key] = arr;
+}
+}
+if (engine_conf.opt_input_shape_size() > 0) {
+for (auto& iter : engine_conf.opt_input_shape()) {
+std::string key = iter.first;
+std::string value = iter.second;
+std::istringstream ss(value);
+std::string word;
+std::vector<int> arr;
+while(ss >> word) {
+arr.push_back(std::stoi(word));
+}
+optim_input_shape[key] = arr;
+}
+}
+config.SetTRTDynamicShapeInfo(min_input_shape,
+max_input_shape,
+optim_input_shape);
LOG(INFO) << "create TensorRT predictor";
}

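The three parsing loops added above differ only in which protobuf map they read and which local map they fill, so they could be collapsed into a single helper whose results are passed straight to `SetTRTDynamicShapeInfo`. The sketch below is a possible tidy-up, not part of this commit; it assumes only the map accessors (`min_input_shape()` and friends) and the `config` object already shown in the diff, and the helper name `ToShapeMap` is made up for illustration.

```cpp
#include <map>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical helper: convert any map whose values are whitespace-separated
// integer strings (e.g. a protobuf map<string,string>) into the
// map<string, vector<int>> form that SetTRTDynamicShapeInfo expects.
template <typename ProtoMap>
std::map<std::string, std::vector<int>> ToShapeMap(const ProtoMap& conf) {
  std::map<std::string, std::vector<int>> out;
  for (const auto& iter : conf) {
    std::istringstream ss(iter.second);
    std::vector<int> arr;
    std::string word;
    while (ss >> word) {
      arr.push_back(std::stoi(word));
    }
    out[iter.first] = arr;
  }
  return out;
}

// Possible call site inside the TensorRT branch shown above:
//   config.SetTRTDynamicShapeInfo(ToShapeMap(engine_conf.min_input_shape()),
//                                 ToShapeMap(engine_conf.max_input_shape()),
//                                 ToShapeMap(engine_conf.opt_input_shape()));
```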