// detector.cpp
#include "detector.h"
#define IMAGE_LEN 640
bool Detector::parseYolov5(const Blob::Ptr &blob, float cof_threshold,
                           std::vector<Rect> &o_rect, std::vector<float> &o_rect_cof,
                           std::vector<int> &classId)
{
    // For a [1,3,640,640] input, the three output heads have shape
    // 1 x 3 x {80,40,20} x {80,40,20} x (4+1+class_num).
    // blob->getTensorDesc().getDims() gives the output-head dimensions:
    // batch size, anchor count, grid height, grid width, items per anchor.
    const int net_grid_h = static_cast<int>(blob->getTensorDesc().getDims()[2]); // 80/40/20
    const int net_grid_w = static_cast<int>(blob->getTensorDesc().getDims()[3]); // 80/40/20
    const int batch_size = static_cast<int>(blob->getTensorDesc().getDims()[0]); // batch size (fixed to 1 here)
    const int anchor_num = static_cast<int>(blob->getTensorDesc().getDims()[1]); // anchors per scale, 3
    // item_size = 4 (box) + 1 (objectness) + class_num; 85 for COCO's 80 classes
    const int item_size = static_cast<int>(blob->getTensorDesc().getDims()[4]);
    cout << "anchor scale: " << net_grid_h << endl;
    // pick the anchor set that matches this output's grid size
    std::vector<int> anchors = get_anchors(net_grid_h);
    // map the output blob memory for reading
    InferenceEngine::LockedMemory<const void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob)->rmap();
    const float *output_blob = blobMapped.as<float *>();
    int net_grid = net_grid_h;
    // strides of the flattened [anchor, grid_y, grid_x, item] output
    std::size_t gi = net_grid * item_size;  // one grid row
    std::size_t ggi = net_grid * gi;        // one anchor plane
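    // Indexing note: the 5-D output [1, anchor, gy, gx, item] is read as a flat
    // float array, so element [n][i][j][k] lives at
    //     n*ggi + i*gi + j*item_size + k.
    // Worked example for an 80x80 head with item_size = 85:
    // gi = 80*85 = 6800, ggi = 80*6800 = 544000, so anchor 1, cell (2,3),
    // objectness (k = 4) sits at 1*544000 + 2*6800 + 3*85 + 4 = 557859.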
    // parse the output and decode the predicted boxes
    for (int n = 0; n < anchor_num; ++n)
        for (int i = 0; i < net_grid; ++i)
            for (int j = 0; j < net_grid; ++j) {
                // objectness confidence of this cell/anchor
                double box_prob = sigmoid(output_blob[n*ggi + i * gi + j * item_size + 4]);
                if (box_prob < cof_threshold) continue;
                // raw box outputs; these are center coordinates and sizes,
                // converted to a corner-based cv::Rect below
                double x = output_blob[n*ggi + i * gi + j * item_size + 0];
                double y = output_blob[n*ggi + i * gi + j * item_size + 1];
                double w = output_blob[n*ggi + i * gi + j * item_size + 2];
                double h = output_blob[n*ggi + i * gi + j * item_size + 3];
                // pick the class with the highest score
                double max_prob = 0;
                int idx = 0;
                for (int t = 5; t < item_size; ++t) {
                    double tp = sigmoid(output_blob[n*ggi + i * gi + j * item_size + t]);
                    if (tp > max_prob) {
                        max_prob = tp;
                        idx = t - 5;
                    }
                }
                float cof = box_prob * max_prob;
                // boxes below the confidence threshold are skipped before the
                // (comparatively expensive) decode step
                if (cof < cof_threshold) continue;
                // decode the predicted box
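                // This is the standard YOLOv5 head decode:
                //   bx = (2*sigmoid(tx) - 0.5 + grid_x) * stride
                //   by = (2*sigmoid(ty) - 0.5 + grid_y) * stride
                //   bw = (2*sigmoid(tw))^2 * anchor_w
                //   bh = (2*sigmoid(th))^2 * anchor_h
                // with stride = 640 / net_grid. Example: on the 80x80 head
                // (stride 8), tx = 0 and grid_x = 10 gives bx = (1.0 - 0.5 + 10) * 8 = 84.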
                x = (sigmoid(x) * 2 - 0.5 + j) * 640.0f / net_grid;
                y = (sigmoid(y) * 2 - 0.5 + i) * 640.0f / net_grid;
                w = pow(sigmoid(w) * 2, 2) * anchors[n * 2];
                h = pow(sigmoid(h) * 2, 2) * anchors[n * 2 + 1];
                // center -> top-left corner
                double r_x = x - w / 2;
                double r_y = y - h / 2;
                cv::Rect rect = cv::Rect(round(r_x), round(r_y), round(w), round(h));
                // store the decoded box, its confidence and its class id
                o_rect.push_back(rect);
                o_rect_cof.push_back(cof);
                classId.push_back(idx);
            }
    return true;
}
bool Detector::init(std::string xml_path) {
    try {
        _xml_path = xml_path;
        // create the Inference Engine Core (it loads plugins.xml from the working directory)
        InferenceEngine::Core ie;
        // list the available inference devices
        std::vector<std::string> availableDev = ie.GetAvailableDevices();
        for (std::size_t i = 0; i < availableDev.size(); i++) {
            cout << "supported device name: " << availableDev[i].c_str() << endl;
        }
        // load the detection model from the IR files
        InferenceEngine::CNNNetwork cnnNetwork = ie.ReadNetwork(_xml_path);
        cnnNetwork.setBatchSize(1);
        // configure the network input
        InferenceEngine::InputsDataMap inputInfo(cnnNetwork.getInputsInfo());
        InferenceEngine::InputInfo::Ptr input = inputInfo.begin()->second;
        _input_name = inputInfo.begin()->first; // name of the input head
        input->setPrecision(InferenceEngine::Precision::FP32);
        input->getInputData()->setLayout(InferenceEngine::Layout::NCHW);
        InferenceEngine::ICNNNetwork::InputShapes inputShapes = cnnNetwork.getInputShapes();
        cnnNetwork.reshape(inputShapes);
        // configure the network outputs; YOLOv5 has three output heads
        _outputinfo = InferenceEngine::OutputsDataMap(cnnNetwork.getOutputsInfo());
        for (auto& output : _outputinfo) {
            cout << output.first << endl;
            output.second->setPrecision(InferenceEngine::Precision::FP32);
        }
        // compile the network for the target device
        _network = ie.LoadNetwork(cnnNetwork, "GPU");
        return true;
    }
    catch (const std::exception &) {
        return false;
    }
}
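// Note: parseYolov5 reads getDims()[0..4], so this class assumes the exported
// IR keeps the three detection outputs as 5-D tensors shaped
// [1, 3, grid, grid, 5 + class_num]; a model exported with a different output
// layout would need a different parser.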
bool Detector::process_frame(Mat &curFrame, std::vector<Object> &detectedObj)
{
    cv::Mat resize_img = letterBox(curFrame);
    std::size_t img_size = 640 * 640;
    // create the inference request and map the input blob for writing
    InferenceEngine::InferRequest::Ptr infer_request = _network.CreateInferRequestPtr();
    InferenceEngine::Blob::Ptr frameBlob = infer_request->GetBlob(_input_name);
    InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(frameBlob)->wmap();
    float* blob_data = blobMapped.as<float*>();
    // copy the letterboxed image into the input blob (NCHW, values scaled to [0,1])
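    // Layout note: the blob is planar NCHW, so channel ch of pixel (row, col)
    // goes to blob_data[ch*640*640 + row*640 + col], i.e. one full 640x640
    // plane per channel. Example: channel 1, row 2, col 3 lands at
    // 1*409600 + 2*640 + 3 = 410883.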
    for (std::size_t row = 0; row < IMAGE_LEN; row++) {
        for (std::size_t col = 0; col < IMAGE_LEN; col++) {
            for (std::size_t ch = 0; ch < 3; ch++) {
                // write each pixel into its channel plane (see the layout note above)
                blob_data[img_size*ch + row * IMAGE_LEN + col] = float(resize_img.at<cv::Vec3b>(row, col)[ch] / 255.0f);
            }
        }
    }
    // run inference
    infer_request->Infer();
    // collect the per-layer results into origin_rect, origin_rect_cof and classId
    std::vector<Rect> origin_rect;
    std::vector<float> origin_rect_cof;
    std::vector<int> classId;
    // gather the output blob pointers before the heavy parsing work
    std::vector<InferenceEngine::Blob::Ptr> blobs;
    // blobs holds the raw inference results that parseYolov5 will decode
    for (auto &output : _outputinfo) {
        auto output_name = output.first;
        InferenceEngine::Blob::Ptr blob = infer_request->GetBlob(output_name);
        blobs.push_back(blob);
    }
    // there are three output blobs, one per detection scale
    for (std::size_t i = 0; i < blobs.size(); i++) {
        float th = 0.5;
        // TODO: per-scale thresholds, tune as needed
        if (i == 0) { th = 0.55; } // stricter for small objects
        if (i == 1) { th = 0.45; } // more lenient for larger objects
        if (i == 2) { th = 0.40; }
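        // Note: _outputinfo is a std::map, so blobs[] follows the lexicographic
        // order of the output names, which does not necessarily run small scale
        // -> large scale. parseYolov5 infers the scale from the blob dimensions;
        // only these per-index thresholds depend on the order.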
        // temporary vectors for this scale's parse results
        std::vector<cv::Rect> origin_rect_temp;
        std::vector<float> origin_rect_cof_temp;
        // decode this output blob
        parseYolov5(blobs[i], th, origin_rect_temp, origin_rect_cof_temp, classId);
        origin_rect.insert(origin_rect.end(), origin_rect_temp.begin(), origin_rect_temp.end());
        origin_rect_cof.insert(origin_rect_cof.end(), origin_rect_cof_temp.begin(), origin_rect_cof_temp.end());
    }
    // post-processing to obtain the final detections
    std::vector<int> final_id;
    // TODO: these thresholds may need tuning
    // non-maximum suppression via cv::dnn::NMSBoxes
    cv::dnn::NMSBoxes(origin_rect, origin_rect_cof, _conf_threshold, _nms_area_threshold, final_id);
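    // Note: NMSBoxes drops boxes scoring below _conf_threshold, then greedily
    // keeps the highest-scoring box and suppresses overlapping boxes whose IoU
    // exceeds _nms_area_threshold; final_id holds the indices of the kept boxes.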
    // assemble the final results from the indices kept by NMS
    for (std::size_t i = 0; i < final_id.size(); ++i) {
        cv::Rect resize_rect = origin_rect[final_id[i]];
        // map the box from the letterboxed image back onto the original image
        cv::Rect rawrect = detect2origin(resize_rect, ratio, topPad, leftPad);
        // store the result as an Object
        cout << "kept box index: " << final_id[i] << endl;
        cout << "class id: " << classId[final_id[i]] << endl;
        detectedObj.push_back(Object{
            origin_rect_cof[final_id[i]],
            className[classId[final_id[i]]], rawrect,
        });
    }
    return true;
}
double Detector::sigmoid(double x) {
    return (1 / (1 + exp(-x)));
}
const int anchorBig = 640 / 8;  // grid size at 8x downsampling (80)
const int anchorMid = 640 / 16; // grid size at 16x downsampling (40)
const int anchorSml = 640 / 32; // grid size at 32x downsampling (20)
/* anchors from yolov5s.yaml:
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
*/
const int aBig[6] = { 10,13,16,30,33,23 };
const int aMid[6] = { 30,61,62,45,59,119 };
const int aSml[6] = { 116,90,156,198,373,326 };
// select the anchor set for a given grid size
std::vector<int> Detector::get_anchors(int net_grid)
{
    std::vector<int> anchors(6, 0);
    if (net_grid == anchorBig) { anchors.assign(aBig, aBig + 6); }
    else if (net_grid == anchorMid) { anchors.assign(aMid, aMid + 6); }
    else if (net_grid == anchorSml) { anchors.assign(aSml, aSml + 6); }
    return anchors;
}
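// Example: an 80x80 output head (stride 8) gets the P3/8 anchor set, and
// parseYolov5 reads anchor n as (anchors[2n], anchors[2n+1]) = (width, height)
// in input pixels.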
// resize with aspect ratio preserved, then pad to 640x640 (letterbox)
cv::Mat Detector::letterBox(Mat src)
{
    if (src.empty()) { std::cout << "input image invalid" << endl; return cv::Mat(); }
    int in_w = src.cols;
    int in_h = src.rows;
    int tar_w = 640;
    int tar_h = 640;
    // use the smaller of the two scale factors so the image fits inside 640x640
    ratio = std::min(float(tar_h) / in_h, float(tar_w) / in_w);
    int inside_w = std::round(in_w * ratio);
    int inside_h = std::round(in_h * ratio);
    int pad_w = tar_w - inside_w;
    int pad_h = tar_h - inside_h;
    // resize the inner image
    cv::Mat resize_img;
    cv::resize(src, resize_img, cv::Size(inside_w, inside_h));
    cv::cvtColor(resize_img, resize_img, cv::COLOR_BGR2RGB);
    // half of the padding goes on each side; keep the halves as floats so an
    // odd total pad still adds up to a full 640 below
    float half_pad_w = pad_w / 2.0f;
    float half_pad_h = pad_h / 2.0f;
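    // Worked example: a 1280x720 frame gives ratio = 0.5, an inner image of
    // 640x360, pad_h = 280 and half_pad_h = 140, so topPad = btmPad = 140;
    // a 640x481 frame gives pad_h = 159 and half_pad_h = 79.5, which splits
    // into topPad = 79 and btmPad = 80.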
    // pad the outer border with gray (114,114,114)
    topPad = int(std::round(half_pad_h - 0.1f));
    btmPad = int(std::round(half_pad_h + 0.1f));
    leftPad = int(std::round(half_pad_w - 0.1f));
    rightPad = int(std::round(half_pad_w + 0.1f));
    cv::copyMakeBorder(resize_img, resize_img, topPad, btmPad, leftPad, rightPad, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
    return resize_img;
}
// map a box from the letterboxed 640x640 image back to the original image
cv::Rect Detector::detect2origin(const Rect &det_rect, float rate_to, int top, int left)
{
    // remove the padding offset, then undo the resize ratio
    int inside_x = det_rect.x - left;
    int inside_y = det_rect.y - top;
    int ox = std::round(float(inside_x) / rate_to);
    int oy = std::round(float(inside_y) / rate_to);
    int ow = std::round(float(det_rect.width) / rate_to);
    int oh = std::round(float(det_rect.height) / rate_to);
    cv::Rect origin_rect(ox, oy, ow, oh);
    return origin_rect;
}
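// Usage sketch (not part of the class; the IR path and image file are
// placeholders): a typical caller would look like
//
//     Detector det;
//     if (!det.init("yolov5s.xml")) { /* handle load failure */ }
//     cv::Mat frame = cv::imread("test.jpg");
//     std::vector<Object> objs;
//     det.process_frame(frame, objs);
//     // each Object carries the confidence, class name and original-image rect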