Detection.hpp_bak 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. /*
  2. * AXERA is pleased to support the open source community by making ax-samples available.
  3. *
  4. * Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved.
  5. *
  6. * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  7. * in compliance with the License. You may obtain a copy of the License at
  8. *
  9. * https://opensource.org/licenses/BSD-3-Clause
  10. *
  11. * Unless required by applicable law or agreed to in writing, software distributed
  12. * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  13. * CONDITIONS OF ANY KIND, either express or implied. See the License for the
  14. * specific language governing permissions and limitations under the License.
  15. */
  16. /*
  17. * Author: ls.wang
  18. */
  19. #pragma once
  #include <algorithm>
  #include <array>
  #include <cmath>
  #include <cstdint>
  #include <cstdio>
  #include <string>
  #include <unordered_map>
  #include <vector>
  #ifndef CHECK_SCORE
  // CHECK_SCORE(class_attributes, class_names, score)
  //
  // Evaluates to true when `score` is below a configured threshold:
  //   - the threshold stored under the key `class_names`, if that key exists, or
  //   - the threshold stored under the key "all", if that key exists.
  // A key that is absent simply contributes nothing, so a map without "all" is
  // safe to query (the lookup result is checked before it is dereferenced).
  //
  // `class_attributes` must offer find()/end() returning iterators to
  // (key, threshold) pairs. Every argument is fully parenthesised in the
  // expansion; note that arguments are still evaluated more than once, so they
  // must not have side effects.
  #define CHECK_SCORE(class_attributes, class_names, score)                        \
      ((((class_attributes).find(class_names) != (class_attributes).end()) &&      \
        ((score) < (class_attributes).find(class_names)->second)) ||               \
       (((class_attributes).find("all") != (class_attributes).end()) &&            \
        ((score) < (class_attributes).find("all")->second)))
  #endif
  #ifndef UNSIGMOID
  // UNSIGMOID(value): inverse of the logistic function, i.e. -log(1 / value - 1).
  // `value` is parenthesised so that compound expressions such as
  // UNSIGMOID(a + b) are inverted as a whole. The argument is evaluated once.
  #define UNSIGMOID(value) (-1.0f * static_cast<float>(std::log((1.0f / (value)) - 1.0f)))
  // Suffix appended to a class name to form the key under which the
  // corresponding pre-sigmoid threshold is looked up.
  #define UNSIGMOID_STR "_unsigmoid"
  #endif
  34. namespace detection
  35. {
  // One cell of a feature-map grid together with the stride of that map.
  // generate_grids_and_stride() fills grid0 from its inner (width) loop index
  // and grid1 from its outer (height) loop index.
  struct GridAndStride
  {
      int grid0;  // index along the width axis
      int grid1;  // index along the height axis
      int stride; // stride value the cell was generated for
  };
  // A candidate detection. Every member is zero-initialised by default.
  struct Box
  {
      float xyxy[4]{};       // corners: x0, y0, x1, y1 (as filled by generate_proposals_yolov5)
      float xywh[4]{};       // not written by any code visible in this file
      float object_score{0}; // not written by any code visible in this file
      size_t index{0};       // class index of the best-scoring class
      float score{0};        // final confidence; the key used for sorting
      float area{0};         // box area, consumed by nms_sorted_bboxes
  };
  // Logistic function 1 / (1 + e^-x), evaluated in single precision.
  // std::exp is used explicitly: an unqualified exp() depends on <cmath> also
  // declaring the function in the global namespace (not guaranteed) and, where
  // it does, typically selects the double overload.
  static inline float sigmoid(float x)
  {
      return 1.f / (1.f + std::exp(-x));
  }
  // Numerically stable softmax over `length` values.
  //
  // src    : input values (read only).
  // dst    : receives the normalised probabilities; must hold `length` floats.
  // length : number of elements.
  // return : sum over i of (i * dst[i]), i.e. the expected index under the
  //          resulting distribution.
  //
  // When `length` is not positive or either pointer is null, nothing is
  // written and 0 is returned (previously this dereferenced an empty range).
  static float softmax(const float* src, float* dst, int length)
  {
      if (src == nullptr || dst == nullptr || length <= 0)
          return 0.f;

      // Subtracting the maximum keeps every exponent <= 0, avoiding overflow.
      const float alpha = *std::max_element(src, src + length);

      float denominator = 0.f;
      for (int i = 0; i < length; ++i)
      {
          dst[i] = std::exp(src[i] - alpha);
          denominator += dst[i];
      }

      float dis_sum = 0.f;
      for (int i = 0; i < length; ++i)
      {
          dst[i] /= denominator;
          dis_sum += static_cast<float>(i) * dst[i];
      }
      return dis_sum;
  }
  // Area of the overlap between two boxes.
  // T must expose `xyxy[4]` holding x0, y0, x1, y1. Widths and heights are
  // computed as (max - min + 1), and a negative extent is clamped to zero so
  // disjoint boxes yield 0.
  template<typename T>
  static inline float intersection_area(const T& a, const T& b)
  {
      const float left = std::max(a.xyxy[0], b.xyxy[0]);
      const float top = std::max(a.xyxy[1], b.xyxy[1]);
      const float right = std::min(a.xyxy[2], b.xyxy[2]);
      const float bottom = std::min(a.xyxy[3], b.xyxy[3]);

      const float overlap_w = std::max(0.0f, right - left + 1.0f);
      const float overlap_h = std::max(0.0f, bottom - top + 1.0f);
      return overlap_w * overlap_h;
  }
  // Quicksort of faceobjects[left..right] (both bounds inclusive) into
  // descending order of `.score`. The pivot is the score of the middle element;
  // the two resulting partitions are sorted recursively, each inside its own
  // OpenMP section.
  template<typename T>
  static void qsort_descent_inplace(std::vector<T>& faceobjects, int left, int right)
  {
      int lo = left;
      int hi = right;
      const float pivot = faceobjects[(left + right) / 2].score;

      while (lo <= hi)
      {
          // Skip elements already on the correct side of the pivot.
          for (; faceobjects[lo].score > pivot; ++lo) {}
          for (; faceobjects[hi].score < pivot; --hi) {}

          if (lo > hi)
              break;

          std::swap(faceobjects[lo], faceobjects[hi]);
          ++lo;
          --hi;
      }

  #pragma omp parallel sections
      {
  #pragma omp section
          {
              if (left < hi) qsort_descent_inplace(faceobjects, left, hi);
          }
  #pragma omp section
          {
              if (lo < right) qsort_descent_inplace(faceobjects, lo, right);
          }
      }
  }
  // Sort the whole vector into descending order of `.score`.
  // An empty vector is left untouched; otherwise the range overload is invoked
  // on [0, size - 1].
  template<typename T>
  static void qsort_descent_inplace(std::vector<T>& faceobjects)
  {
      if (!faceobjects.empty())
      {
          const int last = static_cast<int>(faceobjects.size() - 1);
          qsort_descent_inplace(faceobjects, 0, last);
      }
  }
  // Greedy non-maximum suppression.
  //
  // faceobjects   : candidates, visited in the order given (callers in this
  //                 file sort them by descending score first). T must expose
  //                 `xyxy[4]` and `area`.
  // picked        : cleared, then filled with the indices of the kept boxes.
  // nms_threshold : a candidate is dropped when its IoU with any already kept
  //                 box is strictly greater than this value.
  //
  // The scan over kept boxes stops at the first box that suppresses the
  // candidate; further comparisons could not change the outcome.
  template<typename T>
  static void nms_sorted_bboxes(const std::vector<T>& faceobjects, std::vector<int>& picked, float nms_threshold)
  {
      picked.clear();

      const int n = static_cast<int>(faceobjects.size());
      for (int i = 0; i < n; i++)
      {
          const T& candidate = faceobjects[i];

          bool keep = true;
          for (const int kept_index : picked)
          {
              const T& kept = faceobjects[kept_index];

              // intersection over union
              const float inter_area = intersection_area(candidate, kept);
              const float union_area = candidate.area + kept.area - inter_area;
              if (inter_area / union_area > nms_threshold)
              {
                  keep = false;
                  break;
              }
          }

          if (keep)
              picked.push_back(i);
      }
  }
  151. static void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
  152. {
  153. for (auto stride : strides)
  154. {
  155. int num_grid_w = target_w / stride;
  156. int num_grid_h = target_h / stride;
  157. for (int g1 = 0; g1 < num_grid_h; g1++)
  158. {
  159. for (int g0 = 0; g0 < num_grid_w; g0++)
  160. {
  161. GridAndStride gs;
  162. gs.grid0 = g0;
  163. gs.grid1 = g1;
  164. gs.stride = stride;
  165. grid_strides.push_back(gs);
  166. }
  167. }
  168. }
  169. }
  170. // static void generate_proposals_yolov7(int stride, const float* feat, float prob_threshold, std::vector<Object>& objects,
  171. // int letterbox_cols, int letterbox_rows, const float* anchors, int cls_num = 80)
  172. // {
  173. // int feat_w = letterbox_cols / stride;
  174. // int feat_h = letterbox_rows / stride;
  175. // auto feat_ptr = feat;
  176. // for (int h = 0; h <= feat_h - 1; h++)
  177. // {
  178. // for (int w = 0; w <= feat_w - 1; w++)
  179. // {
  180. // for (int a_index = 0; a_index < 3; ++a_index)
  181. // {
  182. // float box_objectness = feat_ptr[4];
  183. // if (box_objectness < prob_threshold)
  184. // {
  185. // feat_ptr += cls_num + 5;
  186. // continue;
  187. // }
  188. // //process cls score
  189. // int class_index = 0;
  190. // float class_score = -FLT_MAX;
  191. // for (int s = 0; s <= cls_num - 1; s++)
  192. // {
  193. // float score = feat_ptr[s + 5];
  194. // if (score > class_score)
  195. // {
  196. // class_index = s;
  197. // class_score = score;
  198. // }
  199. // }
  200. // float box_prob = box_objectness * class_score;
  201. // if (box_prob > prob_threshold)
  202. // {
  203. // float x_center = (feat_ptr[0] * 2 - 0.5f + (float)w) * (float)stride;
  204. // float y_center = (feat_ptr[1] * 2 - 0.5f + (float)h) * (float)stride;
  205. // float box_w = (feat_ptr[2] * 2) * (feat_ptr[2] * 2) * anchors[a_index * 2];
  206. // float box_h = (feat_ptr[3] * 2) * (feat_ptr[3] * 2) * anchors[a_index * 2 + 1];
  207. // float x0 = x_center - box_w * 0.5f;
  208. // float y0 = y_center - box_h * 0.5f;
  209. // Object obj;
  210. // obj.rect.x = x0;
  211. // obj.rect.y = y0;
  212. // obj.rect.width = box_w;
  213. // obj.rect.height = box_h;
  214. // obj.label = class_index;
  215. // obj.prob = box_prob;
  216. // objects.push_back(obj);
  217. // }
  218. // feat_ptr += cls_num + 5;
  219. // }
  220. // }
  221. // }
  222. // }
  223. static void generate_proposals_yolov5(const float* feat, std::vector<Box>& objects,
  224. std::array<size_t, 4U> output_size, int stride,
  225. const unsigned int *anchors, const std::vector<std::string> class_names,
  226. std::unordered_map<std::string, float> class_attributes)
  227. {
  228. int cls_num = class_names.size();
  229. int anchor_num = 3;
  230. int feat_h = output_size[1];
  231. int feat_w = output_size[2];
  232. auto feature_ptr = feat;
  233. for (int h = 0; h <= feat_h - 1; h++)
  234. {
  235. for (int w = 0; w <= feat_w - 1; w++)
  236. {
  237. for (int a = 0; a <= anchor_num - 1; a++)
  238. {
  239. //process cls score
  240. int class_index = 0;
  241. float class_score = -1;
  242. for (int s = 0; s <= cls_num - 1; s++)
  243. {
  244. float score = feature_ptr[s + 5];
  245. if (score > class_score)
  246. {
  247. class_index = s;
  248. class_score = score;
  249. }
  250. }
  251. // process box score
  252. float box_score = feature_ptr[4];
  253. if (CHECK_SCORE(class_attributes, class_names[class_index] + UNSIGMOID_STR, box_score))
  254. {
  255. feature_ptr += (cls_num + 5);
  256. continue;
  257. }
  258. float final_score = sigmoid(box_score) * sigmoid(class_score);
  259. if (CHECK_SCORE(class_attributes, class_names[class_index], final_score))
  260. {
  261. feature_ptr += (cls_num + 5);
  262. continue;
  263. }
  264. float dx = sigmoid(feature_ptr[0]);
  265. float dy = sigmoid(feature_ptr[1]);
  266. float dw = sigmoid(feature_ptr[2]);
  267. float dh = sigmoid(feature_ptr[3]);
  268. float pred_cx = (dx * 2.0f - 0.5f + w) * stride;
  269. float pred_cy = (dy * 2.0f - 0.5f + h) * stride;
  270. float anchor_w = (float)anchors[a * 2];
  271. float anchor_h = (float)anchors[a * 2 + 1];
  272. float pred_w = dw * dw * 4.0f * anchor_w;
  273. float pred_h = dh * dh * 4.0f * anchor_h;
  274. float x0 = pred_cx - pred_w * 0.5f;
  275. float y0 = pred_cy - pred_h * 0.5f;
  276. float x1 = pred_cx + pred_w * 0.5f;
  277. float y1 = pred_cy + pred_h * 0.5f;
  278. Box obj;
  279. obj.xyxy[0] = x0;
  280. obj.xyxy[1] = y0;
  281. obj.xyxy[2] = x1;
  282. obj.xyxy[3] = y1;
  283. obj.index = class_index;
  284. obj.score = final_score;
  285. obj.area = (obj.xyxy[2] - obj.xyxy[0] + 1) * (obj.xyxy[3] - obj.xyxy[1] + 1);
  286. objects.push_back(obj);
  287. feature_ptr += (cls_num + 5);
  288. }
  289. }
  290. }
  291. }
  // Limit `val` to the range [min, max].
  // Anything that is not strictly greater than `min` (including NaN) yields
  // `min`; anything not strictly less than `max` yields `max`.
  inline static float clamp(
      float val,
      float min = 0.f,
      float max = 1536.f)
  {
      if (!(val > min))
      {
          return min;
      }
      if (val < max)
      {
          return val;
      }
      return max;
  }
  299. // static void generate_proposals_yolov8(int stride, const float* dfl_feat, const float* cls_feat, const float* cls_idx, float prob_threshold, std::vector<Object>& objects,
  300. // int letterbox_cols, int letterbox_rows, int cls_num = 80)
  301. // {
  302. // int feat_w = letterbox_cols / stride;
  303. // int feat_h = letterbox_rows / stride;
  304. // int reg_max = 16;
  305. // auto dfl_ptr = dfl_feat;
  306. // auto cls_ptr = cls_feat;
  307. // auto cls_idx_ptr = cls_idx;
  308. // std::vector<float> dis_after_sm(reg_max, 0.f);
  309. // for (int h = 0; h <= feat_h - 1; h++)
  310. // {
  311. // for (int w = 0; w <= feat_w - 1; w++)
  312. // {
  313. // //process cls score
  314. // int class_index = static_cast<int>(cls_idx_ptr[h * feat_w + w]);
  315. // float class_score = cls_ptr[h * feat_w * cls_num + w * cls_num + class_index];
  316. // float box_prob = sigmoid(class_score);
  317. // if (box_prob > prob_threshold)
  318. // {
  319. // float pred_ltrb[4];
  320. // for (int k = 0; k < 4; k++)
  321. // {
  322. // float dis = softmax(dfl_ptr + k * reg_max, dis_after_sm.data(), reg_max);
  323. // pred_ltrb[k] = dis * stride;
  324. // }
  325. // float pb_cx = (w + 0.5f) * stride;
  326. // float pb_cy = (h + 0.5f) * stride;
  327. // float x0 = pb_cx - pred_ltrb[0];
  328. // float y0 = pb_cy - pred_ltrb[1];
  329. // float x1 = pb_cx + pred_ltrb[2];
  330. // float y1 = pb_cy + pred_ltrb[3];
  331. // x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f);
  332. // y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f);
  333. // x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f);
  334. // y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f);
  335. // Object obj;
  336. // obj.rect.x = x0;
  337. // obj.rect.y = y0;
  338. // obj.rect.width = x1 - x0;
  339. // obj.rect.height = y1 - y0;
  340. // obj.label = class_index;
  341. // obj.prob = box_prob;
  342. // objects.push_back(obj);
  343. // }
  344. // dfl_ptr += (4 * reg_max);
  345. // }
  346. // }
  347. // }
  348. // static void get_out_bbox(std::vector<Box>& objects, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols)
  349. // {
  350. // /* yolov5 draw the result */
  351. // float scale_letterbox;
  352. // int resize_rows;
  353. // int resize_cols;
  354. // if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
  355. // {
  356. // scale_letterbox = letterbox_rows * 1.0 / src_rows;
  357. // }
  358. // else
  359. // {
  360. // scale_letterbox = letterbox_cols * 1.0 / src_cols;
  361. // }
  362. // resize_cols = int(scale_letterbox * src_cols);
  363. // resize_rows = int(scale_letterbox * src_rows);
  364. // int tmp_h = (letterbox_rows - resize_rows) / 2;
  365. // int tmp_w = (letterbox_cols - resize_cols) / 2;
  366. // float ratio_x = (float)src_rows / resize_rows;
  367. // float ratio_y = (float)src_cols / resize_cols;
  368. // int count = objects.size();
  369. // objects.resize(count);
  370. // for (int i = 0; i < count; i++)
  371. // {
  372. // float x0 = (objects[i].xyxy[0]);
  373. // float y0 = (objects[i].xyxy[1]);
  374. // float x1 = (objects[i].xyxy[2]);
  375. // float y1 = (objects[i].xyxy[3]);
  376. // x0 = (x0 - tmp_w) * ratio_x;
  377. // y0 = (y0 - tmp_h) * ratio_y;
  378. // x1 = (x1 - tmp_w) * ratio_x;
  379. // y1 = (y1 - tmp_h) * ratio_y;
  380. // // for (int l = 0; l < 5; l++)
  381. // // {
  382. // // auto lx = objects[i].landmark[l].x;
  383. // // auto ly = objects[i].landmark[l].y;
  384. // // objects[i].landmark[l] = cv::Point2f((lx - tmp_w) * ratio_x, (ly - tmp_h) * ratio_y);
  385. // // }
  386. // x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
  387. // y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
  388. // x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
  389. // y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
  390. // objects[i].xyxy[0] = x0;
  391. // objects[i].xyxy[1] = y0;
  392. // objects[i].xyxy[2] = x1;
  393. // objects[i].xyxy[3] = y1;
  394. // }
  395. // }
  396. static void get_out_bbox(std::vector<Box> proposals, std::vector<BoundingBox> *objects,
  397. float scaleInfo, int src_rows, int src_cols,
  398. std::vector<std::string> classNames,
  399. std::unordered_map<std::string, float> classAttributes)
  400. {
  401. qsort_descent_inplace(proposals);
  402. std::vector<int> picked;
  403. nms_sorted_bboxes(proposals, picked, classAttributes.find("iou_thresh")->second);
  404. int count = picked.size();
  405. objects->resize(count);
  406. for (int i = 0; i < count; i++)
  407. {
  408. if (i >= classAttributes.find("topk")->second)
  409. {
  410. break;
  411. }
  412. BoundingBox post_box;
  413. float x0 = (proposals[picked[i]].xyxy[0] / scaleInfo);
  414. float y0 = (proposals[picked[i]].xyxy[1] / scaleInfo);
  415. float x1 = (proposals[picked[i]].xyxy[2] / scaleInfo);
  416. float y1 = (proposals[picked[i]].xyxy[3] / scaleInfo);
  417. // for (int l = 0; l < 5; l++)
  418. // {
  419. // auto lx = objects[i].landmark[l].x;
  420. // auto ly = objects[i].landmark[l].y;
  421. // objects[i].landmark[l] = cv::Point2f((lx - tmp_w) * ratio_x, (ly - tmp_h) * ratio_y);
  422. // }
  423. x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
  424. y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
  425. x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
  426. y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
  427. post_box.x1 = x0;
  428. post_box.y1 = y0;
  429. post_box.x2 = x1;
  430. post_box.y2 = y1;
  431. post_box.category = classNames[proposals[picked[i]].index];
  432. post_box.detect_confidence = proposals[picked[i]].score;
  433. objects->push_back(post_box);
  434. printf("[%s Line:%d] ------------final-box x1:%.0f y1:%.0f x2:%.0f y2:%.0f conf:%.2f class_name:%s------------\n", __FUNCTION__, __LINE__, post_box.x1, post_box.y1, post_box.x2, post_box.y2, post_box.detect_confidence, post_box.category.c_str());
  435. }
  436. }
  437. } // namespace detection