// Detection.hpp
/*
 * AXERA is pleased to support the open source community by making ax-samples available.
 *
 * Copyright (c) 2022, AXERA Semiconductor (Shanghai) Co., Ltd. All rights reserved.
 *
 * Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * https://opensource.org/licenses/BSD-3-Clause
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

/*
 * Author: ls.wang
 */
#pragma once

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
  #ifndef CHECK_SCORE
  /*
   * CHECK_SCORE(class_attributes, class_names, score)
   *
   * Evaluates to true when `score` is strictly below the value stored in
   * `class_attributes` under the key `class_names` (a single key, despite the
   * plural name), OR strictly below the value stored under the key "all".
   *
   * A key that is not present contributes `false`; in particular a map without
   * an "all" entry no longer dereferences end().
   *
   * `class_attributes` must provide find()/end() with a mapped `second`
   * comparable to `score` (e.g. std::unordered_map<std::string, float>).
   * Every argument is expanded more than once, so pass side-effect-free
   * expressions only.
   */
  #define CHECK_SCORE(class_attributes, class_names, score)                      \
      ((((class_attributes).find(class_names) != (class_attributes).end()) &&    \
        ((score) < (class_attributes).find(class_names)->second)) ||             \
       (((class_attributes).find("all") != (class_attributes).end()) &&          \
        ((score) < (class_attributes).find("all")->second)))
  #endif
  #ifndef UNSIGMOID
  /*
   * UNSIGMOID(value)
   *
   * Inverse of the logistic function: yields -log(1 / value - 1) as a float,
   * i.e. the x for which 1 / (1 + exp(-x)) == value.
   *
   * `value` is parenthesised inside the expansion so compound arguments such
   * as `a + b` are inverted as a whole. Inputs outside (0, 1) produce the usual
   * std::log results (infinity / NaN); no range check is performed here.
   */
  #define UNSIGMOID(value) (-1.0f * static_cast<float>(std::log((1.0f / (value)) - 1.0f)))
  #endif
  33. namespace detection
  34. {
  /**
   * One cell of a detection feature map together with the stride of that map.
   *
   * As filled by generate_grids_and_stride(): grid0 is the index along the
   * width, grid1 the index along the height, stride the down-sampling factor
   * the cell belongs to. Members are zero-initialised, matching Box.
   */
  struct GridAndStride
  {
      int grid0 = 0;
      int grid1 = 0;
      int stride = 0;
  };
  /**
   * A candidate detection produced by the proposal generators in this file.
   * All members default to zero.
   */
  struct Box
  {
      float xyxy[4] = {0, 0, 0, 0}; // corners: x0, y0, x1, y1
      float xywh[4] = {0, 0, 0, 0}; // not written by the code visible in this file
      float object_score = 0;       // not written by the code visible in this file
      size_t index = 0;             // class index, used to look up the class name
      float score = 0;              // final confidence; the sort key for NMS
      float area = 0;               // box area, computed with the "+1" pixel convention
  };
  /**
   * Fast, low-precision approximation of exp(x).
   *
   * Builds the IEEE-754 single-precision bit pattern directly: x is scaled by
   * log2(e), biased, and shifted into the exponent field (2^23). The result is
   * within a few percent of std::exp for moderate x.
   *
   * Compared with a plain union-based version this one
   *  - reinterprets the bits with std::memcpy (reading an inactive union member
   *    is undefined behaviour in C++), and
   *  - saturates instead of converting an out-of-range value to uint32_t
   *    (which is undefined behaviour): very negative x yields 0, very large x
   *    yields +infinity, NaN is returned unchanged.
   *
   * @param x exponent
   * @return approximate e^x
   */
  static inline float fast_exp(const float& x)
  {
      if (std::isnan(x))
          return x;

      // Same arithmetic (in double) as the classic formulation.
      const double scaled = (1 << 23) * (1.4426950409 * x + 126.93490512f);

      if (scaled <= 0.0)
          return 0.0f;

      // 0x7F800000 is the bit pattern of +infinity; anything at or above it
      // would decode to inf/NaN (or not fit into 32 bits at all).
      constexpr double kInfBits = 2139095040.0;
      if (scaled >= kInfBits)
          return std::numeric_limits<float>::infinity();

      const uint32_t bits = static_cast<uint32_t>(scaled);
      float result = 0.0f;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
  }
  60. static inline float fast_sigmoid(const float& x)
  61. {
  62. return 1.0f / (1.0f + fast_exp(-x));
  63. }
  /**
   * Logistic function 1 / (1 + e^-x).
   *
   * The exponential is evaluated in double precision through std::exp and the
   * result is narrowed to float. The call is std-qualified because <cmath>
   * only guarantees the declaration inside namespace std.
   *
   * @param x input value
   * @return sigmoid of x, in [0, 1]
   */
  static inline float sigmoid(float x)
  {
      return static_cast<float>(1.0 / (1.0 + std::exp(-static_cast<double>(x))));
  }
  /**
   * Numerically stable softmax over `length` values, plus the index-weighted
   * sum of the resulting probabilities.
   *
   * The maximum of `src` is subtracted before exponentiation so large inputs
   * do not overflow.
   *
   * @param src    input values (at least `length` elements)
   * @param dst    receives the `length` softmax probabilities
   * @param length number of elements
   * @return sum over i of i * dst[i]; 0 when `length` <= 0 or a pointer is
   *         null (in that case `dst` is left untouched)
   */
  static float softmax(const float* src, float* dst, int length)
  {
      // std::max_element on an empty range returns the end iterator, which
      // must not be dereferenced.
      if (src == nullptr || dst == nullptr || length <= 0)
          return 0.0f;

      const float alpha = *std::max_element(src, src + length);

      float denominator = 0.0f;
      for (int i = 0; i < length; ++i)
      {
          dst[i] = std::exp(src[i] - alpha);
          denominator += dst[i];
      }

      float dis_sum = 0.0f;
      for (int i = 0; i < length; ++i)
      {
          dst[i] /= denominator;
          dis_sum += i * dst[i];
      }
      return dis_sum;
  }
  /**
   * Area of the overlap between two axis-aligned boxes.
   *
   * T must expose `xyxy[4]` holding x0, y0, x1, y1. Width and height use the
   * inclusive-pixel "+1" convention, the same one used for Box::area in
   * generate_proposals_yolov5().
   *
   * @return overlap area, or 0 when the clamped width or height is 0
   */
  template<typename T>
  static inline float intersection_area(const T& a, const T& b)
  {
      const float xx1 = std::max(a.xyxy[0], b.xyxy[0]);
      const float yy1 = std::max(a.xyxy[1], b.xyxy[1]);
      const float xx2 = std::min(a.xyxy[2], b.xyxy[2]);
      const float yy2 = std::min(a.xyxy[3], b.xyxy[3]);

      // Negative extents mean "no overlap" and are clamped to zero.
      const float w = std::max(0.0f, xx2 - xx1 + 1.0f);
      const float h = std::max(0.0f, yy2 - yy1 + 1.0f);
      return w * h;
  }
  /**
   * Sorts faceobjects[left..right] (both inclusive) in place by descending
   * `score`, using a recursive quicksort with the middle element as pivot.
   *
   * T must expose a `score` member. The caller guarantees that `left` and
   * `right` are valid indices. The sort is not stable.
   *
   * The two recursive calls are wrapped in OpenMP sections; without OpenMP the
   * pragmas are ignored and the calls run sequentially.
   */
  template<typename T>
  static void qsort_descent_inplace(std::vector<T>& faceobjects, int left, int right)
  {
      int i = left;
      int j = right;
      const float p = faceobjects[(left + right) / 2].score;

      while (i <= j)
      {
          while (faceobjects[i].score > p)
              i++;

          while (faceobjects[j].score < p)
              j--;

          if (i <= j)
          {
              std::swap(faceobjects[i], faceobjects[j]);
              i++;
              j--;
          }
      }

      // Here j < i, so [left, j] and [i, right] are disjoint sub-ranges.
  #pragma omp parallel sections
      {
  #pragma omp section
          {
              if (left < j) qsort_descent_inplace(faceobjects, left, j);
          }
  #pragma omp section
          {
              if (i < right) qsort_descent_inplace(faceobjects, i, right);
          }
      }
  }

  /**
   * Sorts the whole vector in place by descending `score`.
   * An empty vector is left untouched.
   */
  template<typename T>
  static void qsort_descent_inplace(std::vector<T>& faceobjects)
  {
      if (faceobjects.empty())
          return;

      // Explicit conversion: the recursive overload works on int indices.
      qsort_descent_inplace(faceobjects, 0, static_cast<int>(faceobjects.size()) - 1);
  }
  /**
   * Greedy non-maximum suppression.
   *
   * Walks `faceobjects` in order and keeps a box only if its IoU with every
   * previously kept box is at most `nms_threshold`. The input is expected to
   * be sorted by descending score already (see qsort_descent_inplace), so that
   * higher-scoring boxes win. Boxes are compared regardless of their class.
   *
   * T must expose `xyxy[4]` (used by intersection_area) and `area`.
   *
   * @param faceobjects   candidate boxes
   * @param picked        cleared, then filled with the indices of kept boxes
   *                      in ascending order
   * @param nms_threshold IoU above which a box is suppressed
   */
  template<typename T>
  static void nms_sorted_bboxes(const std::vector<T>& faceobjects, std::vector<int>& picked, float nms_threshold)
  {
      picked.clear();

      const int n = static_cast<int>(faceobjects.size());
      for (int i = 0; i < n; i++)
      {
          const T& a = faceobjects[i];

          bool keep = true;
          for (const int kept : picked)
          {
              const T& b = faceobjects[kept];

              // intersection over union
              const float inter_area = intersection_area(a, b);
              const float union_area = a.area + b.area - inter_area;
              if (inter_area / union_area > nms_threshold)
              {
                  keep = false;
                  break; // one overlap is enough to suppress this box
              }
          }

          if (keep)
              picked.push_back(i);
      }
  }
  164. static void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
  165. {
  166. for (auto stride : strides)
  167. {
  168. int num_grid_w = target_w / stride;
  169. int num_grid_h = target_h / stride;
  170. for (int g1 = 0; g1 < num_grid_h; g1++)
  171. {
  172. for (int g0 = 0; g0 < num_grid_w; g0++)
  173. {
  174. GridAndStride gs;
  175. gs.grid0 = g0;
  176. gs.grid1 = g1;
  177. gs.stride = stride;
  178. grid_strides.push_back(gs);
  179. }
  180. }
  181. }
  182. }
  183. // static void generate_proposals_yolov7(int stride, const float* feat, float prob_threshold, std::vector<Object>& objects,
  184. // int letterbox_cols, int letterbox_rows, const float* anchors, int cls_num = 80)
  185. // {
  186. // int feat_w = letterbox_cols / stride;
  187. // int feat_h = letterbox_rows / stride;
  188. // auto feat_ptr = feat;
  189. // for (int h = 0; h <= feat_h - 1; h++)
  190. // {
  191. // for (int w = 0; w <= feat_w - 1; w++)
  192. // {
  193. // for (int a_index = 0; a_index < 3; ++a_index)
  194. // {
  195. // float box_objectness = feat_ptr[4];
  196. // if (box_objectness < prob_threshold)
  197. // {
  198. // feat_ptr += cls_num + 5;
  199. // continue;
  200. // }
  201. // //process cls score
  202. // int class_index = 0;
  203. // float class_score = -FLT_MAX;
  204. // for (int s = 0; s <= cls_num - 1; s++)
  205. // {
  206. // float score = feat_ptr[s + 5];
  207. // if (score > class_score)
  208. // {
  209. // class_index = s;
  210. // class_score = score;
  211. // }
  212. // }
  213. // float box_prob = box_objectness * class_score;
  214. // if (box_prob > prob_threshold)
  215. // {
  216. // float x_center = (feat_ptr[0] * 2 - 0.5f + (float)w) * (float)stride;
  217. // float y_center = (feat_ptr[1] * 2 - 0.5f + (float)h) * (float)stride;
  218. // float box_w = (feat_ptr[2] * 2) * (feat_ptr[2] * 2) * anchors[a_index * 2];
  219. // float box_h = (feat_ptr[3] * 2) * (feat_ptr[3] * 2) * anchors[a_index * 2 + 1];
  220. // float x0 = x_center - box_w * 0.5f;
  221. // float y0 = y_center - box_h * 0.5f;
  222. // Object obj;
  223. // obj.rect.x = x0;
  224. // obj.rect.y = y0;
  225. // obj.rect.width = box_w;
  226. // obj.rect.height = box_h;
  227. // obj.label = class_index;
  228. // obj.prob = box_prob;
  229. // objects.push_back(obj);
  230. // }
  231. // feat_ptr += cls_num + 5;
  232. // }
  233. // }
  234. // }
  235. // }
  236. static void generate_proposals_yolov5(const float* feat, std::vector<Box>& objects,
  237. std::array<size_t, 4U> output_size, int stride,
  238. const unsigned int *anchors, const std::vector<std::string> class_names,
  239. std::unordered_map<std::string, float> class_attributes_unsigmoid,
  240. std::unordered_map<std::string, float> class_attributes)
  241. {
  242. int cls_num = class_names.size();
  243. int anchor_num = 3;
  244. int feat_h = output_size[1];
  245. int feat_w = output_size[2];
  246. auto feature_ptr = feat;
  247. for (int h = 0; h <= feat_h - 1; h++)
  248. {
  249. for (int w = 0; w <= feat_w - 1; w++)
  250. {
  251. for (int a = 0; a <= anchor_num - 1; a++)
  252. {
  253. //process cls score
  254. int class_index = 0;
  255. float class_score = -1;
  256. for (int s = 0; s <= cls_num - 1; s++)
  257. {
  258. float score = feature_ptr[s + 5];
  259. if (score > class_score)
  260. {
  261. class_index = s;
  262. class_score = score;
  263. }
  264. }
  265. // process box score
  266. float box_score = feature_ptr[4];
  267. if (CHECK_SCORE(class_attributes_unsigmoid, class_names[class_index], box_score))
  268. {
  269. feature_ptr += (cls_num + 5);
  270. continue;
  271. }
  272. float final_score = sigmoid(box_score) * sigmoid(class_score);
  273. if (CHECK_SCORE(class_attributes, class_names[class_index], final_score))
  274. {
  275. feature_ptr += (cls_num + 5);
  276. continue;
  277. }
  278. float dx = sigmoid(feature_ptr[0]);
  279. float dy = sigmoid(feature_ptr[1]);
  280. float dw = sigmoid(feature_ptr[2]);
  281. float dh = sigmoid(feature_ptr[3]);
  282. float pred_cx = (dx * 2.0f - 0.5f + w) * stride;
  283. float pred_cy = (dy * 2.0f - 0.5f + h) * stride;
  284. float anchor_w = (float)anchors[a * 2];
  285. float anchor_h = (float)anchors[a * 2 + 1];
  286. float pred_w = dw * dw * 4.0f * anchor_w;
  287. float pred_h = dh * dh * 4.0f * anchor_h;
  288. float x0 = pred_cx - pred_w * 0.5f;
  289. float y0 = pred_cy - pred_h * 0.5f;
  290. float x1 = pred_cx + pred_w * 0.5f;
  291. float y1 = pred_cy + pred_h * 0.5f;
  292. Box obj;
  293. obj.xyxy[0] = x0;
  294. obj.xyxy[1] = y0;
  295. obj.xyxy[2] = x1;
  296. obj.xyxy[3] = y1;
  297. obj.index = class_index;
  298. obj.score = final_score;
  299. obj.area = (obj.xyxy[2] - obj.xyxy[0] + 1) * (obj.xyxy[3] - obj.xyxy[1] + 1);
  300. objects.push_back(obj);
  301. feature_ptr += (cls_num + 5);
  302. }
  303. }
  304. }
  305. }
  /**
   * Limits `val` to the closed range [min, max].
   *
   * @param val value to limit
   * @param min lower bound, 0 by default
   * @param max upper bound, 1536 by default
   * @return `min` when val is not greater than min (this includes a NaN val),
   *         `max` when val is not less than max, otherwise val
   */
  inline static float clamp(
      float val,
      float min = 0.f,
      float max = 1536.f)
  {
      return val > min ? (val < max ? val : max) : min;
  }
  313. // static void generate_proposals_yolov8(int stride, const float* dfl_feat, const float* cls_feat, const float* cls_idx, float prob_threshold, std::vector<Object>& objects,
  314. // int letterbox_cols, int letterbox_rows, int cls_num = 80)
  315. // {
  316. // int feat_w = letterbox_cols / stride;
  317. // int feat_h = letterbox_rows / stride;
  318. // int reg_max = 16;
  319. // auto dfl_ptr = dfl_feat;
  320. // auto cls_ptr = cls_feat;
  321. // auto cls_idx_ptr = cls_idx;
  322. // std::vector<float> dis_after_sm(reg_max, 0.f);
  323. // for (int h = 0; h <= feat_h - 1; h++)
  324. // {
  325. // for (int w = 0; w <= feat_w - 1; w++)
  326. // {
  327. // //process cls score
  328. // int class_index = static_cast<int>(cls_idx_ptr[h * feat_w + w]);
  329. // float class_score = cls_ptr[h * feat_w * cls_num + w * cls_num + class_index];
  330. // float box_prob = sigmoid(class_score);
  331. // if (box_prob > prob_threshold)
  332. // {
  333. // float pred_ltrb[4];
  334. // for (int k = 0; k < 4; k++)
  335. // {
  336. // float dis = softmax(dfl_ptr + k * reg_max, dis_after_sm.data(), reg_max);
  337. // pred_ltrb[k] = dis * stride;
  338. // }
  339. // float pb_cx = (w + 0.5f) * stride;
  340. // float pb_cy = (h + 0.5f) * stride;
  341. // float x0 = pb_cx - pred_ltrb[0];
  342. // float y0 = pb_cy - pred_ltrb[1];
  343. // float x1 = pb_cx + pred_ltrb[2];
  344. // float y1 = pb_cy + pred_ltrb[3];
  345. // x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f);
  346. // y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f);
  347. // x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f);
  348. // y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f);
  349. // Object obj;
  350. // obj.rect.x = x0;
  351. // obj.rect.y = y0;
  352. // obj.rect.width = x1 - x0;
  353. // obj.rect.height = y1 - y0;
  354. // obj.label = class_index;
  355. // obj.prob = box_prob;
  356. // objects.push_back(obj);
  357. // }
  358. // dfl_ptr += (4 * reg_max);
  359. // }
  360. // }
  361. // }
  362. // static void get_out_bbox(std::vector<Box>& objects, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols)
  363. // {
  364. // /* yolov5 draw the result */
  365. // float scale_letterbox;
  366. // int resize_rows;
  367. // int resize_cols;
  368. // if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
  369. // {
  370. // scale_letterbox = letterbox_rows * 1.0 / src_rows;
  371. // }
  372. // else
  373. // {
  374. // scale_letterbox = letterbox_cols * 1.0 / src_cols;
  375. // }
  376. // resize_cols = int(scale_letterbox * src_cols);
  377. // resize_rows = int(scale_letterbox * src_rows);
  378. // int tmp_h = (letterbox_rows - resize_rows) / 2;
  379. // int tmp_w = (letterbox_cols - resize_cols) / 2;
  380. // float ratio_x = (float)src_rows / resize_rows;
  381. // float ratio_y = (float)src_cols / resize_cols;
  382. // int count = objects.size();
  383. // objects.resize(count);
  384. // for (int i = 0; i < count; i++)
  385. // {
  386. // float x0 = (objects[i].xyxy[0]);
  387. // float y0 = (objects[i].xyxy[1]);
  388. // float x1 = (objects[i].xyxy[2]);
  389. // float y1 = (objects[i].xyxy[3]);
  390. // x0 = (x0 - tmp_w) * ratio_x;
  391. // y0 = (y0 - tmp_h) * ratio_y;
  392. // x1 = (x1 - tmp_w) * ratio_x;
  393. // y1 = (y1 - tmp_h) * ratio_y;
  394. // // for (int l = 0; l < 5; l++)
  395. // // {
  396. // // auto lx = objects[i].landmark[l].x;
  397. // // auto ly = objects[i].landmark[l].y;
  398. // // objects[i].landmark[l] = cv::Point2f((lx - tmp_w) * ratio_x, (ly - tmp_h) * ratio_y);
  399. // // }
  400. // x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
  401. // y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
  402. // x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
  403. // y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
  404. // objects[i].xyxy[0] = x0;
  405. // objects[i].xyxy[1] = y0;
  406. // objects[i].xyxy[2] = x1;
  407. // objects[i].xyxy[3] = y1;
  408. // }
  409. // }
  410. static void get_out_bbox(std::vector<Box> proposals, std::vector<BoundingBox> *objects,
  411. float scaleInfo, int src_rows, int src_cols,
  412. std::vector<std::string> classNames,
  413. std::unordered_map<std::string, float> classAttributes)
  414. {
  415. qsort_descent_inplace(proposals);
  416. std::vector<int> picked;
  417. nms_sorted_bboxes(proposals, picked, classAttributes.find("iou_thresh")->second);
  418. int count = picked.size();
  419. objects->resize(count);
  420. for (int i = 0; i < count; i++)
  421. {
  422. if (i >= classAttributes.find("topk")->second)
  423. {
  424. break;
  425. }
  426. BoundingBox post_box;
  427. float x0 = (proposals[picked[i]].xyxy[0] / scaleInfo);
  428. float y0 = (proposals[picked[i]].xyxy[1] / scaleInfo);
  429. float x1 = (proposals[picked[i]].xyxy[2] / scaleInfo);
  430. float y1 = (proposals[picked[i]].xyxy[3] / scaleInfo);
  431. // for (int l = 0; l < 5; l++)
  432. // {
  433. // auto lx = objects[i].landmark[l].x;
  434. // auto ly = objects[i].landmark[l].y;
  435. // objects[i].landmark[l] = cv::Point2f((lx - tmp_w) * ratio_x, (ly - tmp_h) * ratio_y);
  436. // }
  437. x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
  438. y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
  439. x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
  440. y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
  441. post_box.x1 = x0;
  442. post_box.y1 = y0;
  443. post_box.x2 = x1;
  444. post_box.y2 = y1;
  445. post_box.category = classNames[proposals[picked[i]].index];
  446. post_box.detect_confidence = proposals[picked[i]].score;
  447. objects->push_back(post_box);
  448. printf("[%s Line:%d] ------------final-box x1:%.0f y1:%.0f x2:%.0f y2:%.0f conf:%.2f class_name:%s------------\n", __FUNCTION__, __LINE__, post_box.x1, post_box.y1, post_box.x2, post_box.y2, post_box.detect_confidence, post_box.category.c_str());
  449. }
  450. }
  451. } // namespace detection