// yolo.cpp

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <VX/vx.h>
#include <vx_ext_amd.h>
#include <net_api.h>
#include <math.h>
#include <memory.h>
#include <algorithm>
// standard headers for the std::string / std::vector / std::map usage below
#include <string>
#include <vector>
#include <map>
#include <vx_log.h>
#include <unistd.h>
#include "yolo.h"
#include <ax_type.h>
#include "ansjer_ai_cfg.h"

#ifndef CLIPRETINA
#define CLIPRETINA(v, mn, mx) \
{ \
    if ((v) < (mn)) { (v) = (mn); } \
    else if ((v) > (mx)) { (v) = (mx); } \
}
#endif

#define JSONPATH "/platform/ax/model/"
typedef struct {
    vx_size dst_handle;
    vx_context context;
    vx_context handle_context;
    vx_graph graph;
    // network input width
    int net_w;
    // network input height
    int net_h;
    // source image width
    int src_w;
    // source image height
    int src_h;
    // scale factor from source image to network input
    float scale_info;
    unsigned int strides[3];
    int nBlobSz;
    ax_nna_tensors_t pOutTensor;
    std::string model_type;
    unsigned int anchors_g[18];
    std::vector<std::string> yolo_outputs_name_g;
    std::vector<std::string> label;
    bool need_nu_freq;
    int freq_nu;
    bool need_vu_freq;
    int freq_vu;
    float conf_thresh;
    float iou_thresh;
} nna_yolo_private_t;
struct Box {
    float xyxy[4] = { 0, 0, 0, 0 };
    float object_score = 0;
    size_t index = 0;
    float score = 0;
    float area = 0;
};
static float sigmoid(float x)
{
    return 1.0 / (1.0 + exp(-x));
}
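// Decode a Distribution Focal Loss (DFL) head: for each of the 4 box sides,
// apply a softmax over reg_max bins and take the expected bin index.
// "dfl" holds 4 * reg_max raw logits per location; "dst" receives the 4
// expected distances (left, top, right, bottom) in grid-cell units.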
static void dfl_process(const std::vector<float> &dfl, std::vector<float> &dst, int reg_max = 16)
{
    std::vector<float> temp;
    float max_value, denominator, sum_value;
    for (int i = 0; i < int(dfl.size() / reg_max); ++i) {
        // softmax over the reg_max logits of this group (max-subtracted for numerical stability)
        max_value = *std::max_element(dfl.begin() + int(i * reg_max), dfl.begin() + int((i + 1) * reg_max));
        denominator = 0;
        sum_value = 0;
        for (int j = int(i * reg_max); j < int((i + 1) * reg_max); ++j) {
            // temp accumulates across groups, so the absolute index j stays valid
            temp.push_back(std::exp(dfl[j] - max_value));
            denominator += temp[j];
        }
        for (int j = int(i * reg_max); j < int((i + 1) * reg_max); ++j) {
            temp[j] /= denominator;
            sum_value += temp[j] * (j - int(i * reg_max)); // expected value of the distribution
        }
        dst.push_back(sum_value);
    }
}
// Replace characters that are unsafe in file names with '_'.
void RefineFileName(int len, char *fname)
{
    for (int n = 0; n < len; n++) {
        if (fname[n] == '/' ||
            fname[n] == '(' ||
            fname[n] == ')' ||
            fname[n] == ',' ||
            fname[n] == ' ' ||
            fname[n] == '[' ||
            fname[n] == ']' ||
            fname[n] == ':') {
            fname[n] = '_';
        }
    }
}
int DumpBlobsInfo(std::vector<std::string> &blobs_name, std::vector<vx_tensor> &blobs_tensor)
{
    for (int i = 0; i < blobs_name.size(); i++) {
        vx_size dims[4];
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_DIMS, dims, sizeof(dims)));
        vx_enum data_type;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_DATA_TYPE, &data_type, sizeof(data_type)));
        vx_size fixed_pos;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_FIXED_POINT_POSITION, &fixed_pos, sizeof(fixed_pos)));
        vx_size data_size;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_MEMORY_SIZE, &data_size, sizeof(data_size)));
        vx_size data_handle;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_MEMORY_HANDLE, &data_handle, sizeof(data_handle)));
        vx_size data_virt_addr;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_MEMORY_VIRT_ADDR, &data_virt_addr, sizeof(data_virt_addr)));
        vx_size data_phys_addr;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_MEMORY_PHYS_ADDR, &data_phys_addr, sizeof(data_phys_addr)));
        VX_LOG("name: %s\n", blobs_name.at(i).c_str());
        VX_LOG("\t(w, h, c, b): (%d, %d, %d, %d)\n", dims[0], dims[1], dims[2], dims[3]);
        VX_LOG("\tdata type: %d\n", data_type);
        VX_LOG("\tfixed position: %d\n", fixed_pos);
        VX_LOG("\tdata size: %d\n", data_size);
        VX_LOG("\tdata handle: 0x%x\n", data_handle);
        VX_LOG("\tdata virt addr: 0x%x\n", data_virt_addr);
        VX_LOG("\tdata phys addr: 0x%x\n", data_phys_addr);
    }
    return 0;
}
int DumpBlobsData(std::vector<std::string> &blobs_name, std::vector<vx_tensor> &blobs_tensor)
{
    for (int i = 0; i < blobs_name.size(); i++) {
        vx_size dims[4];
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_DIMS, dims, sizeof(dims)));
        vx_size data_size;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_MEMORY_SIZE, &data_size, sizeof(data_size)));
        vx_int8 *data;
        ERROR_CHECK_STATUS(vxQueryTensor(blobs_tensor.at(i), VX_TENSOR_MEMORY_VIRT_ADDR, &data, sizeof(data)));
        VX_LOG("name: %s, dims: (%d, %d, %d, %d), data size: %d\n", blobs_name.at(i).c_str(), dims[0], dims[1], dims[2], dims[3], data_size);
        // sanitize only the blob name, not the directory part of the path
        char fname[256];
        snprintf(fname, sizeof(fname), "%s", blobs_name.at(i).c_str());
        RefineFileName(strlen(fname), fname);
        char out_file[512];
        snprintf(out_file, sizeof(out_file), "/share/res_hw/%s.bin", fname);
        FILE *fout = fopen(out_file, "wb");
        if (fout == NULL) {
            VX_LOG("open %s for write failed\n", out_file);
            continue;
        }
        fwrite(data, 1, data_size, fout);
        fclose(fout);
    }
    return 0;
}
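// Decode one YOLOv5 output layer into box proposals.
// The indexing below assumes the NPU lays the tensor out in channel tiles of 16
// with the H dimension padded to H_align. Per anchor/cell the channels are
// (x, y, w, h, objectness, class scores...); the YOLOv5 decode is applied:
//   x = (2*sigmoid(tx) - 0.5 + grid_x) * stride
//   w = (2*sigmoid(tw))^2 * anchor_w
// Raw int8 values are dequantized by the fixed-point shift "fl" before sigmoid.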
static int yolov5_layer(std::vector<Box>& proposals,
                        const signed char* output_ptr,
                        const size_t output_size[4],
                        const unsigned int& image_size,
                        const unsigned int& stride,
                        const unsigned int& fl,
                        const unsigned int& data_type,
                        const unsigned int* anchor,
                        const unsigned int& class_num,
                        const float& confidence_threshold)
{
    int H_align = 0;
    if (data_type == 2) H_align = (output_size[1] + 3) / 4 * 4;
    else H_align = (output_size[1] + 1) / 2 * 2;
    for (size_t a = 0; a < 3; ++a) { // anchor groups = 3
        for (size_t w = 0; w < output_size[0]; ++w) {
            for (size_t h = 0; h < output_size[1]; ++h) {
                Box box;
                size_t max_index = 0;
                float max_score = -1;
                for (size_t c = 0; c < 4 + 1 + class_num; ++c) {
                    size_t ci = a * (4 + 1 + class_num) + c;
                    size_t index = ci / 16 * output_size[0] * H_align * 16 + w * H_align * 16 + h * 16 + (ci % 16);
                    // dequantize and apply sigmoid
                    float data = sigmoid(output_ptr[index] * 1.0 / pow(2, fl));
                    if (c == 0) {
                        // center x
                        data = (data * 2 - 0.5f + w) * static_cast<float>(stride);
                        box.xyxy[c] = data;
                    } else if (c == 1) {
                        // center y
                        data = (data * 2 - 0.5f + h) * static_cast<float>(stride);
                        box.xyxy[c] = data;
                    } else if (c == 2 || c == 3) {
                        // width / height
                        data = powf((data * 2), 2) * anchor[a * 2 + c - 2];
                        box.xyxy[c] = data;
                    } else if (c == 4) {
                        box.object_score = data;
                    } else {
                        // keep the best class score
                        if (data > max_score) {
                            max_index = c - 5;
                            max_score = data;
                        }
                    }
                }
                box.score = max_score * box.object_score;
                box.index = max_index;
                if (box.object_score > confidence_threshold && box.score > confidence_threshold) {
                    // xywh -> xyxy
                    float x = box.xyxy[0], y = box.xyxy[1], w = box.xyxy[2], h = box.xyxy[3];
                    box.xyxy[0] = x - w / 2;
                    box.xyxy[1] = y - h / 2;
                    box.xyxy[2] = x + w / 2;
                    box.xyxy[3] = y + h / 2;
                    box.area = (box.xyxy[2] - box.xyxy[0] + 1) * (box.xyxy[3] - box.xyxy[1] + 1);
                    proposals.push_back(box);
                    // VX_LOG("%s box %f %f %f %f %f %d\n", __FUNCTION__, box.xyxy[0], box.xyxy[1], box.xyxy[2], box.xyxy[3], box.score, box.index);
                }
            }
        }
    }
    return VX_SUCCESS;
}
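// Decode one YOLOv8 output layer (anchor-free). Per cell the channels are
// 4 * reg_max DFL logits followed by the class scores; dfl_process() turns the
// logits into left/top/right/bottom distances from the cell center, which are
// then scaled by the stride to obtain the box corners in network-input pixels.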
static int yolov8_layer(std::vector<Box>& proposals,
                        const signed char* output_ptr,
                        const size_t output_size[4],
                        const unsigned int& image_size,
                        const unsigned int& stride,
                        const unsigned int& fl,
                        const unsigned int& data_type,
                        const unsigned int& class_num,
                        const float& confidence_threshold)
{
    int reg_max = 16;
    float grid_cell_offset = 0.5f;
    int H_align = 0;
    if (data_type == 2) H_align = (output_size[1] + 3) / 4 * 4;
    else H_align = (output_size[1] + 1) / 2 * 2;
    for (size_t w = 0; w < output_size[0]; ++w) {
        for (size_t h = 0; h < output_size[1]; ++h) {
            Box box;
            size_t max_index = 0;
            float max_score = -1;
            std::vector<float> dfl, ltrb;
            for (size_t c = 0; c < 4 * reg_max + class_num; ++c) {
                size_t index = c / 16 * output_size[0] * H_align * 16 + w * H_align * 16 + h * 16 + c % 16;
                // dequantize by the fixed-point shift
                float data = output_ptr[index] * 1.0 / pow(2, fl);
                if (c < 4 * reg_max) {
                    dfl.push_back(data);
                    if (c == (4 * reg_max - 1)) {
                        // all DFL logits collected: decode the box corners
                        dfl_process(dfl, ltrb, reg_max);
                        box.xyxy[0] = (w + grid_cell_offset - ltrb[0]) * stride;
                        box.xyxy[1] = (h + grid_cell_offset - ltrb[1]) * stride;
                        box.xyxy[2] = (w + grid_cell_offset + ltrb[2]) * stride;
                        box.xyxy[3] = (h + grid_cell_offset + ltrb[3]) * stride;
                    }
                } else {
                    // keep the best class score
                    if (data > max_score) {
                        max_index = c - 4 * reg_max;
                        max_score = data;
                    }
                }
            }
            box.score = max_score;
            box.index = max_index;
            if (box.score > confidence_threshold) {
                box.area = (box.xyxy[2] - box.xyxy[0] + 1) * (box.xyxy[3] - box.xyxy[1] + 1);
                proposals.push_back(box);
                // VX_LOG("%s box %f %f %f %f %f %d\n", __FUNCTION__, box.xyxy[0], box.xyxy[1], box.xyxy[2], box.xyxy[3], box.score, box.index);
            }
        }
    }
    return VX_SUCCESS;
}
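// Greedy IoU-based non-maximum suppression: boxes are sorted by descending score
// and any box overlapping a higher-scoring box by more than nms_threshold is erased.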
static bool yolov_box_cmp(const Box& a, const Box& b)
{
    return a.score > b.score;
}

static int yolo_nms(std::vector<Box>& boxes, const float& nms_threshold)
{
    std::sort(boxes.begin(), boxes.end(), yolov_box_cmp);
    size_t current_index = 0;
    while (current_index < boxes.size()) {
        Box current_box = boxes[current_index];
        size_t running_index = current_index + 1;
        while (running_index < boxes.size()) {
            Box running_box = boxes[running_index];
            // intersection rectangle
            float xx1 = std::max(current_box.xyxy[0], running_box.xyxy[0]);
            float yy1 = std::max(current_box.xyxy[1], running_box.xyxy[1]);
            float xx2 = std::min(current_box.xyxy[2], running_box.xyxy[2]);
            float yy2 = std::min(current_box.xyxy[3], running_box.xyxy[3]);
            float w = std::max(0.0f, xx2 - xx1 + 1.0f);
            float h = std::max(0.0f, yy2 - yy1 + 1.0f);
            float inter_area = w * h;
            float union_area = current_box.area + running_box.area - inter_area;
            float overlap = inter_area / union_area;
            if (overlap > nms_threshold) {
                boxes.erase(boxes.begin() + running_index);
            } else {
                ++running_index;
            }
        }
        ++current_index;
    }
    return VX_SUCCESS;
}
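// Create a detector instance: read the model JSON config, load the compiled
// network and weights, verify the OpenVX graph, allocate the BGRA network-input
// buffer and cache the shape / stride / fixed-point info of every output tensor.
// Returns an opaque handle (nna_yolo_private_t*) or NULL on failure.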
void *nna_custom_det_open(ezax_custom_det_cfg_t *cfg)
{
    // char *filename = (char *)"/data/yolo_config.json";
    // char *filename = (char *)"/platform/ax/model/yolo_config.json";
    // char *filename = (char *)"/share/yolo_config.json";
    // VX_LOG("%s Line:%d %s\n", __FUNCTION__, __LINE__, cfg->model_rootpath);
    char *filename = (char*)malloc(strlen(JSONPATH) + strlen(cfg->model_rootpath) + 1);
    strcpy(filename, JSONPATH);
    strcat(filename, cfg->model_rootpath);
    VX_LOG("%s Line:%d read json from %s\n", __FUNCTION__, __LINE__, filename);
    // use new (not malloc/memset) so any std::string/std::vector members are constructed
    Asj_Ai_Cfg_t *ai_config = new Asj_Ai_Cfg_t();
    try {
        read_Asj_Ai_Json(ai_config, filename);
    } catch (const std::exception &e) {
        VX_LOG("%s Line:%d read json error..\n", __FUNCTION__, __LINE__);
        free(filename);
        delete ai_config;
        return NULL;
    }
    free(filename);
    vx_int8* net;
    vx_int8* blobs;
    nna_yolo_private_t *yolo;
    std::vector<std::string> input_blobs_name;
    std::vector<vx_tensor> input_blobs_tensor;
    std::vector<std::string> output_blobs_name;
    std::vector<vx_tensor> output_blobs_tensor;
    vx_size input_data_dims[4] = { 1 };
    vx_size output_data_dims[4] = { 1 };
    // the private struct holds C++ members, so it is created with new as well
    yolo = new nna_yolo_private_t();
    if (yolo == NULL) {
        VX_LOG("%s Line:%d alloc yolo error\n", __FUNCTION__, __LINE__);
        delete ai_config;
        return NULL;
    }
    yolo->model_type = ai_config->property.model_type;
    if (yolo->model_type == "yolov5") {
        for (int i = 0; i < 18; i++) {
            yolo->anchors_g[i] = ai_config->property.anchors[i];
        }
    }
    yolo->yolo_outputs_name_g = ai_config->property.yolo_outputs_name;
    yolo->need_nu_freq = ai_config->property.need_nu_freq;
    yolo->freq_nu = ai_config->property.freq_nu;
    yolo->need_vu_freq = ai_config->property.need_vu_freq;
    yolo->freq_vu = ai_config->property.freq_vu;
    yolo->conf_thresh = ai_config->class_attrs_all.conf_thresh;
    yolo->iou_thresh = ai_config->class_attrs_all.iou_thresh;
    yolo->label = ai_config->property.label;
    yolo->context = vxCreateContext();
    ERROR_CHECK_OBJECT(yolo->context);
    ERROR_CHECK_STATUS(vxLoadKernels(yolo->context, "openvx-nn"));
    ERROR_CHECK_STATUS(LoadNetModel(yolo->context, ai_config->property.ezbStr, true, &net));
    ERROR_CHECK_STATUS(LoadNetModel(yolo->context, ai_config->property.binStr, true, &blobs));
    yolo->graph = CreateNetGraph(yolo->context, (vx_uint32*)net, blobs, true);
    ERROR_CHECK_STATUS(vxVerifyGraph(yolo->graph));
    vx_int32 graph_mem = vxGetGraphInOutMemorySize(yolo->graph);
    VX_LOG("%s Line:%d graph_mem:%d.\n", __FUNCTION__, __LINE__, graph_mem);
    // ERROR_CHECK_STATUS(vxProfileGraph(yolo->graph));
    // VX_LOG("%s Line:%d vxProfileGraph success.\n", __FUNCTION__, __LINE__, graph_mem);
    UnLoadNetModel(blobs);
    UnLoadNetModel(net);
    delete ai_config;
    GetNetInputBlob(yolo->graph, input_blobs_name, input_blobs_tensor);
    ERROR_CHECK_STATUS(vxQueryTensor(input_blobs_tensor.at(0), VX_TENSOR_DIMS, input_data_dims, sizeof(input_data_dims)));
    yolo->net_w = input_data_dims[0];
    yolo->net_h = input_data_dims[1];
    yolo->src_w = cfg->width;
    yolo->src_h = cfg->height;
    // letterbox scale: the factor that fits the source image into the network input
    yolo->scale_info = std::min(yolo->net_w * 1.0 / yolo->src_w, yolo->net_h * 1.0 / yolo->src_h);
    yolo->handle_context = vxCreateContext();
    ERROR_CHECK_OBJECT(yolo->handle_context);
    VX_LOG("%s Line:%d before AllocDeviceImageBuffer.\n", __FUNCTION__, __LINE__);
    yolo->dst_handle = AllocDeviceImageBuffer(yolo->handle_context, NNA_BGRA, yolo->net_w, yolo->net_h);
    if (yolo->dst_handle == 0) {
        VX_LOG("%s Line:%d alloc rgba image buffer error.\n", __FUNCTION__, __LINE__);
        return NULL;
    }
    VX_LOG("%s Line:%d AllocDeviceImageBuffer success.\n", __FUNCTION__, __LINE__);
    VX_LOG("%s Line:%d yolo->model_type:%s yolo->net_w:%d yolo->net_h:%d yolo->src_w:%d yolo->src_h:%d yolo->scale_info:%f\n", __FUNCTION__, __LINE__, yolo->model_type.c_str(), yolo->net_w, yolo->net_h, yolo->src_w, yolo->src_h,
           yolo->scale_info);
    input_blobs_name.clear();
    input_blobs_tensor.clear();
    GetNetOutputBlob(yolo->graph, output_blobs_name, output_blobs_tensor);
    ERROR_CHECK_STATUS(vxQueryTensor(output_blobs_tensor.at(0), VX_TENSOR_DIMS, output_data_dims, sizeof(output_data_dims)));
    yolo->nBlobSz = output_blobs_tensor.size();
    yolo->pOutTensor.nTensor = (ax_nna_tensor_t*)AX_MALLOC(sizeof(ax_nna_tensor_t) * output_blobs_tensor.size());
    yolo->pOutTensor.nTensorNum = output_blobs_tensor.size();
    for (vx_size i = 0; i < output_blobs_tensor.size(); ++i) {
        vx_size output_layer_data_dims[4];
        vx_tensor layer_output = output_blobs_tensor[i];
        ERROR_CHECK_STATUS(vxQueryTensor(layer_output, VX_TENSOR_DIMS, output_layer_data_dims, sizeof(output_layer_data_dims)));
        yolo->pOutTensor.nTensor[i].w = output_layer_data_dims[0];
        yolo->pOutTensor.nTensor[i].h = output_layer_data_dims[1];
        yolo->pOutTensor.nTensor[i].c = output_layer_data_dims[2];
        yolo->pOutTensor.nTensor[i].n = output_layer_data_dims[3];
        yolo->strides[i] = int(yolo->net_w / yolo->pOutTensor.nTensor[i].w);
        vx_size fl;
        ERROR_CHECK_STATUS(vxQueryTensor(layer_output, VX_TENSOR_FIXED_POINT_POSITION, &fl, sizeof(fl)));
        yolo->pOutTensor.nTensor[i].fl = fl;
        VX_LOG("%s Line:%d %dth layer output_data_dims:[%d,%d,%d,%d] fl:%d strides:[%d]\n", __FUNCTION__, __LINE__, i + 1, yolo->pOutTensor.nTensor[i].w, yolo->pOutTensor.nTensor[i].h,
               yolo->pOutTensor.nTensor[i].c, yolo->pOutTensor.nTensor[i].n, yolo->pOutTensor.nTensor[i].fl, yolo->strides[i]);
    }
    return yolo;
}
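// Run the model-specific layer decoder over every configured output blob and
// apply NMS. Returns the surviving proposals in network-input coordinates.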
static std::vector<Box> post_process(void *hdl, std::vector<std::string>& output_blobs_name,
                                     std::vector<vx_tensor>& output_blobs_tensor, float conf_thresh, float iou_thresh)
{
    nna_yolo_private_t *yolo = (nna_yolo_private_t *)hdl;
    std::map<std::string, int> output_blobs_name_to_index;
    for (size_t i = 0; i < output_blobs_name.size(); ++i) {
        output_blobs_name_to_index[output_blobs_name[i]] = i;
        // VX_LOG("%s output_blobs_name_to_index[%s]=%d\n", __FUNCTION__, output_blobs_name[i].c_str(), i);
    }
    unsigned int image_size = yolo->net_h;
    unsigned int *strides = yolo->strides;
    unsigned int class_num = yolo->label.size();
    std::vector<Box> proposals;
    for (size_t i = 0; i < yolo->yolo_outputs_name_g.size(); ++i) {
        int layer_index = output_blobs_name_to_index[yolo->yolo_outputs_name_g[i]];
        vx_tensor layer_output = output_blobs_tensor[layer_index];
        vx_int8* layer_output_ptr;
        vx_size layer_output_dims[4]; // w,h,c,n
        vx_enum data_type;
        ERROR_CHECK_STATUS(vxQueryTensor(layer_output, VX_TENSOR_MEMORY_VIRT_ADDR, &layer_output_ptr, sizeof(layer_output_ptr)));
        ERROR_CHECK_STATUS(vxQueryTensor(layer_output, VX_TENSOR_DIMS, &layer_output_dims, sizeof(layer_output_dims)));
        ERROR_CHECK_STATUS(vxQueryTensor(layer_output, VX_TENSOR_DATA_TYPE, &data_type, sizeof(data_type)));
        // VX_LOG("%s Line:%d %s ptr:%d dim:[%d,%d,%d,%d] image_size:%d class_num:%d strides:%d dtype:%d\n", __FUNCTION__, __LINE__, yolo->yolo_outputs_name_g[layer_index].c_str(), layer_output_ptr, layer_output_dims[0]
        //        , layer_output_dims[1], layer_output_dims[2], layer_output_dims[3], image_size, class_num, strides[layer_index], data_type);
        int status = VX_SUCCESS; // initialized in case the model type matches neither branch
        if (yolo->model_type == "yolov5") {
            status = yolov5_layer(proposals, layer_output_ptr, layer_output_dims, image_size, strides[layer_index], yolo->pOutTensor.nTensor[layer_index].fl, data_type, yolo->anchors_g + i * 6, class_num, conf_thresh);
        } else if (yolo->model_type == "yolov8") {
            status = yolov8_layer(proposals, layer_output_ptr, layer_output_dims, image_size, strides[layer_index], yolo->pOutTensor.nTensor[layer_index].fl, data_type, class_num, conf_thresh);
        }
        if (status != VX_SUCCESS) {
            VX_LOG("%s Line:%d %dth layer yolov_layer error.\n", __FUNCTION__, __LINE__, i + 1);
        }
    }
    yolo_nms(proposals, iou_thresh);
    return proposals;
}
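// Run one frame of detection: convert/letterbox the input image to BGRA at the
// network resolution, import it into the graph, run inference, decode the
// outputs, and write the boxes (scaled back to source-image coordinates and
// clipped) into yolo_det_out.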
int nna_custom_det_process(void *hdl, ezax_img_t *pImgIn, ezax_boxes_t *yolo_det_out, float conf_thresh, float iou_thresh)
{
    VX_LOG("%s Line:%d conf_thresh:%f iou_thresh:%f\n", __FUNCTION__, __LINE__, conf_thresh, iou_thresh);
    nna_yolo_private_t *yolo = (nna_yolo_private_t *)hdl;
    input_image input_img;
    std::vector<std::string> input_blobs_name;
    std::vector<vx_tensor> input_blobs_tensor;
    std::vector<std::string> output_blobs_name;
    std::vector<vx_tensor> output_blobs_tensor;
    std::vector<Box> boxes;
    vx_status status;
    vx_size dst_virt_addr = vxGetDeviceMemoryVirtualAddress(yolo->dst_handle);
    vx_size dst_phy_addr = vxGetDeviceMemoryPhysicalAddress(yolo->dst_handle);
    vx_size dst_img_size = vxGetDeviceMemorySize(yolo->dst_handle);
    vx_size dst_phy_addr_uv = dst_phy_addr + yolo->net_w * yolo->net_h;
    // letterbox-resize the source image into the BGRA network-input buffer
    img_cvt_param param;
    param.input_fmt = (img_fmt)pImgIn->img_handle.fmt;
    param.input_width = pImgIn->img_handle.w;
    param.input_height = pImgIn->img_handle.h;
    param.input_crop_x = 0;
    param.input_crop_y = 0;
    param.input_crop_w = pImgIn->img_handle.w;
    param.input_crop_h = pImgIn->img_handle.h;
    param.input_color_range = 0;
    // param.output_fmt = (img_fmt)pImgIn->img_handle.fmt;
    param.output_fmt = NNA_BGRA;
    param.output_width = yolo->net_w;
    param.output_height = yolo->net_h;
    param.output_crop_x = 0;
    param.output_crop_y = 0;
    param.output_crop_w = int(pImgIn->img_handle.w * yolo->scale_info);
    param.output_crop_h = int(pImgIn->img_handle.h * yolo->scale_info);
    status = ImageConvert(yolo->handle_context, &param, pImgIn->img_handle.pPhy, pImgIn->img_handle.pPhy_UV, yolo->dst_handle);
    vxInvalidateDeviceMemoryCache(yolo->handle_context, yolo->dst_handle);
    if (status != VX_SUCCESS) {
        VX_LOG("%s Line:%d ImageConvert error.\n", __FUNCTION__, __LINE__);
        return VX_FAILURE;
    }
    // else VX_LOG("%s Line:%d ImageConvert success.\n", __FUNCTION__, __LINE__);
    // VX_LOG("%s Line:%d dst_phy_addr:%d dst_img_size:%d img_handle.w:%d img_handle.h:%d pImgIn->img_handle.sz:%d img_handle.c:%d img_handle.stride:%d fmt:%d\n", __FUNCTION__, __LINE__, dst_phy_addr, dst_img_size, pImgIn->img_handle.w, pImgIn->img_handle.h, pImgIn->img_handle.sz, pImgIn->img_handle.c, pImgIn->img_handle.stride, param.input_fmt);
    GetNetInputBlob(yolo->graph, input_blobs_name, input_blobs_tensor);
    status = ImportNetInputDataFromMem(yolo->graph, input_blobs_name[0], dst_img_size, (vx_uint8*)dst_virt_addr, 0);
    if (status != VX_SUCCESS) {
        VX_LOG("%s Line:%d import data error.\n", __FUNCTION__, __LINE__);
        return VX_FAILURE;
    }
    // else VX_LOG("%s Line:%d import data success.\n", __FUNCTION__, __LINE__);
    input_blobs_name.clear();
    input_blobs_tensor.clear();
    ERROR_CHECK_STATUS(vxProcessGraph(yolo->graph));
    status = vxFinish(yolo->context);
    if (status != VX_SUCCESS) {
        VX_LOG("%s Line:%d vxFinish error.\n", __FUNCTION__, __LINE__);
        return VX_FAILURE;
    }
    // else VX_LOG("%s Line:%d vxFinish success.\n", __FUNCTION__, __LINE__);
    status = GetNetOutputBlob(yolo->graph, output_blobs_name, output_blobs_tensor);
    if (status != VX_SUCCESS) {
        VX_LOG("%s Line:%d GetNetOutputBlob error.\n", __FUNCTION__, __LINE__);
        return VX_FAILURE;
    }
    // else VX_LOG("%s Line:%d GetNetOutputBlob success.\n", __FUNCTION__, __LINE__);
    boxes = post_process(yolo, output_blobs_name, output_blobs_tensor, conf_thresh, iou_thresh);
    for (unsigned int i = 0; i < boxes.size(); i++) {
        if (i >= MAX_CLASSIFICATION_DET_NUM) {
            break;
        }
        Box box = boxes[i];
        ezax_rt_t *prect = &yolo_det_out->pRect[i];
        // map from network-input coordinates back to the source image and clip
        prect->x0 = (int)((box.xyxy[0]) / yolo->scale_info);
        prect->y0 = (int)((box.xyxy[1]) / yolo->scale_info);
        prect->x1 = (int)((box.xyxy[2]) / yolo->scale_info);
        prect->y1 = (int)((box.xyxy[3]) / yolo->scale_info);
        CLIPRETINA(prect->x0, 0, yolo->src_w);
        CLIPRETINA(prect->y0, 0, yolo->src_h);
        CLIPRETINA(prect->x1, 0, yolo->src_w);
        CLIPRETINA(prect->y1, 0, yolo->src_h);
        prect->s = box.score;
        prect->c = box.index;
        // VX_LOG("%s Line:%d final box x0:%d y0:%d x1:%d y1:%d score:%f c_index:%d\n", __FUNCTION__, __LINE__, prect->x0, prect->y0, prect->x1, prect->y1, prect->s, prect->c);
    }
    yolo_det_out->num = (boxes.size() > MAX_CLASSIFICATION_DET_NUM) ? MAX_CLASSIFICATION_DET_NUM : boxes.size();
    return VX_SUCCESS;
}
int nna_custom_det_close(void *hdl)
{
    nna_yolo_private_t *yolo = (nna_yolo_private_t *)hdl;
    if (yolo == NULL)
        return VX_SUCCESS;
    vxInvalidateDeviceMemoryCacheAll(yolo->context);
    vxInvalidateDeviceMemoryCacheAll(yolo->handle_context);
    ERROR_CHECK_STATUS(vxReleaseGraph(&yolo->graph));
    ERROR_CHECK_STATUS(vxUnloadKernels(yolo->context, "openvx-nn"));
    ERROR_CHECK_STATUS(vxReleaseContext(&yolo->context));
    ERROR_CHECK_STATUS(vxReleaseContext(&yolo->handle_context));
    delete yolo; // allocated with new in nna_custom_det_open
    VX_LOG("%s Line:%d done.\n", __FUNCTION__, __LINE__);
    return VX_SUCCESS;
}
int nna_custom_det_cmd(void *hdl, ezax_custom_det_cmd_t cmd, unsigned int args)
{
    nna_yolo_private_t *yolo = (nna_yolo_private_t *)hdl;
    // switch (cmd) {
    //     ezax_freq_t *pfreq = (ezax_freq_t*)args;
    //     case EZAX_YOLO_RESET_AXFREQ:
    //     {
    //         if (pfreq) {
    //             int dev_id = pfreq->id;
    //             int freq = pfreq->freq;
    //             int pre_freq, set_freq;
    //             vxGetDeviceFreq(yolo->context, "nu", pre_freq);
    //             vxSetDeviceFreq(yolo->context, "nu", freq);
    //             vxGetDeviceFreq(yolo->context, "nu", set_freq);
    //             VX_LOG("%s RESET_AXFREQ pre_freq:%d set_freq:%d\n", __FUNCTION__, pre_freq, set_freq);
    //         }
    //     }
    //     case EZAX_YOLO_GET_AXFREQ:
    //     {
    //     }
    // }
    return VX_SUCCESS;
}