/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * License); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #pragma once #include #include #include #include #include // #include "types.hpp" #include #include #include // #include namespace detection { typedef struct { int grid0; int grid1; int stride; } GridAndStride; // typedef struct // { // cv::Rect_ rect; // int label; // float prob; // } Object; typedef struct { cv::Rect_ rect; int label; float prob; // cv::Point2f landmark[5]; /* for yolov5-seg */ cv::Mat mask; std::vector mask_feat; std::vector kps_feat; } Object; static inline float sigmoid(float x) { return static_cast(1.f / (1.f + exp(-x))); } static float softmax(const float *src, float *dst, int length) { const float alpha = *std::max_element(src, src + length); float denominator = 0; float dis_sum = 0; for (int i = 0; i < length; ++i) { dst[i] = exp(src[i] - alpha); denominator += dst[i]; } for (int i = 0; i < length; ++i) { dst[i] /= denominator; dis_sum += i * dst[i]; } return dis_sum; } static inline float intersection_area(const Object &a, const Object &b) { cv::Rect_ inter = a.rect & b.rect; return inter.area(); } static void qsort_descent_inplace(std::vector &faceobjects, int left, int right) { int i = left; int j = right; float p = faceobjects[(left + right) / 2].prob; while (i <= j) { while (faceobjects[i].prob > p) i++; while (faceobjects[j].prob < p) j--; if (i <= j) { // swap std::swap(faceobjects[i], faceobjects[j]); i++; j--; } } // #pragma omp parallel sections { // #pragma omp section { if (left < j) qsort_descent_inplace(faceobjects, left, j); } // #pragma omp section { if (i < right) qsort_descent_inplace(faceobjects, i, right); } } } static void qsort_descent_inplace(std::vector &faceobjects) { if (faceobjects.empty()) return; qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1); } static void nms_sorted_bboxes(const std::vector &faceobjects, std::vector &picked, float nms_threshold) { picked.clear(); const int n = faceobjects.size(); std::vector areas(n); for (int i = 0; i < n; i++) { areas[i] = faceobjects[i].rect.area(); } for (int i = 0; i < n; i++) { const Object &a = faceobjects[i]; int keep = 1; for (int j = 0; j < (int)picked.size(); j++) { const Object &b = faceobjects[picked[j]]; // intersection over union float inter_area = intersection_area(a, b); float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) keep = 0; } if (keep) picked.push_back(i); } } static void generate_grids_and_stride(const int target_w, const int target_h, std::vector &strides, std::vector &grid_strides) { for (auto stride : strides) { int num_grid_w = target_w / stride; int num_grid_h = target_h / stride; for (int g1 = 0; g1 < num_grid_h; g1++) { for (int g0 = 0; g0 < num_grid_w; g0++) { GridAndStride gs; gs.grid0 = g0; gs.grid1 = g1; gs.stride = stride; grid_strides.push_back(gs); } } } } void reverse_letterbox(std::vector &proposal, std::vector &objects, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols) { float scale_letterbox; int resize_rows; int resize_cols; if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols)) { scale_letterbox = letterbox_rows * 1.0 / src_rows; } else { scale_letterbox = letterbox_cols * 1.0 / src_cols; } resize_cols = int(scale_letterbox * src_cols); resize_rows = int(scale_letterbox * src_rows); int tmp_h = (letterbox_rows - resize_rows) / 2; int tmp_w = (letterbox_cols - resize_cols) / 2; float ratio_x = (float)src_rows / resize_rows; float ratio_y = (float)src_cols / resize_cols; int count = proposal.size(); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposal[i]; float x0 = (objects[i].rect.x); float y0 = (objects[i].rect.y); float x1 = (objects[i].rect.x + objects[i].rect.width); float y1 = (objects[i].rect.y + objects[i].rect.height); x0 = (x0 - tmp_w) * ratio_x; y0 = (y0 - tmp_h) * ratio_y; x1 = (x1 - tmp_w) * ratio_x; y1 = (y1 - tmp_h) * ratio_y; x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; } } static void get_out_bbox_kps(std::vector &proposals, std::vector &objects, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols) { qsort_descent_inplace(proposals); std::vector picked; nms_sorted_bboxes(proposals, picked, nms_threshold); /* yolov8 draw the result */ float scale_letterbox; int resize_rows; int resize_cols; if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols)) { scale_letterbox = letterbox_rows * 1.0 / src_rows; } else { scale_letterbox = letterbox_cols * 1.0 / src_cols; } resize_cols = int(scale_letterbox * src_cols); resize_rows = int(scale_letterbox * src_rows); int tmp_h = (letterbox_rows - resize_rows) / 2; int tmp_w = (letterbox_cols - resize_cols) / 2; float ratio_x = (float)src_rows / resize_rows; float ratio_y = (float)src_cols / resize_cols; int count = picked.size(); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; float x0 = (objects[i].rect.x); float y0 = (objects[i].rect.y); float x1 = (objects[i].rect.x + objects[i].rect.width); float y1 = (objects[i].rect.y + objects[i].rect.height); x0 = (x0 - tmp_w) * ratio_x; y0 = (y0 - tmp_h) * ratio_y; x1 = (x1 - tmp_w) * ratio_x; y1 = (y1 - tmp_h) * ratio_y; x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; for (int j = 0; j < int(objects[i].kps_feat.size() / 3); j++) { objects[i].kps_feat[j * 3] = std::max( std::min((objects[i].kps_feat[j * 3] - tmp_w) * ratio_x, (float)(src_cols - 1)), 0.f); objects[i].kps_feat[j * 3 + 1] = std::max( std::min((objects[i].kps_feat[j * 3 + 1] - tmp_h) * ratio_y, (float)(src_rows - 1)), 0.f); } } } void get_out_bbox(std::vector &proposals, std::vector &objects, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols) { qsort_descent_inplace(proposals); std::vector picked; nms_sorted_bboxes(proposals, picked, nms_threshold); /* yolov5 draw the result */ float scale_letterbox; int resize_rows; int resize_cols; if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols)) { scale_letterbox = letterbox_rows * 1.0 / src_rows; } else { scale_letterbox = letterbox_cols * 1.0 / src_cols; } resize_cols = int(scale_letterbox * src_cols); resize_rows = int(scale_letterbox * src_rows); int tmp_h = (letterbox_rows - resize_rows) / 2; int tmp_w = (letterbox_cols - resize_cols) / 2; float ratio_x = (float)src_rows / resize_rows; float ratio_y = (float)src_cols / resize_cols; int count = picked.size(); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; float x0 = (objects[i].rect.x); float y0 = (objects[i].rect.y); float x1 = (objects[i].rect.x + objects[i].rect.width); float y1 = (objects[i].rect.y + objects[i].rect.height); x0 = (x0 - tmp_w) * ratio_x; y0 = (y0 - tmp_h) * ratio_y; x1 = (x1 - tmp_w) * ratio_x; y1 = (y1 - tmp_h) * ratio_y; x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; } } static void generate_proposals_yolov8_pose_native(int stride, const float *feat, const float *feat_kps, float prob_threshold, std::vector &objects, int letterbox_cols, int letterbox_rows, const int num_point = 17, int cls_num = 1) { int feat_w = letterbox_cols / stride; int feat_h = letterbox_rows / stride; int reg_max = 16; auto feat_ptr = feat; std::vector dis_after_sm(reg_max, 0.f); for (int h = 0; h <= feat_h - 1; h++) { for (int w = 0; w <= feat_w - 1; w++) { // process cls score int class_index = 0; float class_score = -FLT_MAX; for (int s = 0; s <= cls_num - 1; s++) { float score = feat_ptr[s + 4 * reg_max]; if (score > class_score) { class_index = s; class_score = score; } } float box_prob = sigmoid(class_score); if (box_prob > prob_threshold) { float pred_ltrb[4]; for (int k = 0; k < 4; k++) { float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max); pred_ltrb[k] = dis * stride; } float pb_cx = (w + 0.5f) * stride; float pb_cy = (h + 0.5f) * stride; float x0 = pb_cx - pred_ltrb[0]; float y0 = pb_cy - pred_ltrb[1]; float x1 = pb_cx + pred_ltrb[2]; float y1 = pb_cy + pred_ltrb[3]; x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f); Object obj; obj.rect.x = x0; obj.rect.y = y0; obj.rect.width = x1 - x0; obj.rect.height = y1 - y0; obj.label = class_index; obj.prob = box_prob; obj.kps_feat.clear(); for (int k = 0; k < num_point; k++) { float kps_x = (feat_kps[k * 3] * 2.f + w) * stride; float kps_y = (feat_kps[k * 3 + 1] * 2.f + h) * stride; float kps_s = sigmoid(feat_kps[k * 3 + 2]); obj.kps_feat.push_back(kps_x); obj.kps_feat.push_back(kps_y); obj.kps_feat.push_back(kps_s); } objects.push_back(obj); } feat_ptr += (cls_num + 4 * reg_max); feat_kps += 3 * num_point; } } } static void generate_proposals_yolov8_seg_native(int stride, const float *feat, const float *feat_seg, float prob_threshold, std::vector &objects, int letterbox_cols, int letterbox_rows, int cls_num = 80, int mask_proto_dim = 32) { int feat_w = letterbox_cols / stride; int feat_h = letterbox_rows / stride; int reg_max = 16; auto feat_ptr = feat; auto feat_seg_ptr = feat_seg; std::vector dis_after_sm(reg_max, 0.f); for (int h = 0; h <= feat_h - 1; h++) { for (int w = 0; w <= feat_w - 1; w++) { // process cls score int class_index = 0; float class_score = -FLT_MAX; for (int s = 0; s < cls_num; s++) { float score = feat_ptr[s + 4 * reg_max]; if (score > class_score) { class_index = s; class_score = score; } } float box_prob = sigmoid(class_score); if (box_prob > prob_threshold) { float pred_ltrb[4]; for (int k = 0; k < 4; k++) { float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max); pred_ltrb[k] = dis * stride; } float pb_cx = (w + 0.5f) * stride; float pb_cy = (h + 0.5f) * stride; float x0 = pb_cx - pred_ltrb[0]; float y0 = pb_cy - pred_ltrb[1]; float x1 = pb_cx + pred_ltrb[2]; float y1 = pb_cy + pred_ltrb[3]; x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f); Object obj; obj.rect.x = x0; obj.rect.y = y0; obj.rect.width = x1 - x0; obj.rect.height = y1 - y0; obj.label = class_index; obj.prob = box_prob; obj.mask_feat.resize(mask_proto_dim); memcpy(obj.mask_feat.data(), feat_seg_ptr, sizeof(float) * mask_proto_dim); // for (int k = 0; k < mask_proto_dim; k++) // { // obj.mask_feat[k] = feat_seg_ptr[k]; // } objects.push_back(obj); } feat_ptr += cls_num + 4 * reg_max; feat_seg_ptr += mask_proto_dim; } } } void get_out_bbox_mask(std::vector &proposals, std::vector &objects, const float *mask_proto, int mask_proto_dim, int mask_stride, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols) { qsort_descent_inplace(proposals); std::vector picked; nms_sorted_bboxes(proposals, picked, nms_threshold); /* yolov5 draw the result */ float scale_letterbox; int resize_rows; int resize_cols; if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols)) { scale_letterbox = letterbox_rows * 1.0 / src_rows; } else { scale_letterbox = letterbox_cols * 1.0 / src_cols; } resize_cols = int(scale_letterbox * src_cols); resize_rows = int(scale_letterbox * src_rows); int tmp_h = (letterbox_rows - resize_rows) / 2; int tmp_w = (letterbox_cols - resize_cols) / 2; float ratio_x = (float)src_rows / resize_rows; float ratio_y = (float)src_cols / resize_cols; int mask_proto_h = int(letterbox_rows / mask_stride); int mask_proto_w = int(letterbox_cols / mask_stride); int count = picked.size(); objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; float x0 = (objects[i].rect.x); float y0 = (objects[i].rect.y); float x1 = (objects[i].rect.x + objects[i].rect.width); float y1 = (objects[i].rect.y + objects[i].rect.height); /* naive RoiAlign by opencv */ int hstart = std::floor(objects[i].rect.y / mask_stride); int hend = std::ceil(objects[i].rect.y / mask_stride + objects[i].rect.height / mask_stride); int wstart = std::floor(objects[i].rect.x / mask_stride); int wend = std::ceil(objects[i].rect.x / mask_stride + objects[i].rect.width / mask_stride); hstart = std::min(std::max(hstart, 0), mask_proto_h); wstart = std::min(std::max(wstart, 0), mask_proto_w); hend = std::min(std::max(hend, 0), mask_proto_h); wend = std::min(std::max(wend, 0), mask_proto_w); int mask_w = wend - wstart; int mask_h = hend - hstart; cv::Mat mask = cv::Mat(mask_h, mask_w, CV_32FC1); if (mask_w > 0 && mask_h > 0) { std::vector roi_ranges; roi_ranges.push_back(cv::Range(0, 1)); roi_ranges.push_back(cv::Range::all()); roi_ranges.push_back(cv::Range(hstart, hend)); roi_ranges.push_back(cv::Range(wstart, wend)); cv::Mat mask_protos = cv::Mat(mask_proto_dim, mask_proto_h * mask_proto_w, CV_32FC1, (float *)mask_proto); int sz[] = {1, mask_proto_dim, mask_proto_h, mask_proto_w}; cv::Mat mask_protos_reshape = mask_protos.reshape(1, 4, sz); cv::Mat protos = mask_protos_reshape(roi_ranges).clone().reshape(0, {mask_proto_dim, mask_w * mask_h}); cv::Mat mask_proposals = cv::Mat(1, mask_proto_dim, CV_32FC1, (float *)objects[i].mask_feat.data()); cv::Mat masks_feature = (mask_proposals * protos); /* sigmoid */ cv::exp(-masks_feature.reshape(1, {mask_h, mask_w}), mask); mask = 1.0 / (1.0 + mask); } x0 = (x0 - tmp_w) * ratio_x; y0 = (y0 - tmp_h) * ratio_y; x1 = (x1 - tmp_w) * ratio_x; y1 = (y1 - tmp_h) * ratio_y; x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f); y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f); x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f); y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; cv::resize(mask, mask, cv::Size((int)objects[i].rect.width, (int)objects[i].rect.height)); objects[i].mask = mask > 0.5; // cv::Mat mask_8bit; // objects[i].mask.convertTo(mask_8bit, CV_8U, 255.0); // // 获取当前时间戳 // auto timestamp = std::chrono::system_clock::now(); // auto seconds = std::chrono::duration_cast(timestamp.time_since_epoch()).count(); // // 使用时间戳作为文件名 // std::string filename = "output_" + std::to_string(seconds)+"_"+std::to_string(objects[i].label)+ ".jpg"; // // 保存为 JPEG 文件 // cv::imwrite(filename, mask_8bit); } } } // namespace detection