123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
- #pragma once
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
// #include "types.hpp"
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// #include <chrono>
- namespace detection
- {
/// One cell of a detection-head grid together with the stride of that head.
struct GridAndStride
{
    int grid0;  ///< cell index along the width axis (column)
    int grid1;  ///< cell index along the height axis (row)
    int stride; ///< stride of the feature map this cell belongs to
};
- // typedef struct
- // {
- // cv::Rect_<float> rect;
- // int label;
- // float prob;
- // } Object;
- typedef struct
- {
- cv::Rect_<float> rect;
- int label;
- float prob;
- // cv::Point2f landmark[5];
- /* for yolov5-seg */
- cv::Mat mask;
- std::vector<float> mask_feat;
- std::vector<float> kps_feat;
- } Object;
/// Logistic function 1 / (1 + e^-x), returned as float.
static inline float sigmoid(float x)
{
    const auto decay = exp(-x);
    const auto denominator = 1.f + decay;
    return static_cast<float>(1.f / denominator);
}
/**
 * Numerically stable softmax that also returns the expected bin index.
 *
 * Writes softmax(src)[i] into dst[i] for i in [0, length) and returns
 * sum_i i * dst[i], i.e. the expectation of the bin index under the
 * resulting distribution (this is how the callers in this file decode a
 * box-side distance).
 *
 * @param src    input logits, at least `length` floats
 * @param dst    output probabilities, at least `length` floats
 * @param length number of bins; when <= 0 nothing is read or written
 * @return expected bin index, or 0 when `length` <= 0
 */
static float softmax(const float *src, float *dst, int length)
{
    // An empty range would make max_element return the end iterator, which
    // must not be dereferenced.
    if (length <= 0)
    {
        return 0.f;
    }

    // Subtract the maximum logit so the exponentials cannot overflow.
    const float alpha = *std::max_element(src, src + length);

    float denominator = 0;
    for (int i = 0; i < length; ++i)
    {
        dst[i] = exp(src[i] - alpha);
        denominator += dst[i];
    }

    float dis_sum = 0;
    for (int i = 0; i < length; ++i)
    {
        dst[i] /= denominator;
        dis_sum += i * dst[i];
    }
    return dis_sum;
}
- static inline float intersection_area(const Object &a, const Object &b)
- {
- cv::Rect_<float> inter = a.rect & b.rect;
- return inter.area();
- }
- static void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right)
- {
- int i = left;
- int j = right;
- float p = faceobjects[(left + right) / 2].prob;
- while (i <= j)
- {
- while (faceobjects[i].prob > p)
- i++;
- while (faceobjects[j].prob < p)
- j--;
- if (i <= j)
- {
- // swap
- std::swap(faceobjects[i], faceobjects[j]);
- i++;
- j--;
- }
- }
- // #pragma omp parallel sections
- {
- // #pragma omp section
- {
- if (left < j)
- qsort_descent_inplace(faceobjects, left, j);
- }
- // #pragma omp section
- {
- if (i < right)
- qsort_descent_inplace(faceobjects, i, right);
- }
- }
- }
- static void qsort_descent_inplace(std::vector<Object> &faceobjects)
- {
- if (faceobjects.empty())
- return;
- qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
- }
- static void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked, float nms_threshold)
- {
- picked.clear();
- const int n = faceobjects.size();
- std::vector<float> areas(n);
- for (int i = 0; i < n; i++)
- {
- areas[i] = faceobjects[i].rect.area();
- }
- for (int i = 0; i < n; i++)
- {
- const Object &a = faceobjects[i];
- int keep = 1;
- for (int j = 0; j < (int)picked.size(); j++)
- {
- const Object &b = faceobjects[picked[j]];
- // intersection over union
- float inter_area = intersection_area(a, b);
- float union_area = areas[i] + areas[picked[j]] - inter_area;
- // float IoU = inter_area / union_area
- if (inter_area / union_area > nms_threshold)
- keep = 0;
- }
- if (keep)
- picked.push_back(i);
- }
- }
- static void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int> &strides,
- std::vector<GridAndStride> &grid_strides)
- {
- for (auto stride : strides)
- {
- int num_grid_w = target_w / stride;
- int num_grid_h = target_h / stride;
- for (int g1 = 0; g1 < num_grid_h; g1++)
- {
- for (int g0 = 0; g0 < num_grid_w; g0++)
- {
- GridAndStride gs;
- gs.grid0 = g0;
- gs.grid1 = g1;
- gs.stride = stride;
- grid_strides.push_back(gs);
- }
- }
- }
- }
- void reverse_letterbox(std::vector<Object> &proposal, std::vector<Object> &objects, int letterbox_rows, int letterbox_cols, int src_rows,
- int src_cols)
- {
- float scale_letterbox;
- int resize_rows;
- int resize_cols;
- if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
- {
- scale_letterbox = letterbox_rows * 1.0 / src_rows;
- }
- else
- {
- scale_letterbox = letterbox_cols * 1.0 / src_cols;
- }
- resize_cols = int(scale_letterbox * src_cols);
- resize_rows = int(scale_letterbox * src_rows);
- int tmp_h = (letterbox_rows - resize_rows) / 2;
- int tmp_w = (letterbox_cols - resize_cols) / 2;
- float ratio_x = (float)src_rows / resize_rows;
- float ratio_y = (float)src_cols / resize_cols;
- int count = proposal.size();
- objects.resize(count);
- for (int i = 0; i < count; i++)
- {
- objects[i] = proposal[i];
- float x0 = (objects[i].rect.x);
- float y0 = (objects[i].rect.y);
- float x1 = (objects[i].rect.x + objects[i].rect.width);
- float y1 = (objects[i].rect.y + objects[i].rect.height);
- x0 = (x0 - tmp_w) * ratio_x;
- y0 = (y0 - tmp_h) * ratio_y;
- x1 = (x1 - tmp_w) * ratio_x;
- y1 = (y1 - tmp_h) * ratio_y;
- x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
- y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
- x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
- y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
- objects[i].rect.x = x0;
- objects[i].rect.y = y0;
- objects[i].rect.width = x1 - x0;
- objects[i].rect.height = y1 - y0;
- }
- }
- static void get_out_bbox_kps(std::vector<Object> &proposals, std::vector<Object> &objects, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols)
- {
- qsort_descent_inplace(proposals);
- std::vector<int> picked;
- nms_sorted_bboxes(proposals, picked, nms_threshold);
- /* yolov8 draw the result */
- float scale_letterbox;
- int resize_rows;
- int resize_cols;
- if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
- {
- scale_letterbox = letterbox_rows * 1.0 / src_rows;
- }
- else
- {
- scale_letterbox = letterbox_cols * 1.0 / src_cols;
- }
- resize_cols = int(scale_letterbox * src_cols);
- resize_rows = int(scale_letterbox * src_rows);
- int tmp_h = (letterbox_rows - resize_rows) / 2;
- int tmp_w = (letterbox_cols - resize_cols) / 2;
- float ratio_x = (float)src_rows / resize_rows;
- float ratio_y = (float)src_cols / resize_cols;
- int count = picked.size();
- objects.resize(count);
- for (int i = 0; i < count; i++)
- {
- objects[i] = proposals[picked[i]];
- float x0 = (objects[i].rect.x);
- float y0 = (objects[i].rect.y);
- float x1 = (objects[i].rect.x + objects[i].rect.width);
- float y1 = (objects[i].rect.y + objects[i].rect.height);
- x0 = (x0 - tmp_w) * ratio_x;
- y0 = (y0 - tmp_h) * ratio_y;
- x1 = (x1 - tmp_w) * ratio_x;
- y1 = (y1 - tmp_h) * ratio_y;
- x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
- y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
- x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
- y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
- objects[i].rect.x = x0;
- objects[i].rect.y = y0;
- objects[i].rect.width = x1 - x0;
- objects[i].rect.height = y1 - y0;
- for (int j = 0; j < int(objects[i].kps_feat.size() / 3); j++)
- {
- objects[i].kps_feat[j * 3] = std::max(
- std::min((objects[i].kps_feat[j * 3] - tmp_w) * ratio_x, (float)(src_cols - 1)), 0.f);
- objects[i].kps_feat[j * 3 + 1] = std::max(
- std::min((objects[i].kps_feat[j * 3 + 1] - tmp_h) * ratio_y, (float)(src_rows - 1)), 0.f);
- }
- }
- }
- void get_out_bbox(std::vector<Object> &proposals, std::vector<Object> &objects, const float nms_threshold, int letterbox_rows,
- int letterbox_cols, int src_rows, int src_cols)
- {
- qsort_descent_inplace(proposals);
- std::vector<int> picked;
- nms_sorted_bboxes(proposals, picked, nms_threshold);
- /* yolov5 draw the result */
- float scale_letterbox;
- int resize_rows;
- int resize_cols;
- if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
- {
- scale_letterbox = letterbox_rows * 1.0 / src_rows;
- }
- else
- {
- scale_letterbox = letterbox_cols * 1.0 / src_cols;
- }
- resize_cols = int(scale_letterbox * src_cols);
- resize_rows = int(scale_letterbox * src_rows);
- int tmp_h = (letterbox_rows - resize_rows) / 2;
- int tmp_w = (letterbox_cols - resize_cols) / 2;
- float ratio_x = (float)src_rows / resize_rows;
- float ratio_y = (float)src_cols / resize_cols;
- int count = picked.size();
- objects.resize(count);
- for (int i = 0; i < count; i++)
- {
- objects[i] = proposals[picked[i]];
- float x0 = (objects[i].rect.x);
- float y0 = (objects[i].rect.y);
- float x1 = (objects[i].rect.x + objects[i].rect.width);
- float y1 = (objects[i].rect.y + objects[i].rect.height);
- x0 = (x0 - tmp_w) * ratio_x;
- y0 = (y0 - tmp_h) * ratio_y;
- x1 = (x1 - tmp_w) * ratio_x;
- y1 = (y1 - tmp_h) * ratio_y;
- x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
- y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
- x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
- y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
- objects[i].rect.x = x0;
- objects[i].rect.y = y0;
- objects[i].rect.width = x1 - x0;
- objects[i].rect.height = y1 - y0;
- }
- }
- static void generate_proposals_yolov8_pose_native(int stride, const float *feat, const float *feat_kps, float prob_threshold, std::vector<Object> &objects,
- int letterbox_cols, int letterbox_rows, const int num_point = 17, int cls_num = 1)
- {
- int feat_w = letterbox_cols / stride;
- int feat_h = letterbox_rows / stride;
- int reg_max = 16;
- auto feat_ptr = feat;
- std::vector<float> dis_after_sm(reg_max, 0.f);
- for (int h = 0; h <= feat_h - 1; h++)
- {
- for (int w = 0; w <= feat_w - 1; w++)
- {
- // process cls score
- int class_index = 0;
- float class_score = -FLT_MAX;
- for (int s = 0; s <= cls_num - 1; s++)
- {
- float score = feat_ptr[s + 4 * reg_max];
- if (score > class_score)
- {
- class_index = s;
- class_score = score;
- }
- }
- float box_prob = sigmoid(class_score);
- if (box_prob > prob_threshold)
- {
- float pred_ltrb[4];
- for (int k = 0; k < 4; k++)
- {
- float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max);
- pred_ltrb[k] = dis * stride;
- }
- float pb_cx = (w + 0.5f) * stride;
- float pb_cy = (h + 0.5f) * stride;
- float x0 = pb_cx - pred_ltrb[0];
- float y0 = pb_cy - pred_ltrb[1];
- float x1 = pb_cx + pred_ltrb[2];
- float y1 = pb_cy + pred_ltrb[3];
- x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f);
- y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f);
- x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f);
- y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f);
- Object obj;
- obj.rect.x = x0;
- obj.rect.y = y0;
- obj.rect.width = x1 - x0;
- obj.rect.height = y1 - y0;
- obj.label = class_index;
- obj.prob = box_prob;
- obj.kps_feat.clear();
- for (int k = 0; k < num_point; k++)
- {
- float kps_x = (feat_kps[k * 3] * 2.f + w) * stride;
- float kps_y = (feat_kps[k * 3 + 1] * 2.f + h) * stride;
- float kps_s = sigmoid(feat_kps[k * 3 + 2]);
- obj.kps_feat.push_back(kps_x);
- obj.kps_feat.push_back(kps_y);
- obj.kps_feat.push_back(kps_s);
- }
- objects.push_back(obj);
- }
- feat_ptr += (cls_num + 4 * reg_max);
- feat_kps += 3 * num_point;
- }
- }
- }
- static void generate_proposals_yolov8_seg_native(int stride, const float *feat, const float *feat_seg, float prob_threshold, std::vector<Object> &objects,
- int letterbox_cols, int letterbox_rows, int cls_num = 80, int mask_proto_dim = 32)
- {
- int feat_w = letterbox_cols / stride;
- int feat_h = letterbox_rows / stride;
- int reg_max = 16;
- auto feat_ptr = feat;
- auto feat_seg_ptr = feat_seg;
- std::vector<float> dis_after_sm(reg_max, 0.f);
- for (int h = 0; h <= feat_h - 1; h++)
- {
- for (int w = 0; w <= feat_w - 1; w++)
- {
- // process cls score
- int class_index = 0;
- float class_score = -FLT_MAX;
- for (int s = 0; s < cls_num; s++)
- {
- float score = feat_ptr[s + 4 * reg_max];
- if (score > class_score)
- {
- class_index = s;
- class_score = score;
- }
- }
- float box_prob = sigmoid(class_score);
- if (box_prob > prob_threshold)
- {
- float pred_ltrb[4];
- for (int k = 0; k < 4; k++)
- {
- float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max);
- pred_ltrb[k] = dis * stride;
- }
- float pb_cx = (w + 0.5f) * stride;
- float pb_cy = (h + 0.5f) * stride;
- float x0 = pb_cx - pred_ltrb[0];
- float y0 = pb_cy - pred_ltrb[1];
- float x1 = pb_cx + pred_ltrb[2];
- float y1 = pb_cy + pred_ltrb[3];
- x0 = std::max(std::min(x0, (float)(letterbox_cols - 1)), 0.f);
- y0 = std::max(std::min(y0, (float)(letterbox_rows - 1)), 0.f);
- x1 = std::max(std::min(x1, (float)(letterbox_cols - 1)), 0.f);
- y1 = std::max(std::min(y1, (float)(letterbox_rows - 1)), 0.f);
- Object obj;
- obj.rect.x = x0;
- obj.rect.y = y0;
- obj.rect.width = x1 - x0;
- obj.rect.height = y1 - y0;
- obj.label = class_index;
- obj.prob = box_prob;
- obj.mask_feat.resize(mask_proto_dim);
- memcpy(obj.mask_feat.data(), feat_seg_ptr, sizeof(float) * mask_proto_dim);
- // for (int k = 0; k < mask_proto_dim; k++)
- // {
- // obj.mask_feat[k] = feat_seg_ptr[k];
- // }
- objects.push_back(obj);
- }
- feat_ptr += cls_num + 4 * reg_max;
- feat_seg_ptr += mask_proto_dim;
- }
- }
- }
- void get_out_bbox_mask(std::vector<Object> &proposals, std::vector<Object> &objects, const float *mask_proto, int mask_proto_dim, int mask_stride, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols)
- {
- qsort_descent_inplace(proposals);
- std::vector<int> picked;
- nms_sorted_bboxes(proposals, picked, nms_threshold);
- /* yolov5 draw the result */
- float scale_letterbox;
- int resize_rows;
- int resize_cols;
- if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
- {
- scale_letterbox = letterbox_rows * 1.0 / src_rows;
- }
- else
- {
- scale_letterbox = letterbox_cols * 1.0 / src_cols;
- }
- resize_cols = int(scale_letterbox * src_cols);
- resize_rows = int(scale_letterbox * src_rows);
- int tmp_h = (letterbox_rows - resize_rows) / 2;
- int tmp_w = (letterbox_cols - resize_cols) / 2;
- float ratio_x = (float)src_rows / resize_rows;
- float ratio_y = (float)src_cols / resize_cols;
- int mask_proto_h = int(letterbox_rows / mask_stride);
- int mask_proto_w = int(letterbox_cols / mask_stride);
- int count = picked.size();
- objects.resize(count);
- for (int i = 0; i < count; i++)
- {
- objects[i] = proposals[picked[i]];
- float x0 = (objects[i].rect.x);
- float y0 = (objects[i].rect.y);
- float x1 = (objects[i].rect.x + objects[i].rect.width);
- float y1 = (objects[i].rect.y + objects[i].rect.height);
- /* naive RoiAlign by opencv */
- int hstart = std::floor(objects[i].rect.y / mask_stride);
- int hend = std::ceil(objects[i].rect.y / mask_stride + objects[i].rect.height / mask_stride);
- int wstart = std::floor(objects[i].rect.x / mask_stride);
- int wend = std::ceil(objects[i].rect.x / mask_stride + objects[i].rect.width / mask_stride);
- hstart = std::min(std::max(hstart, 0), mask_proto_h);
- wstart = std::min(std::max(wstart, 0), mask_proto_w);
- hend = std::min(std::max(hend, 0), mask_proto_h);
- wend = std::min(std::max(wend, 0), mask_proto_w);
- int mask_w = wend - wstart;
- int mask_h = hend - hstart;
- cv::Mat mask = cv::Mat(mask_h, mask_w, CV_32FC1);
- if (mask_w > 0 && mask_h > 0)
- {
- std::vector<cv::Range> roi_ranges;
- roi_ranges.push_back(cv::Range(0, 1));
- roi_ranges.push_back(cv::Range::all());
- roi_ranges.push_back(cv::Range(hstart, hend));
- roi_ranges.push_back(cv::Range(wstart, wend));
- cv::Mat mask_protos = cv::Mat(mask_proto_dim, mask_proto_h * mask_proto_w, CV_32FC1, (float *)mask_proto);
- int sz[] = {1, mask_proto_dim, mask_proto_h, mask_proto_w};
- cv::Mat mask_protos_reshape = mask_protos.reshape(1, 4, sz);
- cv::Mat protos = mask_protos_reshape(roi_ranges).clone().reshape(0, {mask_proto_dim, mask_w * mask_h});
- cv::Mat mask_proposals = cv::Mat(1, mask_proto_dim, CV_32FC1, (float *)objects[i].mask_feat.data());
- cv::Mat masks_feature = (mask_proposals * protos);
- /* sigmoid */
- cv::exp(-masks_feature.reshape(1, {mask_h, mask_w}), mask);
- mask = 1.0 / (1.0 + mask);
- }
- x0 = (x0 - tmp_w) * ratio_x;
- y0 = (y0 - tmp_h) * ratio_y;
- x1 = (x1 - tmp_w) * ratio_x;
- y1 = (y1 - tmp_h) * ratio_y;
- x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
- y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
- x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
- y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);
- objects[i].rect.x = x0;
- objects[i].rect.y = y0;
- objects[i].rect.width = x1 - x0;
- objects[i].rect.height = y1 - y0;
- cv::resize(mask, mask, cv::Size((int)objects[i].rect.width, (int)objects[i].rect.height));
- objects[i].mask = mask > 0.5;
- // cv::Mat mask_8bit;
- // objects[i].mask.convertTo(mask_8bit, CV_8U, 255.0);
- // // 获取当前时间戳
- // auto timestamp = std::chrono::system_clock::now();
- // auto seconds = std::chrono::duration_cast<std::chrono::seconds>(timestamp.time_since_epoch()).count();
- // // 使用时间戳作为文件名
- // std::string filename = "output_" + std::to_string(seconds)+"_"+std::to_string(objects[i].label)+ ".jpg";
- // // 保存为 JPEG 文件
- // cv::imwrite(filename, mask_8bit);
- }
- }
- } // namespace detection
|