// Page-header residue from the extracted listing: huangmeng / asj_ai
							/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * License); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * AS IS BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#pragma once

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
// #include "types.hpp"
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

// #include <chrono>

namespace detection
{
    // One cell of a detection grid together with the stride of the level it belongs to.
    struct GridAndStride
    {
        int grid0;  // cell index along the width  (inner loop of generate_grids_and_stride)
        int grid1;  // cell index along the height (outer loop of generate_grids_and_stride)
        int stride; // stride of the level this cell was generated for
    };

    // typedef struct
    // {
    //     cv::Rect_<float> rect;
    //     int label;
    //     float prob;
    // } Object;

    // A single detection. The same record is used by the plain box, pose and
    // segmentation paths; fields a path does not use are simply left empty.
    struct Object
    {
        cv::Rect_<float> rect; // bounding box (x, y, width, height)
        int label;             // index of the best-scoring class
        float prob;            // score; proposals are sorted and suppressed on this value
        // cv::Point2f landmark[5];

        /* segmentation */
        cv::Mat mask;                 // thresholded instance mask, written by get_out_bbox_mask
        std::vector<float> mask_feat; // per-proposal mask coefficients copied from the segmentation head

        /* pose */
        std::vector<float> kps_feat; // keypoints stored as consecutive (x, y, score) triples
    };

    // Logistic function: 1 / (1 + e^(-x)).
    static inline float sigmoid(float x)
    {
        const auto denominator = 1.f + exp(-x);
        return static_cast<float>(1.f / denominator);
    }
    // Softmax over src[0 .. length) written to dst[0 .. length).
    //
    // Returns the expectation of the index under the resulting distribution,
    // i.e. sum(i * dst[i]); the proposal generators multiply this by the stride
    // to obtain a box-side distance.
    //
    // length must be at least 1: the maximum of the input is dereferenced
    // unconditionally.
    static float softmax(const float *src, float *dst, int length)
    {
        // Shift by the largest input so that every exponent is <= 0.
        const float peak = *std::max_element(src, src + length);

        // Pass 1: un-normalised weights and their sum.
        float total = 0;
        for (int idx = 0; idx < length; ++idx)
        {
            const float weight = exp(src[idx] - peak);
            dst[idx] = weight;
            total += weight;
        }

        // Pass 2: normalise and accumulate the expected index.
        float expectation = 0;
        for (int idx = 0; idx < length; ++idx)
        {
            dst[idx] /= total;
            expectation += idx * dst[idx];
        }
        return expectation;
    }

    static inline float intersection_area(const Object &a, const Object &b)
    {
        cv::Rect_<float> inter = a.rect & b.rect;
        return inter.area();
    }

    // Recursive in-place quicksort of faceobjects[left .. right] (both inclusive),
    // ordering by prob from highest to lowest. The pivot is the prob of the middle
    // element. Callers must pass a non-empty, valid index range.
    static void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right)
    {
        const float pivot = faceobjects[(left + right) / 2].prob;
        int lo = left;
        int hi = right;

        while (lo <= hi)
        {
            // Advance past elements that already sit on the correct side of the pivot.
            while (faceobjects[lo].prob > pivot)
                ++lo;

            while (faceobjects[hi].prob < pivot)
                --hi;

            if (lo > hi)
                break;

            std::swap(faceobjects[lo], faceobjects[hi]);
            ++lo;
            --hi;
        }

        // The two remaining sub-ranges do not overlap and are sorted independently.
        if (left < hi)
            qsort_descent_inplace(faceobjects, left, hi);

        if (lo < right)
            qsort_descent_inplace(faceobjects, lo, right);
    }

    static void qsort_descent_inplace(std::vector<Object> &faceobjects)
    {
        if (faceobjects.empty())
            return;

        qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
    }

    static void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked, float nms_threshold)
    {
        picked.clear();

        const int n = faceobjects.size();

        std::vector<float> areas(n);
        for (int i = 0; i < n; i++)
        {
            areas[i] = faceobjects[i].rect.area();
        }

        for (int i = 0; i < n; i++)
        {
            const Object &a = faceobjects[i];

            int keep = 1;
            for (int j = 0; j < (int)picked.size(); j++)
            {
                const Object &b = faceobjects[picked[j]];

                // intersection over union
                float inter_area = intersection_area(a, b);
                float union_area = areas[i] + areas[picked[j]] - inter_area;
                // float IoU = inter_area / union_area
                if (inter_area / union_area > nms_threshold)
                    keep = 0;
            }

            if (keep)
                picked.push_back(i);
        }
    }

    // Appends one GridAndStride entry per grid cell for every stride in `strides`.
    //
    // For each stride the grid has target_w / stride columns and target_h / stride rows
    // (integer division); cells are emitted row by row. Existing contents of
    // grid_strides are kept - new entries are pushed at the end.
    static void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int> &strides,
                                          std::vector<GridAndStride> &grid_strides)
    {
        for (const int stride : strides)
        {
            const int cols = target_w / stride;
            const int rows = target_h / stride;

            for (int row = 0; row < rows; ++row)
            {
                for (int col = 0; col < cols; ++col)
                {
                    // Member order is grid0 (column), grid1 (row), stride.
                    grid_strides.push_back(GridAndStride{col, row, stride});
                }
            }
        }
    }

    void reverse_letterbox(std::vector<Object> &proposal, std::vector<Object> &objects, int letterbox_rows, int letterbox_cols, int src_rows,
                           int src_cols)
    {
        float scale_letterbox;
        int resize_rows;
        int resize_cols;
        if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
        {
            scale_letterbox = letterbox_rows * 1.0 / src_rows;
        }
        else
        {
            scale_letterbox = letterbox_cols * 1.0 / src_cols;
        }
        resize_cols = int(scale_letterbox * src_cols);
        resize_rows = int(scale_letterbox * src_rows);

        int tmp_h = (letterbox_rows - resize_rows) / 2;
        int tmp_w = (letterbox_cols - resize_cols) / 2;

        float ratio_x = (float)src_rows / resize_rows;
        float ratio_y = (float)src_cols / resize_cols;

        int count = proposal.size();

        objects.resize(count);
        for (int i = 0; i < count; i++)
        {
            objects[i] = proposal[i];
            float x0 = (objects[i].rect.x);
            float y0 = (objects[i].rect.y);
            float x1 = (objects[i].rect.x + objects[i].rect.width);
            float y1 = (objects[i].rect.y + objects[i].rect.height);

            x0 = (x0 - tmp_w) * ratio_x;
            y0 = (y0 - tmp_h) * ratio_y;
            x1 = (x1 - tmp_w) * ratio_x;
            y1 = (y1 - tmp_h) * ratio_y;

            x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
            y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
            x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
            y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);

            objects[i].rect.x = x0;
            objects[i].rect.y = y0;
            objects[i].rect.width = x1 - x0;
            objects[i].rect.height = y1 - y0;
        }
    }

    static void get_out_bbox_kps(std::vector<Object> &proposals, std::vector<Object> &objects, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols)
    {
        qsort_descent_inplace(proposals);
        std::vector<int> picked;
        nms_sorted_bboxes(proposals, picked, nms_threshold);

        /* yolov8 draw the result */
        float scale_letterbox;
        int resize_rows;
        int resize_cols;
        if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
        {
            scale_letterbox = letterbox_rows * 1.0 / src_rows;
        }
        else
        {
            scale_letterbox = letterbox_cols * 1.0 / src_cols;
        }
        resize_cols = int(scale_letterbox * src_cols);
        resize_rows = int(scale_letterbox * src_rows);

        int tmp_h = (letterbox_rows - resize_rows) / 2;
        int tmp_w = (letterbox_cols - resize_cols) / 2;

        float ratio_x = (float)src_rows / resize_rows;
        float ratio_y = (float)src_cols / resize_cols;

        int count = picked.size();

        objects.resize(count);
        for (int i = 0; i < count; i++)
        {
            objects[i] = proposals[picked[i]];
            float x0 = (objects[i].rect.x);
            float y0 = (objects[i].rect.y);
            float x1 = (objects[i].rect.x + objects[i].rect.width);
            float y1 = (objects[i].rect.y + objects[i].rect.height);

            x0 = (x0 - tmp_w) * ratio_x;
            y0 = (y0 - tmp_h) * ratio_y;
            x1 = (x1 - tmp_w) * ratio_x;
            y1 = (y1 - tmp_h) * ratio_y;

            x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
            y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
            x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
            y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);

            objects[i].rect.x = x0;
            objects[i].rect.y = y0;
            objects[i].rect.width = x1 - x0;
            objects[i].rect.height = y1 - y0;

            for (int j = 0; j < int(objects[i].kps_feat.size() / 3); j++)
            {
                objects[i].kps_feat[j * 3] = std::max(
                    std::min((objects[i].kps_feat[j * 3] - tmp_w) * ratio_x, (float)(src_cols - 1)), 0.f);
                objects[i].kps_feat[j * 3 + 1] = std::max(
                    std::min((objects[i].kps_feat[j * 3 + 1] - tmp_h) * ratio_y, (float)(src_rows - 1)), 0.f);
            }
        }
    }

    void get_out_bbox(std::vector<Object> &proposals, std::vector<Object> &objects, const float nms_threshold, int letterbox_rows,
                      int letterbox_cols, int src_rows, int src_cols)
    {
        qsort_descent_inplace(proposals);
        std::vector<int> picked;
        nms_sorted_bboxes(proposals, picked, nms_threshold);

        /* yolov5 draw the result */
        float scale_letterbox;
        int resize_rows;
        int resize_cols;
        if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
        {
            scale_letterbox = letterbox_rows * 1.0 / src_rows;
        }
        else
        {
            scale_letterbox = letterbox_cols * 1.0 / src_cols;
        }
        resize_cols = int(scale_letterbox * src_cols);
        resize_rows = int(scale_letterbox * src_rows);

        int tmp_h = (letterbox_rows - resize_rows) / 2;
        int tmp_w = (letterbox_cols - resize_cols) / 2;

        float ratio_x = (float)src_rows / resize_rows;
        float ratio_y = (float)src_cols / resize_cols;

        int count = picked.size();

        objects.resize(count);
        for (int i = 0; i < count; i++)
        {
            objects[i] = proposals[picked[i]];
            float x0 = (objects[i].rect.x);
            float y0 = (objects[i].rect.y);
            float x1 = (objects[i].rect.x + objects[i].rect.width);
            float y1 = (objects[i].rect.y + objects[i].rect.height);

            x0 = (x0 - tmp_w) * ratio_x;
            y0 = (y0 - tmp_h) * ratio_y;
            x1 = (x1 - tmp_w) * ratio_x;
            y1 = (y1 - tmp_h) * ratio_y;

            x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
            y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
            x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
            y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);

            objects[i].rect.x = x0;
            objects[i].rect.y = y0;
            objects[i].rect.width = x1 - x0;
            objects[i].rect.height = y1 - y0;
        }
    }

    // Decodes one output level of the pose head into proposals (letterbox coordinates).
    //
    // stride          stride of this level; the grid is letterbox_cols / stride wide and
    //                 letterbox_rows / stride high
    // feat            per-cell records, row by row; each record holds 4 * 16 box-distribution
    //                 values (left, top, right, bottom) followed by cls_num class logits
    // feat_kps        per-cell keypoint records in the same cell order; each record holds
    //                 num_point (x, y, score) triples
    // prob_threshold  a cell yields a proposal only if sigmoid(best class logit) exceeds this
    // objects         output; proposals are appended, existing entries are kept
    // num_point       keypoints per cell
    // cls_num         class logits per cell
    static void generate_proposals_yolov8_pose_native(int stride, const float *feat, const float *feat_kps, float prob_threshold, std::vector<Object> &objects,
                                                      int letterbox_cols, int letterbox_rows, const int num_point = 17, int cls_num = 1)
    {
        const int feat_w = letterbox_cols / stride;
        const int feat_h = letterbox_rows / stride;
        const int reg_max = 16;

        const int cell_len = cls_num + 4 * reg_max; // floats per cell in feat
        const int kps_len = 3 * num_point;          // floats per cell in feat_kps

        const float max_x = static_cast<float>(letterbox_cols - 1);
        const float max_y = static_cast<float>(letterbox_rows - 1);

        const float *feat_ptr = feat;

        // Scratch buffer for the per-side softmax, reused for every cell.
        std::vector<float> dis_after_sm(reg_max, 0.f);

        for (int h = 0; h < feat_h; h++)
        {
            // Both cursors advance by one cell per iteration, whether or not the cell is kept.
            for (int w = 0; w < feat_w; w++, feat_ptr += cell_len, feat_kps += kps_len)
            {
                // process cls score: pick the class with the highest logit
                const float *cls_scores = feat_ptr + 4 * reg_max;
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int s = 0; s < cls_num; s++)
                {
                    if (cls_scores[s] > class_score)
                    {
                        class_index = s;
                        class_score = cls_scores[s];
                    }
                }

                const float box_prob = sigmoid(class_score);
                if (!(box_prob > prob_threshold))
                    continue;

                // Each side distance is the expected bin index of its distribution, in stride units.
                float pred_ltrb[4];
                for (int k = 0; k < 4; k++)
                {
                    const float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max);
                    pred_ltrb[k] = dis * stride;
                }

                // Distances are measured from the centre of the cell.
                const float pb_cx = (w + 0.5f) * stride;
                const float pb_cy = (h + 0.5f) * stride;

                const float x0 = std::max(std::min(pb_cx - pred_ltrb[0], max_x), 0.f);
                const float y0 = std::max(std::min(pb_cy - pred_ltrb[1], max_y), 0.f);
                const float x1 = std::max(std::min(pb_cx + pred_ltrb[2], max_x), 0.f);
                const float y1 = std::max(std::min(pb_cy + pred_ltrb[3], max_y), 0.f);

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = box_prob;

                // Size is known up front; avoid growing the vector one push at a time.
                if (kps_len > 0)
                    obj.kps_feat.reserve(kps_len);
                for (int k = 0; k < num_point; k++)
                {
                    obj.kps_feat.push_back((feat_kps[k * 3] * 2.f + w) * stride);
                    obj.kps_feat.push_back((feat_kps[k * 3 + 1] * 2.f + h) * stride);
                    obj.kps_feat.push_back(sigmoid(feat_kps[k * 3 + 2]));
                }

                // obj is not used again: hand its buffers over instead of deep-copying them.
                objects.push_back(std::move(obj));
            }
        }
    }

    // Decodes one output level of the segmentation head into proposals (letterbox coordinates).
    //
    // stride          stride of this level; the grid is letterbox_cols / stride wide and
    //                 letterbox_rows / stride high
    // feat            per-cell records, row by row; each record holds 4 * 16 box-distribution
    //                 values (left, top, right, bottom) followed by cls_num class logits
    // feat_seg        per-cell mask coefficients in the same cell order, mask_proto_dim per cell
    // prob_threshold  a cell yields a proposal only if sigmoid(best class logit) exceeds this
    // objects         output; proposals are appended, existing entries are kept
    // cls_num         class logits per cell
    // mask_proto_dim  mask coefficients per cell
    static void generate_proposals_yolov8_seg_native(int stride, const float *feat, const float *feat_seg, float prob_threshold, std::vector<Object> &objects,
                                                     int letterbox_cols, int letterbox_rows, int cls_num = 80, int mask_proto_dim = 32)
    {
        const int feat_w = letterbox_cols / stride;
        const int feat_h = letterbox_rows / stride;
        const int reg_max = 16;

        const int cell_len = cls_num + 4 * reg_max; // floats per cell in feat

        const float max_x = static_cast<float>(letterbox_cols - 1);
        const float max_y = static_cast<float>(letterbox_rows - 1);

        const float *feat_ptr = feat;
        const float *feat_seg_ptr = feat_seg;

        // Scratch buffer for the per-side softmax, reused for every cell.
        std::vector<float> dis_after_sm(reg_max, 0.f);

        for (int h = 0; h < feat_h; h++)
        {
            // Both cursors advance by one cell per iteration, whether or not the cell is kept.
            for (int w = 0; w < feat_w; w++, feat_ptr += cell_len, feat_seg_ptr += mask_proto_dim)
            {
                // process cls score: pick the class with the highest logit
                const float *cls_scores = feat_ptr + 4 * reg_max;
                int class_index = 0;
                float class_score = -FLT_MAX;
                for (int s = 0; s < cls_num; s++)
                {
                    if (cls_scores[s] > class_score)
                    {
                        class_index = s;
                        class_score = cls_scores[s];
                    }
                }

                const float box_prob = sigmoid(class_score);
                if (!(box_prob > prob_threshold))
                    continue;

                // Each side distance is the expected bin index of its distribution, in stride units.
                float pred_ltrb[4];
                for (int k = 0; k < 4; k++)
                {
                    const float dis = softmax(feat_ptr + k * reg_max, dis_after_sm.data(), reg_max);
                    pred_ltrb[k] = dis * stride;
                }

                // Distances are measured from the centre of the cell.
                const float pb_cx = (w + 0.5f) * stride;
                const float pb_cy = (h + 0.5f) * stride;

                const float x0 = std::max(std::min(pb_cx - pred_ltrb[0], max_x), 0.f);
                const float y0 = std::max(std::min(pb_cy - pred_ltrb[1], max_y), 0.f);
                const float x1 = std::max(std::min(pb_cx + pred_ltrb[2], max_x), 0.f);
                const float y1 = std::max(std::min(pb_cy + pred_ltrb[3], max_y), 0.f);

                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0;
                obj.rect.height = y1 - y0;
                obj.label = class_index;
                obj.prob = box_prob;

                // Keep this cell's mask coefficients with the proposal.
                obj.mask_feat.resize(mask_proto_dim);
                std::copy_n(feat_seg_ptr, mask_proto_dim, obj.mask_feat.begin());

                // obj is not used again: hand its buffers over instead of deep-copying them.
                objects.push_back(std::move(obj));
            }
        }
    }

    void get_out_bbox_mask(std::vector<Object> &proposals, std::vector<Object> &objects, const float *mask_proto, int mask_proto_dim, int mask_stride, const float nms_threshold, int letterbox_rows, int letterbox_cols, int src_rows, int src_cols)
    {
        qsort_descent_inplace(proposals);
        std::vector<int> picked;
        nms_sorted_bboxes(proposals, picked, nms_threshold);

        /* yolov5 draw the result */
        float scale_letterbox;
        int resize_rows;
        int resize_cols;
        if ((letterbox_rows * 1.0 / src_rows) < (letterbox_cols * 1.0 / src_cols))
        {
            scale_letterbox = letterbox_rows * 1.0 / src_rows;
        }
        else
        {
            scale_letterbox = letterbox_cols * 1.0 / src_cols;
        }
        resize_cols = int(scale_letterbox * src_cols);
        resize_rows = int(scale_letterbox * src_rows);

        int tmp_h = (letterbox_rows - resize_rows) / 2;
        int tmp_w = (letterbox_cols - resize_cols) / 2;

        float ratio_x = (float)src_rows / resize_rows;
        float ratio_y = (float)src_cols / resize_cols;

        int mask_proto_h = int(letterbox_rows / mask_stride);
        int mask_proto_w = int(letterbox_cols / mask_stride);

        int count = picked.size();
        objects.resize(count);

        for (int i = 0; i < count; i++)
        {
            objects[i] = proposals[picked[i]];
            float x0 = (objects[i].rect.x);
            float y0 = (objects[i].rect.y);
            float x1 = (objects[i].rect.x + objects[i].rect.width);
            float y1 = (objects[i].rect.y + objects[i].rect.height);
            /* naive RoiAlign by opencv */
            int hstart = std::floor(objects[i].rect.y / mask_stride);
            int hend = std::ceil(objects[i].rect.y / mask_stride + objects[i].rect.height / mask_stride);
            int wstart = std::floor(objects[i].rect.x / mask_stride);
            int wend = std::ceil(objects[i].rect.x / mask_stride + objects[i].rect.width / mask_stride);

            hstart = std::min(std::max(hstart, 0), mask_proto_h);
            wstart = std::min(std::max(wstart, 0), mask_proto_w);
            hend = std::min(std::max(hend, 0), mask_proto_h);
            wend = std::min(std::max(wend, 0), mask_proto_w);

            int mask_w = wend - wstart;
            int mask_h = hend - hstart;

            cv::Mat mask = cv::Mat(mask_h, mask_w, CV_32FC1);
            if (mask_w > 0 && mask_h > 0)
            {
                std::vector<cv::Range> roi_ranges;
                roi_ranges.push_back(cv::Range(0, 1));
                roi_ranges.push_back(cv::Range::all());
                roi_ranges.push_back(cv::Range(hstart, hend));
                roi_ranges.push_back(cv::Range(wstart, wend));

                cv::Mat mask_protos = cv::Mat(mask_proto_dim, mask_proto_h * mask_proto_w, CV_32FC1, (float *)mask_proto);
                int sz[] = {1, mask_proto_dim, mask_proto_h, mask_proto_w};
                cv::Mat mask_protos_reshape = mask_protos.reshape(1, 4, sz);
                cv::Mat protos = mask_protos_reshape(roi_ranges).clone().reshape(0, {mask_proto_dim, mask_w * mask_h});
                cv::Mat mask_proposals = cv::Mat(1, mask_proto_dim, CV_32FC1, (float *)objects[i].mask_feat.data());
                cv::Mat masks_feature = (mask_proposals * protos);
                /* sigmoid */
                cv::exp(-masks_feature.reshape(1, {mask_h, mask_w}), mask);
                mask = 1.0 / (1.0 + mask);
            }

            x0 = (x0 - tmp_w) * ratio_x;
            y0 = (y0 - tmp_h) * ratio_y;
            x1 = (x1 - tmp_w) * ratio_x;
            y1 = (y1 - tmp_h) * ratio_y;

            x0 = std::max(std::min(x0, (float)(src_cols - 1)), 0.f);
            y0 = std::max(std::min(y0, (float)(src_rows - 1)), 0.f);
            x1 = std::max(std::min(x1, (float)(src_cols - 1)), 0.f);
            y1 = std::max(std::min(y1, (float)(src_rows - 1)), 0.f);

            objects[i].rect.x = x0;
            objects[i].rect.y = y0;
            objects[i].rect.width = x1 - x0;
            objects[i].rect.height = y1 - y0;
            cv::resize(mask, mask, cv::Size((int)objects[i].rect.width, (int)objects[i].rect.height));
            objects[i].mask = mask > 0.5;

            // cv::Mat mask_8bit;
            // objects[i].mask.convertTo(mask_8bit, CV_8U, 255.0);

            // // 获取当前时间戳
            // auto timestamp = std::chrono::system_clock::now();
            // auto seconds = std::chrono::duration_cast<std::chrono::seconds>(timestamp.time_since_epoch()).count();

            // // 使用时间戳作为文件名
            // std::string filename = "output_" + std::to_string(seconds)+"_"+std::to_string(objects[i].label)+ ".jpg";

            // // 保存为 JPEG 文件
            // cv::imwrite(filename, mask_8bit);
        }
    }


} // namespace detection