yolov8의 TensorRT 배포(C++ 버전)

1. 환경 구성

        CUDA와 cuDNN 환경을 구성해야 하며, TensorRT도 설치해야 합니다. 설치 방법은 다음 블로그를 참고해 주세요.

[Ubuntu 버전] TensorRT 설치 튜토리얼(tar 패키지 방식)_ubuntu 설치 tensorrt-CSDN 블로그

2. 모델 준비

        먼저 ONNX 모델 파일이 필요합니다. 저는 PyTorch 모델을 ONNX로 변환하는 방식(PyTorch -> ONNX)을 사용했습니다.

from ultralytics import YOLO

# Load the YOLOv8n checkpoint from the current working directory.
model = YOLO("./yolov8n.pt")

# Only run the export when this file is executed as a script.
if __name__ == '__main__':

    # Export the loaded model in ONNX format.
    model.export(format="onnx")

        그러면 yolov8n.onnx 파일이 생성됩니다. 이어서 ONNX 모델을 TensorRT 엔진(.trt)으로 변환합니다.

TensorRT가 설치된 위치를 찾아 다음 명령을 실행합니다.

# Build a serialized TensorRT engine from the ONNX model (the trtexec flag is --saveEngine).
TensorRT-8.6.4.3/bin/trtexec --onnx=yolov8n.onnx --saveEngine=yolov8n.trt

TensorRT가 사용할 수 있는 모델 파일이 생성됩니다.

3. yolov8 출력 결과 분석

        yolov8의 출력 텐서 output0의 shape은 1x84x8400입니다.

8400은 후보 박스(예측 박스)의 개수로, yolov5의 25200보다 훨씬 적습니다.

84개의 채널 중 처음 4개는 박스의 xywh(중심 좌표 + 너비와 높이)이며, yolov5와 비교하면 객체성(objectness) 채널이 하나 없습니다. 그렇다면 객체성은 어떻게 얻을 수 있을까요? yolov8에서는 나머지 80개 클래스 신뢰도 중 가장 큰 값을 객체성으로 사용합니다.

최종 점수 = 객체성(objectness) × 신뢰도(confidence)

4. 메인코드

감지.cpp

#include<iostream>  
#include<opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc.hpp>
#include<fstream>
#include "NvInfer.h"
#include "processing.hpp"
//#include "logging.h"

using namespace nvinfer1;
using namespace std;

// Width and height, in pixels, of the image blob fed to the network.
constexpr int model_width = 640;
constexpr int model_height = 640;

// Minimal TensorRT logger: a message is written to stderr when its severity
// value is less than or equal to the configured threshold.
class MyLogger : public nvinfer1::ILogger
{
public:
    // severity: threshold stored in severity_; defaults to kWARNING.
    explicit MyLogger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kWARNING)
        : severity_(severity)
    {
    }

    // Callback invoked with each log message.
    void log(nvinfer1::ILogger::Severity severity, const char *msg) noexcept override
    {
        // Messages whose severity value exceeds the threshold are dropped.
        if (severity > severity_)
            return;
        std::cerr << msg << std::endl;
    }

    // Threshold compared against each message's severity.
    nvinfer1::ILogger::Severity severity_;
};

int main()
{
//一、图像处理
    string image_path = "/home/hitcrt/code/tensorrt/TRT_test/street.jpg";  //填写自己图片路径(需要绝对路径)
    cv::Mat input_image = cv::imread(image_path);

    float* input_blob = new float[model_height * model_width * 3];
    cv::Mat resize_image;
	//比例
    const float _ratio = std::min(model_width / (input_image.cols * 1.0f),
                            model_height / (input_image.rows * 1.0f));
    // 等比例缩放
    const int border_width = input_image.cols * _ratio;
    const int border_height = input_image.rows * _ratio;
    // 计算偏移值
    const int x_offset = (model_width - border_width) / 2;
    const int y_offset = (model_height - border_height) / 2;

    //将输入图像缩放至resize_image
    cv::resize(input_image, resize_image, cv::Size(border_width, border_height));
    //复制图像并且制作边界
    cv::copyMakeBorder(resize_image, resize_image, y_offset, y_offset, x_offset,
                        x_offset, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
    // 转换为RGB格式
    cv::cvtColor(resize_image, resize_image, cv::COLOR_BGR2RGB);
    
    //归一化
    const int channels = resize_image.channels();
    const int width = resize_image.cols;
    const int height = resize_image.rows;
    for (int c = 0; c < channels; c++) {
        for (int h = 0; h < height; h++) {
            for (int w = 0; w < width; w++) {
                input_blob[c * width * height + h * width + w] =
                    resize_image.at<cv::Vec3b>(h, w)[c] / 255.0f;  //at<Vec3b> 是 OpenCV 中用于访问图像像素的一种方法,使用 at<Vec3b> 获取彩色图像中特定位置的像素颜色值
            }
        }
    }

//二、模型反序列化
    MyLogger logger;
    //读取trt信息
    const std::string engine_file_path = "/home/hitcrt/code/tensorrt/TRT_test/yolov8n.trt";  //填写自己trt文件路径(需要绝对路径)
    std::stringstream engine_file_stream;
    engine_file_stream.seekg(0, engine_file_stream.beg);  //从起始位置偏移0个字节,指针移动到文件流的开头
    std::ifstream ifs(engine_file_path);
    engine_file_stream << ifs.rdbuf();  //将读取到的数据流交给engine_file_stream
    ifs.close();

    engine_file_stream.seekg(0, std::ios::end); //先把文件输入流指针定位到文档末尾来获取文档的长度
    const int model_size = engine_file_stream.tellg();  //获取文件流的总长度
    engine_file_stream.seekg(0, std::ios::beg);
    void *model_mem = malloc(model_size);               //开辟一样长的空间
    engine_file_stream.read(static_cast<char *>(model_mem), model_size);    //将内容读取到model_mem中

    nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(model_mem, model_size);

    free(model_mem);

//三、模型推理
    nvinfer1::IExecutionContext *context = engine->createExecutionContext();

    void *buffers[2];
    // 获取模型输入尺寸并分配GPU内存
    nvinfer1::Dims input_dim = engine->getBindingDimensions(0);
    int input_size = 1;
    for (int j = 0; j < input_dim.nbDims; ++j) {
        if(input_dim.d[j] < 0)
            input_size *= -input_dim.d[j];
        else
            input_size *= input_dim.d[j];
    }
    cudaMalloc(&buffers[0], input_size * sizeof(float));

    // 获取模型输出尺寸并分配GPU内存
    nvinfer1::Dims output_dim = engine->getBindingDimensions(1);

    int output_size = 1;
    for (int j = 0; j < output_dim.nbDims; ++j) {
        if(output_dim.d[j] < 0)
            output_size *= -output_dim.d[j];
        else
            output_size *= output_dim.d[j];
    }
    cudaMalloc(&buffers[1], output_size * sizeof(float));

    // 给模型输出数据分配相应的CPU内存
    float *output_buffer = new float[output_size];
    //数据投入
    cudaStream_t stream;
    cudaStreamCreate(&stream);
    // 拷贝输入数据
    cudaMemcpyAsync(buffers[0], input_blob, input_size * sizeof(float),
                    cudaMemcpyHostToDevice, stream);
    // 执行推理
    if(context->enqueueV2(buffers, stream, nullptr))
    {
        cout << "enqueueV2执行推理成功" << endl;
    }
    else{
        cout << "enqueueV2执行推理失败" << endl;
        return -1;
    }
    // 拷贝输出数据
    cudaMemcpyAsync(output_buffer, buffers[1], output_size * sizeof(float),
                    cudaMemcpyDeviceToHost, stream);

    cudaStreamSynchronize(stream);

    delete context;
    delete engine;
    delete runtime;
    delete[] input_blob;

//四、输出结果output_buffer,放入objs  xywh为中心点坐标 和宽高
    float *ptr = output_buffer;     // 1x84x8400  =  705600
    vector<vector<float>> temp(84, vector<float>(8400));
    vector<vector<float>> outVec(8400, vector<float>(84));
    for(int i = 0; i < 705600; i++)
    {
        temp[i/8400][i%8400] = *ptr;
        ptr++;
    }
    for(int i = 0; i < 84; i++)
    {
        for(int j = 0; j < 8400; j++)
        {
            outVec[j][i] = temp[i][j];
        }
    }
    std::vector<Object> objs;
    for (int i = 0; i < 8400; ++i)
    {
        const float objectness = *(std::max_element(outVec[i].begin() + 4, outVec[i].begin() + 83));
        if (objectness >= 0.45f)
        {
            const int label = std::max_element(outVec[i].begin() + 4, outVec[i].begin() + 83) - (outVec[i].begin() + 4);  //std::max_element返回范围内的最大元素
            const float confidence = outVec[i][label + 4] * objectness;
            if (confidence >= 0.25f) {
                const float bx = outVec[i][0];
                const float by = outVec[i][1];
                const float bw = outVec[i][2];
                const float bh = outVec[i][3];
                Object obj;
                // 还原图像尺寸中box的尺寸比例,这里要减掉偏移值,并把box中心点坐标xy转成左上角坐标xy
                obj.box.x = (bx - bw * 0.5f - x_offset) / _ratio;
                obj.box.y = (by - bh * 0.5f - y_offset) / _ratio;
                obj.box.width = bw / _ratio;
                obj.box.height = bh / _ratio;
                obj.label = label;
                obj.confidence = confidence;
                objs.push_back(std::move(obj));
            }
        }
    }  // i loop

//五、NMS非极大值抑制
    vector<Object> output;
    hardNMS(objs, output, 0.6, 10);

//六、画框
    vector<Object>::iterator it = output.begin();
    while(it != output.end()){
        cv::Point topLeft(it->box.x, it->box.y);
        cv::Point bottomRight(it->box.x + it->box.width, it->box.y + it->box.height);
        cv::rectangle(input_image, topLeft, bottomRight, cv::Scalar(0, 0, 255), 2);
        std::stringstream buff;
        buff.precision(2);  //覆盖默认精度,置信度保留2位小数
        buff.setf(std::ios::fixed);
        buff << it->confidence;
        string text =names[it->label] + " " + buff.str();
        cv::putText(input_image, text, topLeft, 0, 1, cv::Scalar(0, 255, 0), 2);
        it++;
    }
    cv::imwrite("detected.jpg", input_image);

    return 0;
}

processing.hpp

#include <algorithm>
#include <cmath>
#include <iostream>
#include <list>
#include <string>
#include <vector>
using namespace std;

// Class names of the COCO dataset, indexed by class id (80 entries).
// `static const` gives the table internal linkage, so this header can be
// included from several translation units without duplicate definitions.
static const std::string names[] = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"};

// Rectangle described by an (x, y) position and its width/height, all floats.
struct BOX
{
    float x, y;           // position
    float width, height;  // extent
};

// One detection: bounding box, class index and score.
struct Object
{
    BOX box;    // top-left corner (x, y) plus width and height
    int label;  // class index
    float confidence;  // actually the final score, i.e. objectness * confidence
};

bool cmp(Object &obj1, Object &obj2){
    return obj1.confidence > obj2.confidence;
}

// Intersection-over-union of two detections' boxes.
//
// Each box is read as a corner (box.x, box.y) plus box.width / box.height.
// Returns intersection area / union area, or 0 when the boxes do not overlap
// or the union area is not positive.
inline float iou_of(const Object &obj1, const Object &obj2)
{
    // Far corners of both boxes.
    const float x1_rb = obj1.box.x + obj1.box.width;
    const float y1_rb = obj1.box.y + obj1.box.height;
    const float x2_rb = obj2.box.x + obj2.box.width;
    const float y2_rb = obj2.box.y + obj2.box.height;

    // Intersection extents, clamped at zero: without the clamp two boxes that
    // are disjoint on both axes give a positive product of two negatives.
    const float i_w = std::max(0.0f, std::min(x1_rb, x2_rb) - std::max(obj1.box.x, obj2.box.x));
    const float i_h = std::max(0.0f, std::min(y1_rb, y2_rb) - std::max(obj1.box.y, obj2.box.y));
    const float inter_area = i_w * i_h;

    // True union area (sum of areas minus the overlap), not the enclosing box.
    const float area1 = obj1.box.width * obj1.box.height;
    const float area2 = obj2.box.width * obj2.box.height;
    const float union_area = area1 + area2 - inter_area;

    // Guard against dividing by zero for degenerate boxes.
    return union_area > 0.0f ? inter_area / union_area : 0.0f;
}

std::vector<int> hardNMS(std::vector<Object> &input, std::vector<Object> &output, float iou_threshold, unsigned int topk)
{  //Object只有confidence和label
    const unsigned int box_num = input.size(); 
    std::vector<int> merged(box_num, 0);
    std::vector<int> indices;

    if (input.empty())
        return indices;
    std::vector<Object> res;
    //先对bboxs按照conf进行排序
    std::sort(input.begin(), input.end(),
            [](const Object &a, const Object &b)
            { return a.confidence > b.confidence; });   //[]表示C++中的lambda函数
    
    unsigned int count = 0;
    for (unsigned int i = 0; i < box_num; ++i)
    {   //按照conf依次遍历bbox
        if (merged[i])
            continue;
        //如果已经被剔除,continue
        Object buf;
        buf = input[i];
        merged[i] = 1; //剔除当前bbox

        //由于后面的置信度低,只需要考虑当前bbox后面的即可
        for (unsigned int j = i + 1; j < box_num; ++j)
        {
            if (merged[j])
                continue;

            float iou = static_cast<float>(iou_of(input[j], input[i]));
            //计算iou
            if (iou > iou_threshold)
            { //超过阈值认为重合,剔除第j个bbox,
                merged[j] = 1;
            }
        }
        indices.push_back(i);
        res.push_back(buf); //将最高conf的bbox填入结果

        // keep top k
        //获取前k个输出,这个应该是针对密集输出的情况,此时input已经做了conf剔除
        count += 1;
        if (count >= topk)
            break;
    }
    output.swap(res);

    return indices;
}

// Logistic function 1 / (1 + e^-x).
// Evaluated in double precision and narrowed explicitly to float; `inline`
// because the function is defined in a header.
inline float sigmoid(float x)
{
    return static_cast<float>(1.0 / (1.0 + std::exp(-static_cast<double>(x))));
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.5)
project(Demo LANGUAGES CXX)

# Debug build so the binary can be stepped through with gdb.
set(CMAKE_BUILD_TYPE "Debug")

# Request C++11 once through CMake instead of repeating -std=c++11 in several places.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# option() takes <variable> "<help text>" [value]; the help string was missing.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)

# TensorRT install location; override with -DTENSORRT_ROOT=/path/to/TensorRT.
set(TENSORRT_ROOT "/home/home_expand/TensorRT-8.6.1.6" CACHE PATH "TensorRT installation directory")

find_package(CUDA REQUIRED)
find_package(OpenCV REQUIRED)

# Use -pthread for both compiling and linking.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message("embed_platform on")
    include_directories(/usr/local/cuda/targets/aarch64-linux/include)
    link_directories(/usr/local/cuda/targets/aarch64-linux/lib)
else()
    message("embed_platform off")
    include_directories(/usr/local/cuda/include)
    link_directories(/usr/local/cuda/lib64)
endif()

# A single optimisation level: the original passed both -Ofast (here) and -O2
# (through add_definitions), which conflict with each other.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O2 -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED")

include_directories(${OpenCV_INCLUDE_DIRS} ${TENSORRT_ROOT}/include)
link_directories(${TENSORRT_ROOT}/lib)

# NOTE(review): confirm that segment.cpp is the file holding the main() of this demo.
add_executable(Demo segment.cpp)

target_link_libraries(Demo nvinfer cudart ${OpenCV_LIBRARIES} Threads::Threads)

추천

출처: blog.csdn.net/liujiahao123987/article/details/133892746