推荐|C++ TensorRT yolov8推理 CUDA核函数加速前处理、后处理

效果

4K视频 CPU前处理效果

4K视频 CUDA核函数前处理效果

4K视频 CUDA核函数前处理、后处理效果

2K视频 CUDA核函数前处理效果

2K视频 CUDA核函数前处理、后处理效果

1080P 视频 CUDA核函数前处理效果

效果

C++ TensorRT yolov8推理 CUDA核函数加速前处理

4K视频 CPU前处理效果

4K视频 CUDA核函数前处理效果

4K视频 CUDA核函数前处理、后处理效果

2K视频 CUDA核函数前处理效果

2K视频 CUDA核函数前处理、后处理效果

1080P 视频 CUDA核函数前处理效果

模型

Model Properties
-------------------------
date：2023-09-05T13:17:15.396588
description：Ultralytics YOLOv8n model trained on coco.yaml
author：Ultralytics
task：detect
license：AGPL-3.0 https://ultralytics.com/license
version：8.0.170
stride：32
batch：1
imgsz：[640, 640]
names：{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
---------------------------------------------------------------

Inputs
-------------------------
name：images
tensor：Float[1, 3, 640, 640]
---------------------------------------------------------------

Outputs
-------------------------
name：output0
tensor：Float[1, 84, 8400]
---------------------------------------------------------------

电脑环境

处理器：AMD Ryzen 7 7735H with Radeon Graphics 3.20 GHz
内存：16.0 GB
显卡：NVIDIA GeForce RTX 4060 Laptop GPU
操作系统：Windows 10 企业版
opencv-4.8.1
CUDA12.4
TensorRT-8.6.1.6
VS2022

项目

包含目录

库目录

附加依赖项

cublas.lib
cublasLt.lib
cuda.lib
cudadevrt.lib
cudart.lib
cudart_static.lib
cudnn.lib
cudnn64_8.lib
cudnn_adv_infer.lib
cudnn_adv_infer64_8.lib
cudnn_adv_train.lib
cudnn_adv_train64_8.lib
cudnn_cnn_infer.lib
cudnn_cnn_infer64_8.lib
cudnn_cnn_train.lib
cudnn_cnn_train64_8.lib
cudnn_ops_infer.lib
cudnn_ops_infer64_8.lib
cudnn_ops_train.lib
cudnn_ops_train64_8.lib
cufft.lib
cufftw.lib
cufilt.lib
curand.lib
cusolver.lib
cusolverMg.lib
cusparse.lib
nppc.lib
nppial.lib
nppicc.lib
nppidei.lib
nppif.lib
nppig.lib
nppim.lib
nppist.lib
nppisu.lib
nppitc.lib
npps.lib
nvblas.lib
nvJitLink.lib
nvJitLink_static.lib
nvjpeg.lib
nvml.lib
nvptxcompiler_static.lib
nvrtc-builtins_static.lib
nvrtc.lib
nvrtc_static.lib
OpenCL.lib
nvinfer.lib
nvinfer_dispatch.lib
nvinfer_lean.lib
nvinfer_plugin.lib
nvinfer_vc_plugin.lib
nvonnxparser.lib
nvparsers.lib
opencv_world481.lib

代码

#define _CRT_SECURE_NO_DEPRECATE

#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <numeric>
#include "NvInfer.h"
#include "kernel_function.h"
#include "utils.h"

// Class names, one per non-empty line of lable_path; filled by init().
std::vector<std::string> labels;
// Minimum best-class score for a candidate to be kept in postprocess();
// also forwarded to cv::dnn::NMSBoxes.
float score_threshold = 0.3f;
// Overlap threshold forwarded to cv::dnn::NMSBoxes in postprocess().
float nms_threshold = 0.5f;
// File locations; all three are assigned in main() before init() runs.
std::string lable_path = "";
std::string engin_path = "";
std::string video_path = "";

// Holder for the TensorRT runtime / engine / context; created in init(),
// released in destroy().
utils::NvinferStruct* p = nullptr;
// Logger handed to nvinfer1::createInferRuntime in init().
utils::Logger logger;

// Passed to normDevice() in preprocess(); never modified in this file.
utils::InitParameter m_param;
// Coefficients of the inverted src->dst affine matrix (row-major, v0..v5);
// filled in init() and passed to resizeDevice() in preprocess().
utils::AffineMat m_dst2src;

// Source frame size, read from the video capture in main().
int src_w = 0;
int src_h = 0;
// Network input size.
int dst_w = 640;
int dst_h = 640;
// Frame rate reported by the capture; read in main() but not used elsewhere here.
double fps = 0;
// Number of floats in the network output: (labels.size() + 4) * 8400.
int output_size = 0;

// Per-frame timings in seconds (tick difference / tick frequency) and the
// frame rate derived from their sum; written in main(), displayed by draw().
double preprocessTime = 0;
double inferTime = 0;
double postprocessTime = 0;
double totalTime = 0;
double detFps = 0;

// input
// Device buffers allocated in init(): the raw frame (3 * src_h * src_w bytes)
// followed by one float buffer (3 * dst_h * dst_w) per preprocessing stage.
unsigned char* m_input_src_device = nullptr;
float* m_input_resize_device = nullptr;
float* m_input_rgb_device = nullptr;
float* m_input_norm_device = nullptr;
float* m_input_hwc_device = nullptr;
// output
// Device buffer the engine writes to (output_size floats).
float* m_output_src_device = nullptr;
// Host copy of the network output (new float[output_size] in init()).
float* output_data = nullptr;

//初始化
int init() {

   std::ifstream lable_file(lable_path);
   if (!lable_file.is_open())
   {
       std::cerr << "Error opening file: " << lable_path << std::endl;
       return -1;
   }
   std::string line;
   while (std::getline(lable_file, line))
   {
       if (!line.empty())
       {
           labels.push_back(line);
       }
   }
   lable_file.close();

   // 以二进制方式读取文件
   std::ifstream engin_file(engin_path.data(), std::ios::binary);
   if (!engin_file.good()) {
       std::cerr << "文件无法打开，请确定文件是否可用！" << std::endl;
       return -1;
   }
   size_t size = 0;
   engin_file.seekg(0, engin_file.end);   // 将读指针从文件末尾开始移动0个字节
   size = engin_file.tellg();   // 返回读指针的位置，此时读指针的位置就是文件的字节数
   engin_file.seekg(0, engin_file.beg);   // 将读指针从文件开头开始移动0个字节
   char* modelStream = new char[size];
   engin_file.read(modelStream, size);
   engin_file.close();// 关闭文件

//创建推理核心结构体，初始化变量
p = new utils::NvinferStruct();

//初始化反序列化引擎
p->runtime = nvinfer1::createInferRuntime(logger);

// 初始化推理引擎
p->engine = p->runtime->deserializeCudaEngine(modelStream, size);

   // 创建上下文
   p->context = p->engine->createExecutionContext();
   int numNode = p->engine->getNbBindings();

delete[] modelStream;

output_size = 1 * (labels.size() + 4) * 8400;;
output_data = new float[output_size];

   float a = float(dst_h) / src_h;
   float b = float(dst_w) / src_w;
   float scale = a < b ? a : b;

/*cv::Mat src2dst = (cv::Mat_(2, 3) << scale, 0.f, (-scale * src_w + dst_w + scale - 1) * 0.5,
0.f, scale, (-scale * src_h + dst_h + scale - 1) * 0.5);*/

cv::Mat src2dst = (cv::Mat_(2, 3) << scale, 0.f, (-scale * src_w + dst_w) * 0.5,
0.f, scale, (-scale * src_h + dst_h) * 0.5);

cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);

cv::invertAffineTransform(src2dst, dst2src);

   m_dst2src.v0 = dst2src.ptr(0)[0];
   m_dst2src.v1 = dst2src.ptr(0)[1];
   m_dst2src.v2 = dst2src.ptr(0)[2];
   m_dst2src.v3 = dst2src.ptr(1)[0];
   m_dst2src.v4 = dst2src.ptr(1)[1];
   m_dst2src.v5 = dst2src.ptr(1)[2];

   CHECK(cudaMalloc(&m_input_src_device, 1 * 3 * src_h * src_w * sizeof(unsigned char)));
   CHECK(cudaMalloc(&m_input_resize_device, 1 * 3 * dst_h * dst_w * sizeof(float)));
   CHECK(cudaMalloc(&m_input_rgb_device, 1 * 3 * dst_h * dst_w * sizeof(float)));
   CHECK(cudaMalloc(&m_input_norm_device, 1 * 3 * dst_h * dst_w * sizeof(float)));
   CHECK(cudaMalloc(&m_input_hwc_device, 1 * 3 * dst_h * dst_w * sizeof(float)));
   CHECK(cudaMalloc(&m_output_src_device, 1 * output_size * sizeof(float)));

return 0;
}

// Pre-processing.
//
// Uploads one frame to the GPU and runs the four device stages in order:
// resizeDevice -> bgr2rgbDevice -> normDevice -> hwc2chwDevice. The result is
// left in m_input_hwc_device, which main() binds as the network input.
// NOTE(review): what each stage computes is defined in kernel_function.h,
// which is not visible here; the names suggest letterbox resize (pad value
// 114), channel swap, normalisation and HWC->CHW layout change - confirm.
//
// frame must be an 8-bit 3-channel image of exactly src_w x src_h pixels,
// because the device source buffer was sized for that in init(). A mismatch
// raises cv::Exception through CV_Assert instead of reading out of bounds.
void preprocess(cv::Mat& frame) {

	CV_Assert(!frame.empty() && frame.cols == src_w && frame.rows == src_h && frame.type() == CV_8UC3);

	// cudaMemcpy copies one flat byte range, so a frame with padded rows
	// has to be packed first.
	const cv::Mat packed = frame.isContinuous() ? frame : frame.clone();

	const size_t src_bytes = sizeof(unsigned char) * 3 * static_cast<size_t>(src_h) * src_w;
	CHECK(cudaMemcpy(m_input_src_device, packed.data, src_bytes, cudaMemcpyHostToDevice));

	resizeDevice(1, m_input_src_device, src_w, src_h, m_input_resize_device, dst_w, dst_h, 114, m_dst2src);

	bgr2rgbDevice(1, m_input_resize_device, dst_w, dst_h, m_input_rgb_device, dst_w, dst_h);

	normDevice(1, m_input_rgb_device, dst_w, dst_h, m_input_norm_device, dst_w, dst_h, m_param);

	hwc2chwDevice(1, m_input_norm_device, dst_w, dst_h, m_input_hwc_device, dst_w, dst_h);
}

//后处理
void postprocess(std::vector& detectionResult) {

CHECK(cudaMemcpy(output_data, m_output_src_device, output_size * sizeof(float), cudaMemcpyDeviceToHost));

cv::Mat dout(labels.size() + 4, 8400, CV_32F, output_data);
cv::Mat det_output = dout.t();

   std::vector boxes;
   std::vector classIds;
   std::vector confidences;

const float ratio_h = dst_h / (float)src_h;
const float ratio_w = dst_w / (float)src_w;

   for (int i = 0; i < det_output.rows; i++)
   {
       cv::Mat classes_scores = det_output.row(i).colRange(4, labels.size() + 4);
       cv::Point classIdPoint;
       double score;
       cv::minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);

       if (score > score_threshold)
       {
           float cx = det_output.at(i, 0);
           float cy = det_output.at(i, 1);
           float ow = det_output.at(i, 2);
           float oh = det_output.at(i, 3);

int x = static_cast((cx - 0.5 * ow));
int y = static_cast((cy - 0.5 * oh));

           int width = static_cast(ow);
           int height = static_cast(oh);
           // 基于纵横比调整边界框坐标
           if (ratio_h > ratio_w)
           {
               x = x / ratio_w;
               y = (y - (dst_h - ratio_w * src_h) / 2) / ratio_w;
               width = width / ratio_w;
               height = height / ratio_w;
           }
           else
           {
               x = (x - (dst_w - ratio_h * src_w) / 2) / ratio_h;
               y = y / ratio_h;
               width = width / ratio_h;
               height = height / ratio_h;
           }

           // 坐标值安全校验
           x = std::max(x, 0);
           y = std::max(y, 0);
           width = std::min(width, src_w - x);
           height = std::min(height, src_h - y);

           cv::Rect box;
           box.x = x;
           box.y = y;
           box.width = width;
           box.height = height;

           boxes.push_back(box);
           classIds.push_back(classIdPoint.x);
           confidences.push_back(score);
       }
   }

std::vector indexes;
cv::dnn::NMSBoxes(boxes, confidences, score_threshold, nms_threshold, indexes);

   for (size_t i = 0; i < indexes.size(); i++)
   {
       int index = indexes[i];
       utils::detresult box(labels[classIds[index]], classIds[index], confidences[index], boxes[index]);
       detectionResult.push_back(box);
   }

}

//绘制
void draw(cv::Mat& frame, std::vector& detectionResult) {

   for (size_t i = 0; i < detectionResult.size(); ++i)
   {
       utils::detresult box = detectionResult[i];
       cv::rectangle(frame, box.rect, cv::Scalar(0, 0, 255), 2);
       std::string label = box.className + ":" + cv::format("%.2f", box.confidence);
       putText(frame, label, cv::Point(box.rect.x, box.rect.y - 5), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
   }

   // 绘制时间
   putText(frame, "preprocessTime:" + std::to_string(preprocessTime * 1000) + "ms", cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
   putText(frame, "inferTime:" + std::to_string(inferTime * 1000) + "ms", cv::Point(10, 70), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
   putText(frame, "postprocessTime:" + std::to_string(postprocessTime * 1000) + "ms", cv::Point(10, 110), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
   putText(frame, "totalTime:" + std::to_string(totalTime * 1000) + "ms", cv::Point(10, 150), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
   putText(frame, "detFps:" + std::to_string(detFps), cv::Point(10, 190), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);

cv::imshow("detresult", frame);

}

// Cleanup.
//
// Releases the TensorRT objects (context, then engine, then runtime), the
// device buffers and the host output buffer, and resets every pointer to
// nullptr. Safe to call when init() failed part-way or was never called, and
// safe to call more than once.
// NOTE(review): the member null checks assume utils::NvinferStruct leaves its
// pointers null until init() assigns them - confirm in utils.h.
void destroy() {

	if (p != nullptr)
	{
		if (p->context != nullptr)
		{
			p->context->destroy();
		}
		if (p->engine != nullptr)
		{
			p->engine->destroy();
		}
		if (p->runtime != nullptr)
		{
			p->runtime->destroy();
		}
		delete p;
		p = nullptr;
	}

	// input (cudaFree accepts a null pointer as a no-op)
	CHECK(cudaFree(m_input_src_device));
	m_input_src_device = nullptr;
	CHECK(cudaFree(m_input_resize_device));
	m_input_resize_device = nullptr;
	CHECK(cudaFree(m_input_rgb_device));
	m_input_rgb_device = nullptr;
	CHECK(cudaFree(m_input_norm_device));
	m_input_norm_device = nullptr;
	CHECK(cudaFree(m_input_hwc_device));
	m_input_hwc_device = nullptr;

	// output
	CHECK(cudaFree(m_output_src_device));
	m_output_src_device = nullptr;

	// Allocated with new float[...] in init(), so it needs the array form.
	delete[] output_data;
	output_data = nullptr;
}

int main()
{
   lable_path = "model/lable.txt";
   engin_path = "model/yolov8n.engine";
   video_path = "test/VID_4K.mp4"; //3840x2160

   cv::VideoCapture capture(video_path);
   // 检查视频是否成功打开
   if (!capture.isOpened())
   {
       std::cout << "无法读取视频文件" << std::endl;
       return -1;
   }

   fps = capture.get(cv::CAP_PROP_FPS);
   src_w = static_cast(capture.get(cv::CAP_PROP_FRAME_WIDTH));
   src_h = static_cast(capture.get(cv::CAP_PROP_FRAME_HEIGHT));
   dst_w = 640;
   dst_h = 640;
   score_threshold = 0.3f;
   nms_threshold = 0.5f;

//初始化
init();

   cv::Mat frame;
   while (true)
   {
       bool success = capture.read(frame); // 读取一帧数据
       // 检查是否成功读取帧
       if (!success)
       {
           std::cout << "读取完毕" << std::endl;
           break;
       }

       //前处理
       double start = (double)cv::getTickCount();
       preprocess(frame);
       preprocessTime = ((double)cv::getTickCount() - start) / cv::getTickFrequency();

       //推理
       start = (double)cv::getTickCount();
       float* bindings[] = { m_input_hwc_device, m_output_src_device };
       bool context = p->context->executeV2((void**)bindings);
       inferTime = ((double)cv::getTickCount() - start) / cv::getTickFrequency();

       //后处理
       start = (double)cv::getTickCount();
       std::vector detectionResult;
       postprocess(detectionResult);
       postprocessTime = ((double)cv::getTickCount() - start) / cv::getTickFrequency();

       //总时间
       totalTime = preprocessTime + inferTime + postprocessTime;
       detFps = (1 / (totalTime));

       //绘制、显示
       cv::namedWindow("detresult", cv::WINDOW_NORMAL); // cv::WINDOW_NORMAL允许用户调整窗口大小
       //cv::resizeWindow("detresult", src_w / 2, src_h / 2); // 设置窗口的宽度和高度
       draw(frame, detectionResult);

       if (cv::waitKey(1) == 27) // 通过按下ESC键退出循环
       {
           break;
       }
   }

cv::destroyAllWindows();

destroy();

return 0;
}


#define _CRT_SECURE_NO_DEPRECATE
 
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <fstream>
#include <numeric>
#include "NvInfer.h"
#include "kernel_function.h"
#include "utils.h"
 
// Class names, one per non-empty line of lable_path; filled by init().
std::vector<std::string> labels;
// Minimum best-class score for a candidate to be kept in postprocess();
// also forwarded to cv::dnn::NMSBoxes.
float score_threshold = 0.3f;
// Overlap threshold forwarded to cv::dnn::NMSBoxes in postprocess().
float nms_threshold = 0.5f;
// File locations; all three are assigned in main() before init() runs.
std::string lable_path = "";
std::string engin_path = "";
std::string video_path = "";
 
// Holder for the TensorRT runtime / engine / context; created in init(),
// released in destroy().
utils::NvinferStruct* p = nullptr;
// Logger handed to nvinfer1::createInferRuntime in init().
utils::Logger logger;
 
// Passed to normDevice() in preprocess(); never modified in this file.
utils::InitParameter m_param;
// Coefficients of the inverted src->dst affine matrix (row-major, v0..v5);
// filled in init() and passed to resizeDevice() in preprocess().
utils::AffineMat m_dst2src;
 
// Source frame size, read from the video capture in main().
int src_w = 0;
int src_h = 0;
// Network input size.
int dst_w = 640;
int dst_h = 640;
// Frame rate reported by the capture; read in main() but not used elsewhere here.
double fps = 0;
// Number of floats in the network output: (labels.size() + 4) * 8400.
int output_size = 0;
 
// Per-frame timings in seconds (tick difference / tick frequency) and the
// frame rate derived from their sum; written in main(), displayed by draw().
double preprocessTime = 0;
double inferTime = 0;
double postprocessTime = 0;
double totalTime = 0;
double detFps = 0;
 
// input
// Device buffers allocated in init(): the raw frame (3 * src_h * src_w bytes)
// followed by one float buffer (3 * dst_h * dst_w) per preprocessing stage.
unsigned char* m_input_src_device = nullptr;
float* m_input_resize_device = nullptr;
float* m_input_rgb_device = nullptr;
float* m_input_norm_device = nullptr;
float* m_input_hwc_device = nullptr;
// output
// Device buffer the engine writes to (output_size floats).
float* m_output_src_device = nullptr;
// Host copy of the network output (new float[output_size] in init()).
float* output_data = nullptr;
// Initialization.
//
// Loads the class labels and the serialized TensorRT engine, creates the
// runtime / engine / execution context, computes the dst->src letterbox
// transform and allocates every host and device buffer.
//
// Reads the globals lable_path, engin_path, src_w, src_h, dst_w and dst_h,
// so they must be assigned before the call.
//
// Returns 0 on success and -1 on failure (the reason is written to std::cerr).
int init() {

	// The scale factors below divide by the source size.
	if (src_w <= 0 || src_h <= 0 || dst_w <= 0 || dst_h <= 0)
	{
		std::cerr << "Invalid image size: src " << src_w << "x" << src_h
			<< ", dst " << dst_w << "x" << dst_h << std::endl;
		return -1;
	}

	std::ifstream lable_file(lable_path);
	if (!lable_file.is_open())
	{
		std::cerr << "Error opening file: " << lable_path << std::endl;
		return -1;
	}
	std::string line;
	while (std::getline(lable_file, line))
	{
		// Tolerate CRLF label files read without newline translation.
		if (!line.empty() && line.back() == '\r')
		{
			line.pop_back();
		}
		if (!line.empty())
		{
			labels.push_back(line);
		}
	}
	lable_file.close();
	if (labels.empty())
	{
		// postprocess() slices labels.size() score columns per candidate.
		std::cerr << "No labels found in: " << lable_path << std::endl;
		return -1;
	}

	// Read the serialized engine as raw bytes.
	std::ifstream engin_file(engin_path.data(), std::ios::binary);
	if (!engin_file.good()) {
		std::cerr << "文件无法打开，请确定文件是否可用！" << std::endl;
		return -1;
	}
	engin_file.seekg(0, engin_file.end);
	const std::streamoff engine_len = engin_file.tellg();	// -1 when the stream failed
	if (engine_len <= 0)
	{
		std::cerr << "Engine file is empty or unreadable: " << engin_path << std::endl;
		return -1;
	}
	engin_file.seekg(0, engin_file.beg);
	std::vector<char> modelStream(static_cast<size_t>(engine_len));
	engin_file.read(modelStream.data(), engine_len);
	if (!engin_file)
	{
		std::cerr << "Failed to read engine file: " << engin_path << std::endl;
		return -1;
	}
	engin_file.close();

	// Inference state holder.
	p = new utils::NvinferStruct();

	// Each TensorRT factory call reports failure by returning a null pointer.
	p->runtime = nvinfer1::createInferRuntime(logger);
	if (p->runtime == nullptr)
	{
		std::cerr << "createInferRuntime failed" << std::endl;
		return -1;
	}

	p->engine = p->runtime->deserializeCudaEngine(modelStream.data(), modelStream.size());
	if (p->engine == nullptr)
	{
		std::cerr << "deserializeCudaEngine failed: " << engin_path << std::endl;
		return -1;
	}

	p->context = p->engine->createExecutionContext();
	if (p->context == nullptr)
	{
		std::cerr << "createExecutionContext failed" << std::endl;
		return -1;
	}

	// main() hands exactly one input and one output pointer to executeV2.
	if (p->engine->getNbBindings() != 2)
	{
		std::cerr << "Expected 2 engine bindings, got " << p->engine->getNbBindings() << std::endl;
		return -1;
	}

	// (4 box values + one score per class) for each of the 8400 candidates.
	output_size = static_cast<int>((labels.size() + 4) * 8400);
	output_data = new float[output_size];

	// Letterbox: uniform scale that fits the source inside dst_w x dst_h,
	// with the scaled image centred in the destination.
	const float a = float(dst_h) / src_h;
	const float b = float(dst_w) / src_w;
	const float scale = a < b ? a : b;

	cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * src_w + dst_w) * 0.5,
		0.f, scale, (-scale * src_h + dst_h) * 0.5);

	cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);

	cv::invertAffineTransform(src2dst, dst2src);

	// resizeDevice() receives the inverse (dst -> src) mapping.
	m_dst2src.v0 = dst2src.ptr<float>(0)[0];
	m_dst2src.v1 = dst2src.ptr<float>(0)[1];
	m_dst2src.v2 = dst2src.ptr<float>(0)[2];
	m_dst2src.v3 = dst2src.ptr<float>(1)[0];
	m_dst2src.v4 = dst2src.ptr<float>(1)[1];
	m_dst2src.v5 = dst2src.ptr<float>(1)[2];

	// Sizes are computed in size_t so large frames cannot overflow int.
	const size_t src_bytes = sizeof(unsigned char) * 3 * static_cast<size_t>(src_h) * src_w;
	const size_t dst_bytes = sizeof(float) * 3 * static_cast<size_t>(dst_h) * dst_w;

	CHECK(cudaMalloc(&m_input_src_device, src_bytes));
	CHECK(cudaMalloc(&m_input_resize_device, dst_bytes));
	CHECK(cudaMalloc(&m_input_rgb_device, dst_bytes));
	CHECK(cudaMalloc(&m_input_norm_device, dst_bytes));
	CHECK(cudaMalloc(&m_input_hwc_device, dst_bytes));
	CHECK(cudaMalloc(&m_output_src_device, sizeof(float) * static_cast<size_t>(output_size)));

	return 0;
}
 
// Pre-processing.
//
// Uploads one frame to the GPU and runs the four device stages in order:
// resizeDevice -> bgr2rgbDevice -> normDevice -> hwc2chwDevice. The result is
// left in m_input_hwc_device, which main() binds as the network input.
// NOTE(review): what each stage computes is defined in kernel_function.h,
// which is not visible here; the names suggest letterbox resize (pad value
// 114), channel swap, normalisation and HWC->CHW layout change - confirm.
//
// frame must be an 8-bit 3-channel image of exactly src_w x src_h pixels,
// because the device source buffer was sized for that in init(). A mismatch
// raises cv::Exception through CV_Assert instead of reading out of bounds.
void preprocess(cv::Mat& frame) {

	CV_Assert(!frame.empty() && frame.cols == src_w && frame.rows == src_h && frame.type() == CV_8UC3);

	// cudaMemcpy copies one flat byte range, so a frame with padded rows
	// has to be packed first.
	const cv::Mat packed = frame.isContinuous() ? frame : frame.clone();

	const size_t src_bytes = sizeof(unsigned char) * 3 * static_cast<size_t>(src_h) * src_w;
	CHECK(cudaMemcpy(m_input_src_device, packed.data, src_bytes, cudaMemcpyHostToDevice));

	resizeDevice(1, m_input_src_device, src_w, src_h, m_input_resize_device, dst_w, dst_h, 114, m_dst2src);

	bgr2rgbDevice(1, m_input_resize_device, dst_w, dst_h, m_input_rgb_device, dst_w, dst_h);

	normDevice(1, m_input_rgb_device, dst_w, dst_h, m_input_norm_device, dst_w, dst_h, m_param);

	hwc2chwDevice(1, m_input_norm_device, dst_w, dst_h, m_input_hwc_device, dst_w, dst_h);
}
 
// Post-processing.
//
// Copies the raw network output to the host, keeps every candidate whose best
// class score exceeds score_threshold, maps its box from network-input
// coordinates back to source-frame coordinates, runs non-maximum suppression
// and appends the surviving detections to detectionResult.
//
// detectionResult is only appended to; existing elements are left untouched.
void postprocess(std::vector<utils::detresult>& detectionResult) {

	CHECK(cudaMemcpy(output_data, m_output_src_device, output_size * sizeof(float), cudaMemcpyDeviceToHost));

	const int num_classes = static_cast<int>(labels.size());

	// The buffer is laid out as (4 + classes) rows x 8400 candidates;
	// transposing gives one candidate per row: cx, cy, w, h, scores...
	cv::Mat dout(num_classes + 4, 8400, CV_32F, output_data);
	cv::Mat det_output = dout.t();

	std::vector<cv::Rect> boxes;
	std::vector<int> classIds;
	std::vector<float> confidences;

	// Same letterbox as init(): one uniform scale plus centring padding.
	// The padding is zero along the axis that limits the scale.
	const float ratio_h = dst_h / static_cast<float>(src_h);
	const float ratio_w = dst_w / static_cast<float>(src_w);
	const float scale = std::min(ratio_h, ratio_w);
	const float pad_x = (dst_w - scale * src_w) * 0.5f;
	const float pad_y = (dst_h - scale * src_h) * 0.5f;
	const cv::Rect image_bounds(0, 0, src_w, src_h);

	for (int i = 0; i < det_output.rows; i++)
	{
		const cv::Mat classes_scores = det_output.row(i).colRange(4, num_classes + 4);
		cv::Point classIdPoint;
		double score = 0.0;
		cv::minMaxLoc(classes_scores, nullptr, &score, nullptr, &classIdPoint);

		if (score <= score_threshold)
		{
			continue;
		}

		const float* det = det_output.ptr<float>(i);
		const float cx = det[0];
		const float cy = det[1];
		const float ow = det[2];
		const float oh = det[3];

		// Undo the letterbox in floating point and round only once at the
		// end; truncating before the division would cost up to 1/scale
		// source pixels of accuracy.
		cv::Rect box(
			cvRound((cx - 0.5f * ow - pad_x) / scale),
			cvRound((cy - 0.5f * oh - pad_y) / scale),
			cvRound(ow / scale),
			cvRound(oh / scale));

		// Clip against the frame. Intersection shrinks the box when its
		// origin is clipped and yields an empty rect when it lies outside.
		box &= image_bounds;
		if (box.width <= 0 || box.height <= 0)
		{
			continue;
		}

		boxes.push_back(box);
		classIds.push_back(classIdPoint.x);
		confidences.push_back(static_cast<float>(score));
	}

	std::vector<int> indexes;
	cv::dnn::NMSBoxes(boxes, confidences, score_threshold, nms_threshold, indexes);

	for (size_t i = 0; i < indexes.size(); i++)
	{
		const int index = indexes[i];
		utils::detresult box(labels[classIds[index]], classIds[index], confidences[index], boxes[index]);
		detectionResult.push_back(box);
	}

}
 
// Drawing.
//
// Draws every detection (red rectangle plus a green "className:confidence"
// label 5 px above its top-left corner) and the timing overlay onto frame,
// then shows the frame in the "detresult" window.
void draw(cv::Mat& frame, std::vector<utils::detresult>& detectionResult) {

	const cv::Scalar red(0, 0, 255);
	const cv::Scalar green(0, 255, 0);

	for (const utils::detresult& det : detectionResult)
	{
		cv::rectangle(frame, det.rect, red, 2);
		const std::string caption = det.className + ":" + cv::format("%.2f", det.confidence);
		cv::putText(frame, caption, cv::Point(det.rect.x, det.rect.y - 5), cv::FONT_HERSHEY_SIMPLEX, 1, green, 2);
	}

	// Timing overlay: one line every 40 px, starting at y = 30.
	int line_y = 30;
	const auto put_line = [&](const std::string& text)
	{
		cv::putText(frame, text, cv::Point(10, line_y), cv::FONT_HERSHEY_SIMPLEX, 1, green, 2);
		line_y += 40;
	};

	// The timings are stored in seconds and displayed in milliseconds.
	put_line("preprocessTime:" + std::to_string(preprocessTime * 1000) + "ms");
	put_line("inferTime:" + std::to_string(inferTime * 1000) + "ms");
	put_line("postprocessTime:" + std::to_string(postprocessTime * 1000) + "ms");
	put_line("totalTime:" + std::to_string(totalTime * 1000) + "ms");
	put_line("detFps:" + std::to_string(detFps));

	cv::imshow("detresult", frame);

}
 
// Cleanup.
//
// Releases the TensorRT objects (context, then engine, then runtime), the
// device buffers and the host output buffer, and resets every pointer to
// nullptr. Safe to call when init() failed part-way or was never called, and
// safe to call more than once.
// NOTE(review): the member null checks assume utils::NvinferStruct leaves its
// pointers null until init() assigns them - confirm in utils.h.
void destroy() {

	if (p != nullptr)
	{
		if (p->context != nullptr)
		{
			p->context->destroy();
		}
		if (p->engine != nullptr)
		{
			p->engine->destroy();
		}
		if (p->runtime != nullptr)
		{
			p->runtime->destroy();
		}
		delete p;
		p = nullptr;
	}

	// input (cudaFree accepts a null pointer as a no-op)
	CHECK(cudaFree(m_input_src_device));
	m_input_src_device = nullptr;
	CHECK(cudaFree(m_input_resize_device));
	m_input_resize_device = nullptr;
	CHECK(cudaFree(m_input_rgb_device));
	m_input_rgb_device = nullptr;
	CHECK(cudaFree(m_input_norm_device));
	m_input_norm_device = nullptr;
	CHECK(cudaFree(m_input_hwc_device));
	m_input_hwc_device = nullptr;

	// output
	CHECK(cudaFree(m_output_src_device));
	m_output_src_device = nullptr;

	// Allocated with new float[...] in init(), so it needs the array form.
	delete[] output_data;
	output_data = nullptr;
}
 
// Entry point: opens the video, initialises the pipeline, then for each frame
// runs preprocess -> TensorRT inference -> postprocess, records the time of
// each stage and displays the annotated frame. ESC or end of video stops the
// loop.
//
// Returns 0 on a normal run and -1 when the video cannot be opened, init()
// fails or inference fails.
int main()
{
	lable_path = "model/lable.txt";
	engin_path = "model/yolov8n.engine";
	video_path = "test/VID_4K.mp4"; //3840x2160

	cv::VideoCapture capture(video_path);
	// Make sure the video was opened.
	if (!capture.isOpened())
	{
		std::cout << "无法读取视频文件" << std::endl;
		return -1;
	}

	fps = capture.get(cv::CAP_PROP_FPS);
	src_w = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_WIDTH));
	src_h = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_HEIGHT));
	dst_w = 640;
	dst_h = 640;
	score_threshold = 0.3f;
	nms_threshold = 0.5f;

	// Without a successful init() the loop below would dereference null
	// engine and buffer pointers.
	if (init() != 0)
	{
		std::cerr << "Initialization failed" << std::endl;
		return -1;
	}

	// cv::WINDOW_NORMAL lets the user resize the window; create it once.
	cv::namedWindow("detresult", cv::WINDOW_NORMAL);
	//cv::resizeWindow("detresult", src_w / 2, src_h / 2); // set the window width and height

	const double tick_hz = cv::getTickFrequency();
	int exit_code = 0;

	cv::Mat frame;
	while (true)
	{
		// Read one frame; a failed read marks the end of the video.
		if (!capture.read(frame))
		{
			std::cout << "读取完毕" << std::endl;
			break;
		}

		// Pre-processing
		auto start = cv::getTickCount();
		preprocess(frame);
		preprocessTime = (cv::getTickCount() - start) / tick_hz;

		// Inference: binding 0 is the network input, binding 1 the output.
		start = cv::getTickCount();
		void* bindings[] = { m_input_hwc_device, m_output_src_device };
		const bool inferred = p->context->executeV2(bindings);
		inferTime = (cv::getTickCount() - start) / tick_hz;
		if (!inferred)
		{
			std::cerr << "TensorRT executeV2 failed" << std::endl;
			exit_code = -1;
			break;
		}

		// Post-processing
		start = cv::getTickCount();
		std::vector<utils::detresult> detectionResult;
		postprocess(detectionResult);
		postprocessTime = (cv::getTickCount() - start) / tick_hz;

		// Total time and the frame rate it implies.
		totalTime = preprocessTime + inferTime + postprocessTime;
		detFps = totalTime > 0 ? 1 / totalTime : 0;

		// Draw and display.
		draw(frame, detectionResult);

		if (cv::waitKey(1) == 27) // ESC leaves the loop
		{
			break;
		}
	}

	cv::destroyAllWindows();

	destroy();

	return exit_code;
}

下载

C++ TensorRT yolov8推理 CUDA核函数加速前处理

C++ TensorRT yolov8推理 CUDA核函数加速前处理、后处理

天天代码码天天

微信公众号

.NET 人工智能实践

效果

4K视频 CPU前处理效果

4K视频 CUDA核函数前处理效果

4K视频 CUDA核函数前处理、后处理效果

2K视频 CUDA核函数前处理效果

2K视频 CUDA核函数前处理、后处理效果

1080P 视频 CUDA核函数前处理效果

模型

电脑环境

项目

代码

下载

评论记录：