1. Code
#include <opencv2/opencv.hpp>
#include <opencv2/dnn/dnn.hpp>
#include <librealsense2/rs.hpp> // Include RealSense Cross Platform API
using namespace cv;
using namespace dnn;
using namespace std;
using namespace rs2;
// 类名数组,这里需要替换为实际YOLO模型所检测的对象的类名
const char* classNames[] = {"object1", "object2", "object3", "object4"};
int main(int argc, char** argv)
{
// 模型权重和配置文件路径,这些文件包含了训练好的YOLO模型参数和网络配置
String model = "yolov5.onnx"; // 替换为实际模型文件路径
// 加载预训练的模型和配置到DNN网络中
Net net = readNetFromONNX(model);
// 设置推理引擎后端为OpenCV,目标设备为CPU
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
// Declare depth colorizer for pretty visualization of depth data
colorizer color_map;
// Declare RealSense pipeline, encapsulating the actual device and sensors
pipeline p;
// Start streaming with default recommended configuration
p.start();
// 循环直到用户按下键盘上的任意键
while (waitKey(1) < 0) {
// Wait for the next set of frames from the camera
frameset frames = p.wait_for_frames();
// Get a frame from the RGB camera
frame color = frames.get_color_frame();
// Create OpenCV matrix of size (color_height, color_width)
Mat frame(Size(640, 480), CV_8UC3, (void*)color.get_data(), Mat::AUTO_STEP);
Mat blob; // 用于存储处理后的图像,以适应网络输入
// 将帧图像转换为网络输入所需格式
blobFromImage(frame, blob, 1/255.0, cv::Size(416, 416), Scalar(0,0,0), true, false);
// 将blob设置为网络的输入
net.setInput(blob);
// 运行前向传递以获取网络的输出层
vector<Mat> outs;
net.forward(outs, net.getUnconnectedOutLayersNames());
// 遍历网络输出的每一层结果
for (size_t i = 0; i < outs.size(); ++i) {
for (int j = 0; j < outs[i].rows; ++j) {
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > 0.5) {
int centerX = (int)(outs[i].at<float>(j, 0) * frame.cols);
int centerY = (int)(outs[i].at<float>(j, 1) * frame.rows);
int width = (int)(outs[i].at<float>(j, 2) * frame.cols);
int height = (int)(outs[i].at<float>(j, 3) * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
rectangle(frame, Rect(left, top, width, height), Scalar(0, 255, 0), 2);
int classIdx = static_cast<int>(classIdPoint.x);
string classLabel = string(classNames[classIdx]);
string label = classLabel + ":" + format("%.2f", confidence);
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - labelSize.height), Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
}
}
}
// 展示处理后的帧
imshow("YoloV8", frame);
}
return 0;
}
Hinweis: Da ich die Kamera nicht zur Hand habe, habe ich nur die Informationen und den nach dem Dokument geschriebenen Code nachgeschlagen und es nicht geübt.
2. Installationspaket
Opencv und die Bibliothek librealsense2 müssen installiert werden