You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
149 lines
5.7 KiB
149 lines
5.7 KiB
// This example is derived from the ssd_mobilenet_object_detection opencv demo
|
|
// and adapted to be used with Intel RealSense Cameras
|
|
// Please see https://github.com/opencv/opencv/blob/master/LICENSE
|
|
|
|
#include <opencv2/dnn.hpp>
|
|
#include <librealsense2/rs.hpp>
|
|
#include "../cv-helpers.hpp"
|
|
|
|
const size_t inWidth = 300;
|
|
const size_t inHeight = 300;
|
|
const float WHRatio = inWidth / (float)inHeight;
|
|
const float inScaleFactor = 0.007843f;
|
|
const float meanVal = 127.5;
|
|
const char* classNames[] = {"background",
|
|
"aeroplane", "bicycle", "bird", "boat",
|
|
"bottle", "bus", "car", "cat", "chair",
|
|
"cow", "diningtable", "dog", "horse",
|
|
"motorbike", "person", "pottedplant",
|
|
"sheep", "sofa", "train", "tvmonitor"};
|
|
|
|
int main(int argc, char** argv) try
|
|
{
|
|
using namespace cv;
|
|
using namespace cv::dnn;
|
|
using namespace rs2;
|
|
|
|
Net net = readNetFromCaffe("MobileNetSSD_deploy.prototxt",
|
|
"MobileNetSSD_deploy.caffemodel");
|
|
|
|
// Start streaming from Intel RealSense Camera
|
|
pipeline pipe;
|
|
auto config = pipe.start();
|
|
auto profile = config.get_stream(RS2_STREAM_COLOR)
|
|
.as<video_stream_profile>();
|
|
rs2::align align_to(RS2_STREAM_COLOR);
|
|
|
|
Size cropSize;
|
|
if (profile.width() / (float)profile.height() > WHRatio)
|
|
{
|
|
cropSize = Size(static_cast<int>(profile.height() * WHRatio),
|
|
profile.height());
|
|
}
|
|
else
|
|
{
|
|
cropSize = Size(profile.width(),
|
|
static_cast<int>(profile.width() / WHRatio));
|
|
}
|
|
|
|
Rect crop(Point((profile.width() - cropSize.width) / 2,
|
|
(profile.height() - cropSize.height) / 2),
|
|
cropSize);
|
|
|
|
const auto window_name = "Display Image";
|
|
namedWindow(window_name, WINDOW_AUTOSIZE);
|
|
|
|
while (getWindowProperty(window_name, WND_PROP_AUTOSIZE) >= 0)
|
|
{
|
|
// Wait for the next set of frames
|
|
auto data = pipe.wait_for_frames();
|
|
// Make sure the frames are spatially aligned
|
|
data = align_to.process(data);
|
|
|
|
auto color_frame = data.get_color_frame();
|
|
auto depth_frame = data.get_depth_frame();
|
|
|
|
// If we only received new depth frame,
|
|
// but the color did not update, continue
|
|
static int last_frame_number = 0;
|
|
if (color_frame.get_frame_number() == last_frame_number) continue;
|
|
last_frame_number = static_cast<int>(color_frame.get_frame_number());
|
|
|
|
// Convert RealSense frame to OpenCV matrix:
|
|
auto color_mat = frame_to_mat(color_frame);
|
|
auto depth_mat = depth_frame_to_meters(depth_frame);
|
|
|
|
Mat inputBlob = blobFromImage(color_mat, inScaleFactor,
|
|
Size(inWidth, inHeight), meanVal, false); //Convert Mat to batch of images
|
|
net.setInput(inputBlob, "data"); //set the network input
|
|
Mat detection = net.forward("detection_out"); //compute output
|
|
|
|
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
|
|
|
|
// Crop both color and depth frames
|
|
color_mat = color_mat(crop);
|
|
depth_mat = depth_mat(crop);
|
|
|
|
float confidenceThreshold = 0.8f;
|
|
for(int i = 0; i < detectionMat.rows; i++)
|
|
{
|
|
float confidence = detectionMat.at<float>(i, 2);
|
|
|
|
if(confidence > confidenceThreshold)
|
|
{
|
|
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));
|
|
|
|
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * color_mat.cols);
|
|
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * color_mat.rows);
|
|
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * color_mat.cols);
|
|
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * color_mat.rows);
|
|
|
|
Rect object((int)xLeftBottom, (int)yLeftBottom,
|
|
(int)(xRightTop - xLeftBottom),
|
|
(int)(yRightTop - yLeftBottom));
|
|
|
|
object = object & Rect(0, 0, depth_mat.cols, depth_mat.rows);
|
|
|
|
// Calculate mean depth inside the detection region
|
|
// This is a very naive way to estimate objects depth
|
|
// but it is intended to demonstrate how one might
|
|
// use depth data in general
|
|
Scalar m = mean(depth_mat(object));
|
|
|
|
std::ostringstream ss;
|
|
ss << classNames[objectClass] << " ";
|
|
ss << std::setprecision(2) << m[0] << " meters away";
|
|
String conf(ss.str());
|
|
|
|
rectangle(color_mat, object, Scalar(0, 255, 0));
|
|
int baseLine = 0;
|
|
Size labelSize = getTextSize(ss.str(), FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
|
|
|
|
auto center = (object.br() + object.tl())*0.5;
|
|
center.x = center.x - labelSize.width / 2;
|
|
|
|
rectangle(color_mat, Rect(Point(center.x, center.y - labelSize.height),
|
|
Size(labelSize.width, labelSize.height + baseLine)),
|
|
Scalar(255, 255, 255), FILLED);
|
|
putText(color_mat, ss.str(), center,
|
|
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
|
|
}
|
|
}
|
|
|
|
imshow(window_name, color_mat);
|
|
if (waitKey(1) >= 0) break;
|
|
}
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|
|
catch (const rs2::error & e)
|
|
{
|
|
std::cerr << "RealSense error calling " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n " << e.what() << std::endl;
|
|
return EXIT_FAILURE;
|
|
}
|
|
catch (const std::exception& e)
|
|
{
|
|
std::cerr << e.what() << std::endl;
|
|
return EXIT_FAILURE;
|
|
}
|