// This example is derived from the ssd_mobilenet_object_detection opencv demo // and adapted to be used with Intel RealSense Cameras // Please see https://github.com/opencv/opencv/blob/master/LICENSE #include #include #include "../cv-helpers.hpp" const size_t inWidth = 300; const size_t inHeight = 300; const float WHRatio = inWidth / (float)inHeight; const float inScaleFactor = 0.007843f; const float meanVal = 127.5; const char* classNames[] = {"background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; int main(int argc, char** argv) try { using namespace cv; using namespace cv::dnn; using namespace rs2; Net net = readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy.caffemodel"); // Start streaming from Intel RealSense Camera pipeline pipe; auto config = pipe.start(); auto profile = config.get_stream(RS2_STREAM_COLOR) .as(); rs2::align align_to(RS2_STREAM_COLOR); Size cropSize; if (profile.width() / (float)profile.height() > WHRatio) { cropSize = Size(static_cast(profile.height() * WHRatio), profile.height()); } else { cropSize = Size(profile.width(), static_cast(profile.width() / WHRatio)); } Rect crop(Point((profile.width() - cropSize.width) / 2, (profile.height() - cropSize.height) / 2), cropSize); const auto window_name = "Display Image"; namedWindow(window_name, WINDOW_AUTOSIZE); while (getWindowProperty(window_name, WND_PROP_AUTOSIZE) >= 0) { // Wait for the next set of frames auto data = pipe.wait_for_frames(); // Make sure the frames are spatially aligned data = align_to.process(data); auto color_frame = data.get_color_frame(); auto depth_frame = data.get_depth_frame(); // If we only received new depth frame, // but the color did not update, continue static int last_frame_number = 0; if (color_frame.get_frame_number() == last_frame_number) continue; last_frame_number = static_cast(color_frame.get_frame_number()); // Convert RealSense frame to OpenCV matrix: auto color_mat = frame_to_mat(color_frame); auto depth_mat = depth_frame_to_meters(depth_frame); Mat inputBlob = blobFromImage(color_mat, inScaleFactor, Size(inWidth, inHeight), meanVal, false); //Convert Mat to batch of images net.setInput(inputBlob, "data"); //set the network input Mat detection = net.forward("detection_out"); //compute output Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr()); // Crop both color and depth frames color_mat = color_mat(crop); depth_mat = depth_mat(crop); float confidenceThreshold = 0.8f; for(int i = 0; i < detectionMat.rows; i++) { float confidence = detectionMat.at(i, 2); if(confidence > confidenceThreshold) { size_t objectClass = (size_t)(detectionMat.at(i, 1)); int xLeftBottom = static_cast(detectionMat.at(i, 3) * color_mat.cols); int yLeftBottom = static_cast(detectionMat.at(i, 4) * color_mat.rows); int xRightTop = static_cast(detectionMat.at(i, 5) * color_mat.cols); int yRightTop = static_cast(detectionMat.at(i, 6) * color_mat.rows); Rect object((int)xLeftBottom, (int)yLeftBottom, (int)(xRightTop - xLeftBottom), (int)(yRightTop - yLeftBottom)); object = object & Rect(0, 0, depth_mat.cols, depth_mat.rows); // Calculate mean depth inside the detection region // This is a very naive way to estimate objects depth // but it is intended to demonstrate how one might // use depth data in general Scalar m = mean(depth_mat(object)); std::ostringstream ss; ss << classNames[objectClass] << " "; ss << std::setprecision(2) << m[0] << " meters away"; String conf(ss.str()); rectangle(color_mat, object, Scalar(0, 255, 0)); int baseLine = 0; Size labelSize = getTextSize(ss.str(), FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); auto center = (object.br() + object.tl())*0.5; center.x = center.x - labelSize.width / 2; rectangle(color_mat, Rect(Point(center.x, center.y - labelSize.height), Size(labelSize.width, labelSize.height + baseLine)), Scalar(255, 255, 255), FILLED); putText(color_mat, ss.str(), center, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0)); } } imshow(window_name, color_mat); if (waitKey(1) >= 0) break; } return EXIT_SUCCESS; } catch (const rs2::error & e) { std::cerr << "RealSense error calling " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n " << e.what() << std::endl; return EXIT_FAILURE; } catch (const std::exception& e) { std::cerr << e.what() << std::endl; return EXIT_FAILURE; }