You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

377 lines
15 KiB

// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2020 Intel Corporation. All Rights Reserved.
// NOTE: This file will be compiled only with INTEL_OPENVINO_DIR pointing to an OpenVINO install!
#include "post-processing-filters-list.h"
#include "post-processing-worker-filter.h"
#include <rs-vino/object-detection.h>
#include <rs-vino/age-gender-detection.h>
#include <rs-vino/detected-object.h>
#include <cv-helpers.hpp>
#include <rsutils/string/from.h>
// Short alias for the InferenceEngine namespace; used below as openvino::... (e.g. for the CPU extensions)
namespace openvino = InferenceEngine;
/*
    A detected_object extended with facial characteristics: an age estimate and a
    running male/female score from which the gender is decided.
*/
class detected_face : public openvino_helpers::detected_object
{
    float _age;           // latest (smoothed) age estimate; -1 is treated as "not set yet" by update_age()
    float _male_score;    // cumulative - see update_gender()
    float _female_score;  // cumulative - see update_gender()

public:
    using ptr = std::shared_ptr< detected_face >;

    /*
        id, location, depth_location, intensity and depth are forwarded to detected_object
        (together with an empty string). male_prob seeds the gender scores with its distance
        from 0.5 on the matching side; age seeds the age estimate.
    */
    explicit detected_face( size_t id,
                            cv::Rect const & location,
                            float male_prob,
                            float age,
                            cv::Rect const & depth_location = cv::Rect{},
                            float intensity = 1,
                            float depth = 0 )
        : detected_object( id, std::string(), location, depth_location, intensity, depth )
        , _age( age )
        , _male_score( 0.f )
        , _female_score( 0.f )
    {
        // Exactly one of the two scores gets the initial (non-zero) contribution
        if( male_prob > 0.5f )
            _male_score = male_prob - 0.5f;
        else
            _female_score = 0.5f - male_prob;
    }

    // Blend a new age sample into the estimate (95% old, 5% new); an age of -1 is simply replaced
    void update_age( float value )
    {
        if( _age == -1 )
        {
            _age = value;
            return;
        }
        _age = 0.95f * _age + 0.05f * value;
    }

    // Accumulate a new male-probability sample; samples that are not >= 0 are ignored
    void update_gender( float value )
    {
        if( ! ( value >= 0 ) )
            return;

        if( value > 0.5 )
        {
            _male_score += value - 0.5f;
            return;
        }
        _female_score += 0.5f - value;
    }

    // Age rounded to the nearest integer (halves round up)
    int get_age() const
    {
        float const rounded = std::floor( _age + 0.5f );
        return static_cast< int >( rounded );
    }

    // Male only when the male score strictly exceeds the female score; a tie counts as female
    bool is_male() const
    {
        return _male_score > _female_score;
    }

    bool is_female() const
    {
        return ! ( _male_score > _female_score );
    }
};
/* Define a filter that will perform facial detection using OpenVINO
*/
class openvino_face_detection : public post_processing_worker_filter
{
InferenceEngine::Core _ie;
openvino_helpers::object_detection _face_detector;
openvino_helpers::age_gender_detection _age_detector;
openvino_helpers::detected_objects _faces;
size_t _id = 0;
std::shared_ptr< atomic_objects_in_frame > _objects;
public:
openvino_face_detection( std::string const & name )
: post_processing_worker_filter( name )
/*
This face detector is from the OpenCV Model Zoo:
https://github.com/opencv/open_model_zoo/blob/master/models/intel/face-detection-adas-0001/description/face-detection-adas-0001.md
*/
, _face_detector(
"face-detection-adas-0001.xml",
0.5, // Probability threshold
false ) // Not async
/*
*/
, _age_detector(
"age-gender-recognition-retail-0013.xml",
false ) // Not async
{
}
~openvino_face_detection()
{
// Complete background worker to ensure it releases the instance's resources in controlled manner
release_background_worker();
}
public:
void start( rs2::subdevice_model & model ) override
{
post_processing_worker_filter::start( model );
_objects = model.detected_objects;
}
private:
void worker_start() override
{
LOG(INFO) << "Loading CPU extensions...";
std::string const device_name{ "CPU" };
// Cpu extensions library was removed in OpenVINO >= 2020.1, extensions were merged into the cpu plugin.
#ifdef OPENVINO2019
_ie.AddExtension(std::make_shared< openvino::Extensions::Cpu::CpuExtensions >(), device_name);
#endif
_face_detector.load_into( _ie, device_name);
_age_detector.load_into( _ie, device_name);
}
/*
Returns the "intensity" of the face in the picture, and calculates the distance to it, ignoring
Invalid depth pixels or those outside a range that would be appropriate for a face.
*/
static float calc_face_attrs(
const rs2::video_frame & cf,
const rs2::depth_frame & df,
cv::Rect const & depth_bbox,
float * p_mean_depth )
{
uint16_t const * const pdw = reinterpret_cast<const uint16_t*>( df.get_data() );
uint8_t const * const pcb = reinterpret_cast<uint8_t*>(const_cast<void*>( cf.get_data() ));
float const depth_scale = df.get_units();
int const depth_width = df.get_width();
int const color_width = cf.get_width();
int const color_bpp = cf.get_bytes_per_pixel();
int const top = depth_bbox.y;
int const bot = top + depth_bbox.height;
int const left = depth_bbox.x;
int const right = left + depth_bbox.width;
// Find a center point that has a depth on it
int center_x = (left + right) / 2;
int center_index = (top + bot) / 2 * depth_width + center_x;
for( int d = 1; !pdw[center_index] && d < 10; ++d )
{
if( pdw[center_index + d] ) center_index += d;
if( pdw[center_index - d] ) center_index -= d;
if( pdw[center_index + depth_width] ) center_index += depth_width;
if( pdw[center_index - depth_width] ) center_index -= depth_width;
}
if( !pdw[center_index] )
{
if( p_mean_depth )
*p_mean_depth = 0;
return 1;
}
float const d_center = pdw[center_index] * depth_scale;
// Set a "near" and "far" threshold -- anything closer or father, respectively,
// would be deemed not a part of the face and therefore background:
float const d_far_threshold = d_center + 0.2f;
float const d_near_threshold = std::max( d_center - 0.5f, 0.001f );
// Average human head diameter ~= 7.5" or ~19cm
// Assume that the center point is in the front of the face, so the near threshold
// should be very close to that, while the far farther...
float total_luminance = 0;
float total_depth = 0;
unsigned pixel_count = 0;
#pragma omp parallel for schedule(dynamic) //Using OpenMP to try to parallelise the loop
for( int y = top; y < bot; ++y )
{
auto depth_pixel_index = y * depth_width + left;
for( int x = left; x < right; ++x, ++depth_pixel_index )
{
// Get the depth value of the current pixel
auto d = depth_scale * pdw[depth_pixel_index];
// Check if the depth value is invalid (<=0) or greater than the threashold
if( d >= d_near_threshold && d <= d_far_threshold )
{
// Calculate the offset in other frame's buffer to current pixel
auto const coffset = depth_pixel_index * color_bpp;
auto const pc = &pcb[coffset];
// Using RGB...
auto r = pc[0], g = pc[1], b = pc[2];
total_luminance += 0.2989f * r + 0.5870f * g + 0.1140f * b; // CCIR 601 -- see https://en.wikipedia.org/wiki/Luma_(video)
++pixel_count;
// And get a mean depth, too
total_depth += d;
}
}
}
if( p_mean_depth )
*p_mean_depth = pixel_count ? total_depth / pixel_count : 0;
return pixel_count ? total_luminance / pixel_count : 1;
}
void worker_body( rs2::frame f ) override
{
auto fs = f.as< rs2::frameset >();
auto cf = f;
rs2::depth_frame df = rs2::frame{};
if (fs)
{
cf = fs.get_color_frame();
df = fs.get_depth_frame();
}
if ((!fs && f.get_profile().stream_name() != "Color") || (fs && !cf))
{
_objects->clear();
return;
}
// A color video frame is the minimum we need for detection
if( cf.get_profile().format() != RS2_FORMAT_RGB8 )
{
LOG(ERROR) << get_context(fs) << "color format must be RGB8; it's " << cf.get_profile().format();
return;
}
// A depth frame is optional: if not enabled, we won't get it, and we simply won't provide depth info...
if (df && df.get_profile().format() != RS2_FORMAT_Z16)
{
LOG(ERROR) << get_context(fs) << "depth format must be Z16; it's " << df.get_profile().format();
return;
}
try
{
rs2_intrinsics color_intrin, depth_intrin;
rs2_extrinsics color_extrin, depth_extrin;
get_trinsics( cf, df, color_intrin, depth_intrin, color_extrin, depth_extrin );
objects_in_frame objects;
cv::Mat image( color_intrin.height, color_intrin.width, CV_8UC3, const_cast<void *>(cf.get_data()), cv::Mat::AUTO_STEP );
_face_detector.enqueue( image );
_face_detector.submit_request();
auto results = _face_detector.fetch_results();
openvino_helpers::detected_objects prev_faces { std::move( _faces ) };
_faces.clear();
for( auto && result : results )
{
cv::Rect rect = result.location & cv::Rect( 0, 0, image.cols, image.rows );
detected_face::ptr face = std::dynamic_pointer_cast< detected_face >(
openvino_helpers::find_object( rect, prev_faces ));
try
{
// Use a mean of the face intensity to help identify faces -- if the intensity changes too much,
// it's not the same face...
float depth = 0, intensity = 1;
cv::Rect depth_rect;
if( df )
{
rs2::rect depth_bbox = project_rect_to_depth(
rs2::rect { float( rect.x ), float( rect.y ), float( rect.width ), float( rect.height ) },
df,
color_intrin, depth_intrin, color_extrin, depth_extrin
);
// It is possible to get back an invalid rect!
if( depth_bbox == depth_bbox.intersection( rs2::rect { 0.f, 0.f, float( depth_intrin.width ), float( depth_intrin.height) } ) )
{
depth_rect = cv::Rect( int( depth_bbox.x ), int( depth_bbox.y ), int( depth_bbox.w ), int( depth_bbox.h ) );
intensity = calc_face_attrs( cf, df, depth_rect, &depth );
}
else
{
LOG(DEBUG) << get_context(fs) << "depth_bbox is no good!";
}
}
else
{
intensity = openvino_helpers::calc_intensity( image( rect ) );
}
float intensity_change = face ? std::abs( intensity - face->get_intensity() ) / face->get_intensity() : 1;
float depth_change = ( face && face->get_depth() ) ? std::abs( depth - face->get_depth() ) / face->get_depth() : 0;
if( intensity_change > 0.07f || depth_change > 0.2f )
{
// Figure out the age for this new face
float age = 0, maleProb = 0.5;
// Enlarge the bounding box around the detected face for more robust operation of face analytics networks
cv::Mat face_image = image(
openvino_helpers::adjust_face_bbox( rect, 1.4f )
& cv::Rect( 0, 0, image.cols, image.rows ) );
_age_detector.enqueue( face_image );
_age_detector.submit_request();
_age_detector.wait();
auto age_gender = _age_detector[0];
age = age_gender.age;
maleProb = age_gender.maleProb;
// Note: we may want to update the gender/age for each frame, as it may change...
face = std::make_shared< detected_face >( _id++, rect, maleProb, age, depth_rect, intensity, depth );
}
else
{
face->move( rect, depth_rect, intensity, depth );
}
_faces.push_back( face );
}
catch( ... )
{
LOG(ERROR) << get_context(fs) << "Unhandled exception!!!";
}
}
for( auto && object : _faces )
{
auto face = std::dynamic_pointer_cast<detected_face>( object );
cv::Rect const & loc = face->get_location();
rs2::rect bbox { float( loc.x ), float( loc.y ), float( loc.width ), float( loc.height ) };
rs2::rect normalized_color_bbox = bbox.normalize( rs2::rect { 0, 0, float(color_intrin.width), float(color_intrin.height) } );
rs2::rect normalized_depth_bbox = normalized_color_bbox;
if( df )
{
cv::Rect const & depth_loc = face->get_depth_location();
rs2::rect depth_bbox { float( depth_loc.x ), float( depth_loc.y ), float( depth_loc.width ), float( depth_loc.height ) };
normalized_depth_bbox = depth_bbox.normalize( rs2::rect { 0, 0, float( df.get_width() ), float( df.get_height() ) } );
}
objects.emplace_back(
face->get_id(),
rsutils::string::from() << (face->is_male() ? u8"\uF183" : u8"\uF182") << " " << face->get_age(),
normalized_color_bbox,
normalized_depth_bbox,
face->get_depth()
);
}
std::lock_guard< std::mutex > lock( _objects->mutex );
if( is_pb_enabled() )
{
if( _objects->sensor_is_on )
_objects->swap( objects );
}
else
{
_objects->clear();
}
}
catch( const std::exception & e )
{
LOG(ERROR) << get_context(fs) << e.what();
}
catch( ... )
{
LOG(ERROR) << get_context(fs) << "Unhandled exception caught in openvino_face_detection";
}
}
void on_processing_block_enable( bool e ) override
{
post_processing_worker_filter::on_processing_block_enable( e );
if( !e )
{
// Make sure all the objects go away!
std::lock_guard< std::mutex > lock( _objects->mutex );
_objects->clear();
}
}
};
// Register the filter under its display name. Initializing this file-level static is what makes
// the register_filter() call happen; 'it' just holds whatever that call returns.
static auto it = post_processing_filters_list::register_filter< openvino_face_detection >( "Face Detection : OpenVINO" );