OpenCV 使用级联分类器做人脸检测时,会调用 void CascadeClassifier::detectMultiScale 方法,该方法采用滑动窗口机制。这里列出该函数的源码实现过程。
代码读起来不复杂,但是很有趣^_^。
void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize) { vector<int> fakeLevels; // 检测未通过层的级数 vector<double> fakeWeights; // 未通过层的强分类器的输出, 不使用时outputRejectLevels = false; detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor, minNeighbors, flags, minObjectSize, maxObjectSize, false ); }
void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects, vector<int>& rejectLevels, vector<double>& levelWeights, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize, bool outputRejectLevels ) { const double GROUP_EPS = 0.2; CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); // [email protected] scaleFactor大于1且图像是8位的。 if( empty() ) return; if( isOldFormatCascade() ) { MemStorage storage(cvCreateMemStorage(0)); CvMat _image = image; CvSeq* _objects = cvHaarDetectObjectsForROC( &_image, oldCascade, storage, rejectLevels, levelWeights, scaleFactor, minNeighbors, flags, minObjectSize, maxObjectSize, outputRejectLevels ); vector<CvAvgComp> vecAvgComp; Seq<CvAvgComp>(_objects).copyTo(vecAvgComp); objects.resize(vecAvgComp.size()); std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect()); return; } objects.clear(); // 清空存放检测结果的容器 if (!maskGenerator.empty()) { // maskGenerator->initializeMask(image); } if( maxObjectSize.height == 0 || maxObjectSize.width == 0 ) maxObjectSize = image.size(); // 默认检测目标最大为图像本身大小 Mat grayImage = image; if( grayImage.channels() > 1 ) // 通道数大于1,则转成灰度图 { Mat temp; cvtColor(grayImage, temp, CV_BGR2GRAY); grayImage = temp; } Mat imageBuffer(image.rows + 1, image.cols + 1, CV_8U); // [email protected] 多加一行一列 vector<Rect> candidates; // 存放候选 rect // 多尺度,每次放大scaleFactor倍 for( double factor = 1; ; factor *= scaleFactor ) { // 分类器训练的时候的图像大小 Size originalWindowSize = getOriginalWindowSize(); // <height>20</height> <width>20</width> xml文件中的首两行即为训练的目标窗口大小 // 每次放大sacleFactor倍 Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) ); // 原图一次缩小 Size scaledImageSize( cvRound( grayImage.cols/factor ), cvRound( grayImage.rows/factor ) ); // 可供原始窗口移动的范围大小 Size processingRectSize( scaledImageSize.width - originalWindowSize.width, scaledImageSize.height - originalWindowSize.height ); // 检测窗口可以动区域小于0,表明图像比originalWindowSize还小,然后退出 
if( processingRectSize.width <= 0 || processingRectSize.height <= 0 ) break; // 窗口大于最大检测目标时,退出;默认是输入图像大小 if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height ) break; // 窗口小于最小检测目标,跳过 if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height ) continue; // 拷贝数据 Mat scaledImage( scaledImageSize, CV_8U, imageBuffer.data ); resize( grayImage, scaledImage, scaledImageSize, 0, 0, CV_INTER_LINEAR ); // y步长 int yStep; if( getFeatureType() == cv::FeatureEvaluator::HOG ) { yStep = 4; // HOG特征,步长设为 4 } else { yStep = factor > 2. ? 1 : 2; // 缩放因子大于2时,步长为1,否则为2 } int stripCount, stripSize; // 并行计算个数及大小,分行并行计算 const int PTS_PER_THREAD = 1000; // 预订时间标准系统 stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD; stripCount = std::min(std::max(stripCount, 1), 100); stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep; // 调用但尺度检测函数 if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates, rejectLevels, levelWeights, outputRejectLevels ) ) break; } objects.resize(candidates.size()); std::copy(candidates.begin(), candidates.end(), objects.begin()); if( outputRejectLevels ) { groupRectangles( objects, rejectLevels, levelWeights, minNeighbors, GROUP_EPS ); } else { groupRectangles( objects, minNeighbors, GROUP_EPS ); // 合并检测结果 } }
bool CascadeClassifier::detectSingleScale( const Mat& image, int stripCount, Size processingRectSize, int stripSize, int yStep, double factor, vector<Rect>& candidates, vector<int>& levels, vector<double>& weights, bool outputRejectLevels ) { /* getOriginalWindowSize --> data.origWinSize (width=20, height=20) */ if( !featureEvaluator->setImage( image, data.origWinSize ) ) // 对图像做积分图 return false; #if defined (LOG_CASCADE_STATISTIC) logger.setImage(image); #endif Mat currentMask; if (!maskGenerator.empty()) { currentMask=maskGenerator->generateMask(image); } vector<Rect> candidatesVector; vector<int> rejectLevels; vector<double> levelWeights; Mutex mtx; if( outputRejectLevels ) // outputRejectLevels = false { parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor, candidatesVector, rejectLevels, levelWeights, true, currentMask, &mtx)); levels.insert( levels.end(), rejectLevels.begin(), rejectLevels.end() ); weights.insert( weights.end(), levelWeights.begin(), levelWeights.end() ); } else { // parallel_for_ 为了TBB加速时使用 // 生成stripCount个平行线程(每个线程生成一个CascadeClassifierInvoker), // 在每个CascadeClassifierInvoker中分配当前缩放图像的N行做检测,这是TBB利用多线程做的加速计算 parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor, candidatesVector, rejectLevels, levelWeights, false, currentMask, &mtx)); } candidates.insert( candidates.end(), candidatesVector.begin(), candidatesVector.end() ); #if defined (LOG_CASCADE_STATISTIC) logger.write(); #endif return true; }
class CascadeClassifierInvoker : public ParallelLoopBody { public: CascadeClassifierInvoker( CascadeClassifier& _cc, Size _sz1, int _stripSize, int _yStep, double _factor, vector<Rect>& _vec, vector<int>& _levels, vector<double>& _weights, bool outputLevels, const Mat& _mask, Mutex* _mtx) { classifier = &_cc; processingRectSize = _sz1; stripSize = _stripSize; yStep = _yStep; scalingFactor = _factor; rectangles = &_vec; rejectLevels = outputLevels ? &_levels : 0; levelWeights = outputLevels ? &_weights : 0; mask = _mask; mtx = _mtx; } void operator()(const Range& range) const { Ptr<FeatureEvaluator> evaluator = classifier->featureEvaluator->clone(); Size winSize(cvRound(classifier->data.origWinSize.width * scalingFactor), cvRound(classifier->data.origWinSize.height * scalingFactor)); int y1 = range.start * stripSize; int y2 = min(range.end * stripSize, processingRectSize.height); for( int y = y1; y < y2; y += yStep ) { for( int x = 0; x < processingRectSize.width; x += yStep ) { if ( (!mask.empty()) && (mask.at<uchar>(Point(x,y))==0)) { continue; } double gypWeight; // //result =1表示通过所有分类器,result<0表示失败的级数。 int result = classifier->runAt(evaluator, Point(x, y), gypWeight); #if defined (LOG_CASCADE_STATISTIC) logger.setPoint(Point(x, y), result); #endif if( rejectLevels ) { if( result == 1 ) result = -(int)classifier->data.stages.size(); if( classifier->data.stages.size() + result < 4 ) { mtx->lock(); rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), winSize.width, winSize.height)); rejectLevels->push_back(-result); levelWeights->push_back(gypWeight); mtx->unlock(); } } else if( result > 0 ) { mtx->lock(); rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), winSize.width, winSize.height)); mtx->unlock(); } if( result == 0 ) x += yStep; } } } CascadeClassifier* classifier; vector<Rect>* rectangles; Size processingRectSize; int stripSize, yStep; double scalingFactor; vector<int> *rejectLevels; vector<double> 
*levelWeights; Mat mask; Mutex* mtx; };
int CascadeClassifier::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, double& weight ) { CV_Assert( oldCascade.empty() ); assert( data.featureType == FeatureEvaluator::HAAR || data.featureType == FeatureEvaluator::LBP || data.featureType == FeatureEvaluator::HOG ); if( !evaluator->setWindow(pt) ) return -1; if( data.isStumpBased ) { if( data.featureType == FeatureEvaluator::HAAR ) return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight ); else if( data.featureType == FeatureEvaluator::LBP ) return predictCategoricalStump<LBPEvaluator>( *this, evaluator, weight ); else if( data.featureType == FeatureEvaluator::HOG ) return predictOrderedStump<HOGEvaluator>( *this, evaluator, weight ); else return -2; } else { if( data.featureType == FeatureEvaluator::HAAR ) return predictOrdered<HaarEvaluator>( *this, evaluator, weight ); else if( data.featureType == FeatureEvaluator::LBP ) return predictCategorical<LBPEvaluator>( *this, evaluator, weight ); else if( data.featureType == FeatureEvaluator::HOG ) return predictOrdered<HOGEvaluator>( *this, evaluator, weight ); else return -2; } }合并候选框的函数 groupRectangles,在上一篇的文章里介绍了。
void groupRectangles(vector<Rect>& rectList, int groupThreshold, double eps, vector<int>* weights, vector<double>* levelWeights) { /* 当组合阈值groupThreshold小于等于0的时候,如果输出weights, 则weights中返回与rectList同样个数个1,函数直接返回,不进行合并操作 */ if( groupThreshold <= 0 || rectList.empty() ) { if( weights ) { size_t i, sz = rectList.size(); weights->resize(sz); for( i = 0; i < sz; i++ ) (*weights)[i] = 1; } return; } // 调用partition函数对rectList中的矩形进行分类 // 其中nclasses表示组合类别,labels表示每个rect属于哪个类别的,相似度计算使用SimilarRects类 vector<int> labels; int nclasses = partition(rectList, labels, SimilarRects(eps)); vector<Rect> rrects(nclasses); vector<int> rweights(nclasses, 0); vector<int> rejectLevels(nclasses, 0); vector<double> rejectWeights(nclasses, DBL_MIN); int i, j, nlabels = (int)labels.size(); /* * 组合分到同一类别的矩形并保存当前类别下通过stage的最大值以及最大的权重 */ for( i = 0; i < nlabels; i++ ) { int cls = labels[i]; rrects[cls].x += rectList[i].x; rrects[cls].y += rectList[i].y; rrects[cls].width += rectList[i].width; rrects[cls].height += rectList[i].height; rweights[cls]++; } if ( levelWeights && weights && !weights->empty() && !levelWeights->empty() ) { for( i = 0; i < nlabels; i++ ) { int cls = labels[i]; if( (*weights)[i] > rejectLevels[cls] ) { rejectLevels[cls] = (*weights)[i]; rejectWeights[cls] = (*levelWeights)[i]; } else if( ( (*weights)[i] == rejectLevels[cls] ) && ( (*levelWeights)[i] > rejectWeights[cls] ) ) rejectWeights[cls] = (*levelWeights)[i]; } } for( i = 0; i < nclasses; i++ ) { Rect r = rrects[i]; float s = 1.f/rweights[i]; rrects[i] = Rect(saturate_cast<int>(r.x*s), saturate_cast<int>(r.y*s), saturate_cast<int>(r.width*s), saturate_cast<int>(r.height*s)); } rectList.clear(); if( weights ) weights->clear(); if( levelWeights ) levelWeights->clear(); // 按照groupThreshold合并规则,以及是否存在包含关系输出合并后的矩形 for( i = 0; i < nclasses; i++ ) { Rect r1 = rrects[i]; int n1 = levelWeights ? 
rejectLevels[i] : rweights[i]; double w1 = rejectWeights[i]; if( n1 <= groupThreshold ) continue; // filter out small face rectangles inside large rectangles for( j = 0; j < nclasses; j++ ) { int n2 = rweights[j]; if( j == i || n2 <= groupThreshold ) continue; Rect r2 = rrects[j]; int dx = saturate_cast<int>( r2.width * eps ); int dy = saturate_cast<int>( r2.height * eps ); // 当r1在r2的内部时,跳出 if( i != j && r1.x >= r2.x - dx && r1.y >= r2.y - dy && r1.x + r1.width <= r2.x + r2.width + dx && r1.y + r1.height <= r2.y + r2.height + dy && (n2 > std::max(3, n1) || n1 < 3) ) break; } if( j == nclasses ) { rectList.push_back(r1); if( weights ) weights->push_back(n1); if( levelWeights ) levelWeights->push_back(w1); } } }