SIFT类的构造函数：初始化算法参数

SIFT::SIFT(int nfeatures=0, int nOctaveLayers=3, double contrastThreshold=0.04, double edgeThreshold=  
    10, double sigma=1.6)

参数：

nfeatures：特征点数目（算法对检测出的特征点排名，返回最好的nfeatures个特征点）。
nOctaveLayers：金字塔中每组的层数（算法中会自己计算这个值，后面会介绍）。
contrastThreshold：特征点的判别阈值。这个参数的大小怎么把握？
edgeThreshold：过滤掉边缘效应的阈值。
sigma：金字塔第0层图像高斯滤波系

SIFT的“()”运算符：SIFT算法的主要流程

void SIFT::operator()(InputArray _image, InputArray _mask,
                      vector<KeyPoint>& keypoints,
                      OutputArray _descriptors,
                      bool useProvidedKeypoints) const
{

    int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0;
firstOctave 代表最底层一组的组号。如果为0，则源图像所在的组就是最底层一组；如果为 -1，则需要将源图像放大 1 倍（先与sigma=0.5的高斯核卷积再双线性插值放大），放大后的图像位于最底层一组；
actualNOctaves 实际所需金字塔共多少组；actualNLayers 每组金字塔实际需要多少层。只有当特征点keypoints被直接输入时，这两个参数才用得着。如果是从从图像中找keypoint，可以忽略这两参数。


    Mat image = _image.getMat(), mask = _mask.getMat();

    if( image.empty() || image.depth() != CV_8U )
        CV_Error( CV_StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" );

    if( !mask.empty() && mask.type() != CV_8UC1 )
        CV_Error( CV_StsBadArg, "mask has incorrect type (!=CV_8UC1)" );


    if( useProvidedKeypoints )
    {
        firstOctave = 0;
        int maxOctave = INT_MIN;
        for( size_t i = 0; i < keypoints.size(); i++ )
        {
            int octave, layer;
            float scale;
            unpackOctave(keypoints[i], octave, layer, scale);
            firstOctave = std::min(firstOctave, octave);
            maxOctave = std::max(maxOctave, octave);
            actualNLayers = std::max(actualNLayers, layer-2);

        }

        firstOctave = std::min(firstOctave, 0);
        CV_Assert( firstOctave >= -1 && actualNLayers <= nOctaveLayers );
        actualNOctaves = maxOctave - firstOctave + 1;
    }
如果特征点是直接输入的，则依次读取输入的特征点，看看这些特征点需要多少层、多少组金字塔。
约束： 
最底层的金字塔组号至少大于 -1 ，即源图像最多被放大一倍；
实际每组金字塔的层数不得大于nOctaveLayers，nOctaveLayers是在初始化SIFT时指定的；
实际的金字塔组数根据特征点中的组号确定（按需分配，节省空间和提高效率）。


    Mat base = createInitialImage(image, firstOctave < 0, (float)sigma);
    vector<Mat> gpyr, dogpyr;
    int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(log( (double)std::min( base.cols, base.rows ) ) / log(2.) - 2) - firstOctave;
对原始图像进行 方差=sigma^2 的高斯滤波，得到基础图像base：金字塔最底层的图像。
计算金字塔的组数 nOctaves： 使得最高的一组金字塔的宽（或高）只有8个像素（or 4个像素？）。

    //double t, tf = getTickFrequency();
    //t = (double)getTickCount();
    buildGaussianPyramid(base, gpyr, nOctaves);
    buildDoGPyramid(gpyr, dogpyr);
创建差分高斯金字塔DOG

    //t = (double)getTickCount() - t;
    //printf("pyramid construction time: %g\n", t*1000./tf);

    if( !useProvidedKeypoints )
    {
        //t = (double)getTickCount();
        findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
        KeyPointsFilter::removeDuplicated( keypoints );

        if( nfeatures > 0 )
            KeyPointsFilter::retainBest(keypoints, nfeatures);
        //t = (double)getTickCount() - t;
        //printf("keypoint detection time: %g\n", t*1000./tf);

        if( firstOctave < 0 )
            for( size_t i = 0; i < keypoints.size(); i++ )
            {
                KeyPoint& kpt = keypoints[i];
                float scale = 1.f/(float)(1 << -firstOctave);
                kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255);
                kpt.pt *= scale;
                kpt.size *= scale;
            }

        if( !mask.empty() )
            KeyPointsFilter::runByPixelsMask( keypoints, mask );
    }
    else
    {
        // filter keypoints by mask
        //KeyPointsFilter::runByPixelsMask( keypoints, mask );
    }
如果特征点不是直接输入的（false=useProvidedKeypoints），则需要先调用findScaleSpaceExtrema()找到特征点，即尺度空间内的极值点（正的极大值和负的极小值），并调用removeDuplicated()去掉一些重复的特征点（坐标pt、邻域大小size、方向angle都一致的特征点被认为是重复）。如果特征点个数超出上限，还要用retainBest() 来挑出最好的nfeatures个特征点(响应response越高的特征点被认为越"好")。
另外，由于findScaleSpaceExtrema()找特征点时默认最底层的金字塔是第0组，因此如果实际的最底层是第 -1 组的话，需要将求出的关键点的组号减一，并将关键点的坐标pt、邻域尺寸size等乘以scale(=0.5，缩小一倍)。关键点的坐标和邻域尺寸都是以第0组的图像为基准的。


    if( _descriptors.needed() )
    {
        //t = (double)getTickCount();
        int dsize = descriptorSize();

descriptorSize()被定义为  SIFT_DESCR_WIDTH*SIFT_DESCR_WIDTH*SIFT_DESCR_HIST_BINS , 即 4*4*8，共4*4个种子点，每个种子点需要8个bin(梯度直方图)。

        _descriptors.create((int)keypoints.size(), dsize, CV_32F);
        Mat descriptors = _descriptors.getMat();

        calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
        //t = (double)getTickCount() - t;
        //printf("descriptor extraction time: %g\n", t*1000./tf);
    }
}

调用calcDescriptors()为每个特征点计算特征向量，特征向量是128维的。

子流程 createInitialImage() ：创建初始图像（第0层图像）

static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma )
{
    Mat gray, gray_fpt;
    if( img.channels() == 3 || img.channels() == 4 )
        cvtColor(img, gray, COLOR_BGR2GRAY);
    else
        img.copyTo(gray);
    gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);

    float sig_diff;

    if( doubleImageSize )
    {
        sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) );
        Mat dbl;
        resize(gray_fpt, dbl, Size(gray.cols*2, gray.rows*2), 0, 0, INTER_LINEAR);
        GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff);
        return dbl;
    }
    else
    {
        sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f) );
        GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff);
        return gray_fpt;
    }
}

如果需要将图像放大一倍(doubleImageSize为true)，则现将源图像用双线性插值放大一倍，再进行方差 = ( sigma^2 - 4*0.5^2 ) 的高斯滤波，生成基础图像；
如果不需要放大，则直接将源图像进行方差 = ( sigma^2 - 0.5^2 ) 的高斯滤波，生成基础图像；（0.5^2不乘4）
0.5是哪里来的？输入的源图像被认为是由某个假想的原始图像经过方差 = 0.5^2 的高斯滤波后得来的。如果需要进行放大，那放大后的源图像就是由某个原始图像放大后经过方差=(2*0.5)^2=4*0.5^2 高斯滤波得来。所有的 sigma 都是以假想的原始图像为基准的，所以做高斯滤波时，方差是 sigma^2 减去 4*0.5^2 或 0.5^2，而不是直接用sigma^2 。（两个高斯核的叠加作用仍等效于一个新的高斯核，新高斯核的方差等于前两个核的方差之和）

子流程 findScaleSpaceExtrema() ：寻找尺度空间极值点（即特征点）

void SIFT::findScaleSpaceExtrema( const vector<Mat>& gauss_pyr, const vector<Mat>& dog_pyr,
                                  vector<KeyPoint>& keypoints ) const
{
    int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3);
    int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE);
    const int n = SIFT_ORI_HIST_BINS;
    float hist[n];
    KeyPoint kpt;

    keypoints.clear();

    for( int o = 0; o < nOctaves; o++ )
        for( int i = 1; i <= nOctaveLayers; i++ )
        {
            int idx = o*(nOctaveLayers+2)+i;
            const Mat& img = dog_pyr[idx];
            const Mat& prev = dog_pyr[idx-1];
            const Mat& next = dog_pyr[idx+1];
            int step = (int)img.step1();
            int rows = img.rows, cols = img.cols;

            for( int r = SIFT_IMG_BORDER; r < rows-SIFT_IMG_BORDER; r++)
            {
                const sift_wt* currptr = img.ptr<sift_wt>(r);
                const sift_wt* prevptr = prev.ptr<sift_wt>(r);
                const sift_wt* nextptr = next.ptr<sift_wt>(r);

                for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++)
                {
                    sift_wt val = currptr[c];

                    // find local extrema with pixel accuracy
                    if( std::abs(val) > threshold &&
                       ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] &&
                         val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] &&
                         val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] &&
                         val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] &&
                         val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] &&
                         val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] &&
                         val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] &&
                         val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] &&
                         val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) ||
                        (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] &&
                         val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] &&
                         val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] &&
                         val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] &&
                         val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] &&
                         val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] &&
                         val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] &&
                         val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] &&
                         val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1])))

这个这么长的if语句寻找的是正的极大值和负的极小值

                    {
                        int r1 = r, c1 = c, layer = i;
                        if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1,
                                                nOctaveLayers, (float)contrastThreshold,
                                                (float)edgeThreshold, (float)sigma) )
                            continue;

adjustLocalExtrema() 用来精确求解角点位置，并过滤掉边缘点。后面会解释这个函数。



                        float scl_octv = kpt.size*0.5f/(1 << o);
                        float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer],
                                                         Point(c1, r1),
                                                         cvRound(SIFT_ORI_RADIUS * scl_octv),
                                                         SIFT_ORI_SIG_FCTR * scl_octv,
                                                         hist, n);

求出关键点邻域内的梯度方向直方图并求出最大的bin包含的元素数量 omax。此直方图在与关键点尺度相对应的金字塔层中求取。
scl_octv是关键点的组内尺度坐标，SIFT_ORI_RADIUS被定义为3，因此邻域的半径为 3*关键点组内尺度；
求梯度直方图时对邻域内的点会依其距关键点的距离进行高斯加权，加权的Sigma=1.5*关键点组内尺度。（SIFT_ORI_SIG_FCTR被定义为1.5）。
到现在为止只是求出了直方图，但还并未提取出关键点的主方向和辅方向。提取关键点方向由下面的几行实现。

下面的SIFT_ORI_PEAK_RATIO被定义为0.8。元素数量多于 0.8 倍omax 的bin都被认为是关键点的主方向或辅方向。每个主/辅方向都用一个单独的特征点表示。

                        float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO);
                        for( int j = 0; j < n; j++ )
                        {
                            int l = j > 0 ? j - 1 : n - 1;
                            int r2 = j < n-1 ? j + 1 : 0;

                            if( hist[j] > hist[l]  &&  hist[j] > hist[r2]  &&  hist[j] >= mag_thr )
                            {
                                float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]);
j是整数，但直方图的极值点一般并不是准确地位于整数位置，因此这里进行了极值点拟合。对直方图一维二阶泰勒展开，再取导数为0，即可拟合出精度更高的极值点，极值点的位置等于 (负的一阶导数/二阶导数)。

                                bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin;
                                kpt.angle = 360.f - (float)((360.f/n) * bin);
                                if(std::abs(kpt.angle - 360.f) < FLT_EPSILON)
                                    kpt.angle = 0.f;
                                keypoints.push_back(kpt);
                            }
                        }

                    }
                }
            }
        }
}

上面出现了一个新函数：adjustLocalExtrema() 。它可以用来精确求解角点位置（二阶泰勒级数拟合），并过滤掉边缘点。下面阐述该函数的原理。

在尺度空间（三维）的 (r1,c1,layer) 点处，进行三维二阶泰勒展开，可以求取极值的精确位置。
三维二阶泰勒展开的原理如下：
△f = f ( x+△x, y+△y, z+△z ) - f ( x, y, z )
= f ( x+△x, y+△y, z+△z ) - f ( x+△x, y+△y, z ) +
f ( x+△x, y+△y, z ) - f ( x+△x, y, z ) +
f ( x+△x, y, z ) - f ( x, y, z )

// 进行泰勒展开近似
≈ Fz (x+△x, y+△y, z) * △z + 0.5 * Fzz(x, y, z) * △z^2 +
Fy (x+△x, y, z) * △y + 0.5 * Fyy(x, y, z) * △y^2 +
Fx (x, y, z) * △x + 0.5 * Fxx(x, y, z) * △x^2

≈ [ Fz (x, y, z) + Fzx * △x + Fyz * △y ] * △z + 0.5 * Fzz(x, y, z) * △z^2 +
[ Fy (x, y, z) + Fxy * △x ] * △y + 0.5 * Fyy(x, y, z) * △y^2 +
Fx (x, y, z) * △x + 0.5 * Fxx(x, y, z) * △x^2

上面推导的基本思路是， f ( x, y, z ) 到 f ( x+△x, y+△y, z+△z ) 之间的增量，可以分解为：

" f ( x, y, z ) 到 f ( x+△x, y, z ) 的增量 " +

" f ( x+△x, y, z ) 到 f ( x+△x, y+△y, z ) 的增量 " +

" f ( x+△x, y+△y, z ) 到 f ( x+△x, y+△y, z+△z ) 的增量 "。

而这三个分量分别都可以用单变量（一维）的二阶泰勒展开求解，由此组合起来就可以得到三维的二阶泰勒展开式。

整理一下就是：
△f ≈ G0' * △ + 0.5* △'* H0 * △
其中G0代表点(x,y,z)处的梯度向量，单引号上角标代表转置，△代表偏移( △x, △y, △z )，H0 代表点(x,y,z)处的Hessian矩阵。对两边求导并取导数为0的点，[△(△f)]/[△] = 0，得到的解就是极值点。极值点的方程为： -G0 = H0 * △，或 G0 = H0 * (-△)
这个公式以及上面的推理方法同样适用于更高维的泰勒展开。

另外，上式的第二项 △'* H0 * △，构成了一个二次型：
2*(△f - G0' * △) = △'* H0 * △
H 的特征向量反映了二阶分量的变化方向，特征值反映了对应的方向上二阶分量变化的快慢和正负。在边缘区域，垂直边缘的方向上，一、二阶分量变化都很剧烈，而在平行于边缘的方向上，一、二阶分量变化都很小。因此通过比较Hessian矩阵的两个特征值还可辨别出位于边缘的点：大特征值与小特征值之比远大于1的点是边缘点（这些边缘点要被干掉）。设两个特征值分别是α和β，则 Tr(H)^2/det(H) = (α+β)^2/αβ = (1+γ)^2/γ 的值就可以反映出大、小特征值之比。利用这个关系可以避免特征值的求取。
这跟 Harris 角点检测中去除边缘点的原理类似，只不过Harris 角点检测中用到的2X2特征矩阵是由一阶导数的二次式构成，而这里的 Hession 矩阵是由二阶导数构成。

adjustLocalExtrema() 的实现代码如下：

static bool adjustLocalExtrema( const vector<Mat>& dog_pyr, KeyPoint& kpt, int octv,
                                int& layer, int& r, int& c, int nOctaveLayers,
                                float contrastThreshold, float edgeThreshold, float sigma )
{
    const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE);
    const float deriv_scale = img_scale*0.5f;
    const float second_deriv_scale = img_scale;
    const float cross_deriv_scale = img_scale*0.25f;

    float xi=0, xr=0, xc=0, contr=0;
    int i = 0;

    for( ; i < SIFT_MAX_INTERP_STEPS; i++ )
    {
        int idx = octv*(nOctaveLayers+2) + layer;
        const Mat& img = dog_pyr[idx];
        const Mat& prev = dog_pyr[idx-1];
        const Mat& next = dog_pyr[idx+1];

        Vec3f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale,
                 (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale,
                 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale);

        float v2 = (float)img.at<sift_wt>(r, c)*2;
        float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale;
        float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale;
        float dss = (next.at<sift_wt>(r, c) + prev.at<sift_wt>(r, c) - v2)*second_deriv_scale;
        float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) -
                     img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1))*cross_deriv_scale;
        float dxs = (next.at<sift_wt>(r, c+1) - next.at<sift_wt>(r, c-1) -
                     prev.at<sift_wt>(r, c+1) + prev.at<sift_wt>(r, c-1))*cross_deriv_scale;
        float dys = (next.at<sift_wt>(r+1, c) - next.at<sift_wt>(r-1, c) -
                     prev.at<sift_wt>(r+1, c) + prev.at<sift_wt>(r-1, c))*cross_deriv_scale;

        Matx33f H(dxx, dxy, dxs,
                  dxy, dyy, dys,
                  dxs, dys, dss);

        Vec3f X = H.solve(dD, DECOMP_LU);

H.solve(dD)用于求解线性方程组，该方程组是  dD = H * X 。
前面提到极值点的方程是 ： G0 = H0 * (-△) ，这里G0就是dD,H0就是H，(-△)就是X。因此为求得△，只需对方程解X求反。如下面的xi、xr和xc。

        xi = -X[2];
        xr = -X[1];
        xc = -X[0];

        if( std::abs(xi) < 0.5f && std::abs(xr) < 0.5f && std::abs(xc) < 0.5f )
            break;

        if( std::abs(xi) > (float)(INT_MAX/3) ||
            std::abs(xr) > (float)(INT_MAX/3) ||
            std::abs(xc) > (float)(INT_MAX/3) )
            return false;

        c += cvRound(xc);
        r += cvRound(xr);
        layer += cvRound(xi);


如果拟合的极值点与当前的整数点偏差大于0.5，则说明真正的极值点偏离当前整数点很多，需要换一个更好的整数点，然后重新拟合。知道偏差小于0.5或迭代超过一定次数。



        if( layer < 1 || layer > nOctaveLayers ||
            c < SIFT_IMG_BORDER || c >= img.cols - SIFT_IMG_BORDER  ||
            r < SIFT_IMG_BORDER || r >= img.rows - SIFT_IMG_BORDER )
            return false;
    }

    // ensure convergence of interpolation
    if( i >= SIFT_MAX_INTERP_STEPS )
        return false;

    {
        int idx = octv*(nOctaveLayers+2) + layer;
        const Mat& img = dog_pyr[idx];
        const Mat& prev = dog_pyr[idx-1];
        const Mat& next = dog_pyr[idx+1];
        Matx31f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale,
                   (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale,
                   (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale);
        float t = dD.dot(Matx31f(xc, xr, xi));

        contr = img.at<sift_wt>(r, c)*img_scale + t * 0.5f;
        if( std::abs( contr ) * nOctaveLayers < contrastThreshold )
            return false;

        // principal curvatures are computed using the trace and det of Hessian
        float v2 = img.at<sift_wt>(r, c)*2.f;
        float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale;
        float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale;
        float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) -
                     img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1)) * cross_deriv_scale;
        float tr = dxx + dyy;
        float det = dxx * dyy - dxy * dxy;

求出了Hessian矩阵的轨迹和行列式，以便做边缘判断。

        if( det <= 0 || tr*tr*edgeThreshold >= (edgeThreshold + 1)*(edgeThreshold + 1)*det )
            return false;
    }


    kpt.pt.x = (c + xc) * (1 << octv);
    kpt.pt.y = (r + xr) * (1 << octv);
    kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16);
    kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2;
    kpt.response = std::abs(contr);

    return true;
}

注意，关键点kpt的坐标pt和邻域size都是以第0组图像为基准的。size代表邻域的直径，它的大小为 2*Sigma, Sigma是关键点所在的尺度。邻域的半径正好是 Sigma 。

子流程 calcDescriptors() ：特征点描述符的计算

static void calcDescriptors(const vector<Mat>& gpyr, const vector<KeyPoint>& keypoints,
                            Mat& descriptors, int nOctaveLayers, int firstOctave )
{
    int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS;

SIFT_DESCR_WIDTH 是4，SIFT_DESCR_HIST_BINS是8。


    for( size_t i = 0; i < keypoints.size(); i++ )
    {
        KeyPoint kpt = keypoints[i];
        int octave, layer;
        float scale;
        unpackOctave(kpt, octave, layer, scale);

        CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2);
        float size=kpt.size*scale;
        Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale);
        const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer];

解析出关键点金字塔组号、层号和比例。再根据组号和层号找到对应层的金字塔图像，根据scale计算出在该层金字塔图像上关键点的位置和邻域大小。（关键点kpt内部存储的坐标pt和size都是以第0组第0层图像为基准的，因此要想在其他层中使用，需要先乘以scale变换一下）

        float angle = 360.f - kpt.angle;
        if(std::abs(angle - 360.f) < FLT_EPSILON)
            angle = 0.f;
        calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i));
计算关键点的描述符（一个128维的向量）

    }
}






static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl,
                               int d, int n, float* dst )
{
    Point pt(cvRound(ptf.x), cvRound(ptf.y));
    float cos_t = cosf(ori*(float)(CV_PI/180));
    float sin_t = sinf(ori*(float)(CV_PI/180));
    float bins_per_rad = n / 360.f;
    float exp_scale = -1.f/(d * d * 0.5f);

exp_scale = -1.f / [2*(d/2)^2], 所以高斯加权的Sigma=d/2 ;
d=4,n=8。bins_per_rad表示每1°包含多少个bin。

    float hist_width = SIFT_DESCR_SCL_FCTR * scl;
    int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f);

scl等于 size*0.5,正好对应关键点的尺度。
SIFT_DESCR_SCL_FCTR = 3 ，hist_width是每个种子点（一个矩形区域）的宽度，共4*4个种子点。
radius 理论上应为 hist_width*d/2，但考虑到双线性差值的需要，应将radius额外扩展一些，所以乘以 d+1；又考虑到旋转时正方形的对角线，所以再乘以 根号2。


    // Clip the radius to the diagonal of the image to avoid autobuffer too large exception
    radius = std::min(radius, (int) sqrt((double) img.cols*img.cols + img.rows*img.rows));
    cos_t /= hist_width;
    sin_t /= hist_width;
这时 cos_t 和 sin_t 已经不单单是正余弦值了，他们除了一个系数。

    int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
    int rows = img.rows, cols = img.cols;

    AutoBuffer<float> buf(len*6 + histlen);
    float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
    float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;

    for( i = 0; i < d+2; i++ )
    {
        for( j = 0; j < d+2; j++ )
            for( k = 0; k < n+2; k++ )
                hist[(i*(d+2) + j)*(n+2) + k] = 0.;
    }

    for( i = -radius, k = 0; i <= radius; i++ )
        for( j = -radius; j <= radius; j++ )
        {
            // Calculate sample's histogram array coords rotated relative to ori.
            // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e.
            // r_rot = 1.5) have full weight placed in row 1 after interpolation.
            float c_rot = j * cos_t - i * sin_t;
            float r_rot = j * sin_t + i * cos_t;

由于cos_t 和 sin_t除过了hist_width，所以这时旋转后的坐标的 c_rot 和 r_rot 的单位直接就是种子点了。下面的 rbin 和 cbin 计算的就是种子点的编号。种子点的编号从(-1,-1)到(d,d)。减去0.5是为了方便差值时计算权重。

            float rbin = r_rot + d/2 - 0.5f;
            float cbin = c_rot + d/2 - 0.5f;
            int r = pt.y + i, c = pt.x + j;

            if( rbin > -1 && rbin < d && cbin > -1 && cbin < d &&
                r > 0 && r < rows - 1 && c > 0 && c < cols - 1 )
超出窗边界或图像边界的点不予考虑。注意这里 rbin 和 cbin 的下线都是 -1，也就是说它们可能为负
            {
                float dx = (float)(img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1));
                float dy = (float)(img.at<sift_wt>(r-1, c) - img.at<sift_wt>(r+1, c));
                X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin;
                W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale;
                k++;
            }
        }

    len = k;
    fastAtan2(Y, X, Ori, len, true);
    magnitude(X, Y, Mag, len);
    exp(W, W, len);
Ori 存放角度数组，Mag存放梯度的能量数组，W存放高斯权重

    for( k = 0; k < len; k++ )
    {
        float rbin = RBin[k], cbin = CBin[k];
        float obin = (Ori[k] - ori)*bins_per_rad;
        float mag = Mag[k]*W[k];
mag等于高斯权重乘以梯度能量

        int r0 = cvFloor( rbin );
        int c0 = cvFloor( cbin );
        int o0 = cvFloor( obin );
由于 rbin 可能为负，所以 r0 可能为 -1，这样r0的取值就是 [-1,d-1]，而后面双线性插值时还会扩展到d,因此r0的取值是[-1,d]，共d+2个取值，c0也一样，所以hist数组的r维和c维的size都是 d+2。


        rbin -= r0;
        cbin -= c0;
        obin -= o0;

只保留rbin、cbin和obin的小数部分。每个样点对与之相邻的各个bin的贡献比重与该样点到每个bin的距离成反比。


        if( o0 < 0 )
            o0 += n;
        if( o0 >= n )
            o0 -= n;
角度的bin是循环的，所以处理一下 o0。这样o0的范围就是 [0,n-1] ，但后面双线性差值时可能还会扩展到n，因此o0的范围就是 [0,n],共n+1个值，所以 hist 数组o维的长度应为 n+1 。( ?? 这里的程序中o维的长度定义为了 n+2，是我分析错了还是opencv确实保留得多了? )。

        // histogram update using tri-linear interpolation
        float v_r1 = mag*rbin, v_r0 = mag - v_r1;	
        float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11;
        float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01;
        float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111;
        float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101;
        float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011;
        float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001;

对于一个三维的直方图， 样点对与之相邻的位于(r,c,o)处的bin的贡献是：
mag * (dis(r,rbin)) * (dis(c,cbin)) * (dis(o,obin))
其中 dis(r,rbin) 表示r到rbin的距离（差）。注意r,c,o需要满足：dis(r,rbin)<1 && dis(c,cbin)<1 && dis(o,obin)<1 ,才算是与样点相邻。


        int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0;
        hist[idx] += v_rco000;
        hist[idx+1] += v_rco001;
        hist[idx+(n+2)] += v_rco010;
        hist[idx+(n+3)] += v_rco011;
        hist[idx+(d+2)*(n+2)] += v_rco100;
        hist[idx+(d+2)*(n+2)+1] += v_rco101;
        hist[idx+(d+3)*(n+2)] += v_rco110;
        hist[idx+(d+3)*(n+2)+1] += v_rco111;
    }

    // finalize histogram, since the orientation histograms are circular
    for( i = 0; i < d; i++ )
        for( j = 0; j < d; j++ )
        {
            int idx = ((i+1)*(d+2) + (j+1))*(n+2);
            hist[idx] += hist[idx+n];
            hist[idx+1] += hist[idx+n+1];
            for( k = 0; k < n; k++ )
                dst[(i*d + j)*n + k] = hist[idx+k];
        }
将 hist 拷贝到 dst
    // copy histogram to the descriptor,
    // apply hysteresis thresholding
    // and scale the result, so that it can be easily converted
    // to byte array
    float nrm2 = 0;
    len = d*d*n;
    for( k = 0; k < len; k++ )
        nrm2 += dst[k]*dst[k];
    float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
    for( i = 0, nrm2 = 0; i < k; i++ )
    {
        float val = std::min(dst[i], thr);
        dst[i] = val;
        nrm2 += val*val;
    }
    nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
抑制暴走的特征：每个特征不能大于特征向量总模长的 0.2 倍。

#if 1
    for( k = 0; k < len; k++ )
    {
        dst[k] = saturate_cast<uchar>(dst[k]*nrm2);
    }
归一化并将每个特征都转化为8位深度。
#else
    float nrm1 = 0;
    for( k = 0; k < len; k++ )
    {
        dst[k] *= nrm2;
        nrm1 += dst[k];
    }
    nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
    for( k = 0; k < len; k++ )
    {
        dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
    }
#endif
}

SIFT特征点算法源码分析（opencv）

SIFT类的构造函数：初始化算法参数

SIFT的“()”运算符：SIFT算法的主要流程

子流程 createInitialImage() ：创建初始图像（第0层图像）

子流程 findScaleSpaceExtrema() ：寻找尺度空间极值点（即特征点）

子流程 calcDescriptors() ：特征点描述符的计算

猜你喜欢

SIFT特征点算法源码分析（opencv）

SIFT类的构造函数：初始化算法参数

SIFT的“()”运算符：SIFT算法的主要流程

子流程 createInitialImage() ： 创建初始图像（第0层图像）

子流程 findScaleSpaceExtrema() ：寻找尺度空间极值点（即特征点）

子流程 calcDescriptors() ： 特征点描述符的计算

猜你喜欢

子流程 createInitialImage() ：创建初始图像（第0层图像）

子流程 calcDescriptors() ：特征点描述符的计算