OpenCV Machine Learning: Quick-Start Sample Programs

The main algorithms implemented in the OpenCV machine learning module are:
1) Normal Bayes Classifier: CvNormalBayesClassifier
2) K-Nearest Neighbor Classifier: CvKNearest
3) Support Vector Machine: CvSVM
4) Expectation Maximization: EM
5) Decision Tree: CvDTree
6) Random Trees Classifier: CvRTrees
7) Extremely Randomized Trees Classifier: CvERTrees
8) Boosted Tree Classifier: CvBoost
9) Gradient Boosted Trees: CvGBTrees
10) Artificial Neural Networks: CvANN_MLP
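
All of the samples below target the OpenCV 2.4.x C++ ML API. Most of these classes derive from CvStatModel and share the same train / predict / save / load pattern (cv::EM is the exception: it is a cv::Algorithm). A minimal sketch of that round trip, with made-up data and file name:

// Minimal CvStatModel round trip (a sketch, assuming OpenCV 2.4.x):
// train from a samples-by-features CV_32FC1 matrix plus a response column,
// persist the model, then query it with a 1 x nfeatures row vector.
float samples[4][2] = { {1, 1}, {2, 2}, {8, 8}, {9, 9} };
float labels[4]     = { 0, 0, 1, 1 };
Mat samplesMat(4, 2, CV_32FC1, samples);
Mat labelsMat(4, 1, CV_32FC1, labels);

CvSVM model;                                  // the other Cv* classes work alike
model.train(samplesMat, labelsMat, Mat(), Mat(), CvSVMParams());
model.save("model.xml");                      // serialize to XML/YAML
model.load("model.xml");                      // restore it later
Mat query = (Mat_<float>(1, 2) << 8.5, 8.5);
float prediction = model.predict(query);      // returns the predicted label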

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ml/ml.hpp"
#include "opencv2/opencv.hpp"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
using namespace cv;
using namespace std;

/************************************************************************/
/* K-Nearest Neighbour Classifier (k-NN)                                */
/************************************************************************/
void KNN()
{
    float labels[10] = {0,0,0,0,0,1,1,1,1,1};
    Mat labelsMat(10, 1, CV_32FC1, labels);
    cout<<labelsMat<<endl;
    float trainingData[10][2];
    srand(time(0));
    // class 0 points fall in [1,100]^2, class 1 points in [100,199]^2
    for(int i=0;i<5;i++)
    {
        trainingData[i][0] = rand()%100+1;
        trainingData[i][1] = rand()%100+1;
        trainingData[i+5][0] = rand()%100+100;
        trainingData[i+5][1] = rand()%100+100;
    }
    Mat trainingDataMat(10, 2, CV_32FC1, trainingData);
    cout<<trainingDataMat<<endl;
    KNearest knn;
    knn.train(trainingDataMat,labelsMat,Mat(), false, 2 );

    //train( const cv::Mat& trainData, const cv::Mat& responses,
    //  const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
    //  int maxK=32, bool updateBase=false );
    // Data for visual representation
    int width =200, height =200;
    Mat image = Mat::zeros(height, width, CV_8UC3);
    Vec3b green(0,255,0), blue (255,0,0);

    // classify every pixel of the image; i is used as the x coordinate and
    // j as the y coordinate, so the pixel is addressed as at<Vec3b>(row = j, col = i)
    for (int i = 0; i < image.rows; ++i){
        for (int j = 0; j < image.cols; ++j){
            const Mat sampleMat = (Mat_<float>(1,2) << i, j);
            Mat response(1, 1, CV_32FC1);   // one prediction per input sample
            float result = knn.find_nearest(sampleMat, 2, &response);

            if (result != 0)
                image.at<Vec3b>(j, i) = green;
            else
                image.at<Vec3b>(j, i) = blue;
        }
    }

    // Show the training data
    for(int i=0;i<5;i++){
        circle( image, Point(trainingData[i][0],  trainingData[i][1]), 
            5, Scalar(  0,   0,   0), -1, 8);
        circle( image, Point(trainingData[i+5][0],  trainingData[i+5][1]), 
            5, Scalar(255, 255, 255), -1, 8);
    }
    imshow("KNN Simple Example", image); // show it to the user
    waitKey();

}
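
Beyond the single return value used above, find_nearest can also report the individual neighbour responses and distances, which helps gauge how confident the vote was. A small sketch continuing with the knn and sampleMat variables from inside KNN() (the overload with output matrices, as in the OpenCV 2.4 API; the interpretation comments are mine):

// Sketch: inspect the k neighbours behind a prediction.
Mat results(1, 1, CV_32FC1);            // the predicted label
Mat neighborResponses(1, 2, CV_32FC1);  // labels of the k=2 nearest neighbours
Mat dists(1, 2, CV_32FC1);              // squared distances to those neighbours
knn.find_nearest(sampleMat, 2, results, neighborResponses, dists);
// if both neighbour labels agree the vote is unanimous; large values in
// dists flag query points that lie far from all training samples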

void  Kmeans()
{
    const int MAX_CLUSTERS = 5;
    Scalar colorTab[] =     // at most MAX_CLUSTERS (5) clusters, so five colors suffice
    {
        Scalar(0, 0, 255),
        Scalar(0,255,0),
        Scalar(255,100,100),
        Scalar(255,0,255),
        Scalar(0,255,255)
    };

    Mat img(500, 500, CV_8UC3);
    RNG rng(12345); // random number generator

    for(;;)
    {
        int k, clusterCount = rng.uniform(2, MAX_CLUSTERS+1);
        int i, sampleCount = rng.uniform(1, 1001);
        Mat points(sampleCount, 1, CV_32FC2), labels;   // the samples: a 2-channel column vector whose elements are Point2f

        clusterCount = MIN(clusterCount, sampleCount);
        Mat centers(clusterCount, 1, points.type());    // stores the cluster centers after clustering

        /* generate random sample from multigaussian distribution */
        for( k = 0; k < clusterCount; k++ ) // generate random points for each cluster
        {
            Point center;
            center.x = rng.uniform(0, img.cols);
            center.y = rng.uniform(0, img.rows);
            Mat pointChunk = points.rowRange(k*sampleCount/clusterCount,
                k == clusterCount - 1 ? sampleCount :
                (k+1)*sampleCount/clusterCount);   // the last cluster is not necessarily an equal share;
            // it receives whatever samples remain after the integer division.
            // Every cluster uses the same variance; only the mean differs.
            rng.fill(pointChunk, CV_RAND_NORMAL, Scalar(center.x, center.y), Scalar(img.cols*0.05, img.rows*0.05));
        }

        randShuffle(points, 1, &rng);   // shuffle the points before clustering; note that points and pointChunk share the same data

        kmeans(points, clusterCount, labels,
            TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0),
            3, KMEANS_PP_CENTERS, centers);  // run 3 attempts and keep the best; seed the centers with the kmeans++ scheme

        img = Scalar::all(0);

        for( i = 0; i < sampleCount; i++ )
        {
            int clusterIdx = labels.at<int>(i);
            Point ipt = points.at<Point2f>(i);
            circle( img, ipt, 2, colorTab[clusterIdx], CV_FILLED, CV_AA );
        }

        imshow("clusters", img);

        char key = (char)waitKey();     // wait indefinitely for a key press
        if( key == 27 || key == 'q' || key == 'Q' ) // 'ESC'
            break;
    }

}
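
kmeans also returns the compactness of the best attempt (the sum of squared distances from each point to its assigned center), which can be compared across different values of K. A brief sketch reusing the points matrix from Kmeans() above (the elbow heuristic is an illustration, not part of the original program):

// Sketch: use the compactness returned by cv::kmeans to compare K values.
for( int K = 2; K <= MAX_CLUSTERS; K++ )
{
    Mat tryLabels, tryCenters;
    double compactness = kmeans(points, K, tryLabels,
        TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0),
        3, KMEANS_PP_CENTERS, tryCenters);
    cout << "K=" << K << " compactness=" << compactness << endl;
    // compactness drops as K grows; look for the "elbow" where the
    // improvement levels off to pick a reasonable cluster count
}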

#define NTRAINING_SAMPLES   100         // Number of training samples per class
#define FRAC_LINEAR_SEP     0.9f        // Fraction of samples which compose the linear separable part
/************************************************************************/
/* SVM, Support Vector Machine                                          */
/************************************************************************/
void  SVM_()
{
    // Data for visual representation
    const int WIDTH = 512, HEIGHT = 512;
    Mat I = Mat::zeros(HEIGHT, WIDTH, CV_8UC3);

    //--------------------- 1. Set up training data randomly ---------------------------------------
    Mat trainData(2*NTRAINING_SAMPLES, 2, CV_32FC1);
    Mat labels   (2*NTRAINING_SAMPLES, 1, CV_32FC1);

    RNG rng(100); // Random value generation class

    // Set up the linearly separable part of the training data
    int nLinearSamples = (int) (FRAC_LINEAR_SEP * NTRAINING_SAMPLES);

    // Generate random points for class 1
    Mat trainClass = trainData.rowRange(0, nLinearSamples);
    // The x coordinates of the points are in [1, 0.4 * WIDTH)
    Mat c = trainClass.colRange(0, 1);
    rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(0.4 * WIDTH));
    // The y coordinates of the points are in [1, HEIGHT)
    c = trainClass.colRange(1,2);
    rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));

    // Generate random points for class 2
    trainClass = trainData.rowRange(2*NTRAINING_SAMPLES-nLinearSamples, 2*NTRAINING_SAMPLES);
    // The x coordinates of the points are in [0.6 * WIDTH, WIDTH)
    c = trainClass.colRange(0, 1);
    rng.fill(c, RNG::UNIFORM, Scalar(0.6*WIDTH), Scalar(WIDTH));
    // The y coordinates of the points are in [1, HEIGHT)
    c = trainClass.colRange(1,2);
    rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));

    //------------------ Set up the non-linearly separable part of the training data ---------------

    // Generate random points for classes 1 and 2 (the overlapping band)
    trainClass = trainData.rowRange(nLinearSamples, 2*NTRAINING_SAMPLES-nLinearSamples);
    // The x coordinates of the points are in [0.4 * WIDTH, 0.6 * WIDTH)
    c = trainClass.colRange(0,1);
    rng.fill(c, RNG::UNIFORM, Scalar(0.4*WIDTH), Scalar(0.6*WIDTH));
    // The y coordinates of the points are in [1, HEIGHT)
    c = trainClass.colRange(1,2);
    rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));

    //------------------------- Set up the labels for the classes ---------------------------------
    labels.rowRange(                0,   NTRAINING_SAMPLES).setTo(1);  // Class 1
    labels.rowRange(NTRAINING_SAMPLES, 2*NTRAINING_SAMPLES).setTo(2);  // Class 2

    //------------------------ 2. Set up the support vector machines parameters --------------------
    CvSVMParams params;
    params.svm_type    = SVM::C_SVC;
    params.C           = 0.1;
    params.kernel_type = SVM::LINEAR;
    params.term_crit   = TermCriteria(CV_TERMCRIT_ITER, (int)1e7, 1e-6);

    //------------------------ 3. Train the svm ----------------------------------------------------
    cout << "Starting training process" << endl;
    CvSVM svm;
    svm.train(trainData, labels, Mat(), Mat(), params);
    svm.save("supportVectorMachine.txt");
    // Alternatively, skip training and restore a previously saved model:
    // svm.load("supportVectorMachine.txt");
    cout << "Finished training process" << endl;

    //------------------------ 4. Show the decision regions ----------------------------------------
    Vec3b green(0,100,0), blue (100,0,0);
    // as in the k-NN example, i is used as the x coordinate and j as the y
    // coordinate, so the pixel is addressed as at<Vec3b>(row = j, col = i)
    for (int i = 0; i < I.rows; ++i)
        for (int j = 0; j < I.cols; ++j)
        {
            Mat sampleMat = (Mat_<float>(1,2) << i, j);
            float response = svm.predict(sampleMat);

            if      (response == 1)    I.at<Vec3b>(j, i) = green;
            else if (response == 2)    I.at<Vec3b>(j, i) = blue;
        }

    //----------------------- 5. Show the training data --------------------------------------------
    int thick = -1;
    int lineType = 8;
    float px, py;
    // Class 1
    for (int i = 0; i < NTRAINING_SAMPLES; ++i)
    {
        px = trainData.at<float>(i,0);
        py = trainData.at<float>(i,1);
        circle(I, Point( (int) px, (int) py ), 3, Scalar(0, 255, 0), thick, lineType);
    }
    // Class 2
    for (int i = NTRAINING_SAMPLES; i < 2*NTRAINING_SAMPLES; ++i)
    {
        px = trainData.at<float>(i,0);
        py = trainData.at<float>(i,1);
        circle(I, Point( (int) px, (int) py ), 3, Scalar(255, 0, 0), thick, lineType);
    }

    //------------------------- 6. Show support vectors --------------------------------------------
    thick = 2;
    lineType = 8;
    int svCount = svm.get_support_vector_count();

    for (int i = 0; i < svCount; ++i)
    {
        const float* v = svm.get_support_vector(i);
        circle( I, Point( (int) v[0], (int) v[1]), 6, Scalar(128, 128, 128), thick, lineType);
    }

    imwrite("result.png", I);                      // save the image
    imshow("SVM for Non-Linear Training Data", I); // show it to the user
    waitKey(0);
}
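
Instead of fixing C by hand as above, CvSVM::train_auto can select the parameters by k-fold cross-validation over built-in grids, which matters mostly for non-linear kernels such as RBF. A hedged sketch reusing trainData and labels from SVM_() (assuming the train_auto and get_params members of the OpenCV 2.4 API):

// Sketch: let the SVM choose its own parameters by 10-fold cross-validation.
CvSVMParams autoParams;
autoParams.svm_type    = SVM::C_SVC;
autoParams.kernel_type = SVM::RBF;     // C and gamma are then searched over grids
CvSVM autoSvm;
autoSvm.train_auto(trainData, labels, Mat(), Mat(), autoParams, 10);
cout << "chosen C = " << autoSvm.get_params().C << endl;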
/************************************************************************/
/*    Normal Bayes Classifier                                           */
/************************************************************************/
void NBC()  
{     
    float trainingData[8][3] = { {6, 180, 12}, {5.92, 190, 11}, {5.58, 170, 12}, {5.92, 165, 10},  
    {5, 100, 6}, {5.5, 150, 8},{5.42, 130, 7}, {5.75, 150, 9}};  
    Mat trainingDataMat(8, 3, CV_32FC1, trainingData);   

    float responses[8] = {'M', 'M', 'M', 'M', 'F', 'F', 'F', 'F'};  
    Mat responsesMat(8, 1, CV_32FC1, responses);  

    NormalBayesClassifier nbc;
    nbc.train(trainingDataMat, responsesMat);
    nbc.save("normalBayes.txt");
    // Alternatively, restore a previously saved model:
    // nbc.load("normalBayes.txt");
    float myData[3] = {6, 130, 8};  
    Mat myDataMat(1, 3, CV_32FC1, myData);  
    float r = nbc.predict( myDataMat );  

    cout<<endl<<"result:  "<<(char)r<<endl;  
    system("pause"); 

} 
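
predict also accepts many samples at once: passing a results matrix as the second argument classifies every row in a single call. A small sketch continuing with the trained nbc inside NBC() (the extra test rows are made up):

// Sketch: classify several samples in one call.
float batch[2][3] = { {6, 130, 8}, {5.4, 120, 7} };
Mat batchMat(2, 3, CV_32FC1, batch);
Mat results;                       // receives one predicted label per row
nbc.predict(batchMat, &results);
for (int i = 0; i < results.rows; i++)
    cout << "sample " << i << ": " << (char)results.at<float>(i, 0) << endl;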
/************************************************************************/
/*         Gradient Boosted Trees                                       */
/************************************************************************/
void GBT()
{   
    // the element type must match CV_32FC1 below, hence float, not double
    float trainingData[28][2]={{210.4, 3}, {240.0, 3}, {300.0, 4}, {153.4, 3}, {138.0, 3},
    {194.0,4}, {189.0, 3}, {126.8, 3}, {132.0, 2}, {260.9, 4},
    {176.7,3}, {160.4, 3}, {389.0, 3}, {145.8, 3}, {160.0, 3},
    {141.6,2}, {198.5, 4}, {142.7, 3}, {149.4, 3}, {200.0, 3},
    {447.8,5}, {230.0, 4}, {123.6, 3}, {303.1, 4}, {188.8, 2},
    {196.2,4}, {110.0, 3}, {252.6, 3} };
    Mat trainingDataMat(28, 2, CV_32FC1, trainingData);

    float responses[28] = { 399900, 369000, 539900, 314900, 212000, 239999, 329999,
        259900, 299900, 499998, 252900, 242900, 573900, 464500,
        329900, 232000, 299900, 198999, 242500, 347000, 699900, 
        449900, 199900, 599000, 255000, 259900, 249900, 469000};
    Mat responsesMat(28, 1, CV_32FC1, responses);
    // set up the training parameters
    CvGBTreesParams params;
    params.loss_function_type = CvGBTrees::ABSOLUTE_LOSS;
    params.weak_count = 10;
    params.shrinkage = 0.01f;
    params.subsample_portion = 0.8f;
    params.max_depth = 3;
    params.use_surrogates = false;

    CvGBTrees gbt;
    // train the model; CV_ROW_SAMPLE means each row of trainingDataMat is one sample
    gbt.train(trainingDataMat, CV_ROW_SAMPLE, responsesMat, Mat(), Mat(), Mat(), Mat(), params);

    float sampleData[2]={185.4, 4};    // the sample to predict
    Mat sampleMat(1, 2, CV_32FC1, sampleData);   // a 1 x 2 row vector, matching CV_ROW_SAMPLE

    float r = gbt.predict(sampleMat);    // run the prediction
    cout<<endl<<"result:  "<<r<<endl;
    system("pause");

}
/************************************************************************/
/*     Extremely Randomized Trees Classifier                            */
/************************************************************************/

void ET()
{   
    // the element type must match CV_32FC1 below, hence float, not double
    float trainingData[28][2]={{210.4, 3}, {240.0, 3}, {300.0, 4}, {153.4, 3}, {138.0, 3},
    {194.0,4}, {189.0, 3}, {126.8, 3}, {132.0, 2}, {260.9, 4},
    {176.7,3}, {160.4, 3}, {389.0, 3}, {145.8, 3}, {160.0, 3},
    {141.6,2}, {198.5, 4}, {142.7, 3}, {149.4, 3}, {200.0, 3},
    {447.8,5}, {230.0, 4}, {123.6, 3}, {303.1, 4}, {188.8, 2},
    {196.2,4}, {110.0, 3}, {252.6, 3} };
    CvMat trainingDataCvMat = cvMat( 28, 2, CV_32FC1, trainingData );

    float responses[28] = { 399900, 369000, 539900, 314900, 212000, 239999, 329999,
        259900, 299900, 499998, 252900, 242900, 573900, 464500,
        329900, 232000, 299900, 198999, 242500, 347000, 699900, 
        449900, 199900, 599000, 255000, 259900, 249900, 469000};
    CvMat responsesCvMat = cvMat( 28, 1, CV_32FC1, responses );

    // max_depth=10, min_sample_count=2, regression_accuracy=0, use_surrogates=false,
    // max_categories=16, priors=0, calc_var_importance=true, nactive_vars=0,
    // max_num_of_trees=100, forest_accuracy=0, term_crit=CV_TERMCRIT_ITER
    CvRTParams params = CvRTParams(10, 2, 0, false, 16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER);

    CvERTrees rtrees;   // CvERTrees derives from CvRTrees and trains extremely randomized trees
    rtrees.train(&trainingDataCvMat, CV_ROW_SAMPLE, &responsesCvMat,
        NULL, NULL, NULL, NULL, params);

    float sampleData[2]={201.5, 3};    // the sample to predict
    Mat sampleMat(1, 2, CV_32FC1, sampleData);   // a 1 x 2 row vector
    float r = rtrees.predict(sampleMat);
    cout<<endl<<"result:  "<<r<<endl;
    system("pause");
}
/************************************************************************/
/*       Expectation-Maximization (EM)                                  */
/************************************************************************/
void EM_()
{
    Mat src = imread("4.jpg");    // load the input image
    if (src.empty()) { cout << "could not load 4.jpg" << endl; return; }

    namedWindow( "my daughter", WINDOW_AUTOSIZE );
    imshow( "my daughter", src);    // show the original image
    waitKey(0);
    // data holds the samples; labels holds each sample's cluster label
    Mat data, labels;
    // convert the color image into the sample matrix the EM algorithm expects
    for (int i = 0; i < src.rows; i++)
    {
        for (int j = 0; j < src.cols; j++)
        {
            Vec3b point = src.at<Vec3b>(i, j);    // the color value of the current pixel
            // turn the three color intensities into one sample
            Mat tmp = (Mat_<float>(1, 3) << point[0], point[1], point[2]);
            data.push_back(tmp);    // store the current sample
        }
    }

    int clusters = 4;    // the number of segments, i.e. four classes in total
    EM em = EM(clusters);    // instantiate EM
    // train on the samples to obtain each sample's cluster label
    em.train(data, noArray(), labels, noArray());
    // each class is painted in its own color
    Vec3b colorTab[] =
    {
        Vec3b(0, 0, 255),
        Vec3b(0, 255, 0),
        Vec3b(255, 100, 100),
        Vec3b(255, 0, 255),
        Vec3b(0, 255, 255)
    };

    int n = 0;    // index into the sample data
    for (int i = 0; i < src.rows; i++)
    {
        for (int j = 0; j < src.cols; j++)
        {
            int clusterIdx = labels.at<int>(n);    // the cluster label of the current pixel
            src.at<Vec3b>(i, j) = colorTab[clusterIdx];    // paint it with the corresponding color
            n++;
        }
    }

    namedWindow( "EM", WINDOW_AUTOSIZE );
    imshow( "EM", src);    //显示分割结果

    waitKey(0);
}
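
The trained mixture can also score colors that were not in the image: cv::EM::predict returns a Vec2d holding the log-likelihood of the sample and the index of the most probable component (OpenCV 2.4 API). A short sketch continuing with em inside EM_() (the query color is arbitrary):

// Sketch: classify a new color with the trained model.
Mat newSample = (Mat_<float>(1, 3) << 200, 30, 30);   // a BGR color
Vec2d out = em.predict(newSample);   // out[0]: log-likelihood, out[1]: component index
cout << "component " << (int)out[1] << ", log-likelihood " << out[0] << endl;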

static const char* var_desc[] =
{
    "Age (young=Y, middle=M, old=O)",
    "Salary? (low=L, medium=M, high=H)",
    "Own_House? (false=N, true=Y)",
    "Own_Car? (false=N, true=Y)",
    "Credit_Rating (fair=F, good=G, excellent=E)",
    0
};
/************************************************************************/
/*    Decision Tree                                                     */
/************************************************************************/
void DT()
{   
    // 19 training samples
    float trainingData[19][5]={ {'Y','L','N','N','F'},
    {'Y','L','Y','N','G'},
    {'Y','M','Y','N','G'},
    {'Y','M','Y','Y','G'},
    {'Y','H','Y','Y','G'},
    {'Y','M','N','Y','G'},
    {'M','L','Y','Y','E'},
    {'M','H','Y','Y','G'},
    {'M','L','N','Y','G'},
    {'M','M','Y','Y','F'},
    {'M','H','Y','Y','E'},
    {'M','M','N','N','G'},
    {'O','L','N','N','G'},
    {'O','L','Y','Y','E'},
    {'O','L','Y','N','E'},
    {'O','M','N','Y','G'},
    {'O','L','N','N','E'},
    {'O','H','N','Y','F'},
    {'O','H','Y','Y','E'}   };
    Mat trainingDataMat(19, 5, CV_32FC1, trainingData);    // matrix form of the samples
    // the classification result (response) of each sample
    float responses[19] = {'N','N','Y','Y','Y','N','Y','Y','N','N','Y','N','N','Y','Y','N','N','N','Y'};
    Mat responsesMat(19, 1, CV_32FC1, responses);    // matrix form

    float priors[2] = {1, 1};    // prior probabilities; the two classes ('Y'/'N') are weighted equally
    // define the decision tree parameters
    CvDTreeParams params(  15,    // maximum depth of the tree
        1,    // minimum sample count at a leaf node
        0,    // regression accuracy; not needed here
        false,    // whether to use surrogate splits; no features are missing, so not needed
        25,    // maximum number of categories
        0,    // number of cross-validation folds; the sample set is too small, so none
        false,    // use the 1SE rule; not needed here
        false,    // whether to physically remove pruned branches; not needed here
        priors    // the prior probabilities
        );
    // variable-type mask: this is a classification tree and all 5 features are
    // categorical, so every entry is 1 (CV_VAR_CATEGORICAL)
    Mat varTypeMat(6, 1, CV_8U, Scalar::all(1));

    CvDTree dtree;    // instantiate the CvDTree class
    // train on the samples and build the decision tree
    dtree.train (  trainingDataMat,    // the training samples
        CV_ROW_SAMPLE,    // rows of the sample matrix are samples, columns are features
        responsesMat,    // the samples' response matrix
        Mat(),    // use all features
        Mat(),    // use all training samples
        varTypeMat,    // the variable-type mask
        Mat(),    // no features are missing
        params    // the decision tree parameters
        );
    // query the importance of each feature
    const CvMat* var_importance = dtree.get_var_importance();
    // print how important each feature is
    for( int i = 0; i < var_importance->cols*var_importance->rows; i++ )
    {
        double val = var_importance->data.db[i];
        char buf[100];
        int len = (int)(strchr( var_desc[i], '(' ) - var_desc[i] - 1);
        strncpy( buf, var_desc[i], len );
        buf[len] = '\0';
        printf( "%s", buf );
        printf( ": %g%%\n", val*100. );
    }

    float myData[5] = {'M','H','Y','N','F'};    // the sample to predict
    Mat myDataMat(1, 5, CV_32FC1, myData);    // a 1 x 5 row vector
    double r = dtree.predict( myDataMat, Mat(), false)->value;    // run the prediction

    cout<<endl<<"result:  "<<(char)r<<endl;    // print the result
    system("pause");


}
/************************************************************************/
/*    Boosted Tree Classifier                                           */
/************************************************************************/
void BT()
{   
    // training samples
    float trainingData[42][2]={ {40, 55},{35, 35},{55, 15},{45, 25},{10, 10},{15, 15},{40, 10},
    {30, 15},{30, 50},{100, 20},{45, 65},{20, 35},{80, 20},{90, 5},
    {95, 35},{80, 65},{15, 55},{25, 65},{85, 35},{85, 55},{95, 70},
    {105, 50},{115, 65},{110, 25},{120, 45},{15, 45},
    {55, 30},{60, 65},{95, 60},{25, 40},{75, 45},{105, 35},{65, 10},
    {50, 50},{40, 35},{70, 55},{80, 30},{95, 45},{60, 20},{70, 30},
    {65, 45},{85, 40}   };
    Mat trainingDataMat(42, 2, CV_32FC1, trainingData); 
    // the responses of the training samples
    float responses[42] = {'R','R','R','R','R','R','R','R','R','R','R','R','R','R','R','R',
        'R','R','R','R','R','R','R','R','R','R',
        'B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B' };
    Mat responsesMat(42, 1, CV_32FC1, responses);

    float priors[2] = {1, 1};    // prior probabilities; both classes are weighted equally

    CvBoostParams params( CvBoost::REAL, // boost_type  
        10, // weak_count  
        0.95, // weight_trim_rate  
        15, // max_depth  
        false, // use_surrogates  
        priors // priors 
        );  

    CvBoost boost;
    boost.train (   trainingDataMat, 
        CV_ROW_SAMPLE, 
        responsesMat,
        Mat(),  
        Mat(),
        Mat(),
        Mat(),  
        params
        );  
    // the sample to predict
    float myData[2] = {55, 25};
    Mat myDataMat(1, 2, CV_32FC1, myData);   // a 1 x 2 row vector
    double r = boost.predict( myDataMat );
    cout<<endl<<"result:  "<<(char)r<<endl;
    system("pause");
}
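
For a rough confidence measure, CvBoost::predict can return the weighted sum of the weak responses instead of the voted label by setting its returnSum flag (the last parameter of the OpenCV 2.4 overload). A sketch continuing with boost and myDataMat inside BT():

// Sketch: get the boosting margin instead of the voted label.
double margin = boost.predict(myDataMat, Mat(), Range::all(), false, true);
// roughly, the sign picks the class and the magnitude indicates how decisive the vote was
cout << "margin: " << margin << endl;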


int main()
{
    //KNN();
    //Kmeans();
    //SVM_();
    //NBC();  
    GBT();
    //ET();
    //EM_();
    //DT();
    //BT();

    system("pause");
}


Reposted from blog.csdn.net/liufanghuangdi/article/details/56013189