OpenCV机器学习库中主要实现算法如下:
1)一般贝叶斯分类器(Normal Bayes Classifier):CvNormalBayesClassifier
2)K近邻分类(K-nearest Neighbor Classifier): CvKNearest
3)支持向量机(Support Vector Machine):CvSVM
4)期望最大化(Expectation Maximization): EM
5)决策树(Decision Tree):CvDTree
6)随机森林(Random Trees Classifier):CvRTrees
7)超随机树分类器(Extremely randomized trees Classifier): CvERTrees
8)Boost分类器(Boosted tree Classifier): CvBoost
9)梯度下降Boost分类器(Gradient Boosted Trees):CvGBTrees
10)神经网络(Artificial Neural Networks): CvANN_MLP
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/ml/ml.hpp"
#include "opencv2/opencv.hpp"
#include <iostream>
#include <stdlib.h>
#include <time.h>
using namespace cv;
using namespace std;
/************************************************************************/
/* K-Nearest Neighbour Classifier (KNN)                                 */
/* Trains CvKNearest (via the 2.x typedef `KNearest`) on 10 random 2-D  */
/* points (5 per class) and paints the per-pixel decision regions of a  */
/* 200x200 image, then overlays the training points.                    */
/************************************************************************/
void KNN()
{
// Class labels: first 5 samples -> class 0, last 5 -> class 1.
float labels[10] = {0,0,0,0,0,1,1,1,1,1};
Mat labelsMat(10, 1, CV_32FC1, labels);
cout<<labelsMat<<endl;
float trainingData[10][2];
srand(time(0));
// Class 0 points fall in [1,100]^2, class 1 points in [100,199]^2,
// so the two clusters are (mostly) separable.
for(int i=0;i<5;i++)
{
trainingData[i][0] = rand()%100+1;
trainingData[i][1] = rand()%100+1;
trainingData[i+5][0] = rand()%100+100;
trainingData[i+5][1] = rand()%100+100;
}
Mat trainingDataMat(10, 2, CV_32FC1, trainingData);
cout<<trainingDataMat<<endl;
KNearest knn;
// maxK = 2: upper bound on the k later passed to find_nearest.
knn.train(trainingDataMat,labelsMat,Mat(), false, 2 );
//train( const cv::Mat& trainData, const cv::Mat& responses,
// const cv::Mat& sampleIdx=cv::Mat(), bool isRegression=false,
// int maxK=32, bool updateBase=false );
// Data for visual representation
int width =200, height =200;
Mat image = Mat::zeros(height, width, CV_8UC3);
Vec3b green(0,255,0), blue (255,0,0);
// Classify every pixel and colour it by the predicted class.
for (int i = 0; i < image.rows; ++i){
for (int j = 0; j < image.cols; ++j){
const Mat sampleMat = (Mat_<float>(1,2) << i,j);
// NOTE(review): only one sample is classified here, so a 1x1 results
// matrix would be expected rather than 1x2 — verify against the
// CvKNearest::find_nearest documentation.
Mat response(1, 2, CV_32FC1);
float result = knn.find_nearest(sampleMat,2,&response);
// NOTE(review): at<Vec3b>(j, i) swaps row/col relative to the (i, j)
// sample above; this does not crash only because the image is square.
if (result !=0){
image.at<Vec3b>(j, i) = green;
}
else
image.at<Vec3b>(j, i) = blue;
}
}
// Show the training data
for(int i=0;i<5;i++){
circle( image, Point(trainingData[i][0], trainingData[i][1]),
5, Scalar( 0, 0, 0), -1, 8);
circle( image, Point(trainingData[i+5][0], trainingData[i+5][1]),
5, Scalar(255, 255, 255), -1, 8);
}
imshow("KNN Simple Example", image); // show it to the user
waitKey();
}
/**
 * K-means demo: repeatedly generates a random mixture of 2..5 Gaussian
 * clusters, runs cv::kmeans on the points and displays the coloured
 * clustering result. Press ESC / 'q' / 'Q' to leave the loop.
 */
void Kmeans()
{
const int MAX_CLUSTERS = 5;
Scalar colorTab[] = // at most 5 clusters, so 5 colours suffice
{
Scalar(0, 0, 255),
Scalar(0,255,0),
Scalar(255,100,100),
Scalar(255,0,255),
Scalar(0,255,255)
};
Mat img(500, 500, CV_8UC3);
RNG rng(12345); // random number generator
for(;;)
{
int k, clusterCount = rng.uniform(2, MAX_CLUSTERS+1);
int i, sampleCount = rng.uniform(1, 1001);
Mat points(sampleCount, 1, CV_32FC2), labels; // 2-channel column vector: one Point2f sample per row
clusterCount = MIN(clusterCount, sampleCount);
Mat centers(clusterCount, 1, points.type()); // receives the cluster centres
/* generate random sample from multigaussian distribution */
for( k = 0; k < clusterCount; k++ ) // one Gaussian blob per cluster
{
Point center;
center.x = rng.uniform(0, img.cols);
center.y = rng.uniform(0, img.rows);
Mat pointChunk = points.rowRange(k*sampleCount/clusterCount,
k == clusterCount - 1 ? sampleCount :
(k+1)*sampleCount/clusterCount); // the last cluster takes whatever
// samples remain after integer division
// every cluster shares the same variance; only the means differ
rng.fill(pointChunk, CV_RAND_NORMAL, Scalar(center.x, center.y), Scalar(img.cols*0.05, img.rows*0.05));
}
randShuffle(points, 1, &rng); // shuffle before clustering; the pointChunk views share storage with points
kmeans(points, clusterCount, labels,
TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0),
3, KMEANS_PP_CENTERS, centers); // 3 attempts, keep the best; k-means++ seeding
img = Scalar::all(0);
for( i = 0; i < sampleCount; i++ )
{
int clusterIdx = labels.at<int>(i);
Point ipt = points.at<Point2f>(i);
circle( img, ipt, 2, colorTab[clusterIdx], CV_FILLED, CV_AA );
}
imshow("clusters", img);
char key = (char)waitKey(); // wait indefinitely for a key
if( key == 27 || key == 'q' || key == 'Q' ) // 'ESC'
break;
}
}
#define NTRAINING_SAMPLES 100 // Number of training samples per class
#define FRAC_LINEAR_SEP 0.9f // Fraction of samples which compose the linear separable part
/************************************************************************/
/* SVM, support vector machine                                          */
/* Builds a two-class training set that is linearly separable except    */
/* for a mixed middle band, then visualises the decision regions, the   */
/* training points and the support vectors of a linear C-SVC.           */
/*                                                                      */
/* NOTE(review): svm.train/svm.save are commented out — the classifier  */
/* is restored from "supportVectorMachine.txt", so `params` and the     */
/* freshly generated training data do NOT influence the loaded model.   */
/* Confirm the saved model was trained on the same distribution.        */
/************************************************************************/
void SVM_()
{
// Data for visual representation
const int WIDTH = 512, HEIGHT = 512;
Mat I = Mat::zeros(HEIGHT, WIDTH, CV_8UC3);
//--------------------- 1. Set up training data randomly ---------------------------------------
Mat trainData(2*NTRAINING_SAMPLES, 2, CV_32FC1);
Mat labels (2*NTRAINING_SAMPLES, 1, CV_32FC1);
RNG rng(100); // Random value generation class
// Set up the linearly separable part of the training data
int nLinearSamples = (int) (FRAC_LINEAR_SEP * NTRAINING_SAMPLES);
// Generate random points for the class 1
Mat trainClass = trainData.rowRange(0, nLinearSamples);
// The x coordinate of the points is in [1, 0.4*WIDTH)
Mat c = trainClass.colRange(0, 1);
rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(0.4 * WIDTH));
// The y coordinate of the points is in [1, HEIGHT)
c = trainClass.colRange(1,2);
rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Generate random points for the class 2
trainClass = trainData.rowRange(2*NTRAINING_SAMPLES-nLinearSamples, 2*NTRAINING_SAMPLES);
// The x coordinate of the points is in [0.6*WIDTH, WIDTH)
c = trainClass.colRange(0 , 1);
rng.fill(c, RNG::UNIFORM, Scalar(0.6*WIDTH), Scalar(WIDTH));
// The y coordinate of the points is in [1, HEIGHT)
c = trainClass.colRange(1,2);
rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
//------------------ Set up the non-linearly separable part of the training data ---------------
// Generate random points for the classes 1 and 2 (overlapping middle band)
trainClass = trainData.rowRange( nLinearSamples, 2*NTRAINING_SAMPLES-nLinearSamples);
// The x coordinate of the points is in [0.4*WIDTH, 0.6*WIDTH)
c = trainClass.colRange(0,1);
rng.fill(c, RNG::UNIFORM, Scalar(0.4*WIDTH), Scalar(0.6*WIDTH));
// The y coordinate of the points is in [1, HEIGHT)
c = trainClass.colRange(1,2);
rng.fill(c, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
//------------------------- Set up the labels for the classes ---------------------------------
labels.rowRange( 0, NTRAINING_SAMPLES).setTo(1); // Class 1
labels.rowRange(NTRAINING_SAMPLES, 2*NTRAINING_SAMPLES).setTo(2); // Class 2
//------------------------ 2. Set up the support vector machines parameters --------------------
// NOTE(review): unused while training is commented out below.
CvSVMParams params;
params.svm_type = SVM::C_SVC;
params.C = 0.1;
params.kernel_type = SVM::LINEAR;
params.term_crit = TermCriteria(CV_TERMCRIT_ITER, (int)1e7, 1e-6);
//------------------------ 3. Train the svm ----------------------------------------------------
cout << "Starting training process" << endl;
CvSVM svm;
/*
svm.train(trainData, labels, Mat(), Mat(), params);
svm.save("supportVectorMachine.txt");
*/
svm.load("supportVectorMachine.txt");
cout << "Finished training process" << endl;
//------------------------ 4. Show the decision regions ----------------------------------------
Vec3b green(0,100,0), blue (100,0,0);
// NOTE(review): at<Vec3b>(j, i) swaps row/col relative to the (i, j)
// sample; this only works because the image is square (512x512).
for (int i = 0; i < I.rows; ++i)
for (int j = 0; j < I.cols; ++j)
{
Mat sampleMat = (Mat_<float>(1,2) << i, j);
float response = svm.predict(sampleMat);
if (response == 1) I.at<Vec3b>(j, i) = green;
else if (response == 2) I.at<Vec3b>(j, i) = blue;
}
//----------------------- 5. Show the training data --------------------------------------------
int thick = -1;
int lineType = 8;
float px, py;
// Class 1
for (int i = 0; i < NTRAINING_SAMPLES; ++i)
{
px = trainData.at<float>(i,0);
py = trainData.at<float>(i,1);
circle(I, Point( (int) px, (int) py ), 3, Scalar(0, 255, 0), thick, lineType);
}
// Class 2
for (int i = NTRAINING_SAMPLES; i <2*NTRAINING_SAMPLES; ++i)
{
px = trainData.at<float>(i,0);
py = trainData.at<float>(i,1);
circle(I, Point( (int) px, (int) py ), 3, Scalar(255, 0, 0), thick, lineType);
}
//------------------------- 6. Show support vectors --------------------------------------------
thick = 2;
lineType = 8;
int x = svm.get_support_vector_count();
for (int i = 0; i < x; ++i)
{
const float* v = svm.get_support_vector(i);
circle( I, Point( (int) v[0], (int) v[1]), 6, Scalar(128, 128, 128), thick, lineType);
}
imwrite("result.png", I); // save the Image
imshow("SVM for Non-Linear Training Data", I); // show it to the user
waitKey(0);
}
/************************************************************************/
/* bayesian,Normal Bayes Classifier(贝叶斯分类) */
/************************************************************************/
void NBC()
{
float trainingData[8][3] = { {6, 180, 12}, {5.92, 190, 11}, {5.58, 170, 12}, {5.92, 165, 10},
{5, 100, 6}, {5.5, 150, 8},{5.42, 130, 7}, {5.75, 150, 9}};
Mat trainingDataMat(8, 3, CV_32FC1, trainingData);
float responses[8] = {'M', 'M', 'M', 'M', 'F', 'F', 'F', 'F'};
Mat responsesMat(8, 1, CV_32FC1, responses);
NormalBayesClassifier nbc;
//NormalBayesClassifier nbc2;
/*
nbc.train(trainingDataMat, responsesMat);
nbc.save("normalBayes.txt");
*/
nbc.load("normalBayes.txt");
float myData[3] = {6, 130, 8};
Mat myDataMat(1, 3, CV_32FC1, myData);
float r = nbc.predict( myDataMat );
cout<<endl<<"result: "<<(char)r<<endl;
system("pause");
}
/************************************************************************/
/* Gradient Boosted Trees                                               */
/* Regression demo: fits CvGBTrees on 28 samples of                     */
/* (area, number of rooms) -> price and predicts one unseen sample.     */
/************************************************************************/
void GBT()
{
// BUG FIX: the data arrays were declared `double` but wrapped in
// CV_32FC1 matrices, so OpenCV reinterpreted the raw bytes as floats.
// Declare them as float so the Mat header matches the storage.
float trainingData[28][2]={{210.4, 3}, {240.0, 3}, {300.0, 4}, {153.4, 3}, {138.0, 3},
{194.0,4}, {189.0, 3}, {126.8, 3}, {132.0, 2}, {260.9, 4},
{176.7,3}, {160.4, 3}, {389.0, 3}, {145.8, 3}, {160.0, 3},
{141.6,2}, {198.5, 4}, {142.7, 3}, {149.4, 3}, {200.0, 3},
{447.8,5}, {230.0, 4}, {123.6, 3}, {303.1, 4}, {188.8, 2},
{196.2,4}, {110.0, 3}, {252.6, 3} };
Mat trainingDataMat(28, 2, CV_32FC1, trainingData);
// Regression targets (house prices), one per training sample.
float responses[28] = { 399900, 369000, 539900, 314900, 212000, 239999, 329999,
259900, 299900, 499998, 252900, 242900, 573900, 464500,
329900, 232000, 299900, 198999, 242500, 347000, 699900,
449900, 199900, 599000, 255000, 259900, 249900, 469000};
Mat responsesMat(28, 1, CV_32FC1, responses);
// Boosting parameters.
CvGBTreesParams params;
params.loss_function_type = CvGBTrees::ABSOLUTE_LOSS; // L1 loss (regression)
params.weak_count = 10; // number of boosting iterations
params.shrinkage = 0.01f; // learning rate
params.subsample_portion = 0.8f; // fraction of samples per iteration
params.max_depth = 3; // depth of each weak tree
params.use_surrogates = false; // no missing data -> no surrogate splits
CvGBTrees gbt;
// One sample per row (CV_ROW_SAMPLE).
gbt.train(trainingDataMat, CV_ROW_SAMPLE, responsesMat, Mat(), Mat(), Mat(), Mat(),params);
// BUG FIX: the query must match the training layout — one ROW of two
// features — so use a 1x2 matrix instead of the previous 2x1.
float sampleData[2]={185.4, 4};
Mat sampleMat(1, 2, CV_32FC1, sampleData);
float r = gbt.predict(sampleMat); // predicted price
cout<<endl<<"result: "<<r<<endl;
system("pause");
}
/************************************************************************/
/* Random Trees regression demo.                                        */
/* NOTE(review): despite the original "Extremely randomized trees"      */
/* title, this uses CvRTrees (random forest); CvERTrees would be the    */
/* extremely-randomized variant.                                        */
/************************************************************************/
void ET()
{
// BUG FIX: the data arrays were declared `double` but wrapped in
// CV_32FC1 matrices, so OpenCV reinterpreted the raw bytes as floats.
// Declare them as float so the CvMat header matches the storage.
float trainingData[28][2]={{210.4, 3}, {240.0, 3}, {300.0, 4}, {153.4, 3}, {138.0, 3},
{194.0,4}, {189.0, 3}, {126.8, 3}, {132.0, 2}, {260.9, 4},
{176.7,3}, {160.4, 3}, {389.0, 3}, {145.8, 3}, {160.0, 3},
{141.6,2}, {198.5, 4}, {142.7, 3}, {149.4, 3}, {200.0, 3},
{447.8,5}, {230.0, 4}, {123.6, 3}, {303.1, 4}, {188.8, 2},
{196.2,4}, {110.0, 3}, {252.6, 3} };
CvMat trainingDataCvMat = cvMat( 28, 2, CV_32FC1, trainingData );
// Regression targets (house prices), one per training sample.
float responses[28] = { 399900, 369000, 539900, 314900, 212000, 239999, 329999,
259900, 299900, 499998, 252900, 242900, 573900, 464500,
329900, 232000, 299900, 198999, 242500, 347000, 699900,
449900, 199900, 599000, 255000, 259900, 249900, 469000};
CvMat responsesCvMat = cvMat( 28, 1, CV_32FC1, responses );
// CvRTParams(max_depth, min_sample_count, regression_accuracy,
//            use_surrogates, max_categories, priors,
//            calc_var_importance, nactive_vars,
//            max_num_of_trees, forest_accuracy, termcrit_type)
CvRTParams params= CvRTParams(10, 2, 0, false,16, 0, true, 0, 100, 0, CV_TERMCRIT_ITER );
CvRTrees rtrees;
rtrees.train(&trainingDataCvMat, CV_ROW_SAMPLE, &responsesCvMat,
NULL, NULL, NULL, NULL,params);
// BUG FIX: the query must match the training layout — one ROW of two
// features — so use a 1x2 matrix instead of the previous 2x1.
float sampleData[2]={201.5, 3};
Mat sampleMat(1, 2, CV_32FC1, sampleData);
float r = rtrees.predict(sampleMat); // predicted price
cout<<endl<<"result: "<<r<<endl;
system("pause");
}
/************************************************************************/
/* Expectation - Maximization                                           */
/* Image segmentation demo: every pixel's BGR triple becomes one 3-D    */
/* sample, an EM (Gaussian mixture) model with 4 components clusters    */
/* them, and the image is repainted with one flat colour per component. */
/************************************************************************/
void EM_()
{
// NOTE(review): no src.empty() check — a missing "4.jpg" will crash below.
Mat src = imread("4.jpg"); // load the input image
namedWindow( "my daughter", WINDOW_AUTOSIZE );
imshow( "my daughter", src); // show the original image
waitKey(0);
// data holds the samples, labels the per-sample component index
Mat data, labels;
// convert the colour image into the sample matrix EM expects
for (int i = 0; i < src.rows; i++)
{
for (int j = 0; j < src.cols; j++)
{
Vec3b point = src.at<Vec3b>(i, j); // current pixel's BGR triple
// one 1x3 float sample per pixel
Mat tmp = (Mat_<float>(1, 3) << point[0], point[1], point[2]);
data.push_back(tmp); // append the sample
}
}
int clusters = 4; // number of mixture components / segments
EM em = EM(clusters); // instantiate the EM model
// fit the mixture and obtain one component label per sample
em.train(data, noArray(), labels, noArray());
// one replacement colour per component (5 entries cover clusters <= 5)
Vec3b colorTab[] =
{
Vec3b(0, 0, 255),
Vec3b(0, 255, 0),
Vec3b(255, 100, 100),
Vec3b(255, 0, 255),
Vec3b(0, 255, 255)
};
int n = 0; // running sample index (row in `labels`)
for (int i = 0; i < src.rows; i++)
{
for (int j = 0; j < src.cols; j++)
{
int clusterIdx = labels.at<int>(n); // component of this pixel
src.at<Vec3b>(i, j) = colorTab[clusterIdx]; // paint it
n++;
}
}
namedWindow( "EM", WINDOW_AUTOSIZE );
imshow( "EM", src); // show the segmentation result
waitKey(0);
}
// Human-readable descriptions of the five decision-tree attributes used by
// DT(), in the same column order as its training data. The parenthesised
// part lists the category codes; DT() prints only the name before '('.
// The array is NULL-terminated.
static const char* var_desc[] =
{
"Age (young=Y, middle=M, old=O)",
"Salary? (low=L, medium=M, high=H)",
"Own_House? (false=N, true=Y)",
"Own_Car? (false=N, true=Y)",
"Credit_Rating (fair=F, good=G, excellent=E)",
0
};
/************************************************************************/
/* Decision Tree                                                        */
/* Trains CvDTree on 19 applicant samples with five categorical         */
/* attributes (see var_desc for column meanings), prints each           */
/* attribute's importance and predicts one applicant ('Y'/'N').         */
/************************************************************************/
void DT()
{
// 19 training samples; each attribute is a category code stored as float
float trainingData[19][5]={ {'Y','L','N','N','F'},
{'Y','L','Y','N','G'},
{'Y','M','Y','N','G'},
{'Y','M','Y','Y','G'},
{'Y','H','Y','Y','G'},
{'Y','M','N','Y','G'},
{'M','L','Y','Y','E'},
{'M','H','Y','Y','G'},
{'M','L','N','Y','G'},
{'M','M','Y','Y','F'},
{'M','H','Y','Y','E'},
{'M','M','N','N','G'},
{'O','L','N','N','G'},
{'O','L','Y','Y','E'},
{'O','L','Y','N','E'},
{'O','M','N','Y','G'},
{'O','L','N','N','E'},
{'O','H','N','Y','F'},
{'O','H','Y','Y','E'} };
Mat trainingDataMat(19, 5, CV_32FC1, trainingData); // samples as a matrix
// per-sample class labels: approve ('Y') or reject ('N')
float responses[19] = {'N','N','Y','Y','Y','N','Y','Y','N','N','Y','N','N','Y','Y','N','N','N','Y'};
Mat responsesMat(19, 1, CV_32FC1, responses); // labels as a matrix
// NOTE(review): CvDTreeParams priors are per-CLASS weights, not per-feature
// as the original comment claimed; with two classes ('N'/'Y') only the
// first two entries should matter — confirm against the 2.x docs.
float priors[5] = {1, 1, 1, 1, 1};
// decision-tree parameters
CvDTreeParams params( 15, // maximum tree depth
1, // minimum sample count in a leaf
0, // regression accuracy (unused: this is classification)
false, // no surrogate splits (no missing attributes)
25, // maximum number of categories
0, // cross-validation folds (0: too few samples to cross-validate)
false, // do not use the 1SE pruning rule
false, // do not physically remove pruned branches
priors // prior weights
);
// variable-type mask: 6 rows = 5 attributes + response, all categorical (1)
Mat varTypeMat(6, 1, CV_8U, Scalar::all(1));
CvDTree* dtree = new CvDTree(); // NOTE(review): never deleted — leaks
// train: build the decision tree
dtree->train ( trainingDataMat, // training samples
CV_ROW_SAMPLE, // rows are samples, columns are attributes
responsesMat, // response values
Mat(), // use all attributes
Mat(), // use all training samples
varTypeMat, // categorical-type mask
Mat(), // no missing attribute values
params // decision-tree parameters
);
// query the per-attribute importance computed during training
const CvMat* var_importance = dtree->get_var_importance();
// print each attribute's name (text before '(') and importance in percent
for( int i = 0; i < var_importance->cols*var_importance->rows; i++ )
{
double val = var_importance->data.db[i];
char buf[100];
int len = (int)(strchr( var_desc[i], '(' ) - var_desc[i] - 1);
strncpy( buf, var_desc[i], len );
buf[len] = '\0';
printf( "%s", buf );
printf( ": %g%%\n", val*100. );
}
// sample to classify
// NOTE(review): training used row samples (1x5) but the query is 5x1 —
// verify CvDTree::predict accepts a column vector here.
float myData[5] = {'M','H','Y','N','F'};
Mat myDataMat(5, 1, CV_32FC1, myData); // as a matrix
double r = dtree->predict( myDataMat, Mat(), false)->value; // predicted class code
cout<<endl<<"result: "<<(char)r<<endl; // print the prediction
system("pause");
}
/************************************************************************/
/* Boosted tree classifier                                              */
/* Trains a Real-AdaBoost ensemble on 42 2-D points labelled 'R' or 'B' */
/* and classifies one query point. (`Boost` is the OpenCV 2.x typedef   */
/* for CvBoost.)                                                        */
/************************************************************************/
void BT()
{
// training samples: (x, y) coordinates
float trainingData[42][2]={ {40, 55},{35, 35},{55, 15},{45, 25},{10, 10},{15, 15},{40, 10},
{30, 15},{30, 50},{100, 20},{45, 65},{20, 35},{80, 20},{90, 5},
{95, 35},{80, 65},{15, 55},{25, 65},{85, 35},{85, 55},{95, 70},
{105, 50},{115, 65},{110, 25},{120, 45},{15, 45},
{55, 30},{60, 65},{95, 60},{25, 40},{75, 45},{105, 35},{65, 10},
{50, 50},{40, 35},{70, 55},{80, 30},{95, 45},{60, 20},{70, 30},
{65, 45},{85, 40} };
Mat trainingDataMat(42, 2, CV_32FC1, trainingData);
// class labels, character codes stored as floats: 26 'R' then 16 'B'
float responses[42] = {'R','R','R','R','R','R','R','R','R','R','R','R','R','R','R','R',
'R','R','R','R','R','R','R','R','R','R',
'B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B' };
Mat responsesMat(42, 1, CV_32FC1, responses);
float priors[2] = {1, 1}; // equal per-class prior weights
CvBoostParams params( CvBoost::REAL, // boost_type: Real AdaBoost
10, // weak_count
0.95, // weight_trim_rate
15, // max_depth
false, // use_surrogates
priors // priors
);
// CvBoost boost;
Boost boost;
boost.train ( trainingDataMat,
CV_ROW_SAMPLE, // one sample per row
responsesMat,
Mat(), // use all features
Mat(), // use all samples
Mat(), // automatic variable types
Mat(), // no missing values
params
);
// sample to classify
// NOTE(review): training used row samples (1x2) but the query is 2x1 —
// verify CvBoost::predict accepts a column vector here.
float myData[2] = {55, 25};
Mat myDataMat(2, 1, CV_32FC1, myData);
double r = boost.predict( myDataMat );
cout<<endl<<"result: "<<(char)r<<endl;
system("pause");
}
/**
 * Entry point: uncomment the demo you want to run (only GBT() is active).
 * BUG FIX: `void main()` is not valid standard C++ — main must return int.
 */
int main()
{
//KNN();
//Kmeans();
//SVM_();
//NBC();
GBT();
//ET();
//EM_();
//DT();
//BT();
//float labels[2][2][3][2] = {0,1,2,3,4,5,6,7,8,9,10,11};
//Mat labelsMat(2, 4, CV_32FC3, labels);
//cout<<labelsMat<<endl;
system("pause"); // Windows-only: keep the console window open
return 0;
}