Input : 特征向量
Output : 类别(+1/-1) 【二分类】
直接学习f 或者P(Y|X) ——> 判别模型
模型:$f(x) = \operatorname{sign}(w \cdot x + b)$,其中 $w$ 为权值向量,$b$ 为偏置
函数 f 是线性的---->线性分类模型
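策略:最小化误分类点到超平面的总距离(略去因子 $1/\lVert w \rVert$),损失函数为 $L(w,b) = -\sum_{x_i \in M} y_i (w \cdot x_i + b)$
M是误分类样本点的集合,其中对误分类点有 $y_i (w \cdot x_i + b) \le 0$,
所以加上负号以后L(w,b)非负。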
学习算法:(梯度下降)
原始形式
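随机选择一个样本点(xi,yi),若 $y_i (w \cdot x_i + b) \le 0$,则更新 $w \leftarrow w + \eta y_i x_i$,$b \leftarrow b + \eta y_i$,其中 $\eta$ 为学习率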
#include <iostream>
#include <iostream>
#include<Eigen/Dense>
#include<vector>
#include<fstream>
using namespace std;
// Capacity of the global sample table below.
const int Max_data = 10;

// One training example: its feature values and its class label.
struct data
{
    std::vector<double> samples;  // feature values of this example
    int labels;                   // class label of this example
};

// Global sample table with room for Max_data examples.
struct data dataSet[Max_data];
// Learned parameters of the separating hyperplane.
struct result
{
    std::vector<double> w;  // weight vector
    double b;               // bias term
};

// Global instance that the training code writes into.
struct result result;
class perception_ori
{
public:
/*导入数据并按照数据结构填充,返回样本个数*/
int loadDataSet()
{
fstream f ;
int countNum = 0;
f.open("data.dat");
int i = 0; double temp1,temp2;
while(!f.eof())
{
f>>temp1>>temp2>>dataSet[i].labels;
dataSet[i].samples.push_back(temp1);
dataSet[i].samples.push_back(temp2);
i++;
countNum++;
}
f.close();
return countNum;
}
double eta = 0;//设置学习率
void perception(data dataSet[],double eta,int N)
{
//vector<double> w(2);
result.w.push_back(0.0);
result.w.push_back(0.0);
result.b = 0.0;
bool flag = false;
int cnt;
while(!flag)
{
cnt = 0;
for(int i =0; i<N;i++)
{
if((double)dataSet[i].labels*(dataSet[i].samples[0]*result.w[0]+dataSet[i].samples[1]*result.w[1]+result.b)<=0.0)
{
cnt++;
result.w[0] = result.w[0]+eta*(double)dataSet[i].labels*dataSet[i].samples[0];
result.w[1] = result.w[1]+eta*(double)dataSet[i].labels*dataSet[i].samples[1];
result.b = result.b+eta*(double)dataSet[i].labels;
}
}
if(cnt == 0 )
flag = true;
}
//cout<<result.w[0]<<" "<<result.w[1]<<" "<<result.b<<endl;
}
};
int main()
{
perception_ori test;
int N = test.loadDataSet();
test.eta = 1.0;
test.perception(dataSet,test.eta,N);
cout<<"The weight is :" <<endl<<result.w[0]<<endl<<result.w[1]<<endl<<"And the biase is "<<endl<<result.b<<endl;
return 0;
}
对偶形式
基本思想:w,b表示为xi,yi的线性组合的形式,求其系数(线性组合的系数)
这样关于xi,yi的内积(Gram 矩阵)可以提前计算,方便更新,其余与原始形式实质上是一样的。之前看到有人说两者的区别是梯度下降和随机梯度下降,我认为不是:区分梯度下降和随机梯度下降的关键在于,每次迭代是遍历整个数据集来更新参数,还是随机选择样本来更新参数。书中无论是原始形式还是对偶形式都是随机选择样本进行更新,都是随机梯度下降。练习写代码的时候没有弄清楚这一点,没有实现随机选取,而是按顺序逐个选取样本。
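设(xi,yi)经过 $n_i$ 次修改,令 $\alpha_i = n_i \eta$,则w,b关于原始值的增量分别为 $\alpha_i y_i x_i$ 和 $\alpha_i y_i$,即 $w = \sum_{j=1}^{N} \alpha_j y_j x_j$,$b = \sum_{j=1}^{N} \alpha_j y_j$
更新:若 $y_i \left( \sum_{j=1}^{N} \alpha_j y_j (x_j \cdot x_i) + b \right) \le 0$,则 $\alpha_i \leftarrow \alpha_i + \eta$,$b \leftarrow b + \eta y_i$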
#include <iostream>
#include <iostream>
#include <vector>
#include <fstream>
using namespace std;
// Number of slots in the global sample table below.
const int Max_data = 10;

// A single training example: feature values plus class label.
struct data
{
    std::vector<double> samples;  // feature values
    int labels;                   // class label
};

// Global table of training examples, Max_data entries long.
struct data dataSet[Max_data];
// Parameters learned by the dual-form perceptron.
struct Result
{
    std::vector<double> alpha;  // one coefficient per training sample
    double b;                   // bias term
};

// Global instance that the training code reads and writes.
Result result;
class Perception_dual
{
public:
/*导入数据并按照数据结构填充,返回样本个数*/
int loadDataSet()
{
fstream f ;
int countNum = 0;
f.open("data.dat");
int i = 0; double temp1,temp2; while(!f.eof())
{
f>>temp1>>temp2>>dataSet[i].labels;
dataSet[i].samples.push_back(temp1);
dataSet[i].samples.push_back(temp2);
i++;
countNum++;
}
f.close();
return countNum;
} double eta = 0;//设置学习率
/*计算内积,N样本个数*/
vector<vector<double>> cal_dot(data dataSet[],int N)
{
vector<vector<double>> dot(N);
for(int k=0;k<dot.size();k++)
{
dot[k].resize(N);
}
//cout<<N<<endl;
double temp = 0.0;
for(int i=0;i<N;i++)
{
for(int j=0;j<N;j++)
{
//cout<<dataSet[i].samples[0]*dataSet[j].samples[0]+dataSet[i].samples[1]*dataSet[j].samples[1]<<endl;
temp = dataSet[i].samples[0]*dataSet[j].samples[0]+dataSet[i].samples[1]*dataSet[j].samples[1];
//cout<<temp<<endl;
dot[i][j]=temp;
}
}
return dot;
}
/*更新变量*/
Result update(Result result,double eta,int labels,int i)
{
//int N = result.alpha.size();
//for (int i =0; i<N ;i++)
//{
result.alpha[i] = result.alpha[i]+eta;
//}
result.b = result.b + eta*(double)labels;
return result;
}
Result initialize(Result result,int N)
{
for (int i=0;i<N;i++)
{
result.alpha.push_back(0);
}
result.b = 0.0;
return result;
}
/*计算判别条件*/
double cal_discriminate(data dataSet[],int N,Result result,int i)
{
double temp=0;
vector<vector<double>> dotx = cal_dot(dataSet,N); for (int j=0;j<N;j++)
{
temp = temp+result.alpha[j]*(double)dataSet[j].labels*dotx[j][i];
}
temp = (double)dataSet[i].labels*(temp+result.b);
return temp;
}
vector<double> cal_w(vector<double> alpha,data dataSet[],int N)
{
vector<double> w;
double temp1=0;
double temp2=0;
for (int i=0;i<N;i++)
{
temp1 = temp1+(double)dataSet[i].labels*(dataSet[i].samples[0]*result.alpha[i]);
temp2 = temp2+(double)dataSet[i].labels*(dataSet[i].samples[1]*result.alpha[i]);
}
w.push_back(temp1);
w.push_back(temp2);
return w;
}
Result perception_dual(data dataSet[],double eta,int N)
{
result = initialize(result,N);
bool flag = false;
int cnt = 0;double temp=0;
cout<<temp<<endl;
while(!flag)
{
cnt =0;
for (int i= 0;i<N;i++)
{
temp = cal_discriminate(dataSet,N,result,i);
if(temp<=0.0)
{
cnt++;
result = update(result,eta,dataSet[i].labels,i);
}
}
if(cnt == 0)
{
flag = true;
}
}
cout <<"Hello World"<<endl;
return result;
}
};
int main()
{
Perception_dual test;
int N = test.loadDataSet();
test.eta = 1.0;
cout<<"Hello World"<<endl; result = test.perception_dual(dataSet,test.eta,N);
for (int k = 0;k<N;k++)
{
cout<<result.alpha[k]<<endl;
}
vector<double> w = test.cal_w(result.alpha,dataSet,N);
cout<<"The weight is :" <<endl<<w[0]<<endl<<w[1]<<endl<<"And the biase is "<<endl<<result.b<<endl;
return 0;
}