《统计学习方法》 (Statistical Learning Methods) — Chapter 2: The Perceptron and a C++ Implementation

Input: a feature vector
Output: a class label (+1/-1) [binary classification]
The model learns f (or P(Y|X)) directly → a discriminative model

Model:

$f(x) = \mathrm{sign}(w^T x + b)$, where $\mathrm{sign}(\cdot)$ is the sign function. The parameters $w, b$ are learned directly → a discriminative model.
The function $f$ is linear → a linear classification model.
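As a minimal sketch of the model (a hypothetical helper, not part of the implementations below; following the book's convention, sign(0) is taken as +1):

#include <vector>

// Sketch: f(x) = sign(w^T x + b), returning +1 or -1.
int predict(const std::vector<double>& w, double b,
            const std::vector<double>& x)
{
    double score = b;
    for (std::size_t j = 0; j < w.size(); ++j)
        score += w[j] * x[j];
    return score >= 0.0 ? 1 : -1;
}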

Strategy:

$$L(w,b) = -\sum_{x_i \in M} y_i (w^T x_i + b)$$
$M$ is the set of misclassified sample points. Since $y_i \in \{+1, -1\}$, a misclassified point satisfies $y_i(w^T x_i + b) \le 0$, so with the leading minus sign $L(w,b)$ is non-negative.
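As a sketch of this strategy (a hypothetical helper, with X and y laid out as plain vectors rather than the struct used in the code below), the loss sums the violated functional margins:

#include <vector>

// Sketch: L(w,b) = -sum over misclassified points of y_i*(w^T x_i + b).
// A point counts as misclassified when its functional margin is <= 0,
// so every term it contributes to the loss is non-negative.
double perceptron_loss(const std::vector<std::vector<double>>& X,
                       const std::vector<int>& y,
                       const std::vector<double>& w, double b)
{
    double loss = 0.0;
    for (std::size_t i = 0; i < X.size(); ++i) {
        double margin = b;
        for (std::size_t j = 0; j < w.size(); ++j)
            margin += w[j] * X[i][j];
        margin *= y[i];          // functional margin y_i*(w^T x_i + b)
        if (margin <= 0.0)
            loss -= margin;      // misclassified: add -margin >= 0
    }
    return loss;
}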

Learning algorithm (gradient descent):

Primal form

$$\min_{w,b} L(w,b) = -\sum_{x_i \in M} y_i (w^T x_i + b)$$
$$\nabla_w L(w,b) = -\sum_{x_i \in M} y_i x_i \quad (\text{gradient with respect to } w)$$
$$\nabla_b L(w,b) = -\sum_{x_i \in M} y_i \quad (\text{gradient with respect to } b)$$
Randomly select a misclassified sample point $(x_i, y_i)$ and update: $w \leftarrow w + \eta y_i x_i$ [this update is relatively cumbersome], $b \leftarrow b + \eta y_i$, where $\eta$ is the learning rate.

#include <iostream>
#include <vector>
#include <fstream>
using namespace std;
const int Max_data = 10;
struct data{
    vector<double> samples; // feature vector (x1, x2)
    int labels;             // class label, +1 or -1
}dataSet[Max_data];
struct Result{
    vector<double> w;
    double b;
}result;
class perception_ori
{
    public:
    /* Load the data set into dataSet and return the number of samples. */
    int loadDataSet()
    {
        fstream f;
        int countNum = 0;
        f.open("data.dat");
        int i = 0; double temp1, temp2;
        // Test the stream state instead of eof(): an eof() loop would
        // process the final record twice when the file ends with a newline.
        while(i < Max_data && f >> temp1 >> temp2 >> dataSet[i].labels)
        {
            dataSet[i].samples.push_back(temp1);
            dataSet[i].samples.push_back(temp2);
            i++;
            countNum++;
        }
        f.close();
        return countNum;
    }
    double eta = 0; // learning rate
    void perception(data dataSet[], double eta, int N)
    {
        result.w.push_back(0.0);
        result.w.push_back(0.0);
        result.b = 0.0;
        bool flag = false;
        int cnt;
        while(!flag)
        {
            cnt = 0;
            for(int i = 0; i < N; i++)
            {
                // Misclassified when y_i*(w^T x_i + b) <= 0.
                if((double)dataSet[i].labels*(dataSet[i].samples[0]*result.w[0]+dataSet[i].samples[1]*result.w[1]+result.b)<=0.0)
                {
                    cnt++;
                    // SGD update: w <- w + eta*y_i*x_i, b <- b + eta*y_i.
                    result.w[0] += eta*(double)dataSet[i].labels*dataSet[i].samples[0];
                    result.w[1] += eta*(double)dataSet[i].labels*dataSet[i].samples[1];
                    result.b += eta*(double)dataSet[i].labels;
                }
            }
            if(cnt == 0) // a full pass with no misclassification: converged
                flag = true;
        }
    }
};
int main()
{
    perception_ori test;
    int N = test.loadDataSet();
    test.eta = 1.0;
    test.perception(dataSet, test.eta, N);
    cout<<"The weight is:"<<endl<<result.w[0]<<endl<<result.w[1]<<endl<<"And the bias is:"<<endl<<result.b<<endl;
    return 0;
}
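The program reads a whitespace-separated file data.dat with one sample per line: two features followed by a label. As a sketch of an input that exercises the code (the three points of Example 2.1 in the book):

3 3 1
4 3 1
1 1 -1

With $\eta = 1$ this data is linearly separable, so the loop terminates; the book obtains $w = (1, 1)^T$, $b = -3$ for this data, though the exact values this code reaches can depend on the order in which misclassified points are visited.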

Dual form

Basic idea: express $w$ and $b$ as linear combinations of the $x_i$ and $y_i$, and learn the coefficients of those combinations.
This way the inner products of the $x_i$ can be computed in advance (the Gram matrix), which makes each update cheap; otherwise the dual form is essentially the same as the primal form. Some people describe the difference between the two forms as gradient descent versus stochastic gradient descent; I disagree. What distinguishes the two kinds of descent is whether each iteration sweeps the whole data set to update the parameters or randomly selects a single sample. In the book, both the primal and the dual algorithm update on a randomly selected sample, so both are stochastic gradient descent. When I practiced writing the code I did not get this straight and did not implement the randomness: the samples are visited one by one, in order (a sketch of how random selection could be added follows).
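As a minimal sketch of the missing random selection (an assumption about one reasonable way to add it, not the book's prescription; it uses only the standard <random>, <algorithm>, and <numeric> headers), one can shuffle the visiting order on every pass:

#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

// Sketch: produce a freshly shuffled visiting order for one pass,
// so each update effectively picks a (pseudo-)random sample.
std::vector<int> shuffled_indices(int N, std::mt19937& rng)
{
    std::vector<int> idx(N);
    std::iota(idx.begin(), idx.end(), 0); // fill with 0, 1, ..., N-1
    std::shuffle(idx.begin(), idx.end(), rng);
    return idx;
}

Iterating over shuffled_indices(N, rng) inside the training loop, instead of for(int i = 0; i < N; i++), gives the stochastic selection described above.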
Suppose the point $(x_i, y_i)$ is selected $n_i$ times during training. Then the increments of $w$ and $b$ over their initial values are

$$w = \sum_{i=1}^{N}\alpha_i y_i x_i, \qquad b = \sum_{i=1}^{N}\alpha_i y_i$$

where $\alpha_i = n_i \eta$ (only the $\alpha_i$ need to be learned).
Update: $\alpha_i \leftarrow \alpha_i + \eta$ [if the Gram matrix has been computed in advance, this update is simple], $b \leftarrow b + \eta y_i$.
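For reference (this is exactly the quantity cal_discriminate evaluates below), in the dual form a point $(x_i, y_i)$ counts as misclassified when

$$y_i\left(\sum_{j=1}^{N}\alpha_j y_j (x_j \cdot x_i) + b\right) \le 0,$$

where every inner product $x_j \cdot x_i$ can be looked up in the precomputed Gram matrix $G = [x_j \cdot x_i]_{N \times N}$.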

#include <iostream>
#include <vector>
#include <fstream>
using namespace std;
const int Max_data = 10;
struct data{
    vector<double> samples; // feature vector (x1, x2)
    int labels;             // class label, +1 or -1
}dataSet[Max_data];
struct Result{
    vector<double> alpha;
    double b;
}result;
class Perception_dual
{
public:
    /* Load the data set into dataSet and return the number of samples. */
    int loadDataSet()
    {
        fstream f;
        int countNum = 0;
        f.open("data.dat");
        int i = 0; double temp1, temp2;
        // Test the stream state instead of eof() so the final record
        // is not processed twice.
        while(i < Max_data && f >> temp1 >> temp2 >> dataSet[i].labels)
        {
            dataSet[i].samples.push_back(temp1);
            dataSet[i].samples.push_back(temp2);
            i++;
            countNum++;
        }
        f.close();
        return countNum;
    }
    double eta = 0; // learning rate
    /* Compute the Gram matrix of pairwise inner products; N is the sample count. */
    vector<vector<double>> cal_dot(data dataSet[], int N)
    {
        vector<vector<double>> dot(N);
        for(int k = 0; k < N; k++)
            dot[k].resize(N);
        for(int i = 0; i < N; i++)
            for(int j = 0; j < N; j++)
                dot[i][j] = dataSet[i].samples[0]*dataSet[j].samples[0]
                          + dataSet[i].samples[1]*dataSet[j].samples[1];
        return dot;
    }
    /* Update on a misclassified point: alpha_i <- alpha_i + eta, b <- b + eta*y_i. */
    Result update(Result result, double eta, int labels, int i)
    {
        result.alpha[i] = result.alpha[i] + eta;
        result.b = result.b + eta*(double)labels;
        return result;
    }
    Result initialize(Result result, int N)
    {
        for(int i = 0; i < N; i++)
            result.alpha.push_back(0);
        result.b = 0.0;
        return result;
    }
    /* Discriminant y_i*(sum_j alpha_j*y_j*(x_j . x_i) + b); <= 0 means
       misclassified. The Gram matrix dotx is passed in so it is computed
       only once, instead of being rebuilt on every call. */
    double cal_discriminate(const vector<vector<double>>& dotx, int N, const Result& result, int i)
    {
        double temp = 0;
        for(int j = 0; j < N; j++)
            temp += result.alpha[j]*(double)dataSet[j].labels*dotx[j][i];
        temp = (double)dataSet[i].labels*(temp + result.b);
        return temp;
    }
    /* Recover w = sum_i alpha_i*y_i*x_i from the learned coefficients. */
    vector<double> cal_w(vector<double> alpha, data dataSet[], int N)
    {
        vector<double> w;
        double temp1 = 0, temp2 = 0;
        for(int i = 0; i < N; i++)
        {
            temp1 += (double)dataSet[i].labels*(dataSet[i].samples[0]*alpha[i]);
            temp2 += (double)dataSet[i].labels*(dataSet[i].samples[1]*alpha[i]);
        }
        w.push_back(temp1);
        w.push_back(temp2);
        return w;
    }
    Result perception_dual(data dataSet[], double eta, int N)
    {
        result = initialize(result, N);
        // Precompute the Gram matrix once, up front.
        vector<vector<double>> dotx = cal_dot(dataSet, N);
        bool flag = false;
        int cnt = 0; double temp = 0;
        while(!flag)
        {
            cnt = 0;
            for(int i = 0; i < N; i++)
            {
                temp = cal_discriminate(dotx, N, result, i);
                if(temp <= 0.0) // misclassified
                {
                    cnt++;
                    result = update(result, eta, dataSet[i].labels, i);
                }
            }
            if(cnt == 0) // a full pass with no misclassification: converged
                flag = true;
        }
        return result;
    }
};
int main()
{
    Perception_dual test;
    int N = test.loadDataSet();
    test.eta = 1.0;
    result = test.perception_dual(dataSet, test.eta, N);
    cout<<"The alpha coefficients are:"<<endl;
    for(int k = 0; k < N; k++)
        cout<<result.alpha[k]<<endl;
    vector<double> w = test.cal_w(result.alpha, dataSet, N);
    cout<<"The weight is:"<<endl<<w[0]<<endl<<w[1]<<endl<<"And the bias is:"<<endl<<result.b<<endl;
    return 0;
}
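On the same Example 2.1 data, the book's dual-form run (Example 2.2) ends with $\alpha = (2, 0, 5)^T$ and $b = -3$, from which cal_w recovers $w = (1, 1)^T$, the same hyperplane as the primal form. Since this implementation visits points in order rather than randomly, the coefficients it reaches may differ while still separating the data.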

Reposted from blog.csdn.net/weixin_35479108/article/details/84939172