CFS(Correlation-based Feature Selection,基于相关性的特征选择)算法

import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn import datasets
from sklearn import preprocessing


# Load the iris data set: feature matrix, feature names, and the class
# targets converted to floats.
dataset = datasets.load_iris()
feat = dataset.data
featNames = dataset.feature_names
label = dataset.target.astype(float)


# Standardize the features, then the targets (reshaped to a single column).
# The same scaler object is refit on the targets, so afterwards it holds the
# target statistics rather than the feature statistics.
scaler = preprocessing.StandardScaler()
x = scaler.fit_transform(feat)
y = scaler.fit_transform(label.reshape(-1, 1))


def countPearsonInFeats(feat):
    """Return the pairwise Pearson correlation matrix of the columns of *feat*.

    Args:
        feat: 2-D NumPy array; each column is treated as one variable and
            each row as one observation.

    Returns:
        A ``(colNum, colNum)`` array whose entry ``[i, j]`` is the Pearson
        correlation coefficient between columns ``i`` and ``j``.
    """
    colNum = feat.shape[1]
    if colNum == 0:
        # No columns to correlate; return an empty square matrix.
        return np.empty((0, 0))
    # One vectorized call replaces colNum**2 separate pearsonr evaluations
    # (which also computed every symmetric pair twice). rowvar=False tells
    # corrcoef that columns, not rows, are the variables.
    corr = np.corrcoef(feat, rowvar=False)
    # For a single column corrcoef yields a scalar, so force the 2-D shape.
    return np.asarray(corr).reshape(colNum, colNum)
# Feature-feature Pearson matrix of the standardized features, computed once
# at module load; countMrits reads it as a module-level global.
featsPersonr = countPearsonInFeats(x)


def countPearsonInFeatLabel(x, y):
    """Correlate each column of *x* with the flattened target *y*.

    Args:
        x: 2-D NumPy array with one feature per column.
        y: NumPy array of targets; it is flattened with ``ravel()`` before
            each correlation is computed.

    Returns:
        1-D float array with one Pearson coefficient per column of *x*.
    """
    columnCount = x.shape[1]
    coefficients = [
        pearsonr(x[:, col], y.ravel())[0] for col in range(columnCount)
    ]
    return np.array(coefficients, dtype=float)


# Per-feature Pearson correlation with the target, computed once at module
# load; countMrits reads it as a module-level global.
featlabelPearsonr = countPearsonInFeatLabel(x,y)


def countMrits(arr):
    """Compute the CFS merit of a feature subset.

    The merit is ``k * r_cf / sqrt(k + k * (k - 1) * r_ff)`` where ``k`` is
    the subset size, ``r_cf`` the mean feature-label correlation and ``r_ff``
    the mean feature-feature correlation over all distinct pairs in the
    subset. Correlations are read from the module-level ``featlabelPearsonr``
    and ``featsPersonr`` arrays.

    Correlation magnitudes (absolute values) are used: a feature that is
    strongly negatively correlated with the label is just as predictive as a
    positively correlated one, and using magnitudes keeps the term under the
    square root at least ``k``, so the division is always well defined.

    Args:
        arr: sequence of integer feature indices making up the subset.

    Returns:
        The merit as a float; ``0.0`` for an empty subset.
    """
    num = len(arr)
    if num == 0:
        # An empty subset carries no information; avoid dividing by zero.
        return 0.0

    idx = np.asarray(arr, dtype=int)

    # Mean absolute feature-label correlation (r_cf).
    fl = np.abs(featlabelPearsonr[idx]).mean()

    if num > 1:
        # Mean absolute correlation over the distinct pairs i < j (r_ff).
        sub = np.abs(featsPersonr[np.ix_(idx, idx)])
        ff = sub[np.triu_indices(num, k=1)].mean()
    else:
        # A single feature has no pairs, so there is no redundancy penalty.
        ff = 0.0

    merit = num * fl / np.sqrt(num + num * (num - 1) * ff)
    return merit
    
def getFeatChoose():
    """Greedy forward selection of features by CFS merit.

    Starting from an empty subset, each round evaluates the merit (via
    ``countMrits``) of the current subset extended by every remaining
    feature and adds the best candidate. The search stops as soon as the
    best candidate no longer increases the merit, or when no features remain.
    Candidate subsets, their merits, and the final selection are printed.

    Feature indices refer to positions in the module-level ``featNames``.

    Returns:
        The list of selected feature indices, in the order they were added.
    """
    remaining = list(range(len(featNames)))
    choose = []
    merit = None  # merit of the current subset; None until the first pick

    # Stop when every feature has been selected instead of calling max()
    # on an empty score table.
    while remaining:
        scores = {}
        for elem in remaining:
            candidate = choose + [elem]
            scores[elem] = countMrits(candidate)
            print(candidate, scores[elem])

        best = max(scores, key=scores.get)
        value = scores[best]

        # The first pick is always accepted; later picks must strictly
        # improve the merit.
        if merit is not None and not merit < value:
            break

        choose.append(best)
        merit = value
        # Remove by value: `best` is a feature id, not a position in the
        # shrinking list, so list.pop(best) would drop the wrong entry or
        # raise IndexError.
        remaining.remove(best)
        print(merit)

    print(choose)
    print('所选特征为:\n',np.array(featNames)[choose])
    return choose


# Run the feature selection only when this file is executed as a script.
if __name__ =='__main__':
    getFeatChoose()

猜你喜欢

转载自blog.csdn.net/qq_42394743/article/details/80778590
CFS