import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn import datasets
from sklearn import preprocessing
# Load the iris data set and pull out the feature matrix, the feature
# names and the class labels (labels are cast to float).
dataset = datasets.load_iris()
feat = dataset.data
featNames = dataset.feature_names
label = dataset.target.astype(float)

# Standardise the features and the labels.  The same scaler object is
# re-fitted for the labels, so after this section ``scaler`` holds the
# statistics of the label column, not of the features.
scaler = preprocessing.StandardScaler()
x = scaler.fit_transform(feat)
y = scaler.fit_transform(label.reshape(-1, 1))
def countPearsonInFeats(feat):
    """Return the square matrix of pairwise Pearson correlations of columns.

    Args:
        feat: 2-D array; each column is treated as one variable.

    Returns:
        Array of shape ``(n_cols, n_cols)`` where entry ``[i, j]`` is the
        Pearson coefficient between column ``i`` and column ``j``.
    """
    n_cols = feat.shape[1]
    # Row-major walk over every ordered column pair (diagonal included).
    coefficients = [
        pearsonr(feat[:, row], feat[:, col])[0]
        for row in range(n_cols)
        for col in range(n_cols)
    ]
    return np.array(coefficients).reshape(n_cols, n_cols)
# Feature-to-feature Pearson correlation matrix of the standardised features;
# read as a module-level global by countMrits.
featsPersonr = countPearsonInFeats(x)
def countPearsonInFeatLabel(x, y):
    """Return the Pearson correlation of every column of ``x`` with ``y``.

    Args:
        x: 2-D array; each column is one feature.
        y: label array; it is flattened with ``ravel()`` before use.

    Returns:
        1-D float array with one coefficient per column of ``x``.
    """
    coefficients = [
        pearsonr(x[:, col], y.ravel())[0] for col in range(x.shape[1])
    ]
    return np.array(coefficients, dtype=float)
# Pearson correlation of each standardised feature column with the
# standardised label; read as a module-level global by countMrits.
featlabelPearsonr = countPearsonInFeatLabel(x,y)
def countMrits(arr, feat_corr=None, label_corr=None):
    """Compute the merit score of a feature subset.

    The score is ``k * fl / sqrt(k + k * (k - 1) * ff)`` where ``k`` is the
    subset size, ``fl`` the mean feature-label correlation of the subset and
    ``ff`` the mean correlation over all unordered feature pairs in the
    subset (the form used by correlation-based feature selection).
    Correlations are used with their sign, exactly as stored.

    Args:
        arr: sequence of feature indices forming the subset.
        feat_corr: square feature-feature correlation matrix. Defaults to
            the module-level ``featsPersonr``.
        label_corr: 1-D feature-label correlation vector. Defaults to the
            module-level ``featlabelPearsonr``.

    Returns:
        The merit as a float; ``0.0`` for an empty subset.
    """
    # Resolve the defaults lazily so the globals are only needed when the
    # caller does not supply its own correlation tables.
    if feat_corr is None:
        feat_corr = featsPersonr
    if label_corr is None:
        label_corr = featlabelPearsonr

    num = len(arr)
    if num == 0:
        # An empty subset has no correlations to average; previously this
        # crashed with ZeroDivisionError.
        return 0.0

    # Number of unordered pairs; 1 for a single feature so that the (empty)
    # pair sum divides cleanly to 0.
    pair_count = num * (num - 1) / 2 if num > 1 else num
    ff = sum(
        feat_corr[arr[i], arr[j]]
        for i in range(num)
        for j in range(i + 1, num)
    ) / pair_count
    fl = sum(label_corr[elem] for elem in arr) / num
    return num * fl / np.sqrt(num + num * (num - 1) * ff)
def getFeatChoose(names=None, merit_func=None):
    """Greedy forward selection of the feature subset with the best merit.

    Starting from an empty subset, every remaining feature is tried as the
    next addition; the one giving the highest merit is kept as long as it
    strictly improves on the current merit. The search stops at the first
    round without improvement or when no candidate features remain.
    Progress and the final choice are printed.

    Args:
        names: sequence of feature names; only its length and the final
            name lookup are used. Defaults to the module-level ``featNames``.
        merit_func: callable taking a list of feature indices and returning
            its merit. Defaults to the module-level ``countMrits``.

    Returns:
        List of the chosen feature indices in selection order.
    """
    if names is None:
        names = featNames
    if merit_func is None:
        merit_func = countMrits

    indexList = list(np.arange(len(names)))
    choose = []
    merit = None  # merit of the current subset; None until a feature is picked

    # Stop when the candidates are exhausted; max() over an empty score
    # table would otherwise raise ValueError.
    while indexList:
        # Fresh table each round so scores of earlier rounds cannot leak in.
        scores = {}
        for elem in indexList:
            candidate = choose + [elem]
            scores[elem] = merit_func(candidate)
            print(candidate, scores[elem])

        best = max(scores, key=scores.get)
        value = scores[best]
        if merit is not None and not merit < value:
            # No strict improvement: keep the current subset.
            break

        choose.append(best)
        merit = value
        # Remove by value: the list shrinks, so positions no longer match
        # feature indices and pop(best) would drop the wrong entry or fail.
        indexList.remove(best)
        print(merit)

    print(choose)
    print('所选特征为:\n', np.array(names)[choose])
    return choose
# Run the feature selection only when this file is executed as a script.
if __name__ =='__main__':
    getFeatChoose()
import pandas as pd
from scipy.stats import pearsonr
from sklearn import datasets
from sklearn import preprocessing
# NOTE(review): this section repeats the load/scale statements that appear
# earlier in the file.
# Load the iris data set and pull out the feature matrix, the feature
# names and the class labels (labels are cast to float).
dataset = datasets.load_iris()
feat = dataset.data
featNames = dataset.feature_names
label = dataset.target.astype(float)

# Standardise the features and the labels.  The same scaler object is
# re-fitted for the labels, so after this section ``scaler`` holds the
# statistics of the label column, not of the features.
scaler = preprocessing.StandardScaler()
x = scaler.fit_transform(feat)
y = scaler.fit_transform(label.reshape(-1, 1))
def countPearsonInFeats(feat):
    """Return the square matrix of pairwise Pearson correlations of columns.

    Args:
        feat: 2-D array; each column is treated as one variable.

    Returns:
        Array of shape ``(n_cols, n_cols)`` where entry ``[i, j]`` is the
        Pearson coefficient between column ``i`` and column ``j``.
    """
    n_cols = feat.shape[1]
    # Row-major walk over every ordered column pair (diagonal included).
    coefficients = [
        pearsonr(feat[:, row], feat[:, col])[0]
        for row in range(n_cols)
        for col in range(n_cols)
    ]
    return np.array(coefficients).reshape(n_cols, n_cols)
# Feature-to-feature Pearson correlation matrix of the standardised features;
# read as a module-level global by countMrits.
featsPersonr = countPearsonInFeats(x)
def countPearsonInFeatLabel(x, y):
    """Return the Pearson correlation of every column of ``x`` with ``y``.

    Args:
        x: 2-D array; each column is one feature.
        y: label array; it is flattened with ``ravel()`` before use.

    Returns:
        1-D float array with one coefficient per column of ``x``.
    """
    coefficients = [
        pearsonr(x[:, col], y.ravel())[0] for col in range(x.shape[1])
    ]
    return np.array(coefficients, dtype=float)
# Pearson correlation of each standardised feature column with the
# standardised label; read as a module-level global by countMrits.
featlabelPearsonr = countPearsonInFeatLabel(x,y)
def countMrits(arr, feat_corr=None, label_corr=None):
    """Compute the merit score of a feature subset.

    The score is ``k * fl / sqrt(k + k * (k - 1) * ff)`` where ``k`` is the
    subset size, ``fl`` the mean feature-label correlation of the subset and
    ``ff`` the mean correlation over all unordered feature pairs in the
    subset (the form used by correlation-based feature selection).
    Correlations are used with their sign, exactly as stored.

    Args:
        arr: sequence of feature indices forming the subset.
        feat_corr: square feature-feature correlation matrix. Defaults to
            the module-level ``featsPersonr``.
        label_corr: 1-D feature-label correlation vector. Defaults to the
            module-level ``featlabelPearsonr``.

    Returns:
        The merit as a float; ``0.0`` for an empty subset.
    """
    # Resolve the defaults lazily so the globals are only needed when the
    # caller does not supply its own correlation tables.
    if feat_corr is None:
        feat_corr = featsPersonr
    if label_corr is None:
        label_corr = featlabelPearsonr

    num = len(arr)
    if num == 0:
        # An empty subset has no correlations to average; previously this
        # crashed with ZeroDivisionError.
        return 0.0

    # Number of unordered pairs; 1 for a single feature so that the (empty)
    # pair sum divides cleanly to 0.
    pair_count = num * (num - 1) / 2 if num > 1 else num
    ff = sum(
        feat_corr[arr[i], arr[j]]
        for i in range(num)
        for j in range(i + 1, num)
    ) / pair_count
    fl = sum(label_corr[elem] for elem in arr) / num
    return num * fl / np.sqrt(num + num * (num - 1) * ff)
def getFeatChoose(names=None, merit_func=None):
    """Greedy forward selection of the feature subset with the best merit.

    Starting from an empty subset, every remaining feature is tried as the
    next addition; the one giving the highest merit is kept as long as it
    strictly improves on the current merit. The search stops at the first
    round without improvement or when no candidate features remain.
    Progress and the final choice are printed.

    Args:
        names: sequence of feature names; only its length and the final
            name lookup are used. Defaults to the module-level ``featNames``.
        merit_func: callable taking a list of feature indices and returning
            its merit. Defaults to the module-level ``countMrits``.

    Returns:
        List of the chosen feature indices in selection order.
    """
    if names is None:
        names = featNames
    if merit_func is None:
        merit_func = countMrits

    indexList = list(np.arange(len(names)))
    choose = []
    merit = None  # merit of the current subset; None until a feature is picked

    # Stop when the candidates are exhausted; max() over an empty score
    # table would otherwise raise ValueError.
    while indexList:
        # Fresh table each round so scores of earlier rounds cannot leak in.
        scores = {}
        for elem in indexList:
            candidate = choose + [elem]
            scores[elem] = merit_func(candidate)
            print(candidate, scores[elem])

        best = max(scores, key=scores.get)
        value = scores[best]
        if merit is not None and not merit < value:
            # No strict improvement: keep the current subset.
            break

        choose.append(best)
        merit = value
        # Remove by value: the list shrinks, so positions no longer match
        # feature indices and pop(best) would drop the wrong entry or fail.
        indexList.remove(best)
        print(merit)

    print(choose)
    print('所选特征为:\n', np.array(names)[choose])
    return choose
# Run the feature selection only when this file is executed as a script.
if __name__ =='__main__':
    getFeatChoose()