吴裕雄 python 机器学习——数据预处理包裹式特征选取模型

from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE,RFECV
from sklearn.model_selection import train_test_split

#数据预处理包裹式特征选取RFE模型
def test_RFE():
    iris=load_iris()
    X=iris.data
    y=iris.target
    estimator=LinearSVC()
    selector=RFE(estimator=estimator,n_features_to_select=2)
    selector.fit(X,y)
    print("N_features %s"%selector.n_features_)
    print("Support is %s"%selector.support_)
    print("Ranking %s"%selector.ranking_)
    
#调用test_RFE()
test_RFE()

#数据预处理包裹式特征选取RFECV模型
def test_RFECV():
    iris=load_iris()
    X=iris.data
    y=iris.target
    estimator=LinearSVC()
    selector=RFECV(estimator=estimator,cv=3)
    selector.fit(X,y)
    print("N_features %s"%selector.n_features_)
    print("Support is %s"%selector.support_)
    print("Ranking %s"%selector.ranking_)
    print("Grid Scores %s"%selector.grid_scores_)
    
#调用test_RFECV()
test_RFECV()

def test_compare_with_no_feature_selection():
    '''
    比较经过特征选择和未经特征选择的数据集,对 LinearSVC 的预测性能的区别
    '''
    ### 加载数据
    iris=load_iris()
    X,y=iris.data,iris.target
    ### 特征提取
    estimator=LinearSVC()
    selector=RFE(estimator=estimator,n_features_to_select=2)
    X_t=selector.fit_transform(X,y)
    #### 切分测试集与验证集
    X_train,X_test,y_train,y_test=train_test_split(X, y,test_size=0.25,random_state=0,stratify=y)
    X_train_t,X_test_t,y_train_t,y_test_t=train_test_split(X_t, y,test_size=0.25,random_state=0,stratify=y)
    ### 测试与验证
    clf=LinearSVC()
    clf_t=LinearSVC()
    clf.fit(X_train,y_train)
    clf_t.fit(X_train_t,y_train_t)
    print("Original DataSet: test score=%s"%(clf.score(X_test,y_test)))
    print("Selected DataSet: test score=%s"%(clf_t.score(X_test_t,y_test_t)))
    
#调用test_compare_with_no_feature_selection()
test_compare_with_no_feature_selection()

猜你喜欢

转载自www.cnblogs.com/tszr/p/10802046.html