用scikit-learn的feature_selection模块(SelectKBest配合卡方检验chi2)实现特征选择。
from sklearn.datasets import load_iris
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest
# Load the iris dataset and split it into the feature matrix and the labels.
iris = load_iris()
X = iris.data
y = iris.target
# Show the first 10 samples with all original features.
print("原始特征(前10个):\n", X[:10])

# Score every feature against the labels with chi2 and keep the k=3
# highest-scoring ones; fit() returns the fitted selector itself.
selector = SelectKBest(score_func=chi2, k=3).fit(X, y)
X_new = selector.transform(X)
# Column indices (into X) of the features that were kept.
z = selector.get_support(indices=True)

# Show the first 10 samples after selection, then the reduced shape.
print("选择后特征(前10个):\n", X_new[:10])
print("新特征维度:", X_new.shape)
print("所有特征的名称:", iris.feature_names)
# Per-feature scores computed by chi2 during fit.
print("特征重要性评分:", selector.scores_)
print("所选特征所在列:", z)
运行结果如下(共4个特征,其中第2个特征的评分最低,对结果的影响程度较小,因此未被选中):