主成分降维 Python 代码实现(承接上一篇)

# coding=utf-8
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# PCA-based indicator selection:
#   1. z-score standardize every column of pca.csv;
#   2. fit a PCA that keeps all components, then retain the leading components
#      whose cumulative explained-variance ratio first reaches
#      CUMULATIVE_VARIANCE_THRESHOLD;
#   3. keep every original indicator (column) that has a loading greater than
#      LOADING_THRESHOLD on at least one retained component.

# Original author's note: 0.6 was chosen because the sample data is poor;
# a cumulative contribution of 0.8 is the more usual cut-off.
CUMULATIVE_VARIANCE_THRESHOLD = 0.6
# Original author's note: indicators with loadings of 0.7-1 are usually kept.
LOADING_THRESHOLD = 0.4

data = pd.read_csv("pca.csv")  # load the raw indicator table
column_name = data.columns.values.tolist()  # all indicator (column) names

# z-score standardization; the scaler returns a bare ndarray, so the column
# names are re-attached here.
data_standard = pd.DataFrame(
    StandardScaler().fit_transform(data),
    columns=column_name,
)

pca = PCA()  # keep all components
pca.fit(data_standard)
# components_ has shape (n_components, n_features): one principal axis per ROW.
feature_vectors = pca.components_
# Fraction of the total variance explained by each component.
pca_contribution = pca.explained_variance_ratio_

# Count how many leading components are needed to reach the threshold.
pca_contribution_sum = 0  # cumulative explained-variance ratio
counter = 0  # number of principal components retained
for counter, ratio in enumerate(pca_contribution, start=1):
    pca_contribution_sum += ratio
    if pca_contribution_sum >= CUMULATIVE_VARIANCE_THRESHOLD:
        break

# Take the first `counter` components (rows) and transpose so that row j holds
# the loadings of indicator j on each retained component.
reduced_dimension_vector = feature_vectors[:counter].T
# NOTE(review): only positive loadings pass this test, exactly as in the
# original code. The sign of a principal axis is arbitrary, so comparing
# np.abs(reduced_dimension_vector) may be what is intended - confirm.
Vector_judgment = reduced_dimension_vector > LOADING_THRESHOLD

# Positions of the indicators with at least one loading above the threshold.
selected_positions = np.flatnonzero(Vector_judgment.any(axis=1))
index_after_selection = [data_standard.iloc[:, j] for j in selected_positions]
after_selection_name = [column_name[j] for j in selected_positions]

# Standardized data restricted to the selected indicators (samples as rows).
after_selection_data_T = data_standard.iloc[:, selected_positions]
after_selection_data = after_selection_data_T.T  # indicators as rows
print(after_selection_data_T)  # output the indicators kept after reduction

猜你喜欢

转载自blog.csdn.net/hllingg/article/details/85342803
今日推荐