# coding=utf-8
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Cumulative explained-variance ratio the retained components must reach.
# 0.6 was chosen for the author's sample data; 0.8 is the more usual setting.
CUMULATIVE_VARIANCE_THRESHOLD = 0.6

# A feature is kept when its loading on any retained component exceeds this
# value (stricter cut-offs such as 0.7 are common for cleaner data).
LOADING_THRESHOLD = 0.4


def count_components(contribution, threshold=CUMULATIVE_VARIANCE_THRESHOLD):
    """Return how many leading components reach the cumulative ratio.

    Args:
        contribution: Per-component explained-variance ratios, in the order
            the components are ranked.
        threshold: Cumulative ratio at which accumulation stops.

    Returns:
        The number of leading components whose ratios sum to at least
        ``threshold``; ``len(contribution)`` if the sum never reaches it.
    """
    cumulative = 0
    for counter, ratio in enumerate(contribution, start=1):
        cumulative += ratio
        if cumulative >= threshold:
            return counter
    return len(contribution)


def select_feature_indices(components, n_components,
                           threshold=LOADING_THRESHOLD):
    """Return indices of features loading strongly on the leading components.

    Args:
        components: 2-D array laid out like ``PCA.components_``: one row per
            component, one column per original feature.
        n_components: Number of leading components (rows) to inspect.
        threshold: A feature is selected when its coefficient on at least one
            inspected component is strictly greater than this value.

    Returns:
        Sorted list of selected feature (column) indices.
    """
    # Rows of ``components`` are the components, so slice rows first and then
    # transpose to get one row per feature: (n_features, n_components).
    loadings = np.asarray(components)[:n_components].T
    # NOTE(review): only positive coefficients pass this test; a strongly
    # negative loading is ignored. Consider comparing abs(loadings) instead.
    mask = (loadings > threshold).any(axis=1)
    return np.flatnonzero(mask).tolist()


def main():
    """Read ``pca.csv``, run PCA on the standardised data and print the
    standardised columns of the indicators selected by their loadings."""
    data = pd.read_csv("pca.csv")  # load the data
    column_name = data.columns.values.tolist()  # all column names

    # z-score standardisation; columns of the result are labelled 0..n-1
    data_standard = pd.DataFrame(StandardScaler().fit_transform(data))

    pca = PCA()  # keep every component
    pca.fit(data_standard)
    feature_vectors = pca.components_  # one row per principal component
    # share of the total variance explained by each component
    pca_contribution = pca.explained_variance_ratio_

    # number of leading components needed to reach the cumulative threshold
    counter = count_components(pca_contribution)

    # positions of the indicators kept after the reduction
    selected = select_feature_indices(feature_vectors, counter)
    after_selection_name = [column_name[j] for j in selected]

    # positional column selection (``DataFrame.ix`` no longer exists)
    after_selection_data_T = data_standard.iloc[:, selected].copy()
    after_selection_data_T.columns = after_selection_name  # restore names
    print(after_selection_data_T)  # print the retained indicator columns


if __name__ == "__main__":
    main()
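# Article title: "Principal component dimensionality reduction: a Python implementation (continued from the previous post)"
# Reposted from blog.csdn.net/hllingg/article/details/85342803
# (Page navigation labels that followed the article: "You may also like", "Today's picks", "Weekly ranking".)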