import matplotlib.pyplot as plt
import pandas as pd
from numpy import *
def first_principal_axis(centered):
    """Return the unit direction of largest variance of mean-centered data.

    Parameters
    ----------
    centered : ndarray, shape (n_features, n_samples)
        Mean-centered data with one row per feature (the layout ``cov``
        expects by default).

    Returns
    -------
    ndarray, shape (n_features,)
        Unit-norm eigenvector of the covariance matrix belonging to the
        largest eigenvalue. Its sign is arbitrary.
    """
    covariance = cov(centered)
    # A covariance matrix is symmetric, so eigh is the right solver: it
    # guarantees real eigenvalues and orthonormal eigenvectors, whereas the
    # general-purpose eig may return complex output.
    eigenvalues, eigenvectors = linalg.eigh(covariance)
    return eigenvectors[:, argmax(eigenvalues)]


def project_onto_first_component(data):
    """Project every sample onto the first principal component.

    Parameters
    ----------
    data : ndarray, shape (n_features, n_samples)
        Raw data with one row per feature.

    Returns
    -------
    projected : ndarray, shape (n_features, n_samples)
        Each sample replaced by its orthogonal projection onto the line
        through the data mean along the first principal axis, expressed in
        the original coordinates.
    direction : ndarray, shape (n_features,)
        The unit principal axis that was used.
    """
    mean_vals = mean(data, axis=1, keepdims=True)
    centered = data - mean_vals  # center the data
    direction = first_principal_axis(centered)
    scores = dot(centered.T, direction)  # 1-D coordinate along the axis
    # Rebuild the points as score * direction + mean. Using the eigenvector
    # itself (rather than an angle obtained from arctan) keeps the result
    # independent of the eigenvector's sign and avoids dividing by a zero
    # x-component when the axis is vertical.
    projected = outer(direction, scores) + mean_vals
    return projected, direction


def main():
    """Load the first 10 rows of svm1.csv and plot them with their projection."""
    dataframe = pd.read_csv("svm1.csv").head(10)[["factor1", "factor2"]]
    data = dataframe.values.T  # rows: factor1, factor2
    projected, _ = project_onto_first_component(data)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Original samples as circles.
    ax.scatter(data[0], data[1], s=80, marker='o')
    # Projections onto the first principal component as orange triangles,
    # joined by a dashed line that traces the principal axis.
    ax.scatter(projected[0], projected[1], marker='^', c='orange', s=80)
    ax.plot(projected[0], projected[1], 'y--', linewidth=1)
    ax.grid()
    plt.show()


if __name__ == "__main__":
    main()
# 以下是得到的第一主成分(橙色三角形表示):