# Copyright notice: reposting is welcome; please credit the source and author. Thank you! https://blog.csdn.net/qq_42442369/article/details/86730054
# Import modules
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
# Compute the Euclidean distance
def eucDistance(vec1, vec2):
    """Return the Euclidean distance between two vectors.

    Both arguments may be 1-D arrays, single-row ``np.matrix`` objects or
    plain sequences of numbers. They are flattened to 1-D before the
    subtraction so that squaring is always element-wise.

    Args:
        vec1: First vector.
        vec2: Second vector, with the same number of elements as ``vec1``.

    Returns:
        The distance as a Python ``float``.
    """
    # ``pow(matrix, 2)`` is a matrix power (and fails for non-square input),
    # and the builtin ``sum`` iterates over matrix rows, so matrix inputs
    # must be flattened to plain 1-D arrays first.
    first = np.asarray(vec1, dtype=float).ravel()
    second = np.asarray(vec2, dtype=float).ravel()
    diff = second - first
    return sqrt(float(np.dot(diff, diff)))
# Choose the initial cluster centroids
def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` at random as initial centroids.

    Rows are drawn without replacement whenever ``k`` does not exceed the
    number of samples, so the same sample is never used for two centroids
    (duplicate centroids would leave one cluster permanently empty). If
    ``k`` is larger than the number of samples, rows are drawn with
    replacement, because distinct rows are then impossible.

    Args:
        dataSet: 2-D array or ``np.matrix`` with one sample per row.
        k: Number of centroids to select.

    Returns:
        A ``(k, dim)`` float ``ndarray`` holding the selected rows.
    """
    numSamples, dim = dataSet.shape
    # Distinct row indices when possible; distinct indices do not guarantee
    # distinct values if the data itself contains repeated rows.
    indices = np.random.choice(numSamples, size=k, replace=k > numSamples)
    centroids = np.zeros((k, dim))
    centroids[:, :] = np.asarray(dataSet)[indices, :]
    return centroids
# K-means clustering algorithm (iterative)
def kmeanss(dataSet, k):
    """Cluster ``dataSet`` into ``k`` groups with the k-means algorithm.

    Starting from centroids chosen by ``initCentroids``, every sample is
    assigned to its nearest centroid (Euclidean distance) and each centroid
    is then moved to the mean of its samples. This repeats until a full
    pass leaves every assignment unchanged.

    Args:
        dataSet: 2-D array or ``np.matrix`` with one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssement)``:
        ``centroids`` is a ``(k, dim)`` ``ndarray`` of cluster centres;
        ``clusterAssement`` is a ``(numSamples, 2)`` ``np.matrix`` whose
        column 0 is the assigned cluster index and column 1 the squared
        distance to that cluster's centroid.
    """
    # Work on a plain 2-D float array so that each row slice is a 1-D
    # vector, whatever container type the caller passed in.
    points = np.asarray(dataSet, dtype=float)
    numSamples = points.shape[0]
    # np.asmatrix replaces the np.mat alias that NumPy 2.0 removed.
    clusterAssement = np.asmatrix(np.zeros((numSamples, 2)))
    # -1 marks "not yet assigned", so the first pass always registers a
    # change, even for samples that end up in cluster 0.
    clusterAssement[:, 0] = -1
    centroids = initCentroids(points, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        # Assignment step: attach every sample to its nearest centroid.
        for i in range(numSamples):
            minDist = np.inf
            minIndex = 0
            for j in range(k):
                distance = eucDistance(centroids[j, :], points[i, :])
                if distance < minDist:
                    minDist = distance
                    minIndex = j
            # Compare with the previous assignment BEFORE overwriting it;
            # otherwise a change can never be detected.
            if clusterAssement[i, 0] != minIndex:
                clusterChanged = True
            clusterAssement[i, :] = minIndex, minDist ** 2
        # Update step: move each centroid to the mean of its members.
        labels = clusterAssement[:, 0].A.ravel()
        for j in range(k):
            pointsInCluster = points[labels == j]
            # An empty cluster keeps its previous centroid rather than
            # becoming NaN from the mean of zero rows.
            if len(pointsInCluster) > 0:
                centroids[j, :] = np.mean(pointsInCluster, axis=0)
    print('Congratulations,cluster complete!')
    return centroids, clusterAssement
# Display the clustering result
def showCluster(dataSet, k, centroids, clusterAssement):
    """Plot every sample in its cluster's marker style and overlay centroids.

    Args:
        dataSet: 2-D array or matrix of samples; columns 0 and 1 are used
            as the x and y coordinates.
        k: Number of clusters.
        centroids: Array with one row per cluster centre; columns 0 and 1
            are plotted.
        clusterAssement: Per-sample assignments; column 0 holds the cluster
            index of each sample.

    Returns:
        1 if ``k`` exceeds the number of available marker styles (nothing
        is drawn in that case), otherwise ``None`` after showing the plot.
    """
    numSamples = dataSet.shape[0]
    mark = ['or', 'ob', 'og', 'ok', '^r', '+r', '<r', 'pr']
    if k > len(mark):
        print('Sorry!')
        return 1
    # Draw each sample with the marker of the cluster it belongs to.
    for i in range(numSamples):
        markIndex = int(clusterAssement[i, 0])
        plt.plot(dataSet[i, 0], dataSet[i, 1], mark[markIndex])
    # Draw the centroids larger so they stand out from the samples.
    for j in range(k):
        plt.plot(centroids[j, 0], centroids[j, 1], mark[j], markersize=12)
    plt.show()
# Import modules
import kmeans
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
# Load the data set from a file
dataSet = []
# Each line of the file holds two tab-separated numbers (x, y).
# Iterate over the file itself: ``readline()`` returns only the first line,
# and looping over that string would yield single characters.
with open('./testSet.txt') as fileIn:
    for line in fileIn:
        line = line.strip()
        if not line:
            continue  # skip blank lines, e.g. a trailing newline at EOF
        lineArr = line.split('\t')
        dataSet.append([float(lineArr[0]), float(lineArr[1])])
# Run k-means on the data
# np.asmatrix replaces the np.mat alias that NumPy 2.0 removed.
dataSet = np.asmatrix(dataSet)
k = 4
centroids, clusterAssement = kmeans.kmeanss(dataSet, k)
# Show the result; showCluster takes k as its second argument
kmeans.showCluster(dataSet, k, centroids, clusterAssement)