版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u014539580/article/details/78248003
K-means 算法的具体原理请自行查阅资料,这里只展示简单的调用方法。
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
def loadData(filePath):
    """Load a comma-separated city dataset from a text file.

    Every non-blank line is split on ``,``: the first field is kept as the
    city name and each remaining field is converted with ``float()``.

    Args:
        filePath: Path of the text file to read.

    Returns:
        A tuple ``(retData, retCityName)`` where ``retData`` is a list of
        float lists (one per non-blank line) and ``retCityName`` is the list
        of first fields, in file order.

    Raises:
        OSError: If the file cannot be opened for reading.
        ValueError: If a value field cannot be converted to ``float``.
    """
    retData = []
    retCityName = []
    # Read-only mode is sufficient (the original 'r+' needlessly required
    # write permission); the with-block guarantees the handle is closed.
    with open(filePath, 'r') as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Skip blank/trailing lines so they do not yield an empty
                # name and an empty feature row.
                continue
            items = line.split(",")
            retCityName.append(items[0])
            retData.append([float(value) for value in items[1:]])
    return retData, retCityName
if __name__ == '__main__':
    # To cluster a real dataset instead of the synthetic one below, use:
    # data, cityName = loadData('city.txt')

    # Synthetic data: the first four rows hold the values 0..19 and the last
    # four rows hold 80..99 (five features per row), one row per city name.
    a = np.arange(100)
    data = a[:20].reshape(4, 5).tolist()
    c = a[-20:].reshape(4, 5).tolist()
    data.extend(c)
    cityName = ['a', 'b', 'c', 'd', 'w', 'x', 'y', 'z']

    n_clusters = 2
    km = KMeans(n_clusters=n_clusters)
    label = km.fit_predict(data)
    # Sum of each cluster centre's coordinates, used as the cluster's
    # "expenses" figure and as the sort key below.
    expenses = np.sum(km.cluster_centers_, axis=1)
    # print(expenses)

    # One bucket of city names per cluster label.
    CityCluster = [[] for _ in range(n_clusters)]
    for name, cluster_id in zip(cityName, label):
        CityCluster[cluster_id].append(name)

    # Sort the clusters by their expenses value before printing.
    p = pd.DataFrame(CityCluster, index=expenses)
    p = p.sort_index()
    for i in p.index.tolist():
        print('Expenses:%.2f' % i)
        print(p.loc[i].values.tolist())
没有找到 city.txt 数据集,
所以简单地构建了一组模拟数据:
data
[[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[80, 81, 82, 83, 84],
[85, 86, 87, 88, 89],
[90, 91, 92, 93, 94],
[95, 96, 97, 98, 99]]
每一行分别对应cityName
cityName = ['a','b','c','d','w','x','y','z']
很明显可以看出,应分为 abcd 和 wxyz 两类。
调用km = KMeans(n_clusters=2)
label = km.fit_predict(data)
返回的 label 为一维标签数组,例如 array([0, 0, 0, 0, 1, 1, 1, 1])(簇编号 0/1 与初始化有关,可能互换)
将其成功分为两类