import numpy as np
import pandas as pd
def dist(x,y):
xy = (sum((x-y)**2))**0.5
return(xy)
def kmeans(data,m):
m = m
name = ['center'+str(i) for i in range(m)]
for j in range(len(name)):
name[j] = data.iloc[j,:]
dis = pd.DataFrame(index=range(len(data)),columns=range(len(name)))
dis['class'] = 0
while True:
for i in range(len(data)):
for k in range(len(name)):
dis.iloc[i,k] = dist(data.iloc[i,:],name[k])
for i in range(len(data)):
dis.iloc[i,len(name)] = np.argmin(list(dis.iloc[i,0:(len(name))]))
index = ['index'+str(i) for i in range(m)]
for q in range(m):
index[q] = dis.iloc[:,len(name)] == q
name2 = ['center_new'+str(i) for i in range(m)]
for t in range(m):
name2[t] = data.loc[index[t],:].mean()
sum_s = []
for w in range(m):
sum_s.append(sum(name[w]==name2[w]))
if sum(sum_s) == (m*(data.shape[1])):
break
for e in range(m):
name[e] = name2[e]
return dis
from sklearn.datasets import load_iris
dataset = load_iris()
data = pd.DataFrame(dataset['data'])
kmeans(data,3)
kmeans(data,3)
Out[88]:
0 1 2 class
0 5.03133 3.41251 0.146942 2
1 5.08751 3.38964 0.438169 2
2 5.25229 3.56011 0.412301 2
3 5.12704 3.41232 0.518837 2
4 5.07638 3.46031 0.19797 2
5 4.65292 3.14251 0.683807 2
6 5.18486 3.50716 0.415201 2
7 4.97467 3.32903 0.0599333 2
8 5.30207 3.5611 0.800994 2
9 5.04034 3.34972 0.366595 2
10 4.8691 3.31978 0.487844 2
11 4.96949 3.30275 0.25138 2
12 5.16374 3.45735 0.491927 2
13 5.6256 3.89487 0.909061 2
14 5.0782 3.64453 1.02019 2
15 4.8566 3.4928 1.21309 2
16 5.00219 3.49088 0.662414 2
17 4.99535 3.37762 0.15097 2
18 4.58841 3.10971 0.828488 2
19 4.94411 3.37136 0.398989 2
20 4.6667 3.06923 0.461727 2
21 4.8958 3.30868 0.337627 2
22 5.57001 3.9232 0.644354 2
23 4.65441 3.00464 0.379463 2
24 4.72404 3.0537 0.484553 2
25 4.88014 3.18552 0.441805 2
26 4.81796 3.1719 0.207827 2
27 4.91125 3.30315 0.218156 2
28 4.98988 3.36997 0.209743 2
29 5.00273 3.30323 0.401985 2
.. ... ... ... ...
120 0.279475 1.9269 5.07992 0
121 1.52203 0.815718 3.95277 1
122 1.32854 2.99425 6.17566 0
123 1.08541 0.755192 4.05181 1
124 0.275316 1.77256 4.92666 0
125 0.529542 2.16196 5.27803 0
126 1.18599 0.637608 3.91888 1
127 1.14171 0.713559 3.94953 1
128 0.545991 1.47676 4.78293 0
129 0.58213 1.95439 5.06241 0
130 0.739302 2.33427 5.5089 0
131 1.44529 3.09084 5.9974 0
132 0.563327 1.52444 4.82261 0
133 1.03386 0.829074 4.10541 1
134 1.11201 1.23918 4.50653 0
135 0.96458 2.65419 5.75778 0
136 0.73774 1.73427 4.84041 0
137 0.563327 1.32837 4.55574 0
138 1.27958 0.619349 3.83573 1
139 0.322894 1.62112 4.75659 0
140 0.396658 1.78757 4.97248 0
141 0.664795 1.55479 4.59739 0
142 1.28648 0.855351 4.13628 1
143 0.335741 2.02095 5.2126 0
144 0.522228 1.95817 5.09085 0
145 0.596489 1.46332 4.60751 0
146 1.08399 0.906707 4.21459 1
147 0.632175 1.1939 4.40999 0
148 0.831837 1.51878 4.59839 0
149 1.16571 0.840952 4.07622 1
[150 rows x 4 columns]