Python 自编实现 K-means 算法(任意维度)

import numpy as np
import pandas as pd


def dist(x, y):
    """Return the Euclidean distance between two vectors.

    ``x`` and ``y`` must support element-wise subtraction and
    exponentiation (for example numpy arrays or pandas Series of equal
    length). The squared component differences are added up with the
    builtin ``sum`` and the square root of that total is returned.
    """
    offset = x - y
    squared_total = sum(offset ** 2)
    return squared_total ** 0.5

def kmeans(data, m):
    """Cluster the rows of ``data`` into ``m`` groups with k-means.

    The first ``m`` rows are used as the initial centers. Each pass assigns
    every row to its nearest center (Euclidean distance, ties going to the
    lowest center number) and then moves each center to the mean of the
    rows assigned to it. Iteration stops as soon as a pass produces the
    same assignments as the previous one.

    Args:
        data: 2-D numeric table (pandas DataFrame or array-like) with one
            sample per row and any number of feature columns. Rows are
            handled by position, so the index labels of a DataFrame do not
            matter.
        m: Number of clusters; must satisfy ``1 <= m <= len(data)``.

    Returns:
        A DataFrame indexed ``0..len(data)-1`` whose columns ``0..m-1`` hold
        the distance from each row to the corresponding final center and
        whose ``'class'`` column holds the number of the nearest center.

    Raises:
        ValueError: If ``data`` is not two-dimensional or ``m`` is outside
            ``1..len(data)``.
    """
    points = np.asarray(data, dtype=float)
    if points.ndim != 2:
        raise ValueError("data must be two-dimensional (samples x features)")
    n_samples = points.shape[0]
    if not 1 <= m <= n_samples:
        raise ValueError(
            "m must be between 1 and the number of rows (%d), got %r"
            % (n_samples, m)
        )

    # Copy so that moving the centers never writes into the caller's data.
    centers = points[:m].copy()
    labels = None

    while True:
        # Broadcast (n, 1, d) against (1, m, d) to get every row-to-center
        # offset at once, then reduce over the feature axis.
        offsets = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
        distances = np.sqrt((offsets ** 2).sum(axis=2))
        new_labels = distances.argmin(axis=1)

        # Identical assignments would yield identical centers, so the
        # distances just computed are already the final ones.
        if labels is not None and np.array_equal(new_labels, labels):
            break
        labels = new_labels

        for k in range(m):
            members = points[labels == k]
            # An empty cluster keeps its previous center; taking the mean of
            # no rows would give NaN and the loop would never settle.
            if len(members):
                centers[k] = members.mean(axis=0)

    dis = pd.DataFrame(distances, index=range(n_samples), columns=range(m))
    dis['class'] = labels
    return dis
# Demo: run the clustering on the iris sample dataset.
# NOTE(review): scikit-learn is imported here, mid-file, and is used only to
# obtain the sample data.
from sklearn.datasets import load_iris
dataset = load_iris()
# Wrap the feature matrix stored under the 'data' key in a DataFrame, which
# is the input type kmeans() indexes with .iloc/.loc.
data = pd.DataFrame(dataset['data'])


# Cluster into m=3 groups. The return value is not stored, so it is only
# displayed when this line is evaluated in an interactive session.
kmeans(data,3)
kmeans(data,3)
Out[88]: 
            0         1          2  class
0     5.03133   3.41251   0.146942      2
1     5.08751   3.38964   0.438169      2
2     5.25229   3.56011   0.412301      2
3     5.12704   3.41232   0.518837      2
4     5.07638   3.46031    0.19797      2
5     4.65292   3.14251   0.683807      2
6     5.18486   3.50716   0.415201      2
7     4.97467   3.32903  0.0599333      2
8     5.30207    3.5611   0.800994      2
9     5.04034   3.34972   0.366595      2
10     4.8691   3.31978   0.487844      2
11    4.96949   3.30275    0.25138      2
12    5.16374   3.45735   0.491927      2
13     5.6256   3.89487   0.909061      2
14     5.0782   3.64453    1.02019      2
15     4.8566    3.4928    1.21309      2
16    5.00219   3.49088   0.662414      2
17    4.99535   3.37762    0.15097      2
18    4.58841   3.10971   0.828488      2
19    4.94411   3.37136   0.398989      2
20     4.6667   3.06923   0.461727      2
21     4.8958   3.30868   0.337627      2
22    5.57001    3.9232   0.644354      2
23    4.65441   3.00464   0.379463      2
24    4.72404    3.0537   0.484553      2
25    4.88014   3.18552   0.441805      2
26    4.81796    3.1719   0.207827      2
27    4.91125   3.30315   0.218156      2
28    4.98988   3.36997   0.209743      2
29    5.00273   3.30323   0.401985      2
..        ...       ...        ...    ...
120  0.279475    1.9269    5.07992      0
121   1.52203  0.815718    3.95277      1
122   1.32854   2.99425    6.17566      0
123   1.08541  0.755192    4.05181      1
124  0.275316   1.77256    4.92666      0
125  0.529542   2.16196    5.27803      0
126   1.18599  0.637608    3.91888      1
127   1.14171  0.713559    3.94953      1
128  0.545991   1.47676    4.78293      0
129   0.58213   1.95439    5.06241      0
130  0.739302   2.33427     5.5089      0
131   1.44529   3.09084     5.9974      0
132  0.563327   1.52444    4.82261      0
133   1.03386  0.829074    4.10541      1
134   1.11201   1.23918    4.50653      0
135   0.96458   2.65419    5.75778      0
136   0.73774   1.73427    4.84041      0
137  0.563327   1.32837    4.55574      0
138   1.27958  0.619349    3.83573      1
139  0.322894   1.62112    4.75659      0
140  0.396658   1.78757    4.97248      0
141  0.664795   1.55479    4.59739      0
142   1.28648  0.855351    4.13628      1
143  0.335741   2.02095     5.2126      0
144  0.522228   1.95817    5.09085      0
145  0.596489   1.46332    4.60751      0
146   1.08399  0.906707    4.21459      1
147  0.632175    1.1939    4.40999      0
148  0.831837   1.51878    4.59839      0
149   1.16571  0.840952    4.07622      1
[150 rows x 4 columns]

猜你喜欢

转载自blog.csdn.net/spartanfuk/article/details/81545704