PythonでK-meansクラスタリング そのいち

# Sample 2-D points as [x, y] pairs.  The first five have x in 0..1 and the
# last five have x in 4..6.
plot = [
    [0, 0],
    [1, 1],
    [0, 2],
    [1, 3],
    [0, 4],
    [5, 0],
    [5, 2],
    [4, 2],
    [6, 2],
    [5, 4],
]

# Number of clusters to split the points into.
k = 2

import random

class Node:
    """A data point tagged with the id of the cluster it currently belongs to.

    The initial cluster id is drawn at random from ``0 .. k-1``, where ``k``
    is the module-level cluster count.
    """

    def __init__(self, data):
        """Store the point and assign it to a random starting cluster.

        data -- the point's coordinates; the object is stored as-is, not copied.
        """
        self.xy = data
        # random.randint includes both end points, hence ``k - 1``.
        self.id = random.randint(0, k - 1)

    def info(self):
        """Print the point's coordinates and its cluster id on one line."""
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function
        # (``print(a, b)`` would print a tuple on Python 2).
        print('%s %s' % (self.xy, self.id))

    def get_id(self):
        """Return the id of the cluster this node is assigned to."""
        return self.id

    def get_xy(self):
        """Return the coordinates object passed to the constructor."""
        return self.xy

# Wrap every sample point in a Node, in the same order as ``plot``.
nodelist = [Node(point) for point in plot]

class Cluster:
    """A labelled group of nodes together with the group's center point."""

    def __init__(self, id):
        """Create an empty cluster labelled ``id``.

        ``center`` starts out as an empty list and ``nodes`` as an empty
        member list; both are filled in by the caller.
        """
        self.center = []
        self.nodes = []
        self.id = id

    def add_node(self, n):
        """Append ``n`` to the end of this cluster's member list."""
        members = self.nodes
        members.append(n)

    def info(self):
        """Call ``info()`` on each member, in the order they were added."""
        for member in self.nodes:
            member.info()

# One Cluster per id 0 .. k-1, stored so that cluster[n].id == n.
cluster = [Cluster(index) for index in range(k)]

# distance helper used when updating the cluster id of each node
import math
def dist(p, t):
    """Return the Euclidean distance between the 2-D points ``p`` and ``t``.

    Only indices 0 and 1 of each argument are read.  The coordinates are
    converted to ``Decimal`` for the subtraction and squaring; the result of
    ``math.sqrt`` is a plain ``float``.
    """
    delta_x = Decimal(p[0]) - Decimal(t[0])
    delta_y = Decimal(p[1]) - Decimal(t[1])
    return math.sqrt(delta_x ** 2 + delta_y ** 2)

from decimal import *

# Show the starting cluster id of every node.
# The parenthesised single-argument form prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print('Init >> ')
for node in nodelist:
    node.info()

# Run a fixed number of K-means rounds (there is no convergence test).
# Each round: group nodes by their current id, recompute every cluster's
# centroid, reassign each node to the nearest centroid, then empty the groups.
for _round in range(2):
    # add nodes to cluster
    for group in cluster:
        for node in nodelist:
            if node.get_id() == group.id:
                group.add_node(node)

    # calc cluster position
    for group in cluster:
        if not group.nodes:
            # An empty cluster keeps whatever center it had before
            # ([] if it has never had a member).
            continue
        # The sums start from zero for every cluster; sharing one running
        # total across clusters would leak earlier clusters' coordinates
        # into later centroids.
        sum_x = sum(member.get_xy()[0] for member in group.nodes)
        sum_y = sum(member.get_xy()[1] for member in group.nodes)
        count = Decimal(len(group.nodes))
        group.center = [Decimal(sum_x) / count, Decimal(sum_y) / count]

    # reassign every node to the cluster with the nearest centroid
    for node in nodelist:
        best_dist = None
        best_id = None
        node_xy = node.get_xy()

        for group in cluster:
            if not group.center:
                # No centroid yet (cluster never had members): nothing to
                # measure against, and dist() would fail on an empty list.
                continue
            d = dist(node_xy, group.center)
            # Strict '<' keeps the earliest cluster on ties.
            if best_dist is None or d < best_dist:
                best_dist = d
                best_id = group.id

        # Leave the node where it is if no cluster has a centroid.
        if best_id is not None:
            node.id = best_id

    # clear nodes in cluster instance
    for group in cluster:
        # Slice assignment empties the existing list object in place.
        group.nodes[:] = []

# Show the cluster id of every node after the K-means rounds.
# The parenthesised single-argument form prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print('Post >> ')
for node in nodelist:
    node.info()

データ数が少ないうえに、実装もテキトー(反復は2回固定で、収束判定はしていない)。
なので、初期クラスタをランダムに割り当てるというK-meansの性質も相まって、実行するたびに結果が変わる。(´・ω・)