PythonでK-meansクラスタリング そのいち
# Minimal k-means clustering demo on a handful of hard-coded 2-D points.
#
# Every node starts in a random cluster; the script then alternates between
# recomputing each cluster's centroid and moving every node to its nearest
# centroid until the assignment is stable.

import math
import random
from decimal import Decimal

# Sample 2-D points to cluster.
plot = [
    [0, 0], [1, 1], [0, 2], [1, 3], [0, 4],
    [5, 0], [5, 2], [4, 2], [6, 2], [5, 4],
]

# Number of clusters.
k = 2


class Node(object):
    """A data point plus the id of the cluster it is currently assigned to."""

    def __init__(self, data):
        """Store the point ``data`` ([x, y]) and pick a random cluster id."""
        self.xy = data
        # Random initial assignment in [0, k - 1]; this is why results vary
        # from run to run.
        self.id = random.randint(0, k - 1)

    def info(self):
        """Print the point and its cluster id, separated by a space."""
        # Single-argument form prints identically on Python 2 and Python 3.
        print('%s %s' % (self.xy, self.id))

    def get_id(self):
        """Return the id of the cluster this node is assigned to."""
        return self.id

    def get_xy(self):
        """Return the node's coordinates as given to the constructor."""
        return self.xy


nodelist = [Node(point) for point in plot]


class Cluster(object):
    """A cluster: its id, its current member nodes and its centroid."""

    def __init__(self, id):
        """Create an empty cluster with the given id and no centroid yet."""
        self.id = id
        self.nodes = []
        # Empty until the first centroid is computed; afterwards [x, y].
        self.center = []

    def add_node(self, n):
        """Append node ``n`` to this cluster's member list."""
        self.nodes.append(n)

    def info(self):
        """Print every member node via ``Node.info``."""
        for node in self.nodes:
            node.info()


cluster = [Cluster(index) for index in range(k)]


def dist(p, t):
    """Return the Euclidean distance between 2-D points ``p`` and ``t``.

    Coordinates are converted to ``Decimal`` before subtracting; the result
    is a ``float`` because it comes from ``math.sqrt``.
    """
    dx = Decimal(p[0]) - Decimal(t[0])
    dy = Decimal(p[1]) - Decimal(t[1])
    return math.sqrt(dx * dx + dy * dy)


def update_centers(clusters):
    """Set each cluster's ``center`` to the mean of its member nodes.

    The sums are computed separately for every cluster. A cluster without
    members keeps its previous ``center`` (possibly still empty) instead of
    dividing by zero.
    """
    for group in clusters:
        if not group.nodes:
            continue
        count = Decimal(len(group.nodes))
        sum_x = sum(node.get_xy()[0] for node in group.nodes)
        sum_y = sum(node.get_xy()[1] for node in group.nodes)
        group.center = [Decimal(sum_x) / count, Decimal(sum_y) / count]


def nearest_cluster_id(xy, clusters):
    """Return the id of the cluster whose center is closest to ``xy``.

    Clusters that have no center yet are ignored. Ties go to the cluster
    that appears first in ``clusters``. Returns -1 when no cluster has a
    center.
    """
    best_id = -1
    best_distance = float('inf')
    for group in clusters:
        if not group.center:
            continue
        distance = dist(xy, group.center)
        if distance < best_distance:
            best_distance = distance
            best_id = group.id
    return best_id


def kmeans(nodes, clusters, max_passes=100):
    """Refine the cluster assignment of ``nodes`` in place.

    Each pass groups the nodes by their current id (a node with id ``i``
    goes into ``clusters[i]``), recomputes the centroids and reassigns every
    node to its nearest centroid. Iteration stops as soon as a pass changes
    no assignment, or after ``max_passes`` passes. The clusters' member
    lists are emptied again at the end of every pass; their ``center``
    values are kept.

    Returns the number of passes performed.
    """
    passes = 0
    for _ in range(max_passes):
        passes += 1

        # Group nodes by their current cluster id.
        for node in nodes:
            index = node.get_id()
            if 0 <= index < len(clusters):
                clusters[index].add_node(node)

        update_centers(clusters)

        # Move every node to its nearest centroid.
        changed = False
        for node in nodes:
            new_id = nearest_cluster_id(node.get_xy(), clusters)
            if new_id != node.id:
                node.id = new_id
                changed = True

        # Member lists are rebuilt from the node ids on the next pass.
        for group in clusters:
            group.nodes[:] = []

        if not changed:
            break
    return passes


print('Init >> ')
for node in nodelist:
    node.info()

kmeans(nodelist, cluster)

print('Post >> ')
for node in nodelist:
    node.info()
データ数が少ないうえにテキトー。
なので、各点の初期クラスタを乱数で割り当てていることと、K-meansが初期値に依存するという性質も相まって、毎回結果が変わる。(´・ω・)