# Function to implement the K means clustering algorithm.
#
# Arguments:
#   x  Matrix of cases (one row per case, one column per variable), or
#      any object that as.matrix converts to such a matrix.  All values
#      must be finite.
#   K  The number of clusters, from 1 up to the number of cases.
#
# Result returned is a list containing
#   means     a K-row matrix of cluster means, and
#   clusters  a vector of cluster assignments (values in 1..K), one per case.
#
# The initial means are K cases drawn at random, so the result depends on
# the state of the random number generator.

K_means <- function(x, K) {
  x <- as.matrix(x)
  n <- nrow(x)

  # Fail early with a clear message instead of a cryptic NA-in-if error
  # part-way through the iteration.
  stopifnot(
    all(is.finite(x)),
    is.numeric(K), length(K) == 1, K >= 1, K <= n
  )

  # drop = FALSE keeps the means as a K-row matrix even when K is 1 or x
  # has only one column.
  m <- x[sample.int(n, K), , drop = FALSE]   # matrix of cluster means
  rownames(m) <- NULL
  cl <- rep(1, n)                            # vector of cluster assignments

  # Cases as columns, so a mean vector recycles correctly when subtracted.
  tx <- t(x)

  # Loop to update cluster means and cluster assignments until no change.
  repeat {
    # Save previous means, so we can detect change.
    om <- m

    # Assign all cases to the cluster with the nearest mean.  The strict
    # "<" means ties go to the lowest-numbered cluster.
    cl[] <- 1
    best <- colSums((tx - m[1, ])^2)
    for (k in seq_len(K)[-1]) {
      d <- colSums((tx - m[k, ])^2)
      closer <- which(d < best)
      cl[closer] <- k
      best[closer] <- d[closer]
    }

    # Update cluster means to the mean of cases assigned to the cluster.
    # A cluster with no cases (possible when duplicate cases were drawn as
    # initial means) keeps its previous mean rather than becoming NaN.
    for (k in seq_len(K)) {
      members <- cl == k
      if (any(members)) {
        m[k, ] <- colMeans(x[members, , drop = FALSE])
      }
    }

    # Exit if nothing has changed.  Exact comparison is safe here: identical
    # assignments always reproduce bit-identical means.
    if (all(m == om)) {
      break
    }
  }

  list(means = m, clusters = cl)
}