CS 431

The K-Means Clustering Algorithm



// K-MEANS: partition a dataset into K clusters (Lloyd's algorithm).
//
// Input:  D = {x₁, ..., x_N}, a dataset of N points (vectors of equal dimension)
//         K, the number of clusters; requires 1 ≤ K ≤ N
// Output: {c₁, ..., c_K}, the final cluster centers
//
// Each pass assigns every point to its nearest center and then moves each
// center to the mean of the points assigned to it. The loop stops when a
// full pass leaves every assignment unchanged.
K-MEANS(D, K)

    if K < 1 or K > N
        error "K must satisfy 1 ≤ K ≤ N"
    end

    // Initialization: pick K distinct elements of D as the starting centers.
    // Sampling without replacement prevents two centers from starting at the
    // same element, which would leave one of the two clusters empty.
    pool ← D
    for k = 1...K
        c_k ← randomly select element from pool // Cluster center
        pool ← pool with the selected element removed
        ω_k ← {}   // Stores elements that belong to the cluster
    end

    // a_i is the index of the cluster x_i was assigned to in the previous
    // pass; 0 means "not assigned yet", so the first pass always counts as a change.
    for i = 1...N
        a_i ← 0
    end

    changed ← true
    while changed
        changed ← false

        // Forget the memberships of the previous pass. Without this reset a
        // point that moves to another cluster would also stay in its old one.
        for k = 1...K
            ω_k ← {}
        end

        // Assign each element to the cluster with closest center
        for i = 1...N
            min_index ← -1
            min_value ← POS_INFINITY
            for k = 1...K
                dist ← distance between x_i and cluster center c_k
                // Strict '<' means ties go to the lowest cluster index, which
                // keeps assignments stable from one pass to the next.
                if dist < min_value
                    min_value ← dist
                    min_index ← k
                end
            end

            // ω_k is a multiset: equal points at different indices i all count.
            ω_{min_index} ← ω_{min_index} ∪ {x_i}

            if a_i ≠ min_index
                a_i ← min_index
                changed ← true
            end
        end

        // Recompute cluster centers
        for k = 1...K
            // An empty cluster has no mean (division by zero); keep its
            // previous center instead.
            if |ω_k| > 0
                centroid ← 0   // zero vector with the same dimension as the points
                for x in ω_k
                    centroid ← centroid + x
                end
                c_k ← centroid / |ω_k|
            end
        end
    end

    return {c₁, ..., c_K}
end