The K-Means Clustering Algorithm



// K-MEANS: partition a dataset into K clusters by alternating between
// assigning points to their nearest center and recomputing each center
// as the mean of its assigned points.
//
// Inputs:
//   D = {x1, ..., xN} is a dataset of points
//   K is the number of clusters, with 1 ≤ K ≤ N
// Output:
//   {c1, ..., cK}, the final cluster centers
K-MEANS(D, K)

    // K distinct seeds cannot be drawn from fewer than K points
    if K < 1 or K > N
        error "K must satisfy 1 ≤ K ≤ N"
    end

    // Initialization
    // Draw the seeds without replacement so that no two clusters start
    // from the same element of D.
    {s1, ..., sK} ← randomly select K distinct elements from D
    for k = 1...K
        ck ← sk   // Cluster center
    end


    // The stopping criteria is left to the caller's choice, e.g. the
    // assignments did not change, the centers moved less than a
    // tolerance, or a maximum number of iterations was reached.
    while stopping criteria is false

        // Reset memberships on every iteration; otherwise points that
        // switch clusters would remain in their previous cluster and
        // corrupt the centroids computed below.
        for k = 1...K
            ωk ← {}   // Stores elements that belong to the cluster
        end

        // Assign each element to the cluster with closest center
        for i = 1...N
            min_index ← -1
            min_value ← POS_INFINITY
            for k = 1...K
                dist ← distance between xi and cluster center ck
                // Strict comparison: ties go to the lowest cluster index
                if dist < min_value
                    min_value ← dist
                    min_index ← k
                end
            end

            // NOTE: ωk should be treated as a multiset (elements are
            // distinguished by their index i), so that duplicate points
            // in D each contribute to the centroid.
            ωmin_index ← ωmin_index ∪ {xi}
        end

        // Recompute cluster centers
        for k = 1...K
            // An empty cluster has no mean (division by zero); keep its
            // previous center unchanged in that case.
            if |ωk| > 0
                centroid ← zero vector with the same dimension as the points
                for x in ωk
                    centroid ← centroid + x
                end
                ck ← centroid / |ωk|
            end
        end
    end

    return {c1, ..., cK}
end