Hierarchical Agglomerative Clustering Algorithm
//D = {x1, ..., xN} is a dataset of points
// AGGLOMERATIVE(D): naive hierarchical agglomerative clustering.
//   Input:  D = {x1, ..., xN}, a dataset of points.
//   Output: M, the list of merges in the order they were performed.
//           Each entry {a, b} means the cluster headed by b was absorbed
//           into the cluster headed by a. M has N-1 entries (empty if N <= 1).
//   Uses:   SIM(., .) -- similarity between two points or two clusters.
//           The linkage criterion is whatever SIM implements (not defined here).
AGGLOMERATIVE(D)
    // Initialization: pairwise similarity matrix; every point starts as its own cluster
    for i = 1...N
        for j = 1...N
            C(i,j) ← SIM(xi, xj)
        end
        I(i) ← 1 // Indicates data point i is the "head" of a cluster
    end
    M ← {} // Keep track of merges

    // Each pass merges two clusters, so N-1 passes leave a single cluster
    for step = 1 ... N-1
        // Find the 2 most similar active clusters.
        // Start from -infinity so that zero or negative similarities can still be selected.
        maxSimilarity ← -∞
        maxPair ← null
        for i = 1...N
            // C is symmetric, so only pairs with i < j need to be examined
            for j = i+1...N
                if I(i) == 1 && I(j) == 1
                    if C(i,j) > maxSimilarity
                        maxSimilarity ← C(i,j)
                        maxPair ← (i, j)
                    end
                end
            end
        end

        // Use the recorded best pair, not the scan indices i and j
        // (those no longer identify the best pair once the scan has finished)
        (a, b) ← maxPair
        M.append({a, b}) // We are merging clusters a and b

        // Row and column a now hold the similarities for the merged cluster.
        // The loop variable must differ from the outer merge counter.
        for m = 1...N
            // Only active clusters other than the merged pair need updating
            if I(m) == 1 && m != a && m != b
                s ← SIM({a, b}, m)
                C(a,m) ← s
                C(m,a) ← s
            end
        end
        I(b) ← 0 // Deactivate the "head" of the absorbed cluster
    end
    return M
end