import Data.List -- For nub {- Haskell implementation of k-Means Clustering, with a bit of I/O: The user is prompted for the details of the initial cluster centers, but the data points are hard-coded in. -} -- Data type to hold information about a labeled data point. The constructor -- takes a label and a list of Doubles that is the point's location. It's -- general enough that the label can be any type, though the rest of this -- code consistently uses strings for labels. data LabeledPoint a = Point a [Double] deriving (Ord, Show) -- For a change, we'll implement == rather than asking for its default implementation. -- We'll consider points == if the coordinates are within .0001 across all dimensions. instance Eq (LabeledPoint a) where Point _ ns == Point _ ms = and (map (\(n,m)->abs(n-m)<0.0001) (zip ns ms)) -- Some helper functions for accessing content in points getLabel (Point label _) = label getCoord (Point _ coords) = coords ------------------------------------------------------------------------------- -- Lots of functions -- get defined here ------------------------------------------------------------------------------- -- The main function! It collects some info from the user, runs the clustering -- function, and reports the results. Note that the data points being clustered -- are hard-coded in. The "let" in the second line gives the name pts to the -- list of points to be clustered. You can set it to points_2D or points_1D, or -- add your own list of data points to the program and cluster that. main = do putStrLn "Welcome to k-Means" let pts = points_2D -- use points_1D or points_2D here, or create your own let numPts = length pts let len = length (getCoord (head pts)) putStrLn ("There are "++(show numPts)++" points to be labeled.") putStrLn ("It looks like we're working with "++(show len)++" dimension(s).") -- Now call readCenters to get info from user about the centers -- Call cluster with pts and the centers you just got read in from the user -- Report the results -- This helper function prints out results in a format that can be directly pasted -- into a Google Sheet and plotted as a Scatter Chart to see the clusters. formatData points centers = do let labels = nub (map getLabel points) -- Get list of unique labels let tabs n = ['\t' | _<-[1..n]] -- Build sequence of n \t chars -- Turns a single point into corresponding string with \t's let display (Point _ (x:ys)) n = (show x)++(tabs n)++(concat (map ((++"\t").show) ys)) -- Group points with related labels together let groups = map (\l->filter ((==l).getLabel) points) labels -- Print entire group with specified # of tabs between columns let printGroup g n = mapM_ putStrLn (map (\p->display p n) g) -- Print each of the groups mapM_ (\n->printGroup (groups !! n) (n+1)) [0..((length labels)-1)] -- Now print the centers at an additional tab level mapM_ (\p->putStrLn (display p ((length labels)+1))) centers -- You could call "cluster points_1D centers_1D" to try out your code on a simple problem centers_1D = [Point "left" [0], Point "right" [11.9], Point "mid" [4.9]] points_1D = [Point "a" [0], Point "a" [2], Point "a" [5], Point "a" [7], Point "a" [10], Point "a" [12]] -- These are the centers and data points used in the example on the assignment page centers_2D = [Point "one" [2,3], Point "two" [13,22], Point "three" [28,17]] points_2D = [Point "a" [12.1,7.2], Point "a" [8.7,11.0], Point "a" [6.1,3.9], Point "a" [11.7,8.2], Point "a" [7.6,8.5], Point "a" [4.3,11.0], Point "a" [7.5,11.7], Point "a" [6.8,6.3], Point "a" [8.6,14.4], Point "a" [17.0,17.6], Point "a" [12.8,16.3], Point "a" [4.2,10.9], Point "a" [2.8,6.6], Point "a" [2.6,15.0], Point "a" [10.1,10.0], Point "a" [2.0,15.6], Point "a" [7.5,8.4], Point "a" [10.3,11.7], Point "a" [14.2,7.8], Point "a" [9.8,3.7], Point "a" [8.6,9.1], Point "a" [9.2,8.2], Point "a" [9.8,7.3], Point "a" [9.6,9.6], Point "a" [10.2,14.5], Point "a" [9.1,9.0], Point "a" [13.0,9.7], Point "a" [6.3,9.4], Point "a" [10.2,10.1], Point "a" [11.1,13.8], Point "a" [9.4,12.0], Point "a" [13.1,5.2], Point "a" [5.9,12.9], Point "a" [12.1,9.3], Point "a" [3.7,13.3], Point "a" [10.8,10.0], Point "a" [9.1,3.2], Point "a" [12.2,-1.1], Point "a" [10.0,10.0], Point "a" [13.9,11.2], Point "a" [9.4,10.0], Point "a" [14.2,7.6], Point "a" [10.7,4.6], Point "a" [9.2,10.4], Point "a" [9.8,10.7], Point "a" [11.6,9.5], Point "a" [13.7,8.3], Point "a" [10.5,11.6], Point "a" [11.2,10.6], Point "a" [11.5,12.3], Point "a" [18.5,26.9], Point "a" [19.9,25.2], Point "a" [18.7,24.2], Point "a" [18.2,33.3], Point "a" [24.1,25.8], Point "a" [18.8,25.9], Point "a" [17.1,24.5], Point "a" [17.0,26.2], Point "a" [20.3,25.0], Point "a" [20.3,25.0], Point "a" [22.0,27.3], Point "a" [21.5,25.8], Point "a" [22.1,26.6], Point "a" [25.6,28.9], Point "a" [13.9,20.7], Point "a" [26.0,27.2], Point "a" [19.9,25.0], Point "a" [10.2,26.0], Point "a" [19.9,24.8], Point "a" [20.3,24.3], Point "a" [19.0,28.7], Point "a" [17.0,22.1], Point "a" [15.2,22.4], Point "a" [21.0,22.2], Point "a" [19.1,28.5], Point "a" [22.6,24.0], Point "a" [18.2,26.4], Point "a" [20.0,25.1], Point "a" [19.3,18.8], Point "a" [20.7,21.3], Point "a" [17.3,24.4], Point "a" [24.5,19.7], Point "a" [12.3,21.6], Point "a" [16.4,25.6], Point "a" [22.0,26.4], Point "a" [18.1,24.1], Point "a" [19.0,23.8], Point "a" [21.1,31.5], Point "a" [17.4,24.0], Point "a" [20.1,24.9], Point "a" [17.7,22.0], Point "a" [20.6,29.3], Point "a" [21.2,29.7], Point "a" [19.8,18.9], Point "a" [21.4,18.8], Point "a" [23.1,23.5], Point "a" [21.2,24.5], Point "a" [21.0,24.5], Point "a" [19.9,23.9], Point "a" [19.9,18.7], Point "a" [25.9,14.2], Point "a" [26.5,12.4], Point "a" [32.3,10.3], Point "a" [26.0,5.7], Point "a" [22.7,6.0], Point "a" [30.1,13.0], Point "a" [33.8,8.1], Point "a" [29.9,9.3], Point "a" [38.9,10.3], Point "a" [30.7,10.5], Point "a" [35.2,1.3], Point "a" [28.4,14.8], Point "a" [30.8,8.2], Point "a" [26.3,16.8], Point "a" [24.3,7.6], Point "a" [32.1,14.4], Point "a" [31.2,8.6], Point "a" [31.4,9.3], Point "a" [31.3,10.5], Point "a" [35.4,13.3], Point "a" [30.0,10.7], Point "a" [29.1,10.2], Point "a" [25.2,6.0], Point "a" [30.3,10.0], Point "a" [30.0,2.9], Point "a" [28.7,0.3], Point "a" [29.2,8.8], Point "a" [25.4,-1.5], Point "a" [24.0,4.5], Point "a" [30.2,14.3], Point "a" [33.5,11.2], Point "a" [31.9,10.7], Point "a" [28.1,10.4], Point "a" [29.9,8.7], Point "a" [31.6,10.4], Point "a" [29.3,8.5], Point "a" [29.3,13.7], Point "a" [27.2,11.3], Point "a" [33.0,7.7], Point "a" [30.0,10.0], Point "a" [28.4,15.1], Point "a" [37.1,13.3], Point "a" [37.6,13.1], Point "a" [29.6,10.7], Point "a" [31.9,14.2], Point "a" [29.5,9.7], Point "a" [30.1,9.5], Point "a" [29.6,9.6], Point "a" [34.1,14.7], Point "a" [29.6,9.8]]