(* * Copyright (c) 2005, 2006, 2007 Abram Hindle * * This file is part of CaptchaBreaker * CaptchaBreaker is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * Foobar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program. If not, see . *) let kmeans ?epsilon:(epsilon=1.0) ?iters:(iters=60) k xrange yrange pntlist = let arr = Array.create k (0.,0.) in let (xs,xe) = xrange in let xr = xe -. xs in let (ys,ye) = yrange in let yr = ye -. ys in let rand_coord () = (((Random.float xr) +. xs), ((Random.float yr) +. ys)) in for i = 0 to (k - 1) do arr.(i) <- rand_coord (); done; let dist (x,y) (x2,y2) = let x' = x -. x2 in let y' = y -. y2 in x'*.x' +. y'*.y' in let find_closest_centroid arr (x,y) = let m = ref (dist arr.(0) (x,y)) in let mi = ref 0 in for i = 1 to (k-1) do let d = dist arr.(i) (x,y) in if (d < !m) then begin m := d; mi := i; end; done; !mi in let pntlist = List.map (fun (x,y) -> ((find_closest_centroid arr (x,y)),(x,y))) pntlist in let split pntlist = let arr = Array.create k [] in List.iter (fun (c,(x,y)) -> arr.(c) <- (x,y)::(arr.(c)) ) pntlist; Array.to_list arr in let tolerance arr1 arr2 = let t = ref 0. in for i = 0 to (k-1) do t := !t +. (dist arr1.(i) arr2.(i)) done; let avg = !t /. (float_of_int k) in (avg < epsilon) in let rec kmeans n oldarr pntlist = let arr = Array.create k (0.,0.) in let counts = Array.create k 0 in List.iter (fun (c,(x,y)) -> counts.(c) <- counts.(c) + 1 ; let (x',y') = arr.(c) in arr.(c) <- (x' +. x,y' +. y); ) pntlist; for i = 0 to (k-1) do let (x,y) = arr.(i) in if (counts.(i) = 0) then () else arr.(i) <- (x /. (float_of_int counts.(i)), y /. (float_of_int counts.(i))) done; let pntlist = List.map (fun (c,(x,y)) -> ((find_closest_centroid arr (x,y)),(x,y))) pntlist in for i = 0 to (k - 1) do let (x,y) = arr.(i) in print_endline (String.concat "\t" [(string_of_int i) ; (string_of_float x) ; (string_of_float y)]) done; if (tolerance oldarr arr || n > iters) then split pntlist else kmeans (n+1) arr pntlist in kmeans 0 arr pntlist ;;