(* * Copyright (c) 2005, 2006, 2007 Abram Hindle * * This file is part of CaptchaBreaker * CaptchaBreaker is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * Foobar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program. If not, see . *) (* TODO: goal 1: make a alphabet png goal 2: load alphabet png and segment in letters and numbers goal 3: make segmenter/cropper goal 4: use segmeneter and cropper goal 5: make hand made captchas goal 6: generate captchas (perl?) goal 7: bitmap match * Segmenter needs to be improved More line following (fill algorithm) Handle dots * PHPBB segment by half grey * * move code into a lib * AIM * Need histogram * Colorize works well * -> 1 bit && invert if not majority * use a fill based segmenter *) open Images;; open OImages;; type ptype = F of float | I of int | S of string | L of (unit -> unit);; let rec pj = function [] -> print_string "\n"; | x::xs -> match x with F(f) -> print_float f; | I(i) -> print_int i; | S(s) -> print_string s; | L(x) -> x () ; ; print_string " "; pj xs ;; let get_extension s = let dotpos = String.rindex s '.' in String.sub s (dotpos + 1) (String.length s - dotpos - 1) ;; let get_basename s = let dotpos = String.rindex s '/' in String.sub s (dotpos + 1) (String.length s - dotpos - 1) ;; let load_rgb_file file = let oimage = OImages.load file [] in match OImages.tag oimage with | Index8 img -> let rgb = img#to_rgb24 in img#destroy; rgb | Index16 img -> let rgb = img#to_rgb24 in img#destroy; rgb | Rgb24 img -> img | _ -> raise (Invalid_argument "not supported") ;; let for_map f max = let rec foreach = function x when x >= max -> [] | x -> (f x) :: (foreach (x+1)) in foreach 0 ;; (* WARNING INCLUSIVE *) let for_map_from f from too = let rec foreach = function x when x > too -> [] | x -> (f x) :: (foreach (x+1)) in foreach from ;; let iteri f l = let rec iii index = function [] -> () | x::xs -> let _ = f (index) x in iii (index+1) xs in iii 0 l ;; let for_iter f max = let rec foreach = function x when x >= max -> () | x -> f x; foreach (x+1); in foreach 0 ;; let for_each_xy f width height = for_iter ( fun y -> for_iter ( fun x -> f x y; ) width ) height ;; let for_fold f max dfl = let rec ffold last = function x when x >= max -> last | x -> let last = f x last in ffold last (x+1) in ffold dfl 0 ;; let fold_each_xy f width height dfl = for_fold (fun y yl -> for_fold (fun x xl -> f x y xl ) width yl ) height dfl ;; let rec shortcircuit f = function [] -> false | x::xs -> if (f x) then true else shortcircuit f xs ;; let shortcircuit_c l = shortcircuit (fun x -> x) l ;; let col c = (c.r,c.g,c.b) ;; let is_text c = let (r,g,b) = col c in (r > 129 && g > 129 && b > 129) ;; let is_in x y (x1,y1,x2,y2) = (x >= x1 && x <= x2 && y >= y1 && y <= y2) ;; let segmenter ?(istext=is_text) (bmp : rgb24) = let width = bmp#width in let height = bmp#height in (* pj [S("W/H:");I(width);I(height);]; *) let gp x y = if (is_in x y (0,0,(width-1),(height-1))) then istext (bmp#get x y) else false in let segments = ref [] in let currsegment = ref None in let rec expandsegment (cx1,cy1,cx2,cy2) = (* across top *) let top = shortcircuit_c ( for_map_from ( fun x -> gp x (cy1-1) ) (cx1-1) (cx2+1) ) in let right = shortcircuit_c (for_map_from (fun x -> gp cx2(x+1)) (cy1-1) (cy2+1)) in let left = shortcircuit_c (for_map_from (fun x -> gp cx1 (x-1)) (cy1-1) (cy2+1)) in let bottom = shortcircuit_c (for_map_from (fun x -> gp x (cy2+1)) (cx1-1) (cx2+1)) in let cx1' = if (left) then cx1 - 1 else cx1 in let cy1' = if (top) then cy1 - 1 else cy1 in let cx2' = if (right) then cx2 + 1 else cx2 in let cy2' = if (bottom) then cy2 + 1 else cy2 in (* DO A CHECK HERE IF WE SHOULD KEEP GOING *) let ret = (cx1',cy1',cx2',cy2') in if (top || right || left || bottom) then expandsegment ret else ret in let segments = fold_each_xy (fun x y o -> if (shortcircuit (fun l -> is_in x y l) o) then o else if (gp x y) then let segment = expandsegment (x,y,x,y) in o @ [segment] else o ) width height [] in (* pj [S("SHORTS?")]; *) List.map (fun (x1,y1,x2,y2) -> let w = x2 - x1 + 1 in let h = y2 - y1 + 1 in let w = min w (width-1) in let h = min h (height-1) in let x1 = min x1 (width-1) in let y1 = min y1 (height-1) in let x1 = max x1 0 in let y1 = max y1 0 in (* pj [I(x1);I(y1);I(w);I(h);]; *) let b = bmp#sub x1 y1 w h in (b,(x1,y1,x2,y2)) ) segments ;; let segment_size (x1,y1,x2,y2) = (x2 - x1) * (y2 - y1) ;; let rec segment_filter size = function [] -> [] | (bmp,x)::xs -> if ((segment_size x) < size) then segment_filter size xs else (bmp,x) :: (segment_filter size xs) ;; let segment_sort segments = List.sort (fun (_,(x,_,_,_)) (_,(y,_,_,_)) -> if (x = y) then 0 else if (x > y) then 1 else -1 ) segments ;; let is_image_file file = let ext = String.lowercase (get_extension file) in (ext = "png" || ext = "jpg" || ext = "gif" || ext = "jpeg" || ext = "bmp") ;; let load_ebaumhash () = (* print_string "Loading ebaumhash!\n"; *) let values = [ "0" ; "1" ; "2" ; "3" ; "4" ; "5" ; "6" ; "7" ; "8" ; "9" ] in let l = List.map ( fun ch -> let dir = "fonts/ebaum/"^ch in (* print_string dir ; print_string "\n"; *) let files = Sys.readdir dir in (* List.iter (fun x -> print_string x ; print_string "\n";) (Array.to_list files) ; *) let files = List.filter (is_image_file) (Array.to_list files) in (* List.iter (fun x -> print_string x ; print_string "\n";) files ; *) let bmps = List.map ( fun x -> let path = dir ^ "/" ^ x in let bmp = load_rgb_file path in bmp ) files in (ch,bmps) ) values in List.flatten ( List.map (fun (label,l2) -> List.map (fun x -> (label,x) ) l2 ) l ) ;; let bool2int b = if b then 1 else 0 ;; let maxlist f l = let head::tail = l in let rec max m v = function [] -> m | x::xs -> let fx = f x in if (fx > v) then max x fx xs else max m v xs in max head (f head) tail ;; let quickcompare f b1 b2 = let bw1 = b1#width in let bw2 = b2#width in let bh1 = b1#height in let bh2 = b2#height in let (a,b,c) = fold_each_xy ( fun x y (a,b,c) -> let ab = (f b1 bw1 bh1 x y) in let bb = (f b2 bw2 bh2 x y) in let a' = bool2int ab in let b' = bool2int bb in let c' = bool2int (ab && bb) in let a = a + a' in let b = b + b' in let c = c + c' in (a,b,c) ) bw1 bh1 (0,0,0) in ((float_of_int c) /. (float_of_int b) +. (float_of_int c) /. (float_of_int a)) /. 2. ;; let ebaumsolver ?(ebaumhash = [])(bmp : rgb24) = let ebaumhash = if ((List.length ebaumhash) > 0) then ebaumhash else load_ebaumhash () in let segments = segmenter bmp in let segments = segment_filter 32 segments in let segments = segment_sort segments in let istext c = let (r,g,b) = col c in (r > 129 && g > 129 && b > 129) in let qp bmp width height x y = if (is_in x y (0,0,(width-1),(height-1))) then istext (bmp#get x y) else false in String.concat "" ( List.map ( fun (bmp,segment) -> let matches = List.map ( fun (label,x) -> let v = quickcompare qp bmp x in (v,label,x) ) ebaumhash in let (value,label,matchbmp) = maxlist (fun (v,_,_) -> v) matches in label ) segments ) ;; let files = ref [] in Arg.parse [] (fun s -> files := s :: !files) "edge files"; let files = List.rev !files in let (first::rest) = files in let (files,segmentit) = if (first = "segment") then (rest,true) else (files,false) in let ebaumhash = load_ebaumhash() in List.iter (fun file -> (* pj [S("File ");S(file);]; *) let rgb = load_rgb_file file in if (segmentit) then begin pj [S("BITMAP");]; let bitmaps = segmenter rgb in pj [S("SEGMENTED");]; pj [S("List Length");I(List.length bitmaps);]; let ext = get_extension file in let body = get_basename (String.sub file 0 (String.length file - String.length ext - 1)) in let bitmaps = segmenter rgb in iteri (fun i (b,_) -> let outfile = "segments/" ^ body ^ "." ^ (string_of_int i) ^ ".segment.jpg" in pj [S(outfile)]; b#save outfile (Some Jpeg) []; ) bitmaps ; end else begin (* pj [S("SOLVE")]; *) let str = ebaumsolver ~ebaumhash:ebaumhash rgb in pj [S(file);S(str)]; end; ) files;;