(* * Copyright (c) 2005, 2006, 2007 Abram Hindle * * This file is part of CaptchaBreaker * CaptchaBreaker is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * Foobar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License * along with this program. If not, see . *) (* TODO: goal 1: make a alphabet png goal 2: load alphabet png and segment in letters and numbers goal 3: make segmenter/cropper goal 4: use segmeneter and cropper goal 5: make hand made captchas goal 6: generate captchas (perl?) goal 7: bitmap match * Segmenter needs to be improved More line following (fill algorithm) Handle dots * PHPBB segment by half grey * *) open Images;; open OImages;; type ptype = F of float | I of int | S of string | L of (unit -> unit);; let rec pj = function [] -> print_string "\n"; | x::xs -> match x with F(f) -> print_float f; | I(i) -> print_int i; | S(s) -> print_string s; | L(x) -> x () ; ; print_string " "; pj xs ;; let get_basename s = let dotpos = String.rindex s '/' in String.sub s (dotpos + 1) (String.length s - dotpos - 1) in let num_max x::xs = let max = x in let f m = function [] -> m | x::xs -> f (if (x > m) then x else m) xs in f x xs ;; let for_map f max = let rec foreach = function x when x >= max -> [] | x -> (f x) :: (foreach (x+1)) in foreach 0 ;; (* WARNING INCLUSIVE *) let for_map_from f from too = let rec foreach = function x when x > too -> [] | x -> (f x) :: (foreach (x+1)) in foreach from ;; let iteri f l = let rec iii index = function [] -> () | x::xs -> let _ = f (index) x in iii (index+1) xs in iii 0 l ;; let for_iter f max = let rec foreach = function x when x >= max -> () | x -> f x; foreach (x+1); in foreach 0 ;; let for_each_xy f width height = for_iter ( fun y -> for_iter ( fun x -> f x y; ) width ) height ;; let for_fold f max dfl = let rec ffold last = function x when x >= max -> last | x -> let last = f x last in ffold last (x+1) in ffold dfl 0 ;; let fold_each_xy f width height dfl = for_fold (fun y yl -> for_fold (fun x xl -> f x y xl ) width yl ) height dfl ;; let edge (bmp : rgb24) = let width = bmp#width in let height = bmp#height in let rgb24 = new rgb24 width height in let f x y = let center = bmp#get x y in let points = Array.init 3 (fun dx -> Array.init 3 (fun dy -> try bmp#get (x+dx-1) (y+dy-1) with Out_of_image -> center)) in let r = ref 0 and g = ref 0 and b = ref 0 in let diff = ref 0 in let add dx dy weight = let rgb = points.(dx+1).(dy+1) in r := !r + rgb.r * weight; g := !g + rgb.g * weight; b := !b + rgb.b * weight; let dr = center.r - rgb.r and dg = center.g - rgb.g and db = center.b - rgb.b in diff := !diff + truncate (sqrt ( float (dr * dr + dg * dg + db * db) )) in add (-1) 0 1; add 1 0 1; add 0 (-1) 1; add 0 1 1; add (-1) (-1) 1; add 1 (-1) 1; add (-1) 1 1; add 1 1 1; (* 0 <= diff <= 3544 *) let cw = let cw = float !diff /. 3544.0 in if cw < 0.0 then 0.0 else if cw > 1.0 then 1.0 else cw in (* we need to emphasize the difference *) let cw = truncate (sqrt cw *. 256.0) in let newcolor org = let c = org / 8 + cw in if c > 255 then 255 else c in { r= newcolor center.r; g= newcolor center.g; b= newcolor center.b } in for x = 0 to width -1 do for y = 0 to height -1 do rgb24#unsafe_set x y (f x y) done done; rgb24;; let rec shortcircuit f = function [] -> false | x::xs -> if (f x) then true else shortcircuit f xs ;; let shortcircuit_c l = shortcircuit (fun x -> x) l ;; let col c = (c.r,c.g,c.b) ;; let is_text c = let (r,g,b) = col c in (r < 129 && g < 129 && b < 129) ;; let segmenter ?(istext=is_text) (bmp : rgb24) = let width = bmp#width in let height = bmp#height in let is_in x y (x1,y1,x2,y2) = (x >= x1 && x <= x2 && y >= y1 && y <= y2) in let gp x y = if (is_in x y (0,0,(width-1),(height-1))) then istext (bmp#get x y) else false in let segments = ref [] in let currsegment = ref None in let rec expandsegment (cx1,cy1,cx2,cy2) = (* across top *) let top = shortcircuit_c ( for_map_from ( fun x -> gp x (cy1-1) ) (cx1-1) (cx2+1) ) in let right = shortcircuit_c (for_map_from (fun x -> gp cx2(x+1)) (cy1-1) (cy2+1)) in let left = shortcircuit_c (for_map_from (fun x -> gp cx1 (x-1)) (cy1-1) (cy2+1)) in let bottom = shortcircuit_c (for_map_from (fun x -> gp x (cy2+1)) (cx1-1) (cx2+1)) in let cx1' = if (left) then cx1 - 1 else cx1 in let cy1' = if (top) then cy1 - 1 else cy1 in let cx2' = if (right) then cx2 + 1 else cx2 in let cy2' = if (bottom) then cy2 + 1 else cy2 in (* DO A CHECK HERE IF WE SHOULD KEEP GOING *) let ret = (cx1',cy1',cx2',cy2') in if (top || right || left || bottom) then expandsegment ret else ret in let segments = fold_each_xy (fun x y o -> if (shortcircuit (fun l -> is_in x y l) o) then o else if (gp x y) then let segment = expandsegment (x,y,x,y) in o @ [segment] else o ) width height [] in List.map (fun (x1,y1,x2,y2) -> let w = x2 - x1 + 1 in let h = y2 - y1 + 1 in let b = bmp#sub x1 y1 w h in (* (b,(x1,y1,x2,y2)) *) b ) segments ;; let files = ref [] in Arg.parse [] (fun s -> files := s :: !files) "edge files"; let files = List.rev !files in List.iter (fun file -> try let rgb = let oimage = OImages.load file [] in match OImages.tag oimage with | Index8 img -> let rgb = img#to_rgb24 in img#destroy; rgb | Index16 img -> let rgb = img#to_rgb24 in img#destroy; rgb | Rgb24 img -> img | _ -> raise (Invalid_argument "not supported") in (* let rgb' = edge rgb in *) let bitmaps = segmenter rgb in pj [S("List Length");I(List.length bitmaps);]; let get_extension s = let dotpos = String.rindex s '.' in String.sub s (dotpos + 1) (String.length s - dotpos - 1) in let ext = get_extension file in let body = String.sub file 0 (String.length file - String.length ext - 1) in iteri (fun i b -> let outfile = "segments/" ^ body ^ "." ^ (string_of_int i) ^ ".segment.jpg" in pj [S(outfile)]; b#save outfile (Some Jpeg) []; ) bitmaps ; with _ -> ()) files;;