(*
* Copyright (c) 2005, 2006, 2007 Abram Hindle
*
* This file is part of CaptchaBreaker
* CaptchaBreaker is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* Foobar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*)
(*
TODO:
goal 1: make a alphabet png
goal 2: load alphabet png and segment in letters and numbers
goal 3: make segmenter/cropper
goal 4: use segmeneter and cropper
goal 5: make hand made captchas
goal 6: generate captchas (perl?)
goal 7: bitmap match
* Segmenter needs to be improved
More line following (fill algorithm)
Handle dots
* PHPBB segment by half grey *
*)
open Images;;
open OImages;;
type ptype = F of float | I of int | S of string | L of (unit -> unit);;
let rec pj = function
[] -> print_string "\n";
| x::xs ->
match x with
F(f) -> print_float f;
| I(i) -> print_int i;
| S(s) -> print_string s;
| L(x) -> x () ;
;
print_string " ";
pj xs
;;
let get_basename s =
let dotpos = String.rindex s '/' in
String.sub s (dotpos + 1) (String.length s - dotpos - 1) in
let num_max x::xs =
let max = x in
let f m = function
[] -> m
| x::xs ->
f (if (x > m) then x else m) xs
in
f x xs
;;
let for_map f max =
let rec foreach = function
x when x >= max -> []
| x -> (f x) :: (foreach (x+1))
in
foreach 0
;;
(* WARNING INCLUSIVE *)
let for_map_from f from too =
let rec foreach = function
x when x > too -> []
| x -> (f x) :: (foreach (x+1))
in
foreach from
;;
let iteri f l =
let rec iii index = function
[] -> ()
| x::xs ->
let _ = f (index) x in
iii (index+1) xs
in
iii 0 l
;;
let for_iter f max =
let rec foreach = function
x when x >= max -> ()
| x -> f x;
foreach (x+1);
in
foreach 0
;;
let for_each_xy f width height =
for_iter (
fun y ->
for_iter ( fun x ->
f x y;
) width
) height
;;
let for_fold f max dfl =
let rec ffold last = function
x when x >= max -> last
| x ->
let last = f x last in
ffold last (x+1)
in
ffold dfl 0
;;
let fold_each_xy f width height dfl =
for_fold (fun y yl ->
for_fold (fun x xl ->
f x y xl
) width yl
) height dfl
;;
let edge (bmp : rgb24) =
let width = bmp#width in
let height = bmp#height in
let rgb24 = new rgb24 width height in
let f x y =
let center = bmp#get x y in
let points =
Array.init 3 (fun dx ->
Array.init 3 (fun dy ->
try
bmp#get (x+dx-1) (y+dy-1)
with
Out_of_image -> center)) in
let r = ref 0
and g = ref 0
and b = ref 0 in
let diff = ref 0 in
let add dx dy weight =
let rgb = points.(dx+1).(dy+1) in
r := !r + rgb.r * weight;
g := !g + rgb.g * weight;
b := !b + rgb.b * weight;
let dr = center.r - rgb.r
and dg = center.g - rgb.g
and db = center.b - rgb.b
in
diff := !diff + truncate (sqrt ( float (dr * dr + dg * dg + db * db) ))
in
add (-1) 0 1;
add 1 0 1;
add 0 (-1) 1;
add 0 1 1;
add (-1) (-1) 1;
add 1 (-1) 1;
add (-1) 1 1;
add 1 1 1;
(* 0 <= diff <= 3544 *)
let cw =
let cw = float !diff /. 3544.0 in
if cw < 0.0 then 0.0 else
if cw > 1.0 then 1.0 else cw in
(* we need to emphasize the difference *)
let cw = truncate (sqrt cw *. 256.0) in
let newcolor org =
let c = org / 8 + cw in
if c > 255 then 255 else c in
{ r= newcolor center.r;
g= newcolor center.g;
b= newcolor center.b } in
for x = 0 to width -1 do
for y = 0 to height -1 do
rgb24#unsafe_set x y (f x y)
done
done;
rgb24;;
let rec shortcircuit f = function
[] -> false
| x::xs -> if (f x) then
true
else
shortcircuit f xs
;;
let shortcircuit_c l =
shortcircuit (fun x -> x) l
;;
let col c =
(c.r,c.g,c.b)
;;
let is_text c =
let (r,g,b) = col c in
(r < 129 && g < 129 && b < 129)
;;
let segmenter ?(istext=is_text) (bmp : rgb24) =
let width = bmp#width in
let height = bmp#height in
let is_in x y (x1,y1,x2,y2) = (x >= x1 && x <= x2 && y >= y1 && y <= y2) in
let gp x y = if (is_in x y (0,0,(width-1),(height-1))) then
istext (bmp#get x y)
else
false
in
let segments = ref [] in
let currsegment = ref None in
let rec expandsegment (cx1,cy1,cx2,cy2) =
(* across top *)
let top = shortcircuit_c (
for_map_from (
fun x ->
gp x (cy1-1)
) (cx1-1) (cx2+1)
) in
let right = shortcircuit_c (for_map_from (fun x -> gp cx2(x+1)) (cy1-1) (cy2+1)) in
let left = shortcircuit_c (for_map_from (fun x -> gp cx1 (x-1)) (cy1-1) (cy2+1)) in
let bottom = shortcircuit_c (for_map_from (fun x -> gp x (cy2+1)) (cx1-1) (cx2+1)) in
let cx1' = if (left) then cx1 - 1 else cx1 in
let cy1' = if (top) then cy1 - 1 else cy1 in
let cx2' = if (right) then cx2 + 1 else cx2 in
let cy2' = if (bottom) then cy2 + 1 else cy2 in
(* DO A CHECK HERE IF WE SHOULD KEEP GOING *)
let ret = (cx1',cy1',cx2',cy2') in
if (top || right || left || bottom) then
expandsegment ret
else
ret
in
let segments = fold_each_xy
(fun x y o ->
if (shortcircuit (fun l -> is_in x y l) o) then
o
else if (gp x y) then
let segment = expandsegment (x,y,x,y) in
o @ [segment]
else
o
) width height []
in
List.map (fun (x1,y1,x2,y2) ->
let w = x2 - x1 + 1 in
let h = y2 - y1 + 1 in
let b = bmp#sub x1 y1 w h in
(* (b,(x1,y1,x2,y2)) *)
b
) segments
;;
let files = ref [] in
Arg.parse [] (fun s -> files := s :: !files) "edge files";
let files = List.rev !files in
List.iter (fun file ->
try
let rgb =
let oimage = OImages.load file [] in
match OImages.tag oimage with
| Index8 img ->
let rgb = img#to_rgb24 in
img#destroy;
rgb
| Index16 img ->
let rgb = img#to_rgb24 in
img#destroy;
rgb
| Rgb24 img -> img
| _ -> raise (Invalid_argument "not supported") in
(* let rgb' = edge rgb in *)
let bitmaps = segmenter rgb in
pj [S("List Length");I(List.length bitmaps);];
let get_extension s =
let dotpos = String.rindex s '.' in
String.sub s (dotpos + 1) (String.length s - dotpos - 1) in
let ext = get_extension file in
let body = String.sub file 0
(String.length file - String.length ext - 1) in
iteri (fun i b ->
let outfile = "segments/" ^ body ^ "." ^ (string_of_int i) ^ ".segment.jpg" in
pj [S(outfile)];
b#save outfile (Some Jpeg) [];
) bitmaps ;
with _ -> ()) files;;