(*
* Copyright (c) 2005, 2006, 2007 Abram Hindle
*
* This file is part of CaptchaBreaker
* CaptchaBreaker is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* Foobar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*)
(*
TODO:
goal 1: make a alphabet png
goal 2: load alphabet png and segment in letters and numbers
goal 3: make segmenter/cropper
goal 4: use segmeneter and cropper
goal 5: make hand made captchas
goal 6: generate captchas (perl?)
goal 7: bitmap match
* Segmenter needs to be improved
More line following (fill algorithm)
Handle dots
* PHPBB segment by half grey *
* move code into a lib
* AIM
* Need histogram
* Colorize works well
* -> 1 bit && invert if not majority
* use a fill based segmenter
*)
open Images;;
open OImages;;
type ptype = F of float | I of int | S of string | L of (unit -> unit);;
let rec pj = function
[] -> print_string "\n";
| x::xs ->
match x with
F(f) -> print_float f;
| I(i) -> print_int i;
| S(s) -> print_string s;
| L(x) -> x () ;
;
print_string " ";
pj xs
;;
let get_extension s =
let dotpos = String.rindex s '.' in
String.sub s (dotpos + 1) (String.length s - dotpos - 1)
;;
let load_rgb_file file =
let oimage = OImages.load file [] in
match OImages.tag oimage with
| Index8 img ->
let rgb = img#to_rgb24 in
img#destroy;
rgb
| Index16 img ->
let rgb = img#to_rgb24 in
img#destroy;
rgb
| Rgb24 img -> img
| _ -> raise (Invalid_argument "not supported")
;;
let for_map f max =
let rec foreach = function
x when x >= max -> []
| x -> (f x) :: (foreach (x+1))
in
foreach 0
;;
(* WARNING INCLUSIVE *)
let for_map_from f from too =
let rec foreach = function
x when x > too -> []
| x -> (f x) :: (foreach (x+1))
in
foreach from
;;
let iteri f l =
let rec iii index = function
[] -> ()
| x::xs ->
let _ = f (index) x in
iii (index+1) xs
in
iii 0 l
;;
let for_iter f max =
let rec foreach = function
x when x >= max -> ()
| x -> f x;
foreach (x+1);
in
foreach 0
;;
let for_each_xy f width height =
for_iter (
fun y ->
for_iter ( fun x ->
f x y;
) width
) height
;;
let for_fold f max dfl =
let rec ffold last = function
x when x >= max -> last
| x ->
let last = f x last in
ffold last (x+1)
in
ffold dfl 0
;;
let fold_each_xy f width height dfl =
for_fold (fun y yl ->
for_fold (fun x xl ->
f x y xl
) width yl
) height dfl
;;
let edge (bmp : rgb24) =
let width = bmp#width in
let height = bmp#height in
let rgb24 = new rgb24 width height in
let f x y =
let center = bmp#get x y in
let points =
Array.init 3 (fun dx ->
Array.init 3 (fun dy ->
try
bmp#get (x+dx-1) (y+dy-1)
with
Out_of_image -> center)) in
let r = ref 0
and g = ref 0
and b = ref 0 in
let diff = ref 0 in
let add dx dy weight =
let rgb = points.(dx+1).(dy+1) in
r := !r + rgb.r * weight;
g := !g + rgb.g * weight;
b := !b + rgb.b * weight;
let dr = center.r - rgb.r
and dg = center.g - rgb.g
and db = center.b - rgb.b
in
diff := !diff + truncate (sqrt ( float (dr * dr + dg * dg + db * db) ))
in
add (-1) 0 1;
add 1 0 1;
add 0 (-1) 1;
add 0 1 1;
add (-1) (-1) 1;
add 1 (-1) 1;
add (-1) 1 1;
add 1 1 1;
(* 0 <= diff <= 3544 *)
let cw =
let cw = float !diff /. 3544.0 in
if cw < 0.0 then 0.0 else
if cw > 1.0 then 1.0 else cw in
(* we need to emphasize the difference *)
let cw = truncate (sqrt cw *. 256.0) in
let newcolor org =
let c = org / 8 + cw in
if c > 255 then 255 else c in
{ r= newcolor center.r;
g= newcolor center.g;
b= newcolor center.b } in
for x = 0 to width -1 do
for y = 0 to height -1 do
rgb24#unsafe_set x y (f x y)
done
done;
rgb24;;
let rec shortcircuit f = function
[] -> false
| x::xs -> if (f x) then
true
else
shortcircuit f xs
;;
let shortcircuit_c l =
shortcircuit (fun x -> x) l
;;
let col c =
(c.r,c.g,c.b)
;;
let is_text c =
let (r,g,b) = col c in
(r < 129 && g < 129 && b < 129)
;;
let is_in x y (x1,y1,x2,y2) = (x >= x1 && x <= x2 && y >= y1 && y <= y2) ;;
let segmenter ?(istext=is_text) (bmp : rgb24) =
let width = bmp#width in
let height = bmp#height in
let gp x y = if (is_in x y (0,0,(width-1),(height-1))) then
istext (bmp#get x y)
else
false
in
let segments = ref [] in
let currsegment = ref None in
let rec expandsegment (cx1,cy1,cx2,cy2) =
(* across top *)
let top = shortcircuit_c (
for_map_from (
fun x ->
gp x (cy1-1)
) (cx1-1) (cx2+1)
) in
let right = shortcircuit_c (for_map_from (fun x -> gp cx2(x+1)) (cy1-1) (cy2+1)) in
let left = shortcircuit_c (for_map_from (fun x -> gp cx1 (x-1)) (cy1-1) (cy2+1)) in
let bottom = shortcircuit_c (for_map_from (fun x -> gp x (cy2+1)) (cx1-1) (cx2+1)) in
let cx1' = if (left) then cx1 - 1 else cx1 in
let cy1' = if (top) then cy1 - 1 else cy1 in
let cx2' = if (right) then cx2 + 1 else cx2 in
let cy2' = if (bottom) then cy2 + 1 else cy2 in
(* DO A CHECK HERE IF WE SHOULD KEEP GOING *)
let ret = (cx1',cy1',cx2',cy2') in
if (top || right || left || bottom) then
expandsegment ret
else
ret
in
let segments = fold_each_xy
(fun x y o ->
if (shortcircuit (fun l -> is_in x y l) o) then
o
else if (gp x y) then
let segment = expandsegment (x,y,x,y) in
o @ [segment]
else
o
) width height []
in
List.map (fun (x1,y1,x2,y2) ->
let w = x2 - x1 + 1 in
let h = y2 - y1 + 1 in
let b = bmp#sub x1 y1 w h in
(b,(x1,y1,x2,y2))
(* b *)
) segments
;;
let segment_size (x1,y1,x2,y2) =
(x2 - x1) * (y2 - y1)
;;
let rec segment_filter size = function
[] -> []
| (bmp,x)::xs ->
if ((segment_size x) < size) then
segment_filter size xs
else
(bmp,x) :: (segment_filter size xs)
;;
let segment_sort segments =
List.sort (fun (_,(x,_,_,_)) (_,(y,_,_,_)) ->
if (x = y) then
0
else if (x > y) then
1
else
-1
) segments ;;
let is_image_file file =
let ext = String.lowercase (get_extension file) in
(ext = "png" || ext = "jpg" || ext = "gif" || ext = "jpeg" || ext = "bmp")
;;
let load_phphash () =
(* print_string "Loading phphash!\n"; *)
let values = [ "A" ; "B" ; "C" ; "D" ; "E" ; "F" ;
"G" ; "H" ; "I" ; "J" ; "K" ; "L" ;
"M" ; "N" ; "O" ; "P" ; "Q" ; "R" ;
"S" ; "T" ; "U" ; "V" ; "W" ; "X" ;
"Y" ; "Z" ; "0" ; "1" ; "2" ; "3" ;
"4" ; "5" ; "6" ; "7" ; "8" ; "9" ] in
let l = List.map (
fun ch ->
let dir = "fonts/phpbb/"^ch in
(* print_string dir ;
print_string "\n"; *)
let files = Sys.readdir dir in
(* List.iter (fun x -> print_string x ; print_string "\n";) (Array.to_list files) ; *)
let files = List.filter (is_image_file) (Array.to_list files) in
(* List.iter (fun x -> print_string x ; print_string "\n";) files ; *)
let bmps = List.map (
fun x ->
let path = dir ^ "/" ^ x in
let bmp = load_rgb_file path in
bmp
) files in
(ch,bmps)
) values
in
List.flatten (
List.map (fun (label,l2) ->
List.map (fun x ->
(label,x)
) l2
) l
)
;;
let bool2int b = if b then 1 else 0 ;;
let maxlist f l =
let head::tail = l in
let rec max m v = function
[] -> m
| x::xs ->
let fx = f x in
if (fx > v) then
max x fx xs
else
max m v xs
in
max head (f head) tail
;;
let quickcompare f b1 b2 =
let bw1 = b1#width in
let bw2 = b2#width in
let bh1 = b1#height in
let bh2 = b2#height in
let (a,b,c) = fold_each_xy (
fun x y (a,b,c) ->
let ab = (f b1 bw1 bh1 x y) in
let bb = (f b2 bw2 bh2 x y) in
let a' = bool2int ab in
let b' = bool2int bb in
let c' = bool2int (ab && bb) in
let a = a + a' in
let b = b + b' in
let c = c + c' in
(a,b,c)
) bw1 bh1 (0,0,0) in
((float_of_int c) /. (float_of_int b) +. (float_of_int c) /. (float_of_int a)) /. 2.
;;
let phpbbsolver ?(phphash = [])(bmp : rgb24) =
let phphash = if ((List.length phphash) > 0) then phphash else load_phphash () in
let segments = segmenter bmp in
let segments = segment_filter 100 segments in
let segments = segment_sort segments in
let istext c =
let (r,g,b) = col c in
(r < 129 && g < 129 && b < 129)
in
let qp bmp width height x y =
if (is_in x y (0,0,(width-1),(height-1))) then
istext (bmp#get x y)
else
false
in
String.concat "" (
List.map (
fun (bmp,segment) ->
let matches = List.map (
fun (label,x) ->
let v = quickcompare qp bmp x in
(v,label,x)
) phphash in
let (value,label,matchbmp) = maxlist (fun (v,_,_) -> v) matches in
label
) segments
)
;;
let files = ref [] in
Arg.parse [] (fun s -> files := s :: !files) "edge files";
let files = List.rev !files in
let phphash = load_phphash() in
List.iter (fun file ->
try
(* pj [S("Loading ");S(file)]; *)
let rgb = load_rgb_file file in
(* pj [S("File Loaded")]; *)
let str = phpbbsolver ~phphash:phphash rgb in
pj [S(file);S(str)];
(* let rgb' = edge rgb in *)
(*
let bitmaps = segmenter rgb in
pj [S("List Length");I(List.length bitmaps);];
*)
(*
let ext = get_extension file in
let body = String.sub file 0
(String.length file - String.length ext - 1) in
*)
(*
iteri (fun i (b,_) ->
let outfile = "segments/" ^ body ^ "." ^ (string_of_int i) ^ ".segment.jpg" in
pj [S(outfile)];
b#save outfile (Some Jpeg) [];
) bitmaps ;
*)
with _ -> ()) files;;