(*
* Copyright (c) 2005, 2006, 2007 Abram Hindle
*
* This file is part of CaptchaBreaker
* CaptchaBreaker is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
* Foobar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*)
(*
TODO:
goal 1: make a alphabet png
goal 2: load alphabet png and segment in letters and numbers
goal 3: make segmenter/cropper
goal 4: use segmeneter and cropper
goal 5: make hand made captchas
goal 6: generate captchas (perl?)
goal 7: bitmap match
* Segmenter needs to be improved
More line following (fill algorithm)
Handle dots
* PHPBB segment by half grey *
* move code into a lib
* AIM
* Need histogram
* Colorize works well
* -> 1 bit && invert if not majority
* use a fill based segmenter
*)
open Images;;
open OImages;;
type ptype = F of float | I of int | S of string | L of (unit -> unit);;
let rec pj = function
[] -> print_endline "";
| x::xs ->
match x with
F(f) -> print_float f;
| I(i) -> print_int i;
| S(s) -> print_string s;
| L(x) -> x () ;
;
print_string " ";
pj xs
;;
let get_extension s =
let dotpos = String.rindex s '.' in
String.sub s (dotpos + 1) (String.length s - dotpos - 1)
;;
let get_basename s =
let dotpos = String.rindex s '/' in
String.sub s (dotpos + 1) (String.length s - dotpos - 1)
;;
let load_rgb_file file =
let oimage = OImages.load file [] in
match OImages.tag oimage with
| Index8 img ->
let rgb = img#to_rgb24 in
img#destroy;
rgb
| Index16 img ->
let rgb = img#to_rgb24 in
img#destroy;
rgb
| Rgb24 img -> img
| _ -> raise (Invalid_argument "not supported")
;;
let for_map f max =
let rec foreach = function
x when x >= max -> []
| x -> (f x) :: (foreach (x+1))
in
foreach 0
;;
(* WARNING INCLUSIVE *)
let for_map_from f from too =
let rec foreach = function
x when x > too -> []
| x -> (f x) :: (foreach (x+1))
in
foreach from
;;
let iteri f l =
let rec iii index = function
[] -> ()
| x::xs ->
let _ = f (index) x in
iii (index+1) xs
in
iii 0 l
;;
let for_iter f max =
let rec foreach = function
x when x >= max -> ()
| x -> f x;
foreach (x+1);
in
foreach 0
;;
let for_each_xy f width height =
for_iter (
fun y ->
for_iter ( fun x ->
f x y;
) width
) height
;;
let for_fold f max dfl =
let rec ffold last = function
x when x >= max -> last
| x ->
let last = f x last in
ffold last (x+1)
in
ffold dfl 0
;;
let fold_each_xy f width height dfl =
for_fold (fun y yl ->
for_fold (fun x xl ->
f x y xl
) width yl
) height dfl
;;
let rec shortcircuit f = function
[] -> false
| x::xs -> if (f x) then
true
else
shortcircuit f xs
;;
let shortcircuit_c l =
shortcircuit (fun x -> x) l
;;
let col c =
(c.r,c.g,c.b)
;;
let is_text c =
let (r,g,b) = col c in
(r < 50 && g < 100 && b < 120)
;;
let is_in x y (x1,y1,x2,y2) = (x >= x1 && x <= x2 && y >= y1 && y <= y2) ;;
let segmenter ?(istext=is_text) (bmp : rgb24) =
let width = bmp#width in
let height = bmp#height in
(* pj [S("W/H:");I(width);I(height);]; *)
let gp x y = if (is_in x y (0,0,(width-1),(height-1))) then
istext (bmp#get x y)
else
false
in
let segments = ref [] in
let currsegment = ref None in
let rec expandsegment (cx1,cy1,cx2,cy2) =
(* across top *)
let top = shortcircuit_c (
for_map_from (
fun x ->
gp x (cy1-1)
) (cx1-1) (cx2+1)
) in
let right = shortcircuit_c (for_map_from (fun x -> gp cx2(x+1)) (cy1-1) (cy2+1)) in
let left = shortcircuit_c (for_map_from (fun x -> gp cx1 (x-1)) (cy1-1) (cy2+1)) in
let bottom = shortcircuit_c (for_map_from (fun x -> gp x (cy2+1)) (cx1-1) (cx2+1)) in
let cx1' = if (left) then cx1 - 1 else cx1 in
let cy1' = if (top) then cy1 - 1 else cy1 in
let cx2' = if (right) then cx2 + 1 else cx2 in
let cy2' = if (bottom) then cy2 + 1 else cy2 in
(* DO A CHECK HERE IF WE SHOULD KEEP GOING *)
let ret = (cx1',cy1',cx2',cy2') in
if (top || right || left || bottom) then
expandsegment ret
else
ret
in
let segments = fold_each_xy
(fun x y o ->
if (shortcircuit (fun l -> is_in x y l) o) then
o
else if (gp x y) then
let segment = expandsegment (x,y,x,y) in
o @ [segment]
else
o
) width height []
in
(* pj [S("SHORTS?")]; *)
List.map (fun (x1,y1,x2,y2) ->
let w = x2 - x1 + 1 in
let h = y2 - y1 + 1 in
let w = min w (width-1) in
let h = min h (height-1) in
let x1 = min x1 (width-1) in
let y1 = min y1 (height-1) in
let x1 = max x1 0 in
let y1 = max y1 0 in
(* pj [I(x1);I(y1);I(w);I(h);]; *)
let b = bmp#sub x1 y1 w h in
(b,(x1,y1,x2,y2))
) segments
;;
let segment_size (x1,y1,x2,y2) =
(x2 - x1) * (y2 - y1)
;;
let rec segment_filter size = function
[] -> []
| (bmp,x)::xs ->
if ((segment_size x) < size) then
segment_filter size xs
else
(bmp,x) :: (segment_filter size xs)
;;
let segment_sort segments =
List.sort (fun (_,(x,_,_,_)) (_,(y,_,_,_)) -> compare x y) segments ;;
let is_image_file file =
let ext = String.lowercase (get_extension file) in
(ext = "png" || ext = "jpg" || ext = "gif" || ext = "jpeg" || ext = "bmp")
;;
let load_lilohash () =
(* print_string "Loading lilohash!\n"; *)
let values = [ "0" ; "1" ; "2" ; "3" ; "4" ; "5" ; "6" ; "7" ; "8" ; "9" ;
"a" ; "b" ; "c" ; "d" ; "e" ; "f" ; "g" ; "h" ; "i" ; "j" ; "k" ; "l" ; "m" ; "n" ; "o" ; "p" ; "q" ; "r" ; "s" ; "t" ; "u" ; "v" ; "w" ; "x" ; "y" ; "z"
] in
let l = List.map (
fun ch ->
let dir = "fonts/lilo/"^ch in
(* print_string dir ;
print_string "\n"; *)
let files = Sys.readdir dir in
(* List.iter (fun x -> print_string x ; print_string "\n";) (Array.to_list files) ; *)
let files = List.filter (is_image_file) (Array.to_list files) in
(* List.iter (fun x -> print_string x ; print_string "\n";) files ; *)
let bmps = List.map (
fun x ->
let path = dir ^ "/" ^ x in
let bmp = load_rgb_file path in
print_endline path;
bmp
) files in
(ch,bmps)
) values
in
List.flatten (
List.map (fun (label,l2) ->
List.map (fun x ->
(label,x)
) l2
) l
)
;;
let bool2int b = if b then 1 else 0 ;;
let maxlist f l =
let head::tail = l in
let rec max m v = function
[] -> m
| x::xs ->
let fx = f x in
if (fx > v) then
max x fx xs
else
max m v xs
in
max head (f head) tail
;;
let quickcompare f b1 b2 =
let bw1 = b1#width in
let bw2 = b2#width in
let bh1 = b1#height in
let bh2 = b2#height in
let (a,b,c) = fold_each_xy (
fun x y (a,b,c) ->
let ab = (f b1 bw1 bh1 x y) in
let bb = (f b2 bw2 bh2 x y) in
let a' = bool2int ab in
let b' = bool2int bb in
let c' = bool2int (ab && bb) in
let a = a + a' in
let b = b + b' in
let c = c + c' in
(a,b,c)
) bw1 bh1 (0,0,0) in
((float_of_int c) /. (float_of_int b) +. (float_of_int c) /. (float_of_int a)) /. 2.
;;
let lilosolver ?(lilohash = [])(bmp : rgb24) =
let lilohash = if ((List.length lilohash) > 0) then lilohash else load_lilohash () in
let segments = segmenter bmp in
let segments = segment_filter 32 segments in
let segments = segment_sort segments in
let istext = is_text in
let qp bmp width height x y =
if (is_in x y (0,0,(width-1),(height-1))) then
istext (bmp#get x y)
else
false
in
String.concat "" (
List.map (
fun (bmp,segment) ->
let matches = List.map (
fun (label,x) ->
let v = quickcompare qp bmp x in
(v,label,x)
) lilohash in
let (value,label,matchbmp) = maxlist (fun (v,_,_) -> v) matches in
label
) segments
)
;;
let files = ref [] in
Arg.parse [] (fun s -> files := s :: !files) "edge files";
let files = List.rev !files in
let (first::rest) = files in
let (files,segmentit) = if (first = "segment") then (rest,true) else (files,false) in
let lilohash = load_lilohash() in
List.iter (fun file ->
(* pj [S("File ");S(file);]; *)
let rgb = load_rgb_file file in
if (segmentit) then
begin
pj [S("BITMAP");];
let bitmaps = segmenter rgb in
pj [S("SEGMENTED");];
pj [S("List Length");I(List.length bitmaps);];
let ext = get_extension file in
let body = get_basename (String.sub file 0 (String.length file - String.length ext - 1)) in
let bitmaps = segmenter rgb in
let bitmaps = segment_sort bitmaps in
iteri (fun i (b,_) ->
let outfile = "segments/" ^ body ^ "." ^ (string_of_int i) ^ ".segment.jpg" in
pj [S(outfile)];
b#save outfile (Some Jpeg) [];
) bitmaps ;
end
else
begin
(* pj [S("SOLVE")]; *)
let str = lilosolver ~lilohash:lilohash rgb in
pj [S(file);S(str)];
end;
) files;;