(*
 * Copyright (c) 2005, 2006, 2007 Abram Hindle
 *
 * This file is part of CaptchaBreaker
 * CaptchaBreaker is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 * CaptchaBreaker is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

(* TODO:
   goal 1: make an alphabet png
   goal 2: load alphabet png and segment in letters and numbers
   goal 3: make segmenter/cropper
   goal 4: use segmenter and cropper
   goal 5: make hand made captchas
   goal 6: generate captchas (perl?)
   goal 7: bitmap match

   * Segmenter needs to be improved
     More line following (fill algorithm)
     Handle dots
   * PHPBB segment by half grey
   *
   * move code into a lib
   * AIM
   * Need histogram
   * Colorize works well
   * -> 1 bit && invert if not majority
   * use a fill based segmenter
*)

open Images;;
open OImages;;
open Abez;;
open Captchas;;
open Shape;;

(* use classic segmenter and a sub segmenter? *)
(* debug segmenter ;_; *)

(* Command-line driver.

   Every anonymous command-line argument is treated as an input image
   path.  For each path, in command-line order, the script:
     - loads the image and passes it through Captchas.edge,
       Captchas.invert_in_place, Captchas.threshold_bucket 10 and
       Captchas.clearedges 3, saving the result under segments/;
     - splits that image with Captchas.fill_segmenter, then splits every
       segment again with stupid_segmenter, saving each piece as a JPEG;
     - orders the sub-segments with Captchas.segment_sort, samples each
       one with Captchas.sample_conv, and looks each sample up in the
       table returned by Captchas.load_aimhash via Shape.find_closest;
     - prints the score and symbol of every match followed by one
       summary line for the file.

   NOTE(review): the segments/ directory is never created here, so it
   presumably has to exist before the script runs -- confirm. *)
let () =
  (* Arg.parse hands the arguments over one at a time; they are consed
     onto the accumulator and reversed afterwards to restore the order
     in which they were given. *)
  let collected = ref [] in
  Arg.parse [] (fun arg -> collected := arg :: !collected) "edge files";
  let files = List.rev !collected in

  (* Parameters shared by load_aimhash, sample_conv and
     biglog_histogram below. *)
  let pt_cnt = 100 in
  let angles = 6 in
  let llength = 6 in
  let aim_hash = Captchas.load_aimhash pt_cnt angles llength in

  (* save the histograms out *)
  (*
  let _ = List.iter (
    fun (key,histogram,file,_) ->
      let height = histogram.npts in
      let width = histogram.llength * histogram.angles in
      let buf = Captchas.get_white_buffer width height in
      let hist = histogram.histogram in
      Abez.for_xy width height (fun x y ->
        let col = int_of_float (255. *. hist.(y).(x)) in
        let c = { r = col; g = col; b = col } in
        buf#set x y c
      );
      let outfile = "segments/" ^ (file) ^ ".histogram.jpg" in
      print_endline ("Outfile: "^outfile);
      buf#save outfile (Some Jpeg) [];
  ) aim_hash in
  *)
  print_string "Aim_hash loaded\n";

  (* Runs the whole pipeline on a single input path.  The statement
     order matters: every step either writes a file or prints progress
     output. *)
  let process_file file =
    let gray_path = "segments/" ^ (get_basename file) ^ ".gray.jpg" in
    let rgb = Captchas.load_rgb_file file in
    let edged = Captchas.edge rgb in
    (* let stretch = Captchas.stretch_in_place edged in *)
    let inverted = Captchas.invert_in_place edged in
    let cleaned =
      Captchas.clearedges 3 (Captchas.threshold_bucket 10 inverted)
    in
    let _ = cleaned#save gray_path (Some Jpeg) [] in
    let segments = Captchas.fill_segmenter ~istext:is_black cleaned in

    (* Stem of the input name: the path minus its extension and the one
       character in front of it, reduced by get_basename.
       NOTE(review): presumably that character is the dot separator;
       check what Captchas.get_extension returns for a name that has no
       extension. *)
    let ext = Captchas.get_extension file in
    let stem_len = String.length file - String.length ext - 1 in
    let stem = get_basename (String.sub file 0 stem_len) in

    (* Output path of the i-th piece; tag tells first-pass segments
       apart from sub-segments. *)
    let segment_path tag i =
      String.concat ""
        [ "segments/"; stem; tag; string_of_int i; ".segment.jpg" ]
    in

    (* Save every first-pass segment. *)
    let _ =
      Captchas.iteri
        (fun i (bitmap, _) ->
          let path = segment_path "." i in
          pj [S path];
          bitmap#save path (Some Jpeg) [])
        segments
    in

    pj [S "SUB SEGMENTS"];
    let subsegments =
      List.flatten
        (List.map
           (fun (bitmap, region) ->
             (* alternative kept from the original:
                color_subsegmenter (b,r) rgb *)
             stupid_segmenter (bitmap, region))
           segments)
    in

    (* Save every sub-segment and print its region. *)
    let _ =
      Captchas.iteri
        (fun i (bitmap, region) ->
          let path = segment_path ".sub." i in
          pj [S path];
          print_region region;
          bitmap#save path (Some Jpeg) [])
        subsegments
    in

    print_string "Pre-sort \n";
    let ordered = Captchas.segment_sort subsegments in
    print_string "Sorted SubSegments\n";
    let pts_of =
      List.map
        (fun (bitmap, _) -> Captchas.sample_conv pt_cnt bitmap)
        ordered
    in
    print_string "Converted \n";

    (* Builds the histogram of one sample, looks up the closest entry in
       aim_hash, prints the score and symbol on one line, and returns
       the symbol. *)
    let classify pts =
      let histogram = Shape.biglog_histogram angles llength pt_cnt pts in
      let (score, sym) = Shape.find_closest pts histogram aim_hash in
      print_float score;
      List.iter print_string [ " "; sym; "\n" ];
      sym
    in
    let str = String.concat " " (List.map classify pts_of) in
    print_string (String.concat "" [ file; " -> "; str; "\n" ]);
    print_string "What\n"
  in

  List.iter process_file files;
  print_string "\n";;

(* Captchas.solver_main Captchas.aim_solver files;; *)