1 /////////////////////////////////////////////////////////////////////// 2 // File: tesseractclass.h 3 // Description: An instance of Tesseract. For thread safety, *every* 4 // global variable goes in here, directly, or indirectly. 5 // Author: Ray Smith 6 // Created: Fri Mar 07 08:17:01 PST 2008 7 // 8 // (C) Copyright 2008, Google Inc. 9 // Licensed under the Apache License, Version 2.0 (the "License"); 10 // you may not use this file except in compliance with the License. 11 // You may obtain a copy of the License at 12 // http://www.apache.org/licenses/LICENSE-2.0 13 // Unless required by applicable law or agreed to in writing, software 14 // distributed under the License is distributed on an "AS IS" BASIS, 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 // See the License for the specific language governing permissions and 17 // limitations under the License. 18 // 19 /////////////////////////////////////////////////////////////////////// 20 21 #ifndef TESSERACT_CCMAIN_TESSERACTCLASS_H__ 22 #define TESSERACT_CCMAIN_TESSERACTCLASS_H__ 23 24 #include "varable.h" 25 #include "wordrec.h" 26 #include "ocrclass.h" 27 #include "control.h" 28 #include "docqual.h" 29 30 class CHAR_SAMPLES_LIST; 31 class CHAR_SAMPLE_LIST; 32 class PAGE_RES; 33 class PAGE_RES_IT; 34 class BLOCK_LIST; 35 class TO_BLOCK_LIST; 36 class IMAGE; 37 class WERD_RES; 38 class ROW; 39 class TBOX; 40 class SVMenuNode; 41 struct Pix; 42 class WERD_CHOICE; 43 class WERD; 44 class BLOB_CHOICE_LIST_CLIST; 45 46 47 // Top-level class for all tesseract global instance data. 48 // This class either holds or points to all data used by an instance 49 // of Tesseract, including the memory allocator. When this is 50 // complete, Tesseract will be thread-safe. UNTIL THEN, IT IS NOT! 51 // 52 // NOTE to developers: Do not create cyclic dependencies through this class! 53 // The directory dependency tree must remain a tree! The keep this clean, 54 // lower-level code (eg in ccutil, the bottom level) must never need to 55 // know about the content of a higher-level directory. 56 // The following scheme will grant the easiest access to lower-level 57 // global members without creating a cyclic dependency: 58 // ccmain inherits wordrec, includes textord as a member 59 // wordrec inherits classify 60 // classify inherits ccstruct, includes dict as a member 61 // ccstruct inherits c_util, includes image as a member 62 // c_util inherits cc_util 63 // textord has a pointer to ccstruct, but doesn't own it. 64 // dict has a pointer to ccstruct, but doesn't own it. 65 // 66 // NOTE: that each level contains members that correspond to global 67 // data that is defined (and used) at that level, not necessarily where 68 // the type is defined so for instance: 69 // BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs"); 70 // goes inside the Textord class, not the cc_util class. 71 72 namespace tesseract { 73 74 class Tesseract : public Wordrec { 75 public: 76 Tesseract(); 77 ~Tesseract(); 78 79 void Clear(); 80 81 // Simple accessors. reskew()82 const FCOORD& reskew() const { 83 return reskew_; 84 } 85 // Destroy any existing pix and return a pointer to the pointer. mutable_pix_binary()86 Pix** mutable_pix_binary() { 87 Clear(); 88 return &pix_binary_; 89 } pix_binary()90 Pix* pix_binary() const { 91 return pix_binary_; 92 } 93 94 void SetBlackAndWhitelist(); 95 int SegmentPage(const STRING* input_file, 96 IMAGE* image, BLOCK_LIST* blocks); 97 int AutoPageSeg(int width, int height, int resolution, 98 bool single_column, IMAGE* image, 99 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 100 101 //// control.h ///////////////////////////////////////////////////////// 102 void recog_all_words( //process words 103 PAGE_RES *page_res, //page structure 104 //progress monitor 105 volatile ETEXT_DESC *monitor, 106 TBOX *target_word_box=0L, 107 inT16 dopasses=0 108 ); 109 void classify_word_pass1( //recog one word 110 WERD_RES *word, //word to do 111 ROW *row, 112 BLOCK* block, 113 BOOL8 cluster_adapt, 114 CHAR_SAMPLES_LIST *char_clusters, 115 CHAR_SAMPLE_LIST *chars_waiting); 116 void recog_pseudo_word( //recognize blobs 117 BLOCK_LIST *block_list, //blocks to check 118 TBOX &selection_box); 119 120 // This method returns all the blobs in the specified blocks. 121 // It's the caller's responsibility to destroy the returned list. 122 C_BLOB_LIST* get_blobs_from_blocks(BLOCK_LIST* blocks // blocks to look at. 123 ); 124 125 // This method can be used to perform word-level training using box files. 126 // TODO: this can be modified to perform training in general case too. 127 void train_word_level_with_boxes( 128 const STRING& box_file, // File with boxes. 129 const STRING& out_file, // Output file. 130 BLOCK_LIST* blocks // Blocks to use. 131 ); 132 void fix_rep_char(WERD_RES *word); 133 void fix_quotes( //make double quotes 134 WERD_CHOICE *choice, //choice to fix 135 WERD *word, //word to do //char choices 136 BLOB_CHOICE_LIST_CLIST *blob_choices); 137 ACCEPTABLE_WERD_TYPE acceptable_word_string(const char *s, 138 const char *lengths); 139 void match_word_pass2( //recog one word 140 WERD_RES *word, //word to do 141 ROW *row, 142 BLOCK* block, 143 float x_height); 144 void classify_word_pass2( //word to do 145 WERD_RES *word, 146 BLOCK* block, 147 ROW *row); 148 BOOL8 recog_interactive( //recognize blobs 149 BLOCK *block, //block 150 ROW *row, //row of word 151 WERD *word //word to recognize 152 ); 153 void fix_hyphens( //crunch double hyphens 154 WERD_CHOICE *choice, //choice to fix 155 WERD *word, //word to do //char choices 156 BLOB_CHOICE_LIST_CLIST *blob_choices); 157 void set_word_fonts( 158 WERD_RES *word, // word to adapt to 159 BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results 160 void font_recognition_pass( //good chars in word 161 PAGE_RES_IT &page_res_it); 162 163 //// output.h ////////////////////////////////////////////////////////// 164 165 void output_pass( //Tess output pass //send to api 166 PAGE_RES_IT &page_res_it, 167 BOOL8 write_to_shm, 168 TBOX *target_word_box); 169 FILE *open_outfile( //open .map & .unlv file 170 const char *extension); 171 void write_results( //output a word 172 PAGE_RES_IT &page_res_it, //full info 173 char newline_type, //type of newline 174 BOOL8 force_eol, //override tilde crunch? 175 BOOL8 write_to_shm //send to api 176 ); 177 void set_unlv_suspects(WERD_RES *word); 178 UNICHAR_ID get_rep_char(WERD_RES *word); // what char is repeated? 179 BOOL8 acceptable_number_string(const char *s, 180 const char *lengths); 181 inT16 count_alphanums(const WERD_CHOICE &word); 182 inT16 count_alphas(const WERD_CHOICE &word); 183 //// tessedit.h //////////////////////////////////////////////////////// 184 void read_config_file(const char *filename, bool global_only); 185 int init_tesseract(const char *arg0, 186 const char *textbase, 187 const char *language, 188 char **configs, 189 int configs_size, 190 bool configs_global_only); 191 192 int init_tesseract_lm(const char *arg0, 193 const char *textbase, 194 const char *language); 195 196 // Initializes the tesseract classifier without loading language models. 197 int init_tesseract_classifier(const char *arg0, 198 const char *textbase, 199 const char *language, 200 char **configs, 201 int configs_size, 202 bool configs_global_only); 203 204 void recognize_page(STRING& image_name); 205 void end_tesseract(); 206 207 bool init_tesseract_lang_data(const char *arg0, 208 const char *textbase, 209 const char *language, 210 char **configs, 211 int configs_size, 212 bool configs_global_only); 213 214 //// pgedit.h ////////////////////////////////////////////////////////// 215 SVMenuNode *build_menu_new(); 216 void pgeditor_main(BLOCK_LIST *blocks); 217 void process_image_event( // action in image win 218 const SVEvent &event); 219 void pgeditor_read_file( // of serialised file 220 STRING &filename, 221 BLOCK_LIST *blocks // block list to add to 222 ); 223 void do_new_source( // serialise 224 ); 225 BOOL8 process_cmd_win_event( // UI command semantics 226 inT32 cmd_event, // which menu item? 227 char *new_value // any prompt data 228 ); 229 //// reject.h ////////////////////////////////////////////////////////// 230 const char *char_ambiguities(char c); 231 void make_reject_map( //make rej map for wd //detailed results 232 WERD_RES *word, 233 BLOB_CHOICE_LIST_CLIST *blob_choices, 234 ROW *row, 235 inT16 pass //1st or 2nd? 236 ); 237 BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map); 238 inT16 first_alphanum_index(const char *word, 239 const char *word_lengths); 240 inT16 first_alphanum_offset(const char *word, 241 const char *word_lengths); 242 inT16 alpha_count(const char *word, 243 const char *word_lengths); 244 BOOL8 word_contains_non_1_digit(const char *word, 245 const char *word_lengths); 246 void dont_allow_1Il(WERD_RES *word); 247 inT16 count_alphanums( //how many alphanums 248 WERD_RES *word); 249 BOOL8 repeated_ch_string(const char *rep_ch_str, 250 const char *lengths); 251 void flip_0O(WERD_RES *word); 252 BOOL8 non_0_digit(UNICHAR_ID unichar_id); 253 BOOL8 non_O_upper(UNICHAR_ID unichar_id); 254 BOOL8 repeated_nonalphanum_wd(WERD_RES *word, ROW *row); 255 void nn_match_word( //Match a word 256 WERD_RES *word, 257 ROW *row); 258 void nn_recover_rejects(WERD_RES *word, ROW *row); 259 BOOL8 test_ambig_word( //test for ambiguity 260 WERD_RES *word); 261 void set_done( //set done flag 262 WERD_RES *word, 263 inT16 pass); 264 inT16 safe_dict_word(const WERD_CHOICE &word); 265 void flip_hyphens(WERD_RES *word); 266 //// adaptions.h /////////////////////////////////////////////////////// 267 void adapt_to_good_ems(WERD_RES *word, 268 CHAR_SAMPLES_LIST *char_clusters, 269 CHAR_SAMPLE_LIST *chars_waiting); 270 void adapt_to_good_samples(WERD_RES *word, 271 CHAR_SAMPLES_LIST *char_clusters, 272 CHAR_SAMPLE_LIST *chars_waiting); 273 BOOL8 word_adaptable( //should we adapt? 274 WERD_RES *word, 275 uinT16 mode); 276 void reject_suspect_ems(WERD_RES *word); 277 void collect_ems_for_adaption(WERD_RES *word, 278 CHAR_SAMPLES_LIST *char_clusters, 279 CHAR_SAMPLE_LIST *chars_waiting); 280 void collect_characters_for_adaption(WERD_RES *word, 281 CHAR_SAMPLES_LIST *char_clusters, 282 CHAR_SAMPLE_LIST *chars_waiting); 283 void check_wait_list(CHAR_SAMPLE_LIST *chars_waiting, 284 CHAR_SAMPLE *sample, 285 CHAR_SAMPLES *best_cluster); 286 void cluster_sample(CHAR_SAMPLE *sample, 287 CHAR_SAMPLES_LIST *char_clusters, 288 CHAR_SAMPLE_LIST *chars_waiting); 289 void complete_clustering(CHAR_SAMPLES_LIST *char_clusters, 290 CHAR_SAMPLE_LIST *chars_waiting); 291 292 //// tfacepp.cpp /////////////////////////////////////////////////////// 293 WERD_CHOICE *recog_word_recursive( //recog one owrd 294 WERD *word, //word to do 295 DENORM *denorm, //de-normaliser 296 //matcher function 297 POLY_MATCHER matcher, 298 //tester function 299 POLY_TESTER tester, 300 //trainer function 301 POLY_TESTER trainer, 302 BOOL8 testing, //true if answer driven 303 //raw result 304 WERD_CHOICE *&raw_choice, 305 //list of blob lists 306 BLOB_CHOICE_LIST_CLIST *blob_choices, 307 WERD *&outword //bln word output 308 ); 309 WERD_CHOICE *recog_word( //recog one owrd 310 WERD *word, //word to do 311 DENORM *denorm, //de-normaliser 312 POLY_MATCHER matcher, //matcher function 313 POLY_TESTER tester, //tester function 314 POLY_TESTER trainer, //trainer function 315 BOOL8 testing, //true if answer driven 316 WERD_CHOICE *&raw_choice, //raw result 317 //list of blob lists 318 BLOB_CHOICE_LIST_CLIST *blob_choices, 319 WERD *&outword //bln word output 320 ); 321 WERD_CHOICE *split_and_recog_word( //recog one owrd 322 WERD *word, //word to do 323 DENORM *denorm, //de-normaliser 324 //matcher function 325 POLY_MATCHER matcher, 326 //tester function 327 POLY_TESTER tester, 328 //trainer function 329 POLY_TESTER trainer, 330 BOOL8 testing, //true if answer driven 331 //raw result 332 WERD_CHOICE *&raw_choice, 333 //list of blob lists 334 BLOB_CHOICE_LIST_CLIST *blob_choices, 335 WERD *&outword //bln word output 336 ); 337 //// fixspace.cpp /////////////////////////////////////////////////////// 338 BOOL8 digit_or_numeric_punct(WERD_RES *word, int char_position); 339 inT16 eval_word_spacing(WERD_RES_LIST &word_res_list); 340 void match_current_words(WERD_RES_LIST &words, ROW *row, BLOCK* block); 341 inT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list); 342 void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK* block); 343 void fix_fuzzy_space_list( //space explorer 344 WERD_RES_LIST &best_perm, 345 ROW *row, 346 BLOCK* block); 347 void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK* block); 348 void fix_fuzzy_spaces( //find fuzzy words 349 volatile ETEXT_DESC *monitor, //progress monitor 350 inT32 word_count, //count of words in doc 351 PAGE_RES *page_res); 352 //// docqual.cpp //////////////////////////////////////////////////////// 353 GARBAGE_LEVEL garbage_word(WERD_RES *word, BOOL8 ok_dict_word); 354 BOOL8 potential_word_crunch(WERD_RES *word, 355 GARBAGE_LEVEL garbage_level, 356 BOOL8 ok_dict_word); 357 void tilde_crunch(PAGE_RES_IT &page_res_it); 358 void unrej_good_quality_words( //unreject potential 359 PAGE_RES_IT &page_res_it); 360 void doc_and_block_rejection( //reject big chunks 361 PAGE_RES_IT &page_res_it, 362 BOOL8 good_quality_doc); 363 void quality_based_rejection(PAGE_RES_IT &page_res_it, 364 BOOL8 good_quality_doc); 365 void convert_bad_unlv_chs(WERD_RES *word_res); 366 void merge_tess_fails(WERD_RES *word_res); 367 void tilde_delete(PAGE_RES_IT &page_res_it); 368 void insert_rej_cblobs(WERD_RES *word); 369 //// pagewalk.cpp /////////////////////////////////////////////////////// 370 void 371 process_selected_words ( 372 BLOCK_LIST * block_list, //blocks to check 373 //function to call 374 TBOX & selection_box, 375 BOOL8 (tesseract::Tesseract::*word_processor) ( 376 BLOCK *, 377 ROW *, 378 WERD *)); 379 //// tessbox.cpp /////////////////////////////////////////////////////// 380 void tess_add_doc_word( //test acceptability 381 WERD_CHOICE *word_choice //after context 382 ); 383 void tess_adapter( //adapt to word 384 WERD *word, //bln word 385 DENORM *denorm, //de-normalise 386 const WERD_CHOICE& choice, //string for word 387 const WERD_CHOICE& raw_choice, //before context 388 const char *rejmap //reject map 389 ); 390 WERD_CHOICE *test_segment_pass2( //recog one word 391 WERD *word, //bln word to do 392 DENORM *denorm, //de-normaliser 393 POLY_MATCHER matcher, //matcher function 394 POLY_TESTER tester, //tester function 395 //raw result 396 WERD_CHOICE *&raw_choice, 397 //list of blob lists 398 BLOB_CHOICE_LIST_CLIST *blob_choices, 399 WERD *&outword //bln word output 400 ); 401 WERD_CHOICE *tess_segment_pass1( //recog one word 402 WERD *word, //bln word to do 403 DENORM *denorm, //de-normaliser 404 POLY_MATCHER matcher, //matcher function 405 //raw result 406 WERD_CHOICE *&raw_choice, 407 //list of blob lists 408 BLOB_CHOICE_LIST_CLIST *blob_choices, 409 WERD *&outword //bln word output 410 ); 411 WERD_CHOICE *tess_segment_pass2( //recog one word 412 WERD *word, //bln word to do 413 DENORM *denorm, //de-normaliser 414 POLY_MATCHER matcher, //matcher function 415 //raw result 416 WERD_CHOICE *&raw_choice, 417 //list of blob lists 418 BLOB_CHOICE_LIST_CLIST *blob_choices, 419 WERD *&outword //bln word output 420 ); 421 WERD_CHOICE *correct_segment_pass2( //recog one word 422 WERD *word, //bln word to do 423 DENORM *denorm, //de-normaliser 424 POLY_MATCHER matcher, //matcher function 425 POLY_TESTER tester, //tester function 426 //raw result 427 WERD_CHOICE *&raw_choice, 428 //list of blob lists 429 BLOB_CHOICE_LIST_CLIST *blob_choices, 430 WERD *&outword //bln word output 431 ); 432 void tess_default_matcher( //call tess 433 PBLOB *pblob, //previous blob 434 PBLOB *blob, //blob to match 435 PBLOB *nblob, //next blob 436 WERD *word, //word it came from 437 DENORM *denorm, //de-normaliser 438 BLOB_CHOICE_LIST *ratings, //list of results 439 const char* script 440 ); 441 void tess_bn_matcher( //call tess 442 PBLOB *pblob, //previous blob 443 PBLOB *blob, //blob to match 444 PBLOB *nblob, //next blob 445 WERD *word, //word it came from 446 DENORM *denorm, //de-normaliser 447 BLOB_CHOICE_LIST *ratings //list of results 448 ); 449 void tess_cn_matcher( //call tess 450 PBLOB *pblob, //previous blob 451 PBLOB *blob, //blob to match 452 PBLOB *nblob, //next blob 453 WERD *word, //word it came from 454 DENORM *denorm, //de-normaliser 455 BLOB_CHOICE_LIST *ratings, //list of results 456 // Sorted array of CP_RESULT_STRUCT from class pruner. 457 CLASS_PRUNER_RESULTS cpresults 458 ); 459 BOOL8 tess_adaptable_word( //test adaptability 460 WERD *word, //word to test 461 WERD_CHOICE *word_choice, //after context 462 WERD_CHOICE *raw_choice //before context 463 ); 464 BOOL8 tess_acceptable_word( //test acceptability 465 WERD_CHOICE *word_choice, //after context 466 WERD_CHOICE *raw_choice //before context 467 ); 468 //// applybox.cpp ////////////////////////////////////////////////////// 469 void apply_box_testing(BLOCK_LIST *block_list); 470 void apply_boxes(const STRING& fname, 471 BLOCK_LIST *block_list //real blocks 472 ); 473 // converts an array of boxes to a block list 474 int Boxes2BlockList(int box_cnt, TBOX *boxes, BLOCK_LIST *block_list, 475 bool right2left); 476 //// blobcmp.cpp /////////////////////////////////////////////////////// 477 float compare_tess_blobs(TBLOB *blob1, 478 TEXTROW *row1, 479 TBLOB *blob2, 480 TEXTROW *row2); 481 //// paircmp.cpp /////////////////////////////////////////////////////// 482 float compare_bln_blobs( //match 2 blobs 483 PBLOB *blob1, //first blob 484 DENORM *denorm1, 485 PBLOB *blob2, //other blob 486 DENORM *denorm2); 487 float compare_blobs( //match 2 blobs 488 PBLOB *blob1, //first blob 489 ROW *row1, //row it came from 490 PBLOB *blob2, //other blob 491 ROW *row2); 492 BOOL8 compare_blob_pairs( //blob processor 493 BLOCK *, 494 ROW *row, //row it came from 495 WERD *, 496 PBLOB *blob //blob to compare 497 ); 498 //// fixxht.cpp /////////////////////////////////////////////////////// 499 void check_block_occ(WERD_RES *word_res); 500 501 //// Data members /////////////////////////////////////////////////////// 502 BOOL_VAR_H(tessedit_resegment_from_boxes, false, 503 "Take segmentation and labeling from box file"); 504 BOOL_VAR_H(tessedit_train_from_boxes, false, 505 "Generate training data from boxed chars"); 506 BOOL_VAR_H(tessedit_dump_pageseg_images, false, 507 "Dump itermediate images made during page segmentation"); 508 INT_VAR_H(tessedit_pageseg_mode, 2, 509 "Page seg mode: 0=auto, 1=col, 2=block, 3=line, 4=word, 6=char" 510 " (Values from PageSegMode enum in baseapi.h)"); 511 INT_VAR_H(tessedit_accuracyvspeed, 0, 512 "Accuracy V Speed tradeoff: 0 fastest, 100 most accurate" 513 " (Values from AccuracyVSpeed enum in baseapi.h)"); 514 BOOL_VAR_H(tessedit_train_from_boxes_word_level, false, 515 "Generate training data from boxed chars at word level."); 516 STRING_VAR_H(tessedit_char_blacklist, "", 517 "Blacklist of chars not to recognize"); 518 STRING_VAR_H(tessedit_char_whitelist, "", 519 "Whitelist of chars to recognize"); 520 BOOL_VAR_H(global_tessedit_ambigs_training, false, 521 "Perform training for ambiguities"); 522 //// ambigsrecog.cpp ///////////////////////////////////////////////////////// 523 FILE *init_ambigs_training(const STRING &fname); 524 void ambigs_training_segmented(const STRING &fname, 525 PAGE_RES *page_res, 526 volatile ETEXT_DESC *monitor, 527 FILE *output_file); 528 void ambigs_classify_and_output(PAGE_RES_IT *page_res_it, 529 const char *label, 530 FILE *output_file); 531 private: 532 Pix* pix_binary_; 533 FCOORD deskew_; 534 FCOORD reskew_; 535 bool hindi_image_; 536 }; 537 538 } // namespace tesseract 539 540 541 #endif // TESSERACT_CCMAIN_TESSERACTCLASS_H__ 542