1 /////////////////////////////////////////////////////////////////////// 2 // File: wordrec.h 3 // Description: wordrec class. 4 // Author: Samuel Charron 5 // 6 // (C) Copyright 2006, Google Inc. 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_WORDREC_WORDREC_H__ 20 #define TESSERACT_WORDREC_WORDREC_H__ 21 22 #include "classify.h" 23 #include "ratngs.h" 24 #include "matrix.h" 25 #include "seam.h" 26 #include "callback.h" 27 #include "associate.h" 28 #include "badwords.h" 29 30 struct CHUNKS_RECORD; 31 struct SEARCH_RECORD; 32 33 namespace tesseract { 34 class Wordrec : public Classify { 35 public: 36 Wordrec(); 37 ~Wordrec(); 38 void save_summary(inT32 elapsed_time); 39 /* tface.cpp ***************************************************************/ 40 void program_editup(const char *textbase, bool init_permute); 41 BLOB_CHOICE_LIST_VECTOR *cc_recog(TWERD *tessword, 42 WERD_CHOICE *best_choice, 43 WERD_CHOICE *best_raw_choice, 44 BOOL8 tester, 45 BOOL8 trainer, 46 bool last_word_on_line); 47 void program_editdown(inT32 elasped_time); 48 void set_pass1(); 49 void set_pass2(); 50 int end_recog(); 51 int start_recog(const char *textbase); 52 BLOB_CHOICE_LIST *call_matcher( //call a matcher 53 TBLOB *ptblob, //previous 54 TBLOB *tessblob, //blob to match 55 TBLOB *ntblob, //next 56 void *, //unused parameter 57 TEXTROW * //always null anyway 58 ); 59 /* tessinit.cpp ************************************************************/ 60 void program_init(); 61 /* wordclass.cpp ***********************************************************/ 62 BLOB_CHOICE_LIST *classify_blob(TBLOB *pblob, 63 TBLOB *blob, 64 TBLOB *nblob, 65 TEXTROW *row, 66 const char *string, 67 C_COL color); 68 void update_blob_classifications(TWERD *word, 69 const BLOB_CHOICE_LIST_VECTOR &choices); 70 /* bestfirst.cpp ***********************************************************/ 71 BLOB_CHOICE_LIST_VECTOR *evaluate_chunks(CHUNKS_RECORD *chunks_record, 72 SEARCH_STATE search_state); 73 void update_ratings(const BLOB_CHOICE_LIST_VECTOR &new_choices, 74 const CHUNKS_RECORD *chunks_record, 75 const SEARCH_STATE search_state); 76 inT16 evaluate_state(CHUNKS_RECORD *chunks_record, 77 SEARCH_RECORD *the_search, 78 DANGERR *fixpt); 79 void best_first_search(CHUNKS_RECORD *chunks_record, 80 WERD_CHOICE *best_choice, 81 WERD_CHOICE *raw_choice, 82 STATE *state, 83 DANGERR *fixpt, 84 STATE *best_state); 85 void expand_node(FLOAT32 worst_priority, 86 CHUNKS_RECORD *chunks_record, 87 SEARCH_RECORD *the_search); 88 BLOB_CHOICE_LIST_VECTOR *rebuild_current_state( 89 TBLOB *blobs, 90 SEAMS seam_list, 91 STATE *state, 92 BLOB_CHOICE_LIST_VECTOR *char_choices, 93 int fx, 94 bool force_rebuild, 95 const WERD_CHOICE &best_choice, 96 const MATRIX *ratings); 97 BLOB_CHOICE_LIST *join_blobs_and_classify( 98 TBLOB *blobs, SEAMS seam_list, 99 int x, int y, int fx, const MATRIX *ratings, 100 BLOB_CHOICE_LIST_VECTOR *old_choices); 101 102 /* chopper.cpp *************************************************************/ 103 bool improve_one_blob(TWERD *word, 104 BLOB_CHOICE_LIST_VECTOR *char_choices, 105 int fx, 106 inT32 *blob_number, 107 SEAMS *seam_list, 108 DANGERR *fixpt, 109 bool split_next_to_fragment); 110 void modify_blob_choice(BLOB_CHOICE_LIST *answer, 111 int chop_index); 112 bool chop_one_blob(TWERD *word, 113 BLOB_CHOICE_LIST_VECTOR *char_choices, 114 inT32 *blob_number, 115 SEAMS *seam_list, 116 int *right_chop_index); 117 BLOB_CHOICE_LIST_VECTOR *chop_word_main(register TWERD *word, 118 int fx, 119 WERD_CHOICE *best_choice, 120 WERD_CHOICE *raw_choice, 121 BOOL8 tester, 122 BOOL8 trainer); 123 void improve_by_chopping(register TWERD *word, 124 BLOB_CHOICE_LIST_VECTOR *char_choices, 125 int fx, 126 STATE *best_state, 127 WERD_CHOICE *best_choice, 128 WERD_CHOICE *raw_choice, 129 SEAMS *seam_list, 130 DANGERR *fixpt, 131 STATE *chop_states, 132 inT32 *state_count); 133 MATRIX *word_associator(TBLOB *blobs, 134 SEAMS seams, 135 STATE *state, 136 int fxid, 137 WERD_CHOICE *best_choice, 138 WERD_CHOICE *raw_choice, 139 char *correct, 140 DANGERR *fixpt, 141 STATE *best_state); 142 inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices, 143 float rating_ceiling, 144 bool split_next_to_fragment); 145 /* mfvars.cpp **************************************************************/ 146 void mfeature_init(); 147 /* pieces.cpp **************************************************************/ 148 BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces, 149 SEAMS seams, 150 inT16 start, 151 inT16 end); 152 BLOB_CHOICE_LIST *get_piece_rating(MATRIX *ratings, 153 TBLOB *blobs, 154 SEAMS seams, 155 inT16 start, 156 inT16 end); 157 /* djmenus.cpp **************************************************************/ 158 // Prints out statistics gathered. dj_statistics(FILE * File)159 void dj_statistics(FILE *File) { 160 PrintAdaptiveStatistics(File); 161 PrintBadWords(File); 162 } 163 // Does clean up (should be called at the end of the program). dj_cleanup()164 void dj_cleanup() { EndAdaptiveClassifier(); } 165 166 167 /* heuristic.cpp ************************************************************/ 168 FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record, 169 SEARCH_RECORD *the_search); 170 FLOAT32 width_priority(CHUNKS_RECORD *chunks_record, 171 STATE *state, 172 int num_joints); 173 FLOAT32 seamcut_priority(SEAMS seams, 174 STATE *state, 175 int num_joints); 176 FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record, 177 STATE *state, 178 int num_joints); 179 180 /* member variables *********************************************************/ 181 /* tface.cpp ****************************************************************/ 182 POLY_MATCHER tess_matcher;//current matcher 183 POLY_TESTER tess_tester; //current tester 184 POLY_TESTER tess_trainer; //current trainer 185 DENORM *tess_denorm; //current denorm 186 WERD *tess_word; //current word 187 int dict_word(const WERD_CHOICE &word); 188 }; 189 190 191 192 /* ccmain/tstruct.cpp *********************************************************/ 193 class FRAGMENT:public ELIST_LINK 194 { 195 public: FRAGMENT()196 FRAGMENT() { //constructor 197 } 198 FRAGMENT(EDGEPT *head_pt, //start 199 EDGEPT *tail_pt); //end 200 201 ICOORD head; //coords of start 202 ICOORD tail; //coords of end 203 EDGEPT *headpt; //start point 204 EDGEPT *tailpt; //end point 205 206 NEWDELETE2 (FRAGMENT) 207 }; 208 209 ELISTIZEH (FRAGMENT) 210 PBLOB *make_ed_blob( //construct blob 211 TBLOB *tessblob //blob to convert 212 ); 213 OUTLINE *make_ed_outline( //constructoutline 214 FRAGMENT_LIST *list //list of fragments 215 ); 216 void register_outline( //add fragments 217 TESSLINE *outline, //tess format 218 FRAGMENT_LIST *list //list to add to 219 ); 220 221 } // namespace tesseract 222 223 #endif // TESSERACT_WORDREC_WORDREC_H__ 224