• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        wordrec.h
3 // Description: wordrec class.
4 // Author:      Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
17 ///////////////////////////////////////////////////////////////////////
18 
19 #ifndef TESSERACT_WORDREC_WORDREC_H__
20 #define TESSERACT_WORDREC_WORDREC_H__
21 
22 #include "classify.h"
23 #include "ratngs.h"
24 #include "matrix.h"
25 #include "seam.h"
26 #include "callback.h"
27 #include "associate.h"
28 #include "badwords.h"
29 
30 struct CHUNKS_RECORD;
31 struct SEARCH_RECORD;
32 
33 namespace tesseract {
34 class Wordrec : public Classify {
35  public:
36   Wordrec();
37   ~Wordrec();
38   void save_summary(inT32 elapsed_time);
39   /* tface.cpp ***************************************************************/
40   void program_editup(const char *textbase, bool init_permute);
41   BLOB_CHOICE_LIST_VECTOR *cc_recog(TWERD *tessword,
42                                     WERD_CHOICE *best_choice,
43                                     WERD_CHOICE *best_raw_choice,
44                                     BOOL8 tester,
45                                     BOOL8 trainer,
46                                     bool last_word_on_line);
47   void program_editdown(inT32 elasped_time);
48   void set_pass1();
49   void set_pass2();
50   int end_recog();
51   int start_recog(const char *textbase);
52   BLOB_CHOICE_LIST *call_matcher(                  //call a matcher
53                     TBLOB *ptblob,    //previous
54                     TBLOB *tessblob,  //blob to match
55                     TBLOB *ntblob,    //next
56                     void *,           //unused parameter
57                     TEXTROW *         //always null anyway
58                    );
59   /* tessinit.cpp ************************************************************/
60   void program_init();
61   /* wordclass.cpp ***********************************************************/
62   BLOB_CHOICE_LIST *classify_blob(TBLOB *pblob,
63                                   TBLOB *blob,
64                                   TBLOB *nblob,
65                                   TEXTROW *row,
66                                   const char *string,
67                                   C_COL color);
68   void update_blob_classifications(TWERD *word,
69                                    const BLOB_CHOICE_LIST_VECTOR &choices);
70   /* bestfirst.cpp ***********************************************************/
71   BLOB_CHOICE_LIST_VECTOR *evaluate_chunks(CHUNKS_RECORD *chunks_record,
72                                            SEARCH_STATE search_state);
73   void update_ratings(const BLOB_CHOICE_LIST_VECTOR &new_choices,
74                       const CHUNKS_RECORD *chunks_record,
75                       const SEARCH_STATE search_state);
76   inT16 evaluate_state(CHUNKS_RECORD *chunks_record,
77                        SEARCH_RECORD *the_search,
78                        DANGERR *fixpt);
79   void best_first_search(CHUNKS_RECORD *chunks_record,
80                          WERD_CHOICE *best_choice,
81                          WERD_CHOICE *raw_choice,
82                          STATE *state,
83                          DANGERR *fixpt,
84                          STATE *best_state);
85   void expand_node(FLOAT32 worst_priority,
86                    CHUNKS_RECORD *chunks_record,
87                    SEARCH_RECORD *the_search);
88   BLOB_CHOICE_LIST_VECTOR *rebuild_current_state(
89       TBLOB *blobs,
90       SEAMS seam_list,
91       STATE *state,
92       BLOB_CHOICE_LIST_VECTOR *char_choices,
93       int fx,
94       bool force_rebuild,
95       const WERD_CHOICE &best_choice,
96       const MATRIX *ratings);
97   BLOB_CHOICE_LIST *join_blobs_and_classify(
98       TBLOB *blobs, SEAMS seam_list,
99       int x, int y, int fx, const MATRIX *ratings,
100       BLOB_CHOICE_LIST_VECTOR *old_choices);
101 
102   /* chopper.cpp *************************************************************/
103   bool improve_one_blob(TWERD *word,
104                         BLOB_CHOICE_LIST_VECTOR *char_choices,
105                         int fx,
106                         inT32 *blob_number,
107                         SEAMS *seam_list,
108                         DANGERR *fixpt,
109                         bool split_next_to_fragment);
110   void modify_blob_choice(BLOB_CHOICE_LIST *answer,
111                           int chop_index);
112   bool chop_one_blob(TWERD *word,
113                      BLOB_CHOICE_LIST_VECTOR *char_choices,
114                      inT32 *blob_number,
115                      SEAMS *seam_list,
116                      int *right_chop_index);
117   BLOB_CHOICE_LIST_VECTOR *chop_word_main(register TWERD *word,
118                                           int fx,
119                                           WERD_CHOICE *best_choice,
120                                           WERD_CHOICE *raw_choice,
121                                           BOOL8 tester,
122                                           BOOL8 trainer);
123   void improve_by_chopping(register TWERD *word,
124                            BLOB_CHOICE_LIST_VECTOR *char_choices,
125                            int fx,
126                            STATE *best_state,
127                            WERD_CHOICE *best_choice,
128                            WERD_CHOICE *raw_choice,
129                            SEAMS *seam_list,
130                            DANGERR *fixpt,
131                            STATE *chop_states,
132                            inT32 *state_count);
133   MATRIX *word_associator(TBLOB *blobs,
134                           SEAMS seams,
135                           STATE *state,
136                           int fxid,
137                           WERD_CHOICE *best_choice,
138                           WERD_CHOICE *raw_choice,
139                           char *correct,
140                           DANGERR *fixpt,
141                           STATE *best_state);
142   inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices,
143                              float rating_ceiling,
144                              bool split_next_to_fragment);
145   /* mfvars.cpp **************************************************************/
146   void mfeature_init();
147   /* pieces.cpp **************************************************************/
148   BLOB_CHOICE_LIST *classify_piece(TBLOB *pieces,
149                                    SEAMS seams,
150                                    inT16 start,
151                                    inT16 end);
152   BLOB_CHOICE_LIST *get_piece_rating(MATRIX *ratings,
153                                      TBLOB *blobs,
154                                      SEAMS seams,
155                                      inT16 start,
156                                      inT16 end);
157   /* djmenus.cpp **************************************************************/
158   // Prints out statistics gathered.
dj_statistics(FILE * File)159   void dj_statistics(FILE *File) {
160     PrintAdaptiveStatistics(File);
161     PrintBadWords(File);
162   }
163   // Does clean up (should be called at the end of the program).
dj_cleanup()164   void dj_cleanup() { EndAdaptiveClassifier(); }
165 
166 
167   /* heuristic.cpp ************************************************************/
168   FLOAT32 prioritize_state(CHUNKS_RECORD *chunks_record,
169                            SEARCH_RECORD *the_search);
170   FLOAT32 width_priority(CHUNKS_RECORD *chunks_record,
171                          STATE *state,
172                          int num_joints);
173   FLOAT32 seamcut_priority(SEAMS seams,
174                            STATE *state,
175                            int num_joints);
176   FLOAT32 rating_priority(CHUNKS_RECORD *chunks_record,
177                           STATE *state,
178                           int num_joints);
179 
180   /* member variables *********************************************************/
181   /* tface.cpp ****************************************************************/
182   POLY_MATCHER tess_matcher;//current matcher
183   POLY_TESTER tess_tester;  //current tester
184   POLY_TESTER tess_trainer; //current trainer
185   DENORM *tess_denorm;      //current denorm
186   WERD *tess_word;          //current word
187   int dict_word(const WERD_CHOICE &word);
188 };
189 
190 
191 
192 /* ccmain/tstruct.cpp *********************************************************/
193 class FRAGMENT:public ELIST_LINK
194 {
195   public:
FRAGMENT()196     FRAGMENT() {  //constructor
197     }
198     FRAGMENT(EDGEPT *head_pt,   //start
199              EDGEPT *tail_pt);  //end
200 
201     ICOORD head;                 //coords of start
202     ICOORD tail;                 //coords of end
203     EDGEPT *headpt;              //start point
204     EDGEPT *tailpt;              //end point
205 
206     NEWDELETE2 (FRAGMENT)
207 };
208 
209 ELISTIZEH (FRAGMENT)
210 PBLOB *make_ed_blob(                 //construct blob
211                     TBLOB *tessblob  //blob to convert
212                    );
213 OUTLINE *make_ed_outline(                     //constructoutline
214                          FRAGMENT_LIST *list  //list of fragments
215                         );
216 void register_outline(                     //add fragments
217                       TESSLINE *outline,   //tess format
218                       FRAGMENT_LIST *list  //list to add to
219                      );
220 
221 }  // namespace tesseract
222 
223 #endif  // TESSERACT_WORDREC_WORDREC_H__
224