• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        tessbox.cpp  (Formerly tessbox.c)
3  * Description: Black boxed Tess for developing a resaljet.
4  * Author:					Ray Smith
5  * Created:					Thu Apr 23 11:03:36 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #include          "tfacep.h"
22 #include          "tfacepp.h"
23 #include          "tessbox.h"
24 #include "mfoutline.h"
25 #include "tesseractclass.h"
26 
27 #define EXTERN
28 
29 /**********************************************************************
30  * tess_segment_pass1
31  *
32  * Segment a word using the pass1 conditions of the tess segmenter.
33  **********************************************************************/
34 
35 namespace tesseract {
tess_segment_pass1(WERD * word,DENORM * denorm,POLY_MATCHER matcher,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)36 WERD_CHOICE *Tesseract::tess_segment_pass1(                 //recog one word
37                                            WERD *word,      //bln word to do
38                                            DENORM *denorm,  //de-normaliser
39                                                             //matcher function
40                                            POLY_MATCHER matcher,
41                                                             //raw result
42                                            WERD_CHOICE *&raw_choice,
43                                                             //list of blob lists
44                                            BLOB_CHOICE_LIST_CLIST *blob_choices,
45                                            WERD *&outword   //bln word output
46                                           ) {
47   WERD_CHOICE *result;           //return value
48   int saved_enable_assoc = 0;
49   int saved_chop_enable = 0;
50 
51   if (word->flag (W_DONT_CHOP)) {
52     saved_enable_assoc = wordrec_enable_assoc;
53     saved_chop_enable = chop_enable;
54     wordrec_enable_assoc.set_value(0);
55     chop_enable.set_value(0);
56     if (word->flag (W_REP_CHAR))
57       permute_only_top = 1;
58   }
59   set_pass1();
60   //      tprintf("pass1 chop on=%d, seg=%d, onlytop=%d",chop_enable,enable_assoc,permute_only_top);
61   result = recog_word (word, denorm, matcher, NULL, NULL, FALSE,
62     raw_choice, blob_choices, outword);
63   if (word->flag (W_DONT_CHOP)) {
64     wordrec_enable_assoc.set_value(saved_enable_assoc);
65     chop_enable.set_value(saved_chop_enable);
66     permute_only_top = 0;
67   }
68   return result;
69 }
70 
71 
72 /**********************************************************************
73  * tess_segment_pass2
74  *
75  * Segment a word using the pass2 conditions of the tess segmenter.
76  **********************************************************************/
77 
tess_segment_pass2(WERD * word,DENORM * denorm,POLY_MATCHER matcher,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)78 WERD_CHOICE *Tesseract::tess_segment_pass2(                 //recog one word
79                                            WERD *word,      //bln word to do
80                                            DENORM *denorm,  //de-normaliser
81                                                             //matcher function
82                                            POLY_MATCHER matcher,
83                                                             //raw result
84                                            WERD_CHOICE *&raw_choice,
85                                                             //list of blob lists
86                                            BLOB_CHOICE_LIST_CLIST *blob_choices,
87                                            WERD *&outword   //bln word output
88                                           ) {
89   WERD_CHOICE *result;           //return value
90   int saved_enable_assoc = 0;
91   int saved_chop_enable = 0;
92 
93   if (word->flag (W_DONT_CHOP)) {
94     saved_enable_assoc = wordrec_enable_assoc;
95     saved_chop_enable = chop_enable;
96     wordrec_enable_assoc.set_value(0);
97     chop_enable.set_value(0);
98     if (word->flag (W_REP_CHAR))
99       permute_only_top = 1;
100   }
101   set_pass2();
102   result = recog_word (word, denorm, matcher, NULL, NULL, FALSE,
103     raw_choice, blob_choices, outword);
104   if (word->flag (W_DONT_CHOP)) {
105     wordrec_enable_assoc.set_value(saved_enable_assoc);
106     chop_enable.set_value(saved_chop_enable);
107     permute_only_top = 0;
108   }
109   return result;
110 }
111 
112 
113 /**********************************************************************
114  * correct_segment_pass2
115  *
116  * Segment a word correctly using the pass2 conditions of the tess segmenter.
117  * Then call the tester with all the correctly segmented blobs.
118  * If the correct segmentation cannot be found, the tester is called
119  * with the segmentation found by tess and all the correct flags set to
120  * false and all strings are NULL.
121  **********************************************************************/
122 
correct_segment_pass2(WERD * word,DENORM * denorm,POLY_MATCHER matcher,POLY_TESTER tester,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)123 WERD_CHOICE *Tesseract::correct_segment_pass2(              //recog one word
124                                               WERD *word,   //bln word to do
125                                                             //de-normaliser
126                                               DENORM *denorm,
127                                                             //matcher function
128                                               POLY_MATCHER matcher,
129                                                             //tester function
130                                               POLY_TESTER tester,
131                                                             //raw result
132                                               WERD_CHOICE *&raw_choice,
133                                                             //list of blob lists
134                                               BLOB_CHOICE_LIST_CLIST *blob_choices,
135                                                             //bln word output
136                                               WERD *&outword
137                                              ) {
138   set_pass2();
139   return recog_word (word, denorm, matcher, NULL, tester, TRUE,
140     raw_choice, blob_choices, outword);
141 }
142 
143 
144 /**********************************************************************
145  * test_segment_pass2
146  *
147  * Segment a word correctly using the pass2 conditions of the tess segmenter.
148  * Then call the tester on all words used by tess in its search.
149  * Do this only on words where the correct segmentation could be found.
150  **********************************************************************/
test_segment_pass2(WERD * word,DENORM * denorm,POLY_MATCHER matcher,POLY_TESTER tester,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)151 WERD_CHOICE *Tesseract::test_segment_pass2(               //recog one word
152                                            WERD *word,    //bln word to do
153                                                           //de-normaliser
154                                            DENORM *denorm,
155                                                           //matcher function
156                                            POLY_MATCHER matcher,
157                                                           //tester function
158                                            POLY_TESTER tester,
159                                                           //raw result
160                                            WERD_CHOICE *&raw_choice,
161                                                           //list of blob lists
162                                            BLOB_CHOICE_LIST_CLIST *blob_choices,
163                                                           //bln word output
164                                            WERD *&outword
165                                           ) {
166   set_pass2();
167   return recog_word (word, denorm, matcher, tester, NULL, TRUE,
168     raw_choice, blob_choices, outword);
169 }
170 
171 
172 /**********************************************************************
173  * tess_acceptable_word
174  *
175  * Return true if the word is regarded as "good enough".
176  **********************************************************************/
tess_acceptable_word(WERD_CHOICE * word_choice,WERD_CHOICE * raw_choice)177 BOOL8 Tesseract::tess_acceptable_word(
178     WERD_CHOICE *word_choice,  // after context
179     WERD_CHOICE *raw_choice) {  // before context
180   return getDict().AcceptableResult(*word_choice, *raw_choice);
181 }
182 
183 
184 /**********************************************************************
185  * tess_adaptable_word
186  *
187  * Return true if the word passes the AdaptableWord test.
188  **********************************************************************/
tess_adaptable_word(WERD * word,WERD_CHOICE * best_choice,WERD_CHOICE * raw_choice)189 BOOL8 Tesseract::tess_adaptable_word(  // test adaptability
190     WERD *word,                        // word to test
191     WERD_CHOICE *best_choice,          // after context
192     WERD_CHOICE *raw_choice            // before context
193                                      ) {
194   TWERD *tessword = make_tess_word(word, NULL);
195   int result = (tessword && best_choice && raw_choice &&
196                 AdaptableWord(tessword, *best_choice, *raw_choice));
197   delete_word(tessword);
198   return result != 0;
199 }
200 
201 
202 /**********************************************************************
203  * tess_cn_matcher
204  *
205  * Match a blob using the Tess Char Normalized (non-adaptive) matcher
206  * only.
207  **********************************************************************/
208 
tess_cn_matcher(PBLOB * pblob,PBLOB * blob,PBLOB * nblob,WERD * word,DENORM * denorm,BLOB_CHOICE_LIST * ratings,CLASS_PRUNER_RESULTS cpresults)209 void Tesseract::tess_cn_matcher(                           //call tess
210                                 PBLOB *pblob,              //previous blob
211                                 PBLOB *blob,               //blob to match
212                                 PBLOB *nblob,              //next blob
213                                 WERD *word,                //word it came from
214                                 DENORM *denorm,            //de-normaliser
215                                 BLOB_CHOICE_LIST *ratings,  //list of results
216                                 CLASS_PRUNER_RESULTS cpresults  // may be null.
217                                ) {
218   TBLOB *tessblob;               //converted blob
219   TEXTROW tessrow;               //dummy row
220 
221   tess_cn_matching.set_value(true);       //turn it on
222   tess_bn_matching.set_value(false);
223                                  //convert blob
224   tessblob = make_rotated_tess_blob(denorm, blob, true);
225                                  //make dummy row
226   make_tess_row(denorm, &tessrow);
227                                  //classify
228   AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, cpresults);
229   free_blob(tessblob);
230 }
231 
232 
233 /**********************************************************************
234  * tess_bn_matcher
235  *
236  * Match a blob using the Tess Baseline Normalized (adaptive) matcher
237  * only.
238  **********************************************************************/
239 
tess_bn_matcher(PBLOB * pblob,PBLOB * blob,PBLOB * nblob,WERD * word,DENORM * denorm,BLOB_CHOICE_LIST * ratings)240 void Tesseract::tess_bn_matcher(                           //call tess
241                                 PBLOB *pblob,              //previous blob
242                                 PBLOB *blob,               //blob to match
243                                 PBLOB *nblob,              //next blob
244                                 WERD *word,                //word it came from
245                                 DENORM *denorm,            //de-normaliser
246                                 BLOB_CHOICE_LIST *ratings  //list of results
247                                ) {
248   TBLOB *tessblob;               //converted blob
249   TEXTROW tessrow;               //dummy row
250 
251   tess_bn_matching.set_value(true);       //turn it on
252   tess_cn_matching.set_value(false);
253                                  //convert blob
254   tessblob = make_rotated_tess_blob(denorm, blob, true);
255                                  //make dummy row
256   make_tess_row(denorm, &tessrow);
257                                  //classify
258   AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, NULL);
259   free_blob(tessblob);
260 }
261 
262 
263 /**********************************************************************
264  * tess_default_matcher
265  *
266  * Match a blob using the default functionality of the Tess matcher.
267  **********************************************************************/
268 
tess_default_matcher(PBLOB * pblob,PBLOB * blob,PBLOB * nblob,WERD * word,DENORM * denorm,BLOB_CHOICE_LIST * ratings,const char * script)269 void Tesseract::tess_default_matcher(                       //call tess
270                                      PBLOB *pblob,          //previous blob
271                                      PBLOB *blob,           //blob to match
272                                      PBLOB *nblob,          //next blob
273                                      WERD *word,            //word it came from
274                                      DENORM *denorm,        //de-normaliser
275                                                             //list of results
276                                      BLOB_CHOICE_LIST *ratings,
277                                      const char* script
278                                     ) {
279   assert(ratings != NULL);
280   TBLOB *tessblob;               //converted blob
281   TEXTROW tessrow;               //dummy row
282 
283   tess_bn_matching.set_value(false);      //turn it off
284   tess_cn_matching.set_value(false);
285                                  //convert blob
286   tessblob = make_rotated_tess_blob(denorm, blob, true);
287                                  //make dummy row
288   make_tess_row(denorm, &tessrow);
289                                  //classify
290   AdaptiveClassifier (tessblob, NULL, &tessrow, ratings, NULL);
291   free_blob(tessblob);
292 }
293 }  // namespace tesseract
294 
295 
296 /**********************************************************************
297  * tess_training_tester
298  *
299  * Matcher tester function which actually trains tess.
300  **********************************************************************/
301 
tess_training_tester(const STRING & filename,PBLOB * blob,DENORM * denorm,BOOL8 correct,char * text,inT32 count,BLOB_CHOICE_LIST * ratings)302 void tess_training_tester(                           //call tess
303                           const STRING& filename,    //filename to output
304                           PBLOB *blob,               //blob to match
305                           DENORM *denorm,            //de-normaliser
306                           BOOL8 correct,             //ly segmented
307                           char *text,                //correct text
308                           inT32 count,               //chars in text
309                           BLOB_CHOICE_LIST *ratings  //list of results
310                          ) {
311   TBLOB *tessblob;               //converted blob
312   TEXTROW tessrow;               //dummy row
313 
314   if (correct) {
315     classify_norm_method.set_value(character); // force char norm spc 30/11/93
316     tess_bn_matching.set_value(false);    //turn it off
317     tess_cn_matching.set_value(false);
318                                  //convert blob
319     tessblob = make_tess_blob (blob, TRUE);
320                                  //make dummy row
321     make_tess_row(denorm, &tessrow);
322                                  //learn it
323     LearnBlob(filename, tessblob, &tessrow, text);
324     free_blob(tessblob);
325   }
326 }
327 
328 
329 /**********************************************************************
330  * tess_adapter
331  *
332  * Adapt to the word using the Tesseract mechanism.
333  **********************************************************************/
334 
335 namespace tesseract {
tess_adapter(WERD * word,DENORM * denorm,const WERD_CHOICE & choice,const WERD_CHOICE & raw_choice,const char * rejmap)336 void Tesseract::tess_adapter(                         //adapt to word
337                              WERD *word,              //bln word
338                              DENORM *denorm,          //de-normalise
339                              const WERD_CHOICE& choice,      //string for word
340                              const WERD_CHOICE& raw_choice,  //before context
341                              const char *rejmap       //reject map
342                             ) {
343   TWERD *tessword;               //converted word
344   static TEXTROW tessrow;        //dummy row
345 
346                                  //make dummy row
347   make_tess_row(denorm, &tessrow);
348                                  //make a word
349   tessword = make_tess_word (word, &tessrow);
350   AdaptToWord(tessword, &tessrow, choice, raw_choice, rejmap);
351   //adapt to it
352   delete_word(tessword);  //free it
353 }
354 
355 
356 /**********************************************************************
357  * tess_add_doc_word
358  *
359  * Add the given word to the document dictionary
360  **********************************************************************/
tess_add_doc_word(WERD_CHOICE * word_choice)361 void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) {
362   getDict().add_document_word(*word_choice);
363 }
364 }  // namespace tesseract
365