1 /**********************************************************************
2 * File: tessbox.cpp (Formerly tessbox.c)
3 * Description: Black boxed Tess for developing a resaljet.
4 * Author: Ray Smith
5 * Created: Thu Apr 23 11:03:36 BST 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #include "mfcpch.h"
21 #include "tfacep.h"
22 #include "tfacepp.h"
23 #include "tessbox.h"
24 #include "mfoutline.h"
25 #include "tesseractclass.h"
26
27 #define EXTERN
28
29 /**********************************************************************
30 * tess_segment_pass1
31 *
32 * Segment a word using the pass1 conditions of the tess segmenter.
33 **********************************************************************/
34
35 namespace tesseract {
tess_segment_pass1(WERD * word,DENORM * denorm,POLY_MATCHER matcher,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)36 WERD_CHOICE *Tesseract::tess_segment_pass1( //recog one word
37 WERD *word, //bln word to do
38 DENORM *denorm, //de-normaliser
39 //matcher function
40 POLY_MATCHER matcher,
41 //raw result
42 WERD_CHOICE *&raw_choice,
43 //list of blob lists
44 BLOB_CHOICE_LIST_CLIST *blob_choices,
45 WERD *&outword //bln word output
46 ) {
47 WERD_CHOICE *result; //return value
48 int saved_enable_assoc = 0;
49 int saved_chop_enable = 0;
50
51 if (word->flag (W_DONT_CHOP)) {
52 saved_enable_assoc = wordrec_enable_assoc;
53 saved_chop_enable = chop_enable;
54 wordrec_enable_assoc.set_value(0);
55 chop_enable.set_value(0);
56 if (word->flag (W_REP_CHAR))
57 permute_only_top = 1;
58 }
59 set_pass1();
60 // tprintf("pass1 chop on=%d, seg=%d, onlytop=%d",chop_enable,enable_assoc,permute_only_top);
61 result = recog_word (word, denorm, matcher, NULL, NULL, FALSE,
62 raw_choice, blob_choices, outword);
63 if (word->flag (W_DONT_CHOP)) {
64 wordrec_enable_assoc.set_value(saved_enable_assoc);
65 chop_enable.set_value(saved_chop_enable);
66 permute_only_top = 0;
67 }
68 return result;
69 }
70
71
72 /**********************************************************************
73 * tess_segment_pass2
74 *
75 * Segment a word using the pass2 conditions of the tess segmenter.
76 **********************************************************************/
77
tess_segment_pass2(WERD * word,DENORM * denorm,POLY_MATCHER matcher,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)78 WERD_CHOICE *Tesseract::tess_segment_pass2( //recog one word
79 WERD *word, //bln word to do
80 DENORM *denorm, //de-normaliser
81 //matcher function
82 POLY_MATCHER matcher,
83 //raw result
84 WERD_CHOICE *&raw_choice,
85 //list of blob lists
86 BLOB_CHOICE_LIST_CLIST *blob_choices,
87 WERD *&outword //bln word output
88 ) {
89 WERD_CHOICE *result; //return value
90 int saved_enable_assoc = 0;
91 int saved_chop_enable = 0;
92
93 if (word->flag (W_DONT_CHOP)) {
94 saved_enable_assoc = wordrec_enable_assoc;
95 saved_chop_enable = chop_enable;
96 wordrec_enable_assoc.set_value(0);
97 chop_enable.set_value(0);
98 if (word->flag (W_REP_CHAR))
99 permute_only_top = 1;
100 }
101 set_pass2();
102 result = recog_word (word, denorm, matcher, NULL, NULL, FALSE,
103 raw_choice, blob_choices, outword);
104 if (word->flag (W_DONT_CHOP)) {
105 wordrec_enable_assoc.set_value(saved_enable_assoc);
106 chop_enable.set_value(saved_chop_enable);
107 permute_only_top = 0;
108 }
109 return result;
110 }
111
112
113 /**********************************************************************
114 * correct_segment_pass2
115 *
116 * Segment a word correctly using the pass2 conditions of the tess segmenter.
117 * Then call the tester with all the correctly segmented blobs.
118 * If the correct segmentation cannot be found, the tester is called
119 * with the segmentation found by tess and all the correct flags set to
120 * false and all strings are NULL.
121 **********************************************************************/
122
correct_segment_pass2(WERD * word,DENORM * denorm,POLY_MATCHER matcher,POLY_TESTER tester,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)123 WERD_CHOICE *Tesseract::correct_segment_pass2( //recog one word
124 WERD *word, //bln word to do
125 //de-normaliser
126 DENORM *denorm,
127 //matcher function
128 POLY_MATCHER matcher,
129 //tester function
130 POLY_TESTER tester,
131 //raw result
132 WERD_CHOICE *&raw_choice,
133 //list of blob lists
134 BLOB_CHOICE_LIST_CLIST *blob_choices,
135 //bln word output
136 WERD *&outword
137 ) {
138 set_pass2();
139 return recog_word (word, denorm, matcher, NULL, tester, TRUE,
140 raw_choice, blob_choices, outword);
141 }
142
143
144 /**********************************************************************
145 * test_segment_pass2
146 *
147 * Segment a word correctly using the pass2 conditions of the tess segmenter.
148 * Then call the tester on all words used by tess in its search.
149 * Do this only on words where the correct segmentation could be found.
150 **********************************************************************/
test_segment_pass2(WERD * word,DENORM * denorm,POLY_MATCHER matcher,POLY_TESTER tester,WERD_CHOICE * & raw_choice,BLOB_CHOICE_LIST_CLIST * blob_choices,WERD * & outword)151 WERD_CHOICE *Tesseract::test_segment_pass2( //recog one word
152 WERD *word, //bln word to do
153 //de-normaliser
154 DENORM *denorm,
155 //matcher function
156 POLY_MATCHER matcher,
157 //tester function
158 POLY_TESTER tester,
159 //raw result
160 WERD_CHOICE *&raw_choice,
161 //list of blob lists
162 BLOB_CHOICE_LIST_CLIST *blob_choices,
163 //bln word output
164 WERD *&outword
165 ) {
166 set_pass2();
167 return recog_word (word, denorm, matcher, tester, NULL, TRUE,
168 raw_choice, blob_choices, outword);
169 }
170
171
172 /**********************************************************************
173 * tess_acceptable_word
174 *
175 * Return true if the word is regarded as "good enough".
176 **********************************************************************/
tess_acceptable_word(WERD_CHOICE * word_choice,WERD_CHOICE * raw_choice)177 BOOL8 Tesseract::tess_acceptable_word(
178 WERD_CHOICE *word_choice, // after context
179 WERD_CHOICE *raw_choice) { // before context
180 return getDict().AcceptableResult(*word_choice, *raw_choice);
181 }
182
183
184 /**********************************************************************
185 * tess_adaptable_word
186 *
187 * Return true if the word is regarded as "good enough".
188 **********************************************************************/
tess_adaptable_word(WERD * word,WERD_CHOICE * best_choice,WERD_CHOICE * raw_choice)189 BOOL8 Tesseract::tess_adaptable_word( // test adaptability
190 WERD *word, // word to test
191 WERD_CHOICE *best_choice, // after context
192 WERD_CHOICE *raw_choice // before context
193 ) {
194 TWERD *tessword = make_tess_word(word, NULL);
195 int result = (tessword && best_choice && raw_choice &&
196 AdaptableWord(tessword, *best_choice, *raw_choice));
197 delete_word(tessword);
198 return result != 0;
199 }
200
201
202 /**********************************************************************
203 * tess_cn_matcher
204 *
205 * Match a blob using the Tess Char Normalized (non-adaptive) matcher
206 * only.
207 **********************************************************************/
208
tess_cn_matcher(PBLOB * pblob,PBLOB * blob,PBLOB * nblob,WERD * word,DENORM * denorm,BLOB_CHOICE_LIST * ratings,CLASS_PRUNER_RESULTS cpresults)209 void Tesseract::tess_cn_matcher( //call tess
210 PBLOB *pblob, //previous blob
211 PBLOB *blob, //blob to match
212 PBLOB *nblob, //next blob
213 WERD *word, //word it came from
214 DENORM *denorm, //de-normaliser
215 BLOB_CHOICE_LIST *ratings, //list of results
216 CLASS_PRUNER_RESULTS cpresults // may be null.
217 ) {
218 TBLOB *tessblob; //converted blob
219 TEXTROW tessrow; //dummy row
220
221 tess_cn_matching.set_value(true); //turn it on
222 tess_bn_matching.set_value(false);
223 //convert blob
224 tessblob = make_rotated_tess_blob(denorm, blob, true);
225 //make dummy row
226 make_tess_row(denorm, &tessrow);
227 //classify
228 AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, cpresults);
229 free_blob(tessblob);
230 }
231
232
233 /**********************************************************************
234 * tess_bn_matcher
235 *
236 * Match a blob using the Tess Baseline Normalized (adaptive) matcher
237 * only.
238 **********************************************************************/
239
tess_bn_matcher(PBLOB * pblob,PBLOB * blob,PBLOB * nblob,WERD * word,DENORM * denorm,BLOB_CHOICE_LIST * ratings)240 void Tesseract::tess_bn_matcher( //call tess
241 PBLOB *pblob, //previous blob
242 PBLOB *blob, //blob to match
243 PBLOB *nblob, //next blob
244 WERD *word, //word it came from
245 DENORM *denorm, //de-normaliser
246 BLOB_CHOICE_LIST *ratings //list of results
247 ) {
248 TBLOB *tessblob; //converted blob
249 TEXTROW tessrow; //dummy row
250
251 tess_bn_matching.set_value(true); //turn it on
252 tess_cn_matching.set_value(false);
253 //convert blob
254 tessblob = make_rotated_tess_blob(denorm, blob, true);
255 //make dummy row
256 make_tess_row(denorm, &tessrow);
257 //classify
258 AdaptiveClassifier(tessblob, NULL, &tessrow, ratings, NULL);
259 free_blob(tessblob);
260 }
261
262
263 /**********************************************************************
264 * tess_default_matcher
265 *
266 * Match a blob using the default functionality of the Tess matcher.
267 **********************************************************************/
268
tess_default_matcher(PBLOB * pblob,PBLOB * blob,PBLOB * nblob,WERD * word,DENORM * denorm,BLOB_CHOICE_LIST * ratings,const char * script)269 void Tesseract::tess_default_matcher( //call tess
270 PBLOB *pblob, //previous blob
271 PBLOB *blob, //blob to match
272 PBLOB *nblob, //next blob
273 WERD *word, //word it came from
274 DENORM *denorm, //de-normaliser
275 //list of results
276 BLOB_CHOICE_LIST *ratings,
277 const char* script
278 ) {
279 assert(ratings != NULL);
280 TBLOB *tessblob; //converted blob
281 TEXTROW tessrow; //dummy row
282
283 tess_bn_matching.set_value(false); //turn it off
284 tess_cn_matching.set_value(false);
285 //convert blob
286 tessblob = make_rotated_tess_blob(denorm, blob, true);
287 //make dummy row
288 make_tess_row(denorm, &tessrow);
289 //classify
290 AdaptiveClassifier (tessblob, NULL, &tessrow, ratings, NULL);
291 free_blob(tessblob);
292 }
293 } // namespace tesseract
294
295
296 /**********************************************************************
297 * tess_training_tester
298 *
299 * Matcher tester function which actually trains tess.
300 **********************************************************************/
301
tess_training_tester(const STRING & filename,PBLOB * blob,DENORM * denorm,BOOL8 correct,char * text,inT32 count,BLOB_CHOICE_LIST * ratings)302 void tess_training_tester( //call tess
303 const STRING& filename, //filename to output
304 PBLOB *blob, //blob to match
305 DENORM *denorm, //de-normaliser
306 BOOL8 correct, //ly segmented
307 char *text, //correct text
308 inT32 count, //chars in text
309 BLOB_CHOICE_LIST *ratings //list of results
310 ) {
311 TBLOB *tessblob; //converted blob
312 TEXTROW tessrow; //dummy row
313
314 if (correct) {
315 classify_norm_method.set_value(character); // force char norm spc 30/11/93
316 tess_bn_matching.set_value(false); //turn it off
317 tess_cn_matching.set_value(false);
318 //convert blob
319 tessblob = make_tess_blob (blob, TRUE);
320 //make dummy row
321 make_tess_row(denorm, &tessrow);
322 //learn it
323 LearnBlob(filename, tessblob, &tessrow, text);
324 free_blob(tessblob);
325 }
326 }
327
328
329 /**********************************************************************
330 * tess_adapter
331 *
332 * Adapt to the word using the Tesseract mechanism.
333 **********************************************************************/
334
335 namespace tesseract {
tess_adapter(WERD * word,DENORM * denorm,const WERD_CHOICE & choice,const WERD_CHOICE & raw_choice,const char * rejmap)336 void Tesseract::tess_adapter( //adapt to word
337 WERD *word, //bln word
338 DENORM *denorm, //de-normalise
339 const WERD_CHOICE& choice, //string for word
340 const WERD_CHOICE& raw_choice, //before context
341 const char *rejmap //reject map
342 ) {
343 TWERD *tessword; //converted word
344 static TEXTROW tessrow; //dummy row
345
346 //make dummy row
347 make_tess_row(denorm, &tessrow);
348 //make a word
349 tessword = make_tess_word (word, &tessrow);
350 AdaptToWord(tessword, &tessrow, choice, raw_choice, rejmap);
351 //adapt to it
352 delete_word(tessword); //free it
353 }
354
355
356 /**********************************************************************
357 * tess_add_doc_word
358 *
359 * Add the given word to the document dictionary
360 **********************************************************************/
tess_add_doc_word(WERD_CHOICE * word_choice)361 void Tesseract::tess_add_doc_word(WERD_CHOICE *word_choice) {
362 getDict().add_document_word(*word_choice);
363 }
364 } // namespace tesseract
365