1 /**********************************************************************
2 * File: tface.c (Formerly tface.c)
3 * Description: C side of the Tess/tessedit C/C++ interface.
4 * Author: Ray Smith
5 * Created: Mon Apr 27 11:57:06 BST 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19 #include "tface.h"
20 #include "danerror.h"
21 #include "globals.h"
22 #include "tordvars.h" /* Feature stuff */
23 #include "fxid.h"
24 #include "wordclass.h"
25 #include "bestfirst.h"
26 #include "context.h"
27 #include "gradechop.h"
28 /* includes for init */
29 #include "tessinit.h"
30 #include "mfvars.h"
31 #include "metrics.h"
32 #include "adaptmatch.h"
33 #include "matchtab.h"
34 #include "chopper.h"
35 #include "permdawg.h"
36 #include "permute.h"
37 #include "chop.h"
38 #include "callcpp.h"
39 #include "badwords.h"
40 #include "wordrec.h"
41
42 #include <math.h>
43 #ifdef __UNIX__
44 #include <unistd.h>
45 #endif
46
47 const int kReallyBadCertainty = -20;
48
49 namespace tesseract {
50 class Tesseract;
51 }
52
53 //extern "C" int record_matcher_output;
54
55 /*----------------------------------------------------------------------
56 Variables
57 ----------------------------------------------------------------------*/
58 static PRIORITY pass2_ok_split;
59 static int pass2_seg_states;
60
61 BOOL_VAR(wordrec_no_block, false, "Don't output block information");
62
63 /*----------------------------------------------------------------------
64 Function Code
65 ----------------------------------------------------------------------*/
66 /**********************************************************************
67 * start_recog
68 *
69 * Startup recog program ready to recognize words.
70 **********************************************************************/
71 namespace tesseract {
start_recog(const char * textbase)72 int Wordrec::start_recog(const char *textbase) {
73
74 program_editup(textbase, true);
75 return (0);
76 }
77
78
79 /**********************************************************************
80 * program_editup
81 *
82 * Initialize all the things in the program that need to be initialized.
83 * init_permute determines whether to initialize the permute functions
84 * and Dawg models.
85 **********************************************************************/
program_editup(const char * textbase,bool init_permute)86 void Wordrec::program_editup(const char *textbase, bool init_permute) {
87 if (textbase != NULL) {
88 imagefile = textbase;
89 /* Read in data files */
90 edit_with_ocr(textbase);
91 }
92
93 /* Initialize subsystems */
94 program_init();
95 mfeature_init(); // assumes that imagefile is initialized
96 if (init_permute)
97 getDict().init_permute();
98 setup_cp_maps();
99
100 init_metrics();
101 pass2_ok_split = chop_ok_split;
102 pass2_seg_states = wordrec_num_seg_states;
103 }
104 } // namespace tesseract
105
106
107 /**********************************************************************
108 * edit_with_ocr
109 *
110 * Initialize all the things in the program needed before the classifier
111 * code is called.
112 **********************************************************************/
edit_with_ocr(const char * imagename)113 void edit_with_ocr(const char *imagename) {
114 char name[FILENAMESIZE]; /*base name of file */
115
116 if (tord_write_output) {
117 strcpy(name, imagename);
118 strcat (name, ".txt");
119 //xiaofan
120 textfile = open_file (name, "w");
121 }
122 if (tord_write_raw_output) {
123 strcpy(name, imagename);
124 strcat (name, ".raw");
125 rawfile = open_file (name, "w");
126 }
127 if (record_matcher_output) {
128 strcpy(name, imagename);
129 strcat (name, ".mlg");
130 matcher_fp = open_file (name, "w");
131 strcpy(name, imagename);
132 strcat (name, ".ctx");
133 correct_fp = open_file (name, "r");
134 }
135 }
136
137
138 /**********************************************************************
139 * end_recog
140 *
141 * Cleanup and exit the recog program.
142 **********************************************************************/
143 namespace tesseract {
end_recog()144 int Wordrec::end_recog() {
145 program_editdown (0);
146
147 return (0);
148 }
149
150
151 /**********************************************************************
152 * program_editdown
153 *
154 * This function holds any necessary post processing for the Wise Owl
155 * program.
156 **********************************************************************/
program_editdown(inT32 elasped_time)157 void Wordrec::program_editdown(inT32 elasped_time) {
158 dj_cleanup();
159 if (tord_display_text)
160 cprintf ("\n");
161 if (!wordrec_no_block && tord_write_output)
162 fprintf (textfile, "\n");
163 if (tord_write_raw_output)
164 fprintf (rawfile, "\n");
165 if (tord_write_output) {
166 #ifdef __UNIX__
167 fsync (fileno (textfile));
168 #endif
169 fclose(textfile);
170 }
171 if (tord_write_raw_output) {
172 #ifdef __UNIX__
173 fsync (fileno (rawfile));
174 #endif
175 fclose(rawfile);
176 }
177 close_choices();
178 if (tessedit_save_stats)
179 save_summary (elasped_time);
180 end_match_table();
181 getDict().InitChoiceAccum();
182 if (global_hash != NULL) {
183 free_mem(global_hash);
184 global_hash = NULL;
185 }
186 end_metrics();
187 getDict().end_permute();
188 }
189
190
191 /**********************************************************************
192 * set_pass1
193 *
194 * Get ready to do some pass 1 stuff.
195 **********************************************************************/
set_pass1()196 void Wordrec::set_pass1() {
197 tord_blob_skip.set_value(false);
198 chop_ok_split.set_value(70.0);
199 wordrec_num_seg_states.set_value(15);
200 SettupPass1();
201 first_pass = 1;
202 }
203
204
205 /**********************************************************************
206 * set_pass2
207 *
208 * Get ready to do some pass 2 stuff.
209 **********************************************************************/
set_pass2()210 void Wordrec::set_pass2() {
211 tord_blob_skip.set_value(false);
212 chop_ok_split.set_value(pass2_ok_split);
213 wordrec_num_seg_states.set_value(pass2_seg_states);
214 SettupPass2();
215 first_pass = 0;
216 }
217
218
219 /**********************************************************************
220 * cc_recog
221 *
222 * Recognize a word.
223 **********************************************************************/
cc_recog(TWERD * tessword,WERD_CHOICE * best_choice,WERD_CHOICE * best_raw_choice,BOOL8 tester,BOOL8 trainer,bool last_word_on_line)224 BLOB_CHOICE_LIST_VECTOR *Wordrec::cc_recog(TWERD *tessword,
225 WERD_CHOICE *best_choice,
226 WERD_CHOICE *best_raw_choice,
227 BOOL8 tester,
228 BOOL8 trainer,
229 bool last_word_on_line) {
230 int fx;
231 BLOB_CHOICE_LIST_VECTOR *results; /*matcher results */
232
233 if (SetErrorTrap (NULL)) {
234 cprintf ("Tess copped out!\n");
235 ReleaseErrorTrap();
236 class_string (best_choice) = NULL;
237 return NULL;
238 }
239 getDict().InitChoiceAccum();
240 getDict().reset_hyphen_vars(last_word_on_line);
241 init_match_table();
242 for (fx = 0; fx < MAX_FX && (acts[OCR] & (FXSELECT << fx)) == 0; fx++);
243 results =
244 chop_word_main(tessword,
245 fx,
246 best_choice,
247 best_raw_choice,
248 tester,
249 trainer);
250 getDict().DebugWordChoices();
251 ReleaseErrorTrap();
252 return results;
253 }
254
255
256 /**********************************************************************
257 * dict_word()
258 *
259 * Test the dictionaries, returning NO_PERM (0) if not found, or one
260 * of the PermuterType values if found, according to the dictionary.
261 **********************************************************************/
dict_word(const WERD_CHOICE & word)262 int Wordrec::dict_word(const WERD_CHOICE &word) {
263 return getDict().valid_word (word);
264 }
265
266 /**********************************************************************
267 * call_matcher
268 *
269 * Called from Tess with a blob in tess form.
270 * Convert the blob to editor form.
271 * Call the matcher setup by the segmenter in tess_matcher.
272 * Convert the output choices back to tess form.
273 **********************************************************************/
call_matcher(TBLOB * ptblob,TBLOB * tessblob,TBLOB * ntblob,void *,TEXTROW *)274 BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *ptblob, //previous
275 TBLOB *tessblob, //blob to match
276 TBLOB *ntblob, //next
277 void *, //unused parameter
278 TEXTROW * //always null anyway
279 ) {
280 PBLOB *pblob; //converted blob
281 PBLOB *blob; //converted blob
282 PBLOB *nblob; //converted blob
283 BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST(); // matcher result
284
285 blob = make_ed_blob (tessblob);//convert blob
286 if (blob == NULL) {
287 // Since it is actually possible to get a NULL blob here, due to invalid
288 // segmentations, fake a really bad classification.
289 BLOB_CHOICE *choice =
290 new BLOB_CHOICE(0, static_cast<float>(MAX_NUM_INT_FEATURES),
291 static_cast<float>(-MAX_FLOAT32), 0, NULL);
292 BLOB_CHOICE_IT temp_it;
293 temp_it.set_to_list(ratings);
294 temp_it.add_after_stay_put(choice);
295 return ratings;
296 }
297 pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL;
298 nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL;
299 // Because of the typedef for tess_matcher, the object on which it is called
300 // must be of type Tesseract*. With a Wordrec type it seems it doesn't work.
301 (reinterpret_cast<Tesseract* const>(this)->*tess_matcher)
302 (pblob, blob, nblob, tess_word, tess_denorm, ratings, NULL);
303
304 //match it
305 delete blob; //don't need that now
306 if (pblob != NULL)
307 delete pblob;
308 if (nblob != NULL)
309 delete nblob;
310 return ratings;
311 }
312
313 /**********************************************************************
314 * make_ed_blob
315 *
316 * Make an editor format blob from the tess style blob.
317 **********************************************************************/
318
make_ed_blob(TBLOB * tessblob)319 PBLOB *make_ed_blob( //construct blob
320 TBLOB *tessblob //blob to convert
321 ) {
322 TESSLINE *tessol; //tess outline
323 FRAGMENT_LIST fragments; //list of fragments
324 OUTLINE *outline; //current outline
325 OUTLINE_LIST out_list; //list of outlines
326 OUTLINE_IT out_it = &out_list; //iterator
327
328 for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) {
329 //stick in list
330 register_outline(tessol, &fragments);
331 }
332 while (!fragments.empty ()) {
333 outline = make_ed_outline (&fragments);
334 if (outline != NULL) {
335 out_it.add_after_then_move (outline);
336 }
337 }
338 if (out_it.empty())
339 return NULL; //couldn't do it
340 return new PBLOB (&out_list); //turn to blob
341 }
342 /**********************************************************************
343 * make_ed_outline
344 *
345 * Make an editor format outline from the list of fragments.
346 **********************************************************************/
347
make_ed_outline(FRAGMENT_LIST * list)348 OUTLINE *make_ed_outline( //constructoutline
349 FRAGMENT_LIST *list //list of fragments
350 ) {
351 FRAGMENT *fragment; //current fragment
352 EDGEPT *edgept; //current point
353 ICOORD headpos; //coords of head
354 ICOORD tailpos; //coords of tail
355 FCOORD pos; //coords of edgept
356 FCOORD vec; //empty
357 POLYPT *polypt; //current point
358 POLYPT_LIST poly_list; //list of point
359 POLYPT_IT poly_it = &poly_list;//iterator
360 FRAGMENT_IT fragment_it = list;//fragment
361
362 headpos = fragment_it.data ()->head;
363 do {
364 fragment = fragment_it.data ();
365 edgept = fragment->headpt; //start of segment
366 do {
367 pos = FCOORD (edgept->pos.x, edgept->pos.y);
368 vec = FCOORD (edgept->vec.x, edgept->vec.y);
369 polypt = new POLYPT (pos, vec);
370 //add to list
371 poly_it.add_after_then_move (polypt);
372 edgept = edgept->next;
373 }
374 while (edgept != fragment->tailpt);
375 tailpos = ICOORD (edgept->pos.x, edgept->pos.y);
376 //get rid of it
377 delete fragment_it.extract ();
378 if (tailpos != headpos) {
379 if (fragment_it.empty ()) {
380 return NULL;
381 }
382 fragment_it.forward ();
383 //find next segment
384 for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () &&
385 fragment_it.data ()->head != tailpos;
386 fragment_it.forward ());
387 if (fragment_it.data ()->head != tailpos) {
388 // It is legitimate for the heads to not all match to tails,
389 // since not all combinations of seams always make sense.
390 for (fragment_it.mark_cycle_pt ();
391 !fragment_it.cycled_list (); fragment_it.forward ()) {
392 fragment = fragment_it.extract ();
393 delete fragment;
394 }
395 return NULL; //can't do it
396 }
397 }
398 }
399 while (tailpos != headpos);
400 return new OUTLINE (&poly_it); //turn to outline
401 }
402 /**********************************************************************
403 * register_outline
404 *
405 * Add the fragments in the given outline to the list
406 **********************************************************************/
407
register_outline(TESSLINE * outline,FRAGMENT_LIST * list)408 void register_outline( //add fragments
409 TESSLINE *outline, //tess format
410 FRAGMENT_LIST *list //list to add to
411 ) {
412 EDGEPT *startpt; //start of outline
413 EDGEPT *headpt; //start of fragment
414 EDGEPT *tailpt; //end of fragment
415 FRAGMENT *fragment; //new fragment
416 FRAGMENT_IT it = list; //iterator
417
418 startpt = outline->loop;
419 do {
420 startpt = startpt->next;
421 if (startpt == NULL)
422 return; //illegal!
423 }
424 while (startpt->flags[0] == 0 && startpt != outline->loop);
425 headpt = startpt;
426 do
427 startpt = startpt->next;
428 while (startpt->flags[0] != 0 && startpt != headpt);
429 if (startpt->flags[0] != 0)
430 return; //all hidden!
431
432 headpt = startpt;
433 do {
434 tailpt = headpt;
435 do
436 tailpt = tailpt->next;
437 while (tailpt->flags[0] == 0 && tailpt != startpt);
438 fragment = new FRAGMENT (headpt, tailpt);
439 it.add_after_then_move (fragment);
440 while (tailpt->flags[0] != 0)
441 tailpt = tailpt->next;
442 headpt = tailpt;
443 }
444 while (tailpt != startpt);
445 }
446
ELISTIZE(FRAGMENT)

/**********************************************************************
 * FRAGMENT::FRAGMENT
 *
 * Builds a fragment running from head_pt to tail_pt. The coordinates of
 * both points are copied into head and tail, and the points themselves
 * are remembered in headpt and tailpt.
 **********************************************************************/
FRAGMENT::FRAGMENT(EDGEPT *head_pt,  // start point
                   EDGEPT *tail_pt   // end point
                   )
    : head(head_pt->pos.x, head_pt->pos.y),
      tail(tail_pt->pos.x, tail_pt->pos.y) {
  // Keep the original edge points alongside the copied coordinates.
  headpt = head_pt;
  tailpt = tail_pt;
}
462
463 } // namespace tesseract
464