• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        tface.c  (Formerly tface.c)
3  * Description: C side of the Tess/tessedit C/C++ interface.
4  * Author:		Ray Smith
5  * Created:		Mon Apr 27 11:57:06 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 #include "tface.h"
20 #include "danerror.h"
21 #include "globals.h"
22 #include "tordvars.h"            /* Feature stuff */
23 #include "fxid.h"
24 #include "wordclass.h"
25 #include "bestfirst.h"
26 #include "context.h"
27 #include "gradechop.h"
28 /* includes for init */
29 #include "tessinit.h"
30 #include "mfvars.h"
31 #include "metrics.h"
32 #include "adaptmatch.h"
33 #include "matchtab.h"
34 #include "chopper.h"
35 #include "permdawg.h"
36 #include "permute.h"
37 #include "chop.h"
38 #include "callcpp.h"
39 #include "badwords.h"
40 #include "wordrec.h"
41 
42 #include <math.h>
43 #ifdef __UNIX__
44 #include <unistd.h>
45 #endif
46 
// NOTE(review): not referenced anywhere in the visible part of this file;
// presumably a certainty value for hopeless classifications - confirm
// against other users before relying on it.
47 const int kReallyBadCertainty = -20;
48 
// Forward declaration only: Wordrec::call_matcher below casts `this` to
// Tesseract* in order to invoke the tess_matcher member pointer.
49 namespace tesseract {
50   class Tesseract;
51 }
52 
53 //extern "C" int record_matcher_output;
54 
55 /*----------------------------------------------------------------------
56               Variables
57 ----------------------------------------------------------------------*/
// Snapshots of chop_ok_split and wordrec_num_seg_states, taken at the end of
// Wordrec::program_editup(). set_pass1() overwrites the live settings with
// fixed values and set_pass2() writes these snapshots back.
58 static PRIORITY pass2_ok_split;
59 static int pass2_seg_states;
60 
// When true, program_editdown() does not write the trailing newline to
// textfile.
61 BOOL_VAR(wordrec_no_block, false, "Don't output block information");
62 
63 /*----------------------------------------------------------------------
64               Function Code
65 ----------------------------------------------------------------------*/
66 /**********************************************************************
67  * start_recog
68  *
69  * Startup recog program ready to recognize words.
70  **********************************************************************/
71 namespace tesseract {
start_recog(const char * textbase)72 int Wordrec::start_recog(const char *textbase) {
73 
74   program_editup(textbase, true);
75   return (0);
76 }
77 
78 
79 /**********************************************************************
80  * program_editup
81  *
82  * Initialize all the things in the program that need to be initialized.
83  * init_permute determines whether to initialize the permute functions
84  * and Dawg models.
85  **********************************************************************/
program_editup(const char * textbase,bool init_permute)86 void Wordrec::program_editup(const char *textbase, bool init_permute) {
87   if (textbase != NULL) {
88     imagefile = textbase;
89     /* Read in data files */
90     edit_with_ocr(textbase);
91   }
92 
93   /* Initialize subsystems */
94   program_init();
95   mfeature_init();  // assumes that imagefile is initialized
96   if (init_permute)
97   getDict().init_permute();
98   setup_cp_maps();
99 
100   init_metrics();
101   pass2_ok_split = chop_ok_split;
102   pass2_seg_states = wordrec_num_seg_states;
103 }
104 }  // namespace tesseract
105 
106 
107 /**********************************************************************
108  * edit_with_ocr
109  *
110  * Initialize all the things in the program needed before the classifier
111  * code is called.
112  **********************************************************************/
edit_with_ocr(const char * imagename)113 void edit_with_ocr(const char *imagename) {
114   char name[FILENAMESIZE];       /*base name of file */
115 
116   if (tord_write_output) {
117     strcpy(name, imagename);
118     strcat (name, ".txt");
119                                  //xiaofan
120     textfile = open_file (name, "w");
121   }
122   if (tord_write_raw_output) {
123     strcpy(name, imagename);
124     strcat (name, ".raw");
125     rawfile = open_file (name, "w");
126   }
127   if (record_matcher_output) {
128     strcpy(name, imagename);
129     strcat (name, ".mlg");
130     matcher_fp = open_file (name, "w");
131     strcpy(name, imagename);
132     strcat (name, ".ctx");
133     correct_fp = open_file (name, "r");
134   }
135 }
136 
137 
138 /**********************************************************************
139  * end_recog
140  *
141  * Cleanup and exit the recog program.
142  **********************************************************************/
143 namespace tesseract {
end_recog()144 int Wordrec::end_recog() {
145   program_editdown (0);
146 
147   return (0);
148 }
149 
150 
151 /**********************************************************************
152  * program_editdown
153  *
154  * This function holds any nessessary post processing for the Wise Owl
155  * program.
156  **********************************************************************/
program_editdown(inT32 elasped_time)157 void Wordrec::program_editdown(inT32 elasped_time) {
158   dj_cleanup();
159   if (tord_display_text)
160     cprintf ("\n");
161   if (!wordrec_no_block && tord_write_output)
162     fprintf (textfile, "\n");
163   if (tord_write_raw_output)
164     fprintf (rawfile, "\n");
165   if (tord_write_output) {
166     #ifdef __UNIX__
167     fsync (fileno (textfile));
168     #endif
169     fclose(textfile);
170   }
171   if (tord_write_raw_output) {
172     #ifdef __UNIX__
173     fsync (fileno (rawfile));
174     #endif
175     fclose(rawfile);
176   }
177   close_choices();
178   if (tessedit_save_stats)
179     save_summary (elasped_time);
180   end_match_table();
181   getDict().InitChoiceAccum();
182   if (global_hash != NULL) {
183     free_mem(global_hash);
184     global_hash = NULL;
185   }
186   end_metrics();
187   getDict().end_permute();
188 }
189 
190 
191 /**********************************************************************
192  * set_pass1
193  *
194  * Get ready to do some pass 1 stuff.
195  **********************************************************************/
set_pass1()196 void Wordrec::set_pass1() {
197   tord_blob_skip.set_value(false);
198   chop_ok_split.set_value(70.0);
199   wordrec_num_seg_states.set_value(15);
200   SettupPass1();
201   first_pass = 1;
202 }
203 
204 
205 /**********************************************************************
206  * set_pass2
207  *
208  * Get ready to do some pass 2 stuff.
209  **********************************************************************/
set_pass2()210 void Wordrec::set_pass2() {
211   tord_blob_skip.set_value(false);
212   chop_ok_split.set_value(pass2_ok_split);
213   wordrec_num_seg_states.set_value(pass2_seg_states);
214   SettupPass2();
215   first_pass = 0;
216 }
217 
218 
219 /**********************************************************************
220  * cc_recog
221  *
222  * Recognize a word.
223  **********************************************************************/
cc_recog(TWERD * tessword,WERD_CHOICE * best_choice,WERD_CHOICE * best_raw_choice,BOOL8 tester,BOOL8 trainer,bool last_word_on_line)224 BLOB_CHOICE_LIST_VECTOR *Wordrec::cc_recog(TWERD *tessword,
225                                            WERD_CHOICE *best_choice,
226                                            WERD_CHOICE *best_raw_choice,
227                                            BOOL8 tester,
228                                            BOOL8 trainer,
229                                            bool last_word_on_line) {
230   int fx;
231   BLOB_CHOICE_LIST_VECTOR *results;          /*matcher results */
232 
233   if (SetErrorTrap (NULL)) {
234     cprintf ("Tess copped out!\n");
235     ReleaseErrorTrap();
236     class_string (best_choice) = NULL;
237     return NULL;
238   }
239   getDict().InitChoiceAccum();
240   getDict().reset_hyphen_vars(last_word_on_line);
241   init_match_table();
242   for (fx = 0; fx < MAX_FX && (acts[OCR] & (FXSELECT << fx)) == 0; fx++);
243   results =
244     chop_word_main(tessword,
245                    fx,
246                    best_choice,
247                    best_raw_choice,
248                    tester,
249                    trainer);
250   getDict().DebugWordChoices();
251   ReleaseErrorTrap();
252   return results;
253 }
254 
255 
256 /**********************************************************************
257  * dict_word()
258  *
259  * Test the dictionaries, returning NO_PERM (0) if not found, or one
260  * of the PermuterType values if found, according to the dictionary.
261  **********************************************************************/
dict_word(const WERD_CHOICE & word)262 int Wordrec::dict_word(const WERD_CHOICE &word) {
263     return getDict().valid_word (word);
264 }
265 
266 /**********************************************************************
267  * call_matcher
268  *
269  * Called from Tess with a blob in tess form.
270  * Convert the blob to editor form.
271  * Call the matcher setup by the segmenter in tess_matcher.
272  * Convert the output choices back to tess form.
273  **********************************************************************/
call_matcher(TBLOB * ptblob,TBLOB * tessblob,TBLOB * ntblob,void *,TEXTROW *)274 BLOB_CHOICE_LIST *Wordrec::call_matcher(TBLOB *ptblob,    //previous
275                                         TBLOB *tessblob,  //blob to match
276                                         TBLOB *ntblob,    //next
277                                         void *,           //unused parameter
278                                         TEXTROW *         //always null anyway
279                                         ) {
280   PBLOB *pblob;                  //converted blob
281   PBLOB *blob;                   //converted blob
282   PBLOB *nblob;                  //converted blob
283   BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();  // matcher result
284 
285   blob = make_ed_blob (tessblob);//convert blob
286   if (blob == NULL) {
287     // Since it is actually possible to get a NULL blob here, due to invalid
288     // segmentations, fake a really bad classification.
289     BLOB_CHOICE *choice =
290       new BLOB_CHOICE(0, static_cast<float>(MAX_NUM_INT_FEATURES),
291                       static_cast<float>(-MAX_FLOAT32), 0, NULL);
292     BLOB_CHOICE_IT temp_it;
293     temp_it.set_to_list(ratings);
294     temp_it.add_after_stay_put(choice);
295     return ratings;
296   }
297   pblob = ptblob != NULL ? make_ed_blob (ptblob) : NULL;
298   nblob = ntblob != NULL ? make_ed_blob (ntblob) : NULL;
299   // Because of the typedef for tess_matcher, the object on which it is called
300   // must be of type Tesseract*. With a Wordrec type it seems it doesn't work.
301   (reinterpret_cast<Tesseract* const>(this)->*tess_matcher)
302       (pblob, blob, nblob, tess_word, tess_denorm, ratings, NULL);
303 
304   //match it
305   delete blob;                   //don't need that now
306   if (pblob != NULL)
307     delete pblob;
308   if (nblob != NULL)
309     delete nblob;
310   return ratings;
311 }
312 
313 /**********************************************************************
314  * make_ed_blob
315  *
316  * Make an editor format blob from the tess style blob.
317  **********************************************************************/
318 
make_ed_blob(TBLOB * tessblob)319 PBLOB *make_ed_blob(                 //construct blob
320                     TBLOB *tessblob  //blob to convert
321                    ) {
322   TESSLINE *tessol;              //tess outline
323   FRAGMENT_LIST fragments;       //list of fragments
324   OUTLINE *outline;              //current outline
325   OUTLINE_LIST out_list;         //list of outlines
326   OUTLINE_IT out_it = &out_list; //iterator
327 
328   for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) {
329                                  //stick in list
330     register_outline(tessol, &fragments);
331   }
332   while (!fragments.empty ()) {
333     outline = make_ed_outline (&fragments);
334     if (outline != NULL) {
335       out_it.add_after_then_move (outline);
336     }
337   }
338   if (out_it.empty())
339     return NULL;                 //couldn't do it
340   return new PBLOB (&out_list);  //turn to blob
341 }
342 /**********************************************************************
343  * make_ed_outline
344  *
345  * Make an editor format outline from the list of fragments.
346  **********************************************************************/
347 
// Builds one closed outline by chaining fragments taken from *list.
// Starting with the fragment the iterator initially points at, the points of
// each fragment are copied into a polygon point list, the fragment is removed
// from *list and deleted, and the next fragment is the one whose head
// coincides with the tail just reached. The chain is complete when a tail
// coincides with the head of the first fragment.
// Returns a new OUTLINE, or NULL if the chain cannot be closed. *list must
// not be empty on entry (data() is read unconditionally).
// NOTE(review): on the NULL returns the POLYPTs collected so far are
// presumably released by poly_list's destructor - confirm.
make_ed_outline(FRAGMENT_LIST * list)348 OUTLINE *make_ed_outline(                     //construct outline
349                          FRAGMENT_LIST *list  //list of fragments
350                         ) {
351   FRAGMENT *fragment;            //current fragment
352   EDGEPT *edgept;                //current point
353   ICOORD headpos;                //coords of head
354   ICOORD tailpos;                //coords of tail
355   FCOORD pos;                    //coords of edgept
356   FCOORD vec;                    //vec of edgept
357   POLYPT *polypt;                //current point
358   POLYPT_LIST poly_list;         //list of point
359   POLYPT_IT poly_it = &poly_list;//iterator
360   FRAGMENT_IT fragment_it = list;//fragment
361 
// The outline is closed once a fragment's tail lands back on this position.
362   headpos = fragment_it.data ()->head;
363   do {
364     fragment = fragment_it.data ();
365     edgept = fragment->headpt;   //start of segment
// Copy headpt up to, but not including, tailpt. At least one point is always
// copied because the test comes after the body.
366     do {
367       pos = FCOORD (edgept->pos.x, edgept->pos.y);
368       vec = FCOORD (edgept->vec.x, edgept->vec.y);
369       polypt = new POLYPT (pos, vec);
370                                  //add to list
371       poly_it.add_after_then_move (polypt);
372       edgept = edgept->next;
373     }
374     while (edgept != fragment->tailpt);
// edgept is now the fragment's tailpt.
375     tailpos = ICOORD (edgept->pos.x, edgept->pos.y);
376                                  //get rid of it
377     delete fragment_it.extract ();
378     if (tailpos != headpos) {
// Not closed yet, and no fragment is left to continue with.
379       if (fragment_it.empty ()) {
380         return NULL;
381       }
382       fragment_it.forward ();
383                                  //find next segment
// Empty-bodied loop: step through the list at most once, stopping early at a
// fragment whose head matches the tail just reached.
384       for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () &&
385                fragment_it.data ()->head != tailpos;
386         fragment_it.forward ());
// Re-test to tell "found" apart from "went all the way round".
387       if (fragment_it.data ()->head != tailpos) {
388         // It is legitimate for the heads to not all match to tails,
389         // since not all combinations of seams always make sense.
// Discard every remaining fragment before giving up.
390         for (fragment_it.mark_cycle_pt ();
391         !fragment_it.cycled_list (); fragment_it.forward ()) {
392           fragment = fragment_it.extract ();
393           delete fragment;
394         }
395         return NULL;             //can't do it
396       }
397     }
398   }
399   while (tailpos != headpos);
400   return new OUTLINE (&poly_it); //turn to outline
401 }
402 /**********************************************************************
403  * register_outline
404  *
405  * Add the fragments in the given outline to the list
406  **********************************************************************/
407 
register_outline(TESSLINE * outline,FRAGMENT_LIST * list)408 void register_outline(                     //add fragments
409                       TESSLINE *outline,   //tess format
410                       FRAGMENT_LIST *list  //list to add to
411                      ) {
412   EDGEPT *startpt;               //start of outline
413   EDGEPT *headpt;                //start of fragment
414   EDGEPT *tailpt;                //end of fragment
415   FRAGMENT *fragment;            //new fragment
416   FRAGMENT_IT it = list;         //iterator
417 
418   startpt = outline->loop;
419   do {
420     startpt = startpt->next;
421     if (startpt == NULL)
422       return;                    //illegal!
423   }
424   while (startpt->flags[0] == 0 && startpt != outline->loop);
425   headpt = startpt;
426   do
427   startpt = startpt->next;
428   while (startpt->flags[0] != 0 && startpt != headpt);
429   if (startpt->flags[0] != 0)
430     return;                      //all hidden!
431 
432   headpt = startpt;
433   do {
434     tailpt = headpt;
435     do
436     tailpt = tailpt->next;
437     while (tailpt->flags[0] == 0 && tailpt != startpt);
438     fragment = new FRAGMENT (headpt, tailpt);
439     it.add_after_then_move (fragment);
440     while (tailpt->flags[0] != 0)
441       tailpt = tailpt->next;
442     headpt = tailpt;
443   }
444   while (tailpt != startpt);
445 }
446 
ELISTIZE(FRAGMENT)

/**********************************************************************
 * FRAGMENT::FRAGMENT
 *
 * Construct a fragment from its two end points: the coordinates of
 * both points are copied into head and tail, and the pointers
 * themselves are kept in headpt and tailpt.
 **********************************************************************/
FRAGMENT::FRAGMENT(EDGEPT *head_pt,  // start point
                   EDGEPT *tail_pt)  // end point
    : head(head_pt->pos.x, head_pt->pos.y),
      tail(tail_pt->pos.x, tail_pt->pos.y) {
  // Keep the edge points themselves as well as their coordinates.
  headpt = head_pt;
  tailpt = tail_pt;
}
462 
463 }  // namespace tesseract
464