• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        tstruct.cpp  (Formerly tstruct.c)
3  * Description: Code to manipulate the structures of the C++/C interface.
4  * Author:		Ray Smith
5  * Created:		Thu Apr 23 15:49:29 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #include          "tfacep.h"
22 #include          "tstruct.h"
23 #include          "makerow.h"
24 #include          "ocrblock.h"
25 //#include "structures.h"
26 
27 static ERRCODE BADFRAGMENTS = "Couldn't find matching fragment ends";
28 
ELISTIZE(FRAGMENT)29 ELISTIZE (FRAGMENT)
30 //extern /*"C"*/ oldoutline(TESSLINE*);
31 /**********************************************************************
32  * FRAGMENT::FRAGMENT
33  *
34  * Constructor for fragments.
35  **********************************************************************/
36 FRAGMENT::FRAGMENT (             //constructor
37 EDGEPT * head_pt,                //start point
38 EDGEPT * tail_pt                 //end point
39 ):head (head_pt->pos.x, head_pt->pos.y), tail (tail_pt->pos.x,
40 tail_pt->pos.y) {
41   headpt = head_pt;              // save ptrs
42   tailpt = tail_pt;
43 }
44 
45 // Helper function to make a fake PBLOB formed from the bounding box
46 // of the given old-format outline.
MakeRectBlob(TESSLINE * ol)47 static PBLOB* MakeRectBlob(TESSLINE* ol) {
48   POLYPT_LIST poly_list;
49   POLYPT_IT poly_it = &poly_list;
50   FCOORD pos, vec;
51   POLYPT *polypt;
52 
53   // Create points at each of the 4 corners of the rectangle in turn.
54   pos = FCOORD(ol->topleft.x, ol->topleft.y);
55   vec = FCOORD(0.0f, ol->botright.y - ol->topleft.y);
56   polypt = new POLYPT(pos, vec);
57   poly_it.add_after_then_move(polypt);
58   pos = FCOORD(ol->topleft.x, ol->botright.y);
59   vec = FCOORD(ol->botright.x - ol->topleft.x, 0.0f);
60   polypt = new POLYPT(pos, vec);
61   poly_it.add_after_then_move(polypt);
62   pos = FCOORD(ol->botright.x, ol->botright.y);
63   vec = FCOORD(0.0f, ol->topleft.y - ol->botright.y);
64   polypt = new POLYPT(pos, vec);
65   poly_it.add_after_then_move(polypt);
66   pos = FCOORD(ol->botright.x, ol->topleft.y);
67   vec = FCOORD(ol->topleft.x - ol->botright.x, 0.0f);
68   polypt = new POLYPT(pos, vec);
69   poly_it.add_after_then_move(polypt);
70 
71   OUTLINE_LIST out_list;
72   OUTLINE_IT out_it = &out_list;
73   out_it.add_after_then_move(new OUTLINE(&poly_it));
74   return new PBLOB(&out_list);
75 }
76 
77 /**********************************************************************
78  * make_ed_word
79  *
80  * Make an editor format word from the tess style word.
81  **********************************************************************/
82 
make_ed_word(TWERD * tessword,WERD * clone)83 WERD *make_ed_word(                  //construct word
84                    TWERD *tessword,  //word to convert
85                    WERD *clone       //clone this one
86                   ) {
87   WERD *word;                    //converted word
88   TBLOB *tblob;                  //current blob
89   PBLOB *blob;                   //new blob
90   PBLOB_LIST blobs;              //list of blobs
91   PBLOB_IT blob_it = &blobs;     //iterator
92 
93   for (tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) {
94     blob = make_ed_blob (tblob);
95     if (blob == NULL && tblob->outlines != NULL) {
96       // Make a fake blob using the bounding box rectangle of the 1st outline.
97       blob = MakeRectBlob(tblob->outlines);
98     }
99     if (blob != NULL) {
100       blob_it.add_after_then_move (blob);
101     }
102   }
103   if (!blobs.empty ())
104     word = new WERD (&blobs, clone);
105   else
106     word = NULL;
107   return word;
108 }
109 
110 
111 /**********************************************************************
112  * make_ed_blob
113  *
114  * Make an editor format blob from the tess style blob.
115  **********************************************************************/
116 
make_ed_blob(TBLOB * tessblob)117 PBLOB *make_ed_blob(                 //construct blob
118                     TBLOB *tessblob  //blob to convert
119                    ) {
120   TESSLINE *tessol;              //tess outline
121   FRAGMENT_LIST fragments;       //list of fragments
122   OUTLINE *outline;              //current outline
123   OUTLINE_LIST out_list;         //list of outlines
124   OUTLINE_IT out_it = &out_list; //iterator
125 
126   for (tessol = tessblob->outlines; tessol != NULL; tessol = tessol->next) {
127                                  //stick in list
128     register_outline(tessol, &fragments);
129   }
130   while (!fragments.empty ()) {
131     outline = make_ed_outline (&fragments);
132     if (outline != NULL) {
133       out_it.add_after_then_move (outline);
134     }
135   }
136   if (out_it.empty())
137     return NULL;                 //couldn't do it
138   return new PBLOB (&out_list);  //turn to blob
139 }
140 
141 
142 /**********************************************************************
143  * make_ed_outline
144  *
145  * Make an editor format outline from the list of fragments.
146  **********************************************************************/
147 
make_ed_outline(FRAGMENT_LIST * list)148 OUTLINE *make_ed_outline(                     //constructoutline
149                          FRAGMENT_LIST *list  //list of fragments
150                         ) {
151   FRAGMENT *fragment;            //current fragment
152   EDGEPT *edgept;                //current point
153   ICOORD headpos;                //coords of head
154   ICOORD tailpos;                //coords of tail
155   FCOORD pos;                    //coords of edgept
156   FCOORD vec;                    //empty
157   POLYPT *polypt;                //current point
158   POLYPT_LIST poly_list;         //list of point
159   POLYPT_IT poly_it = &poly_list;//iterator
160   FRAGMENT_IT fragment_it = list;//fragment
161 
162   headpos = fragment_it.data ()->head;
163   do {
164     fragment = fragment_it.data ();
165     edgept = fragment->headpt;   //start of segment
166     do {
167       pos = FCOORD (edgept->pos.x, edgept->pos.y);
168       vec = FCOORD (edgept->vec.x, edgept->vec.y);
169       polypt = new POLYPT (pos, vec);
170                                  //add to list
171       poly_it.add_after_then_move (polypt);
172       edgept = edgept->next;
173     }
174     while (edgept != fragment->tailpt);
175     tailpos = ICOORD (edgept->pos.x, edgept->pos.y);
176                                  //get rid of it
177     delete fragment_it.extract ();
178     if (tailpos != headpos) {
179       if (fragment_it.empty ()) {
180         return NULL;
181       }
182       fragment_it.forward ();
183                                  //find next segment
184       for (fragment_it.mark_cycle_pt (); !fragment_it.cycled_list () &&
185                fragment_it.data ()->head != tailpos;
186         fragment_it.forward ());
187       if (fragment_it.data ()->head != tailpos) {
188         // It is legitimate for the heads to not all match to tails,
189         // since not all combinations of seams always make sense.
190         for (fragment_it.mark_cycle_pt ();
191         !fragment_it.cycled_list (); fragment_it.forward ()) {
192           fragment = fragment_it.extract ();
193           delete fragment;
194         }
195         return NULL;             //can't do it
196       }
197     }
198   }
199   while (tailpos != headpos);
200   return new OUTLINE (&poly_it); //turn to outline
201 }
202 
203 
204 /**********************************************************************
205  * register_outline
206  *
207  * Add the fragments in the given outline to the list
208  **********************************************************************/
209 
register_outline(TESSLINE * outline,FRAGMENT_LIST * list)210 void register_outline(                     //add fragments
211                       TESSLINE *outline,   //tess format
212                       FRAGMENT_LIST *list  //list to add to
213                      ) {
214   EDGEPT *startpt;               //start of outline
215   EDGEPT *headpt;                //start of fragment
216   EDGEPT *tailpt;                //end of fragment
217   FRAGMENT *fragment;            //new fragment
218   FRAGMENT_IT it = list;         //iterator
219 
220   startpt = outline->loop;
221   do {
222     startpt = startpt->next;
223     if (startpt == NULL)
224       return;                    //illegal!
225   }
226   while (startpt->flags[0] == 0 && startpt != outline->loop);
227   headpt = startpt;
228   do
229   startpt = startpt->next;
230   while (startpt->flags[0] != 0 && startpt != headpt);
231   if (startpt->flags[0] != 0)
232     return;                      //all hidden!
233 
234   headpt = startpt;
235   do {
236     tailpt = headpt;
237     do
238     tailpt = tailpt->next;
239     while (tailpt->flags[0] == 0 && tailpt != startpt);
240     fragment = new FRAGMENT (headpt, tailpt);
241     it.add_after_then_move (fragment);
242     while (tailpt->flags[0] != 0)
243       tailpt = tailpt->next;
244     headpt = tailpt;
245   }
246   while (tailpt != startpt);
247 }
248 
249 
250 /**********************************************************************
251  * make_tess_row
252  *
253  * Make a fake row structure to pass to the tesseract matchers.
254  **********************************************************************/
255 
make_tess_row(DENORM * denorm,TEXTROW * tessrow)256 void make_tess_row(                  //make fake row
257                    DENORM *denorm,   //row info
258                    TEXTROW *tessrow  //output row
259                   ) {
260   tessrow->baseline.segments = 1;
261   tessrow->baseline.xstarts[0] = -32767;
262   tessrow->baseline.xstarts[1] = 32767;
263   tessrow->baseline.quads[0].a = 0;
264   tessrow->baseline.quads[0].b = 0;
265   tessrow->baseline.quads[0].c = bln_baseline_offset;
266   tessrow->xheight.segments = 1;
267   tessrow->xheight.xstarts[0] = -32767;
268   tessrow->xheight.xstarts[1] = 32767;
269   tessrow->xheight.quads[0].a = 0;
270   tessrow->xheight.quads[0].b = 0;
271   tessrow->xheight.quads[0].c = bln_x_height + bln_baseline_offset;
272   tessrow->lineheight = bln_x_height;
273   if (denorm != NULL) {
274     tessrow->ascrise = denorm->row ()->ascenders () * denorm->scale ();
275     tessrow->descdrop = denorm->row ()->descenders () * denorm->scale ();
276   } else {
277     tessrow->ascrise = bln_baseline_offset;
278     tessrow->descdrop = -bln_baseline_offset;
279   }
280 }
281 
282 
283 /**********************************************************************
284  * make_tess_word
285  *
286  * Convert the word to Tess format.
287  **********************************************************************/
288 
make_tess_word(WERD * word,TEXTROW * row)289 TWERD *make_tess_word(              //convert word
290                       WERD *word,   //word to do
291                       TEXTROW *row  //fake row
292                      ) {
293   TWERD *tessword;               //tess format
294 
295   tessword = newword ();         //use old allocator
296   tessword->row = row;           //give them something
297                                  //copy string
298   tessword->correct = strsave (word->text ());
299   tessword->guess = NULL;
300   tessword->blobs = make_tess_blobs (word->blob_list ());
301   tessword->blanks = 1;
302   tessword->blobcount = word->blob_list ()->length ();
303   tessword->next = NULL;
304   return tessword;
305 }
306 
307 
308 /**********************************************************************
309  * make_tess_blobs
310  *
311  * Make Tess style blobs from a list of BLOBs.
312  **********************************************************************/
313 
make_tess_blobs(PBLOB_LIST * bloblist)314 TBLOB *make_tess_blobs(                      //make tess blobs
315                        PBLOB_LIST *bloblist  //list to convert
316                       ) {
317   PBLOB_IT it = bloblist;        //iterator
318   PBLOB *blob;                   //current blob
319   TBLOB *head;                   //output list
320   TBLOB *tail;                   //end of list
321   TBLOB *tessblob;
322 
323   head = NULL;
324   tail = NULL;
325   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
326     blob = it.data ();
327     tessblob = make_tess_blob (blob, TRUE);
328     if (head)
329       tail->next = tessblob;
330     else
331       head = tessblob;
332     tail = tessblob;
333   }
334   return head;
335 }
336 
337 /**********************************************************************
338  * make_rotated_tess_blob
339  *
340  * Make a single Tess style blob, applying the given rotation and
341  * renormalizing.
342  **********************************************************************/
make_rotated_tess_blob(const DENORM * denorm,PBLOB * blob,BOOL8 flatten)343 TBLOB *make_rotated_tess_blob(const DENORM* denorm, PBLOB *blob,
344                               BOOL8 flatten) {
345   if (denorm != NULL && denorm->block() != NULL &&
346       denorm->block()->classify_rotation().y() != 0.0) {
347     TBOX box = blob->bounding_box();
348     int src_width = box.width();
349     int src_height = box.height();
350     src_width = static_cast<int>(src_width / denorm->scale() + 0.5);
351     src_height = static_cast<int>(src_height / denorm->scale() + 0.5);
352     int x_middle = (box.left() + box.right()) / 2;
353     int y_middle = (box.top() + box.bottom()) / 2;
354     PBLOB* rotated_blob = PBLOB::deep_copy(blob);
355     rotated_blob->move(FCOORD(-x_middle, -y_middle));
356     rotated_blob->rotate(denorm->block()->classify_rotation());
357     ICOORD median_size = denorm->block()->median_size();
358     int tolerance = median_size.x() / 8;
359     // TODO(dsl/rays) find a better normalization solution. In the mean time
360     // make it work for CJK by normalizing for Cap height in the same way
361     // as is applied in compute_block_xheight when the row is presumed to
362     // be ALLCAPS, i.e. the x-height is the fixed fraction
363     // blob height * textord_merge_x / (textord_merge_x + textord_merge_asc)
364     if (NearlyEqual(src_width, static_cast<int>(median_size.x()), tolerance) &&
365         NearlyEqual(src_height, static_cast<int>(median_size.y()), tolerance)) {
366       float target_height = bln_x_height * (textord_merge_x + textord_merge_asc)
367                           / textord_merge_x;
368       rotated_blob->scale(target_height / box.width());
369       rotated_blob->move(FCOORD(0.0f,
370                                 bln_baseline_offset -
371                                   rotated_blob->bounding_box().bottom()));
372     }
373     TBLOB* result = make_tess_blob(rotated_blob, flatten);
374     delete rotated_blob;
375     return result;
376   } else {
377     return make_tess_blob(blob, flatten);
378   }
379 }
380 
381 /**********************************************************************
382  * make_tess_blob
383  *
384  * Make a single Tess style blob
385  **********************************************************************/
386 
make_tess_blob(PBLOB * blob,BOOL8 flatten)387 TBLOB *make_tess_blob(               //make tess blob
388                       PBLOB *blob,   //blob to convert
389                       BOOL8 flatten  //flatten outline structure
390                      ) {
391   inT32 index;
392   TBLOB *tessblob;
393 
394   tessblob = newblob ();
395   tessblob->outlines = (struct olinestruct *)
396     make_tess_outlines (blob->out_list (), flatten);
397   for (index = 0; index < TBLOBFLAGS; index++)
398     tessblob->flags[index] = 0;  //!!
399   tessblob->correct = 0;
400   tessblob->guess = 0;
401   for (index = 0; index < MAX_WO_CLASSES; index++) {
402     tessblob->classes[index] = 0;
403     tessblob->values[index] = 0;
404   }
405   tessblob->next = NULL;
406   return tessblob;
407 }
408 
409 
410 /**********************************************************************
411  * make_tess_outlines
412  *
413  * Make Tess style outlines from a list of OUTLINEs.
414  **********************************************************************/
415 
make_tess_outlines(OUTLINE_LIST * outlinelist,BOOL8 flatten)416 TESSLINE *make_tess_outlines(                            //make tess outlines
417                              OUTLINE_LIST *outlinelist,  //list to convert
418                              BOOL8 flatten               //flatten outline structure
419                             ) {
420   OUTLINE_IT it = outlinelist;   //iterator
421   OUTLINE *outline;              //current outline
422   TESSLINE *head;                //output list
423   TESSLINE *tail;                //end of list
424   TESSLINE *tessoutline;
425 
426   head = NULL;
427   tail = NULL;
428   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
429     outline = it.data ();
430     tessoutline = newoutline ();
431     tessoutline->compactloop = NULL;
432     tessoutline->loop = make_tess_edgepts (outline->polypts (),
433       tessoutline->topleft,
434       tessoutline->botright);
435     if (tessoutline->loop == NULL) {
436       oldoutline(tessoutline);
437       continue;
438     }
439     tessoutline->start = tessoutline->loop->pos;
440     tessoutline->node = NULL;
441     tessoutline->next = NULL;
442     tessoutline->child = NULL;
443     if (!outline->child ()->empty ()) {
444       if (flatten)
445         tessoutline->next = (struct olinestruct *)
446           make_tess_outlines (outline->child (), flatten);
447       else {
448         tessoutline->next = NULL;
449         tessoutline->child = (struct olinestruct *)
450           make_tess_outlines (outline->child (), flatten);
451       }
452     }
453     else
454       tessoutline->next = NULL;
455     if (head)
456       tail->next = tessoutline;
457     else
458       head = tessoutline;
459     while (tessoutline->next != NULL)
460       tessoutline = tessoutline->next;
461     tail = tessoutline;
462   }
463   return head;
464 }
465 
466 
467 /**********************************************************************
468  * make_tess_edgepts
469  *
470  * Make Tess style edgepts from a list of POLYPTs.
471  **********************************************************************/
472 
make_tess_edgepts(POLYPT_LIST * edgeptlist,TPOINT & tl,TPOINT & br)473 EDGEPT *make_tess_edgepts(                          //make tess edgepts
474                           POLYPT_LIST *edgeptlist,  //list to convert
475                           TPOINT &tl,               //bounding box
476                           TPOINT &br) {
477   inT32 index;
478   POLYPT_IT it = edgeptlist;     //iterator
479   POLYPT *edgept;                //current edgept
480   EDGEPT *head;                  //output list
481   EDGEPT *tail;                  //end of list
482   EDGEPT *tessedgept;
483 
484   head = NULL;
485   tail = NULL;
486   tl.x = MAX_INT16;
487   tl.y = -MAX_INT16;
488   br.x = -MAX_INT16;
489   br.y = MAX_INT16;
490   for (it.mark_cycle_pt (); !it.cycled_list ();) {
491     edgept = it.data ();
492     tessedgept = newedgept ();
493     tessedgept->pos.x = (inT16) edgept->pos.x ();
494     tessedgept->pos.y = (inT16) edgept->pos.y ();
495     if (tessedgept->pos.x < tl.x)
496       tl.x = tessedgept->pos.x;
497     if (tessedgept->pos.x > br.x)
498       br.x = tessedgept->pos.x;
499     if (tessedgept->pos.y > tl.y)
500       tl.y = tessedgept->pos.y;
501     if (tessedgept->pos.y < br.y)
502       br.y = tessedgept->pos.y;
503     if (head != NULL && tessedgept->pos.x == tail->pos.x
504     && tessedgept->pos.y == tail->pos.y) {
505       oldedgept(tessedgept);
506     }
507     else {
508       for (index = 0; index < EDGEPTFLAGS; index++)
509         tessedgept->flags[index] = 0;
510       if (head != NULL) {
511         tail->vec.x = tessedgept->pos.x - tail->pos.x;
512         tail->vec.y = tessedgept->pos.y - tail->pos.y;
513         tessedgept->prev = tail;
514       }
515       tessedgept->next = head;
516       if (head)
517         tail->next = tessedgept;
518       else
519         head = tessedgept;
520       tail = tessedgept;
521     }
522     it.forward ();
523   }
524   head->prev = tail;
525   tail->vec.x = head->pos.x - tail->pos.x;
526   tail->vec.y = head->pos.y - tail->pos.y;
527   if (head == tail) {
528     oldedgept(head);
529     return NULL;                 //empty
530   }
531   return head;
532 }
533