• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        werd.cpp  (Formerly word.c)
3  * Description: Code for the WERD class.
4  * Author:		Ray Smith
5  * Created:		Tue Oct 08 14:32:12 BST 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "mfcpch.h"
21 #include          "blckerr.h"
22 #include          "linlsq.h"
23 #include          "werd.h"
24 
25 #define FIRST_COLOUR    ScrollView::RED      //first rainbow colour
26                                  //last rainbow colour
27 #define LAST_COLOUR     ScrollView::AQUAMARINE
28 #define CHILD_COLOUR    ScrollView::BROWN    //colour of children
29 
30 const ERRCODE CANT_SCALE_EDGESTEPS =
31 "Attempted to scale an edgestep format word";
32 
33 #define EXTERN
34 
35 EXTERN BOOL_VAR (bln_numericmode, 0, "Optimize for numbers");
36 EXTERN INT_VAR (bln_x_height, 128, "Baseline Normalisation X-height");
37 EXTERN INT_VAR (bln_baseline_offset, 64, "Baseline Norm. offset of baseline");
38 EXTERN double_VAR (bln_blshift_maxshift, -1.0,
39 "Fraction of xh before shifting");
40 EXTERN double_VAR (bln_blshift_xfraction, 0.75,
41 "Size fraction of xh before shifting");
42 
ELISTIZE_S(WERD)43 ELISTIZE_S (WERD)
44 /**********************************************************************
45  * WERD::WERD
46  *
47  * Constructor to build a WERD from a list of C_BLOBs.
48  * The C_BLOBs are not copied so the source list is emptied.
49  **********************************************************************/
50 WERD::WERD (                     //constructor
51 C_BLOB_LIST * blob_list,         //in word order
52 uinT8 blank_count,               //blanks in front
53 const char *text                 //correct text
54 ):
55 flags (0),
56 correct(text) {
57   C_BLOB_IT start_it = blob_list;//iterator
58   C_BLOB_IT end_it = blob_list;  //another
59                                  //rejected blobs in wd
60   C_BLOB_IT rej_cblob_it = &rej_cblobs;
61   C_OUTLINE_IT c_outline_it;     //coutline iterator
62   BOOL8 blob_inverted;
63   BOOL8 reject_blob;
64   inT16 inverted_vote = 0;
65   inT16 non_inverted_vote = 0;
66 
67   while (!end_it.at_last ())
68     end_it.forward ();           //move to last
69                                  //move to our list
70   cblobs.assign_to_sublist (&start_it, &end_it);
71   blanks = blank_count;
72   /*
73     Set white on black flag for the WERD, moving any duff blobs onto the
74     rej_cblobs list.
75     First, walk the cblobs checking the inverse flag for each outline of each
76     cblob. If a cblob has inconsistent flag settings for its different
77     outlines, move the blob to the reject list. Otherwise, increment the
78     appropriate w-on-b or b-on-w vote for the word.
79 
80     Now set the inversion flag for the WERD by maximum vote.
81 
82     Walk the blobs again, moving any blob whose inversion flag does not agree
83     with the concencus onto the reject list.
84   */
85   start_it.set_to_list (&cblobs);
86   if (start_it.empty ())
87     return;
88   for (start_it.mark_cycle_pt ();
89   !start_it.cycled_list (); start_it.forward ()) {
90     c_outline_it.set_to_list (start_it.data ()->out_list ());
91     blob_inverted = c_outline_it.data ()->flag (COUT_INVERSE);
92     reject_blob = FALSE;
93     for (c_outline_it.mark_cycle_pt ();
94       !c_outline_it.cycled_list () && !reject_blob;
95     c_outline_it.forward ()) {
96       reject_blob =
97         c_outline_it.data ()->flag (COUT_INVERSE) != blob_inverted;
98     }
99     if (reject_blob)
100       rej_cblob_it.add_after_then_move (start_it.extract ());
101     else {
102       if (blob_inverted)
103         inverted_vote++;
104       else
105         non_inverted_vote++;
106     }
107   }
108 
109   flags.set_bit (W_INVERSE, (inverted_vote > non_inverted_vote));
110 
111   start_it.set_to_list (&cblobs);
112   if (start_it.empty ())
113     return;
114   for (start_it.mark_cycle_pt ();
115   !start_it.cycled_list (); start_it.forward ()) {
116     c_outline_it.set_to_list (start_it.data ()->out_list ());
117     if (c_outline_it.data ()->flag (COUT_INVERSE) != flags.bit (W_INVERSE))
118       rej_cblob_it.add_after_then_move (start_it.extract ());
119   }
120 }
121 
122 
123 /**********************************************************************
124  * WERD::WERD
125  *
126  * Constructor to build a WERD from a list of BLOBs.
127  * The BLOBs are not copied so the source list is emptied.
128  **********************************************************************/
129 
WERD(PBLOB_LIST * blob_list,uinT8 blank_count,const char * text)130 WERD::WERD (                     //constructor
131 PBLOB_LIST * blob_list,          //in word order
132 uinT8 blank_count,               //blanks in front
133 const char *text                 //correct text
134 ):
135 flags (0),
136 correct(text) {
137   PBLOB_IT start_it = blob_list; //iterator
138   PBLOB_IT end_it = blob_list;   //another
139 
140   while (!end_it.at_last ())
141     end_it.forward ();           //move to last
142   ((PBLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
143   //move to our list
144                                  //it's a polygon
145   flags.set_bit (W_POLYGON, TRUE);
146   blanks = blank_count;
147   //      fprintf(stderr,"Wrong constructor!!!!\n");
148 }
149 
150 
151 /**********************************************************************
152  * WERD::WERD
153  *
154  * Constructor to build a WERD from a list of BLOBs.
155  * The BLOBs are not copied so the source list is emptied.
156  **********************************************************************/
157 
WERD(PBLOB_LIST * blob_list,WERD * clone)158 WERD::WERD (                     //constructor
159 PBLOB_LIST * blob_list,          //in word order
160 WERD * clone                     //sorce of flags
161 ):flags (clone->flags), correct (clone->correct) {
162   PBLOB_IT start_it = blob_list; //iterator
163   PBLOB_IT end_it = blob_list;   //another
164 
165   while (!end_it.at_last ())
166     end_it.forward ();           //move to last
167   ((PBLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
168   //move to our list
169   blanks = clone->blanks;
170   //      fprintf(stderr,"Wrong constructor!!!!\n");
171 }
172 
173 
174 /**********************************************************************
175  * WERD::WERD
176  *
177  * Constructor to build a WERD from a list of C_BLOBs.
178  * The C_BLOBs are not copied so the source list is emptied.
179  **********************************************************************/
180 
WERD(C_BLOB_LIST * blob_list,WERD * clone)181 WERD::WERD (                     //constructor
182 C_BLOB_LIST * blob_list,         //in word order
183 WERD * clone                     //sorce of flags
184 ):flags (clone->flags), correct (clone->correct) {
185   C_BLOB_IT start_it = blob_list;//iterator
186   C_BLOB_IT end_it = blob_list;  //another
187 
188   while (!end_it.at_last ())
189     end_it.forward ();           //move to last
190   ((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
191   //move to our list
192   blanks = clone->blanks;
193   //      fprintf(stderr,"Wrong constructor!!!!\n");
194 }
195 
196 
197 /**********************************************************************
198  * WERD::poly_copy
199  *
200  * Make a copy of a WERD in polygon format.
201  * The source WERD is untouched.
202  **********************************************************************/
203 
poly_copy(float xheight)204 WERD *WERD::poly_copy(               //make a poly copy
205                       float xheight  //row height
206                      ) {
207   PBLOB *blob;                   //new blob
208   WERD *result = new WERD;       //output word
209   C_BLOB_IT src_it = &cblobs;    //iterator
210   //      LARC_BLOB_IT                            larc_it=(LARC_BLOB_LIST*)(&cblobs);
211   PBLOB_IT dest_it = (PBLOB_LIST *) (&result->cblobs);
212   //another
213 
214   if (flags.bit (W_POLYGON)) {
215     *result = *this;             //just copy it
216   }
217   else {
218     result->flags = flags;
219     result->correct = correct;   //copy info
220     result->dummy = dummy;
221     if (!src_it.empty ()) {
222       //                      if (flags.bit(W_LINEARC))
223       //                      {
224       //                              do
225       //                              {
226       //                                      blob=new PBLOB;
227       //                                      poly_linearc_outlines(larc_it.data()->out_list(),
228       //                                                                                              blob->out_list());      //convert outlines
229       //                                      dest_it.add_after_then_move(blob);                      //add to dest list
230       //                                      larc_it.forward();
231       //                              }
232       //                              while (!larc_it.at_first());
233       //                      }
234       //                      else
235       //                      {
236       do {
237         blob = new PBLOB (src_it.data (), xheight);
238         //convert blob
239                                  //add to dest list
240         dest_it.add_after_then_move (blob);
241         src_it.forward ();
242       }
243       while (!src_it.at_first ());
244       //                      }
245     }
246     if (!rej_cblobs.empty ()) {
247       /* Polygonal approx of reject blobs */
248       src_it.set_to_list (&rej_cblobs);
249       dest_it = (PBLOB_LIST *) (&result->rej_cblobs);
250       do {
251                                  //convert blob
252         blob = new PBLOB (src_it.data (), xheight);
253                                  //add to dest list
254         dest_it.add_after_then_move (blob);
255         src_it.forward ();
256       }
257       while (!src_it.at_first ());
258     }
259                                  //polygon now
260     result->flags.set_bit (W_POLYGON, TRUE);
261     result->blanks = blanks;
262   }
263   return result;
264 }
265 
266 
267 /**********************************************************************
268  * WERD::bounding_box
269  *
270  * Return the bounding box of the WERD.
271  * This is quite a mess to compute!
272  * ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the
273  * words on the row were re-sorted. The original words were built with reject
274  * blobs included. The FUZZY SPACE flags were set accordingly. If ALL the
275  * blobs in a word are rejected the BB for the word is NULL, causing the sort
276  * to screw up, leading to the erroneous possibility of the first word in a
277  * row being marked as FUZZY space.
278  **********************************************************************/
279 
bounding_box()280 TBOX WERD::bounding_box() {  //bounding box
281   TBOX box;                       //box being built
282                                  //rejected blobs in wd
283   C_BLOB_IT rej_cblob_it = &rej_cblobs;
284 
285   for (rej_cblob_it.mark_cycle_pt ();
286   !rej_cblob_it.cycled_list (); rej_cblob_it.forward ()) {
287     box += rej_cblob_it.data ()->bounding_box ();
288   }
289 
290   if (flags.bit (W_POLYGON)) {
291                                  //polygons
292     PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
293 
294     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
295       box += it.data ()->bounding_box ();
296     }
297   }
298   else {
299     C_BLOB_IT it = &cblobs;      //blobs of WERD
300 
301     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
302       box += it.data ()->bounding_box ();
303     }
304   }
305   return box;
306 }
307 
308 
309 /**********************************************************************
310  * WERD::move
311  *
312  * Reposition WERD by vector
313  * NOTE!! REJECT CBLOBS ARE NOT MOVED
314  **********************************************************************/
315 
move(const ICOORD vec)316 void WERD::move(                  // reposition WERD
317                 const ICOORD vec  // by vector
318                ) {
319   PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
320   // blob iterator
321   //      LARC_BLOB_IT                            lblob_it((LARC_BLOB_LIST*)&cblobs);
322   C_BLOB_IT cblob_it(&cblobs);  // cblob iterator
323 
324   if (flags.bit (W_POLYGON))
325     for (blob_it.mark_cycle_pt ();
326     !blob_it.cycled_list (); blob_it.forward ())
327   blob_it.data ()->move (vec);
328   //      else if (flags.bit(W_LINEARC))
329   //              for( lblob_it.mark_cycle_pt();
330   //                      !lblob_it.cycled_list();
331   //                      lblob_it.forward() )
332   //                      lblob_it.data()->move( vec );
333   else
334     for (cblob_it.mark_cycle_pt ();
335     !cblob_it.cycled_list (); cblob_it.forward ())
336   cblob_it.data ()->move (vec);
337 }
338 
339 
340 /**********************************************************************
341  * WERD::scale
342  *
343  * Scale WERD by multiplier
344  **********************************************************************/
345 
scale(const float f)346 void WERD::scale(               // scale WERD
347                  const float f  // by multiplier
348                 ) {
349   PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
350   // blob iterator
351   //      LARC_BLOB_IT                            lblob_it((LARC_BLOB_LIST*)&cblobs);
352 
353   if (flags.bit (W_POLYGON))
354     for (blob_it.mark_cycle_pt ();
355     !blob_it.cycled_list (); blob_it.forward ())
356   blob_it.data ()->scale (f);
357   //      else if (flags.bit(W_LINEARC))
358   //              for (lblob_it.mark_cycle_pt();
359   //                              !lblob_it.cycled_list();
360   //                              lblob_it.forward() )
361   //                      lblob_it.data()->scale( f );
362   else
363     CANT_SCALE_EDGESTEPS.error ("WERD::scale", ABORT, NULL);
364 }
365 
366 
367 /**********************************************************************
368  * WERD::join_on
369  *
370  * Join other word onto this one. Delete the old word.
371  **********************************************************************/
372 
join_on(WERD * & other)373 void WERD::join_on(              // join WERD
374                    WERD *&other  //other word
375                   ) {
376   PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
377   // blob iterator
378   PBLOB_IT src_it ((PBLOB_LIST *) & other->cblobs);
379   C_BLOB_IT rej_cblob_it(&rej_cblobs);
380   C_BLOB_IT src_rej_it (&other->rej_cblobs);
381 
382   while (!src_it.empty ()) {
383     blob_it.add_to_end (src_it.extract ());
384     src_it.forward ();
385   }
386   while (!src_rej_it.empty ()) {
387     rej_cblob_it.add_to_end (src_rej_it.extract ());
388     src_rej_it.forward ();
389   }
390 }
391 
392 
393 /**********************************************************************
394  * WERD::copy_on
395  *
396  * Copy blobs from other word onto this one.
397  **********************************************************************/
398 
copy_on(WERD * & other)399 void WERD::copy_on(              //copy blobs
400                    WERD *&other  //from other
401                   ) {
402   if (flags.bit (W_POLYGON)) {
403     PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
404     // blob iterator
405     PBLOB_LIST blobs;
406 
407     blobs.deep_copy(reinterpret_cast<PBLOB_LIST*>(&other->cblobs),
408                     &PBLOB::deep_copy);
409     blob_it.move_to_last ();
410     blob_it.add_list_after (&blobs);
411   } else {
412     C_BLOB_IT c_blob_it(&cblobs);
413     C_BLOB_LIST c_blobs;
414 
415     c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
416     c_blob_it.move_to_last ();
417     c_blob_it.add_list_after (&c_blobs);
418   }
419   if (!other->rej_cblobs.empty ()) {
420     C_BLOB_IT rej_c_blob_it(&rej_cblobs);
421     C_BLOB_LIST new_rej_c_blobs;
422 
423     new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
424     rej_c_blob_it.move_to_last ();
425     rej_c_blob_it.add_list_after (&new_rej_c_blobs);
426   }
427 }
428 
429 
430 /**********************************************************************
431  * WERD::baseline_normalise
432  *
433  * Baseline Normalise the word in Tesseract style.  (I.e origin at centre of
434  * word at bottom. x-height region scaled to region y =
435  * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
436  * - usually 64..192)
437  **********************************************************************/
438 
baseline_normalise(ROW * row,DENORM * denorm)439 void WERD::baseline_normalise(                // Tess style BL Norm
440                               ROW *row,
441                               DENORM *denorm  //antidote
442                              ) {
443   baseline_normalise_x (row, row->x_height (), denorm);
444   //Use standard x ht
445 }
446 
447 
448 /**********************************************************************
449  * WERD::baseline_normalise_x
450  *
451  * Baseline Normalise the word in Tesseract style.  (I.e origin at centre of
452  * word at bottom. x-height region scaled to region y =
453  * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
454  * - usually 64..192)
455  *  USE A SPECIFIED X-HEIGHT - NOT NECESSARILY THE ONE IN row
456  **********************************************************************/
457 
baseline_normalise_x(ROW * row,float x_height,DENORM * denorm)458 void WERD::baseline_normalise_x(                 // Tess style BL Norm
459                                 ROW *row,
460                                 float x_height,  //non standard value
461                                 DENORM *denorm   //antidote
462                                ) {
463   BOOL8 using_row;               //as baseline
464   float blob_x_centre;           //middle of blob
465   float blob_offset;             //bottom miss
466   float top_offset;              //top miss
467   float blob_x_height;           //xh for this blob
468   inT16 segments;                //no of segments
469   inT16 segment;                 //current segment
470   DENORM_SEG *segs;              //array of segments
471   float mean_x;                  //mean xheight
472   inT32 x_count;                 //no of xs
473   TBOX word_box = bounding_box ();//word bounding box
474   TBOX blob_box;                  //blob bounding box
475   PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
476   // blob iterator
477   PBLOB *blob;
478   LLSQ line;                     //fitted line
479   double line_m, line_c;         //fitted line
480                                  //inverse norm
481   DENORM antidote (word_box.left () +
482 
483     (word_box.right () - word_box.left ()) / 2.0,
484     bln_x_height / x_height, row);
485 
486   if (!flags.bit (W_POLYGON)) {
487     WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
488       "Need to poly approx");
489   }
490 
491   if (flags.bit (W_NORMALIZED)) {
492     WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
493       "Baseline unnormalised");
494   }
495 
496   if (bln_numericmode) {
497     segs = new DENORM_SEG[blob_it.length ()];
498     segments = 0;
499     float factor;  // For scaling to baseline normalised size.
500     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
501     blob_it.forward ()) {
502       blob = blob_it.data ();
503       blob_box = blob->bounding_box ();
504       blob->move (FCOORD (-antidote.origin (),
505         -blob_box.bottom ()));
506       factor = bln_x_height * 4.0f / (3 * blob_box.height ());
507       // Constrain the scale factor as target numbers should be either
508       // cap height already or xheight.
509       if (factor < antidote.scale())
510         factor = antidote.scale();
511       else if (factor > antidote.scale() * 1.5f)
512         factor = antidote.scale() * 1.5f;
513       blob->scale (factor);
514       blob->move (FCOORD (0.0, bln_baseline_offset));
515       segs[segments].xstart = blob->bounding_box().left();
516       segs[segments].ycoord = blob_box.bottom();
517       segs[segments++].scale_factor = factor;
518     }
519     antidote = DENORM (antidote.origin (), antidote.scale (),
520       0.0f, 0.0f, segments, segs, true, row);
521     delete [] segs;
522 
523                                  //Repeat for rej blobs
524     blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
525     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
526     blob_it.forward ()) {
527       blob = blob_it.data ();
528       blob_box = blob->bounding_box ();
529       blob->move (FCOORD (-antidote.origin (),
530                           -blob_box.bottom ()));
531       blob->scale (bln_x_height * 4.0f / (3 * blob_box.height ()));
532       blob->move (FCOORD (0.0, bln_baseline_offset));
533     }
534   }
535   else if (bln_blshift_maxshift < 0) {
536     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
537     blob_it.forward ()) {
538       blob = blob_it.data ();
539       blob_box = blob->bounding_box ();
540       blob_x_centre = blob_box.left () +
541         (blob_box.right () - blob_box.left ()) / 2.0;
542       blob->move (FCOORD (-antidote.origin (),
543         -(row->base_line (blob_x_centre))));
544       blob->scale (antidote.scale ());
545       blob->move (FCOORD (0.0, bln_baseline_offset));
546     }
547 
548                                  //Repeat for rej blobs
549     blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
550     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
551     blob_it.forward ()) {
552       blob = blob_it.data ();
553       blob_box = blob->bounding_box ();
554       blob_x_centre = blob_box.left () +
555         (blob_box.right () - blob_box.left ()) / 2.0;
556       blob->move (FCOORD (-antidote.origin (),
557         -(row->base_line (blob_x_centre))));
558       blob->scale (antidote.scale ());
559       blob->move (FCOORD (0.0, bln_baseline_offset));
560     }
561 
562   }
563   else {
564     mean_x = x_height;
565     x_count = 1;
566     segs = new DENORM_SEG[blob_it.length ()];
567     segments = 0;
568     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
569     blob_it.forward ()) {
570       blob = blob_it.data ();
571       blob_box = blob->bounding_box ();
572       if (blob_box.height () > bln_blshift_xfraction * x_height) {
573         blob_x_centre = blob_box.left () +
574           (blob_box.right () - blob_box.left ()) / 2.0;
575         blob_offset =
576           blob_box.bottom () - row->base_line (blob_x_centre);
577         top_offset = blob_offset + blob_box.height () - x_height - 1;
578         blob_x_height = top_offset + x_height;
579         if (top_offset < 0)
580           top_offset = -top_offset;
581         if (blob_offset < 0)
582           blob_offset = -blob_offset;
583         if (blob_offset < bln_blshift_maxshift * x_height) {
584           segs[segments].ycoord = blob_box.bottom ();
585           line.add (blob_x_centre, blob_box.bottom ());
586           if (top_offset < bln_blshift_maxshift * x_height) {
587             segs[segments].scale_factor = blob_box.height () - 1.0f;
588             x_count++;
589           }
590           else
591             segs[segments].scale_factor = 0.0f;
592           //fix it later
593         }
594         else {
595                                  //not a goer
596           segs[segments].ycoord = -MAX_INT32;
597           if (top_offset < bln_blshift_maxshift * x_height) {
598             segs[segments].scale_factor = blob_x_height;
599             x_count++;
600           }
601           else
602             segs[segments].scale_factor = 0.0f;
603           //fix it later
604         }
605       }
606       else {
607         segs[segments].scale_factor = 0.0f;
608         segs[segments].ycoord = -MAX_INT32;
609       }
610       segs[segments].xstart = blob_box.left ();
611       segments++;
612     }
613     using_row = line.count () <= 1;
614     if (!using_row) {
615       line_m = line.m ();
616       line_c = line.c (line_m);
617     }
618     else
619       line_m = line_c = 0;
620     segments = 0;
621     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
622     blob_it.forward ()) {
623       blob = blob_it.data ();
624       blob_box = blob->bounding_box ();
625       blob_x_centre = blob_box.left () +
626         (blob_box.right () - blob_box.left ()) / 2.0;
627       if (segs[segments].ycoord == -MAX_INT32
628       && segs[segments].scale_factor != 0 && !using_row) {
629         blob_offset = line_m * blob_x_centre + line_c;
630         segs[segments].scale_factor = blob_box.top () - blob_offset;
631       }
632       if (segs[segments].scale_factor != 0)
633         mean_x += segs[segments].scale_factor;
634       segments++;
635     }
636     mean_x /= x_count;
637     //              printf("mean x=%g, count=%d, line_m=%g, line_c=%g\n",
638     //                      mean_x,x_count,line_m,line_c);
639     segments = 0;
640     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
641     blob_it.forward ()) {
642       blob = blob_it.data ();
643       blob_box = blob->bounding_box ();
644       blob_x_centre = blob_box.left () +
645         (blob_box.right () - blob_box.left ()) / 2.0;
646       if (segs[segments].ycoord != -MAX_INT32)
647         blob_offset = (float) segs[segments].ycoord;
648       else if (using_row)
649         blob_offset = row->base_line (blob_x_centre);
650       else
651         blob_offset = line_m * blob_x_centre + line_c;
652       if (segs[segments].scale_factor == 0)
653         segs[segments].scale_factor = mean_x;
654       segs[segments].scale_factor =
655         bln_x_height / segs[segments].scale_factor;
656       //                      printf("Blob sf=%g, top=%d, bot=%d, base=%g\n",
657       //                              segs[segments].scale_factor,blob_box.top(),
658       //                              blob_box.bottom(),blob_offset);
659       blob->move (FCOORD (-antidote.origin (), -blob_offset));
660       blob->
661         scale (FCOORD (antidote.scale (), segs[segments].scale_factor));
662       blob->move (FCOORD (0.0, bln_baseline_offset));
663       segments++;
664     }
665 
666                                  //Repeat for rej blobs
667     blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
668     segment = 0;
669     for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
670     blob_it.forward ()) {
671       blob = blob_it.data ();
672       blob_box = blob->bounding_box ();
673       blob_x_centre = blob_box.left () +
674         (blob_box.right () - blob_box.left ()) / 2.0;
675       while (segment < segments - 1
676         && segs[segment + 1].xstart <= blob_x_centre)
677         segment++;
678       if (segs[segment].ycoord != -MAX_INT32)
679         blob_offset = (float) segs[segment].ycoord;
680       else if (using_row)
681         blob_offset = row->base_line (blob_x_centre);
682       else
683         blob_offset = line_m * blob_x_centre + line_c;
684       blob->move (FCOORD (-antidote.origin (), -blob_offset));
685       blob->
686         scale (FCOORD (antidote.scale (), segs[segment].scale_factor));
687       blob->move (FCOORD (0.0, bln_baseline_offset));
688     }
689     if (line.count () > 0 || x_count > 1)
690       antidote = DENORM (antidote.origin (), antidote.scale (),
691         line_m, line_c, segments, segs, using_row, row);
692     delete[]segs;
693   }
694   if (denorm != NULL)
695     *denorm = antidote;
696                                  //it's normalised
697   flags.set_bit (W_NORMALIZED, TRUE);
698 }
699 
700 
701 /**********************************************************************
702  * WERD::baseline_denormalise
703  *
704  * Baseline DeNormalise the word in Tesseract style.  (I.e origin at centre of
705  * word at bottom. x-height region scaled to region y =
706  * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
707  * - usually 64..192)
708  **********************************************************************/
709 
baseline_denormalise(const DENORM * denorm)710 void WERD::baseline_denormalise(                      // Tess style BL Norm
711                                 const DENORM *denorm  //antidote
712                                ) {
713   PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
714   // blob iterator
715   PBLOB *blob;
716 
717   if (!flags.bit (W_NORMALIZED)) {
718     WRONG_WORD.error ("WERD::baseline_denormalise", ABORT,
719       "Baseline normalised");
720   }
721 
722   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
723     blob = blob_it.data ();
724                                  //denormalise it
725     blob->baseline_denormalise (denorm);
726   }
727 
728                                  //Repeat for rej blobs
729   blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
730   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
731     blob = blob_it.data ();
732                                  //denormalise it
733     blob->baseline_denormalise (denorm);
734   }
735 
736                                  //it's not normalised
737   flags.set_bit (W_NORMALIZED, FALSE);
738 }
739 
740 
741 /**********************************************************************
742  * WERD::print
743  *
744  * Display members
745  **********************************************************************/
746 
print(FILE *)747 void WERD::print(        //print
748                  FILE *  //file to print on
749                 ) {
750   tprintf ("Blanks= %d\n", blanks);
751   bounding_box ().print ();
752   tprintf ("Flags = %d = 0%o\n", flags.val, flags.val);
753   tprintf ("   W_SEGMENTED = %s\n",
754     flags.bit (W_SEGMENTED) ? "TRUE" : "FALSE ");
755   tprintf ("   W_ITALIC = %s\n", flags.bit (W_ITALIC) ? "TRUE" : "FALSE ");
756   tprintf ("   W_BOL = %s\n", flags.bit (W_BOL) ? "TRUE" : "FALSE ");
757   tprintf ("   W_EOL = %s\n", flags.bit (W_EOL) ? "TRUE" : "FALSE ");
758   tprintf ("   W_NORMALIZED = %s\n",
759     flags.bit (W_NORMALIZED) ? "TRUE" : "FALSE ");
760   tprintf ("   W_POLYGON = %s\n", flags.bit (W_POLYGON) ? "TRUE" : "FALSE ");
761   tprintf ("   W_LINEARC = %s\n", flags.bit (W_LINEARC) ? "TRUE" : "FALSE ");
762   tprintf ("   W_DONT_CHOP = %s\n",
763     flags.bit (W_DONT_CHOP) ? "TRUE" : "FALSE ");
764   tprintf ("   W_REP_CHAR = %s\n",
765     flags.bit (W_REP_CHAR) ? "TRUE" : "FALSE ");
766   tprintf ("   W_FUZZY_SP = %s\n",
767     flags.bit (W_FUZZY_SP) ? "TRUE" : "FALSE ");
768   tprintf ("   W_FUZZY_NON = %s\n",
769     flags.bit (W_FUZZY_NON) ? "TRUE" : "FALSE ");
770   tprintf ("Correct= %s\n", correct.string ());
771   tprintf ("Rejected cblob count = %d\n", rej_cblobs.length ());
772 }
773 
774 
775 /**********************************************************************
776  * WERD::plot
777  *
778  * Draw the WERD in the given colour.
779  **********************************************************************/
780 
781 #ifndef GRAPHICS_DISABLED
plot(ScrollView * window,ScrollView::Color colour,BOOL8 solid)782 void WERD::plot(                //draw it
783                 ScrollView* window,  //window to draw in
784                 ScrollView::Color colour,  //colour to draw in
785                 BOOL8 solid     //draw larcs solid
786                ) {
787   if (flags.bit (W_POLYGON)) {
788                                  //polygons
789     PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
790 
791     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
792       it.data ()->plot (window, colour, colour);
793     }
794   }
795   //      else if (flags.bit(W_LINEARC))
796   //      {
797   //              LARC_BLOB_IT                    it=(LARC_BLOB_LIST*)(&cblobs);
798 
799   //              for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() )
800   //              {
801   //                      it.data()->plot(window,solid,colour,solid ? BLACK : colour);
802   //              }
803   //      }
804   else {
805     C_BLOB_IT it = &cblobs;      //blobs of WERD
806 
807     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
808       it.data ()->plot (window, colour, colour);
809     }
810   }
811   plot_rej_blobs(window, solid);
812 }
813 #endif
814 
815 
816 /**********************************************************************
817  * WERD::plot
818  *
819  * Draw the WERD in rainbow colours.
820  **********************************************************************/
821 
822 #ifndef GRAPHICS_DISABLED
plot(ScrollView * window,BOOL8 solid)823 void WERD::plot(                //draw it
824                 ScrollView* window,  //window to draw in
825                 BOOL8 solid     //draw larcs solid
826                ) {
827   ScrollView::Color colour = FIRST_COLOUR;  //current colour
828   if (flags.bit (W_POLYGON)) {
829                                  //polygons
830     PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
831 
832     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
833       it.data ()->plot (window, colour, CHILD_COLOUR);
834       colour = (ScrollView::Color) (colour + 1);
835       if (colour == LAST_COLOUR)
836         colour = FIRST_COLOUR;   //cycle round
837     }
838   }
839   //      else if (flags.bit(W_LINEARC))
840   //      {
841   //              LARC_BLOB_IT                    it=(LARC_BLOB_LIST*)(&cblobs);
842 
843   //              for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() )
844   //              {
845   //                      it.data()->plot(window,solid,colour,solid ? BLACK : CHILD_COLOUR);
846   //                      colour=(COLOUR)(colour+1);
847   //                      if (colour==LAST_COLOUR)
848   //                              colour=FIRST_COLOUR;
849   //              }
850   //      }
851   else {
852     C_BLOB_IT it = &cblobs;      //blobs of WERD
853 
854     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
855       it.data ()->plot (window, colour, CHILD_COLOUR);
856       colour = (ScrollView::Color) (colour + 1);
857       if (colour == LAST_COLOUR)
858         colour = FIRST_COLOUR;   //cycle round
859     }
860   }
861   plot_rej_blobs(window, solid);
862 }
863 #endif
864 
865 
866 /**********************************************************************
867  * WERD::plot_rej_blobs
868  *
869  * Draw the WERD rejected blobs - ALWAYS GREY
870  **********************************************************************/
871 
872 #ifndef GRAPHICS_DISABLED
plot_rej_blobs(ScrollView * window,BOOL8 solid)873 void WERD::plot_rej_blobs(                //draw it
874                           ScrollView* window,  //window to draw in
875                           BOOL8 solid     //draw larcs solid
876                          ) {
877   if (flags.bit (W_POLYGON)) {
878     PBLOB_IT it = (PBLOB_LIST *) (&rej_cblobs);
879     //polygons
880 
881     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
882       it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY);
883     }
884   } else {
885     C_BLOB_IT it = &rej_cblobs;  //blobs of WERD
886 
887     for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
888       it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY);
889     }
890   }
891 }
892 #endif
893 
894 
895 /**********************************************************************
896  * WERD::shallow_copy()
897  *
898  * Make a shallow copy of a word
899  **********************************************************************/
900 
shallow_copy()901 WERD *WERD::shallow_copy() {  //shallow copy
902   WERD *new_word = new WERD;
903 
904   new_word->blanks = blanks;
905   new_word->flags = flags;
906   new_word->dummy = dummy;
907   new_word->correct = correct;
908   return new_word;
909 }
910 
911 
912 /**********************************************************************
913  * WERD::operator=
914  *
915  * Assign a word, DEEP copying the blob list
916  **********************************************************************/
917 
operator =(const WERD & source)918 WERD & WERD::operator= (         //assign words
919 const WERD & source              //from this
920 ) {
921   this->ELIST_LINK::operator= (source);
922   blanks = source.blanks;
923   flags = source.flags;
924   dummy = source.dummy;
925   correct = source.correct;
926   if (flags.bit (W_POLYGON)) {
927     if (!cblobs.empty ())
928       reinterpret_cast<PBLOB_LIST*>(&cblobs)->clear();
929     reinterpret_cast<PBLOB_LIST*>(&cblobs)->deep_copy(
930       reinterpret_cast<const PBLOB_LIST*>(&source.cblobs), &PBLOB::deep_copy);
931 
932     if (!rej_cblobs.empty ())
933       reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->clear();
934     reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->deep_copy(
935       reinterpret_cast<const PBLOB_LIST*>(&source.rej_cblobs),
936       &PBLOB::deep_copy);
937   } else {
938     if (!cblobs.empty ())
939       cblobs.clear ();
940     cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
941 
942     if (!rej_cblobs.empty ())
943       rej_cblobs.clear ();
944     rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
945   }
946   return *this;
947 }
948 
949 
950 /**********************************************************************
951  *  word_comparator()
952  *
953  *  word comparator used to sort a word list so that words are in increasing
954  *  order of left edge.
955  **********************************************************************/
956 
word_comparator(const void * word1p,const void * word2p)957 int word_comparator(                     //sort blobs
958                     const void *word1p,  //ptr to ptr to word1
959                     const void *word2p   //ptr to ptr to word2
960                    ) {
961   WERD *
962     word1 = *(WERD **) word1p;
963   WERD *
964     word2 = *(WERD **) word2p;
965 
966   return word1->bounding_box ().left () - word2->bounding_box ().left ();
967 }
968