1 /**********************************************************************
2 * File: werd.cpp (Formerly word.c)
3 * Description: Code for the WERD class.
4 * Author: Ray Smith
5 * Created: Tue Oct 08 14:32:12 BST 1991
6 *
7 * (C) Copyright 1991, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #include "mfcpch.h"
21 #include "blckerr.h"
22 #include "linlsq.h"
23 #include "werd.h"
24
25 #define FIRST_COLOUR ScrollView::RED //first rainbow colour
26 //last rainbow colour
27 #define LAST_COLOUR ScrollView::AQUAMARINE
28 #define CHILD_COLOUR ScrollView::BROWN //colour of children
29
30 const ERRCODE CANT_SCALE_EDGESTEPS =
31 "Attempted to scale an edgestep format word";
32
33 #define EXTERN
34
35 EXTERN BOOL_VAR (bln_numericmode, 0, "Optimize for numbers");
36 EXTERN INT_VAR (bln_x_height, 128, "Baseline Normalisation X-height");
37 EXTERN INT_VAR (bln_baseline_offset, 64, "Baseline Norm. offset of baseline");
38 EXTERN double_VAR (bln_blshift_maxshift, -1.0,
39 "Fraction of xh before shifting");
40 EXTERN double_VAR (bln_blshift_xfraction, 0.75,
41 "Size fraction of xh before shifting");
42
ELISTIZE_S(WERD)43 ELISTIZE_S (WERD)
44 /**********************************************************************
45 * WERD::WERD
46 *
47 * Constructor to build a WERD from a list of C_BLOBs.
48 * The C_BLOBs are not copied so the source list is emptied.
49 **********************************************************************/
50 WERD::WERD ( //constructor
51 C_BLOB_LIST * blob_list, //in word order
52 uinT8 blank_count, //blanks in front
53 const char *text //correct text
54 ):
55 flags (0),
56 correct(text) {
57 C_BLOB_IT start_it = blob_list;//iterator
58 C_BLOB_IT end_it = blob_list; //another
59 //rejected blobs in wd
60 C_BLOB_IT rej_cblob_it = &rej_cblobs;
61 C_OUTLINE_IT c_outline_it; //coutline iterator
62 BOOL8 blob_inverted;
63 BOOL8 reject_blob;
64 inT16 inverted_vote = 0;
65 inT16 non_inverted_vote = 0;
66
67 while (!end_it.at_last ())
68 end_it.forward (); //move to last
69 //move to our list
70 cblobs.assign_to_sublist (&start_it, &end_it);
71 blanks = blank_count;
72 /*
73 Set white on black flag for the WERD, moving any duff blobs onto the
74 rej_cblobs list.
75 First, walk the cblobs checking the inverse flag for each outline of each
76 cblob. If a cblob has inconsistent flag settings for its different
77 outlines, move the blob to the reject list. Otherwise, increment the
78 appropriate w-on-b or b-on-w vote for the word.
79
80 Now set the inversion flag for the WERD by maximum vote.
81
82 Walk the blobs again, moving any blob whose inversion flag does not agree
83 with the concencus onto the reject list.
84 */
85 start_it.set_to_list (&cblobs);
86 if (start_it.empty ())
87 return;
88 for (start_it.mark_cycle_pt ();
89 !start_it.cycled_list (); start_it.forward ()) {
90 c_outline_it.set_to_list (start_it.data ()->out_list ());
91 blob_inverted = c_outline_it.data ()->flag (COUT_INVERSE);
92 reject_blob = FALSE;
93 for (c_outline_it.mark_cycle_pt ();
94 !c_outline_it.cycled_list () && !reject_blob;
95 c_outline_it.forward ()) {
96 reject_blob =
97 c_outline_it.data ()->flag (COUT_INVERSE) != blob_inverted;
98 }
99 if (reject_blob)
100 rej_cblob_it.add_after_then_move (start_it.extract ());
101 else {
102 if (blob_inverted)
103 inverted_vote++;
104 else
105 non_inverted_vote++;
106 }
107 }
108
109 flags.set_bit (W_INVERSE, (inverted_vote > non_inverted_vote));
110
111 start_it.set_to_list (&cblobs);
112 if (start_it.empty ())
113 return;
114 for (start_it.mark_cycle_pt ();
115 !start_it.cycled_list (); start_it.forward ()) {
116 c_outline_it.set_to_list (start_it.data ()->out_list ());
117 if (c_outline_it.data ()->flag (COUT_INVERSE) != flags.bit (W_INVERSE))
118 rej_cblob_it.add_after_then_move (start_it.extract ());
119 }
120 }
121
122
123 /**********************************************************************
124 * WERD::WERD
125 *
126 * Constructor to build a WERD from a list of BLOBs.
127 * The BLOBs are not copied so the source list is emptied.
128 **********************************************************************/
129
WERD(PBLOB_LIST * blob_list,uinT8 blank_count,const char * text)130 WERD::WERD ( //constructor
131 PBLOB_LIST * blob_list, //in word order
132 uinT8 blank_count, //blanks in front
133 const char *text //correct text
134 ):
135 flags (0),
136 correct(text) {
137 PBLOB_IT start_it = blob_list; //iterator
138 PBLOB_IT end_it = blob_list; //another
139
140 while (!end_it.at_last ())
141 end_it.forward (); //move to last
142 ((PBLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
143 //move to our list
144 //it's a polygon
145 flags.set_bit (W_POLYGON, TRUE);
146 blanks = blank_count;
147 // fprintf(stderr,"Wrong constructor!!!!\n");
148 }
149
150
151 /**********************************************************************
152 * WERD::WERD
153 *
154 * Constructor to build a WERD from a list of BLOBs.
155 * The BLOBs are not copied so the source list is emptied.
156 **********************************************************************/
157
WERD(PBLOB_LIST * blob_list,WERD * clone)158 WERD::WERD ( //constructor
159 PBLOB_LIST * blob_list, //in word order
160 WERD * clone //sorce of flags
161 ):flags (clone->flags), correct (clone->correct) {
162 PBLOB_IT start_it = blob_list; //iterator
163 PBLOB_IT end_it = blob_list; //another
164
165 while (!end_it.at_last ())
166 end_it.forward (); //move to last
167 ((PBLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
168 //move to our list
169 blanks = clone->blanks;
170 // fprintf(stderr,"Wrong constructor!!!!\n");
171 }
172
173
174 /**********************************************************************
175 * WERD::WERD
176 *
177 * Constructor to build a WERD from a list of C_BLOBs.
178 * The C_BLOBs are not copied so the source list is emptied.
179 **********************************************************************/
180
WERD(C_BLOB_LIST * blob_list,WERD * clone)181 WERD::WERD ( //constructor
182 C_BLOB_LIST * blob_list, //in word order
183 WERD * clone //sorce of flags
184 ):flags (clone->flags), correct (clone->correct) {
185 C_BLOB_IT start_it = blob_list;//iterator
186 C_BLOB_IT end_it = blob_list; //another
187
188 while (!end_it.at_last ())
189 end_it.forward (); //move to last
190 ((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
191 //move to our list
192 blanks = clone->blanks;
193 // fprintf(stderr,"Wrong constructor!!!!\n");
194 }
195
196
197 /**********************************************************************
198 * WERD::poly_copy
199 *
200 * Make a copy of a WERD in polygon format.
201 * The source WERD is untouched.
202 **********************************************************************/
203
poly_copy(float xheight)204 WERD *WERD::poly_copy( //make a poly copy
205 float xheight //row height
206 ) {
207 PBLOB *blob; //new blob
208 WERD *result = new WERD; //output word
209 C_BLOB_IT src_it = &cblobs; //iterator
210 // LARC_BLOB_IT larc_it=(LARC_BLOB_LIST*)(&cblobs);
211 PBLOB_IT dest_it = (PBLOB_LIST *) (&result->cblobs);
212 //another
213
214 if (flags.bit (W_POLYGON)) {
215 *result = *this; //just copy it
216 }
217 else {
218 result->flags = flags;
219 result->correct = correct; //copy info
220 result->dummy = dummy;
221 if (!src_it.empty ()) {
222 // if (flags.bit(W_LINEARC))
223 // {
224 // do
225 // {
226 // blob=new PBLOB;
227 // poly_linearc_outlines(larc_it.data()->out_list(),
228 // blob->out_list()); //convert outlines
229 // dest_it.add_after_then_move(blob); //add to dest list
230 // larc_it.forward();
231 // }
232 // while (!larc_it.at_first());
233 // }
234 // else
235 // {
236 do {
237 blob = new PBLOB (src_it.data (), xheight);
238 //convert blob
239 //add to dest list
240 dest_it.add_after_then_move (blob);
241 src_it.forward ();
242 }
243 while (!src_it.at_first ());
244 // }
245 }
246 if (!rej_cblobs.empty ()) {
247 /* Polygonal approx of reject blobs */
248 src_it.set_to_list (&rej_cblobs);
249 dest_it = (PBLOB_LIST *) (&result->rej_cblobs);
250 do {
251 //convert blob
252 blob = new PBLOB (src_it.data (), xheight);
253 //add to dest list
254 dest_it.add_after_then_move (blob);
255 src_it.forward ();
256 }
257 while (!src_it.at_first ());
258 }
259 //polygon now
260 result->flags.set_bit (W_POLYGON, TRUE);
261 result->blanks = blanks;
262 }
263 return result;
264 }
265
266
267 /**********************************************************************
268 * WERD::bounding_box
269 *
270 * Return the bounding box of the WERD.
271 * This is quite a mess to compute!
272 * ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the
273 * words on the row were re-sorted. The original words were built with reject
274 * blobs included. The FUZZY SPACE flags were set accordingly. If ALL the
275 * blobs in a word are rejected the BB for the word is NULL, causing the sort
276 * to screw up, leading to the erroneous possibility of the first word in a
277 * row being marked as FUZZY space.
278 **********************************************************************/
279
bounding_box()280 TBOX WERD::bounding_box() { //bounding box
281 TBOX box; //box being built
282 //rejected blobs in wd
283 C_BLOB_IT rej_cblob_it = &rej_cblobs;
284
285 for (rej_cblob_it.mark_cycle_pt ();
286 !rej_cblob_it.cycled_list (); rej_cblob_it.forward ()) {
287 box += rej_cblob_it.data ()->bounding_box ();
288 }
289
290 if (flags.bit (W_POLYGON)) {
291 //polygons
292 PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
293
294 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
295 box += it.data ()->bounding_box ();
296 }
297 }
298 else {
299 C_BLOB_IT it = &cblobs; //blobs of WERD
300
301 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
302 box += it.data ()->bounding_box ();
303 }
304 }
305 return box;
306 }
307
308
309 /**********************************************************************
310 * WERD::move
311 *
312 * Reposition WERD by vector
313 * NOTE!! REJECT CBLOBS ARE NOT MOVED
314 **********************************************************************/
315
move(const ICOORD vec)316 void WERD::move( // reposition WERD
317 const ICOORD vec // by vector
318 ) {
319 PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
320 // blob iterator
321 // LARC_BLOB_IT lblob_it((LARC_BLOB_LIST*)&cblobs);
322 C_BLOB_IT cblob_it(&cblobs); // cblob iterator
323
324 if (flags.bit (W_POLYGON))
325 for (blob_it.mark_cycle_pt ();
326 !blob_it.cycled_list (); blob_it.forward ())
327 blob_it.data ()->move (vec);
328 // else if (flags.bit(W_LINEARC))
329 // for( lblob_it.mark_cycle_pt();
330 // !lblob_it.cycled_list();
331 // lblob_it.forward() )
332 // lblob_it.data()->move( vec );
333 else
334 for (cblob_it.mark_cycle_pt ();
335 !cblob_it.cycled_list (); cblob_it.forward ())
336 cblob_it.data ()->move (vec);
337 }
338
339
340 /**********************************************************************
341 * WERD::scale
342 *
343 * Scale WERD by multiplier
344 **********************************************************************/
345
scale(const float f)346 void WERD::scale( // scale WERD
347 const float f // by multiplier
348 ) {
349 PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
350 // blob iterator
351 // LARC_BLOB_IT lblob_it((LARC_BLOB_LIST*)&cblobs);
352
353 if (flags.bit (W_POLYGON))
354 for (blob_it.mark_cycle_pt ();
355 !blob_it.cycled_list (); blob_it.forward ())
356 blob_it.data ()->scale (f);
357 // else if (flags.bit(W_LINEARC))
358 // for (lblob_it.mark_cycle_pt();
359 // !lblob_it.cycled_list();
360 // lblob_it.forward() )
361 // lblob_it.data()->scale( f );
362 else
363 CANT_SCALE_EDGESTEPS.error ("WERD::scale", ABORT, NULL);
364 }
365
366
367 /**********************************************************************
368 * WERD::join_on
369 *
370 * Join other word onto this one. Delete the old word.
371 **********************************************************************/
372
join_on(WERD * & other)373 void WERD::join_on( // join WERD
374 WERD *&other //other word
375 ) {
376 PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
377 // blob iterator
378 PBLOB_IT src_it ((PBLOB_LIST *) & other->cblobs);
379 C_BLOB_IT rej_cblob_it(&rej_cblobs);
380 C_BLOB_IT src_rej_it (&other->rej_cblobs);
381
382 while (!src_it.empty ()) {
383 blob_it.add_to_end (src_it.extract ());
384 src_it.forward ();
385 }
386 while (!src_rej_it.empty ()) {
387 rej_cblob_it.add_to_end (src_rej_it.extract ());
388 src_rej_it.forward ();
389 }
390 }
391
392
393 /**********************************************************************
394 * WERD::copy_on
395 *
396 * Copy blobs from other word onto this one.
397 **********************************************************************/
398
copy_on(WERD * & other)399 void WERD::copy_on( //copy blobs
400 WERD *&other //from other
401 ) {
402 if (flags.bit (W_POLYGON)) {
403 PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
404 // blob iterator
405 PBLOB_LIST blobs;
406
407 blobs.deep_copy(reinterpret_cast<PBLOB_LIST*>(&other->cblobs),
408 &PBLOB::deep_copy);
409 blob_it.move_to_last ();
410 blob_it.add_list_after (&blobs);
411 } else {
412 C_BLOB_IT c_blob_it(&cblobs);
413 C_BLOB_LIST c_blobs;
414
415 c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
416 c_blob_it.move_to_last ();
417 c_blob_it.add_list_after (&c_blobs);
418 }
419 if (!other->rej_cblobs.empty ()) {
420 C_BLOB_IT rej_c_blob_it(&rej_cblobs);
421 C_BLOB_LIST new_rej_c_blobs;
422
423 new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
424 rej_c_blob_it.move_to_last ();
425 rej_c_blob_it.add_list_after (&new_rej_c_blobs);
426 }
427 }
428
429
430 /**********************************************************************
431 * WERD::baseline_normalise
432 *
433 * Baseline Normalise the word in Tesseract style. (I.e origin at centre of
434 * word at bottom. x-height region scaled to region y =
435 * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
436 * - usually 64..192)
437 **********************************************************************/
438
baseline_normalise(ROW * row,DENORM * denorm)439 void WERD::baseline_normalise( // Tess style BL Norm
440 ROW *row,
441 DENORM *denorm //antidote
442 ) {
443 baseline_normalise_x (row, row->x_height (), denorm);
444 //Use standard x ht
445 }
446
447
448 /**********************************************************************
449 * WERD::baseline_normalise_x
450 *
451 * Baseline Normalise the word in Tesseract style. (I.e origin at centre of
452 * word at bottom. x-height region scaled to region y =
453 * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
454 * - usually 64..192)
455 * USE A SPECIFIED X-HEIGHT - NOT NECESSARILY THE ONE IN row
456 **********************************************************************/
457
baseline_normalise_x(ROW * row,float x_height,DENORM * denorm)458 void WERD::baseline_normalise_x( // Tess style BL Norm
459 ROW *row,
460 float x_height, //non standard value
461 DENORM *denorm //antidote
462 ) {
463 BOOL8 using_row; //as baseline
464 float blob_x_centre; //middle of blob
465 float blob_offset; //bottom miss
466 float top_offset; //top miss
467 float blob_x_height; //xh for this blob
468 inT16 segments; //no of segments
469 inT16 segment; //current segment
470 DENORM_SEG *segs; //array of segments
471 float mean_x; //mean xheight
472 inT32 x_count; //no of xs
473 TBOX word_box = bounding_box ();//word bounding box
474 TBOX blob_box; //blob bounding box
475 PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
476 // blob iterator
477 PBLOB *blob;
478 LLSQ line; //fitted line
479 double line_m, line_c; //fitted line
480 //inverse norm
481 DENORM antidote (word_box.left () +
482
483 (word_box.right () - word_box.left ()) / 2.0,
484 bln_x_height / x_height, row);
485
486 if (!flags.bit (W_POLYGON)) {
487 WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
488 "Need to poly approx");
489 }
490
491 if (flags.bit (W_NORMALIZED)) {
492 WRONG_WORD.error ("WERD::baseline_normalise", ABORT,
493 "Baseline unnormalised");
494 }
495
496 if (bln_numericmode) {
497 segs = new DENORM_SEG[blob_it.length ()];
498 segments = 0;
499 float factor; // For scaling to baseline normalised size.
500 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
501 blob_it.forward ()) {
502 blob = blob_it.data ();
503 blob_box = blob->bounding_box ();
504 blob->move (FCOORD (-antidote.origin (),
505 -blob_box.bottom ()));
506 factor = bln_x_height * 4.0f / (3 * blob_box.height ());
507 // Constrain the scale factor as target numbers should be either
508 // cap height already or xheight.
509 if (factor < antidote.scale())
510 factor = antidote.scale();
511 else if (factor > antidote.scale() * 1.5f)
512 factor = antidote.scale() * 1.5f;
513 blob->scale (factor);
514 blob->move (FCOORD (0.0, bln_baseline_offset));
515 segs[segments].xstart = blob->bounding_box().left();
516 segs[segments].ycoord = blob_box.bottom();
517 segs[segments++].scale_factor = factor;
518 }
519 antidote = DENORM (antidote.origin (), antidote.scale (),
520 0.0f, 0.0f, segments, segs, true, row);
521 delete [] segs;
522
523 //Repeat for rej blobs
524 blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
525 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
526 blob_it.forward ()) {
527 blob = blob_it.data ();
528 blob_box = blob->bounding_box ();
529 blob->move (FCOORD (-antidote.origin (),
530 -blob_box.bottom ()));
531 blob->scale (bln_x_height * 4.0f / (3 * blob_box.height ()));
532 blob->move (FCOORD (0.0, bln_baseline_offset));
533 }
534 }
535 else if (bln_blshift_maxshift < 0) {
536 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
537 blob_it.forward ()) {
538 blob = blob_it.data ();
539 blob_box = blob->bounding_box ();
540 blob_x_centre = blob_box.left () +
541 (blob_box.right () - blob_box.left ()) / 2.0;
542 blob->move (FCOORD (-antidote.origin (),
543 -(row->base_line (blob_x_centre))));
544 blob->scale (antidote.scale ());
545 blob->move (FCOORD (0.0, bln_baseline_offset));
546 }
547
548 //Repeat for rej blobs
549 blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
550 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
551 blob_it.forward ()) {
552 blob = blob_it.data ();
553 blob_box = blob->bounding_box ();
554 blob_x_centre = blob_box.left () +
555 (blob_box.right () - blob_box.left ()) / 2.0;
556 blob->move (FCOORD (-antidote.origin (),
557 -(row->base_line (blob_x_centre))));
558 blob->scale (antidote.scale ());
559 blob->move (FCOORD (0.0, bln_baseline_offset));
560 }
561
562 }
563 else {
564 mean_x = x_height;
565 x_count = 1;
566 segs = new DENORM_SEG[blob_it.length ()];
567 segments = 0;
568 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
569 blob_it.forward ()) {
570 blob = blob_it.data ();
571 blob_box = blob->bounding_box ();
572 if (blob_box.height () > bln_blshift_xfraction * x_height) {
573 blob_x_centre = blob_box.left () +
574 (blob_box.right () - blob_box.left ()) / 2.0;
575 blob_offset =
576 blob_box.bottom () - row->base_line (blob_x_centre);
577 top_offset = blob_offset + blob_box.height () - x_height - 1;
578 blob_x_height = top_offset + x_height;
579 if (top_offset < 0)
580 top_offset = -top_offset;
581 if (blob_offset < 0)
582 blob_offset = -blob_offset;
583 if (blob_offset < bln_blshift_maxshift * x_height) {
584 segs[segments].ycoord = blob_box.bottom ();
585 line.add (blob_x_centre, blob_box.bottom ());
586 if (top_offset < bln_blshift_maxshift * x_height) {
587 segs[segments].scale_factor = blob_box.height () - 1.0f;
588 x_count++;
589 }
590 else
591 segs[segments].scale_factor = 0.0f;
592 //fix it later
593 }
594 else {
595 //not a goer
596 segs[segments].ycoord = -MAX_INT32;
597 if (top_offset < bln_blshift_maxshift * x_height) {
598 segs[segments].scale_factor = blob_x_height;
599 x_count++;
600 }
601 else
602 segs[segments].scale_factor = 0.0f;
603 //fix it later
604 }
605 }
606 else {
607 segs[segments].scale_factor = 0.0f;
608 segs[segments].ycoord = -MAX_INT32;
609 }
610 segs[segments].xstart = blob_box.left ();
611 segments++;
612 }
613 using_row = line.count () <= 1;
614 if (!using_row) {
615 line_m = line.m ();
616 line_c = line.c (line_m);
617 }
618 else
619 line_m = line_c = 0;
620 segments = 0;
621 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
622 blob_it.forward ()) {
623 blob = blob_it.data ();
624 blob_box = blob->bounding_box ();
625 blob_x_centre = blob_box.left () +
626 (blob_box.right () - blob_box.left ()) / 2.0;
627 if (segs[segments].ycoord == -MAX_INT32
628 && segs[segments].scale_factor != 0 && !using_row) {
629 blob_offset = line_m * blob_x_centre + line_c;
630 segs[segments].scale_factor = blob_box.top () - blob_offset;
631 }
632 if (segs[segments].scale_factor != 0)
633 mean_x += segs[segments].scale_factor;
634 segments++;
635 }
636 mean_x /= x_count;
637 // printf("mean x=%g, count=%d, line_m=%g, line_c=%g\n",
638 // mean_x,x_count,line_m,line_c);
639 segments = 0;
640 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
641 blob_it.forward ()) {
642 blob = blob_it.data ();
643 blob_box = blob->bounding_box ();
644 blob_x_centre = blob_box.left () +
645 (blob_box.right () - blob_box.left ()) / 2.0;
646 if (segs[segments].ycoord != -MAX_INT32)
647 blob_offset = (float) segs[segments].ycoord;
648 else if (using_row)
649 blob_offset = row->base_line (blob_x_centre);
650 else
651 blob_offset = line_m * blob_x_centre + line_c;
652 if (segs[segments].scale_factor == 0)
653 segs[segments].scale_factor = mean_x;
654 segs[segments].scale_factor =
655 bln_x_height / segs[segments].scale_factor;
656 // printf("Blob sf=%g, top=%d, bot=%d, base=%g\n",
657 // segs[segments].scale_factor,blob_box.top(),
658 // blob_box.bottom(),blob_offset);
659 blob->move (FCOORD (-antidote.origin (), -blob_offset));
660 blob->
661 scale (FCOORD (antidote.scale (), segs[segments].scale_factor));
662 blob->move (FCOORD (0.0, bln_baseline_offset));
663 segments++;
664 }
665
666 //Repeat for rej blobs
667 blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
668 segment = 0;
669 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
670 blob_it.forward ()) {
671 blob = blob_it.data ();
672 blob_box = blob->bounding_box ();
673 blob_x_centre = blob_box.left () +
674 (blob_box.right () - blob_box.left ()) / 2.0;
675 while (segment < segments - 1
676 && segs[segment + 1].xstart <= blob_x_centre)
677 segment++;
678 if (segs[segment].ycoord != -MAX_INT32)
679 blob_offset = (float) segs[segment].ycoord;
680 else if (using_row)
681 blob_offset = row->base_line (blob_x_centre);
682 else
683 blob_offset = line_m * blob_x_centre + line_c;
684 blob->move (FCOORD (-antidote.origin (), -blob_offset));
685 blob->
686 scale (FCOORD (antidote.scale (), segs[segment].scale_factor));
687 blob->move (FCOORD (0.0, bln_baseline_offset));
688 }
689 if (line.count () > 0 || x_count > 1)
690 antidote = DENORM (antidote.origin (), antidote.scale (),
691 line_m, line_c, segments, segs, using_row, row);
692 delete[]segs;
693 }
694 if (denorm != NULL)
695 *denorm = antidote;
696 //it's normalised
697 flags.set_bit (W_NORMALIZED, TRUE);
698 }
699
700
701 /**********************************************************************
702 * WERD::baseline_denormalise
703 *
704 * Baseline DeNormalise the word in Tesseract style. (I.e origin at centre of
705 * word at bottom. x-height region scaled to region y =
706 * (bln_baseline_offset)..(bln_baseline_offset + bln_x_height)
707 * - usually 64..192)
708 **********************************************************************/
709
baseline_denormalise(const DENORM * denorm)710 void WERD::baseline_denormalise( // Tess style BL Norm
711 const DENORM *denorm //antidote
712 ) {
713 PBLOB_IT blob_it ((PBLOB_LIST *) & cblobs);
714 // blob iterator
715 PBLOB *blob;
716
717 if (!flags.bit (W_NORMALIZED)) {
718 WRONG_WORD.error ("WERD::baseline_denormalise", ABORT,
719 "Baseline normalised");
720 }
721
722 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
723 blob = blob_it.data ();
724 //denormalise it
725 blob->baseline_denormalise (denorm);
726 }
727
728 //Repeat for rej blobs
729 blob_it.set_to_list ((PBLOB_LIST *) & rej_cblobs);
730 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
731 blob = blob_it.data ();
732 //denormalise it
733 blob->baseline_denormalise (denorm);
734 }
735
736 //it's not normalised
737 flags.set_bit (W_NORMALIZED, FALSE);
738 }
739
740
741 /**********************************************************************
742 * WERD::print
743 *
744 * Display members
745 **********************************************************************/
746
print(FILE *)747 void WERD::print( //print
748 FILE * //file to print on
749 ) {
750 tprintf ("Blanks= %d\n", blanks);
751 bounding_box ().print ();
752 tprintf ("Flags = %d = 0%o\n", flags.val, flags.val);
753 tprintf (" W_SEGMENTED = %s\n",
754 flags.bit (W_SEGMENTED) ? "TRUE" : "FALSE ");
755 tprintf (" W_ITALIC = %s\n", flags.bit (W_ITALIC) ? "TRUE" : "FALSE ");
756 tprintf (" W_BOL = %s\n", flags.bit (W_BOL) ? "TRUE" : "FALSE ");
757 tprintf (" W_EOL = %s\n", flags.bit (W_EOL) ? "TRUE" : "FALSE ");
758 tprintf (" W_NORMALIZED = %s\n",
759 flags.bit (W_NORMALIZED) ? "TRUE" : "FALSE ");
760 tprintf (" W_POLYGON = %s\n", flags.bit (W_POLYGON) ? "TRUE" : "FALSE ");
761 tprintf (" W_LINEARC = %s\n", flags.bit (W_LINEARC) ? "TRUE" : "FALSE ");
762 tprintf (" W_DONT_CHOP = %s\n",
763 flags.bit (W_DONT_CHOP) ? "TRUE" : "FALSE ");
764 tprintf (" W_REP_CHAR = %s\n",
765 flags.bit (W_REP_CHAR) ? "TRUE" : "FALSE ");
766 tprintf (" W_FUZZY_SP = %s\n",
767 flags.bit (W_FUZZY_SP) ? "TRUE" : "FALSE ");
768 tprintf (" W_FUZZY_NON = %s\n",
769 flags.bit (W_FUZZY_NON) ? "TRUE" : "FALSE ");
770 tprintf ("Correct= %s\n", correct.string ());
771 tprintf ("Rejected cblob count = %d\n", rej_cblobs.length ());
772 }
773
774
775 /**********************************************************************
776 * WERD::plot
777 *
778 * Draw the WERD in the given colour.
779 **********************************************************************/
780
781 #ifndef GRAPHICS_DISABLED
plot(ScrollView * window,ScrollView::Color colour,BOOL8 solid)782 void WERD::plot( //draw it
783 ScrollView* window, //window to draw in
784 ScrollView::Color colour, //colour to draw in
785 BOOL8 solid //draw larcs solid
786 ) {
787 if (flags.bit (W_POLYGON)) {
788 //polygons
789 PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
790
791 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
792 it.data ()->plot (window, colour, colour);
793 }
794 }
795 // else if (flags.bit(W_LINEARC))
796 // {
797 // LARC_BLOB_IT it=(LARC_BLOB_LIST*)(&cblobs);
798
799 // for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() )
800 // {
801 // it.data()->plot(window,solid,colour,solid ? BLACK : colour);
802 // }
803 // }
804 else {
805 C_BLOB_IT it = &cblobs; //blobs of WERD
806
807 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
808 it.data ()->plot (window, colour, colour);
809 }
810 }
811 plot_rej_blobs(window, solid);
812 }
813 #endif
814
815
816 /**********************************************************************
817 * WERD::plot
818 *
819 * Draw the WERD in rainbow colours.
820 **********************************************************************/
821
822 #ifndef GRAPHICS_DISABLED
plot(ScrollView * window,BOOL8 solid)823 void WERD::plot( //draw it
824 ScrollView* window, //window to draw in
825 BOOL8 solid //draw larcs solid
826 ) {
827 ScrollView::Color colour = FIRST_COLOUR; //current colour
828 if (flags.bit (W_POLYGON)) {
829 //polygons
830 PBLOB_IT it = (PBLOB_LIST *) (&cblobs);
831
832 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
833 it.data ()->plot (window, colour, CHILD_COLOUR);
834 colour = (ScrollView::Color) (colour + 1);
835 if (colour == LAST_COLOUR)
836 colour = FIRST_COLOUR; //cycle round
837 }
838 }
839 // else if (flags.bit(W_LINEARC))
840 // {
841 // LARC_BLOB_IT it=(LARC_BLOB_LIST*)(&cblobs);
842
843 // for ( it.mark_cycle_pt(); !it.cycled_list(); it.forward() )
844 // {
845 // it.data()->plot(window,solid,colour,solid ? BLACK : CHILD_COLOUR);
846 // colour=(COLOUR)(colour+1);
847 // if (colour==LAST_COLOUR)
848 // colour=FIRST_COLOUR;
849 // }
850 // }
851 else {
852 C_BLOB_IT it = &cblobs; //blobs of WERD
853
854 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
855 it.data ()->plot (window, colour, CHILD_COLOUR);
856 colour = (ScrollView::Color) (colour + 1);
857 if (colour == LAST_COLOUR)
858 colour = FIRST_COLOUR; //cycle round
859 }
860 }
861 plot_rej_blobs(window, solid);
862 }
863 #endif
864
865
866 /**********************************************************************
867 * WERD::plot_rej_blobs
868 *
869 * Draw the WERD rejected blobs - ALWAYS GREY
870 **********************************************************************/
871
872 #ifndef GRAPHICS_DISABLED
plot_rej_blobs(ScrollView * window,BOOL8 solid)873 void WERD::plot_rej_blobs( //draw it
874 ScrollView* window, //window to draw in
875 BOOL8 solid //draw larcs solid
876 ) {
877 if (flags.bit (W_POLYGON)) {
878 PBLOB_IT it = (PBLOB_LIST *) (&rej_cblobs);
879 //polygons
880
881 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
882 it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY);
883 }
884 } else {
885 C_BLOB_IT it = &rej_cblobs; //blobs of WERD
886
887 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
888 it.data ()->plot (window, ScrollView::GREY, ScrollView::GREY);
889 }
890 }
891 }
892 #endif
893
894
895 /**********************************************************************
896 * WERD::shallow_copy()
897 *
898 * Make a shallow copy of a word
899 **********************************************************************/
900
shallow_copy()901 WERD *WERD::shallow_copy() { //shallow copy
902 WERD *new_word = new WERD;
903
904 new_word->blanks = blanks;
905 new_word->flags = flags;
906 new_word->dummy = dummy;
907 new_word->correct = correct;
908 return new_word;
909 }
910
911
912 /**********************************************************************
913 * WERD::operator=
914 *
915 * Assign a word, DEEP copying the blob list
916 **********************************************************************/
917
operator =(const WERD & source)918 WERD & WERD::operator= ( //assign words
919 const WERD & source //from this
920 ) {
921 this->ELIST_LINK::operator= (source);
922 blanks = source.blanks;
923 flags = source.flags;
924 dummy = source.dummy;
925 correct = source.correct;
926 if (flags.bit (W_POLYGON)) {
927 if (!cblobs.empty ())
928 reinterpret_cast<PBLOB_LIST*>(&cblobs)->clear();
929 reinterpret_cast<PBLOB_LIST*>(&cblobs)->deep_copy(
930 reinterpret_cast<const PBLOB_LIST*>(&source.cblobs), &PBLOB::deep_copy);
931
932 if (!rej_cblobs.empty ())
933 reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->clear();
934 reinterpret_cast<PBLOB_LIST*>(&rej_cblobs)->deep_copy(
935 reinterpret_cast<const PBLOB_LIST*>(&source.rej_cblobs),
936 &PBLOB::deep_copy);
937 } else {
938 if (!cblobs.empty ())
939 cblobs.clear ();
940 cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
941
942 if (!rej_cblobs.empty ())
943 rej_cblobs.clear ();
944 rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
945 }
946 return *this;
947 }
948
949
950 /**********************************************************************
951 * word_comparator()
952 *
953 * word comparator used to sort a word list so that words are in increasing
954 * order of left edge.
955 **********************************************************************/
956
word_comparator(const void * word1p,const void * word2p)957 int word_comparator( //sort blobs
958 const void *word1p, //ptr to ptr to word1
959 const void *word2p //ptr to ptr to word2
960 ) {
961 WERD *
962 word1 = *(WERD **) word1p;
963 WERD *
964 word2 = *(WERD **) word2p;
965
966 return word1->bounding_box ().left () - word2->bounding_box ().left ();
967 }
968