1 /**********************************************************************
2 * File: pagewalk.cpp (Formerly walkers.c)
3 * Description: Block list processors
4 * Author: Phil Cheatle
5 * Created: Thu Oct 10 16:25:24 BST 1991
6 *
7 * (C) Copyright 1991, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #include "mfcpch.h"
21 #include "pagewalk.h"
22 #include "tesseractclass.h"
23
// EXTERN is defined as an empty token here, so a declaration prefixed with
// EXTERN in this file carries no extra storage-class keyword.
24 #define EXTERN
25
// NOTE(review): BOOL_VAR is a project macro whose definition is not visible
// here; presumably it defines a named boolean variable with the given default
// value and description string -- confirm against its header.
// current_word_quit is not read by any function in the code visible here.
26 EXTERN BOOL_VAR (current_word_quit, FALSE, "Stop processing this word");
// selection_quit is tested after every processor callback made by the
// walkers below; a true value makes the walker return immediately.
27 DLLSYM BOOL_VAR (selection_quit, FALSE, "Stop processing this selection");
28
29 /**********************************************************************
30 * block_list_bounding_box()
31 *
32 * Scan block list to find the bounding box of all blocks.
33 **********************************************************************/
34
block_list_bounding_box(BLOCK_LIST * block_list)35 TBOX block_list_bounding_box( //find bounding box
36 BLOCK_LIST *block_list //of this block list
37 ) {
38 BLOCK_IT block_it(block_list);
39 TBOX enclosing_box;
40
41 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
42 block_it.forward ())
43 enclosing_box += block_it.data ()->bounding_box ();
44 return enclosing_box;
45 }
46
47
48 /**********************************************************************
49 * block_list_compress()
50 *
51 * Pack a block list to occupy a smaller space by compressing each block and
52 * moving the compressed blocks one above the other.
53 * The compressed block list has the same top left point as the uncompressed
54 * first. Blocks are reordered so that the source names are in alphabetic
55 * order. (This gathers together, but does not combine, blocks from the same
56 * file.)
57 * The enclosing box of the compressed block list is returned.
58 **********************************************************************/
59
block_list_compress(BLOCK_LIST * block_list)60 const TBOX block_list_compress( //shuffle up blocks
61 BLOCK_LIST *block_list) {
62 BLOCK_IT block_it(block_list);
63 BLOCK *block;
64 ICOORD initial_top_left;
65 ICOORD block_spacing (0, BLOCK_SPACING);
66 TBOX enclosing_box; //for full display
67
68 initial_top_left = block_it.data ()->bounding_box ().topleft ();
69 //group srcfile blks
70 block_it.sort (block_name_order);
71
72 /* Compress the target block list into an area starting from the top left of
73 the first block on the list */
74
75 enclosing_box = TBOX (initial_top_left, initial_top_left);
76 enclosing_box.move_bottom_edge (BLOCK_SPACING);
77
78 for (block_it.mark_cycle_pt ();
79 !block_it.cycled_list (); block_it.forward ()) {
80 block = block_it.data ();
81 block->compress (enclosing_box.botleft () - block_spacing -
82 block->bounding_box ().topleft ());
83 enclosing_box += block->bounding_box ();
84 }
85 return enclosing_box;
86 }
87
88
89 /**********************************************************************
90 * block_list_move()
91 *
92 * Move all the blocks in the list by a vector
93 **********************************************************************/
94
block_list_move(BLOCK_LIST * block_list,ICOORD vec)95 void block_list_move( //move
96 BLOCK_LIST *block_list, //this list
97 ICOORD vec //by this vector
98 ) {
99 BLOCK_IT block_it(block_list);
100
101 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
102 block_it.forward ())
103 block_it.data ()->move (vec);
104 }
105
106
107 /**********************************************************************
108 * block_name_order()
109 *
110 * Block comparator used to sort a block list so that blocks from the same
111 * filename are located together, and blocks from the same file are ordered
112 * by vertical position.
113 **********************************************************************/
114
block_name_order(const void * block1p,const void * block2p)115 int block_name_order( //sort blocks
116 const void *block1p, //ptr to ptr to block1
117 const void *block2p //ptr to ptr to block2
118 ) {
119 int result;
120 BLOCK *block1 = *(BLOCK **) block1p;
121 BLOCK *block2 = *(BLOCK **) block2p;
122
123 result = strcmp (block1->name (), block2->name ());
124 if (result == 0)
125 result = block2->bounding_box ().top () - block1->bounding_box ().top ();
126 return result;
127 }
128
129
130 /**********************************************************************
131 * process_all_blobs()
132 *
133 * Walk the current block list applying the specified blob processor function
134 * to all blobs
135 **********************************************************************/
136
137 void
process_all_blobs(BLOCK_LIST * block_list,BOOL8 blob_processor (BLOCK *,ROW *,WERD *,PBLOB *),BOOL8 c_blob_processor (BLOCK *,ROW *,WERD *,C_BLOB *))138 process_all_blobs ( //process blobs
139 BLOCK_LIST * block_list, //blocks to check
140 BOOL8 blob_processor ( //function to call
141 //function to call
142 BLOCK *, ROW *, WERD *, PBLOB *), BOOL8 c_blob_processor (
143 BLOCK
144 *,
145 ROW
146 *,
147 WERD
148 *,
149 C_BLOB
150 *)) {
151 BLOCK_IT block_it(block_list);
152 BLOCK *block;
153 ROW_IT row_it;
154 ROW *row;
155 WERD_IT word_it;
156 WERD *word;
157 PBLOB_IT blob_it;
158 PBLOB *blob;
159 C_BLOB_IT c_blob_it;
160 C_BLOB *c_blob;
161
162 for (block_it.mark_cycle_pt ();
163 !block_it.cycled_list (); block_it.forward ()) {
164 block = block_it.data ();
165 row_it.set_to_list (block->row_list ());
166 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
167 row = row_it.data ();
168 word_it.set_to_list (row->word_list ());
169 for (word_it.mark_cycle_pt ();
170 !word_it.cycled_list (); word_it.forward ()) {
171 word = word_it.data ();
172 if (word->flag (W_POLYGON)) {
173 if (blob_processor != NULL) {
174 blob_it.set_to_list (word->blob_list ());
175 for (blob_it.mark_cycle_pt ();
176 !blob_it.cycled_list (); blob_it.forward ()) {
177 blob = blob_it.data ();
178 if (!blob_processor (block, row, word, blob) ||
179 selection_quit)
180 return;
181 }
182 }
183 }
184 else {
185 if (c_blob_processor != NULL) {
186 c_blob_it.set_to_list (word->cblob_list ());
187 for (c_blob_it.mark_cycle_pt ();
188 !c_blob_it.cycled_list (); c_blob_it.forward ()) {
189 c_blob = c_blob_it.data ();
190 if (!c_blob_processor (block, row, word, c_blob) ||
191 selection_quit)
192 return;
193 }
194 }
195 }
196 }
197 }
198 }
199 }
200
201
202 /**********************************************************************
203 * process_selected_blobs()
204 *
205 * Walk the current block list applying the specified blob processor function
206 * to each selected blob
207 **********************************************************************/
208
209 void
process_selected_blobs(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 blob_processor (BLOCK *,ROW *,WERD *,PBLOB *),BOOL8 c_blob_processor (BLOCK *,ROW *,WERD *,C_BLOB *))210 process_selected_blobs ( //process blobs
211 BLOCK_LIST * block_list, //blocks to check
212 //function to call
213 TBOX & selection_box, BOOL8 blob_processor (
214 //function to call
215 BLOCK *, ROW *, WERD *, PBLOB *), BOOL8 c_blob_processor (
216 BLOCK
217 *,
218 ROW
219 *,
220 WERD
221 *,
222 C_BLOB
223 *)) {
224 BLOCK_IT block_it(block_list);
225 BLOCK *block;
226 ROW_IT row_it;
227 ROW *row;
228 WERD_IT word_it;
229 WERD *word;
230 PBLOB_IT blob_it;
231 PBLOB *blob;
232 C_BLOB_IT c_blob_it;
233 C_BLOB *c_blob;
234
235 for (block_it.mark_cycle_pt ();
236 !block_it.cycled_list (); block_it.forward ()) {
237 block = block_it.data ();
238 if (block->bounding_box ().overlap (selection_box)) {
239 row_it.set_to_list (block->row_list ());
240 for (row_it.mark_cycle_pt ();
241 !row_it.cycled_list (); row_it.forward ()) {
242 row = row_it.data ();
243 if (row->bounding_box ().overlap (selection_box)) {
244 word_it.set_to_list (row->word_list ());
245 for (word_it.mark_cycle_pt ();
246 !word_it.cycled_list (); word_it.forward ()) {
247 word = word_it.data ();
248 if (word->bounding_box ().overlap (selection_box)) {
249 if (word->flag (W_POLYGON)) {
250 if (blob_processor != NULL) {
251 blob_it.set_to_list (word->blob_list ());
252 for (blob_it.mark_cycle_pt ();
253 !blob_it.cycled_list ();
254 blob_it.forward ()) {
255 blob = blob_it.data ();
256 if (blob->bounding_box ().
257 overlap (selection_box)) {
258 if (!blob_processor
259 (block, row, word, blob)
260 || selection_quit)
261 return;
262 }
263 }
264 }
265 }
266 else {
267 if (c_blob_processor != NULL) {
268 c_blob_it.set_to_list (word->cblob_list ());
269 for (c_blob_it.mark_cycle_pt ();
270 !c_blob_it.cycled_list ();
271 c_blob_it.forward ()) {
272 c_blob = c_blob_it.data ();
273 if (c_blob->
274 bounding_box ().
275 overlap (selection_box)) {
276 if (!c_blob_processor
277 (block, row, word, c_blob)
278 || selection_quit)
279 return;
280 }
281 }
282 }
283 }
284 }
285 }
286 }
287 }
288 }
289 }
290 }
291
292
293 /**********************************************************************
294 * process_all_words()
295 *
296 * Walk the current block list applying the specified word processor function
297 * to all words
298 **********************************************************************/
299 void
process_all_words(BLOCK_LIST * block_list,BOOL8 word_processor (BLOCK *,ROW *,WERD *))300 process_all_words ( //process words
301 BLOCK_LIST * block_list, //blocks to check
302 BOOL8 word_processor ( //function to call
303 BLOCK *, ROW *, WERD *)) {
304 BLOCK_IT block_it(block_list);
305 BLOCK *block;
306 ROW_IT row_it;
307 ROW *row;
308 WERD_IT word_it;
309 WERD *word;
310
311 for (block_it.mark_cycle_pt ();
312 !block_it.cycled_list (); block_it.forward ()) {
313 block = block_it.data ();
314 row_it.set_to_list (block->row_list ());
315 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
316 row = row_it.data ();
317 word_it.set_to_list (row->word_list ());
318 for (word_it.mark_cycle_pt ();
319 !word_it.cycled_list (); word_it.forward ()) {
320 word = word_it.data ();
321 if (!word_processor (block, row, word) || selection_quit)
322 return;
323 }
324 }
325 }
326 }
327
328
329 /**********************************************************************
330 * process_selected_words()
331 *
332 * Walk the current block list applying the specified word processor function
333 * to each word selected.
334 **********************************************************************/
335
336 void
process_selected_words(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 word_processor (BLOCK *,ROW *,WERD *))337 process_selected_words (
338 //process words
339 BLOCK_LIST * block_list, //blocks to check
340 //function to call
341 TBOX & selection_box,
342 BOOL8 word_processor (
343 BLOCK *,
344 ROW *,
345 WERD *)) {
346 BLOCK_IT block_it(block_list);
347 BLOCK *block;
348 ROW_IT row_it;
349 ROW *row;
350 WERD_IT word_it;
351 WERD *word;
352
353 for (block_it.mark_cycle_pt ();
354 !block_it.cycled_list (); block_it.forward ()) {
355 block = block_it.data ();
356 if (block->bounding_box ().overlap (selection_box)) {
357 row_it.set_to_list (block->row_list ());
358 for (row_it.mark_cycle_pt ();
359 !row_it.cycled_list (); row_it.forward ()) {
360 row = row_it.data ();
361 if (row->bounding_box ().overlap (selection_box)) {
362 word_it.set_to_list (row->word_list ());
363 for (word_it.mark_cycle_pt ();
364 !word_it.cycled_list (); word_it.forward ()) {
365 word = word_it.data ();
366 if (word->bounding_box ().overlap (selection_box)) {
367 if (!word_processor (block, row, word) ||
368 selection_quit)
369 return;
370 }
371 }
372 }
373 }
374 }
375 }
376 }
377 namespace tesseract {
378 void
process_selected_words(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 (tesseract::Tesseract::* word_processor)(BLOCK *,ROW *,WERD *))379 Tesseract::process_selected_words (
380 //process words
381 BLOCK_LIST * block_list, //blocks to check
382 //function to call
383 TBOX & selection_box,
384 BOOL8 (tesseract::Tesseract::*word_processor) (
385 BLOCK *,
386 ROW *,
387 WERD *)) {
388 BLOCK_IT block_it(block_list);
389 BLOCK *block;
390 ROW_IT row_it;
391 ROW *row;
392 WERD_IT word_it;
393 WERD *word;
394
395 for (block_it.mark_cycle_pt ();
396 !block_it.cycled_list (); block_it.forward ()) {
397 block = block_it.data ();
398 if (block->bounding_box ().overlap (selection_box)) {
399 row_it.set_to_list (block->row_list ());
400 for (row_it.mark_cycle_pt ();
401 !row_it.cycled_list (); row_it.forward ()) {
402 row = row_it.data ();
403 if (row->bounding_box ().overlap (selection_box)) {
404 word_it.set_to_list (row->word_list ());
405 for (word_it.mark_cycle_pt ();
406 !word_it.cycled_list (); word_it.forward ()) {
407 word = word_it.data ();
408 if (word->bounding_box ().overlap (selection_box)) {
409 if (!((this->*word_processor) (block, row, word)) ||
410 selection_quit)
411 return;
412 }
413 }
414 }
415 }
416 }
417 }
418 }
419 } // namespace tesseract
420
421
422 /**********************************************************************
423 * process_all_words_it() PASS ITERATORS
424 *
425 * Walk the current block list applying the specified word processor function
426 * to all words
427 **********************************************************************/
428
429 void
process_all_words_it(BLOCK_LIST * block_list,BOOL8 word_processor (BLOCK *,ROW *,WERD *,BLOCK_IT &,ROW_IT &,WERD_IT &))430 process_all_words_it ( //process words
431 BLOCK_LIST * block_list, //blocks to check
432 BOOL8 word_processor ( //function to call
433 BLOCK *,
434 ROW *,
435 WERD *,
436 BLOCK_IT &,
437 ROW_IT &, WERD_IT &)) {
438 BLOCK_IT block_it(block_list);
439 BLOCK *block;
440 ROW_IT row_it;
441 ROW *row;
442 WERD_IT word_it;
443 WERD *word;
444
445 for (block_it.mark_cycle_pt ();
446 !block_it.cycled_list (); block_it.forward ()) {
447 block = block_it.data ();
448 row_it.set_to_list (block->row_list ());
449 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
450 row = row_it.data ();
451 word_it.set_to_list (row->word_list ());
452 for (word_it.mark_cycle_pt ();
453 !word_it.cycled_list (); word_it.forward ()) {
454 word = word_it.data ();
455 if (!word_processor
456 (block, row, word, block_it, row_it, word_it)
457 || selection_quit)
458 return;
459 }
460 }
461 }
462 }
463
464
465 /**********************************************************************
466 * process_selected_words_it() PASS ITERATORS
467 *
468 * Walk the current block list applying the specified word processor function
469 * to each word selected.
470 **********************************************************************/
471
472 void
process_selected_words_it(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 word_processor (BLOCK *,ROW *,WERD *,BLOCK_IT &,ROW_IT &,WERD_IT &))473 process_selected_words_it ( //process words
474 BLOCK_LIST * block_list, //blocks to check
475 //function to call
476 TBOX & selection_box, BOOL8 word_processor (
477 BLOCK
478 *,
479 ROW *,
480 WERD
481 *,
482 BLOCK_IT
483 &,
484 ROW_IT
485 &,
486 WERD_IT
487 &)) {
488 BLOCK_IT block_it(block_list);
489 BLOCK *block;
490 ROW_IT row_it;
491 ROW *row;
492 WERD_IT word_it;
493 WERD *word;
494
495 for (block_it.mark_cycle_pt ();
496 !block_it.cycled_list (); block_it.forward ()) {
497 block = block_it.data ();
498 if (block->bounding_box ().overlap (selection_box)) {
499 row_it.set_to_list (block->row_list ());
500 for (row_it.mark_cycle_pt ();
501 !row_it.cycled_list (); row_it.forward ()) {
502 row = row_it.data ();
503 if (row->bounding_box ().overlap (selection_box)) {
504 word_it.set_to_list (row->word_list ());
505 for (word_it.mark_cycle_pt ();
506 !word_it.cycled_list (); word_it.forward ()) {
507 word = word_it.data ();
508 if (word->bounding_box ().overlap (selection_box)) {
509 if (!word_processor (block, row, word,
510 block_it, row_it, word_it) ||
511 selection_quit)
512 return;
513 }
514 }
515 }
516 }
517 }
518 }
519 }
520
521
522 /**********************************************************************
523 * process_all_blocks()
524 *
525 * Walk the current block list applying the specified block processor function
526 * to each block.
527 **********************************************************************/
528
529 void
process_all_blocks(BLOCK_LIST * block_list,BOOL8 block_processor (BLOCK *))530 process_all_blocks ( //process blocks
531 BLOCK_LIST * block_list, //blocks to check
532 BOOL8 block_processor ( //function to call
533 BLOCK *)) {
534 BLOCK_IT block_it(block_list);
535 BLOCK *block;
536
537 for (block_it.mark_cycle_pt ();
538 !block_it.cycled_list (); block_it.forward ()) {
539 block = block_it.data ();
540 if (!block_processor (block) || selection_quit)
541 return;
542 }
543 }
544
545
546 /**********************************************************************
547 * process_selected_blocks()
548 *
549 * Walk the current block list applying the specified block processor function
550 * to each block selected.
551 **********************************************************************/
552
553 void
process_selected_blocks(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 block_processor (BLOCK *))554 process_selected_blocks ( //process blocks
555 BLOCK_LIST * block_list, //blocks to check
556 //function to call
557 TBOX & selection_box, BOOL8 block_processor (
558 BLOCK
559 *)) {
560 BLOCK_IT block_it(block_list);
561 BLOCK *block;
562
563 for (block_it.mark_cycle_pt ();
564 !block_it.cycled_list (); block_it.forward ()) {
565 block = block_it.data ();
566 if (block->bounding_box ().overlap (selection_box)) {
567 if (!block_processor (block) || selection_quit)
568 return;
569 }
570 }
571 }
572
573
574 /**********************************************************************
575 * process_all_rows()
576 *
577 * Walk the current block list applying the specified row processor function
578 * to all rows
579 **********************************************************************/
580
581 void
process_all_rows(BLOCK_LIST * block_list,BOOL8 row_processor (BLOCK *,ROW *))582 process_all_rows ( //process words
583 BLOCK_LIST * block_list, //blocks to check
584 BOOL8 row_processor ( //function to call
585 BLOCK *, ROW *)) {
586 BLOCK_IT block_it(block_list);
587 BLOCK *block;
588 ROW_IT row_it;
589 ROW *row;
590
591 for (block_it.mark_cycle_pt ();
592 !block_it.cycled_list (); block_it.forward ()) {
593 block = block_it.data ();
594 row_it.set_to_list (block->row_list ());
595 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
596 row = row_it.data ();
597 if (!row_processor (block, row) || selection_quit)
598 return;
599 }
600 }
601 }
602
603
604 /**********************************************************************
605 * process_selected_rows()
606 *
607 * Walk the current block list applying the specified row processor function
608 * to each row selected.
609 **********************************************************************/
610
611 void
process_selected_rows(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 row_processor (BLOCK *,ROW *))612 process_selected_rows ( //process rows
613 BLOCK_LIST * block_list, //blocks to check
614 //function to call
615 TBOX & selection_box, BOOL8 row_processor (
616 BLOCK *,
617 ROW *)) {
618 BLOCK_IT block_it(block_list);
619 BLOCK *block;
620 ROW_IT row_it;
621 ROW *row;
622
623 for (block_it.mark_cycle_pt ();
624 !block_it.cycled_list (); block_it.forward ()) {
625 block = block_it.data ();
626 if (block->bounding_box ().overlap (selection_box)) {
627 row_it.set_to_list (block->row_list ());
628 for (row_it.mark_cycle_pt ();
629 !row_it.cycled_list (); row_it.forward ()) {
630 row = row_it.data ();
631 if (row->bounding_box ().overlap (selection_box)) {
632 if (!row_processor (block, row) || selection_quit)
633 return;
634 }
635 }
636 }
637 }
638 }
639
640
641 /**********************************************************************
642 * process_all_rows_it() PASS ITERATORS
643 *
644 * Walk the current block list applying the specified row processor function
645 * to all rows
646 **********************************************************************/
647
648 void
process_all_rows_it(BLOCK_LIST * block_list,BOOL8 row_processor (BLOCK *,ROW *,BLOCK_IT &,ROW_IT &))649 process_all_rows_it ( //process words
650 BLOCK_LIST * block_list, //blocks to check
651 BOOL8 row_processor ( //function to call
652 BLOCK *,
653 ROW *, BLOCK_IT &, ROW_IT &)) {
654 BLOCK_IT block_it(block_list);
655 BLOCK *block;
656 ROW_IT row_it;
657 ROW *row;
658
659 for (block_it.mark_cycle_pt ();
660 !block_it.cycled_list (); block_it.forward ()) {
661 block = block_it.data ();
662 row_it.set_to_list (block->row_list ());
663 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
664 row = row_it.data ();
665 if (!row_processor (block, row, block_it, row_it) || selection_quit)
666 return;
667 }
668 }
669 }
670
671
672 /**********************************************************************
673 * process_selected_rows_it() PASS ITERATORS
674 *
675 * Walk the current block list applying the specified row processor function
676 * to each row selected.
677 **********************************************************************/
678
679 void
process_selected_rows_it(BLOCK_LIST * block_list,TBOX & selection_box,BOOL8 row_processor (BLOCK *,ROW *,BLOCK_IT &,ROW_IT &))680 process_selected_rows_it ( //process rows
681 BLOCK_LIST * block_list, //blocks to check
682 //function to call
683 TBOX & selection_box, BOOL8 row_processor (
684 BLOCK *,
685 ROW *,
686 BLOCK_IT
687 &,
688 ROW_IT
689 &)) {
690 BLOCK_IT block_it(block_list);
691 BLOCK *block;
692 ROW_IT row_it;
693 ROW *row;
694
695 for (block_it.mark_cycle_pt ();
696 !block_it.cycled_list (); block_it.forward ()) {
697 block = block_it.data ();
698 if (block->bounding_box ().overlap (selection_box)) {
699 row_it.set_to_list (block->row_list ());
700 for (row_it.mark_cycle_pt ();
701 !row_it.cycled_list (); row_it.forward ()) {
702 row = row_it.data ();
703 if (row->bounding_box ().overlap (selection_box)) {
704 if (!row_processor (block, row, block_it, row_it) ||
705 selection_quit)
706 return;
707 }
708 }
709 }
710 }
711 }
712