1 /**********************************************************************
2 * File: blobbox.h (Formerly blobnbox.h)
3 * Description: Code for the textord blob class.
4 * Author: Ray Smith
5 * Created: Thu Jul 30 09:08:51 BST 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20 #ifndef BLOBBOX_H
21 #define BLOBBOX_H
22
23 #include "varable.h"
24 #include "clst.h"
25 #include "elst2.h"
26 #include "werd.h"
27 #include "ocrblock.h"
28 #include "statistc.h"
29
30 extern double_VAR_H (textord_error_weight, 3,
31 "Weighting for error in believability");
32
// Strength and direction of a pitch decision (see the pitch_decision members
// of TO_ROW and TO_BLOCK). Enumerators are numbered sequentially from zero.
enum PITCH_TYPE {
  PITCH_DUNNO = 0,    // Insufficient data.
  PITCH_DEF_FIXED,    // Definitely fixed.
  PITCH_MAYBE_FIXED,  // Could be fixed.
  PITCH_DEF_PROP,     // NOTE(review): presumably "definitely proportional".
  PITCH_MAYBE_PROP,   // NOTE(review): presumably "could be proportional".
  PITCH_CORR_FIXED,   // NOTE(review): presumably "corrected to fixed" - confirm.
  PITCH_CORR_PROP     // NOTE(review): presumably "corrected to prop" - confirm.
};
43
// Tab-stop classification that can be attached to either side (left or
// right) of a BLOBNBOX. Enumerators are numbered sequentially from zero.
enum TabType {
  TT_NONE = 0,     // Not a tab.
  TT_DELETED,      // Not a tab after detailed analysis.
  TT_UNCONFIRMED,  // Initial designation of a tab-stop candidate.
  TT_FAKE,         // Added by interpolation.
  TT_CONFIRMED,    // Aligned with neighbours.
  TT_VLINE         // Detected as a vertical line.
};
53
// Region classification of a BLOBNBOX.
// Ordering matters: every text type must compare greater than BRT_UNKNOWN
// and every image type must compare less.
// Keep in sync with kBlobTypes in colpartition.cpp and with
// BLOBNBOX::BoxColor below.
enum BlobRegionType {
  BRT_NOISE = 0,  // Neither text nor image.
  BRT_HLINE,      // Horizontal separator line.
  BRT_RECTIMAGE,  // Rectangular image.
  BRT_POLYIMAGE,  // Non-rectangular image.
  BRT_UNKNOWN,    // Not determined yet.
  BRT_VERT_TEXT,  // Vertical alignment, not necessarily vertically oriented.
  BRT_TEXT,       // Convincing text.

  BRT_COUNT       // Number of possibilities.
};
68
// Forward declaration only: BLOBNBOX stores a pointer to its owning
// ColPartition and never needs the full definition in this header.
namespace tesseract { class ColPartition; }
72
73 class BLOBNBOX;
ELISTIZEH(BLOBNBOX)74 ELISTIZEH (BLOBNBOX)
75 class BLOBNBOX:public ELIST_LINK
76 {
77 public:
78 BLOBNBOX() {
79 blob_ptr = NULL;
80 cblob_ptr = NULL;
81 area = 0;
82 Init();
83 }
84 explicit BLOBNBOX(PBLOB *srcblob) {
85 blob_ptr = srcblob;
86 cblob_ptr = NULL;
87 box = srcblob->bounding_box ();
88 area = (int) srcblob->area ();
89 Init();
90 }
91 explicit BLOBNBOX(C_BLOB *srcblob) {
92 blob_ptr = NULL;
93 cblob_ptr = srcblob;
94 box = srcblob->bounding_box ();
95 area = (int) srcblob->area ();
96 Init();
97 }
98
99 void rotate_box(FCOORD vec) {
100 box.rotate(vec);
101 }
102 void translate_box(ICOORD v) {
103 box.move(v);
104 }
105 void merge(BLOBNBOX *nextblob);
106 void chop( // fake chop blob
107 BLOBNBOX_IT *start_it, // location of this
108 BLOBNBOX_IT *blob_it, // iterator
109 FCOORD rotation, // for landscape
110 float xheight); // line height
111
112 // Simple accessors.
113 const TBOX &bounding_box() const {
114 return box;
115 }
116 void compute_bounding_box() {
117 box = cblob_ptr != NULL ? cblob_ptr->bounding_box()
118 : blob_ptr->bounding_box();
119 }
120 const TBOX &reduced_box() const {
121 return red_box;
122 }
123 void set_reduced_box(TBOX new_box) {
124 red_box = new_box;
125 reduced = TRUE;
126 }
127 inT32 enclosed_area() const {
128 return area;
129 }
130 bool joined_to_prev() const {
131 return joined != 0;
132 }
133 bool red_box_set() const {
134 return reduced != 0;
135 }
136 int repeated_set() const {
137 return repeated_set_;
138 }
139 void set_repeated_set(int set_id) {
140 repeated_set_ = set_id;
141 }
142 PBLOB *blob() const {
143 return blob_ptr;
144 }
145 C_BLOB *cblob() const {
146 return cblob_ptr;
147 }
148 TabType left_tab_type() const {
149 return left_tab_type_;
150 }
151 void set_left_tab_type(TabType new_type) {
152 left_tab_type_ = new_type;
153 }
154 TabType right_tab_type() const {
155 return right_tab_type_;
156 }
157 void set_right_tab_type(TabType new_type) {
158 right_tab_type_ = new_type;
159 }
160 BlobRegionType region_type() const {
161 return region_type_;
162 }
163 void set_region_type(BlobRegionType new_type) {
164 region_type_ = new_type;
165 }
166 int left_rule() const {
167 return left_rule_;
168 }
169 void set_left_rule(int new_left) {
170 left_rule_ = new_left;
171 }
172 int right_rule() const {
173 return right_rule_;
174 }
175 void set_right_rule(int new_right) {
176 right_rule_ = new_right;
177 }
178 int left_crossing_rule() const {
179 return left_crossing_rule_;
180 }
181 void set_left_crossing_rule(int new_left) {
182 left_crossing_rule_ = new_left;
183 }
184 int right_crossing_rule() const {
185 return right_crossing_rule_;
186 }
187 void set_right_crossing_rule(int new_right) {
188 right_crossing_rule_ = new_right;
189 }
190 float horz_stroke_width() const {
191 return horz_stroke_width_;
192 }
193 void set_horz_stroke_width(float width) {
194 horz_stroke_width_ = width;
195 }
196 float vert_stroke_width() const {
197 return vert_stroke_width_;
198 }
199 void set_vert_stroke_width(float width) {
200 vert_stroke_width_ = width;
201 }
202 tesseract::ColPartition* owner() const {
203 return owner_;
204 }
205 void set_owner(tesseract::ColPartition* new_owner) {
206 owner_ = new_owner;
207 }
208 void set_noise_flag(bool flag) {
209 noise_flag_ = flag;
210 }
211 bool noise_flag() const {
212 return noise_flag_;
213 }
214
215 #ifndef GRAPHICS_DISABLED
216 // Keep in sync with BlobRegionType.
217 ScrollView::Color BoxColor() const {
218 switch (region_type_) {
219 case BRT_HLINE:
220 return ScrollView::YELLOW;
221 case BRT_RECTIMAGE:
222 return ScrollView::RED;
223 case BRT_POLYIMAGE:
224 return ScrollView::ORANGE;
225 case BRT_UNKNOWN:
226 return ScrollView::CYAN;
227 case BRT_VERT_TEXT:
228 return ScrollView::GREEN;
229 case BRT_TEXT:
230 return ScrollView::BLUE;
231 case BRT_NOISE:
232 default:
233 return ScrollView::GREY;
234 }
235 }
236
237 void plot(ScrollView* window, // window to draw in
238 ScrollView::Color blob_colour, //for outer bits
239 ScrollView::Color child_colour) { //for holes
240 if (blob_ptr != NULL)
241 blob_ptr->plot (window, blob_colour, child_colour);
242 if (cblob_ptr != NULL)
243 cblob_ptr->plot (window, blob_colour, child_colour);
244 }
245 #endif
246
247 NEWDELETE2(BLOBNBOX)
248
249 private:
250 // Initializes the bulk of the members to default values.
251 void Init() {
252 joined = false;
253 reduced = false;
254 repeated_set_ = 0;
255 left_tab_type_ = TT_NONE;
256 right_tab_type_ = TT_NONE;
257 region_type_ = BRT_UNKNOWN;
258 left_rule_ = 0;
259 right_rule_ = 0;
260 left_crossing_rule_ = 0;
261 right_crossing_rule_ = 0;
262 horz_stroke_width_ = 0.0f;
263 vert_stroke_width_ = 0.0f;
264 owner_ = NULL;
265 noise_flag_ = false;
266 }
267
268 PBLOB *blob_ptr; // poly blob
269 C_BLOB *cblob_ptr; // edgestep blob
270 TBOX box; // bounding box
271 TBOX red_box; // bounding box
272 int area:30; //enclosed area
273 int joined:1; //joined to prev
274 int reduced:1; //reduced box set
275 int repeated_set_; // id of the set of repeated blobs
276 TabType left_tab_type_; // Indicates tab-stop assessment
277 TabType right_tab_type_; // Indicates tab-stop assessment
278 BlobRegionType region_type_; // Type of region this blob belongs to
279 inT16 left_rule_; // x-coord of nearest but not crossing rule line
280 inT16 right_rule_; // x-coord of nearest but not crossing rule line
281 inT16 left_crossing_rule_; // x-coord of nearest or crossing rule line
282 inT16 right_crossing_rule_; // x-coord of nearest or crossing rule line
283 float horz_stroke_width_; // Median horizontal stroke width
284 float vert_stroke_width_; // Median vertical stroke width
285 tesseract::ColPartition* owner_; // Who will delete me when I am not needed
286 // Was the blob flagged as noise in the initial filtering step
287 bool noise_flag_;
288 };
289
290 class TO_ROW:public ELIST2_LINK
291 {
292 public:
TO_ROW()293 TO_ROW() {
294 num_repeated_sets_ = -1;
295 } //empty
296 TO_ROW( //constructor
297 BLOBNBOX *blob, //from first blob
298 float top, //of row //target height
299 float bottom,
300 float row_size);
301
max_y()302 float max_y() const { //access function
303 return y_max;
304 }
min_y()305 float min_y() const {
306 return y_min;
307 }
mean_y()308 float mean_y() const {
309 return (y_min + y_max) / 2.0f;
310 }
initial_min_y()311 float initial_min_y() const {
312 return initial_y_min;
313 }
line_m()314 float line_m() const { //access to line fit
315 return m;
316 }
line_c()317 float line_c() const {
318 return c;
319 }
line_error()320 float line_error() const {
321 return error;
322 }
parallel_c()323 float parallel_c() const {
324 return para_c;
325 }
parallel_error()326 float parallel_error() const {
327 return para_error;
328 }
believability()329 float believability() const { //baseline goodness
330 return credibility;
331 }
intercept()332 float intercept() const { //real parallel_c
333 return y_origin;
334 }
335 void add_blob( //put in row
336 BLOBNBOX *blob, //blob to add
337 float top, //of row //target height
338 float bottom,
339 float row_size);
340 void insert_blob( //put in row in order
341 BLOBNBOX *blob);
342
blob_list()343 BLOBNBOX_LIST *blob_list() { //get list
344 return &blobs;
345 }
346
set_line(float new_m,float new_c,float new_error)347 void set_line( //set line spec
348 float new_m, //line to set
349 float new_c,
350 float new_error) {
351 m = new_m;
352 c = new_c;
353 error = new_error;
354 }
set_parallel_line(float gradient,float new_c,float new_error)355 void set_parallel_line( //set fixed gradient line
356 float gradient, //page gradient
357 float new_c,
358 float new_error) {
359 para_c = new_c;
360 para_error = new_error;
361 credibility =
362 (float) (blobs.length () - textord_error_weight * new_error);
363 y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
364 //real intercept
365 }
set_limits(float new_min,float new_max)366 void set_limits( //set min,max
367 float new_min, //bottom and
368 float new_max) { //top of row
369 y_min = new_min;
370 y_max = new_max;
371 }
372 void compute_vertical_projection();
373 //get projection
374
rep_chars_marked()375 bool rep_chars_marked() const {
376 return num_repeated_sets_ != -1;
377 }
clear_rep_chars_marked()378 void clear_rep_chars_marked() {
379 num_repeated_sets_ = -1;
380 }
num_repeated_sets()381 int num_repeated_sets() const {
382 return num_repeated_sets_;
383 }
set_num_repeated_sets(int num_sets)384 void set_num_repeated_sets(int num_sets) {
385 num_repeated_sets_ = num_sets;
386 }
387
388 //true when dead
389 NEWDELETE2 (TO_ROW) BOOL8 merged;
390 BOOL8 all_caps; //had no ascenders
391 BOOL8 used_dm_model; //in guessing pitch
392 inT16 projection_left; //start of projection
393 inT16 projection_right; //start of projection
394 PITCH_TYPE pitch_decision; //how strong is decision
395 float fixed_pitch; //pitch or 0
396 float fp_space; //sp if fixed pitch
397 float fp_nonsp; //nonsp if fixed pitch
398 float pr_space; //sp if prop
399 float pr_nonsp; //non sp if prop
400 float spacing; //to "next" row
401 float xheight; //of line
402 int xheight_evidence; // number of blobs of height xheight
403 float ascrise; //ascenders
404 float descdrop; //descenders
405 inT32 min_space; //min size for real space
406 inT32 max_nonspace; //max size of non-space
407 inT32 space_threshold; //space vs nonspace
408 float kern_size; //average non-space
409 float space_size; //average space
410 WERD_LIST rep_words; //repeated chars
411 ICOORDELT_LIST char_cells; //fixed pitch cells
412 QSPLINE baseline; //curved baseline
413 STATS projection; //vertical projection
414
415 private:
416 BLOBNBOX_LIST blobs; //blobs in row
417 float y_min; //coords
418 float y_max;
419 float initial_y_min;
420 float m, c; //line spec
421 float error; //line error
422 float para_c; //constrained fit
423 float para_error;
424 float y_origin; //rotated para_c;
425 float credibility; //baseline believability
426 int num_repeated_sets_; // number of sets of repeated blobs
427 // set to -1 if we have not searched
428 // for repeated blobs in this row yet
429 };
430
// Declares the list and iterator types for TO_ROW (TO_ROW_LIST and
// TO_ROW_IT are used by TO_BLOCK below).
ELIST2IZEH(TO_ROW)
432 class TO_BLOCK:public ELIST_LINK
433 {
434 public:
435 TO_BLOCK() {
436 } //empty
437 TO_BLOCK( //constructor
438 BLOCK *src_block); //real block
439 ~TO_BLOCK();
440
441 TO_ROW_LIST *get_rows() { //access function
442 return &row_list;
443 }
444
445 void print_rows() { //debug info
446 TO_ROW_IT row_it = &row_list;
447 TO_ROW *row;
448
449 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
450 row_it.forward ()) {
451 row = row_it.data ();
452 printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
453 "\n", row->min_y (), row->max_y (), row->parallel_c (),
454 row->blob_list ()->length ());
455 }
456 }
457
458 // Draw the blobs on on the various lists in the block in different colors.
459 void plot_graded_blobs(ScrollView* to_win);
460
461 BLOBNBOX_LIST blobs; //medium size
462 BLOBNBOX_LIST underlines; //underline blobs
463 BLOBNBOX_LIST noise_blobs; //very small
464 BLOBNBOX_LIST small_blobs; //fairly small
465 BLOBNBOX_LIST large_blobs; //big blobs
466 BLOCK *block; //real block
467 PITCH_TYPE pitch_decision; //how strong is decision
468 float line_spacing; //estimate
469 float line_size; //estimate
470 float max_blob_size; //line assignment limit
471 float baseline_offset; //phase shift
472 float xheight; //median blob size
473 float fixed_pitch; //pitch or 0
474 float kern_size; //average non-space
475 float space_size; //average space
476 inT32 min_space; //min definite space
477 inT32 max_nonspace; //max definite
478 float fp_space; //sp if fixed pitch
479 float fp_nonsp; //nonsp if fixed pitch
480 float pr_space; //sp if prop
481 float pr_nonsp; //non sp if prop
482 TO_ROW *key_row; //starting row
483
484 NEWDELETE2 (TO_BLOCK) private:
485 TO_ROW_LIST row_list; //temporary rows
486 };
487
488 ELISTIZEH (TO_BLOCK)
489 extern double_VAR_H (textord_error_weight, 3,
490 "Weighting for error in believability");
491 void find_blob_limits( //get y limits
492 PBLOB *blob, //blob to search
493 float leftx, //x limits
494 float rightx,
495 FCOORD rotation, //for landscape
496 float &ymin, //output y limits
497 float &ymax);
498 void find_cblob_limits( //get y limits
499 C_BLOB *blob, //blob to search
500 float leftx, //x limits
501 float rightx,
502 FCOORD rotation, //for landscape
503 float &ymin, //output y limits
504 float &ymax);
505 void find_cblob_vlimits( //get y limits
506 C_BLOB *blob, //blob to search
507 float leftx, //x limits
508 float rightx,
509 float &ymin, //output y limits
510 float &ymax);
511 void find_cblob_hlimits( //get x limits
512 C_BLOB *blob, //blob to search
513 float bottomy, //y limits
514 float topy,
515 float &xmin, //output x limits
516 float &xymax);
517 PBLOB *rotate_blob( //get y limits
518 PBLOB *blob, //blob to search
519 FCOORD rotation //vector to rotate by
520 );
521 PBLOB *rotate_cblob( //rotate it
522 C_BLOB *blob, //blob to search
523 float xheight, //for poly approx
524 FCOORD rotation //for landscape
525 );
526 C_BLOB *crotate_cblob( //rotate it
527 C_BLOB *blob, //blob to search
528 FCOORD rotation //for landscape
529 );
530 TBOX box_next( //get bounding box
531 BLOBNBOX_IT *it //iterator to blobds
532 );
533 TBOX box_next_pre_chopped( //get bounding box
534 BLOBNBOX_IT *it //iterator to blobds
535 );
536 void vertical_blob_projection( //project outlines
537 PBLOB *blob, //blob to project
538 STATS *stats //output
539 );
540 //project outlines
541 void vertical_outline_projection(OUTLINE *outline, //outline to project
542 STATS *stats //output
543 );
544 void vertical_cblob_projection( //project outlines
545 C_BLOB *blob, //blob to project
546 STATS *stats //output
547 );
548 void vertical_coutline_projection( //project outlines
549 C_OUTLINE *outline, //outline to project
550 STATS *stats //output
551 );
552 void plot_blob_list(ScrollView* win, // window to draw in
553 BLOBNBOX_LIST *list, // blob list
554 ScrollView::Color body_colour, // colour to draw
555 ScrollView::Color child_colour); // colour of child
556
557 #endif
558