• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************************
2  * File:        blobbox.h  (Formerly blobnbox.h)
3  * Description: Code for the textord blob class.
4  * Author:					Ray Smith
5  * Created:					Thu Jul 30 09:08:51 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef           BLOBBOX_H
21 #define           BLOBBOX_H
22 
23 #include          "varable.h"
24 #include          "clst.h"
25 #include          "elst2.h"
26 #include          "werd.h"
27 #include          "ocrblock.h"
28 #include          "statistc.h"
29 
30 extern double_VAR_H (textord_error_weight, 3,
31 "Weighting for error in believability");
32 
// Outcome of the fixed-pitch versus proportional-pitch decision; stored in
// the pitch_decision members of TO_ROW and TO_BLOCK.
// The enumerator order (and therefore the numeric values) must not change.
enum PITCH_TYPE
{
  PITCH_DUNNO,        // insufficient data
  PITCH_DEF_FIXED,    // definitely fixed
  PITCH_MAYBE_FIXED,  // could be
  PITCH_DEF_PROP,     // presumably "definitely proportional" -- TODO confirm
  PITCH_MAYBE_PROP,   // presumably "could be proportional" -- TODO confirm
  PITCH_CORR_FIXED,   // presumably "corrected to fixed" -- TODO confirm
  PITCH_CORR_PROP     // presumably "corrected to proportional" -- TODO confirm
};
43 
// The possible tab-stop types of each side of a BLOBNBOX.
// BLOBNBOX keeps one value for its left side and one for its right side.
enum TabType {
  TT_NONE,         // Not a tab.
  TT_DELETED,      // Not a tab after detailed analysis.
  TT_UNCONFIRMED,  // Initial designation of a tab-stop candidate.
  TT_FAKE,         // Added by interpolation.
  TT_CONFIRMED,    // Aligned with neighbours.
  TT_VLINE         // Detected as a vertical line.
};
53 
// The possible region types of a BLOBNBOX.
// Ordering contract: every image/non-text type sorts below BRT_UNKNOWN and
// every text type sorts above it, so the enumerator order is significant.
// Keep in sync with kBlobTypes in colpartition.cpp and with
// BLOBNBOX::BoxColor(), which maps each value to a display colour.
enum BlobRegionType {
  BRT_NOISE,      // Neither text nor image.
  BRT_HLINE,      // Horizontal separator line.
  BRT_RECTIMAGE,  // Rectangular image.
  BRT_POLYIMAGE,  // Non-rectangular image.
  BRT_UNKNOWN,    // Not determined yet.
  BRT_VERT_TEXT,  // Vertical alignment, not necessarily vertically oriented.
  BRT_TEXT,       // Convincing text.

  BRT_COUNT       // Number of possibilities (not a region type itself).
};
68 
// Forward declaration only: BLOBNBOX stores a ColPartition pointer (its
// owner) and never needs the complete type in this header.
namespace tesseract {
class ColPartition;
}
72 
73 class BLOBNBOX;
ELISTIZEH(BLOBNBOX)74 ELISTIZEH (BLOBNBOX)
75 class BLOBNBOX:public ELIST_LINK
76 {
77   public:
78     BLOBNBOX() {
79       blob_ptr = NULL;
80       cblob_ptr = NULL;
81       area = 0;
82       Init();
83     }
84     explicit BLOBNBOX(PBLOB *srcblob) {
85       blob_ptr = srcblob;
86       cblob_ptr = NULL;
87       box = srcblob->bounding_box ();
88       area = (int) srcblob->area ();
89       Init();
90     }
91     explicit BLOBNBOX(C_BLOB *srcblob) {
92       blob_ptr = NULL;
93       cblob_ptr = srcblob;
94       box = srcblob->bounding_box ();
95       area = (int) srcblob->area ();
96       Init();
97     }
98 
99     void rotate_box(FCOORD vec) {
100       box.rotate(vec);
101     }
102     void translate_box(ICOORD v) {
103       box.move(v);
104     }
105     void merge(BLOBNBOX *nextblob);
106     void chop(                        // fake chop blob
107               BLOBNBOX_IT *start_it,  // location of this
108               BLOBNBOX_IT *blob_it,   // iterator
109               FCOORD rotation,        // for landscape
110               float xheight);         // line height
111 
112     // Simple accessors.
113     const TBOX &bounding_box() const {
114       return box;
115     }
116     void compute_bounding_box() {
117       box = cblob_ptr != NULL ? cblob_ptr->bounding_box()
118                               : blob_ptr->bounding_box();
119     }
120     const TBOX &reduced_box() const {
121       return red_box;
122     }
123     void set_reduced_box(TBOX new_box) {
124       red_box = new_box;
125       reduced = TRUE;
126     }
127     inT32 enclosed_area() const {
128       return area;
129     }
130     bool joined_to_prev() const {
131       return joined != 0;
132     }
133     bool red_box_set() const {
134       return reduced != 0;
135     }
136     int repeated_set() const {
137       return repeated_set_;
138     }
139     void set_repeated_set(int set_id) {
140       repeated_set_ = set_id;
141     }
142     PBLOB *blob() const {
143       return blob_ptr;
144     }
145     C_BLOB *cblob() const {
146       return cblob_ptr;
147     }
148     TabType left_tab_type() const {
149       return left_tab_type_;
150     }
151     void set_left_tab_type(TabType new_type) {
152       left_tab_type_ = new_type;
153     }
154     TabType right_tab_type() const {
155       return right_tab_type_;
156     }
157     void set_right_tab_type(TabType new_type) {
158       right_tab_type_ = new_type;
159     }
160     BlobRegionType region_type() const {
161       return region_type_;
162     }
163     void set_region_type(BlobRegionType new_type) {
164       region_type_ = new_type;
165     }
166     int left_rule() const {
167       return left_rule_;
168     }
169     void set_left_rule(int new_left) {
170       left_rule_ = new_left;
171     }
172     int right_rule() const {
173       return right_rule_;
174     }
175     void set_right_rule(int new_right) {
176       right_rule_ = new_right;
177     }
178     int left_crossing_rule() const {
179       return left_crossing_rule_;
180     }
181     void set_left_crossing_rule(int new_left) {
182       left_crossing_rule_ = new_left;
183     }
184     int right_crossing_rule() const {
185       return right_crossing_rule_;
186     }
187     void set_right_crossing_rule(int new_right) {
188       right_crossing_rule_ = new_right;
189     }
190     float horz_stroke_width() const {
191       return horz_stroke_width_;
192     }
193     void set_horz_stroke_width(float width) {
194       horz_stroke_width_ = width;
195     }
196     float vert_stroke_width() const {
197       return vert_stroke_width_;
198     }
199     void set_vert_stroke_width(float width) {
200       vert_stroke_width_ = width;
201     }
202     tesseract::ColPartition* owner() const {
203       return owner_;
204     }
205     void set_owner(tesseract::ColPartition* new_owner) {
206       owner_ = new_owner;
207     }
208     void set_noise_flag(bool flag) {
209       noise_flag_ = flag;
210     }
211     bool noise_flag() const {
212       return noise_flag_;
213     }
214 
215 #ifndef GRAPHICS_DISABLED
216     // Keep in sync with BlobRegionType.
217     ScrollView::Color BoxColor() const {
218       switch (region_type_) {
219       case BRT_HLINE:
220         return ScrollView::YELLOW;
221       case BRT_RECTIMAGE:
222         return ScrollView::RED;
223       case BRT_POLYIMAGE:
224         return ScrollView::ORANGE;
225       case BRT_UNKNOWN:
226         return ScrollView::CYAN;
227       case BRT_VERT_TEXT:
228         return ScrollView::GREEN;
229       case BRT_TEXT:
230         return ScrollView::BLUE;
231       case BRT_NOISE:
232       default:
233         return ScrollView::GREY;
234       }
235     }
236 
237     void plot(ScrollView* window,                // window to draw in
238               ScrollView::Color blob_colour,     //for outer bits
239               ScrollView::Color child_colour) {  //for holes
240       if (blob_ptr != NULL)
241         blob_ptr->plot (window, blob_colour, child_colour);
242       if (cblob_ptr != NULL)
243         cblob_ptr->plot (window, blob_colour, child_colour);
244     }
245 #endif
246 
247     NEWDELETE2(BLOBNBOX)
248 
249  private:
250   // Initializes the bulk of the members to default values.
251   void Init() {
252     joined = false;
253     reduced = false;
254     repeated_set_ = 0;
255     left_tab_type_ = TT_NONE;
256     right_tab_type_ = TT_NONE;
257     region_type_ = BRT_UNKNOWN;
258     left_rule_ = 0;
259     right_rule_ = 0;
260     left_crossing_rule_ = 0;
261     right_crossing_rule_ = 0;
262     horz_stroke_width_ = 0.0f;
263     vert_stroke_width_ = 0.0f;
264     owner_ = NULL;
265     noise_flag_ = false;
266   }
267 
268   PBLOB *blob_ptr;              // poly blob
269   C_BLOB *cblob_ptr;            // edgestep blob
270   TBOX box;                     // bounding box
271   TBOX red_box;                 // bounding box
272     int area:30;                 //enclosed area
273     int joined:1;                //joined to prev
274     int reduced:1;               //reduced box set
275   int repeated_set_;            // id of the set of repeated blobs
276   TabType left_tab_type_;       // Indicates tab-stop assessment
277   TabType right_tab_type_;      // Indicates tab-stop assessment
278   BlobRegionType region_type_;  // Type of region this blob belongs to
279   inT16 left_rule_;             // x-coord of nearest but not crossing rule line
280   inT16 right_rule_;            // x-coord of nearest but not crossing rule line
281   inT16 left_crossing_rule_;    // x-coord of nearest or crossing rule line
282   inT16 right_crossing_rule_;   // x-coord of nearest or crossing rule line
283   float horz_stroke_width_;     // Median horizontal stroke width
284   float vert_stroke_width_;     // Median vertical stroke width
285   tesseract::ColPartition* owner_;  // Who will delete me when I am not needed
286   // Was the blob flagged as noise in the initial filtering step
287   bool noise_flag_;
288 };
289 
290 class TO_ROW:public ELIST2_LINK
291 {
292   public:
TO_ROW()293     TO_ROW() {
294       num_repeated_sets_ = -1;
295     }                            //empty
296     TO_ROW(                 //constructor
297            BLOBNBOX *blob,  //from first blob
298            float top,       //of row //target height
299            float bottom,
300            float row_size);
301 
max_y()302     float max_y() const {  //access function
303       return y_max;
304     }
min_y()305     float min_y() const {
306       return y_min;
307     }
mean_y()308     float mean_y() const {
309       return (y_min + y_max) / 2.0f;
310     }
initial_min_y()311     float initial_min_y() const {
312       return initial_y_min;
313     }
line_m()314     float line_m() const {  //access to line fit
315       return m;
316     }
line_c()317     float line_c() const {
318       return c;
319     }
line_error()320     float line_error() const {
321       return error;
322     }
parallel_c()323     float parallel_c() const {
324       return para_c;
325     }
parallel_error()326     float parallel_error() const {
327       return para_error;
328     }
believability()329     float believability() const {  //baseline goodness
330       return credibility;
331     }
intercept()332     float intercept() const {  //real parallel_c
333       return y_origin;
334     }
335     void add_blob(                 //put in row
336                   BLOBNBOX *blob,  //blob to add
337                   float top,       //of row //target height
338                   float bottom,
339                   float row_size);
340     void insert_blob(  //put in row in order
341                      BLOBNBOX *blob);
342 
blob_list()343     BLOBNBOX_LIST *blob_list() {  //get list
344       return &blobs;
345     }
346 
set_line(float new_m,float new_c,float new_error)347     void set_line(              //set line spec
348                   float new_m,  //line to set
349                   float new_c,
350                   float new_error) {
351       m = new_m;
352       c = new_c;
353       error = new_error;
354     }
set_parallel_line(float gradient,float new_c,float new_error)355     void set_parallel_line(                 //set fixed gradient line
356                            float gradient,  //page gradient
357                            float new_c,
358                            float new_error) {
359       para_c = new_c;
360       para_error = new_error;
361       credibility =
362         (float) (blobs.length () - textord_error_weight * new_error);
363       y_origin = (float) (new_c / sqrt (1 + gradient * gradient));
364       //real intercept
365     }
set_limits(float new_min,float new_max)366     void set_limits(                  //set min,max
367                     float new_min,    //bottom and
368                     float new_max) {  //top of row
369       y_min = new_min;
370       y_max = new_max;
371     }
372     void compute_vertical_projection();
373     //get projection
374 
rep_chars_marked()375     bool rep_chars_marked() const {
376       return num_repeated_sets_ != -1;
377     }
clear_rep_chars_marked()378     void clear_rep_chars_marked() {
379       num_repeated_sets_ = -1;
380     }
num_repeated_sets()381     int num_repeated_sets() const {
382       return num_repeated_sets_;
383     }
set_num_repeated_sets(int num_sets)384     void set_num_repeated_sets(int num_sets) {
385       num_repeated_sets_ = num_sets;
386     }
387 
388                                  //true when dead
389     NEWDELETE2 (TO_ROW) BOOL8 merged;
390     BOOL8 all_caps;              //had no ascenders
391     BOOL8 used_dm_model;         //in guessing pitch
392     inT16 projection_left;       //start of projection
393     inT16 projection_right;      //start of projection
394     PITCH_TYPE pitch_decision;   //how strong is decision
395     float fixed_pitch;           //pitch or 0
396     float fp_space;              //sp if fixed pitch
397     float fp_nonsp;              //nonsp if fixed pitch
398     float pr_space;              //sp if prop
399     float pr_nonsp;              //non sp if prop
400     float spacing;               //to "next" row
401     float xheight;               //of line
402     int xheight_evidence;        // number of blobs of height xheight
403     float ascrise;               //ascenders
404     float descdrop;              //descenders
405     inT32 min_space;             //min size for real space
406     inT32 max_nonspace;          //max size of non-space
407     inT32 space_threshold;       //space vs nonspace
408     float kern_size;             //average non-space
409     float space_size;            //average space
410     WERD_LIST rep_words;         //repeated chars
411     ICOORDELT_LIST char_cells;   //fixed pitch cells
412     QSPLINE baseline;            //curved baseline
413     STATS projection;            //vertical projection
414 
415   private:
416     BLOBNBOX_LIST blobs;         //blobs in row
417     float y_min;                 //coords
418     float y_max;
419     float initial_y_min;
420     float m, c;                  //line spec
421     float error;                 //line error
422     float para_c;                //constrained fit
423     float para_error;
424     float y_origin;              //rotated para_c;
425     float credibility;           //baseline believability
426     int num_repeated_sets_;      // number of sets of repeated blobs
427                                  // set to -1 if we have not searched
428                                  // for repeated blobs in this row yet
429 };
430 
ELIST2IZEH(TO_ROW)431 ELIST2IZEH (TO_ROW)
432 class TO_BLOCK:public ELIST_LINK
433 {
434   public:
435     TO_BLOCK() {
436     }                            //empty
437     TO_BLOCK(                    //constructor
438              BLOCK *src_block);  //real block
439     ~TO_BLOCK();
440 
441     TO_ROW_LIST *get_rows() {  //access function
442       return &row_list;
443     }
444 
445     void print_rows() {  //debug info
446       TO_ROW_IT row_it = &row_list;
447       TO_ROW *row;
448 
449       for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
450       row_it.forward ()) {
451         row = row_it.data ();
452         printf ("Row range (%g,%g), para_c=%g, blobcount=" INT32FORMAT
453           "\n", row->min_y (), row->max_y (), row->parallel_c (),
454           row->blob_list ()->length ());
455       }
456     }
457 
458     // Draw the blobs on on the various lists in the block in different colors.
459     void plot_graded_blobs(ScrollView* to_win);
460 
461     BLOBNBOX_LIST blobs;         //medium size
462     BLOBNBOX_LIST underlines;    //underline blobs
463     BLOBNBOX_LIST noise_blobs;   //very small
464     BLOBNBOX_LIST small_blobs;   //fairly small
465     BLOBNBOX_LIST large_blobs;   //big blobs
466     BLOCK *block;                //real block
467     PITCH_TYPE pitch_decision;   //how strong is decision
468     float line_spacing;          //estimate
469     float line_size;             //estimate
470     float max_blob_size;         //line assignment limit
471     float baseline_offset;       //phase shift
472     float xheight;               //median blob size
473     float fixed_pitch;           //pitch or 0
474     float kern_size;             //average non-space
475     float space_size;            //average space
476     inT32 min_space;             //min definite space
477     inT32 max_nonspace;          //max definite
478     float fp_space;              //sp if fixed pitch
479     float fp_nonsp;              //nonsp if fixed pitch
480     float pr_space;              //sp if prop
481     float pr_nonsp;              //non sp if prop
482     TO_ROW *key_row;             //starting row
483 
484     NEWDELETE2 (TO_BLOCK) private:
485     TO_ROW_LIST row_list;        //temporary rows
486 };
487 
488 ELISTIZEH (TO_BLOCK)
489 extern double_VAR_H (textord_error_weight, 3,
490 "Weighting for error in believability");
491 void find_blob_limits(                  //get y limits
492                       PBLOB *blob,      //blob to search
493                       float leftx,      //x limits
494                       float rightx,
495                       FCOORD rotation,  //for landscape
496                       float &ymin,      //output y limits
497                       float &ymax);
498 void find_cblob_limits(                  //get y limits
499                        C_BLOB *blob,     //blob to search
500                        float leftx,      //x limits
501                        float rightx,
502                        FCOORD rotation,  //for landscape
503                        float &ymin,      //output y limits
504                        float &ymax);
505 void find_cblob_vlimits(               //get y limits
506                         C_BLOB *blob,  //blob to search
507                         float leftx,   //x limits
508                         float rightx,
509                         float &ymin,   //output y limits
510                         float &ymax);
511 void find_cblob_hlimits(                //get x limits
512                         C_BLOB *blob,   //blob to search
513                         float bottomy,  //y limits
514                         float topy,
515                         float &xmin,    //output x limits
516                         float &xymax);
517 PBLOB *rotate_blob(                 //get y limits
518                    PBLOB *blob,     //blob to search
519                    FCOORD rotation  //vector to rotate by
520                   );
521 PBLOB *rotate_cblob(                 //rotate it
522                     C_BLOB *blob,    //blob to search
523                     float xheight,   //for poly approx
524                     FCOORD rotation  //for landscape
525                    );
526 C_BLOB *crotate_cblob(                 //rotate it
527                       C_BLOB *blob,    //blob to search
528                       FCOORD rotation  //for landscape
529                      );
530 TBOX box_next(                 //get bounding box
531              BLOBNBOX_IT *it  //iterator to blobds
532             );
533 TBOX box_next_pre_chopped(                 //get bounding box
534                          BLOBNBOX_IT *it  //iterator to blobds
535                         );
536 void vertical_blob_projection(              //project outlines
537                               PBLOB *blob,  //blob to project
538                               STATS *stats  //output
539                              );
540                                  //project outlines
541 void vertical_outline_projection(OUTLINE *outline,  //outline to project
542                                  STATS *stats       //output
543                                 );
544 void vertical_cblob_projection(               //project outlines
545                                C_BLOB *blob,  //blob to project
546                                STATS *stats   //output
547                               );
548 void vertical_coutline_projection(                     //project outlines
549                                   C_OUTLINE *outline,  //outline to project
550                                   STATS *stats         //output
551                                  );
552 void plot_blob_list(ScrollView* win,                   // window to draw in
553                     BLOBNBOX_LIST *list,               // blob list
554                     ScrollView::Color body_colour,     // colour to draw
555                     ScrollView::Color child_colour);   // colour of child
556 
557 #endif
558