• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        colpartition.h
3 // Description: Class to hold partitions of the page that correspond
4 //              roughly to text lines.
5 // Author:      Ray Smith
6 // Created:     Thu Aug 14 10:50:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
19 ///////////////////////////////////////////////////////////////////////
20 
21 #ifndef TESSERACT_TEXTORD_COLPARTITION_H__
22 #define TESSERACT_TEXTORD_COLPARTITION_H__
23 
24 #include "bbgrid.h"
25 #include "blobbox.h"       // For BlobRegionType.
26 #include "ndminx.h"
27 #include "ocrblock.h"
28 #include "rect.h"           // For TBOX.
29 #include "scrollview.h"
30 #include "tabfind.h"        // For WidthCallback.
31 #include "tabvector.h"      // For BLOBNBOX_CLIST.
32 
33 namespace tesseract {
34 
35 
36 class ColPartition;
37 class ColPartitionSet;
38 class WorkingPartSet;
39 class WorkingPartSet_LIST;
40 
41 ELIST2IZEH(ColPartition)
CLISTIZEH(ColPartition)42 CLISTIZEH(ColPartition)
43 
44 // ColPartition is a partition of a horizontal slice of the page.
45 // It starts out as a collection of blobs at a particular y-coord in the grid,
46 // but ends up (after merging and uniquing) as an approximate text line.
47 // ColPartitions are also used to hold a partitioning of the page into
48 // columns, each representing one column. Although a ColPartition applies
49 // to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
50 // emerges, which represents the columns over a wide y-coordinate range.
51 class ColPartition : public ELIST2_LINK {
52  public:
53   ColPartition() {
54     // This empty constructor is here only so that the class can be ELISTIZED.
55     // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
56     // and eliminate CLASSNAME##_copier.
57   }
58   // blob_type is the blob_region_type_ of the blobs in this partition.
59   // Vertical is the direction of logical vertical on the possibly skewed image.
60   ColPartition(BlobRegionType blob_type, const ICOORD& vertical);
61   // Constructs a fake ColPartition with no BLOBNBOXes.
62   // Used for making horizontal line ColPartitions and types it accordingly.
63   ColPartition(const ICOORD& vertical,
64                int left, int bottom, int right, int top);
65 
66   // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
67   // all made from a single TBOX.
68   // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
69   // the ColPartition owns the BLOBNBOX!!!
70   // Call DeleteBoxes before deleting the ColPartition.
71   static ColPartition* FakePartition(const TBOX& box);
72 
73   ~ColPartition();
74 
75   // Simple accessors.
76   const TBOX& bounding_box() const {
77     return bounding_box_;
78   }
79   int left_margin() const {
80     return left_margin_;
81   }
82   void set_left_margin(int margin) {
83     left_margin_ = margin;
84   }
85   int right_margin() const {
86     return right_margin_;
87   }
88   void set_right_margin(int margin) {
89     right_margin_ = margin;
90   }
91   int median_top() const {
92     return median_top_;
93   }
94   int median_bottom() const {
95     return median_bottom_;
96   }
97   int median_size() const {
98     return median_size_;
99   }
100   BlobRegionType blob_type() const {
101     return blob_type_;
102   }
103   void set_blob_type(BlobRegionType t) {
104     blob_type_ = t;
105   }
106   bool good_width() const {
107     return good_width_;
108   }
109   bool good_column() const {
110     return good_column_;
111   }
112   bool left_key_tab() const {
113     return left_key_tab_;
114   }
115   int left_key() const {
116     return left_key_;
117   }
118   bool right_key_tab() const {
119     return right_key_tab_;
120   }
121   int right_key() const {
122     return right_key_;
123   }
124   PolyBlockType type() const {
125     return type_;
126   }
127   void set_type(PolyBlockType t) {
128     type_ = t;
129   }
130   BLOBNBOX_CLIST* boxes() {
131     return &boxes_;
132   }
133   ColPartition_CLIST* upper_partners() {
134     return &upper_partners_;
135   }
136   ColPartition_CLIST* lower_partners() {
137     return &lower_partners_;
138   }
139   void set_working_set(WorkingPartSet* working_set) {
140     working_set_ = working_set;
141   }
142   ColPartitionSet* column_set() const {
143     return column_set_;
144   }
145   void set_side_step(int step) {
146     side_step_ = step;
147   }
148   int bottom_spacing() const {
149     return bottom_spacing_;
150   }
151   void set_bottom_spacing(int spacing) {
152     bottom_spacing_ = spacing;
153   }
154   int top_spacing() const {
155     return top_spacing_;
156   }
157   void set_top_spacing(int spacing) {
158     top_spacing_ = spacing;
159   }
160 
161   void set_table_type() {
162     if (type_ != PT_TABLE) {
163       type_before_table_ = type_;
164       type_ = PT_TABLE;
165     }
166   }
167   void clear_table_type() {
168     if (type_ == PT_TABLE)
169       type_ = type_before_table_;
170   }
171   bool inside_table_column() {
172     return inside_table_column_;
173   }
174   void set_inside_table_column(bool val) {
175     inside_table_column_ = val;
176   }
177   ColPartition* nearest_neighbor_above() const {
178     return nearest_neighbor_above_;
179   }
180   void set_nearest_neighbor_above(ColPartition* part) {
181     nearest_neighbor_above_ = part;
182   }
183   ColPartition* nearest_neighbor_below() const {
184     return nearest_neighbor_below_;
185   }
186   void set_nearest_neighbor_below(ColPartition* part) {
187     nearest_neighbor_below_ = part;
188   }
189   int space_above() const {
190     return space_above_;
191   }
192   void set_space_above(int space) {
193     space_above_ = space;
194   }
195   int space_below() const {
196     return space_below_;
197   }
198   void set_space_below(int space) {
199     space_below_ = space;
200   }
201   int space_to_left() const {
202     return space_to_left_;
203   }
204   void set_space_to_left(int space) {
205     space_to_left_ = space;
206   }
207   int space_to_right() const {
208     return space_to_right_;
209   }
210   void set_space_to_right(int space) {
211     space_to_right_ = space;
212   }
213 
214   // Inline quasi-accessors that require some computation.
215 
216   // Returns the middle y-coord of the bounding box.
217   int MidY() const {
218     return (bounding_box_.top() + bounding_box_.bottom()) / 2;
219   }
220   // Returns the middle y-coord of the median top and bottom.
221   int MedianY() const {
222     return (median_top_ + median_bottom_) / 2;
223   }
224   // Returns the sort key at any given x,y.
225   int SortKey(int x, int y) const {
226     return TabVector::SortKey(vertical_, x, y);
227   }
228   // Returns the x corresponding to the sortkey, y pair.
229   int XAtY(int sort_key, int y) const {
230     return TabVector::XAtY(vertical_, sort_key, y);
231   }
232   // Returns the x difference between the two sort keys.
233   int KeyWidth(int left_key, int right_key) const {
234     return (right_key - left_key) / vertical_.y();
235   }
236   // Returns the column width between the left and right keys.
237   int ColumnWidth() const {
238     return KeyWidth(left_key_, right_key_);
239   }
240   // Returns the sort key of the box left edge.
241   int BoxLeftKey() const {
242     return SortKey(bounding_box_.left(), MidY());
243   }
244   // Returns the sort key of the box right edge.
245   int BoxRightKey() const {
246     return SortKey(bounding_box_.right(), MidY());
247   }
248   // Returns the left edge at the given y, using the sort key.
249   int LeftAtY(int y) const {
250     return XAtY(left_key_, y);
251   }
252   // Returns the right edge at the given y, using the sort key.
253   int RightAtY(int y) const {
254     return XAtY(right_key_, y);
255   }
256   // Returns true if the right edge of this is to the left of the right
257   // edge of other.
258   bool IsLeftOf(const ColPartition& other) const {
259     return bounding_box_.right() < other.bounding_box_.right();
260   }
261   // Returns true if the partition contains the given x coordinate at the y.
262   bool ColumnContains(int x, int y) const {
263     return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
264   }
265   // Returns true if there are no blobs in the list.
266   bool IsEmpty() {
267     return boxes_.empty();
268   }
269   // Returns true if this and other overlap horizontally by bounding box.
270   bool HOverlaps(const ColPartition& other) const {
271     return bounding_box_.x_overlap(other.bounding_box_);
272   }
273   // Returns true if this and other can be combined without putting a
274   // horizontal step in either left or right edge.
275   bool HCompatible(const ColPartition& other) const {
276     return left_margin_ <= other.bounding_box_.left() &&
277            bounding_box_.left() >= other.left_margin_ &&
278            bounding_box_.right() <= other.right_margin_ &&
279            right_margin_ >= other.bounding_box_.right();
280   }
281   // Returns the vertical overlap (by median) of this and other.
282   int VOverlap(const ColPartition& other) const {
283     return MIN(median_top_, other.median_top_) -
284            MAX(median_bottom_, other.median_bottom_);
285   }
286   // Returns true if this and other overlap significantly vertically.
287   bool VOverlaps(const ColPartition& other) const {
288     int overlap = VOverlap(other);
289     int height = MIN(median_top_ - median_bottom_,
290                      other.median_top_ - other.median_bottom_);
291     return overlap * 3 > height;
292   }
293   // Returns true if the region types (aligned_text_) match.
294   bool TypesMatch(const ColPartition& other) const {
295     return TypesMatch(blob_type_, other.blob_type_);
296   }
297   static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
298     return type1 == type2 ||
299            (type1 < BRT_UNKNOWN && type2 < BRT_UNKNOWN);
300   }
301 
302   // Returns true if partitions is of horizontal line type
303   bool IsLineType() {
304     return POLY_BLOCK::IsLineType(type_);
305   }
306   // Returns true if partitions is of image type
307   bool IsImageType() {
308     return POLY_BLOCK::IsImageType(type_);
309   }
310   // Returns true if partitions is of text type
311   bool IsTextType() {
312     return POLY_BLOCK::IsTextType(type_);
313   }
314 
315   // Adds the given box to the partition, updating the partition bounds.
316   // The list of boxes in the partition is updated, ensuring that no box is
317   // recorded twice, and the boxes are kept in increasing left position.
318   void AddBox(BLOBNBOX* box);
319 
320   // Claims the boxes in the boxes_list by marking them with a this owner
321   // pointer. If a box is already owned, then run Unique on it.
322   void ClaimBoxes(WidthCallback* cb);
323 
324   // Delete the boxes that this partition owns.
325   void DeleteBoxes();
326 
327   // Returns true if this is a legal partition - meaning that the conditions
328   // left_margin <= bounding_box left
329   // left_key <= bounding box left key
330   // bounding box left <= bounding box right
331   // and likewise for right margin and key
332   // are all met.
333   bool IsLegal();
334 
335   // Returns true if the left and right edges are approximately equal.
336   bool MatchingColumns(const ColPartition& other) const;
337 
338   // Sets the sort key using either the tab vector, or the bounding box if
339   // the tab vector is NULL. If the tab_vector lies inside the bounding_box,
340   // use the edge of the box as a key any way.
341   void SetLeftTab(const TabVector* tab_vector);
342   void SetRightTab(const TabVector* tab_vector);
343 
344   // Copies the left/right tab from the src partition, but if take_box is
345   // true, copies the box instead and uses that as a key.
346   void CopyLeftTab(const ColPartition& src, bool take_box);
347   void CopyRightTab(const ColPartition& src, bool take_box);
348 
349   // Add a partner above if upper, otherwise below.
350   // Add them uniquely and keep the list sorted by box left.
351   // Partnerships are added symmetrically to partner and this.
352   void AddPartner(bool upper, ColPartition* partner);
353   // Removes the partner from this, but does not remove this from partner.
354   // This asymmetric removal is so as not to mess up the iterator that is
355   // working on partner's partner list.
356   void RemovePartner(bool upper, ColPartition* partner);
357   // Returns the partner if the given partner is a singleton, otherwise NULL.
358   ColPartition* SingletonPartner(bool upper);
359 
360   // Merge with the other partition and delete it.
361   void Absorb(ColPartition* other, WidthCallback* cb);
362 
363   // Shares out any common boxes amongst the partitions, ensuring that no
364   // box stays in both. Returns true if anything was done.
365   bool Unique(ColPartition* other, WidthCallback* cb);
366 
367   // Splits this partition at the given x coordinate, returning the right
368   // half and keeping the left half in this.
369   ColPartition* SplitAt(int split_x);
370 
371   // Recalculates all the coordinate limits of the partition.
372   void ComputeLimits();
373 
374   // Computes and sets the type_, first_column_, last_column_ and column_set_.
375   void SetPartitionType(ColPartitionSet* columns);
376 
377   // Returns the first and last column touched by this partition.
378   void ColumnRange(ColPartitionSet* columns, int* first_col, int* last_col);
379 
380   // Sets the internal flags good_width_ and good_column_.
381   void SetColumnGoodness(WidthCallback* cb);
382 
383   // Adds this ColPartition to a matching WorkingPartSet if one can be found,
384   // otherwise starts a new one in the appropriate column, ending the previous.
385   void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
386                        int resolution, ColPartition_LIST* used_parts,
387                        WorkingPartSet_LIST* working_set);
388 
389   // From the given block_parts list, builds one or more BLOCKs and
390   // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
391   // Created blocks are appended to the end of completed_blocks and to_blocks.
392   // The used partitions are put onto used_parts, as they may still be referred
393   // to in the partition grid. bleft, tright and resolution are the bounds
394   // and resolution of the original image.
395   static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
396                                 int resolution,
397                                 ColPartition_LIST* block_parts,
398                                 ColPartition_LIST* used_parts,
399                                 BLOCK_LIST* completed_blocks,
400                                 TO_BLOCK_LIST* to_blocks);
401   // Constructs a block from the given list of partitions.
402   // Arguments are as LineSpacingBlocks above.
403   static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright,
404                              ColPartition_LIST* block_parts,
405                              ColPartition_LIST* used_parts);
406 
407 
408   // Returns a copy of everything except the list of boxes. The resulting
409   // ColPartition is only suitable for keeping in a column candidate list.
410   ColPartition* ShallowCopy() const;
411 
412   // Provides a color for BBGrid to draw the rectangle.
413   ScrollView::Color  BoxColor() const;
414 
415   // Prints debug information on this.
416   void Print();
417 
418   // Sets the types of all partitions in the run to be the max of the types.
419   void SmoothPartnerRun(int working_set_count);
420 
421   // Cleans up the partners of the given type so that there is at most
422   // one partner. This makes block creation simpler.
423   void RefinePartners(PolyBlockType type);
424 
425  private:
426   // enum to refer to the entries in a neigbourhood of lines.
427   // Used by SmoothSpacings to test for blips with OKSpacingBlip.
428   enum SpacingNeighbourhood {
429     PN_ABOVE2,
430     PN_ABOVE1,
431     PN_UPPER,
432     PN_LOWER,
433     PN_BELOW1,
434     PN_BELOW2,
435     PN_COUNT
436   };
437 
438   // Cleans up the partners above if upper is true, else below.
439   void RefinePartnersInternal(bool upper);
440   // Restricts the partners to only desirable types. For text and BRT_HLINE this
441   // means the same type_ , and for image types it means any image type.
442   void RefinePartnersByType(bool upper, ColPartition_CLIST* partners);
443   // Remove transitive partnerships: this<->a, and a<->b and this<->b.
444   // Gets rid of this<->b, leaving a clean chain.
445   // Also if we have this<->a and a<->this, then gets rid of this<->a, as
446   // this has multiple partners.
447   void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners);
448   // Keeps the partner with the longest sequence of singleton matching partners.
449   // Converts all others to pullout.
450   void RefineFlowingTextPartners(bool upper, ColPartition_CLIST* partners);
451   // Keep the partner with the biggest overlap.
452   void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners);
453 
454   // Return true if bbox belongs better in this than other.
455   bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other);
456 
457   // Smoothes the spacings in the list into groups of equal linespacing.
458   // resolution is the resolution of the original image, used as a basis
459   // for thresholds in change of spacing. page_height is in pixels.
460   static void SmoothSpacings(int resolution, int page_height,
461                              ColPartition_LIST* parts);
462 
463   // Returns true if the parts array of pointers to partitions matches the
464   // condition for a spacing blip. See SmoothSpacings for what this means
465   // and how it is used.
466   static bool OKSpacingBlip(int resolution, int median_spacing,
467                             ColPartition** parts);
468 
469   // Returns true if both the top and bottom spacings of this match the given
470   // spacing to within suitable margins dictated by the image resolution.
471   bool SpacingEqual(int spacing, int resolution) const;
472 
473   // Returns true if both the top and bottom spacings of this and other
474   // match to within suitable margins dictated by the image resolution.
475   bool SpacingsEqual(const ColPartition& other, int resolution) const;
476 
477   // Returns true if the sum spacing of this and other match the given
478   // spacing (or twice the given spacing) to within a suitable margin dictated
479   // by the image resolution.
480   bool SummedSpacingOK(const ColPartition& other,
481                        int spacing, int resolution) const;
482 
483   // Returns a suitable spacing margin that can be applied to bottoms of
484   // text lines, based on the resolution and the stored side_step_.
485   int BottomSpacingMargin(int resolution) const;
486 
487   // Returns a suitable spacing margin that can be applied to tops of
488   // text lines, based on the resolution and the stored side_step_.
489   int TopSpacingMargin(int resolution) const;
490 
491   // Returns true if the median text sizes of this and other agree to within
492   // a reasonable multiplicative factor.
493   bool SizesSimilar(const ColPartition& other) const;
494 
495   // Computes and returns in start, end a line segment formed from a
496   // forwards-iterated group of left edges of partitions that satisfy the
497   // condition that the rightmost left margin is to the left of the
498   // leftmost left bounding box edge.
499   // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
500   // directions, and to loosely wrap images.
501   static void LeftEdgeRun(ColPartition_IT* part_it,
502                           ICOORD* start, ICOORD* end);
503   // Computes and returns in start, end a line segment formed from a
504   // backwards-iterated group of right edges of partitions that satisfy the
505   // condition that the leftmost right margin is to the right of the
506   // rightmost right bounding box edge.
507   // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
508   // directions, and to loosely wrap images.
509   static void RightEdgeRun(ColPartition_IT* part_it,
510                            ICOORD* start, ICOORD* end);
511 
512   // The margins are determined by the position of the nearest vertically
513   // overlapping neighbour to the side. They indicate the maximum extent
514   // that the block/column may be extended without touching something else.
515   // Leftmost coordinate that the region may occupy over the y limits.
516   int left_margin_;
517   // Rightmost coordinate that the region may occupy over the y limits.
518   int right_margin_;
519   // Bounding box of all blobs in the partition.
520   TBOX bounding_box_;
521   // Median top and bottom of blobs in this partition.
522   int median_bottom_;
523   int median_top_;
524   // Median height of blobs in this partition.
525   int median_size_;
526   // blob_region_type_ for the blobs in this partition.
527   BlobRegionType blob_type_;
528   // True if this partition has a common width.
529   bool good_width_;
530   // True if this is a good column candidate.
531   bool good_column_;
532   // True if the left_key_ is from a tab vector.
533   bool left_key_tab_;
534   // True if the right_key_ is from a tab vector.
535   bool right_key_tab_;
536   // Left and right sort keys for the edges of the partition.
537   // If the respective *_key_tab_ is true then this key came from a tab vector.
538   // If not, then the class promises to keep the key equal to the sort key
539   // for the respective edge of the bounding box at the MidY, so that
540   // LeftAtY and RightAtY always returns an x coordinate on the line parallel
541   // to vertical_ through the bounding box edge at MidY.
542   int left_key_;
543   int right_key_;
544   // Type of this partition after looking at its relation to the columns.
545   PolyBlockType type_;
546   // All boxes in the partition stored in increasing left edge coordinate.
547   BLOBNBOX_CLIST boxes_;
548   // The global vertical skew direction.
549   ICOORD vertical_;
550   // The partitions above that matched this.
551   ColPartition_CLIST upper_partners_;
552   // The partitions below that matched this.
553   ColPartition_CLIST lower_partners_;
554   // The WorkingPartSet it lives in while blocks are being made.
555   WorkingPartSet* working_set_;
556   // True when the partition's ownership has been taken from the grid and
557   // placed in a working set, or, after that, in the good_parts_ list.
558   bool block_owned_;
559   // The first and last column that this partition applies to.
560   // Flowing partitions (see type_) will have an equal first and last value
561   // of the form 2n + 1, where n is the zero-based index into the partitions
562   // in column_set_. (See ColPartitionSet::GetColumnByIndex).
563   // Heading partitions will have unequal values of the same form.
564   // Pullout partitions will have equal values, but may have even values,
565   // indicating placement between columns.
566   int first_column_;
567   int last_column_;
568   // Column_set_ is the column layout applicable to this ColPartition.
569   ColPartitionSet* column_set_;
570   // Linespacing data.
571   int side_step_;       // Median y-shift to next blob on same line.
572   int top_spacing_;     // Line spacing from median_top_.
573   int bottom_spacing_;  // Line spacing from median_bottom_.
574 
575   // Type of this partition before considering it as a table cell. This is
576   // used to revert the type if a partition is first marked as a table cell but
577   // later filtering steps decide it does not belong to a table
578   PolyBlockType type_before_table_;
579   bool inside_table_column_;  // Check whether the current partition has been
580                               // assigned to a table column
581   // Nearest neighbor above with major x-overlap
582   ColPartition* nearest_neighbor_above_;
583   // Nearest neighbor below with major x-overlap
584   ColPartition* nearest_neighbor_below_;
585   int space_above_;      // Distance from nearest_neighbor_above
586   int space_below_;      // Distance from nearest_neighbor_below
587   int space_to_left_;    // Distance from the left edge of the column
588   int space_to_right_;   // Distance from the right edge of the column
589 };
590 
591 // Typedef it now in case it becomes a class later.
592 typedef BBGrid<ColPartition,
593                ColPartition_CLIST,
594                ColPartition_C_IT> ColPartitionGrid;
595 typedef GridSearch<ColPartition,
596                ColPartition_CLIST,
597                ColPartition_C_IT> ColPartitionGridSearch;
598 
599 }  // namespace tesseract.
600 
601 #endif  // TESSERACT_TEXTORD_COLPARTITION_H__
602