• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        colpartitionset.h
3 // Description: Class to hold a list of ColPartitions of the page that
4 //              correspond roughly to columns.
5 // Author:      Ray Smith
6 // Created:     Thu Aug 14 10:50:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
19 ///////////////////////////////////////////////////////////////////////
20 
21 #ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H__
22 #define TESSERACT_TEXTORD_COLPARTITIONSET_H__
23 
24 #include "colpartition.h"   // For ColPartition_LIST.
25 #include "genericvector.h"  // For GenericVector.
26 #include "rect.h"           // For TBOX.
27 #include "tabvector.h"      // For BLOBNBOX_CLIST.
28 
29 namespace tesseract {
30 
31 class WorkingPartSet_LIST;
32 class ColSegment_LIST;
33 class ColPartitionSet;
34 typedef GenericVector<ColPartitionSet*> PartSetVector;
35 
36 // ColPartitionSet is a class that holds a list of ColPartitions.
37 // Its main use is in holding a candidate partitioning of the width of the
38 // image into columns, where each member ColPartition is a single column.
39 // ColPartitionSets are used in building the column layout of a page.
40 class ColPartitionSet : public ELIST_LINK {
41  public:
ColPartitionSet()42   ColPartitionSet() {
43   }
44   explicit ColPartitionSet(ColPartition_LIST* partitions);
45   explicit ColPartitionSet(ColPartition* partition);
46 
47   ~ColPartitionSet();
48 
49   // Simple accessors.
bounding_box()50   const TBOX& bounding_box() const {
51     return bounding_box_;
52   }
Empty()53   bool Empty() {
54     return parts_.empty();
55   }
ColumnCount()56   int ColumnCount() {
57     return parts_.length();
58   }
59 
60   // Return an element of the parts_ list from its index.
61   ColPartition* GetColumnByIndex(int index);
62 
63   // Return the ColPartition that contains the given coords, if any, else NULL.
64   ColPartition* ColumnContaining(int x, int y);
65 
66   // Return the bounding boxes of columns at the given y-range
67   void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
68 
69   // Move the parts to the output list, giving up ownership.
70   void ReturnParts(ColPartition_LIST* parts);
71 
72   // Merge any significantly overlapping partitions within the this and other,
73   // and unique the boxes so that no two partitions use the same box.
74   // Return true if any changes were made to either set.
75   bool MergeOverlaps(ColPartitionSet* other, WidthCallback* cb);
76 
77   // Attempt to improve this by adding partitions or expanding partitions.
78   void ImproveColumnCandidate(WidthCallback* cb, PartSetVector* src_sets);
79 
80   // If this set is good enough to represent a new partitioning into columns,
81   // add it to the vector of sets, otherwise delete it.
82   void AddToColumnSetsIfUnique(PartSetVector* column_sets, WidthCallback* cb);
83 
84   // Return true if the partitions in other are all compatible with the columns
85   // in this.
86   bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback* cb);
87 
88   // Return true if this ColPartitionSet makes a legal column candidate by
89   // having legal individual partitions and non-overlapping adjacent pairs.
90   bool LegalColumnCandidate();
91 
92   // Return a copy of this. If good_only will only copy the Good ColPartitions.
93   ColPartitionSet* Copy(bool good_only);
94 
95   // Display the edges of the columns at the given y coords.
96   void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win);
97 
98   // Return the PolyBlockType that best explains the columns overlapped
99   // by the given coords(left,right,y), with the given margins.
100   // Also return the first and last column index touched by the coords and
101   // the leftmost and rightmost spanned columns.
102   // Column indices are 2n + 1 for real colums (0 based) and even values
103   // represent the gaps in between columns, with 0 being left of the leftmost.
104   PolyBlockType SpanningType(BlobRegionType type, int left, int right, int y,
105                              int left_margin, int right_margin,
106                              int* first_col, int* last_col,
107                              int* first_spanned_col, int* last_spanned_col);
108 
109   // The column_set has changed. Close down all in-progress WorkingPartSets in
110   // columns that do not match and start new ones for the new columns in this.
111   // As ColPartitions are turned into BLOCKs, the used ones are put in
112   // used_parts, as they still need to be referenced in the grid.
113   void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright,
114                          int resolution, ColPartition_LIST* used_parts,
115                          WorkingPartSet_LIST* working_set);
116 
117   // Accumulate the widths and gaps into the given variables.
118   void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples,
119                                      int* total_gap, int* gap_samples);
120 
121   // Provide debug output for this ColPartitionSet and all the ColPartitions.
122   void Print();
123 
124  private:
125   // Add the given partition to the list in the appropriate place.
126   void AddPartition(ColPartition* new_part, ColPartition_IT* it);
127 
128   // Compute the coverage and good column count.
129   void ComputeCoverage();
130 
131   // The partitions in this column candidate.
132   ColPartition_LIST parts_;
133   // The number of partitions that have a frequent column width.
134   int good_column_count_;
135   // Total width of all the ColPartitions.
136   int total_coverage_;
137   // Bounding box of all partitions in the set.
138   TBOX bounding_box_;
139 };
140 
141 ELISTIZEH(ColPartitionSet)
142 
143 }  // namespace tesseract.
144 
#endif  // TESSERACT_TEXTORD_COLPARTITIONSET_H__
146