1 /////////////////////////////////////////////////////////////////////// 2 // File: colpartitionset.h 3 // Description: Class to hold a list of ColPartitions of the page that 4 // correspond roughly to columns. 5 // Author: Ray Smith 6 // Created: Thu Aug 14 10:50:01 PDT 2008 7 // 8 // (C) Copyright 2008, Google Inc. 9 // Licensed under the Apache License, Version 2.0 (the "License"); 10 // you may not use this file except in compliance with the License. 11 // You may obtain a copy of the License at 12 // http://www.apache.org/licenses/LICENSE-2.0 13 // Unless required by applicable law or agreed to in writing, software 14 // distributed under the License is distributed on an "AS IS" BASIS, 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 // See the License for the specific language governing permissions and 17 // limitations under the License. 18 // 19 /////////////////////////////////////////////////////////////////////// 20 21 #ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H__ 22 #define TESSERACT_TEXTORD_COLPARTITIONSET_H__ 23 24 #include "colpartition.h" // For ColPartition_LIST. 25 #include "genericvector.h" // For GenericVector. 26 #include "rect.h" // For TBOX. 27 #include "tabvector.h" // For BLOBNBOX_CLIST. 28 29 namespace tesseract { 30 31 class WorkingPartSet_LIST; 32 class ColSegment_LIST; 33 class ColPartitionSet; 34 typedef GenericVector<ColPartitionSet*> PartSetVector; 35 36 // ColPartitionSet is a class that holds a list of ColPartitions. 37 // Its main use is in holding a candidate partitioning of the width of the 38 // image into columns, where each member ColPartition is a single column. 39 // ColPartitionSets are used in building the column layout of a page. 40 class ColPartitionSet : public ELIST_LINK { 41 public: ColPartitionSet()42 ColPartitionSet() { 43 } 44 explicit ColPartitionSet(ColPartition_LIST* partitions); 45 explicit ColPartitionSet(ColPartition* partition); 46 47 ~ColPartitionSet(); 48 49 // Simple accessors. bounding_box()50 const TBOX& bounding_box() const { 51 return bounding_box_; 52 } Empty()53 bool Empty() { 54 return parts_.empty(); 55 } ColumnCount()56 int ColumnCount() { 57 return parts_.length(); 58 } 59 60 // Return an element of the parts_ list from its index. 61 ColPartition* GetColumnByIndex(int index); 62 63 // Return the ColPartition that contains the given coords, if any, else NULL. 64 ColPartition* ColumnContaining(int x, int y); 65 66 // Return the bounding boxes of columns at the given y-range 67 void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments); 68 69 // Move the parts to the output list, giving up ownership. 70 void ReturnParts(ColPartition_LIST* parts); 71 72 // Merge any significantly overlapping partitions within the this and other, 73 // and unique the boxes so that no two partitions use the same box. 74 // Return true if any changes were made to either set. 75 bool MergeOverlaps(ColPartitionSet* other, WidthCallback* cb); 76 77 // Attempt to improve this by adding partitions or expanding partitions. 78 void ImproveColumnCandidate(WidthCallback* cb, PartSetVector* src_sets); 79 80 // If this set is good enough to represent a new partitioning into columns, 81 // add it to the vector of sets, otherwise delete it. 82 void AddToColumnSetsIfUnique(PartSetVector* column_sets, WidthCallback* cb); 83 84 // Return true if the partitions in other are all compatible with the columns 85 // in this. 86 bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback* cb); 87 88 // Return true if this ColPartitionSet makes a legal column candidate by 89 // having legal individual partitions and non-overlapping adjacent pairs. 90 bool LegalColumnCandidate(); 91 92 // Return a copy of this. If good_only will only copy the Good ColPartitions. 93 ColPartitionSet* Copy(bool good_only); 94 95 // Display the edges of the columns at the given y coords. 96 void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win); 97 98 // Return the PolyBlockType that best explains the columns overlapped 99 // by the given coords(left,right,y), with the given margins. 100 // Also return the first and last column index touched by the coords and 101 // the leftmost and rightmost spanned columns. 102 // Column indices are 2n + 1 for real colums (0 based) and even values 103 // represent the gaps in between columns, with 0 being left of the leftmost. 104 PolyBlockType SpanningType(BlobRegionType type, int left, int right, int y, 105 int left_margin, int right_margin, 106 int* first_col, int* last_col, 107 int* first_spanned_col, int* last_spanned_col); 108 109 // The column_set has changed. Close down all in-progress WorkingPartSets in 110 // columns that do not match and start new ones for the new columns in this. 111 // As ColPartitions are turned into BLOCKs, the used ones are put in 112 // used_parts, as they still need to be referenced in the grid. 113 void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright, 114 int resolution, ColPartition_LIST* used_parts, 115 WorkingPartSet_LIST* working_set); 116 117 // Accumulate the widths and gaps into the given variables. 118 void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples, 119 int* total_gap, int* gap_samples); 120 121 // Provide debug output for this ColPartitionSet and all the ColPartitions. 122 void Print(); 123 124 private: 125 // Add the given partition to the list in the appropriate place. 126 void AddPartition(ColPartition* new_part, ColPartition_IT* it); 127 128 // Compute the coverage and good column count. 129 void ComputeCoverage(); 130 131 // The partitions in this column candidate. 132 ColPartition_LIST parts_; 133 // The number of partitions that have a frequent column width. 134 int good_column_count_; 135 // Total width of all the ColPartitions. 136 int total_coverage_; 137 // Bounding box of all partitions in the set. 138 TBOX bounding_box_; 139 }; 140 141 ELISTIZEH(ColPartitionSet) 142 143 } // namespace tesseract. 144 145 #endif // TESSERACT_TEXTORD_COLPARTITION_H__ 146