1 ///////////////////////////////////////////////////////////////////////
2 // File: colpartition.h
3 // Description: Class to hold partitions of the page that correspond
4 // roughly to text lines.
5 // Author: Ray Smith
6 // Created: Thu Aug 14 10:50:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
19 ///////////////////////////////////////////////////////////////////////
20
21 #ifndef TESSERACT_TEXTORD_COLPARTITION_H__
22 #define TESSERACT_TEXTORD_COLPARTITION_H__
23
24 #include "bbgrid.h"
25 #include "blobbox.h" // For BlobRegionType.
26 #include "ndminx.h"
27 #include "ocrblock.h"
28 #include "rect.h" // For TBOX.
29 #include "scrollview.h"
30 #include "tabfind.h" // For WidthCallback.
31 #include "tabvector.h" // For BLOBNBOX_CLIST.
32
33 namespace tesseract {
34
35
36 class ColPartition;
37 class ColPartitionSet;
38 class WorkingPartSet;
39 class WorkingPartSet_LIST;
40
41 ELIST2IZEH(ColPartition)
CLISTIZEH(ColPartition)42 CLISTIZEH(ColPartition)
43
44 // ColPartition is a partition of a horizontal slice of the page.
45 // It starts out as a collection of blobs at a particular y-coord in the grid,
46 // but ends up (after merging and uniquing) as an approximate text line.
47 // ColPartitions are also used to hold a partitioning of the page into
48 // columns, each representing one column. Although a ColPartition applies
49 // to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
50 // emerges, which represents the columns over a wide y-coordinate range.
51 class ColPartition : public ELIST2_LINK {
52 public:
53 ColPartition() {
54 // This empty constructor is here only so that the class can be ELISTIZED.
55 // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
56 // and eliminate CLASSNAME##_copier.
57 }
58 // blob_type is the blob_region_type_ of the blobs in this partition.
59 // Vertical is the direction of logical vertical on the possibly skewed image.
60 ColPartition(BlobRegionType blob_type, const ICOORD& vertical);
61 // Constructs a fake ColPartition with no BLOBNBOXes.
62 // Used for making horizontal line ColPartitions and types it accordingly.
63 ColPartition(const ICOORD& vertical,
64 int left, int bottom, int right, int top);
65
66 // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
67 // all made from a single TBOX.
68 // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
69 // the ColPartition owns the BLOBNBOX!!!
70 // Call DeleteBoxes before deleting the ColPartition.
71 static ColPartition* FakePartition(const TBOX& box);
72
73 ~ColPartition();
74
75 // Simple accessors.
76 const TBOX& bounding_box() const {
77 return bounding_box_;
78 }
79 int left_margin() const {
80 return left_margin_;
81 }
82 void set_left_margin(int margin) {
83 left_margin_ = margin;
84 }
85 int right_margin() const {
86 return right_margin_;
87 }
88 void set_right_margin(int margin) {
89 right_margin_ = margin;
90 }
91 int median_top() const {
92 return median_top_;
93 }
94 int median_bottom() const {
95 return median_bottom_;
96 }
97 int median_size() const {
98 return median_size_;
99 }
100 BlobRegionType blob_type() const {
101 return blob_type_;
102 }
103 void set_blob_type(BlobRegionType t) {
104 blob_type_ = t;
105 }
106 bool good_width() const {
107 return good_width_;
108 }
109 bool good_column() const {
110 return good_column_;
111 }
112 bool left_key_tab() const {
113 return left_key_tab_;
114 }
115 int left_key() const {
116 return left_key_;
117 }
118 bool right_key_tab() const {
119 return right_key_tab_;
120 }
121 int right_key() const {
122 return right_key_;
123 }
124 PolyBlockType type() const {
125 return type_;
126 }
127 void set_type(PolyBlockType t) {
128 type_ = t;
129 }
130 BLOBNBOX_CLIST* boxes() {
131 return &boxes_;
132 }
133 ColPartition_CLIST* upper_partners() {
134 return &upper_partners_;
135 }
136 ColPartition_CLIST* lower_partners() {
137 return &lower_partners_;
138 }
139 void set_working_set(WorkingPartSet* working_set) {
140 working_set_ = working_set;
141 }
142 ColPartitionSet* column_set() const {
143 return column_set_;
144 }
145 void set_side_step(int step) {
146 side_step_ = step;
147 }
148 int bottom_spacing() const {
149 return bottom_spacing_;
150 }
151 void set_bottom_spacing(int spacing) {
152 bottom_spacing_ = spacing;
153 }
154 int top_spacing() const {
155 return top_spacing_;
156 }
157 void set_top_spacing(int spacing) {
158 top_spacing_ = spacing;
159 }
160
161 void set_table_type() {
162 if (type_ != PT_TABLE) {
163 type_before_table_ = type_;
164 type_ = PT_TABLE;
165 }
166 }
167 void clear_table_type() {
168 if (type_ == PT_TABLE)
169 type_ = type_before_table_;
170 }
171 bool inside_table_column() {
172 return inside_table_column_;
173 }
174 void set_inside_table_column(bool val) {
175 inside_table_column_ = val;
176 }
177 ColPartition* nearest_neighbor_above() const {
178 return nearest_neighbor_above_;
179 }
180 void set_nearest_neighbor_above(ColPartition* part) {
181 nearest_neighbor_above_ = part;
182 }
183 ColPartition* nearest_neighbor_below() const {
184 return nearest_neighbor_below_;
185 }
186 void set_nearest_neighbor_below(ColPartition* part) {
187 nearest_neighbor_below_ = part;
188 }
189 int space_above() const {
190 return space_above_;
191 }
192 void set_space_above(int space) {
193 space_above_ = space;
194 }
195 int space_below() const {
196 return space_below_;
197 }
198 void set_space_below(int space) {
199 space_below_ = space;
200 }
201 int space_to_left() const {
202 return space_to_left_;
203 }
204 void set_space_to_left(int space) {
205 space_to_left_ = space;
206 }
207 int space_to_right() const {
208 return space_to_right_;
209 }
210 void set_space_to_right(int space) {
211 space_to_right_ = space;
212 }
213
214 // Inline quasi-accessors that require some computation.
215
216 // Returns the middle y-coord of the bounding box.
217 int MidY() const {
218 return (bounding_box_.top() + bounding_box_.bottom()) / 2;
219 }
220 // Returns the middle y-coord of the median top and bottom.
221 int MedianY() const {
222 return (median_top_ + median_bottom_) / 2;
223 }
224 // Returns the sort key at any given x,y.
225 int SortKey(int x, int y) const {
226 return TabVector::SortKey(vertical_, x, y);
227 }
228 // Returns the x corresponding to the sortkey, y pair.
229 int XAtY(int sort_key, int y) const {
230 return TabVector::XAtY(vertical_, sort_key, y);
231 }
232 // Returns the x difference between the two sort keys.
233 int KeyWidth(int left_key, int right_key) const {
234 return (right_key - left_key) / vertical_.y();
235 }
236 // Returns the column width between the left and right keys.
237 int ColumnWidth() const {
238 return KeyWidth(left_key_, right_key_);
239 }
240 // Returns the sort key of the box left edge.
241 int BoxLeftKey() const {
242 return SortKey(bounding_box_.left(), MidY());
243 }
244 // Returns the sort key of the box right edge.
245 int BoxRightKey() const {
246 return SortKey(bounding_box_.right(), MidY());
247 }
248 // Returns the left edge at the given y, using the sort key.
249 int LeftAtY(int y) const {
250 return XAtY(left_key_, y);
251 }
252 // Returns the right edge at the given y, using the sort key.
253 int RightAtY(int y) const {
254 return XAtY(right_key_, y);
255 }
256 // Returns true if the right edge of this is to the left of the right
257 // edge of other.
258 bool IsLeftOf(const ColPartition& other) const {
259 return bounding_box_.right() < other.bounding_box_.right();
260 }
261 // Returns true if the partition contains the given x coordinate at the y.
262 bool ColumnContains(int x, int y) const {
263 return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
264 }
265 // Returns true if there are no blobs in the list.
266 bool IsEmpty() {
267 return boxes_.empty();
268 }
269 // Returns true if this and other overlap horizontally by bounding box.
270 bool HOverlaps(const ColPartition& other) const {
271 return bounding_box_.x_overlap(other.bounding_box_);
272 }
273 // Returns true if this and other can be combined without putting a
274 // horizontal step in either left or right edge.
275 bool HCompatible(const ColPartition& other) const {
276 return left_margin_ <= other.bounding_box_.left() &&
277 bounding_box_.left() >= other.left_margin_ &&
278 bounding_box_.right() <= other.right_margin_ &&
279 right_margin_ >= other.bounding_box_.right();
280 }
281 // Returns the vertical overlap (by median) of this and other.
282 int VOverlap(const ColPartition& other) const {
283 return MIN(median_top_, other.median_top_) -
284 MAX(median_bottom_, other.median_bottom_);
285 }
286 // Returns true if this and other overlap significantly vertically.
287 bool VOverlaps(const ColPartition& other) const {
288 int overlap = VOverlap(other);
289 int height = MIN(median_top_ - median_bottom_,
290 other.median_top_ - other.median_bottom_);
291 return overlap * 3 > height;
292 }
293 // Returns true if the region types (aligned_text_) match.
294 bool TypesMatch(const ColPartition& other) const {
295 return TypesMatch(blob_type_, other.blob_type_);
296 }
297 static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
298 return type1 == type2 ||
299 (type1 < BRT_UNKNOWN && type2 < BRT_UNKNOWN);
300 }
301
302 // Returns true if partitions is of horizontal line type
303 bool IsLineType() {
304 return POLY_BLOCK::IsLineType(type_);
305 }
306 // Returns true if partitions is of image type
307 bool IsImageType() {
308 return POLY_BLOCK::IsImageType(type_);
309 }
310 // Returns true if partitions is of text type
311 bool IsTextType() {
312 return POLY_BLOCK::IsTextType(type_);
313 }
314
315 // Adds the given box to the partition, updating the partition bounds.
316 // The list of boxes in the partition is updated, ensuring that no box is
317 // recorded twice, and the boxes are kept in increasing left position.
318 void AddBox(BLOBNBOX* box);
319
320 // Claims the boxes in the boxes_list by marking them with a this owner
321 // pointer. If a box is already owned, then run Unique on it.
322 void ClaimBoxes(WidthCallback* cb);
323
324 // Delete the boxes that this partition owns.
325 void DeleteBoxes();
326
327 // Returns true if this is a legal partition - meaning that the conditions
328 // left_margin <= bounding_box left
329 // left_key <= bounding box left key
330 // bounding box left <= bounding box right
331 // and likewise for right margin and key
332 // are all met.
333 bool IsLegal();
334
335 // Returns true if the left and right edges are approximately equal.
336 bool MatchingColumns(const ColPartition& other) const;
337
338 // Sets the sort key using either the tab vector, or the bounding box if
339 // the tab vector is NULL. If the tab_vector lies inside the bounding_box,
340 // use the edge of the box as a key any way.
341 void SetLeftTab(const TabVector* tab_vector);
342 void SetRightTab(const TabVector* tab_vector);
343
344 // Copies the left/right tab from the src partition, but if take_box is
345 // true, copies the box instead and uses that as a key.
346 void CopyLeftTab(const ColPartition& src, bool take_box);
347 void CopyRightTab(const ColPartition& src, bool take_box);
348
349 // Add a partner above if upper, otherwise below.
350 // Add them uniquely and keep the list sorted by box left.
351 // Partnerships are added symmetrically to partner and this.
352 void AddPartner(bool upper, ColPartition* partner);
353 // Removes the partner from this, but does not remove this from partner.
354 // This asymmetric removal is so as not to mess up the iterator that is
355 // working on partner's partner list.
356 void RemovePartner(bool upper, ColPartition* partner);
357 // Returns the partner if the given partner is a singleton, otherwise NULL.
358 ColPartition* SingletonPartner(bool upper);
359
360 // Merge with the other partition and delete it.
361 void Absorb(ColPartition* other, WidthCallback* cb);
362
363 // Shares out any common boxes amongst the partitions, ensuring that no
364 // box stays in both. Returns true if anything was done.
365 bool Unique(ColPartition* other, WidthCallback* cb);
366
367 // Splits this partition at the given x coordinate, returning the right
368 // half and keeping the left half in this.
369 ColPartition* SplitAt(int split_x);
370
371 // Recalculates all the coordinate limits of the partition.
372 void ComputeLimits();
373
374 // Computes and sets the type_, first_column_, last_column_ and column_set_.
375 void SetPartitionType(ColPartitionSet* columns);
376
377 // Returns the first and last column touched by this partition.
378 void ColumnRange(ColPartitionSet* columns, int* first_col, int* last_col);
379
380 // Sets the internal flags good_width_ and good_column_.
381 void SetColumnGoodness(WidthCallback* cb);
382
383 // Adds this ColPartition to a matching WorkingPartSet if one can be found,
384 // otherwise starts a new one in the appropriate column, ending the previous.
385 void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
386 int resolution, ColPartition_LIST* used_parts,
387 WorkingPartSet_LIST* working_set);
388
389 // From the given block_parts list, builds one or more BLOCKs and
390 // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
391 // Created blocks are appended to the end of completed_blocks and to_blocks.
392 // The used partitions are put onto used_parts, as they may still be referred
393 // to in the partition grid. bleft, tright and resolution are the bounds
394 // and resolution of the original image.
395 static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
396 int resolution,
397 ColPartition_LIST* block_parts,
398 ColPartition_LIST* used_parts,
399 BLOCK_LIST* completed_blocks,
400 TO_BLOCK_LIST* to_blocks);
401 // Constructs a block from the given list of partitions.
402 // Arguments are as LineSpacingBlocks above.
403 static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright,
404 ColPartition_LIST* block_parts,
405 ColPartition_LIST* used_parts);
406
407
408 // Returns a copy of everything except the list of boxes. The resulting
409 // ColPartition is only suitable for keeping in a column candidate list.
410 ColPartition* ShallowCopy() const;
411
412 // Provides a color for BBGrid to draw the rectangle.
413 ScrollView::Color BoxColor() const;
414
415 // Prints debug information on this.
416 void Print();
417
418 // Sets the types of all partitions in the run to be the max of the types.
419 void SmoothPartnerRun(int working_set_count);
420
421 // Cleans up the partners of the given type so that there is at most
422 // one partner. This makes block creation simpler.
423 void RefinePartners(PolyBlockType type);
424
425 private:
426 // enum to refer to the entries in a neigbourhood of lines.
427 // Used by SmoothSpacings to test for blips with OKSpacingBlip.
428 enum SpacingNeighbourhood {
429 PN_ABOVE2,
430 PN_ABOVE1,
431 PN_UPPER,
432 PN_LOWER,
433 PN_BELOW1,
434 PN_BELOW2,
435 PN_COUNT
436 };
437
438 // Cleans up the partners above if upper is true, else below.
439 void RefinePartnersInternal(bool upper);
440 // Restricts the partners to only desirable types. For text and BRT_HLINE this
441 // means the same type_ , and for image types it means any image type.
442 void RefinePartnersByType(bool upper, ColPartition_CLIST* partners);
443 // Remove transitive partnerships: this<->a, and a<->b and this<->b.
444 // Gets rid of this<->b, leaving a clean chain.
445 // Also if we have this<->a and a<->this, then gets rid of this<->a, as
446 // this has multiple partners.
447 void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners);
448 // Keeps the partner with the longest sequence of singleton matching partners.
449 // Converts all others to pullout.
450 void RefineFlowingTextPartners(bool upper, ColPartition_CLIST* partners);
451 // Keep the partner with the biggest overlap.
452 void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners);
453
454 // Return true if bbox belongs better in this than other.
455 bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other);
456
457 // Smoothes the spacings in the list into groups of equal linespacing.
458 // resolution is the resolution of the original image, used as a basis
459 // for thresholds in change of spacing. page_height is in pixels.
460 static void SmoothSpacings(int resolution, int page_height,
461 ColPartition_LIST* parts);
462
463 // Returns true if the parts array of pointers to partitions matches the
464 // condition for a spacing blip. See SmoothSpacings for what this means
465 // and how it is used.
466 static bool OKSpacingBlip(int resolution, int median_spacing,
467 ColPartition** parts);
468
469 // Returns true if both the top and bottom spacings of this match the given
470 // spacing to within suitable margins dictated by the image resolution.
471 bool SpacingEqual(int spacing, int resolution) const;
472
473 // Returns true if both the top and bottom spacings of this and other
474 // match to within suitable margins dictated by the image resolution.
475 bool SpacingsEqual(const ColPartition& other, int resolution) const;
476
477 // Returns true if the sum spacing of this and other match the given
478 // spacing (or twice the given spacing) to within a suitable margin dictated
479 // by the image resolution.
480 bool SummedSpacingOK(const ColPartition& other,
481 int spacing, int resolution) const;
482
483 // Returns a suitable spacing margin that can be applied to bottoms of
484 // text lines, based on the resolution and the stored side_step_.
485 int BottomSpacingMargin(int resolution) const;
486
487 // Returns a suitable spacing margin that can be applied to tops of
488 // text lines, based on the resolution and the stored side_step_.
489 int TopSpacingMargin(int resolution) const;
490
491 // Returns true if the median text sizes of this and other agree to within
492 // a reasonable multiplicative factor.
493 bool SizesSimilar(const ColPartition& other) const;
494
495 // Computes and returns in start, end a line segment formed from a
496 // forwards-iterated group of left edges of partitions that satisfy the
497 // condition that the rightmost left margin is to the left of the
498 // leftmost left bounding box edge.
499 // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
500 // directions, and to loosely wrap images.
501 static void LeftEdgeRun(ColPartition_IT* part_it,
502 ICOORD* start, ICOORD* end);
503 // Computes and returns in start, end a line segment formed from a
504 // backwards-iterated group of right edges of partitions that satisfy the
505 // condition that the leftmost right margin is to the right of the
506 // rightmost right bounding box edge.
507 // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
508 // directions, and to loosely wrap images.
509 static void RightEdgeRun(ColPartition_IT* part_it,
510 ICOORD* start, ICOORD* end);
511
512 // The margins are determined by the position of the nearest vertically
513 // overlapping neighbour to the side. They indicate the maximum extent
514 // that the block/column may be extended without touching something else.
515 // Leftmost coordinate that the region may occupy over the y limits.
516 int left_margin_;
517 // Rightmost coordinate that the region may occupy over the y limits.
518 int right_margin_;
519 // Bounding box of all blobs in the partition.
520 TBOX bounding_box_;
521 // Median top and bottom of blobs in this partition.
522 int median_bottom_;
523 int median_top_;
524 // Median height of blobs in this partition.
525 int median_size_;
526 // blob_region_type_ for the blobs in this partition.
527 BlobRegionType blob_type_;
528 // True if this partition has a common width.
529 bool good_width_;
530 // True if this is a good column candidate.
531 bool good_column_;
532 // True if the left_key_ is from a tab vector.
533 bool left_key_tab_;
534 // True if the right_key_ is from a tab vector.
535 bool right_key_tab_;
536 // Left and right sort keys for the edges of the partition.
537 // If the respective *_key_tab_ is true then this key came from a tab vector.
538 // If not, then the class promises to keep the key equal to the sort key
539 // for the respective edge of the bounding box at the MidY, so that
540 // LeftAtY and RightAtY always returns an x coordinate on the line parallel
541 // to vertical_ through the bounding box edge at MidY.
542 int left_key_;
543 int right_key_;
544 // Type of this partition after looking at its relation to the columns.
545 PolyBlockType type_;
546 // All boxes in the partition stored in increasing left edge coordinate.
547 BLOBNBOX_CLIST boxes_;
548 // The global vertical skew direction.
549 ICOORD vertical_;
550 // The partitions above that matched this.
551 ColPartition_CLIST upper_partners_;
552 // The partitions below that matched this.
553 ColPartition_CLIST lower_partners_;
554 // The WorkingPartSet it lives in while blocks are being made.
555 WorkingPartSet* working_set_;
556 // True when the partition's ownership has been taken from the grid and
557 // placed in a working set, or, after that, in the good_parts_ list.
558 bool block_owned_;
559 // The first and last column that this partition applies to.
560 // Flowing partitions (see type_) will have an equal first and last value
561 // of the form 2n + 1, where n is the zero-based index into the partitions
562 // in column_set_. (See ColPartitionSet::GetColumnByIndex).
563 // Heading partitions will have unequal values of the same form.
564 // Pullout partitions will have equal values, but may have even values,
565 // indicating placement between columns.
566 int first_column_;
567 int last_column_;
568 // Column_set_ is the column layout applicable to this ColPartition.
569 ColPartitionSet* column_set_;
570 // Linespacing data.
571 int side_step_; // Median y-shift to next blob on same line.
572 int top_spacing_; // Line spacing from median_top_.
573 int bottom_spacing_; // Line spacing from median_bottom_.
574
575 // Type of this partition before considering it as a table cell. This is
576 // used to revert the type if a partition is first marked as a table cell but
577 // later filtering steps decide it does not belong to a table
578 PolyBlockType type_before_table_;
579 bool inside_table_column_; // Check whether the current partition has been
580 // assigned to a table column
581 // Nearest neighbor above with major x-overlap
582 ColPartition* nearest_neighbor_above_;
583 // Nearest neighbor below with major x-overlap
584 ColPartition* nearest_neighbor_below_;
585 int space_above_; // Distance from nearest_neighbor_above
586 int space_below_; // Distance from nearest_neighbor_below
587 int space_to_left_; // Distance from the left edge of the column
588 int space_to_right_; // Distance from the right edge of the column
589 };
590
591 // Typedef it now in case it becomes a class later.
592 typedef BBGrid<ColPartition,
593 ColPartition_CLIST,
594 ColPartition_C_IT> ColPartitionGrid;
595 typedef GridSearch<ColPartition,
596 ColPartition_CLIST,
597 ColPartition_C_IT> ColPartitionGridSearch;
598
599 } // namespace tesseract.
600
601 #endif // TESSERACT_TEXTORD_COLPARTITION_H__
602