• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        TabFind.cpp
3 // Description: Subclass of BBGrid to find vertically aligned blobs.
4 // Author:      Ray Smith
5 // Created:     Fri Mar 21 15:03:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #include "tabfind.h"
21 #include "alignedblob.h"
22 #include "blobbox.h"
23 #include "detlinefit.h"
24 #include "linefind.h"
25 #include "ndminx.h"
26 
27 namespace tesseract {
28 
29 // Multiple of box size to search for initial gaps.
30 const int kTabRadiusFactor = 5;
31 // Min and Max multiple of height to search vertically when extrapolating.
32 const int kMinVerticalSearch = 3;
33 const int kMaxVerticalSearch = 12;
34 const int kMaxRaggedSearch = 25;
35 // Minimum number of lines in a column width to make it interesting.
36 const int kMinLinesInColumn = 10;
37 // Minimum width of a column to be interesting.
38 const int kMinColumnWidth = 200;
39 // Minimum fraction of total column lines for a column to be interesting.
40 const double kMinFractionalLinesInColumn = 0.125;
41 // Fraction of height used as alignment tolerance for aligned tabs.
42 const double kAlignedFraction = 0.03125;
43 // Fraction of height used as a minimum gap for aligned blobs.
44 const double kAlignedGapFraction = 0.75;
45 // Multiplier of new y positions in running average for skew estimation.
46 const double kSmoothFactor = 0.25;
47 // Min coverage for a good baseline between vectors
48 const double kMinBaselineCoverage = 0.5;
49 // Minimum overlap fraction when scanning text lines for column widths.
50 const double kCharVerticalOverlapFraction = 0.375;
51 // Maximum horizontal gap allowed when scanning for column widths
52 const double kMaxHorizontalGap = 3.0;
53 // Maximum upper quartile error allowed on a baseline fit as a fraction
54 // of height.
55 const double kMaxBaselineError = 0.4375;
56 // Min number of points to accept after evaluation.
57 const int kMinEvaluatedTabs = 3;
58 // Minimum aspect ratio of a textline to make a good textline blob with a
59 // single blob.
60 const int kMaxTextLineBlobRatio = 5;
61 // Minimum aspect ratio of a textline to make a good textline blob with
62 // multiple blobs. Target ratio varies according to number of blobs.
63 const int kMinTextLineBlobRatio = 3;
64 // Fraction of box area covered by image to make a blob image.
65 const double kMinImageArea = 0.5;
66 
67 BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
68 BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
69 BOOL_VAR(textord_tabfind_vertical_text, true, "Enable vertical detection");
70 
TabFind(int gridsize,const ICOORD & bleft,const ICOORD & tright,TabVector_LIST * vlines,int vertical_x,int vertical_y)71 TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
72                  TabVector_LIST* vlines, int vertical_x, int vertical_y)
73   : AlignedBlob(gridsize, bleft, tright),
74     image_origin_(0, tright.y() - 1),
75     tab_grid_(new BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize,
76                                                                   bleft,
77                                                                   tright)) {
78   resolution_ = 0;
79   width_cb_ = NULL;
80   v_it_.set_to_list(&vectors_);
81   v_it_.add_list_after(vlines);
82   SetVerticalSkewAndParellelize(vertical_x, vertical_y);
83   width_cb_ = NewPermanentCallback(this, &TabFind::CommonWidth);
84 }
85 
~TabFind()86 TabFind::~TabFind() {
87   delete tab_grid_;
88   if (width_cb_ != NULL)
89     delete width_cb_;
90 }
91 
92 ///////////////// PUBLIC functions (mostly used by TabVector). //////////////
93 
94 // Insert a list of blobs into the given grid (not necessarily this).
95 // If take_ownership is true, then the blobs are removed from the source list.
96 // See InsertBlob for the other arguments.
InsertBlobList(bool h_spread,bool v_spread,bool large,BLOBNBOX_LIST * blobs,bool take_ownership,BBGrid<BLOBNBOX,BLOBNBOX_CLIST,BLOBNBOX_C_IT> * grid)97 void TabFind::InsertBlobList(bool h_spread, bool v_spread, bool large,
98                              BLOBNBOX_LIST* blobs, bool take_ownership,
99                              BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
100                                     BLOBNBOX_C_IT>* grid) {
101   BLOBNBOX_IT blob_it(blobs);
102   int b_count = 0;
103   int reject_count = 0;
104   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
105     BLOBNBOX* blob = blob_it.data();
106     if (InsertBlob(h_spread, v_spread, large, blob, grid)) {
107       ++b_count;
108     } else {
109       ++reject_count;
110     }
111     if (take_ownership)
112       blob_it.extract();
113   }
114   if (textord_debug_tabfind) {
115     if (large)
116       tprintf("Inserted %d large blobs into grid, %d rejected\n",
117               b_count, reject_count);
118     else
119       tprintf("Inserted %d normal blobs into grid\n", b_count);
120   }
121 }
122 
123 // Insert a single blob into the given grid (not necessarily this).
124 // If h_spread, then all cells covered horizontally by the box are
125 // used, otherwise, just the bottom-left. Similarly for v_spread.
126 // If large, then insert only if the bounding box doesn't intersect
127 // anything else already in the grid. Returns true if the blob was inserted.
128 // A side effect is that the left and right rule edges of the blob are
129 // set according to the tab vectors in this (not grid).
InsertBlob(bool h_spread,bool v_spread,bool large,BLOBNBOX * blob,BBGrid<BLOBNBOX,BLOBNBOX_CLIST,BLOBNBOX_C_IT> * grid)130 bool TabFind::InsertBlob(bool h_spread, bool v_spread, bool large,
131                          BLOBNBOX* blob,
132                          BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
133                                 BLOBNBOX_C_IT>* grid) {
134   TBOX box = blob->bounding_box();
135   blob->set_left_rule(LeftEdgeForBox(box, false, false));
136   blob->set_right_rule(RightEdgeForBox(box, false, false));
137   blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
138   blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
139   if (large) {
140     // Search the grid to see what intersects it.
141     // Setup a Rectangle search for overlapping this blob.
142     GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rsearch(grid);
143     rsearch.StartRectSearch(box);
144     BLOBNBOX* neighbour = rsearch.NextRectSearch();
145     if (neighbour != NULL && box.major_overlap(neighbour->bounding_box())) {
146       if (textord_debug_tabfind) {
147         TBOX n_box = neighbour->bounding_box();
148         tprintf("Blob at (%d,%d)->(%d,%d) significantly overlaps blob"
149                 " at (%d,%d)->(%d,%d)\n",
150                 box.left(), box.top(), box.right(), box.bottom(),
151                 n_box.left(), n_box.top(), n_box.right(), n_box.bottom());
152       }
153       return false;
154     }
155   }
156   grid->InsertBBox(h_spread, v_spread, blob);
157   return true;
158 }
159 
160 // Find the gutter width and distance to inner neighbour for the given blob.
GutterWidthAndNeighbourGap(int tab_x,int mean_height,int max_gutter,bool left,BLOBNBOX * bbox,int * gutter_width,int * neighbour_gap)161 void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
162                                          int max_gutter, bool left,
163                                          BLOBNBOX* bbox, int* gutter_width,
164                                          int* neighbour_gap ) {
165   const TBOX& box = bbox->bounding_box();
166   int height = box.height();
167   // The gutter and internal sides of the box.
168   int gutter_x = left ? box.left() : box.right();
169   int internal_x = left ? box.right() : box.left();
170   // On ragged edges, the gutter side of the box is away from the tabstop.
171   int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
172   *gutter_width = max_gutter;
173   // If the box is away from the tabstop, we need to increase
174   // the allowed gutter width.
175   if (tab_gap > 0)
176     *gutter_width += tab_gap;
177   // Find the nearest blob on the outside of the column.
178   BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left, *gutter_width);
179   if (gutter_bbox != NULL) {
180     TBOX gutter_box = gutter_bbox->bounding_box();
181     *gutter_width = left ? tab_x - gutter_box.right()
182                         : gutter_box.left() - tab_x;
183   }
184   if (*gutter_width >= max_gutter) {
185     // If there is no box because a tab was in the way, get the tab coord.
186     TBOX gutter_box(box);
187     if (left) {
188       gutter_box.set_left(tab_x - max_gutter - 1);
189       gutter_box.set_right(tab_x - max_gutter);
190       int tab_gutter = RightEdgeForBox(gutter_box, true, false);
191       if (tab_gutter < tab_x - 1)
192         *gutter_width = tab_x - tab_gutter;
193     } else {
194       gutter_box.set_left(tab_x + max_gutter);
195       gutter_box.set_right(tab_x + max_gutter + 1);
196       int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
197       if (tab_gutter > tab_x + 1)
198         *gutter_width = tab_gutter - tab_x;
199     }
200   }
201   if (*gutter_width > max_gutter)
202     *gutter_width = max_gutter;
203   // Now look for a neighbour on the inside.
204   BLOBNBOX* neighbour = AdjacentBlob(bbox, !left, *gutter_width);
205   int neighbour_edge = left ? RightEdgeForBox(box, true, false)
206                             : LeftEdgeForBox(box, true, false);
207   if (neighbour != NULL) {
208     TBOX n_box = neighbour->bounding_box();
209     if (!DifferentSizes(height, n_box.height())) {
210       if (left && n_box.left() < neighbour_edge)
211         neighbour_edge = n_box.left();
212       else if (!left && n_box.right() > neighbour_edge)
213         neighbour_edge = n_box.right();
214     }
215   }
216   *neighbour_gap = left ? neighbour_edge - internal_x
217                         : internal_x - neighbour_edge;
218 }
219 
220 // Find the next adjacent (to left or right) blob on this text line,
221 // with the constraint that it must vertically significantly overlap
222 // the input box.
AdjacentBlob(const BLOBNBOX * bbox,bool right_to_left,int gap_limit)223 BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
224                                 bool right_to_left, int gap_limit) {
225   const TBOX& box = bbox->bounding_box();
226   return AdjacentBlob(bbox, right_to_left, false, gap_limit,
227                       box.top(), box.bottom());
228 }
229 
230 // Compute and return, but do not set the type as being BRT_TEXT or
231 // BRT_UNKNOWN according to how well it forms a text line.
ComputeBlobType(BLOBNBOX * blob)232 BlobRegionType TabFind::ComputeBlobType(BLOBNBOX* blob) {
233   // Check the text line width.
234   TBOX box = blob->bounding_box();
235   int blob_count;
236   int total_blobs;
237   int width = FindTextlineWidth(true, blob, &total_blobs);
238   width += FindTextlineWidth(false, blob, &blob_count);
239   total_blobs += blob_count - 1;
240   int target_ratio = kMaxTextLineBlobRatio - (total_blobs - 1);
241   if (target_ratio < kMinTextLineBlobRatio)
242     target_ratio = kMinTextLineBlobRatio;
243   BlobRegionType blob_type = (width >= box.height() * target_ratio)
244                            ? BRT_TEXT : BRT_UNKNOWN;
245   if (WithinTestRegion(3, box.left(), box.bottom()))
246     tprintf("Line width = %d, target = %d, result = %d\n",
247             width, box.height() * target_ratio, blob_type);
248   return blob_type;
249 }
250 
251 // Return the x-coord that corresponds to the right edge for the given
252 // box. If there is a rule line to the right that vertically overlaps it,
253 // then return the x-coord of the rule line, otherwise return the right
254 // edge of the page. For details see RightTabForBox below.
RightEdgeForBox(const TBOX & box,bool crossing,bool extended)255 int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
256   TabVector* v = RightTabForBox(box, crossing, extended);
257   return v == NULL ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
258 }
259 // As RightEdgeForBox, but finds the left Edge instead.
LeftEdgeForBox(const TBOX & box,bool crossing,bool extended)260 int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
261   TabVector* v = LeftTabForBox(box, crossing, extended);
262   return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
263 }
264 
265 // Return true if the given width is close to one of the common
266 // widths in column_widths_.
CommonWidth(int width)267 bool TabFind::CommonWidth(int width) {
268   width /= kColumnWidthFactor;
269   ICOORDELT_IT it(&column_widths_);
270   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
271     ICOORDELT* w = it.data();
272     if (NearlyEqual<int>(width, w->x(), 1))
273       return true;
274   }
275   return false;
276 }
277 
278 // Return true if the sizes are more than a
279 // factor of 2 different.
DifferentSizes(int size1,int size2)280 bool TabFind::DifferentSizes(int size1, int size2) {
281   return size1 > size2 * 2 || size2 > size1 * 2;
282 }
283 
284 ///////////////// PROTECTED functions (used by ColumnFinder). //////////////
285 
286 // Top-level function to find TabVectors in an input page block.
FindTabVectors(int resolution,TabVector_LIST * hlines,BLOBNBOX_LIST * image_blobs,TO_BLOCK * block,FCOORD * reskew,FCOORD * rerotate)287 void TabFind::FindTabVectors(int resolution, TabVector_LIST* hlines,
288                              BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
289                              FCOORD* reskew, FCOORD* rerotate) {
290   resolution_ = resolution;
291   *rerotate = FCOORD(1.0f, 0.0f);
292   FindInitialTabVectors(image_blobs, block);
293   if (textord_tabfind_vertical_text && TextMostlyVertical()) {
294     ResetForVerticalText(hlines, image_blobs, block, rerotate);
295     FindInitialTabVectors(image_blobs, block);
296   }
297   TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
298   SortVectors();
299   CleanupTabs();
300   Deskew(hlines, image_blobs, block, reskew);
301   ApplyTabConstraints();
302 #ifndef GRAPHICS_DISABLED
303   if (textord_tabfind_show_finaltabs) {
304     ScrollView* tab_win = MakeWindow(640, 50, "FinalTabs");
305     if (textord_debug_images) {
306       tab_win->Image(AlignedBlob::textord_debug_pix().string(),
307                      image_origin_.x(), image_origin_.y());
308     } else {
309       DisplayBoxes(tab_win);
310       DisplayTabs("FinalTabs", tab_win);
311     }
312     tab_win = DisplayTabVectors(tab_win);
313   }
314 #endif
315 }
316 
317 // Top-level function to not find TabVectors in an input page block,
318 // but setup for single column mode.
DontFindTabVectors(int resolution,BLOBNBOX_LIST * image_blobs,TO_BLOCK * block,FCOORD * reskew)319 void TabFind::DontFindTabVectors(int resolution, BLOBNBOX_LIST* image_blobs,
320                                  TO_BLOCK* block, FCOORD* reskew) {
321   resolution_ = resolution;
322   InsertBlobList(false, false, false, image_blobs, false, this);
323   InsertBlobList(true, false, false, &block->blobs, false, this);
324   ComputeBlobGoodness();
325   reskew->set_x(1);
326   reskew->set_y(0);
327 }
328 
329 // This comment documents how this function works.
330 // For its purpose and arguments, see the comment in tabfind.h.
331 // TabVectors are stored sorted by perpendicular distance of middle from
332 // the global mean vertical vector. Since the individual vectors can have
333 // differing directions, their XAtY for a given y is not necessarily in the
334 // right order. Therefore the search has to be run with a margin.
335 // The middle of a vector that passes through (x,y) cannot be higher than
336 // halfway from y to the top, or lower than halfway from y to the bottom
337 // of the coordinate range; therefore, the search margin is the range of
338 // sort keys between these halfway points. Any vector with a sort key greater
339 // than the upper margin must be to the right of x at y, and likewise any
340 // vector with a sort key less than the lower margin must pass to the left
341 // of x at y.
RightTabForBox(const TBOX & box,bool crossing,bool extended)342 TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
343                                    bool extended) {
344   if (v_it_.empty())
345     return NULL;
346   int top_y = box.top();
347   int bottom_y = box.bottom();
348   int mid_y = (top_y + bottom_y) / 2;
349   int right = crossing ? (box.left() + box.right()) / 2 : box.right();
350   int min_key, max_key;
351   SetupTabSearch(right, mid_y, &min_key, &max_key);
352   // Position the iterator at the first TabVector with sort_key >= min_key.
353   while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
354     v_it_.backward();
355   while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
356     v_it_.forward();
357   // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
358   TabVector* best_v = NULL;
359   int best_x = -1;
360   int key_limit = -1;
361   do {
362     TabVector* v = v_it_.data();
363     int x = v->XAtY(mid_y);
364     if (x >= right &&
365         (v->VOverlap(top_y, bottom_y) > 0 ||
366          (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
367       if (best_v == NULL || x < best_x) {
368         best_v = v;
369         best_x = x;
370         // We can guarantee that no better vector can be found if the
371         // sort key exceeds that of the best by max_key - min_key.
372         key_limit = v->sort_key() + max_key - min_key;
373       }
374     }
375     // Break when the search is done to avoid wrapping the iterator and
376     // thereby potentially slowing the next search.
377     if (v_it_.at_last() ||
378         (best_v != NULL && v->sort_key() > key_limit))
379       break;  // Prevent restarting list for next call.
380     v_it_.forward();
381   } while (!v_it_.at_first());
382   return best_v;
383 }
384 
385 // As RightTabForBox, but finds the left TabVector instead.
LeftTabForBox(const TBOX & box,bool crossing,bool extended)386 TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
387                                   bool extended) {
388   if (v_it_.empty())
389     return NULL;
390   int top_y = box.top();
391   int bottom_y = box.bottom();
392   int mid_y = (top_y + bottom_y) / 2;
393   int left = crossing ? (box.left() + box.right()) / 2 : box.left();
394   int min_key, max_key;
395   SetupTabSearch(left, mid_y, &min_key, &max_key);
396   // Position the iterator at the last TabVector with sort_key <= max_key.
397   while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
398     v_it_.forward();
399   while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
400     v_it_.backward();
401   }
402   // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
403   TabVector* best_v = NULL;
404   int best_x = -1;
405   int key_limit = -1;
406   do {
407     TabVector* v = v_it_.data();
408     int x = v->XAtY(mid_y);
409     if (x <= left &&
410         (v->VOverlap(top_y, bottom_y) > 0 ||
411          (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
412       if (best_v == NULL || x > best_x) {
413         best_v = v;
414         best_x = x;
415         // We can guarantee that no better vector can be found if the
416         // sort key is less than that of the best by max_key - min_key.
417         key_limit = v->sort_key() - (max_key - min_key);
418       }
419     }
420     // Break when the search is done to avoid wrapping the iterator and
421     // thereby potentially slowing the next search.
422     if (v_it_.at_first() ||
423         (best_v != NULL && v->sort_key() < key_limit))
424       break;  // Prevent restarting list for next call.
425     v_it_.backward();
426   } while (!v_it_.at_last());
427   return best_v;
428 }
429 
430 // Helper function to setup search limits for *TabForBox.
SetupTabSearch(int x,int y,int * min_key,int * max_key)431 void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
432   int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
433   int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
434   *min_key = MIN(key1, key2);
435   *max_key = MAX(key1, key2);
436 }
437 
DisplayTabVectors(ScrollView * tab_win)438 ScrollView* TabFind::DisplayTabVectors(ScrollView* tab_win) {
439 #ifndef GRAPHICS_DISABLED
440   // For every vector, display it.
441   TabVector_IT it(&vectors_);
442   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
443     TabVector* vector = it.data();
444     vector->Display(tab_win);
445   }
446   tab_win->Update();
447 #endif
448   return tab_win;
449 }
450 
451 // PRIVATE CODE.
452 //
453 // First part of FindTabVectors, which may be used twice if the text
454 // is mostly of vertical alignment.
FindInitialTabVectors(BLOBNBOX_LIST * image_blobs,TO_BLOCK * block)455 void TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
456                                     TO_BLOCK* block) {
457   if (textord_tabfind_show_initialtabs) {
458     ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
459     line_win = DisplayTabVectors(line_win);
460   }
461   // Prepare the grid.
462   InsertBlobList(false, false, false, image_blobs, false, this);
463   InsertBlobList(true, false, false, &block->blobs, false, this);
464   ScrollView* initial_win = FindTabBoxes();
465   FindAllTabVectors();
466   if (textord_tabfind_show_initialtabs)
467     initial_win = DisplayTabVectors(initial_win);
468 
469   TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
470   SortVectors();
471   EvaluateTabs();
472   ComputeColumnWidths(initial_win);
473   if (textord_tabfind_vertical_text)
474     MarkVerticalText();
475 }
476 
477 // For each box in the grid, decide whether it is a candidate tab-stop,
478 // and if so add it to the tab_grid_.
FindTabBoxes()479 ScrollView* TabFind::FindTabBoxes() {
480   // For every bbox in the grid, determine whether it uses a tab on an edge.
481   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
482   gsearch.StartFullSearch();
483   BLOBNBOX* bbox;
484   while ((bbox = gsearch.NextFullSearch()) != NULL) {
485     if (TestBoxForTabs(bbox)) {
486       // If it is any kind of tab, insert it into the tab grid.
487       tab_grid_->InsertBBox(false, false, bbox);
488     }
489   }
490   ScrollView* tab_win = NULL;
491   if (textord_tabfind_show_initialtabs) {
492     tab_win = tab_grid_->MakeWindow(0, 100, "InitialTabs");
493     tab_grid_->DisplayBoxes(tab_win);
494     tab_win = DisplayTabs("Tabs", tab_win);
495   }
496   return tab_win;
497 }
498 
TestBoxForTabs(BLOBNBOX * bbox)499 bool TabFind::TestBoxForTabs(BLOBNBOX* bbox) {
500   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
501   TBOX box = bbox->bounding_box();
502   // If there are separator lines, get the column edges.
503   int left_column_edge = bbox->left_rule();
504   int right_column_edge = bbox->right_rule();
505   // The edges of the bounding box of the blob being processed.
506   int left_x = box.left();
507   int right_x = box.right();
508   int top_y = box.top();
509   int bottom_y = box.bottom();
510   int height = box.height();
511   bool debug = WithinTestRegion(3, left_x, top_y);
512   if (debug) {
513     tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
514             left_x, top_y, right_x, bottom_y,
515             left_column_edge, right_column_edge);
516   }
517   // Compute a search radius based on a multiple of the height.
518   int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
519   radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
520   int target_right = left_x - height * kAlignedGapFraction;
521   int target_left = right_x + height * kAlignedGapFraction;
522   // We will be evaluating whether the left edge could be a left tab, and
523   // whether the right edge could be a right tab.
524   // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
525   // that no blobs have been found in the gutter during the radial search.
526   // A box can also be a tab if there are objects in the gutter only above
527   // or only below, and there are aligned objects on the opposite side, but
528   // not too many unaligned objects. The maybe_(left/right)_tab_up counts
529   // aligned objects above and negatively counts unaligned objects above,
530   // and is set to -MAX_INT32 if a gutter object is found above.
531   // The other 3 maybe ints work similarly for the other sides.
532   bool is_left_tab = true;
533   bool is_right_tab = true;
534   int maybe_left_tab_up = 0;
535   int maybe_right_tab_up = 0;
536   int maybe_left_tab_down = 0;
537   int maybe_right_tab_down = 0;
538   int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
539   BLOBNBOX* neighbour = NULL;
540   while ((neighbour = radsearch.NextRadSearch()) != NULL) {
541     if (neighbour == bbox)
542       continue;
543     TBOX nbox = neighbour->bounding_box();
544     int n_left = nbox.left();
545     int n_right = nbox.right();
546     if (debug)
547       tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
548               n_left, nbox.bottom(), n_right, nbox.top());
549     // If the neighbouring blob is the wrong side of a separator line, then it
550     // "doesn't exist" as far as we are concerned.
551     if (n_right > right_column_edge || n_left < left_column_edge ||
552         left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
553       continue;  // Separator line in the way.
554     int n_mid_x = (n_left + n_right) / 2;
555     int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
556     if (n_mid_x <= left_x && n_right >= target_right) {
557       if (debug)
558         tprintf("Not a left tab\n");
559       is_left_tab = false;
560       if (n_mid_y < top_y)
561         maybe_left_tab_down = -MAX_INT32;
562       if (n_mid_y > bottom_y)
563         maybe_left_tab_up = -MAX_INT32;
564     } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
565       if (debug)
566         tprintf("Maybe a left tab\n");
567       if (n_mid_y > top_y && maybe_left_tab_up > -MAX_INT32)
568         ++maybe_left_tab_up;
569       if (n_mid_y < bottom_y && maybe_left_tab_down > -MAX_INT32)
570         ++maybe_left_tab_down;
571     } else if (n_left < left_x && n_right >= left_x) {
572       // Overlaps but not aligned so negative points on a maybe.
573       if (debug)
574         tprintf("Maybe Not a left tab\n");
575       if (n_mid_y > top_y && maybe_left_tab_up > -MAX_INT32)
576         --maybe_left_tab_up;
577       if (n_mid_y < bottom_y && maybe_left_tab_down > -MAX_INT32)
578         --maybe_left_tab_down;
579     }
580     if (n_mid_x >= right_x && n_left <= target_left) {
581       if (debug)
582         tprintf("Not a right tab\n");
583       is_right_tab = false;
584       if (n_mid_y < top_y)
585         maybe_right_tab_down = -MAX_INT32;
586       if (n_mid_y > bottom_y)
587         maybe_right_tab_up = -MAX_INT32;
588     } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
589       if (debug)
590         tprintf("Maybe a right tab\n");
591       if (n_mid_y > top_y && maybe_right_tab_up > -MAX_INT32)
592         ++maybe_right_tab_up;
593       if (n_mid_y < bottom_y && maybe_right_tab_down > -MAX_INT32)
594         ++maybe_right_tab_down;
595     } else if (n_right > right_x && n_left <= right_x) {
596       // Overlaps but not aligned so negative points on a maybe.
597       if (debug)
598         tprintf("Maybe Not a right tab\n");
599       if (n_mid_y > top_y && maybe_right_tab_up > -MAX_INT32)
600         --maybe_right_tab_up;
601       if (n_mid_y < bottom_y && maybe_right_tab_down > -MAX_INT32)
602         --maybe_right_tab_down;
603     }
604     if (maybe_left_tab_down == -MAX_INT32 && maybe_left_tab_up == -MAX_INT32 &&
605         maybe_right_tab_down == -MAX_INT32 && maybe_right_tab_up == -MAX_INT32)
606       break;
607   }
608   if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
609     if (debug)
610       tprintf("Setting left tab\n");
611     bbox->set_left_tab_type(TT_UNCONFIRMED);
612   }
613   if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
614     if (debug)
615       tprintf("Setting right tab\n");
616     bbox->set_right_tab_type(TT_UNCONFIRMED);
617   }
618   return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
619 }
620 
FindAllTabVectors()621 void TabFind::FindAllTabVectors() {
622   // A list of vectors that will be created in estimating the skew.
623   TabVector_LIST dummy_vectors;
624   // An estimate of the vertical direction, revised as more lines are added.
625   int vertical_x = 0;
626   int vertical_y = 1;
627   // Find an estimate of the vertical direction by finding some tab vectors.
628   // Slowly up the search size until we get some vectors.
629   for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
630        search_size += kMinVerticalSearch) {
631     int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
632                                       &dummy_vectors,
633                                       &vertical_x, &vertical_y);
634     vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
635                                    &dummy_vectors,
636                                    &vertical_x, &vertical_y);
637     if (vector_count > 0)
638       break;
639   }
640   // Get rid of the test vectors and reset the types of the tabs.
641   dummy_vectors.clear();
642   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> tsearch(tab_grid_);
643   BLOBNBOX* bbox;
644   tsearch.StartFullSearch();
645   while ((bbox = tsearch.NextFullSearch()) != NULL) {
646     if (bbox->left_tab_type() == TT_CONFIRMED)
647       bbox->set_left_tab_type(TT_UNCONFIRMED);
648     if (bbox->right_tab_type() == TT_CONFIRMED)
649       bbox->set_right_tab_type(TT_UNCONFIRMED);
650   }
651   if (textord_debug_tabfind) {
652     tprintf("Beginning real tab search with vertical = %d,%d...\n",
653             vertical_x, vertical_y);
654   }
655   // Now do the real thing ,but keep the vectors in the dummy_vectors list
656   // until they are all done, so we don't get the tab vectors confused with
657   // the rule line vectors.
658   FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED,
659                  &dummy_vectors, &vertical_x, &vertical_y);
660   FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED,
661                  &dummy_vectors, &vertical_x, &vertical_y);
662   FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED,
663                  &dummy_vectors, &vertical_x, &vertical_y);
664   FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED,
665                  &dummy_vectors, &vertical_x, &vertical_y);
666   // Now add the vectors to the vectors_ list.
667   TabVector_IT v_it(&vectors_);
668   v_it.add_list_after(&dummy_vectors);
669   // Now use the summed (mean) vertical vector as the direction for everything.
670   SetVerticalSkewAndParellelize(vertical_x, vertical_y);
671 }
672 
673 // Helper for FindAllTabVectors finds the vectors of a particular type.
FindTabVectors(int search_size_multiple,TabAlignment alignment,TabVector_LIST * vectors,int * vertical_x,int * vertical_y)674 int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
675                             TabVector_LIST* vectors,
676                             int* vertical_x, int* vertical_y) {
677   TabVector_IT vector_it(vectors);
678   int vector_count = 0;
679   // Search the entire tab grid, looking for tab vectors.
680   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> tsearch(tab_grid_);
681   BLOBNBOX* bbox;
682   tsearch.StartFullSearch();
683   bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
684   while ((bbox = tsearch.NextFullSearch()) != NULL) {
685     if ((!right && bbox->left_tab_type() == TT_UNCONFIRMED) ||
686         (right && bbox->right_tab_type() == TT_UNCONFIRMED)) {
687       TabVector* vector = FindTabVector(search_size_multiple, alignment,
688                                         bbox, vertical_x, vertical_y);
689       if (vector != NULL) {
690         ++vector_count;
691         vector_it.add_to_end(vector);
692         ICOORD merged_vector = vector->endpt();
693         merged_vector -= vector->startpt();
694         if (abs(merged_vector.x()) > 100) {
695           vector->Debug("Garbage result of FindTabVector?");
696         }
697       }
698     }
699   }
700   return vector_count;
701 }
702 
703 // Finds a vector corresponding to a tabstop running through the
704 // given box of the given alignment type.
705 // search_size_multiple is a multiple of height used to control
706 // the size of the search.
707 // vertical_x and y are updated with an estimate of the real
708 // vertical direction. (skew finding.)
709 // Returns NULL if no decent tabstop can be found.
FindTabVector(int search_size_multiple,TabAlignment alignment,BLOBNBOX * bbox,int * vertical_x,int * vertical_y)710 TabVector* TabFind::FindTabVector(int search_size_multiple,
711                                   TabAlignment alignment,
712                                   BLOBNBOX* bbox,
713                                   int* vertical_x, int* vertical_y) {
714   AlignedBlobParams align_params(*vertical_x, *vertical_y,
715                                  bbox->bounding_box().height(),
716                                  search_size_multiple, resolution_,
717                                  alignment);
718   // FindVerticalAlignment is in the parent (AlignedBlob) class.
719   return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
720 }
721 
722 // Set the vertical_skew_ member from the given vector and refit
723 // all vectors parallel to the skew vector.
SetVerticalSkewAndParellelize(int vertical_x,int vertical_y)724 void TabFind::SetVerticalSkewAndParellelize(int vertical_x, int vertical_y) {
725   // Fit the vertical vector into an ICOORD, which is 16 bit.
726   vertical_skew_.set_with_shrink(vertical_x, vertical_y);
727   if (textord_debug_tabfind)
728     tprintf("Vertical skew vector=(%d,%d)\n",
729             vertical_skew_.x(), vertical_skew_.y());
730   v_it_.set_to_list(&vectors_);
731   for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
732     TabVector* v = v_it_.data();
733     v->Fit(vertical_skew_, true);
734   }
735   // Now sort the vectors as their direction has potentially changed.
736   SortVectors();
737 }
738 
739 // Sort all the current vectors using the given vertical direction vector.
SortVectors()740 void TabFind::SortVectors() {
741   vectors_.sort(TabVector::SortVectorsByKey);
742   v_it_.set_to_list(&vectors_);
743 }
744 
745 // Evaluate all the current tab vectors.
EvaluateTabs()746 void TabFind::EvaluateTabs() {
747   TabVector_IT rule_it(&vectors_);
748   for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
749     TabVector* tab = rule_it.data();
750     if (!tab->IsSeparator()) {
751       tab->Evaluate(vertical_skew_, this);
752       if (tab->BoxCount() < kMinEvaluatedTabs) {
753         if (textord_debug_tabfind > 2)
754           tab->Print("Too few boxes");
755         delete rule_it.extract();
756         v_it_.set_to_list(&vectors_);
757       } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
758         tab->Print("Evaluated tab");
759       }
760     }
761   }
762 }
763 
764 // Trace textlines from one side to the other of each tab vector, saving
765 // the most frequent column widths found in a list so that a given width
766 // can be tested for being a common width with a simple callback function.
ComputeColumnWidths(ScrollView * tab_win)767 void TabFind::ComputeColumnWidths(ScrollView* tab_win) {
768   // Set the aligned_text_ member of each blob, so text lines traces
769   // get terminated where there is a change from text to image.
770   ComputeBlobGoodness();
771 #ifndef GRAPHICS_DISABLED
772   if (tab_win != NULL)
773     tab_win->Pen(ScrollView::WHITE);
774 #endif
775   // Accumulate column sections into a STATS
776   int col_widths_size = (tright_.x() - bleft_.x()) /kColumnWidthFactor;
777   STATS col_widths(0, col_widths_size + 1);
778   // For every bbox in the tab grid, search for an opposite
779   // to estimate column width and skew..
780   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(tab_grid_);
781   gsearch.StartFullSearch();
782   BLOBNBOX* bbox;
783   while ((bbox = gsearch.NextFullSearch()) != NULL) {
784     ICOORD start_pt, end_pt;
785     if (bbox->left_tab_type() != TT_CONFIRMED &&
786         bbox->right_tab_type() != TT_CONFIRMED)
787       continue;
788     int line_left, line_right;
789     if (TraceTextline(bbox, &start_pt, &end_pt, &line_left, &line_right)) {
790       int left_y = (line_left - start_pt.x()) * (end_pt.y() - start_pt.y()) /
791                (end_pt.x() - start_pt.x()) + start_pt.y();
792       int right_y = (line_right - start_pt.x()) * (end_pt.y() - start_pt.y()) /
793                 (end_pt.x() - start_pt.x()) + start_pt.y();
794       if (start_pt.x() != end_pt.x()) {
795         if (WithinTestRegion(3, start_pt.x(), start_pt.y()))
796           tprintf("Baseline from (%d,%d) to (%d,%d), started at (%d,%d)\n",
797                   line_left, left_y, line_right, right_y,
798                   bbox->bounding_box().left(), bbox->bounding_box().bottom());
799 #ifndef GRAPHICS_DISABLED
800         if (tab_win != NULL)
801           tab_win->Line(line_left, left_y, line_right, right_y);
802 #endif
803         // If line scan was successful, add to STATS of measurements.
804         int width = line_right - line_left;
805         if (width >= kMinColumnWidth) {
806           col_widths.add(width / kColumnWidthFactor, 1);
807         }
808       }
809     }
810   }
811 #ifndef GRAPHICS_DISABLED
812   if (tab_win != NULL) {
813     tab_win->Update();
814   }
815 #endif
816   // Now make a list of column widths.
817   ICOORDELT_IT w_it(&column_widths_);
818   int total_col_count = col_widths.get_total();
819   while (col_widths.get_total() > 0) {
820     int width = col_widths.mode();
821     int col_count = col_widths.pile_count(width);
822     col_widths.add(width, -col_count);
823     // Get the entire peak.
824     for (int left = width - 1; left > 0 &&
825          col_widths.pile_count(left) > 0;
826          --left) {
827       int new_count = col_widths.pile_count(left);
828       col_count += new_count;
829       col_widths.add(left, -new_count);
830     }
831     for (int right = width + 1; right < col_widths_size &&
832          col_widths.pile_count(right) > 0;
833          ++right) {
834       int new_count = col_widths.pile_count(right);
835       col_count += new_count;
836       col_widths.add(right, -new_count);
837     }
838     if (col_count > kMinLinesInColumn &&
839         col_count > kMinFractionalLinesInColumn * total_col_count) {
840       ICOORDELT* w = new ICOORDELT(width, col_count);
841       w_it.add_after_then_move(w);
842       if (textord_debug_tabfind)
843         tprintf("Column of width %d has %d = %.2f%% lines\n",
844               width * kColumnWidthFactor, col_count,
845               100.0 * col_count / total_col_count);
846     }
847   }
848 }
849 
850 // Set the region_type_ member for all the blobs in the grid.
ComputeBlobGoodness()851 void TabFind::ComputeBlobGoodness() {
852   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
853   gsearch.StartFullSearch();
854   BLOBNBOX* bbox;
855   while ((bbox = gsearch.NextFullSearch()) != NULL) {
856     SetBlobRegionType(bbox);
857   }
858 }
859 
860 // Set the region_type_ member of the blob, if not already known.
SetBlobRegionType(BLOBNBOX * blob)861 void TabFind::SetBlobRegionType(BLOBNBOX* blob) {
862   // If already set, just return the result.
863   BlobRegionType blob_type = blob->region_type();
864   if (blob_type != BRT_UNKNOWN)
865     return;
866 
867   // Check for overlapping image blob or other blob already set to image.
868   TBOX box = blob->bounding_box();
869   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this);
870   rectsearch.StartRectSearch(box);
871   int rect_overlap = 0;
872   int poly_overlap = 0;
873   int text_overlap = 0;
874   BLOBNBOX* neighbour;
875   while ((neighbour = rectsearch.NextRectSearch()) != NULL) {
876     if (neighbour != blob &&
877         (blob_type = neighbour->region_type()) != BRT_UNKNOWN) {
878       TBOX nbox = neighbour->bounding_box();
879       nbox -= box;  // This is box intersection, not subtraction.
880       int area = nbox.area();
881       if (blob_type == BRT_RECTIMAGE) {
882         rect_overlap += area;
883       } else if (blob_type == BRT_POLYIMAGE) {
884         poly_overlap += area;
885       } else if (blob_type == BRT_TEXT) {
886         text_overlap += area;
887       }
888     }
889   }
890   int area = box.area();
891   rect_overlap -= text_overlap;
892   poly_overlap -= text_overlap;
893   if (rect_overlap >= area || poly_overlap >= area) {
894     blob->set_region_type(BRT_NOISE);  // Make it disappear
895   } else if (rect_overlap > area * kMinImageArea) {
896     blob->set_region_type(BRT_RECTIMAGE);
897   } else if (poly_overlap > area * kMinImageArea) {
898     blob->set_region_type(BRT_POLYIMAGE);
899   } else {
900     // Actually check the text line width.
901     blob->set_region_type(ComputeBlobType(blob));
902   }
903 }
904 
905 // Mark blobs as being in a vertical text line where that is the case.
906 // Returns true if the majority of the image is vertical text lines.
MarkVerticalText()907 void TabFind::MarkVerticalText() {
908   TabVector_IT it(&vectors_);
909   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
910     TabVector* v = it.data();
911     TabVector* partner = v->VerticalTextlinePartner();
912     if (partner != NULL) {
913       TabVector* left = v->IsLeftTab() ? v : partner;
914       TabVector* right = v->IsLeftTab() ? partner : v;
915       // Setup a rectangle search to mark the text as vertical.
916       TBOX box;
917       box.set_left(MIN(left->startpt().x(), left->endpt().x()));
918       box.set_right(MAX(right->startpt().x(), right->endpt().x()));
919       box.set_bottom(MIN(left->startpt().y(), right->startpt().y()));
920       box.set_top(MAX(left->endpt().y(), right->endpt().y()));
921 
922       GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rsearch(this);
923       rsearch.StartRectSearch(box);
924       BLOBNBOX* blob = NULL;
925       while ((blob = rsearch.NextRectSearch()) != NULL) {
926         if (blob->region_type() < BRT_UNKNOWN)
927           continue;
928         const TBOX& blob_box = blob->bounding_box();
929         if ((LeftTabForBox(blob_box, false, false) == left ||
930              LeftTabForBox(blob_box, true, false) == left) &&
931             (RightTabForBox(blob_box, false, false) == right ||
932              RightTabForBox(blob_box, true, false) == right)) {
933           blob->set_region_type(BRT_VERT_TEXT);
934         }
935       }
936     }
937   }
938 }
939 
940 // Returns true if the majority of the image is vertical text lines.
TextMostlyVertical()941 bool TabFind::TextMostlyVertical() {
942   int vertical_boxes = 0;
943   int horizontal_boxes = 0;
944   // Count vertical bboxes in the grid.
945   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
946   gsearch.StartFullSearch();
947   BLOBNBOX* bbox;
948   while ((bbox = gsearch.NextFullSearch()) != NULL) {
949     if (bbox->region_type() == BRT_VERT_TEXT)
950       ++vertical_boxes;
951     else
952       ++horizontal_boxes;
953   }
954   return vertical_boxes > horizontal_boxes;
955 }
956 
957 // If this box looks like it is on a textline in the given direction,
958 // return the width of the textline-like group of blobs, and the number
959 // of blobs found.
960 // For more detail see FindTextlineSegment below.
FindTextlineWidth(bool right_to_left,BLOBNBOX * bbox,int * blob_count)961 int TabFind::FindTextlineWidth(bool right_to_left, BLOBNBOX* bbox,
962                                int* blob_count) {
963   ICOORD start_pt, end_pt;
964   BLOBNBOX* left_blob;
965   BLOBNBOX* right_blob;
966   return FindTextlineSegment(right_to_left, true, bbox, blob_count,
967                              &start_pt, &end_pt, NULL, NULL,
968                              &left_blob, &right_blob);
969 }
970 
971 // Search from the given tabstop bbox to the next opposite
972 // tabstop bbox on the same text line, which may be itself.
973 // Returns true if the search is successful, and sets
974 // start_pt, end_pt to the fitted baseline, width to the measured
975 // width of the text line (column width estimate.)
TraceTextline(BLOBNBOX * bbox,ICOORD * start_pt,ICOORD * end_pt,int * left_edge,int * right_edge)976 bool TabFind::TraceTextline(BLOBNBOX* bbox, ICOORD* start_pt, ICOORD* end_pt,
977                             int* left_edge, int* right_edge) {
978   bool right_to_left = bbox->left_tab_type() != TT_CONFIRMED;
979   const TBOX& box = bbox->bounding_box();
980   TabVector* left_vector = NULL;
981   TabVector* right_vector = NULL;
982   if (right_to_left) {
983     right_vector = RightTabForBox(box, true, false);
984     if (right_vector == NULL || right_vector->IsLeftTab())
985       return false;
986   } else {
987     left_vector = LeftTabForBox(box, true, false);
988     if (left_vector == NULL || left_vector->IsRightTab())
989       return false;
990   }
991 
992   BLOBNBOX* left_blob;
993   BLOBNBOX* right_blob;
994   int blob_count;
995   if (FindTextlineSegment(right_to_left, false, bbox, &blob_count,
996                           start_pt, end_pt, &left_vector, &right_vector,
997                           &left_blob, &right_blob)) {
998     AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
999     *left_edge = left_vector->XAtY(left_blob->bounding_box().bottom());
1000     *right_edge = right_vector->XAtY(right_blob->bounding_box().bottom());
1001     return true;
1002   }
1003   return false;
1004 }
1005 
1006 // Search from the given bbox in the given direction until the next tab
1007 // vector is found or a significant horizontal gap is found.
1008 // Returns the width of the line if the search is successful, (defined
1009 // as good coverage of the width and a good fitting baseline) and sets
1010 // start_pt, end_pt to the fitted baseline, left_blob, right_blob to
1011 // the ends of the line. Returns zero otherwise.
1012 // Sets blob_count to the number of blobs found on the line.
1013 // On input, either both left_vector and right_vector should be NULL,
1014 // indicating a basic search, or both left_vector and right_vector should
1015 // be not NULL and one of *left_vector and *right_vector should be not NULL,
1016 // in which case the search is strictly between tab vectors and will return
1017 // zero if a gap is found before the opposite tab vector is reached, or a
1018 // conflicting tab vector is found.
1019 // If ignore_images is true, then blobs with aligned_text() < 0 are treated
1020 // as if they do not exist.
FindTextlineSegment(bool right_to_left,bool ignore_images,BLOBNBOX * bbox,int * blob_count,ICOORD * start_pt,ICOORD * end_pt,TabVector ** left_vector,TabVector ** right_vector,BLOBNBOX ** left_blob,BLOBNBOX ** right_blob)1021 int TabFind::FindTextlineSegment(bool right_to_left, bool ignore_images,
1022                                  BLOBNBOX* bbox, int* blob_count,
1023                                  ICOORD* start_pt, ICOORD* end_pt,
1024                                  TabVector** left_vector,
1025                                  TabVector** right_vector,
1026                                  BLOBNBOX** left_blob, BLOBNBOX** right_blob) {
1027   // Bounding box of the current blob.
1028   TBOX box = bbox->bounding_box();
1029   // The estimates of top and bottom of the current line move in an
1030   // alpha-smoothed manner, but in lock-step.
1031   int top_y = box.top();
1032   int bottom_y = box.bottom();
1033   // Left and right of the current blob.
1034   int left = box.left();
1035   int right = box.right();
1036   // Returning the leftmost and rightmost blob used.
1037   *left_blob = bbox;
1038   *right_blob = bbox;
1039   // Coverage measurement as goodness metric.
1040   int coverage = 0;
1041   // Approximation of the baseline.
1042   DetLineFit linepoints;
1043   // Calculation of the mean height on this line segment.
1044   double total_height = 0.0;
1045   int height_count = 0;
1046   // Starter point for the approximation.
1047   ICOORD first_pt(right_to_left ? right : left, box.bottom());
1048   linepoints.Add(first_pt);
1049   // Last point for the approximation.
1050   ICOORD last_pt = first_pt;
1051   // End coordinate that we must not pass.
1052   int end_coord = right_to_left ? bleft_.x() : tright_.x();
1053   *blob_count = 0;
1054 
1055   // Maximum gap allowed before abandoning the search for the other edge.
1056   int gap_limit = static_cast<int>(kMaxHorizontalGap * box.height());
1057   if (WithinTestRegion(3, first_pt.x(), first_pt.y())) {
1058     tprintf("Starting %s line search at (%d, %d-%d)\n",
1059             right_to_left ? "RTL" : "LTR",
1060             left, bottom_y, top_y);
1061   }
1062   while (bbox != NULL) {
1063     int mid_x = (left + right) / 2;
1064     if (right_to_left) {
1065       TabVector* v = LeftTabForBox(box, true, false);
1066       if ((right_vector != NULL && v == *right_vector) ||
1067           (v != NULL && bbox == *right_blob && v->IsRightTab()))
1068         v = LeftTabForBox(box, false, false);
1069       if (right <= end_coord) {
1070         if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1071           v->Print("Passed through end vector");
1072         break;
1073       }
1074       // No break, so this is a good box.
1075       linepoints.Add(ICOORD(mid_x, box.bottom()));
1076       coverage += box.width();
1077       total_height += box.height();
1078       ++height_count;
1079       // In case this is the last one...
1080       *left_blob = bbox;
1081       last_pt.set_x(left);
1082       last_pt.set_y(box.bottom());
1083       if (v != NULL && (right_vector == NULL || v != *right_vector) &&
1084           (bbox != *right_blob || v->IsLeftTab())) {
1085         // The vector is not the starting vector. See if it is within range.
1086         int x_at_y = v->XAtY(bottom_y);
1087         if (x_at_y > left - gap_limit) {
1088           // Once we cross the end_coord, the search will stop.
1089           if (x_at_y > end_coord)
1090             end_coord = x_at_y;
1091           // If this vector is not the correct polarity, then strict searches
1092           // will fail.
1093           if (v->IsLeftTab()) {
1094             if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1095               v->Print("Hit End Vector!");
1096             if (left_vector != NULL)
1097               *left_vector = v;
1098           } else {
1099             if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1100               v->Print("Collided with like tab vector");
1101           }
1102         }
1103       }
1104       if (bbox->left_tab_type() == TT_CONFIRMED) {
1105         if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1106           tprintf("Hit another tab pt\n");
1107         break;
1108       }
1109     } else {
1110       TabVector* v = RightTabForBox(box, true, false);
1111       if ((left_vector != NULL && v == *left_vector) ||
1112           (v != NULL && bbox == *left_blob && v->IsLeftTab()))
1113         v = RightTabForBox(box, false, false);
1114       if (left >= end_coord) {
1115         if (WithinTestRegion(3, first_pt.x(), first_pt.y())) {
1116           tprintf("left=%d, end_coord=%d\n", left, end_coord);
1117           v->Print("Passed through end vector");
1118         }
1119         break;
1120       }
1121       // No break, so this is a good box.
1122       linepoints.Add(ICOORD(mid_x, box.bottom()));
1123       coverage += box.width();
1124       total_height += box.height();
1125       ++height_count;
1126       // In case this is the last one...
1127       *right_blob = bbox;
1128       last_pt.set_x(right);
1129       last_pt.set_y(box.bottom());
1130       if (v != NULL && (left_vector == NULL || v != *left_vector) &&
1131           (bbox != *left_blob || v->IsRightTab())) {
1132         // The vector is not the starting vector. See if it is within range.
1133         int x_at_y = v->XAtY(bottom_y);
1134         if (x_at_y < right + gap_limit) {
1135           // Once we cross the end_coord, the search will stop.
1136           if (x_at_y < end_coord)
1137             end_coord = x_at_y;
1138           // If this vector is not the correct polarity, then strict searches
1139           // will fail.
1140           if (v->IsRightTab()) {
1141             if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1142               v->Print("Hit End Vector!");
1143             if (right_vector != NULL)
1144               *right_vector = v;
1145           } else {
1146             if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1147               v->Print("Collided with like tab vector");
1148           }
1149         }
1150       }
1151       if (bbox->right_tab_type() == TT_CONFIRMED) {
1152         if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1153           tprintf("Hit another tab pt\n");
1154         break;
1155       }
1156     }
1157     // Force the top and bottom to stay the same distance apart by
1158     // computing the mean alpha smoothing factor of the top and bottom.
1159     double top_delta = (box.top() - top_y) * kSmoothFactor;
1160     double bottom_delta = (box.bottom() - bottom_y) * kSmoothFactor;
1161     int mean_delta = static_cast<int>((top_delta + bottom_delta) / 2.0);
1162     top_y += mean_delta;
1163     bottom_y += mean_delta;
1164     bbox = AdjacentBlob(bbox, right_to_left, ignore_images,
1165                         gap_limit, top_y, bottom_y);
1166     if (bbox != NULL && bbox->region_type() < BRT_UNKNOWN) {
1167       if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1168         tprintf("Next box is image region\n");
1169       bbox = NULL;
1170     }
1171     if (bbox != NULL) {
1172       box = bbox->bounding_box();
1173       left = box.left();
1174       right = box.right();
1175       if (WithinTestRegion(3, first_pt.x(), first_pt.y()))
1176         tprintf("Next box is at %d, %d\n", left, box.bottom());
1177     }
1178   }
1179   // Either none or both vectors should be NULL.
1180   if (height_count > 0 &&
1181       (left_vector == NULL || *left_vector == NULL) ==
1182       (right_vector == NULL || *right_vector == NULL)) {
1183     linepoints.Add(last_pt);
1184     // Maximum median error allowed to be a good text line.
1185     double max_error = kMaxBaselineError * total_height / height_count;
1186     double error = linepoints.Fit(start_pt, end_pt);
1187     int width = (*right_blob)->bounding_box().right()
1188               - (*left_blob)->bounding_box().left();
1189     bool good_fit = error < max_error && end_pt->x() != start_pt->x() &&
1190                     coverage >= kMinBaselineCoverage * width;
1191     if (WithinTestRegion(3, first_pt.x(), first_pt.y())) {
1192       tprintf("Found end! good=%d, error=%g/%g, coverage=%g%%"
1193               " on line (%d, %d) to (%d,%d)\n",
1194               good_fit, error, max_error, 100.0 * coverage / width,
1195               start_pt->x(), start_pt->y(), end_pt->x(), end_pt->y());
1196       tprintf("width=%d, L/R blob=%d/%d, first/last=%d/%d, start/end=%d/%d\n",
1197               width, (*left_blob)->bounding_box().left(),
1198               (*right_blob)->bounding_box().right(),
1199               first_pt.x(), last_pt.x(), start_pt->x(), end_pt->x());
1200     }
1201     *blob_count = height_count;
1202     return good_fit ? width : 0;
1203   }
1204   return 0;
1205 }
1206 
1207 // Find the next adjacent (to left or right) blob on this text line,
1208 // with the constraint that it must vertically significantly overlap
1209 // the [top_y, bottom_y] range.
1210 // If ignore_images is true, then blobs with aligned_text() < 0 are treated
1211 // as if they do not exist.
AdjacentBlob(const BLOBNBOX * bbox,bool right_to_left,bool ignore_images,int gap_limit,int top_y,int bottom_y)1212 BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
1213                                 bool right_to_left, bool ignore_images,
1214                                 int gap_limit, int top_y, int bottom_y) {
1215   GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
1216   const TBOX& box = bbox->bounding_box();
1217   int left = box.left();
1218   int right = box.right();
1219   int mid_x = (left + right) / 2;
1220   sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1221   int best_gap = 0;
1222   BLOBNBOX* result = NULL;
1223   BLOBNBOX* neighbour = NULL;
1224   while ((neighbour = sidesearch.NextSideSearch(right_to_left)) != NULL) {
1225     if (neighbour == bbox ||
1226         (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
1227       continue;
1228     const TBOX& nbox = neighbour->bounding_box();
1229     int n_top_y = nbox.top();
1230     int n_bottom_y = nbox.bottom();
1231     int v_overlap = MIN(n_top_y, top_y) - MAX(n_bottom_y, bottom_y);
1232     int height = top_y - bottom_y;
1233     int n_height = n_top_y - n_bottom_y;
1234     if (v_overlap > kCharVerticalOverlapFraction * MIN(height, n_height) &&
1235         !DifferentSizes(height, n_height)) {
1236       int n_left = nbox.left();
1237       int n_right = nbox.right();
1238       int h_gap = MAX(n_left, left) - MIN(n_right, right);
1239       int n_mid_x = (n_left + n_right) / 2;
1240       if (right_to_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1241         if (h_gap > gap_limit) {
1242           // Hit a big gap before next tab so don't return anything.
1243           if (WithinTestRegion(3, left, n_top_y))
1244             tprintf("Giving up due to big gap = %d vs %d\n",
1245                     h_gap, gap_limit);
1246           return result;
1247         }
1248         if ((right_to_left ? neighbour->right_tab_type()
1249                            : neighbour->left_tab_type()) >= TT_FAKE) {
1250           // Hit a tab facing the wrong way. Stop in case we are crossing
1251           // the column boundary.
1252           if (WithinTestRegion(3, left, n_top_y))
1253             tprintf("Collision with like tab of type %d at %d,%d\n",
1254                     right_to_left ? neighbour->right_tab_type()
1255                                   : neighbour->left_tab_type(),
1256                     n_left, nbox.bottom());
1257           return result;
1258         }
1259         // This is a good fit to the line. Continue with this
1260         // neighbour as the bbox if the best gap.
1261         if (result == NULL || h_gap < best_gap) {
1262           if (WithinTestRegion(3, left, n_top_y))
1263             tprintf("Good result\n");
1264           result = neighbour;
1265           best_gap = h_gap;
1266         } else {
1267           // The new one is worse, so we probably already have the best result.
1268           return result;
1269         }
1270       }
1271     }
1272   }
1273   if (WithinTestRegion(3, left, box.top()))
1274     tprintf("Giving up due to end of search\n");
1275   return result;  // Hit the edge and found nothing.
1276 }
1277 
1278 // Add a bi-directional partner relationship between the left
1279 // and the right. If one (or both) of the vectors is a separator,
1280 // extend a nearby extendable vector or create a new one of the
1281 // correct type, using the given left or right blob as a guide.
AddPartnerVector(BLOBNBOX * left_blob,BLOBNBOX * right_blob,TabVector * left,TabVector * right)1282 void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
1283                                TabVector* left, TabVector* right) {
1284   const TBOX& left_box = left_blob->bounding_box();
1285   const TBOX& right_box = right_blob->bounding_box();
1286   if (left->IsSeparator()) {
1287     // Try to find a nearby left edge to extend.
1288     TabVector* v = LeftTabForBox(left_box, true, true);
1289     if (v != NULL && v != left && v->IsLeftTab() &&
1290         v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1291       left = v;  // Found a good replacement.
1292       left->ExtendToBox(left_blob);
1293     } else {
1294       // Fake a vector.
1295       left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1296       vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1297       v_it_.move_to_first();
1298     }
1299   }
1300   if (right->IsSeparator()) {
1301     // Try to find a nearby left edge to extend.
1302     if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1303       tprintf("Box edge (%d,%d-%d)",
1304               right_box.right(), right_box.bottom(), right_box.top());
1305       right->Print(" looking for improvement for");
1306     }
1307     TabVector* v = RightTabForBox(right_box, true, true);
1308     if (v != NULL && v != right && v->IsRightTab() &&
1309         v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1310       right = v;  // Found a good replacement.
1311       right->ExtendToBox(right_blob);
1312       if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1313         right->Print("Extended vector");
1314       }
1315     } else {
1316       // Fake a vector.
1317       right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
1318                             right_blob);
1319       vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1320       v_it_.move_to_first();
1321       if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1322         right->Print("Created new vector");
1323       }
1324     }
1325   }
1326   left->AddPartner(right);
1327   right->AddPartner(left);
1328 }
1329 
1330 // Remove separators and unused tabs from the main vectors_ list
1331 // to the dead_vectors_ list.
CleanupTabs()1332 void TabFind::CleanupTabs() {
1333   // TODO(rays) Before getting rid of separators and unused vectors, it
1334   // would be useful to try moving ragged vectors outwards to see if this
1335   // allows useful extension. Could be combined with checking ends of partners.
1336   TabVector_IT it(&vectors_);
1337   TabVector_IT dead_it(&dead_vectors_);
1338   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1339     TabVector* v = it.data();
1340     if (v->IsSeparator() || v->Partnerless()) {
1341       dead_it.add_after_then_move(it.extract());
1342       v_it_.set_to_list(&vectors_);
1343     } else {
1344       v->FitAndEvaluateIfNeeded(vertical_skew_, this);
1345     }
1346   }
1347 }
1348 
RotateBlobList(const FCOORD & rotation,BLOBNBOX_LIST * blobs)1349 static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
1350   BLOBNBOX_IT it(blobs);
1351   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1352     it.data()->rotate_box(rotation);
1353   }
1354 }
1355 
1356 // Recreate the grid with deskewed BLOBNBOXes.
Deskew(TabVector_LIST * hlines,BLOBNBOX_LIST * image_blobs,TO_BLOCK * block,FCOORD * reskew)1357 void TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
1358                      TO_BLOCK* block, FCOORD* reskew) {
1359   FCOORD deskew;
1360   ComputeDeskewVectors(&deskew, reskew);
1361   RotateBlobList(deskew, image_blobs);
1362   RotateBlobList(deskew, &block->blobs);
1363   RotateBlobList(deskew, &block->small_blobs);
1364   RotateBlobList(deskew, &block->noise_blobs);
1365 #ifdef HAVE_LIBLEPT
1366   if (textord_debug_images) {
1367     // Rotate the debug pix and arrange for it to be drawn at the correct
1368     // pixel offset.
1369     Pix* pix_grey = pixRead(AlignedBlob::textord_debug_pix().string());
1370     int width = pixGetWidth(pix_grey);
1371     int height = pixGetHeight(pix_grey);
1372     float angle = atan2(deskew.y(), deskew.x());
1373     // Positive angle is clockwise to pixRotate.
1374     Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP,
1375                              L_BRING_IN_WHITE, width, height);
1376     // The image must be translated by the rotation of its center, since it
1377     // has just been rotated about its center.
1378     ICOORD center_offset(width / 2, height / 2);
1379     ICOORD new_center_offset(center_offset);
1380     new_center_offset.rotate(deskew);
1381     image_origin_ += new_center_offset - center_offset;
1382     // The image grew as it was rotated, so offset the (top/left) origin
1383     // by half the change in size. y is opposite to x because it is drawn
1384     // at ist top/left, not bottom/left.
1385     ICOORD corner_offset((width - pixGetWidth(pix_rot)) / 2,
1386                          (pixGetHeight(pix_rot) - height) / 2);
1387     image_origin_ += corner_offset;
1388     pixWrite(AlignedBlob::textord_debug_pix().string(), pix_rot, IFF_PNG);
1389     pixDestroy(&pix_grey);
1390     pixDestroy(&pix_rot);
1391   }
1392 #endif  // HAVE_LIBLEPT
1393 
1394   // Rotate the horizontal vectors. The vertical vectors don't need
1395   // rotating as they can just be refitted.
1396   TabVector_IT h_it(hlines);
1397   for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1398     TabVector* h = h_it.data();
1399     h->Rotate(deskew);
1400   }
1401   SetVerticalSkewAndParellelize(0, 1);
1402   // Rebuild the grid to the new size.
1403   TBOX grid_box(bleft_, tright_);
1404   grid_box.rotate_large(deskew);
1405   Init(gridsize(), grid_box.botleft(), grid_box.topright());
1406   InsertBlobList(false, false, false, image_blobs, false, this);
1407   InsertBlobList(true, false, false, &block->blobs, false, this);
1408 }
1409 
ResetBlobList(BLOBNBOX_LIST * blobs)1410 static void ResetBlobList(BLOBNBOX_LIST* blobs) {
1411   BLOBNBOX_IT it(blobs);
1412   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1413     BLOBNBOX* blob = it.data();
1414     blob->set_left_tab_type(TT_NONE);
1415     blob->set_right_tab_type(TT_NONE);
1416     blob->set_region_type(BRT_UNKNOWN);
1417   }
1418 }
1419 
1420 // Restart everything and rotate the input blobs ready for vertical text.
ResetForVerticalText(TabVector_LIST * hlines,BLOBNBOX_LIST * image_blobs,TO_BLOCK * block,FCOORD * rerotate)1421 void TabFind::ResetForVerticalText(TabVector_LIST* hlines,
1422                                    BLOBNBOX_LIST* image_blobs,
1423                                    TO_BLOCK* block, FCOORD* rerotate) {
1424   // Rotate anti-clockwise, so vertical CJK text is still in reading order.
1425   FCOORD derotate(0.0f, 1.0f);
1426   *rerotate = FCOORD(0.0f, -1.0f);
1427   RotateBlobList(derotate, image_blobs);
1428   RotateBlobList(derotate, &block->blobs);
1429   RotateBlobList(derotate, &block->small_blobs);
1430   RotateBlobList(derotate, &block->noise_blobs);
1431   ResetBlobList(&block->blobs);
1432 
1433   // Rotate the horizontal and vertical vectors and swap them over.
1434   // Only the separators are kept, and existing tabs are deleted.
1435   // Note that to retain correct relative orientation, vertical and
1436   // horizontal lines must be rotated in opposite directions!
1437   TabVector_LIST ex_verticals;
1438   TabVector_IT ex_v_it(&ex_verticals);
1439   while (!v_it_.empty()) {
1440     TabVector* v = v_it_.extract();
1441     if (v->IsSeparator()) {
1442       v->Rotate(*rerotate);
1443       ex_v_it.add_after_then_move(v);
1444     } else {
1445       delete v;
1446     }
1447     v_it_.forward();
1448   }
1449   TabVector_IT h_it(hlines);
1450   for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1451     TabVector* h = h_it.data();
1452     h->Rotate(derotate);
1453   }
1454   v_it_.add_list_after(hlines);
1455   v_it_.move_to_first();
1456   h_it.set_to_list(hlines);
1457   h_it.add_list_after(&ex_verticals);
1458 
1459   // Rebuild the grid to the new size.
1460   TBOX grid_box(bleft_, tright_);
1461   grid_box.rotate_large(derotate);
1462   Init(gridsize(), grid_box.botleft(), grid_box.topright());
1463   column_widths_.clear();
1464 }
1465 
1466 // Compute the rotation required to deskew, and its inverse rotation.
ComputeDeskewVectors(FCOORD * deskew,FCOORD * reskew)1467 void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
1468   double length = vertical_skew_ % vertical_skew_;
1469   length = sqrt(length);
1470   deskew->set_x(vertical_skew_.y() / length);
1471   deskew->set_y(vertical_skew_.x() / length);
1472   reskew->set_x(deskew->x());
1473   reskew->set_y(-deskew->y());
1474 }
1475 
1476 // Compute and apply constraints to the end positions of TabVectors so
1477 // that where possible partners end at the same y coordinate.
ApplyTabConstraints()1478 void TabFind::ApplyTabConstraints() {
1479   TabVector_IT it(&vectors_);
1480   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1481     TabVector* v = it.data();
1482     v->SetupConstraints();
1483   }
1484   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1485     TabVector* v = it.data();
1486     // With the first and last partner, we want a common bottom and top,
1487     // respectively, and for each change of partner, we want a common
1488     // top of first with bottom of next.
1489     v->SetupPartnerConstraints();
1490   }
1491   // TODO(rays) The back-to-back pairs should really be done like the
1492   // front-to-front pairs, but there is no convenient way of producing the
1493   // list of partners like there is with the front-to-front.
1494   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1495     TabVector* v = it.data();
1496     if (!v->IsRightTab())
1497       continue;
1498     // For each back-to-back pair of vectors, try for common top and bottom.
1499     TabVector_IT partner_it(it);
1500     for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1501       TabVector* partner = partner_it.data();
1502       if (!partner->IsLeftTab() || !v->VOverlap(*partner))
1503         continue;
1504       v->SetupPartnerConstraints(partner);
1505     }
1506   }
1507   // Now actually apply the constraints to get common start/end points.
1508   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1509     TabVector* v = it.data();
1510     if (!v->IsSeparator())
1511       v->ApplyConstraints();
1512   }
1513   // TODO(rays) Where constraint application fails, it would be good to try
1514   // checking the ends to see if they really should be moved.
1515 }
1516 
1517 }  // namespace tesseract.
1518 
1519