• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        tabvector.cpp
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author:      Ray Smith
5 // Created:     Thu Apr 10 16:28:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #include "tabvector.h"
21 #include "blobbox.h"
22 #include "colfind.h"
23 #include "colpartitionset.h"
24 #include "detlinefit.h"
25 
26 namespace tesseract {
27 
28 // Multiple of height used as a gutter for evaluation search.
29 const int kGutterMultiple = 4;
30 // Multiple of neighbour gap that we expect the gutter gap to be at minimum.
31 const int kGutterToNeighbourRatio = 3;
32 // Pixel distance for tab vectors to be considered the same.
33 const int kSimilarVectorDist = 10;
34 // Pixel distance for ragged tab vectors to be considered the same if there
35 // is nothing in the overlap box
36 const int kSimilarRaggedDist = 50;
37 // Max multiple of height to allow filling in between blobs when evaluating.
38 const int kMaxFillinMultiple = 11;
39 // Min fraction of mean gutter size to allow a gutter on a good tab blob.
40 const double kMinGutterFraction = 0.5;
41 // Max fraction of mean blob width allowed for vertical gaps in vertical text.
42 const double kVerticalTextGapFraction = 0.5;
43 
ELISTIZE(TabConstraint)44 ELISTIZE(TabConstraint)
45 
46 // Create a constraint for the top or bottom of this TabVector.
47 void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) {
48   TabConstraint* constraint = new TabConstraint(vector, is_top);
49   TabConstraint_LIST* constraints = new TabConstraint_LIST;
50   TabConstraint_IT it(constraints);
51   it.add_to_end(constraint);
52   if (is_top)
53     vector->set_top_constraints(constraints);
54   else
55     vector->set_bottom_constraints(constraints);
56 }
57 
58 // Test to see if the constraints are compatible enough to merge.
CompatibleConstraints(TabConstraint_LIST * list1,TabConstraint_LIST * list2)59 bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1,
60                                           TabConstraint_LIST* list2) {
61   if (list1 == list2)
62     return false;
63   int y_min = -MAX_INT32;
64   int y_max = MAX_INT32;
65   if (textord_debug_tabfind > 3)
66     tprintf("Testing constraint compatibility\n");
67   GetConstraints(list1, &y_min, &y_max);
68   GetConstraints(list2, &y_min, &y_max);
69   if (textord_debug_tabfind > 3)
70     tprintf("Resulting range = [%d,%d]\n", y_min, y_max);
71   return y_max >= y_min;
72 }
73 
74 // Merge the lists of constraints and update the TabVector pointers.
75 // The second list is deleted.
MergeConstraints(TabConstraint_LIST * list1,TabConstraint_LIST * list2)76 void TabConstraint::MergeConstraints(TabConstraint_LIST* list1,
77                                      TabConstraint_LIST* list2) {
78   if (list1 == list2)
79     return;
80   TabConstraint_IT it(list2);
81   if (textord_debug_tabfind > 3)
82     tprintf("Merging constraints\n");
83   // The vectors of all constraints on list2 are now going to be on list1.
84   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
85     TabConstraint* constraint = it.data();
86     if (textord_debug_tabfind> 3)
87       constraint->vector_->Print("Merge");
88     if (constraint->is_top_)
89       constraint->vector_->set_top_constraints(list1);
90     else
91       constraint->vector_->set_bottom_constraints(list1);
92   }
93   it = list1;
94   it.add_list_before(list2);
95   delete list2;
96 }
97 
98 // Set all the tops and bottoms as appropriate to a mean of the
99 // constrained range. Delete all the constraints and list.
ApplyConstraints(TabConstraint_LIST * constraints)100 void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) {
101   int y_min = -MAX_INT32;
102   int y_max = MAX_INT32;
103   GetConstraints(constraints, &y_min, &y_max);
104   int y = (y_min + y_max) / 2;
105   TabConstraint_IT it(constraints);
106   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
107     TabConstraint* constraint = it.data();
108     TabVector* v = constraint->vector_;
109     if (constraint->is_top_) {
110       v->SetYEnd(y);
111       v->set_top_constraints(NULL);
112     } else {
113       v->SetYStart(y);
114       v->set_bottom_constraints(NULL);
115     }
116   }
117   delete constraints;
118 }
119 
TabConstraint(TabVector * vector,bool is_top)120 TabConstraint::TabConstraint(TabVector* vector, bool is_top)
121   : vector_(vector), is_top_(is_top) {
122   if (is_top) {
123     y_min_ = vector->endpt().y();
124     y_max_ = vector->extended_ymax();
125   } else {
126     y_max_ = vector->startpt().y();
127     y_min_ = vector->extended_ymin();
128   }
129 }
130 
131 // Get the max of the mins and the min of the maxes.
GetConstraints(TabConstraint_LIST * constraints,int * y_min,int * y_max)132 void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
133                                    int* y_min, int* y_max) {
134   TabConstraint_IT it(constraints);
135   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
136     TabConstraint* constraint = it.data();
137     if (textord_debug_tabfind > 3) {
138       tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
139       constraint->vector_->Print(" for");
140     }
141     *y_min = MAX(*y_min, constraint->y_min_);
142     *y_max = MIN(*y_max, constraint->y_max_);
143   }
144 }
145 
146 ELIST2IZE(TabVector)
CLISTIZE(TabVector)147 CLISTIZE(TabVector)
148 
149 // The constructor is private. See the bottom of the file...
150 
151 TabVector::~TabVector() {
152 }
153 
154 
155 // Public factory to build a TabVector from a list of boxes.
156 // The TabVector will be of the given alignment type.
157 // The input vertical vector is used in fitting, and the output
158 // vertical_x, vertical_y have the resulting line vector added to them
159 // if the alignment is not ragged.
160 // The extended_start_y and extended_end_y are the maximum possible
161 // extension to the line segment that can be used to align with others.
162 // The input CLIST of BLOBNBOX good_points is consumed and taken over.
FitVector(TabAlignment alignment,ICOORD vertical,int extended_start_y,int extended_end_y,BLOBNBOX_CLIST * good_points,int * vertical_x,int * vertical_y)163 TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical,
164                                 int  extended_start_y, int extended_end_y,
165                                 BLOBNBOX_CLIST* good_points,
166                                 int* vertical_x, int* vertical_y) {
167   TabVector* vector = new TabVector(extended_start_y, extended_end_y,
168                                     alignment, good_points);
169   if (!vector->Fit(vertical, false)) {
170     delete vector;
171     return NULL;
172   }
173   if (!vector->IsRagged()) {
174     vertical = vector->endpt_ - vector->startpt_;
175     int weight = vector->BoxCount();
176     *vertical_x += vertical.x() * weight;
177     *vertical_y += vertical.y() * weight;
178   }
179   return vector;
180 }
181 
182 // Build a ragged TabVector by copying another's direction, shifting it
183 // to match the given blob, and making its initial extent the height
184 // of the blob, but its extended bounds from the bounds of the original.
TabVector(const TabVector & src,TabAlignment alignment,const ICOORD & vertical_skew,BLOBNBOX * blob)185 TabVector::TabVector(const TabVector& src, TabAlignment alignment,
186                      const ICOORD& vertical_skew, BLOBNBOX* blob)
187   : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
188     sort_key_(0), percent_score_(0),
189     needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
190     top_constraints_(NULL), bottom_constraints_(NULL) {
191   BLOBNBOX_C_IT it(&boxes_);
192   it.add_to_end(blob);
193   TBOX box = blob->bounding_box();
194   if (IsLeftTab()) {
195     startpt_ = box.botleft();
196     endpt_ = box.topleft();
197   } else {
198     startpt_ = box.botright();
199     endpt_ = box.topright();
200   }
201   sort_key_ = SortKey(vertical_skew,
202                       (startpt_.x() + endpt_.x()) / 2,
203                       (startpt_.y() + endpt_.y()) / 2);
204   if (textord_debug_tabfind > 3)
205     Print("Constructed a new tab vector:");
206 }
207 
208 // Extend this vector to include the supplied blob if it doesn't
209 // already have it.
ExtendToBox(BLOBNBOX * new_blob)210 void TabVector::ExtendToBox(BLOBNBOX* new_blob) {
211   TBOX new_box = new_blob->bounding_box();
212   BLOBNBOX_C_IT it(&boxes_);
213   if (!it.empty()) {
214     BLOBNBOX* blob = it.data();
215     TBOX box = blob->bounding_box();
216     while (!it.at_last() && box.top() <= new_box.top()) {
217       if (blob == new_blob)
218         return;  // We have it already.
219       it.forward();
220       blob = it.data();
221       box = blob->bounding_box();
222     }
223     if (box.top() >= new_box.top()) {
224       it.add_before_stay_put(new_blob);
225       needs_refit_ = true;
226       return;
227     }
228   }
229   needs_refit_ = true;
230   it.add_after_stay_put(new_blob);
231 }
232 
233 // Set the ycoord of the start and move the xcoord to match.
SetYStart(int start_y)234 void TabVector::SetYStart(int start_y) {
235   startpt_.set_x(XAtY(start_y));
236   startpt_.set_y(start_y);
237 }
238 // Set the ycoord of the end and move the xcoord to match.
SetYEnd(int end_y)239 void TabVector::SetYEnd(int end_y) {
240   endpt_.set_x(XAtY(end_y));
241   endpt_.set_y(end_y);
242 }
243 
244 // Rotate the ends by the given vector.
Rotate(const FCOORD & rotation)245 void TabVector::Rotate(const FCOORD& rotation) {
246   startpt_.rotate(rotation);
247   endpt_.rotate(rotation);
248 }
249 
250 // Setup the initial constraints, being the limits of
251 // the vector and the extended ends.
SetupConstraints()252 void TabVector::SetupConstraints() {
253   TabConstraint::CreateConstraint(this, false);
254   TabConstraint::CreateConstraint(this, true);
255 }
256 
257 // Setup the constraints between the partners of this TabVector.
SetupPartnerConstraints()258 void TabVector::SetupPartnerConstraints() {
259   // With the first and last partner, we want a common bottom and top,
260   // respectively, and for each change of partner, we want a common
261   // top of first with bottom of next.
262   TabVector_C_IT it(&partners_);
263   TabVector* prev_partner = NULL;
264   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
265     TabVector* partner = it.data();
266     if (partner->top_constraints_ == NULL ||
267         partner->bottom_constraints_ == NULL) {
268       partner->Print("Impossible: has no constraints");
269       Print("This vector has it as a partner");
270       continue;
271     }
272     if (prev_partner == NULL) {
273       // This is the first partner, so common bottom.
274       if (TabConstraint::CompatibleConstraints(bottom_constraints_,
275                                                partner->bottom_constraints_))
276         TabConstraint::MergeConstraints(bottom_constraints_,
277                                         partner->bottom_constraints_);
278     } else {
279       // We need prev top to be common with partner bottom.
280       if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
281                                                partner->bottom_constraints_))
282         TabConstraint::MergeConstraints(prev_partner->top_constraints_,
283                                         partner->bottom_constraints_);
284     }
285     prev_partner = partner;
286     if (it.at_last()) {
287       // This is the last partner, so common top.
288       if (TabConstraint::CompatibleConstraints(top_constraints_,
289                                                partner->top_constraints_))
290         TabConstraint::MergeConstraints(top_constraints_,
291                                         partner->top_constraints_);
292     }
293   }
294 }
295 
296 // Setup the constraints between this and its partner.
SetupPartnerConstraints(TabVector * partner)297 void TabVector::SetupPartnerConstraints(TabVector* partner) {
298   if (TabConstraint::CompatibleConstraints(bottom_constraints_,
299                                            partner->bottom_constraints_))
300     TabConstraint::MergeConstraints(bottom_constraints_,
301                                     partner->bottom_constraints_);
302   if (TabConstraint::CompatibleConstraints(top_constraints_,
303                                            partner->top_constraints_))
304     TabConstraint::MergeConstraints(top_constraints_,
305                                     partner->top_constraints_);
306 }
307 
308 // Use the constraints to modify the top and bottom.
ApplyConstraints()309 void TabVector::ApplyConstraints() {
310   if (top_constraints_ != NULL)
311     TabConstraint::ApplyConstraints(top_constraints_);
312   if (bottom_constraints_ != NULL)
313     TabConstraint::ApplyConstraints(bottom_constraints_);
314 }
315 
316 // Merge close tab vectors of the same side that overlap.
MergeSimilarTabVectors(const ICOORD & vertical,TabVector_LIST * vectors,BlobGrid * grid)317 void TabVector::MergeSimilarTabVectors(const ICOORD& vertical,
318                                        TabVector_LIST* vectors,
319                                        BlobGrid* grid) {
320   TabVector_IT it1(vectors);
321   for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
322     TabVector* v1 = it1.data();
323     TabVector_IT it2(it1);
324     for (it2.forward(); !it2.at_first(); it2.forward()) {
325       TabVector* v2 = it2.data();
326       if (v2->SimilarTo(vertical, *v1, grid)) {
327         // Merge into the forward one, in case the combined vector now
328         // overlaps one in between.
329         if (textord_debug_tabfind) {
330           v2->Print("Merging");
331           v1->Print("by deleting");
332         }
333         v2->MergeWith(vertical, it1.extract());
334         ICOORD merged_vector = v2->endpt();
335         merged_vector -= v2->startpt();
336         if (abs(merged_vector.x()) > 100) {
337           v2->Print("Garbage result of merge?");
338         }
339         break;
340       }
341     }
342   }
343 }
344 
345 // Return true if this vector is the same side, overlaps, and close
346 // enough to the other to be merged.
SimilarTo(const ICOORD & vertical,const TabVector & other,BlobGrid * grid) const347 bool TabVector::SimilarTo(const ICOORD& vertical,
348                           const TabVector& other, BlobGrid* grid) const {
349   if ((IsRightTab() && other.IsRightTab()) ||
350       (IsLeftTab() && other.IsLeftTab())) {
351     // If they don't overlap, at least in extensions, then there is no chance.
352     if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
353       return false;
354     // A fast approximation to the scale factor of the sort_key_.
355     int v_scale = abs(vertical.y());
356     if (v_scale == 0)
357       v_scale = 1;
358     // If they are close enough, then OK.
359     if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
360         sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
361       return true;
362     // Ragged tabs get a bigger threshold.
363     if (!IsRagged() || !other.IsRagged() ||
364         sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
365         sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
366       return false;
367     if (grid == NULL) {
368       // There is nothing else to test!
369       return true;
370     }
371     // If there is nothing in the rectangle between the vector that is going to
372     // move, and the place it is moving to, then they can be merged.
373     // Setup a vertical search for any blob.
374     const TabVector* mover = (IsRightTab() &&
375        sort_key_ < other.sort_key_) ? this : &other;
376     int top_y = mover->endpt_.y();
377     int bottom_y = mover->startpt_.y();
378     int left = MIN(mover->XAtY(top_y), mover->XAtY(bottom_y));
379     int right = MAX(mover->XAtY(top_y), mover->XAtY(bottom_y));
380     int shift = abs(sort_key_ - other.sort_key_) / v_scale;
381     if (IsRightTab()) {
382       right += shift;
383     } else {
384       left -= shift;
385     }
386 
387     GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
388     vsearch.StartVerticalSearch(left, right, top_y);
389     BLOBNBOX* blob;
390     while ((blob = vsearch.NextVerticalSearch(true)) != NULL) {
391       TBOX box = blob->bounding_box();
392       if (box.top() > bottom_y)
393         return true;  // Nothing found.
394       if (box.bottom() < top_y)
395         continue;  // Doesn't overlap.
396       int left_at_box = XAtY(box.bottom());
397       int right_at_box = left_at_box;
398       if (IsRightTab())
399         right_at_box += shift;
400       else
401         left_at_box -= shift;
402       if (MIN(right_at_box, box.right()) > MAX(left_at_box, box.left()))
403         return false;
404     }
405     return true;  // Nothing found.
406   }
407   return false;
408 }
409 
410 // Eat the other TabVector into this and delete it.
MergeWith(const ICOORD & vertical,TabVector * other)411 void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) {
412   extended_ymin_ = MIN(extended_ymin_, other->extended_ymin_);
413   extended_ymax_ = MAX(extended_ymax_, other->extended_ymax_);
414   if (other->IsRagged()) {
415     alignment_ = other->alignment_;
416   }
417   // Merge sort the two lists of boxes.
418   BLOBNBOX_C_IT it1(&boxes_);
419   BLOBNBOX_C_IT it2(&other->boxes_);
420   while (!it2.empty()) {
421     BLOBNBOX* bbox2 = it2.extract();
422     it2.forward();
423     TBOX box2 = bbox2->bounding_box();
424     BLOBNBOX* bbox1 = it1.data();
425     TBOX box1 = bbox1->bounding_box();
426     while (box1.bottom() < box2.bottom() && !it1.at_last()) {
427       it1.forward();
428       bbox1 = it1.data();
429       box1 = bbox1->bounding_box();
430     }
431     if (box1.bottom() < box2.bottom()) {
432       it1.add_to_end(bbox2);
433     } else if (bbox1 != bbox2) {
434       it1.add_before_stay_put(bbox2);
435     }
436   }
437   Fit(vertical, true);
438   other->Delete(this);
439 }
440 
441 // Add a new element to the list of partner TabVectors.
442 // Partners must be added in order of increasing y coordinate of the text line
443 // that makes them partners.
444 // Groups of identical partners are merged into one.
AddPartner(TabVector * partner)445 void TabVector::AddPartner(TabVector* partner) {
446   if (IsSeparator() || partner->IsSeparator())
447     return;
448   TabVector_C_IT it(&partners_);
449   if (!it.empty()) {
450     it.move_to_last();
451     if (it.data() == partner)
452       return;
453   }
454   it.add_after_then_move(partner);
455 }
456 
457 // Return true if other is a partner of this.
IsAPartner(const TabVector * other)458 bool TabVector::IsAPartner(const TabVector* other) {
459   TabVector_C_IT it(&partners_);
460   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
461     if (it.data() == other)
462       return true;
463   }
464   return false;
465 }
466 
467 // These names must be synced with the TabAlignment enum in tabvector.h.
468 const char* kAlignmentNames[] = {
469   "Left Aligned",
470   "Left Ragged",
471   "Center",
472   "Right Aligned",
473   "Right Ragged",
474   "Separator"
475 };
476 
477 // Print basic information about this tab vector.
Print(const char * prefix)478 void TabVector::Print(const char* prefix) {
479   if (this == NULL) {
480     tprintf("%s <null>\n", prefix);
481   } else {
482     tprintf("%s %s (%d,%d)->(%d,%d) s=%d, sort key=%d, boxes=%d, partners=%d\n",
483             prefix, kAlignmentNames[alignment_],
484             startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
485             percent_score_, sort_key_,
486             boxes_.length(), partners_.length());
487   }
488 }
489 
490 // Print basic information about this tab vector and every box in it.
Debug(const char * prefix)491 void TabVector::Debug(const char* prefix) {
492   Print(prefix);
493   BLOBNBOX_C_IT it(&boxes_);
494   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
495     BLOBNBOX* bbox = it.data();
496     const TBOX& box = bbox->bounding_box();
497     tprintf("Box at (%d,%d)->(%d,%d)\n",
498             box.left(), box.bottom(), box.right(), box.top());
499   }
500 }
501 
502 // Draw this tabvector in place in the given window.
Display(ScrollView * tab_win)503 void TabVector::Display(ScrollView* tab_win) {
504 #ifndef GRAPHICS_DISABLED
505   if (textord_debug_printable)
506     tab_win->Pen(ScrollView::BLUE);
507   else if (alignment_ == TA_LEFT_ALIGNED)
508     tab_win->Pen(ScrollView::LIME_GREEN);
509   else if (alignment_ == TA_LEFT_RAGGED)
510     tab_win->Pen(ScrollView::DARK_GREEN);
511   else if (alignment_ == TA_RIGHT_ALIGNED)
512     tab_win->Pen(ScrollView::PINK);
513   else if (alignment_ == TA_RIGHT_RAGGED)
514     tab_win->Pen(ScrollView::CORAL);
515   else
516     tab_win->Pen(ScrollView::WHITE);
517   tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
518   tab_win->Pen(ScrollView::GREY);
519   tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
520   tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
521   char score_buf[64];
522   snprintf(score_buf, sizeof(score_buf), "%d", percent_score_);
523   tab_win->TextAttributes("Times", 50, false, false, false);
524   tab_win->Text(startpt_.x(), startpt_.y(), score_buf);
525 #endif
526 }
527 
528 // Refit the line and/or re-evaluate the vector if the dirty flags are set.
FitAndEvaluateIfNeeded(const ICOORD & vertical,TabFind * finder)529 void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical,
530                                        TabFind* finder) {
531   if (needs_refit_)
532     Fit(vertical, true);
533   if (needs_evaluation_)
534     Evaluate(vertical, finder);
535 }
536 
537 // Evaluate the vector in terms of coverage of its length by good-looking
538 // box edges. A good looking box is one where its nearest neighbour on the
539 // inside is nearer than half the distance its nearest neighbour on the
540 // outside of the putative column. Bad boxes are removed from the line.
541 // A second pass then further filters boxes by requiring that the gutter
542 // width be a minimum fraction of the mean gutter along the line.
Evaluate(const ICOORD & vertical,TabFind * finder)543 void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
544   needs_evaluation_ = false;
545   int length = endpt_.y() - startpt_.y();
546   if (length == 0 || boxes_.empty()) {
547     percent_score_ = 0;
548     Print("Zero length in evaluate");
549     return;
550   }
551   // Compute the mean box height.
552   BLOBNBOX_C_IT it(&boxes_);
553   int mean_height = 0;
554   int height_count = 0;
555   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
556     BLOBNBOX* bbox = it.data();
557     const TBOX& box = bbox->bounding_box();
558     int height = box.height();
559     mean_height += height;
560     ++height_count;
561   }
562   mean_height /= height_count;
563 
564   // Evaluate the boxes for their goodness, calculating the coverage as we go.
565   // Remove boxes that are not good and shorten the list to the first and
566   // last good boxes.
567   bool deleted_a_box = false;
568   int mean_gutter = 0;
569   int gutter_count = 0;
570   int good_length = 0;
571   const TBOX* prev_good_box = NULL;
572   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
573     BLOBNBOX* bbox = it.data();
574     const TBOX& box = bbox->bounding_box();
575     int mid_y = (box.top() + box.bottom()) / 2;
576     // A good box is one where the nearest neighbour on the inside is closer
577     // than half the distance to the nearest neighbour on the outside
578     // (of the putative column).
579     bool left = IsLeftTab();
580     int tab_x = XAtY(mid_y);
581     int max_gutter = kGutterMultiple * mean_height;
582     if (IsRagged()) {
583       // Ragged edges face a tougher test in that the gap must always be within
584       // the height of the blob.
585       max_gutter = kGutterToNeighbourRatio * mean_height;
586     }
587     int gutter_width;
588     int neighbour_gap;
589     finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
590                                        bbox, &gutter_width, &neighbour_gap);
591     if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
592       tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
593               box.left(), box.bottom(), box.right(), box.top(),
594               gutter_width, neighbour_gap);
595     }
596     // Now we can make the test.
597     if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
598       // A good box contributes its height to the good_length.
599       good_length += box.top() - box.bottom();
600       mean_gutter += gutter_width;
601       ++gutter_count;
602       // Two good boxes together contribute the gap between them
603       // to the good_length as well, as long as the gap is not
604       // too big.
605       if (prev_good_box != NULL) {
606         int vertical_gap = box.bottom() - prev_good_box->top();
607         double size1 = sqrt(static_cast<double>(prev_good_box->area()));
608         double size2 = sqrt(static_cast<double>(box.area()));
609         if (vertical_gap < kMaxFillinMultiple * MIN(size1, size2))
610           good_length += vertical_gap;
611         if (TabFind::WithinTestRegion(2, tab_x, mid_y))
612           tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n",
613                   vertical_gap, kMaxFillinMultiple * MIN(size1, size2),
614                   good_length);
615       } else {
616         // Adjust the start to the first good box.
617         SetYStart(box.bottom());
618       }
619       prev_good_box = &box;
620     } else {
621       // Get rid of boxes that are not good.
622       if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
623         tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
624                 box.left(), box.bottom(), box.right(), box.top(),
625                 gutter_width, neighbour_gap);
626       }
627       it.extract();
628       deleted_a_box = true;
629     }
630   }
631   // If there are any good boxes, do it again, except this time get rid of
632   // boxes that have a gutter that is a small fraction of the mean gutter.
633   // This filters out ends that run into a coincidental gap in the text.
634   if (gutter_count > 0) {
635     mean_gutter /= gutter_count;
636     prev_good_box = NULL;
637     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
638       BLOBNBOX* bbox = it.data();
639       const TBOX& box = bbox->bounding_box();
640       int mid_y = (box.top() + box.bottom()) / 2;
641       // A good box is one where the gutter width is at least some constant
642       // fraction of the mean gutter width.
643       bool left = IsLeftTab();
644       int tab_x = XAtY(mid_y);
645       int max_gutter = kGutterMultiple * mean_height;
646       if (IsRagged()) {
647         // Ragged edges face a tougher test in that the gap must always be
648         // within the height of the blob.
649         max_gutter = kGutterToNeighbourRatio * mean_height;
650       }
651       int gutter_width;
652       int neighbour_gap;
653       finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
654                                          bbox, &gutter_width, &neighbour_gap);
655       // Now we can make the test.
656       if (gutter_width >= mean_gutter * kMinGutterFraction) {
657         if (prev_good_box == NULL) {
658           // Adjust the start to the first good box.
659           SetYStart(box.bottom());
660         }
661         prev_good_box = &box;
662       } else {
663         // Get rid of boxes that are not good.
664         if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
665           tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
666                   box.left(), box.bottom(), box.right(), box.top(),
667                   gutter_width, mean_gutter);
668         }
669         it.extract();
670         deleted_a_box = true;
671       }
672     }
673   }
674   // If there has been a good box, adjust the end.
675   if (prev_good_box != NULL) {
676     SetYEnd(prev_good_box->top());
677     // Compute the percentage of the vector that is occupied by good boxes.
678     int length = endpt_.y() - startpt_.y();
679     percent_score_ = 100 * good_length / length;
680     if (deleted_a_box) {
681       needs_refit_ = true;
682       FitAndEvaluateIfNeeded(vertical, finder);
683     }
684   } else {
685     // There are no good boxes left, so score is 0.
686     percent_score_ = 0;
687   }
688 }
689 
690 // (Re)Fit a line to the stored points. Returns false if the line
691 // is degenerate.
Fit(ICOORD vertical,bool force_parallel)692 bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
693   needs_refit_ = false;
694   if (boxes_.empty() && !force_parallel) {
695     // Don't refit something with no boxes, as that only happens
696     // in Evaluate, and we don't want to end up with a zero vector.
697     // If we are forcing parallel, then that is OK.
698     return false;
699   }
700   if (!force_parallel && !IsRagged()) {
701     // Use a fitted line as the vertical.
702     DetLineFit linepoints;
703     BLOBNBOX_C_IT it(&boxes_);
704     // Fit a line to all the boxes in the list.
705     for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
706       BLOBNBOX* bbox = it.data();
707       TBOX box = bbox->bounding_box();
708       int x1 = IsRightTab() ? box.right() : box.left();
709       ICOORD boxpt(x1, box.bottom());
710       linepoints.Add(boxpt);
711       if (it.at_last()) {
712         ICOORD top_pt(x1, box.top());
713         linepoints.Add(top_pt);
714       }
715     }
716     linepoints.Fit(&startpt_, &endpt_);
717     if (startpt_.y() != endpt_.y()) {
718       vertical = endpt_;
719       vertical -= startpt_;
720     }
721   }
722   int start_y = startpt_.y();
723   int end_y = endpt_.y();
724   sort_key_ = IsLeftTab() ? MAX_INT32 : -MAX_INT32;
725   BLOBNBOX_C_IT it(&boxes_);
726   // Choose a line parallel to the vertical such that all boxes are on the
727   // correct side of it.
728   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
729     BLOBNBOX* bbox = it.data();
730     TBOX box = bbox->bounding_box();
731     int x1 = IsRightTab() ? box.right() : box.left();
732     // Test both the bottom and the top, as one will be more extreme, depending
733     // on the direction of skew.
734     int bottom_y = box.bottom();
735     int top_y = box.top();
736     int key = SortKey(vertical, x1, bottom_y);
737     if (IsLeftTab() == (key < sort_key_)) {
738       sort_key_ = key;
739       startpt_ = ICOORD(x1, bottom_y);
740     }
741     key = SortKey(vertical, x1, top_y);
742     if (IsLeftTab() == (key < sort_key_)) {
743       sort_key_ = key;
744       startpt_ = ICOORD(x1, top_y);
745     }
746     if (it.at_first())
747       start_y = bottom_y;
748     if (it.at_last())
749       end_y = top_y;
750   }
751   if (boxes_.empty()) {
752     ICOORD midpt = startpt_;
753     midpt += endpt_;
754     midpt /= 2;
755     sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
756   }
757   endpt_ = startpt_ + vertical;
758   needs_evaluation_ = true;
759   if (start_y != end_y) {
760     // Set the ends of the vector to fully include the first and last blobs.
761     startpt_.set_x(XAtY(vertical, sort_key_, start_y));
762     startpt_.set_y(start_y);
763     endpt_.set_x(XAtY(vertical, sort_key_, end_y));
764     endpt_.set_y(end_y);
765     return true;
766   }
767   return false;
768 }
769 
770 // Return the partner of this TabVector if the vector qualifies as
771 // being a vertical text line, otherwise NULL.
VerticalTextlinePartner()772 TabVector* TabVector::VerticalTextlinePartner() {
773   if (!partners_.singleton())
774     return NULL;
775   TabVector_C_IT partner_it(&partners_);
776   TabVector* partner = partner_it.data();
777   BLOBNBOX_C_IT box_it1(&boxes_);
778   BLOBNBOX_C_IT box_it2(&partner->boxes_);
779   // Count how many boxes are also in the other list.
780   // At the same time, gather the mean width and median vertical gap.
781   int num_matched = 0;
782   int num_unmatched = 0;
783   int total_widths = 0;
784   int width = startpt().x() - partner->startpt().x();
785   if (width < 0)
786     width = -width;
787   STATS gaps(0, width * 2);
788   BLOBNBOX* prev_bbox = NULL;
789   box_it2.mark_cycle_pt();
790   for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
791     BLOBNBOX* bbox = box_it1.data();
792     TBOX box = bbox->bounding_box();
793     if (prev_bbox != NULL) {
794       gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
795     }
796     while (!box_it2.cycled_list() && box_it2.data() != bbox &&
797            box_it2.data()->bounding_box().bottom() < box.bottom()) {
798       box_it2.forward();
799     }
800     if (!box_it2.cycled_list() && box_it2.data() == bbox &&
801         bbox->region_type() >= BRT_UNKNOWN &&
802         (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN))
803       ++num_matched;
804     else
805       ++num_unmatched;
806     total_widths += box.width();
807     prev_bbox = bbox;
808   }
809   if (textord_debug_tabfind > 1) {
810     Print("Testing for vertical text");
811     tprintf("gaps=%d, matched=%d, unmatched=%d, median gap=%.2f, width=%.2f\n",
812             gaps.get_total(), num_matched, num_unmatched,
813             gaps.median(),
814             total_widths * 1.0 / (num_unmatched + num_matched));
815   }
816   if (gaps.get_total() == 0 || num_matched <= num_unmatched) {
817     return NULL;
818   }
819   // It qualifies if the median gap is less than kVerticalTextGapFraction *
820   // mean width.
821   if (gaps.median() >= total_widths * kVerticalTextGapFraction /
822       (num_unmatched + num_matched)) {
823     return NULL;
824   }
825   if (textord_debug_tabfind > 1) {
826     tprintf("Vertical text found\n");
827   }
828   return partner;
829 }
830 
831 // The constructor is private.
TabVector(int extended_ymin,int extended_ymax,TabAlignment alignment,BLOBNBOX_CLIST * boxes)832 TabVector::TabVector(int extended_ymin, int extended_ymax,
833                      TabAlignment alignment, BLOBNBOX_CLIST* boxes)
834   : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
835     sort_key_(0), percent_score_(0),
836     needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
837     top_constraints_(NULL), bottom_constraints_(NULL) {
838   BLOBNBOX_C_IT it(&boxes_);
839   it.add_list_after(boxes);
840 }
841 
842 // Delete this, but first, repoint all the partners to point to
843 // replacement. If replacement is NULL, then partner relationships
844 // are removed.
Delete(TabVector * replacement)845 void TabVector::Delete(TabVector* replacement) {
846   TabVector_C_IT it(&partners_);
847   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
848     TabVector* partner = it.data();
849     TabVector_C_IT p_it(&partner->partners_);
850     // If partner already has replacement in its list, then make
851     // replacement null, and just remove this TabVector when we find it.
852     TabVector* partner_replacement = replacement;
853     for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
854       TabVector* p_partner = p_it.data();
855       if (p_partner == partner_replacement) {
856         partner_replacement = NULL;
857         break;
858       }
859     }
860     // Remove all references to this, and replace with replacement if not NULL.
861     for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
862       TabVector* p_partner = p_it.data();
863       if (p_partner == this) {
864         p_it.extract();
865         if (partner_replacement != NULL)
866           p_it.add_before_stay_put(partner_replacement);
867       }
868     }
869     if (partner_replacement != NULL) {
870       partner_replacement->AddPartner(partner);
871     }
872   }
873   delete this;
874 }
875 
876 
877 }  // namespace tesseract.
878 
879