1 ///////////////////////////////////////////////////////////////////////
2 // File: tabvector.cpp
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author: Ray Smith
5 // Created: Thu Apr 10 16:28:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19
20 #include "tabvector.h"
21 #include "blobbox.h"
22 #include "colfind.h"
23 #include "colpartitionset.h"
24 #include "detlinefit.h"
25
26 namespace tesseract {
27
28 // Multiple of height used as a gutter for evaluation search.
29 const int kGutterMultiple = 4;
30 // Multiple of neighbour gap that we expect the gutter gap to be at minimum.
31 const int kGutterToNeighbourRatio = 3;
32 // Pixel distance for tab vectors to be considered the same.
33 const int kSimilarVectorDist = 10;
34 // Pixel distance for ragged tab vectors to be considered the same if there
35 // is nothing in the overlap box
36 const int kSimilarRaggedDist = 50;
37 // Max multiple of height to allow filling in between blobs when evaluating.
38 const int kMaxFillinMultiple = 11;
39 // Min fraction of mean gutter size to allow a gutter on a good tab blob.
40 const double kMinGutterFraction = 0.5;
41 // Max fraction of mean blob width allowed for vertical gaps in vertical text.
42 const double kVerticalTextGapFraction = 0.5;
43
ELISTIZE(TabConstraint)44 ELISTIZE(TabConstraint)
45
46 // Create a constraint for the top or bottom of this TabVector.
47 void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) {
48 TabConstraint* constraint = new TabConstraint(vector, is_top);
49 TabConstraint_LIST* constraints = new TabConstraint_LIST;
50 TabConstraint_IT it(constraints);
51 it.add_to_end(constraint);
52 if (is_top)
53 vector->set_top_constraints(constraints);
54 else
55 vector->set_bottom_constraints(constraints);
56 }
57
58 // Test to see if the constraints are compatible enough to merge.
CompatibleConstraints(TabConstraint_LIST * list1,TabConstraint_LIST * list2)59 bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1,
60 TabConstraint_LIST* list2) {
61 if (list1 == list2)
62 return false;
63 int y_min = -MAX_INT32;
64 int y_max = MAX_INT32;
65 if (textord_debug_tabfind > 3)
66 tprintf("Testing constraint compatibility\n");
67 GetConstraints(list1, &y_min, &y_max);
68 GetConstraints(list2, &y_min, &y_max);
69 if (textord_debug_tabfind > 3)
70 tprintf("Resulting range = [%d,%d]\n", y_min, y_max);
71 return y_max >= y_min;
72 }
73
74 // Merge the lists of constraints and update the TabVector pointers.
75 // The second list is deleted.
MergeConstraints(TabConstraint_LIST * list1,TabConstraint_LIST * list2)76 void TabConstraint::MergeConstraints(TabConstraint_LIST* list1,
77 TabConstraint_LIST* list2) {
78 if (list1 == list2)
79 return;
80 TabConstraint_IT it(list2);
81 if (textord_debug_tabfind > 3)
82 tprintf("Merging constraints\n");
83 // The vectors of all constraints on list2 are now going to be on list1.
84 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
85 TabConstraint* constraint = it.data();
86 if (textord_debug_tabfind> 3)
87 constraint->vector_->Print("Merge");
88 if (constraint->is_top_)
89 constraint->vector_->set_top_constraints(list1);
90 else
91 constraint->vector_->set_bottom_constraints(list1);
92 }
93 it = list1;
94 it.add_list_before(list2);
95 delete list2;
96 }
97
98 // Set all the tops and bottoms as appropriate to a mean of the
99 // constrained range. Delete all the constraints and list.
ApplyConstraints(TabConstraint_LIST * constraints)100 void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) {
101 int y_min = -MAX_INT32;
102 int y_max = MAX_INT32;
103 GetConstraints(constraints, &y_min, &y_max);
104 int y = (y_min + y_max) / 2;
105 TabConstraint_IT it(constraints);
106 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
107 TabConstraint* constraint = it.data();
108 TabVector* v = constraint->vector_;
109 if (constraint->is_top_) {
110 v->SetYEnd(y);
111 v->set_top_constraints(NULL);
112 } else {
113 v->SetYStart(y);
114 v->set_bottom_constraints(NULL);
115 }
116 }
117 delete constraints;
118 }
119
TabConstraint(TabVector * vector,bool is_top)120 TabConstraint::TabConstraint(TabVector* vector, bool is_top)
121 : vector_(vector), is_top_(is_top) {
122 if (is_top) {
123 y_min_ = vector->endpt().y();
124 y_max_ = vector->extended_ymax();
125 } else {
126 y_max_ = vector->startpt().y();
127 y_min_ = vector->extended_ymin();
128 }
129 }
130
131 // Get the max of the mins and the min of the maxes.
GetConstraints(TabConstraint_LIST * constraints,int * y_min,int * y_max)132 void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
133 int* y_min, int* y_max) {
134 TabConstraint_IT it(constraints);
135 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
136 TabConstraint* constraint = it.data();
137 if (textord_debug_tabfind > 3) {
138 tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
139 constraint->vector_->Print(" for");
140 }
141 *y_min = MAX(*y_min, constraint->y_min_);
142 *y_max = MIN(*y_max, constraint->y_max_);
143 }
144 }
145
146 ELIST2IZE(TabVector)
CLISTIZE(TabVector)147 CLISTIZE(TabVector)
148
149 // The constructor is private. See the bottom of the file...
150
151 TabVector::~TabVector() {
152 }
153
154
155 // Public factory to build a TabVector from a list of boxes.
156 // The TabVector will be of the given alignment type.
157 // The input vertical vector is used in fitting, and the output
158 // vertical_x, vertical_y have the resulting line vector added to them
159 // if the alignment is not ragged.
160 // The extended_start_y and extended_end_y are the maximum possible
161 // extension to the line segment that can be used to align with others.
162 // The input CLIST of BLOBNBOX good_points is consumed and taken over.
FitVector(TabAlignment alignment,ICOORD vertical,int extended_start_y,int extended_end_y,BLOBNBOX_CLIST * good_points,int * vertical_x,int * vertical_y)163 TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical,
164 int extended_start_y, int extended_end_y,
165 BLOBNBOX_CLIST* good_points,
166 int* vertical_x, int* vertical_y) {
167 TabVector* vector = new TabVector(extended_start_y, extended_end_y,
168 alignment, good_points);
169 if (!vector->Fit(vertical, false)) {
170 delete vector;
171 return NULL;
172 }
173 if (!vector->IsRagged()) {
174 vertical = vector->endpt_ - vector->startpt_;
175 int weight = vector->BoxCount();
176 *vertical_x += vertical.x() * weight;
177 *vertical_y += vertical.y() * weight;
178 }
179 return vector;
180 }
181
182 // Build a ragged TabVector by copying another's direction, shifting it
183 // to match the given blob, and making its initial extent the height
184 // of the blob, but its extended bounds from the bounds of the original.
TabVector(const TabVector & src,TabAlignment alignment,const ICOORD & vertical_skew,BLOBNBOX * blob)185 TabVector::TabVector(const TabVector& src, TabAlignment alignment,
186 const ICOORD& vertical_skew, BLOBNBOX* blob)
187 : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
188 sort_key_(0), percent_score_(0),
189 needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
190 top_constraints_(NULL), bottom_constraints_(NULL) {
191 BLOBNBOX_C_IT it(&boxes_);
192 it.add_to_end(blob);
193 TBOX box = blob->bounding_box();
194 if (IsLeftTab()) {
195 startpt_ = box.botleft();
196 endpt_ = box.topleft();
197 } else {
198 startpt_ = box.botright();
199 endpt_ = box.topright();
200 }
201 sort_key_ = SortKey(vertical_skew,
202 (startpt_.x() + endpt_.x()) / 2,
203 (startpt_.y() + endpt_.y()) / 2);
204 if (textord_debug_tabfind > 3)
205 Print("Constructed a new tab vector:");
206 }
207
208 // Extend this vector to include the supplied blob if it doesn't
209 // already have it.
ExtendToBox(BLOBNBOX * new_blob)210 void TabVector::ExtendToBox(BLOBNBOX* new_blob) {
211 TBOX new_box = new_blob->bounding_box();
212 BLOBNBOX_C_IT it(&boxes_);
213 if (!it.empty()) {
214 BLOBNBOX* blob = it.data();
215 TBOX box = blob->bounding_box();
216 while (!it.at_last() && box.top() <= new_box.top()) {
217 if (blob == new_blob)
218 return; // We have it already.
219 it.forward();
220 blob = it.data();
221 box = blob->bounding_box();
222 }
223 if (box.top() >= new_box.top()) {
224 it.add_before_stay_put(new_blob);
225 needs_refit_ = true;
226 return;
227 }
228 }
229 needs_refit_ = true;
230 it.add_after_stay_put(new_blob);
231 }
232
233 // Set the ycoord of the start and move the xcoord to match.
SetYStart(int start_y)234 void TabVector::SetYStart(int start_y) {
235 startpt_.set_x(XAtY(start_y));
236 startpt_.set_y(start_y);
237 }
238 // Set the ycoord of the end and move the xcoord to match.
SetYEnd(int end_y)239 void TabVector::SetYEnd(int end_y) {
240 endpt_.set_x(XAtY(end_y));
241 endpt_.set_y(end_y);
242 }
243
244 // Rotate the ends by the given vector.
Rotate(const FCOORD & rotation)245 void TabVector::Rotate(const FCOORD& rotation) {
246 startpt_.rotate(rotation);
247 endpt_.rotate(rotation);
248 }
249
250 // Setup the initial constraints, being the limits of
251 // the vector and the extended ends.
SetupConstraints()252 void TabVector::SetupConstraints() {
253 TabConstraint::CreateConstraint(this, false);
254 TabConstraint::CreateConstraint(this, true);
255 }
256
257 // Setup the constraints between the partners of this TabVector.
SetupPartnerConstraints()258 void TabVector::SetupPartnerConstraints() {
259 // With the first and last partner, we want a common bottom and top,
260 // respectively, and for each change of partner, we want a common
261 // top of first with bottom of next.
262 TabVector_C_IT it(&partners_);
263 TabVector* prev_partner = NULL;
264 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
265 TabVector* partner = it.data();
266 if (partner->top_constraints_ == NULL ||
267 partner->bottom_constraints_ == NULL) {
268 partner->Print("Impossible: has no constraints");
269 Print("This vector has it as a partner");
270 continue;
271 }
272 if (prev_partner == NULL) {
273 // This is the first partner, so common bottom.
274 if (TabConstraint::CompatibleConstraints(bottom_constraints_,
275 partner->bottom_constraints_))
276 TabConstraint::MergeConstraints(bottom_constraints_,
277 partner->bottom_constraints_);
278 } else {
279 // We need prev top to be common with partner bottom.
280 if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
281 partner->bottom_constraints_))
282 TabConstraint::MergeConstraints(prev_partner->top_constraints_,
283 partner->bottom_constraints_);
284 }
285 prev_partner = partner;
286 if (it.at_last()) {
287 // This is the last partner, so common top.
288 if (TabConstraint::CompatibleConstraints(top_constraints_,
289 partner->top_constraints_))
290 TabConstraint::MergeConstraints(top_constraints_,
291 partner->top_constraints_);
292 }
293 }
294 }
295
296 // Setup the constraints between this and its partner.
SetupPartnerConstraints(TabVector * partner)297 void TabVector::SetupPartnerConstraints(TabVector* partner) {
298 if (TabConstraint::CompatibleConstraints(bottom_constraints_,
299 partner->bottom_constraints_))
300 TabConstraint::MergeConstraints(bottom_constraints_,
301 partner->bottom_constraints_);
302 if (TabConstraint::CompatibleConstraints(top_constraints_,
303 partner->top_constraints_))
304 TabConstraint::MergeConstraints(top_constraints_,
305 partner->top_constraints_);
306 }
307
308 // Use the constraints to modify the top and bottom.
ApplyConstraints()309 void TabVector::ApplyConstraints() {
310 if (top_constraints_ != NULL)
311 TabConstraint::ApplyConstraints(top_constraints_);
312 if (bottom_constraints_ != NULL)
313 TabConstraint::ApplyConstraints(bottom_constraints_);
314 }
315
316 // Merge close tab vectors of the same side that overlap.
MergeSimilarTabVectors(const ICOORD & vertical,TabVector_LIST * vectors,BlobGrid * grid)317 void TabVector::MergeSimilarTabVectors(const ICOORD& vertical,
318 TabVector_LIST* vectors,
319 BlobGrid* grid) {
320 TabVector_IT it1(vectors);
321 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
322 TabVector* v1 = it1.data();
323 TabVector_IT it2(it1);
324 for (it2.forward(); !it2.at_first(); it2.forward()) {
325 TabVector* v2 = it2.data();
326 if (v2->SimilarTo(vertical, *v1, grid)) {
327 // Merge into the forward one, in case the combined vector now
328 // overlaps one in between.
329 if (textord_debug_tabfind) {
330 v2->Print("Merging");
331 v1->Print("by deleting");
332 }
333 v2->MergeWith(vertical, it1.extract());
334 ICOORD merged_vector = v2->endpt();
335 merged_vector -= v2->startpt();
336 if (abs(merged_vector.x()) > 100) {
337 v2->Print("Garbage result of merge?");
338 }
339 break;
340 }
341 }
342 }
343 }
344
345 // Return true if this vector is the same side, overlaps, and close
346 // enough to the other to be merged.
SimilarTo(const ICOORD & vertical,const TabVector & other,BlobGrid * grid) const347 bool TabVector::SimilarTo(const ICOORD& vertical,
348 const TabVector& other, BlobGrid* grid) const {
349 if ((IsRightTab() && other.IsRightTab()) ||
350 (IsLeftTab() && other.IsLeftTab())) {
351 // If they don't overlap, at least in extensions, then there is no chance.
352 if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
353 return false;
354 // A fast approximation to the scale factor of the sort_key_.
355 int v_scale = abs(vertical.y());
356 if (v_scale == 0)
357 v_scale = 1;
358 // If they are close enough, then OK.
359 if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
360 sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
361 return true;
362 // Ragged tabs get a bigger threshold.
363 if (!IsRagged() || !other.IsRagged() ||
364 sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
365 sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
366 return false;
367 if (grid == NULL) {
368 // There is nothing else to test!
369 return true;
370 }
371 // If there is nothing in the rectangle between the vector that is going to
372 // move, and the place it is moving to, then they can be merged.
373 // Setup a vertical search for any blob.
374 const TabVector* mover = (IsRightTab() &&
375 sort_key_ < other.sort_key_) ? this : &other;
376 int top_y = mover->endpt_.y();
377 int bottom_y = mover->startpt_.y();
378 int left = MIN(mover->XAtY(top_y), mover->XAtY(bottom_y));
379 int right = MAX(mover->XAtY(top_y), mover->XAtY(bottom_y));
380 int shift = abs(sort_key_ - other.sort_key_) / v_scale;
381 if (IsRightTab()) {
382 right += shift;
383 } else {
384 left -= shift;
385 }
386
387 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
388 vsearch.StartVerticalSearch(left, right, top_y);
389 BLOBNBOX* blob;
390 while ((blob = vsearch.NextVerticalSearch(true)) != NULL) {
391 TBOX box = blob->bounding_box();
392 if (box.top() > bottom_y)
393 return true; // Nothing found.
394 if (box.bottom() < top_y)
395 continue; // Doesn't overlap.
396 int left_at_box = XAtY(box.bottom());
397 int right_at_box = left_at_box;
398 if (IsRightTab())
399 right_at_box += shift;
400 else
401 left_at_box -= shift;
402 if (MIN(right_at_box, box.right()) > MAX(left_at_box, box.left()))
403 return false;
404 }
405 return true; // Nothing found.
406 }
407 return false;
408 }
409
410 // Eat the other TabVector into this and delete it.
MergeWith(const ICOORD & vertical,TabVector * other)411 void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) {
412 extended_ymin_ = MIN(extended_ymin_, other->extended_ymin_);
413 extended_ymax_ = MAX(extended_ymax_, other->extended_ymax_);
414 if (other->IsRagged()) {
415 alignment_ = other->alignment_;
416 }
417 // Merge sort the two lists of boxes.
418 BLOBNBOX_C_IT it1(&boxes_);
419 BLOBNBOX_C_IT it2(&other->boxes_);
420 while (!it2.empty()) {
421 BLOBNBOX* bbox2 = it2.extract();
422 it2.forward();
423 TBOX box2 = bbox2->bounding_box();
424 BLOBNBOX* bbox1 = it1.data();
425 TBOX box1 = bbox1->bounding_box();
426 while (box1.bottom() < box2.bottom() && !it1.at_last()) {
427 it1.forward();
428 bbox1 = it1.data();
429 box1 = bbox1->bounding_box();
430 }
431 if (box1.bottom() < box2.bottom()) {
432 it1.add_to_end(bbox2);
433 } else if (bbox1 != bbox2) {
434 it1.add_before_stay_put(bbox2);
435 }
436 }
437 Fit(vertical, true);
438 other->Delete(this);
439 }
440
441 // Add a new element to the list of partner TabVectors.
442 // Partners must be added in order of increasing y coordinate of the text line
443 // that makes them partners.
444 // Groups of identical partners are merged into one.
AddPartner(TabVector * partner)445 void TabVector::AddPartner(TabVector* partner) {
446 if (IsSeparator() || partner->IsSeparator())
447 return;
448 TabVector_C_IT it(&partners_);
449 if (!it.empty()) {
450 it.move_to_last();
451 if (it.data() == partner)
452 return;
453 }
454 it.add_after_then_move(partner);
455 }
456
457 // Return true if other is a partner of this.
IsAPartner(const TabVector * other)458 bool TabVector::IsAPartner(const TabVector* other) {
459 TabVector_C_IT it(&partners_);
460 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
461 if (it.data() == other)
462 return true;
463 }
464 return false;
465 }
466
467 // These names must be synced with the TabAlignment enum in tabvector.h.
468 const char* kAlignmentNames[] = {
469 "Left Aligned",
470 "Left Ragged",
471 "Center",
472 "Right Aligned",
473 "Right Ragged",
474 "Separator"
475 };
476
477 // Print basic information about this tab vector.
Print(const char * prefix)478 void TabVector::Print(const char* prefix) {
479 if (this == NULL) {
480 tprintf("%s <null>\n", prefix);
481 } else {
482 tprintf("%s %s (%d,%d)->(%d,%d) s=%d, sort key=%d, boxes=%d, partners=%d\n",
483 prefix, kAlignmentNames[alignment_],
484 startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
485 percent_score_, sort_key_,
486 boxes_.length(), partners_.length());
487 }
488 }
489
490 // Print basic information about this tab vector and every box in it.
Debug(const char * prefix)491 void TabVector::Debug(const char* prefix) {
492 Print(prefix);
493 BLOBNBOX_C_IT it(&boxes_);
494 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
495 BLOBNBOX* bbox = it.data();
496 const TBOX& box = bbox->bounding_box();
497 tprintf("Box at (%d,%d)->(%d,%d)\n",
498 box.left(), box.bottom(), box.right(), box.top());
499 }
500 }
501
502 // Draw this tabvector in place in the given window.
Display(ScrollView * tab_win)503 void TabVector::Display(ScrollView* tab_win) {
504 #ifndef GRAPHICS_DISABLED
505 if (textord_debug_printable)
506 tab_win->Pen(ScrollView::BLUE);
507 else if (alignment_ == TA_LEFT_ALIGNED)
508 tab_win->Pen(ScrollView::LIME_GREEN);
509 else if (alignment_ == TA_LEFT_RAGGED)
510 tab_win->Pen(ScrollView::DARK_GREEN);
511 else if (alignment_ == TA_RIGHT_ALIGNED)
512 tab_win->Pen(ScrollView::PINK);
513 else if (alignment_ == TA_RIGHT_RAGGED)
514 tab_win->Pen(ScrollView::CORAL);
515 else
516 tab_win->Pen(ScrollView::WHITE);
517 tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
518 tab_win->Pen(ScrollView::GREY);
519 tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
520 tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
521 char score_buf[64];
522 snprintf(score_buf, sizeof(score_buf), "%d", percent_score_);
523 tab_win->TextAttributes("Times", 50, false, false, false);
524 tab_win->Text(startpt_.x(), startpt_.y(), score_buf);
525 #endif
526 }
527
528 // Refit the line and/or re-evaluate the vector if the dirty flags are set.
FitAndEvaluateIfNeeded(const ICOORD & vertical,TabFind * finder)529 void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical,
530 TabFind* finder) {
531 if (needs_refit_)
532 Fit(vertical, true);
533 if (needs_evaluation_)
534 Evaluate(vertical, finder);
535 }
536
537 // Evaluate the vector in terms of coverage of its length by good-looking
538 // box edges. A good looking box is one where its nearest neighbour on the
539 // inside is nearer than half the distance its nearest neighbour on the
540 // outside of the putative column. Bad boxes are removed from the line.
541 // A second pass then further filters boxes by requiring that the gutter
542 // width be a minimum fraction of the mean gutter along the line.
Evaluate(const ICOORD & vertical,TabFind * finder)543 void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
544 needs_evaluation_ = false;
545 int length = endpt_.y() - startpt_.y();
546 if (length == 0 || boxes_.empty()) {
547 percent_score_ = 0;
548 Print("Zero length in evaluate");
549 return;
550 }
551 // Compute the mean box height.
552 BLOBNBOX_C_IT it(&boxes_);
553 int mean_height = 0;
554 int height_count = 0;
555 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
556 BLOBNBOX* bbox = it.data();
557 const TBOX& box = bbox->bounding_box();
558 int height = box.height();
559 mean_height += height;
560 ++height_count;
561 }
562 mean_height /= height_count;
563
564 // Evaluate the boxes for their goodness, calculating the coverage as we go.
565 // Remove boxes that are not good and shorten the list to the first and
566 // last good boxes.
567 bool deleted_a_box = false;
568 int mean_gutter = 0;
569 int gutter_count = 0;
570 int good_length = 0;
571 const TBOX* prev_good_box = NULL;
572 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
573 BLOBNBOX* bbox = it.data();
574 const TBOX& box = bbox->bounding_box();
575 int mid_y = (box.top() + box.bottom()) / 2;
576 // A good box is one where the nearest neighbour on the inside is closer
577 // than half the distance to the nearest neighbour on the outside
578 // (of the putative column).
579 bool left = IsLeftTab();
580 int tab_x = XAtY(mid_y);
581 int max_gutter = kGutterMultiple * mean_height;
582 if (IsRagged()) {
583 // Ragged edges face a tougher test in that the gap must always be within
584 // the height of the blob.
585 max_gutter = kGutterToNeighbourRatio * mean_height;
586 }
587 int gutter_width;
588 int neighbour_gap;
589 finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
590 bbox, &gutter_width, &neighbour_gap);
591 if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
592 tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
593 box.left(), box.bottom(), box.right(), box.top(),
594 gutter_width, neighbour_gap);
595 }
596 // Now we can make the test.
597 if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
598 // A good box contributes its height to the good_length.
599 good_length += box.top() - box.bottom();
600 mean_gutter += gutter_width;
601 ++gutter_count;
602 // Two good boxes together contribute the gap between them
603 // to the good_length as well, as long as the gap is not
604 // too big.
605 if (prev_good_box != NULL) {
606 int vertical_gap = box.bottom() - prev_good_box->top();
607 double size1 = sqrt(static_cast<double>(prev_good_box->area()));
608 double size2 = sqrt(static_cast<double>(box.area()));
609 if (vertical_gap < kMaxFillinMultiple * MIN(size1, size2))
610 good_length += vertical_gap;
611 if (TabFind::WithinTestRegion(2, tab_x, mid_y))
612 tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n",
613 vertical_gap, kMaxFillinMultiple * MIN(size1, size2),
614 good_length);
615 } else {
616 // Adjust the start to the first good box.
617 SetYStart(box.bottom());
618 }
619 prev_good_box = &box;
620 } else {
621 // Get rid of boxes that are not good.
622 if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
623 tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
624 box.left(), box.bottom(), box.right(), box.top(),
625 gutter_width, neighbour_gap);
626 }
627 it.extract();
628 deleted_a_box = true;
629 }
630 }
631 // If there are any good boxes, do it again, except this time get rid of
632 // boxes that have a gutter that is a small fraction of the mean gutter.
633 // This filters out ends that run into a coincidental gap in the text.
634 if (gutter_count > 0) {
635 mean_gutter /= gutter_count;
636 prev_good_box = NULL;
637 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
638 BLOBNBOX* bbox = it.data();
639 const TBOX& box = bbox->bounding_box();
640 int mid_y = (box.top() + box.bottom()) / 2;
641 // A good box is one where the gutter width is at least some constant
642 // fraction of the mean gutter width.
643 bool left = IsLeftTab();
644 int tab_x = XAtY(mid_y);
645 int max_gutter = kGutterMultiple * mean_height;
646 if (IsRagged()) {
647 // Ragged edges face a tougher test in that the gap must always be
648 // within the height of the blob.
649 max_gutter = kGutterToNeighbourRatio * mean_height;
650 }
651 int gutter_width;
652 int neighbour_gap;
653 finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
654 bbox, &gutter_width, &neighbour_gap);
655 // Now we can make the test.
656 if (gutter_width >= mean_gutter * kMinGutterFraction) {
657 if (prev_good_box == NULL) {
658 // Adjust the start to the first good box.
659 SetYStart(box.bottom());
660 }
661 prev_good_box = &box;
662 } else {
663 // Get rid of boxes that are not good.
664 if (TabFind::WithinTestRegion(2, tab_x, mid_y)) {
665 tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
666 box.left(), box.bottom(), box.right(), box.top(),
667 gutter_width, mean_gutter);
668 }
669 it.extract();
670 deleted_a_box = true;
671 }
672 }
673 }
674 // If there has been a good box, adjust the end.
675 if (prev_good_box != NULL) {
676 SetYEnd(prev_good_box->top());
677 // Compute the percentage of the vector that is occupied by good boxes.
678 int length = endpt_.y() - startpt_.y();
679 percent_score_ = 100 * good_length / length;
680 if (deleted_a_box) {
681 needs_refit_ = true;
682 FitAndEvaluateIfNeeded(vertical, finder);
683 }
684 } else {
685 // There are no good boxes left, so score is 0.
686 percent_score_ = 0;
687 }
688 }
689
690 // (Re)Fit a line to the stored points. Returns false if the line
691 // is degenerate.
Fit(ICOORD vertical,bool force_parallel)692 bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
693 needs_refit_ = false;
694 if (boxes_.empty() && !force_parallel) {
695 // Don't refit something with no boxes, as that only happens
696 // in Evaluate, and we don't want to end up with a zero vector.
697 // If we are forcing parallel, then that is OK.
698 return false;
699 }
700 if (!force_parallel && !IsRagged()) {
701 // Use a fitted line as the vertical.
702 DetLineFit linepoints;
703 BLOBNBOX_C_IT it(&boxes_);
704 // Fit a line to all the boxes in the list.
705 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
706 BLOBNBOX* bbox = it.data();
707 TBOX box = bbox->bounding_box();
708 int x1 = IsRightTab() ? box.right() : box.left();
709 ICOORD boxpt(x1, box.bottom());
710 linepoints.Add(boxpt);
711 if (it.at_last()) {
712 ICOORD top_pt(x1, box.top());
713 linepoints.Add(top_pt);
714 }
715 }
716 linepoints.Fit(&startpt_, &endpt_);
717 if (startpt_.y() != endpt_.y()) {
718 vertical = endpt_;
719 vertical -= startpt_;
720 }
721 }
722 int start_y = startpt_.y();
723 int end_y = endpt_.y();
724 sort_key_ = IsLeftTab() ? MAX_INT32 : -MAX_INT32;
725 BLOBNBOX_C_IT it(&boxes_);
726 // Choose a line parallel to the vertical such that all boxes are on the
727 // correct side of it.
728 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
729 BLOBNBOX* bbox = it.data();
730 TBOX box = bbox->bounding_box();
731 int x1 = IsRightTab() ? box.right() : box.left();
732 // Test both the bottom and the top, as one will be more extreme, depending
733 // on the direction of skew.
734 int bottom_y = box.bottom();
735 int top_y = box.top();
736 int key = SortKey(vertical, x1, bottom_y);
737 if (IsLeftTab() == (key < sort_key_)) {
738 sort_key_ = key;
739 startpt_ = ICOORD(x1, bottom_y);
740 }
741 key = SortKey(vertical, x1, top_y);
742 if (IsLeftTab() == (key < sort_key_)) {
743 sort_key_ = key;
744 startpt_ = ICOORD(x1, top_y);
745 }
746 if (it.at_first())
747 start_y = bottom_y;
748 if (it.at_last())
749 end_y = top_y;
750 }
751 if (boxes_.empty()) {
752 ICOORD midpt = startpt_;
753 midpt += endpt_;
754 midpt /= 2;
755 sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
756 }
757 endpt_ = startpt_ + vertical;
758 needs_evaluation_ = true;
759 if (start_y != end_y) {
760 // Set the ends of the vector to fully include the first and last blobs.
761 startpt_.set_x(XAtY(vertical, sort_key_, start_y));
762 startpt_.set_y(start_y);
763 endpt_.set_x(XAtY(vertical, sort_key_, end_y));
764 endpt_.set_y(end_y);
765 return true;
766 }
767 return false;
768 }
769
770 // Return the partner of this TabVector if the vector qualifies as
771 // being a vertical text line, otherwise NULL.
VerticalTextlinePartner()772 TabVector* TabVector::VerticalTextlinePartner() {
773 if (!partners_.singleton())
774 return NULL;
775 TabVector_C_IT partner_it(&partners_);
776 TabVector* partner = partner_it.data();
777 BLOBNBOX_C_IT box_it1(&boxes_);
778 BLOBNBOX_C_IT box_it2(&partner->boxes_);
779 // Count how many boxes are also in the other list.
780 // At the same time, gather the mean width and median vertical gap.
781 int num_matched = 0;
782 int num_unmatched = 0;
783 int total_widths = 0;
784 int width = startpt().x() - partner->startpt().x();
785 if (width < 0)
786 width = -width;
787 STATS gaps(0, width * 2);
788 BLOBNBOX* prev_bbox = NULL;
789 box_it2.mark_cycle_pt();
790 for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
791 BLOBNBOX* bbox = box_it1.data();
792 TBOX box = bbox->bounding_box();
793 if (prev_bbox != NULL) {
794 gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
795 }
796 while (!box_it2.cycled_list() && box_it2.data() != bbox &&
797 box_it2.data()->bounding_box().bottom() < box.bottom()) {
798 box_it2.forward();
799 }
800 if (!box_it2.cycled_list() && box_it2.data() == bbox &&
801 bbox->region_type() >= BRT_UNKNOWN &&
802 (prev_bbox == NULL || prev_bbox->region_type() >= BRT_UNKNOWN))
803 ++num_matched;
804 else
805 ++num_unmatched;
806 total_widths += box.width();
807 prev_bbox = bbox;
808 }
809 if (textord_debug_tabfind > 1) {
810 Print("Testing for vertical text");
811 tprintf("gaps=%d, matched=%d, unmatched=%d, median gap=%.2f, width=%.2f\n",
812 gaps.get_total(), num_matched, num_unmatched,
813 gaps.median(),
814 total_widths * 1.0 / (num_unmatched + num_matched));
815 }
816 if (gaps.get_total() == 0 || num_matched <= num_unmatched) {
817 return NULL;
818 }
819 // It qualifies if the median gap is less than kVerticalTextGapFraction *
820 // mean width.
821 if (gaps.median() >= total_widths * kVerticalTextGapFraction /
822 (num_unmatched + num_matched)) {
823 return NULL;
824 }
825 if (textord_debug_tabfind > 1) {
826 tprintf("Vertical text found\n");
827 }
828 return partner;
829 }
830
831 // The constructor is private.
TabVector(int extended_ymin,int extended_ymax,TabAlignment alignment,BLOBNBOX_CLIST * boxes)832 TabVector::TabVector(int extended_ymin, int extended_ymax,
833 TabAlignment alignment, BLOBNBOX_CLIST* boxes)
834 : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
835 sort_key_(0), percent_score_(0),
836 needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
837 top_constraints_(NULL), bottom_constraints_(NULL) {
838 BLOBNBOX_C_IT it(&boxes_);
839 it.add_list_after(boxes);
840 }
841
842 // Delete this, but first, repoint all the partners to point to
843 // replacement. If replacement is NULL, then partner relationships
844 // are removed.
Delete(TabVector * replacement)845 void TabVector::Delete(TabVector* replacement) {
846 TabVector_C_IT it(&partners_);
847 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
848 TabVector* partner = it.data();
849 TabVector_C_IT p_it(&partner->partners_);
850 // If partner already has replacement in its list, then make
851 // replacement null, and just remove this TabVector when we find it.
852 TabVector* partner_replacement = replacement;
853 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
854 TabVector* p_partner = p_it.data();
855 if (p_partner == partner_replacement) {
856 partner_replacement = NULL;
857 break;
858 }
859 }
860 // Remove all references to this, and replace with replacement if not NULL.
861 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
862 TabVector* p_partner = p_it.data();
863 if (p_partner == this) {
864 p_it.extract();
865 if (partner_replacement != NULL)
866 p_it.add_before_stay_put(partner_replacement);
867 }
868 }
869 if (partner_replacement != NULL) {
870 partner_replacement->AddPartner(partner);
871 }
872 }
873 delete this;
874 }
875
876
877 } // namespace tesseract.
878
879