• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        tabvector.h
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author:      Ray Smith
5 // Created:     Thu Apr 10 16:25:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 ///////////////////////////////////////////////////////////////////////
19 
20 #ifndef TESSERACT_TEXTORD_TABVECTOR_H__
21 #define TESSERACT_TEXTORD_TABVECTOR_H__
22 
23 #include "clst.h"
24 #include "elst.h"
25 #include "elst2.h"
26 #include "rect.h"
27 #include "bbgrid.h"
28 
29 #undef TA_CENTER
30 
31 class BLOBNBOX;
32 class ScrollView;
33 
CLISTIZEH(BLOBNBOX)34 CLISTIZEH(BLOBNBOX)
35 
36 namespace tesseract {
37 
38 // The alignment type that a tab vector represents.
39 // Keep this enum synced with kAlignmentNames in tabvector.cpp.
40 enum TabAlignment {
41   TA_LEFT_ALIGNED,
42   TA_LEFT_RAGGED,
43   TA_CENTER,
44   TA_RIGHT_ALIGNED,
45   TA_RIGHT_RAGGED,
46   TA_SEPARATOR,
47   TA_COUNT
48 };
49 
50 // Forward declarations. The classes use their own list types, so we
51 // need to make the list types first.
52 class TabFind;
53 class TabVector;
54 class TabConstraint;
55 typedef BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> BlobGrid;
56 
57 ELIST2IZEH(TabVector)
58 CLISTIZEH(TabVector)
59 ELISTIZEH(TabConstraint)
60 
61 // TabConstraint is a totally self-contained class to maintain
62 // a list of [min,max] constraints, each referring to a TabVector.
63 // The constraints are manipulated through static methods that act
64 // on a list of constraints. The list itself is cooperatively owned
65 // by the TabVectors of the constraints on the list and managed
66 // by implicit reference counting via the elements of the list.
67 class TabConstraint : public ELIST_LINK {
68  public:
69   TabConstraint() {
70     // This empty constructor is here only so that the class can be ELISTIZED.
71     // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
72     // and eliminate CLASSNAME##_copier.
73   }
74 
75   // Create a constraint for the top or bottom of this TabVector.
76   static void CreateConstraint(TabVector* vector, bool is_top);
77 
78   // Test to see if the constraints are compatible enough to merge.
79   static bool CompatibleConstraints(TabConstraint_LIST* list1,
80                                     TabConstraint_LIST* list2);
81 
82   // Merge the lists of constraints and update the TabVector pointers.
83   // The second list is deleted.
84   static void MergeConstraints(TabConstraint_LIST* list1,
85                                TabConstraint_LIST* list2);
86 
87   // Set all the tops and bottoms as appropriate to a mean of the
88   // constrained range. Delete all the constraints and list.
89   static void ApplyConstraints(TabConstraint_LIST* constraints);
90 
91  private:
92   TabConstraint(TabVector* vector, bool is_top);
93 
94   // Get the max of the mins and the min of the maxes.
95   static void GetConstraints(TabConstraint_LIST* constraints,
96                              int* y_min, int* y_max);
97 
98   // The TabVector this constraint applies to.
99   TabVector* vector_;
100   // If true then we refer to the top of the vector_.
101   bool is_top_;
102   // The allowed range of this vector_.
103   int y_min_;
104   int y_max_;
105 };
106 
107 // Class to hold information about a single vector
108 // that represents a tab stop or a rule line.
109 class TabVector : public ELIST2_LINK {
110  public:
111   TabVector() {
112     // TODO(rays) fix this in elst.h line 1076, where it should use the
113     // copy constructor instead of operator=.
114   }
115   ~TabVector();
116 
117   // Public factory to build a TabVector from a list of boxes.
118   // The TabVector will be of the given alignment type.
119   // The input vertical vector is used in fitting, and the output
120   // vertical_x, vertical_y have the resulting line vector added to them
121   // if the alignment is not ragged.
122   // The extended_start_y and extended_end_y are the maximum possible
123   // extension to the line segment that can be used to align with others.
124   // The input CLIST of BLOBNBOX good_points is consumed and taken over.
125   static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
126                               int  extended_start_y, int extended_end_y,
127                               BLOBNBOX_CLIST* good_points,
128                               int* vertical_x, int* vertical_y);
129 
130   // Build a ragged TabVector by copying another's direction, shifting it
131   // to match the given blob, and making its initial extent the height
132   // of the blob, but its extended bounds from the bounds of the original.
133   TabVector(const TabVector& src, TabAlignment alignment,
134             const ICOORD& vertical_skew, BLOBNBOX* blob);
135 
136   // Simple accessors.
137   const ICOORD& startpt() const {
138     return startpt_;
139   }
140   const ICOORD& endpt() const {
141     return endpt_;
142   }
143   int extended_ymax() const {
144     return extended_ymax_;
145   }
146   int extended_ymin() const {
147     return extended_ymin_;
148   }
149   int sort_key() const {
150     return sort_key_;
151   }
152   void set_top_constraints(TabConstraint_LIST* constraints) {
153     top_constraints_ = constraints;
154   }
155   void set_bottom_constraints(TabConstraint_LIST* constraints) {
156     bottom_constraints_ = constraints;
157   }
158   TabVector_CLIST* partners() {
159     return &partners_;
160   }
161 
162   // Inline quasi-accessors that require some computation.
163 
164   // Compute the x coordinate at the given y coordinate.
165   int XAtY(int y) const {
166     int height = endpt_.y() - startpt_.y();
167     if (height != 0)
168       return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height +
169              startpt_.x();
170     else
171       return startpt_.x();
172   }
173 
174   // Compute the vertical overlap with the other TabVector.
175   int VOverlap(const TabVector& other) const {
176     return MIN(other.endpt_.y(), endpt_.y()) -
177            MAX(other.startpt_.y(), startpt_.y());
178   }
179   // Compute the vertical overlap with the given y bounds.
180   int VOverlap(int top_y, int bottom_y) const {
181     return MIN(top_y, endpt_.y()) - MAX(bottom_y, startpt_.y());
182   }
183   // Compute the extended vertical overlap with the given y bounds.
184   int ExtendedOverlap(int top_y, int bottom_y) const {
185     return MIN(top_y, extended_ymax_) - MAX(bottom_y, extended_ymin_);
186   }
187 
188   // Return true if this is a left tab stop, either aligned, or ragged.
189   bool IsLeftTab() const {
190     return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
191   }
192   // Return true if this is a right tab stop, either aligned, or ragged.
193   bool IsRightTab() const {
194     return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
195   }
196   // Return true if this is a separator.
197   bool IsSeparator() const {
198     return alignment_ == TA_SEPARATOR;
199   }
200   // Return true if this is a ragged tab top, either left or right.
201   bool IsRagged() const {
202     return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
203   }
204 
205   // Return true if this vector is to the left of the other in terms
206   // of sort_key_.
207   bool IsLeftOf(const TabVector& other) const {
208     return sort_key_ < other.sort_key_;
209   }
210 
211   // Return true if the vector has no partners.
212   bool Partnerless() {
213     return partners_.empty();
214   }
215 
216   // Return the number of tab boxes in this vector.
217   int BoxCount() {
218     return boxes_.length();
219   }
220 
221   // Lock the vector from refits by clearing the boxes_ list.
222   void Freeze() {
223     boxes_.shallow_clear();
224   }
225 
226   // Flip x and y on the ends so a vector can be created from flipped input.
227   void XYFlip() {
228     int x = startpt_.y();
229     startpt_.set_y(startpt_.x());
230     startpt_.set_x(x);
231     x = endpt_.y();
232     endpt_.set_y(endpt_.x());
233     endpt_.set_x(x);
234   }
235 
236   // Separate function to compute the sort key for a given coordinate pair.
237   static int SortKey(const ICOORD& vertical, int x, int y) {
238     ICOORD pt(x, y);
239     return pt * vertical;
240   }
241 
242   // Return the x at the given y for the given sort key.
243   static int XAtY(const ICOORD& vertical, int sort_key, int y) {
244     if (vertical.y() != 0)
245       return (vertical.x() * y + sort_key) / vertical.y();
246     else
247       return sort_key;
248   }
249 
250   // Sort function for E2LIST::sort to sort by sort_key_.
251   static int SortVectorsByKey(const void* v1, const void* v2) {
252     const TabVector* tv1 = *reinterpret_cast<const TabVector* const *>(v1);
253     const TabVector* tv2 = *reinterpret_cast<const TabVector* const *>(v2);
254     return tv1->sort_key_ - tv2->sort_key_;
255   }
256 
257   // More complex members.
258 
259   // Extend this vector to include the supplied blob if it doesn't
260   // already have it.
261   void ExtendToBox(BLOBNBOX* blob);
262 
263   // Set the ycoord of the start and move the xcoord to match.
264   void SetYStart(int start_y);
265   // Set the ycoord of the end and move the xcoord to match.
266   void SetYEnd(int end_y);
267 
268   // Rotate the ends by the given vector.
269   void Rotate(const FCOORD& rotation);
270 
271   // Setup the initial constraints, being the limits of
272   // the vector and the extended ends.
273   void SetupConstraints();
274 
275   // Setup the constraints between the partners of this TabVector.
276   void SetupPartnerConstraints();
277 
278   // Setup the constraints between this and its partner.
279   void SetupPartnerConstraints(TabVector* partner);
280 
281   // Use the constraints to modify the top and bottom.
282   void ApplyConstraints();
283 
284   // Merge close tab vectors of the same side that overlap.
285   static void MergeSimilarTabVectors(const ICOORD& vertical,
286                                      TabVector_LIST* vectors, BlobGrid* grid);
287 
288   // Return true if this vector is the same side, overlaps, and close
289   // enough to the other to be merged.
290   bool SimilarTo(const ICOORD& vertical,
291                  const TabVector& other, BlobGrid* grid) const;
292 
293   // Eat the other TabVector into this and delete it.
294   void MergeWith(const ICOORD& vertical, TabVector* other);
295 
296   // Add a new element to the list of partner TabVectors.
297   // Partners must be added in order of increasing y coordinate of the text line
298   // that makes them partners.
299   // Groups of identical partners are merged into one.
300   void AddPartner(TabVector* partner);
301 
302   // Return true if other is a partner of this.
303   bool IsAPartner(const TabVector* other);
304 
305   // Print basic information about this tab vector.
306   void Print(const char* prefix);
307 
308   // Print basic information about this tab vector and every box in it.
309   void Debug(const char* prefix);
310 
311   // Draw this tabvector in place in the given window.
312   void Display(ScrollView* tab_win);
313 
314   // Refit the line and/or re-evaluate the vector if the dirty flags are set.
315   void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);
316 
317   // Evaluate the vector in terms of coverage of its length by good-looking
318   // box edges. A good looking box is one where its nearest neighbour on the
319   // inside is nearer than half the distance its nearest neighbour on the
320   // outside of the putative column. Bad boxes are removed from the line.
321   // A second pass then further filters boxes by requiring that the gutter
322   // width be a minimum fraction of the mean gutter along the line.
323   void Evaluate(const ICOORD& vertical, TabFind* finder);
324 
325   // (Re)Fit a line to the stored points. Returns false if the line
326   // is degenerate.
327   bool Fit(ICOORD vertical, bool force_parallel);
328 
329   // Return the partner of this TabVector if the vector qualifies as
330   // being a vertical text line, otherwise NULL.
331   TabVector* VerticalTextlinePartner();
332 
333  private:
334   // Constructor is private as the static factory is the external way
335   // to build a TabVector.
336   TabVector(int extended_ymin, int extended_ymax,
337             TabAlignment alignment, BLOBNBOX_CLIST* boxes);
338 
339   // Delete this, but first, repoint all the partners to point to
340   // replacement. If replacement is NULL, then partner relationships
341   // are removed.
342   void Delete(TabVector* replacement);
343 
344  private:
345   // The bottom of the tab line.
346   ICOORD startpt_;
347   // The top of the tab line.
348   ICOORD endpt_;
349   // The lowest y that the vector might extend to.
350   int extended_ymin_;
351   // The highest y that the vector might extend to.
352   int extended_ymax_;
353   // Perpendicular distance of vector from a given vertical for sorting.
354   int sort_key_;
355   // Result of Evaluate 0-100. Coverage of line with good boxes.
356   int percent_score_;
357   // True if the boxes_ list has been modified, so a refit is needed.
358   bool needs_refit_;
359   // True if a fit has been done, so re-evaluation is needed.
360   bool needs_evaluation_;
361   // The type of this TabVector.
362   TabAlignment alignment_;
363   // The list of boxes whose edges are aligned at this TabVector.
364   BLOBNBOX_CLIST boxes_;
365   // List of TabVectors that have a connection with this via a text line.
366   TabVector_CLIST partners_;
367   // Constraints used to resolve the exact location of the top and bottom
368   // of the tab line.
369   TabConstraint_LIST* top_constraints_;
370   TabConstraint_LIST* bottom_constraints_;
371 };
372 
373 }  // namespace tesseract.
374 
375 #endif  // TESSERACT_TEXTORD_TABVECTOR_H__
376 
377 
378