1 /////////////////////////////////////////////////////////////////////// 2 // File: alignedblob.h 3 // Description: A class to find vertically aligned blobs in a BBGrid, 4 // and a struct to hold control parameters. 5 // Author: Ray Smith 6 // Created: Fri Mar 21 15:03:01 PST 2008 7 // 8 // (C) Copyright 2008, Google Inc. 9 // Licensed under the Apache License, Version 2.0 (the "License"); 10 // you may not use this file except in compliance with the License. 11 // You may obtain a copy of the License at 12 // http://www.apache.org/licenses/LICENSE-2.0 13 // Unless required by applicable law or agreed to in writing, software 14 // distributed under the License is distributed on an "AS IS" BASIS, 15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 // See the License for the specific language governing permissions and 17 // limitations under the License. 18 // 19 /////////////////////////////////////////////////////////////////////// 20 21 #ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H__ 22 #define TESSERACT_TEXTORD_ALIGNEDBLOB_H__ 23 24 #include "bbgrid.h" 25 #include "blobbox.h" 26 #include "strngs.h" 27 #include "tabvector.h" 28 29 extern INT_VAR_H(textord_debug_bugs, 0, 30 "Turn on output related to bugs in tab finding"); 31 extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding"); 32 extern BOOL_VAR_H(textord_debug_images, false, 33 "Use greyed image background for debug"); 34 extern BOOL_VAR_H(textord_debug_printable, false, 35 "Make debug windows printable"); 36 37 namespace tesseract { 38 39 // Simple structure to hold the search parameters for AlignedBlob. 40 // The members are mostly derived from constants, which are 41 // conditioned on the alignment parameter. 42 // For finding vertical lines, a different set of constants are 43 // used, conditioned on the different constructor. 44 struct AlignedBlobParams { 45 // Constructor to set the parameters for finding aligned and ragged tabs. 46 // Vertical_x and vertical_y are the current estimates of the true vertical 47 // direction (up) in the image. Height is the height of the starter blob. 48 // v_gap_multiple is the multiple of height that will be used as a limit 49 // on vertical gap before giving up and calling the line ended. 50 // resolution is the original image resolution, and align0 indicates the 51 // type of tab stop to be found. 52 AlignedBlobParams(int vertical_x, int vertical_y, int height, 53 int v_gap_multiple, int resolution, 54 TabAlignment alignment0); 55 // Constructor to set the parameters for finding vertical lines. 56 // Vertical_x and vertical_y are the current estimates of the true vertical 57 // direction (up) in the image. Width is the width of the starter blob. 58 AlignedBlobParams(int vertical_x, int vertical_y, int width); 59 60 // Fit the vertical vector into an ICOORD, which is 16 bit. 61 void set_vertical(int vertical_x, int vertical_y); 62 63 double gutter_fraction; // Multiple of height used for min_gutter. 64 bool right_tab; // We are looking at right edges. 65 bool ragged; // We are looking for a ragged (vs aligned) edge. 66 TabAlignment alignment; // The type we are trying to produce. 67 TabType confirmed_type; // Type to flag blobs if accepted. 68 int max_v_gap; // Max vertical gap to be tolerated. 69 int min_gutter; // Minimum gutter between columns. 70 // Tolerances allowed on horizontal alignment of aligned edges. 71 int l_align_tolerance; // Left edges. 72 int r_align_tolerance; // Right edges. 73 // Conditions for accepting a line. 74 int min_points; // Minimum number of points to be OK. 75 int min_length; // Min length of completed line. 76 77 ICOORD vertical; // Current estimate of logical vertical. 78 }; 79 80 // The AlignedBlob class contains code to find vertically aligned blobs. 81 // This is factored out into a separate class, so it can be used by both 82 // vertical line finding (LineFind) and tabstop finding (TabFind). 83 class AlignedBlob : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> { 84 public: 85 AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright); 86 virtual ~AlignedBlob(); 87 88 // Return true if the given coordinates are within the test rectangle 89 // and the debug level is at least the given detail level. 90 static bool WithinTestRegion(int detail_level, int x, int y); 91 92 // Display the tab codes of the BLOBNBOXes in this grid. 93 ScrollView* DisplayTabs(const char* window_name, ScrollView* tab_win); 94 95 // Finds a vector corresponding to a set of vertically aligned blob edges 96 // running through the given box. The type of vector returned and the 97 // search parameters are determined by the AlignedBlobParams. 98 // vertical_x and y are updated with an estimate of the real 99 // vertical direction. (skew finding.) 100 // Returns NULL if no decent vector can be found. 101 TabVector* FindVerticalAlignment(AlignedBlobParams align_params, 102 BLOBNBOX* bbox, 103 int* vertical_x, int* vertical_y); 104 105 // Increment the serial number counter and set the string to use 106 // for a filename if textord_debug_images is true. 107 static void IncrementDebugPix(); 108 109 // Return the string to use for a filename if textord_debug_images is true. 110 // Use IncrementDebugPix first to set the filename, and each time is 111 // to be incremented. textord_debug_pix()112 static const STRING& textord_debug_pix() { 113 return textord_debug_pix_; 114 } 115 116 private: 117 // Find a set of blobs that are aligned in the given vertical 118 // direction with the given blob. Returns a list of aligned 119 // blobs and the number in the list. 120 // For other parameters see FindAlignedBlob below. 121 int AlignTabs(const AlignedBlobParams& params, 122 bool top_to_bottom, BLOBNBOX* bbox, 123 BLOBNBOX_CLIST* good_points, int* end_y); 124 125 // Search vertically for a blob that is aligned with the input bbox. 126 // The search parameters are determined by AlignedBlobParams. 127 // top_to_bottom tells whether to search down or up. 128 // The return value is NULL if nothing was found in the search box 129 // or if a blob was found in the gutter. On a NULL return, end_y 130 // is set to the edge of the search box or the leading edge of the 131 // gutter blob if one was found. 132 BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p, 133 bool top_to_bottom, BLOBNBOX* bbox, 134 int x_start, int* end_y); 135 136 // Name of image file to use if textord_debug_images is true. 137 static STRING textord_debug_pix_; 138 // Index to image file to use if textord_debug_images is true. 139 static int debug_pix_index_; 140 }; 141 142 } // namespace tesseract. 143 144 #endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H__ 145 146