1 ///////////////////////////////////////////////////////////////////////
2 // File: linefind.cpp
3 // Description: Class to find vertical lines in an image and create
4 // a corresponding list of empty blobs.
5 // Author: Ray Smith
6 // Created: Thu Mar 20 09:49:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
19 ///////////////////////////////////////////////////////////////////////
20
21 #include "linefind.h"
22 #include "alignedblob.h"
23 #include "tabvector.h"
24 #include "blobbox.h"
25 #include "edgblob.h"
26 // This entire file is dependent upon leptonica. If you don't have it,
27 // then the code doesn't do anything useful.
28 #ifdef HAVE_CONFIG_H
29 #include "config_auto.h"
30 #endif
31 #ifdef HAVE_LIBLEPT
32 #include "allheaders.h"
33 #endif
34
35 BOOL_VAR(textord_tabfind_show_vlines, false, "Show vertical rule lines");
36
37 namespace tesseract {
38
39 // Denominator of resolution makes max pixel width to allow thin lines.
40 const int kThinLineFraction = 30;
41 // Denominator of resolution makes min pixels to demand line lengths to be.
42 const int kMinLineLengthFraction = 8;
43 // Spacing of cracks across the page to break up tall vertical lines.
44 const int kCrackSpacing = 100;
45 // Grid size used by line finder. Not very critical.
46 const int kLineFindGridSize = 50;
47
48 // Finds vertical line objects in the given pix.
49 // Uses the given resolution to determine size thresholds instead of any
50 // that may be present in the pix.
51 // The output vertical_x and vertical_y contain a sum of the output vectors,
52 // thereby giving the mean vertical direction.
53 // The output vectors are owned by the list and Frozen (cannot refit) by
54 // having no boxes, as there is no need to refit or merge separator lines.
FindVerticalLines(int resolution,Pix * pix,int * vertical_x,int * vertical_y,TabVector_LIST * vectors)55 void LineFinder::FindVerticalLines(int resolution, Pix* pix,
56 int* vertical_x, int* vertical_y,
57 TabVector_LIST* vectors) {
58 #ifdef HAVE_LIBLEPT
59 Pix* line_pix;
60 Boxa* boxes = GetVLineBoxes(resolution, pix, &line_pix);
61 C_BLOB_LIST line_cblobs;
62 int width = pixGetWidth(pix);
63 int height = pixGetHeight(pix);
64 ConvertBoxaToBlobs(width, height, &boxes, &line_cblobs);
65 // Make the BLOBNBOXes from the C_BLOBs.
66 BLOBNBOX_LIST line_bblobs;
67 C_BLOB_IT blob_it(&line_cblobs);
68 BLOBNBOX_IT bbox_it(&line_bblobs);
69 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
70 C_BLOB* cblob = blob_it.data();
71 BLOBNBOX* bblob = new BLOBNBOX(cblob);
72 bbox_it.add_to_end(bblob);
73 }
74 ICOORD bleft(0, 0);
75 ICOORD tright(width, height);
76 FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
77 if (!vectors->empty()) {
78 // Some lines were found, so erase the unused blobs from the line image
79 // and then subtract the line image from the source.
80 bbox_it.move_to_first();
81 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
82 BLOBNBOX* blob = bbox_it.data();
83 if (blob->left_tab_type() == TT_UNCONFIRMED) {
84 const TBOX& box = blob->bounding_box();
85 Box* pixbox = boxCreate(box.left(), height - box.top(),
86 box.width(), box.height());
87 pixClearInRect(line_pix, pixbox);
88 boxDestroy(&pixbox);
89 }
90 }
91 pixDilateBrick(line_pix, line_pix, 1, 3);
92 pixSubtract(pix, pix, line_pix);
93 if (textord_tabfind_show_vlines)
94 pixWrite("vlinesclean.png", line_pix, IFF_PNG);
95 ICOORD vertical;
96 vertical.set_with_shrink(*vertical_x, *vertical_y);
97 TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
98 }
99 pixDestroy(&line_pix);
100 #endif
101 }
102
103 // Finds horizontal line objects in the given pix.
104 // Uses the given resolution to determine size thresholds instead of any
105 // that may be present in the pix.
106 // The output vectors are owned by the list and Frozen (cannot refit) by
107 // having no boxes, as there is no need to refit or merge separator lines.
FindHorizontalLines(int resolution,Pix * pix,TabVector_LIST * vectors)108 void LineFinder::FindHorizontalLines(int resolution, Pix* pix,
109 TabVector_LIST* vectors) {
110 #ifdef HAVE_LIBLEPT
111 Pix* line_pix;
112 Boxa* boxes = GetHLineBoxes(resolution, pix, &line_pix);
113 C_BLOB_LIST line_cblobs;
114 int width = pixGetWidth(pix);
115 int height = pixGetHeight(pix);
116 ConvertBoxaToBlobs(height, width, &boxes, &line_cblobs);
117 // Make the BLOBNBOXes from the C_BLOBs.
118 BLOBNBOX_LIST line_bblobs;
119 C_BLOB_IT blob_it(&line_cblobs);
120 BLOBNBOX_IT bbox_it(&line_bblobs);
121 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
122 C_BLOB* cblob = blob_it.data();
123 BLOBNBOX* bblob = new BLOBNBOX(cblob);
124 bbox_it.add_to_end(bblob);
125 }
126 ICOORD bleft(0, 0);
127 ICOORD tright(height, width);
128 int vertical_x, vertical_y;
129 FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
130 vectors);
131 if (!vectors->empty()) {
132 // Some lines were found, so erase the unused blobs from the line image
133 // and then subtract the line image from the source.
134 bbox_it.move_to_first();
135 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
136 BLOBNBOX* blob = bbox_it.data();
137 if (blob->left_tab_type() == TT_UNCONFIRMED) {
138 const TBOX& box = blob->bounding_box();
139 // Coords are in tess format so filp x and y and then covert
140 // to leptonica by height -y.
141 Box* pixbox = boxCreate(box.bottom(), height - box.right(),
142 box.height(), box.width());
143 pixClearInRect(line_pix, pixbox);
144 boxDestroy(&pixbox);
145 }
146 }
147 pixDilateBrick(line_pix, line_pix, 3, 1);
148 pixSubtract(pix, pix, line_pix);
149 if (textord_tabfind_show_vlines)
150 pixWrite("hlinesclean.png", line_pix, IFF_PNG);
151 ICOORD vertical;
152 vertical.set_with_shrink(vertical_x, vertical_y);
153 TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
154 // Iterate the vectors to flip them.
155 TabVector_IT h_it(vectors);
156 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
157 h_it.data()->XYFlip();
158 }
159 }
160 pixDestroy(&line_pix);
161 #endif
162 }
163
164 // Converts the Boxa array to a list of C_BLOB, getting rid of severely
165 // overlapping outlines and those that are children of a bigger one.
166 // The output is a list of C_BLOBs that are owned by the list.
167 // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
168 // bounding boxes. The Boxa is consumed and destroyed.
ConvertBoxaToBlobs(int image_width,int image_height,Boxa ** boxes,C_BLOB_LIST * blobs)169 void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
170 Boxa** boxes, C_BLOB_LIST* blobs) {
171 #ifdef HAVE_LIBLEPT
172 C_OUTLINE_LIST outlines;
173 C_OUTLINE_IT ol_it = &outlines;
174 // Iterate the boxes to convert to outlines.
175 int nboxes = boxaGetCount(*boxes);
176 for (int i = 0; i < nboxes; ++i) {
177 l_int32 x, y, width, height;
178 boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
179 // Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
180 // as there is no outline, just a bounding box, but with some very
181 // small changes to coutln.cpp, it works nicely.
182 ICOORD top_left(x, image_height - y);
183 ICOORD bot_right(x + width, image_height - (y + height));
184 CRACKEDGE startpt;
185 startpt.pos = top_left;
186 C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
187 ol_it.add_after_then_move(outline);
188 }
189 // Use outlines_to_blobs to convert the outlines to blobs and find
190 // overlapping and contained objects. The output list of blobs in the block
191 // has all the bad ones filtered out and deleted.
192 BLOCK block;
193 ICOORD page_tl(0, 0);
194 ICOORD page_br(image_width, image_height);
195 outlines_to_blobs(&block, page_tl, page_br, &outlines);
196 // Transfer the created blobs to the output list.
197 C_BLOB_IT blob_it(blobs);
198 blob_it.add_list_after(block.blob_list());
199 // The boxes aren't needed any more.
200 boxaDestroy(boxes);
201 #endif
202 }
203
204 // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
205 // are the bounds of the image on which the input line_bblobs were found.
206 // The input line_bblobs list is const really.
207 // The output vertical_x and vertical_y are the total of all the vectors.
208 // The output list of TabVector makes no reference to the input BLOBNBOXes.
FindLineVectors(const ICOORD & bleft,const ICOORD & tright,BLOBNBOX_LIST * line_bblobs,int * vertical_x,int * vertical_y,TabVector_LIST * vectors)209 void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
210 BLOBNBOX_LIST* line_bblobs,
211 int* vertical_x, int* vertical_y,
212 TabVector_LIST* vectors) {
213 BLOBNBOX_IT bbox_it(line_bblobs);
214 int b_count = 0;
215 // Put all the blobs into the grid to find the lines, and move the blobs
216 // to the output lists.
217 AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
218 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
219 BLOBNBOX* bblob = bbox_it.data();
220 bblob->set_left_tab_type(TT_UNCONFIRMED);
221 bblob->set_left_rule(bleft.x());
222 bblob->set_right_rule(tright.x());
223 bblob->set_left_crossing_rule(bleft.x());
224 bblob->set_right_crossing_rule(tright.x());
225 blob_grid.InsertBBox(false, true, bblob);
226 ++b_count;
227 }
228 if (textord_debug_tabfind)
229 tprintf("Inserted %d line blobs into grid\n", b_count);
230 if (b_count == 0)
231 return;
232
233 // Search the entire grid, looking for vertical line vectors.
234 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> lsearch(&blob_grid);
235 BLOBNBOX* bbox;
236 TabVector_IT vector_it(vectors);
237 *vertical_x = 0;
238 *vertical_y = 1;
239 lsearch.StartFullSearch();
240 while ((bbox = lsearch.NextFullSearch()) != NULL) {
241 if (bbox->left_tab_type() == TT_UNCONFIRMED) {
242 const TBOX& box = bbox->bounding_box();
243 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
244 tprintf("Finding line vector starting at bbox (%d,%d)\n",
245 box.left(), box.bottom());
246 AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
247 TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
248 vertical_x,
249 vertical_y);
250 if (vector != NULL) {
251 vector->Freeze();
252 vector_it.add_to_end(vector);
253 }
254 }
255 }
256 ScrollView* line_win = NULL;
257 if (textord_tabfind_show_vlines) {
258 line_win = blob_grid.MakeWindow(0, 50, "Vlines");
259 blob_grid.DisplayBoxes(line_win);
260 line_win = blob_grid.DisplayTabs("Vlines", line_win);
261 }
262 }
263
264 // Get a set of bounding boxes of possible vertical lines in the image.
265 // The input resolution overrides any resolution set in src_pix.
266 // The output line_pix contains just all the detected lines.
GetVLineBoxes(int resolution,Pix * src_pix,Pix ** line_pix)267 Boxa* LineFinder::GetVLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
268 #ifdef HAVE_LIBLEPT
269 // Remove any parts of 1 inch/kThinLineFraction wide or more, by opening
270 // away the thin lines and subtracting what's left.
271 // This is very generous and will leave in even quite wide lines.
272 Pix* pixt1 = pixOpenBrick(NULL, src_pix, resolution / kThinLineFraction, 1);
273 pixSubtract(pixt1, src_pix, pixt1);
274 // Spread sideways to allow for some skew.
275 Pix* pixt2 = pixDilateBrick(NULL, pixt1, 3, 1);
276 // Now keep only tall stuff of height at least 1 inch/kMinLineLengthFraction.
277 pixOpenBrick(pixt1, pixt2, 1, resolution / kMinLineLengthFraction);
278 pixDestroy(&pixt2);
279 // Put a single pixel crack in every line at an arbitrary spacing,
280 // so they break up and the bounding boxes can be used to get the
281 // direction accurately enough without needing outlines.
282 int wpl = pixGetWpl(pixt1);
283 int height = pixGetHeight(pixt1);
284 l_uint32* data = pixGetData(pixt1);
285 for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
286 memset(data + wpl * y, 0, wpl * sizeof(*data));
287 }
288 if (textord_tabfind_show_vlines)
289 pixWrite("vlines.png", pixt1, IFF_PNG);
290 Boxa* boxa = pixConnComp(pixt1, NULL, 8);
291 *line_pix = pixt1;
292 return boxa;
293 #else
294 return NULL;
295 #endif
296 }
297
298 // Get a set of bounding boxes of possible horizontal lines in the image.
299 // The input resolution overrides any resolution set in src_pix.
300 // The output line_pix contains just all the detected lines.
301 // The output boxes undergo the transformation (x,y)->(height-y,x) so the
302 // lines can be found with a vertical line finder afterwards.
303 // This transformation allows a simple x/y flip to reverse it in tesseract
304 // coordinates and it is faster to flip the lines than rotate the image.
GetHLineBoxes(int resolution,Pix * src_pix,Pix ** line_pix)305 Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
306 #ifdef HAVE_LIBLEPT
307 // Remove any parts of 1 inch/kThinLineFraction high or more, by opening
308 // away the thin lines and subtracting what's left.
309 // This is very generous and will leave in even quite wide lines.
310 Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction);
311 pixSubtract(pixt1, src_pix, pixt1);
312 // Spread vertically to allow for some skew.
313 Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3);
314 // Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction.
315 pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1);
316 pixDestroy(&pixt2);
317 // Put a single pixel crack in every line at an arbitrary spacing,
318 // so they break up and the bounding boxes can be used to get the
319 // direction accurately enough without needing outlines.
320 int wpl = pixGetWpl(pixt1);
321 int width = pixGetWidth(pixt1);
322 int height = pixGetHeight(pixt1);
323 l_uint32* data = pixGetData(pixt1);
324 for (int y = 0; y < height; ++y, data += wpl) {
325 for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
326 CLEAR_DATA_BIT(data, x);
327 }
328 }
329 if (textord_tabfind_show_vlines)
330 pixWrite("hlines.png", pixt1, IFF_PNG);
331 Boxa* boxa = pixConnComp(pixt1, NULL, 8);
332 *line_pix = pixt1;
333
334 // Iterate the boxes to flip x and y.
335 int nboxes = boxaGetCount(boxa);
336 for (int i = 0; i < nboxes; ++i) {
337 l_int32 x, y, box_width, box_height;
338 boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height);
339 Box* box = boxCreate(height - (y + box_height),
340 width - (x + box_width), box_height, box_width);
341 boxaReplaceBox(boxa, i, box);
342 }
343 return boxa;
344 #else
345 return NULL;
346 #endif
347 }
348
349 } // namespace tesseract.
350
351