• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 ///////////////////////////////////////////////////////////////////////
2 // File:        imagefind.cpp
3 // Description: Function to find image and drawing regions in an image
4 //              and create a corresponding list of empty blobs.
5 // Author:      Ray Smith
6 // Created:     Thu Mar 20 09:49:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
19 ///////////////////////////////////////////////////////////////////////
20 
21 #include "imagefind.h"
22 #include "varable.h"
23 
24 // This entire file is dependent upon leptonica. If you don't have it,
25 // you don't get this functionality.
26 #ifdef HAVE_CONFIG_H
27 #include "config_auto.h"
28 #endif
29 #ifdef HAVE_LIBLEPT
30 #include "allheaders.h"
31 #endif
32 
33 BOOL_VAR(textord_tabfind_show_images, false, "Show image blobs");
34 
35 namespace tesseract {
36 
37 // Fraction of width or height of on pixels that can be discarded from a
38 // roughly rectangular image.
39 const double kMinRectangularFraction = 0.125;
40 // Fraction of width or height to consider image completely used.
41 const double kMaxRectangularFraction = 0.75;
42 // Fraction of width or height to allow transition from kMinRectangularFraction
43 // to kMaxRectangularFraction, equivalent to a dy/dx skew.
44 const double kMaxRectangularGradient = 0.1;  // About 6 degrees.
45 
46 // Finds image regions within the source pix (page image) and returns
47 // the image regions as a Boxa, Pixa pair, analgous to pixConnComp.
48 // The returned boxa, pixa may be NULL, meaning no images found.
49 // If not NULL, they must be destroyed by the caller.
FindImages(Pix * pix,Boxa ** boxa,Pixa ** pixa)50 void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) {
51   *boxa = NULL;
52   *pixa = NULL;
53 
54 #ifdef HAVE_LIBLEPT
55   // Reduce by factor 2.
56   Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
57   pixDisplayWrite(pixr, textord_tabfind_show_images);
58 
59   // Get the halftone mask directly from Leptonica.
60   Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL,
61                                    textord_tabfind_show_images);
62   pixDestroy(&pixr);
63   if (pixht2 == NULL)
64     return;
65 
66   // Expand back up again.
67   Pix *pixht = pixExpandReplicate(pixht2, 2);
68   pixDisplayWrite(pixht, textord_tabfind_show_images);
69   pixDestroy(&pixht2);
70 
71   // Fill to capture pixels near the mask edges that were missed
72   Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
73   pixOr(pixht, pixht, pixt);
74   pixDestroy(&pixt);
75 
76   // Eliminate lines and bars that may be joined to images.
77   Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
78   pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
79   pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
80   Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
81   Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
82   pixDestroy(&pixreduced);
83   pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
84   Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
85   pixDestroy(&pixreduced2);
86   pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
87   // Combine the coarse and fine image masks.
88   pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
89   pixDestroy(&pixfinemask);
90   // Dilate a bit to make sure we get everything.
91   pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
92   Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
93   pixDestroy(&pixcoarsemask);
94   pixDisplayWrite(pixmask, textord_tabfind_show_images);
95   // And the image mask with the line and bar remover.
96   pixAnd(pixht, pixht, pixmask);
97   pixDestroy(&pixmask);
98   pixDisplayWrite(pixht, textord_tabfind_show_images);
99   // Find the individual image regions in the mask image.
100   *boxa = pixConnComp(pixht, pixa, 8);
101   pixDestroy(&pixht);
102   // Rectangularize the individual images. If a sharp edge in vertical and/or
103   // horizontal occupancy can be found, it indicates a probably rectangular
104   // image with unwanted bits merged on, so clip to the approximate rectangle.
105   int npixes = pixaGetCount(*pixa);
106   for (int i = 0; i < npixes; ++i) {
107     int x_start, x_end, y_start, y_end;
108     Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
109     pixDisplayWrite(img_pix, textord_tabfind_show_images);
110     if (pixNearlyRectangular(img_pix, kMinRectangularFraction,
111                              kMaxRectangularFraction,
112                              kMaxRectangularGradient,
113                              &x_start, &y_start, &x_end, &y_end)) {
114       // Add 1 to the size as a kludgy flag to indicate to the later stages
115       // of processing that it is a clipped rectangular image .
116       Pix* simple_pix = pixCreate(pixGetWidth(img_pix) + 1,
117                                   pixGetHeight(img_pix), 1);
118       pixDestroy(&img_pix);
119       pixRasterop(simple_pix, x_start, y_start, x_end - x_start,
120                   y_end - y_start, PIX_SET, NULL, 0, 0);
121       // pixaReplacePix takes ownership of the simple_pix.
122       pixaReplacePix(*pixa, i, simple_pix, NULL);
123       img_pix = pixaGetPix(*pixa, i, L_CLONE);
124     }
125     // Subtract the pix from the correct location in the master image.
126     l_int32 x, y, width, height;
127     pixDisplayWrite(img_pix, textord_tabfind_show_images);
128     boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
129     pixRasterop(pix, x, y, width, height, PIX_NOT(PIX_SRC) & PIX_DST,
130                 img_pix, 0, 0);
131     pixDestroy(&img_pix);
132   }
133 #endif
134 }
135 
136 #ifdef HAVE_LIBLEPT
137 // Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
138 // stepping y+=y_step, until y=y_end. *ystart is input/output.
139 // If the number of black pixels in a row, pix_count fits this pattern:
140 // 0 or more rows with pix_count < min_count then
141 // <= mid_width rows with min_count <= pix_count <= max_count then
142 // a row with pix_count > max_count then
143 // true is returned, and *y_start = the first y with pix_count >= min_count.
HScanForEdge(uinT32 * data,int wpl,int x_start,int x_end,int min_count,int mid_width,int max_count,int y_end,int y_step,int * y_start)144 static bool HScanForEdge(uinT32* data, int wpl, int x_start, int x_end,
145                          int min_count, int mid_width, int max_count,
146                          int y_end, int y_step, int* y_start) {
147   int mid_rows = 0;
148   for (int y = *y_start; y != y_end; y += y_step) {
149     // Need pixCountPixelsInRow(pix, y, &pix_count, NULL) to count in a subset.
150     int pix_count = 0;
151     uinT32* line = data + wpl * y;
152     for (int x = x_start; x < x_end; ++x) {
153       if (GET_DATA_BIT(line, x))
154         ++pix_count;
155     }
156     if (mid_rows == 0 && pix_count < min_count)
157       continue;      // In the min phase.
158     if (mid_rows == 0)
159       *y_start = y;  // Save the y_start where we came out of the min phase.
160     if (pix_count > max_count)
161       return true;   // Found the pattern.
162     ++mid_rows;
163     if (mid_rows > mid_width)
164       break;         // Middle too big.
165   }
166   return false;      // Never found max_count.
167 }
168 
169 // Scans vertically on y=[y_start,y_end), starting with x=*x_start,
170 // stepping x+=x_step, until x=x_end. *x_start is input/output.
171 // If the number of black pixels in a column, pix_count fits this pattern:
172 // 0 or more cols with pix_count < min_count then
173 // <= mid_width cols with min_count <= pix_count <= max_count then
174 // a column with pix_count > max_count then
175 // true is returned, and *x_start = the first x with pix_count >= min_count.
VScanForEdge(uinT32 * data,int wpl,int y_start,int y_end,int min_count,int mid_width,int max_count,int x_end,int x_step,int * x_start)176 static bool VScanForEdge(uinT32* data, int wpl, int y_start, int y_end,
177                          int min_count, int mid_width, int max_count,
178                          int x_end, int x_step, int* x_start) {
179   int mid_cols = 0;
180   for (int x = *x_start; x != x_end; x += x_step) {
181     int pix_count = 0;
182     uinT32* line = data + y_start * wpl;
183     for (int y = y_start; y < y_end; ++y, line += wpl) {
184       if (GET_DATA_BIT(line, x))
185         ++pix_count;
186     }
187     if (mid_cols == 0 && pix_count < min_count)
188       continue;      // In the min phase.
189     if (mid_cols == 0)
190       *x_start = x;  // Save the place where we came out of the min phase.
191     if (pix_count > max_count)
192       return true;   // found the pattern.
193     ++mid_cols;
194     if (mid_cols > mid_width)
195       break;         // Middle too big.
196   }
197   return false;      // Never found max_count.
198 }
199 #endif
200 
201 // Returns true if there is a rectangle in the source pix, such that all
202 // pixel rows and column slices outside of it have less than
203 // min_fraction of the pixels black, and within max_skew_gradient fraction
204 // of the pixels on the inside, there are at least max_fraction of the
205 // pixels black. In other words, the inside of the rectangle looks roughly
206 // rectangular, and the outside of it looks like extra bits.
207 // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
208 // Note: the algorithm is iterative, allowing it to slice off pixels from
209 // one edge, allowing it to then slice off more pixels from another edge.
pixNearlyRectangular(Pix * pix,double min_fraction,double max_fraction,double max_skew_gradient,int * x_start,int * y_start,int * x_end,int * y_end)210 bool ImageFinder::pixNearlyRectangular(Pix* pix,
211                                        double min_fraction, double max_fraction,
212                                        double max_skew_gradient,
213                                        int* x_start, int* y_start,
214                                        int* x_end, int* y_end) {
215 #ifdef HAVE_LIBLEPT
216   *x_start = 0;
217   *x_end = pixGetWidth(pix);
218   *y_start = 0;
219   *y_end = pixGetHeight(pix);
220 
221   uinT32* data = pixGetData(pix);
222   int wpl = pixGetWpl(pix);
223   bool any_cut = false;
224   bool left_done = false;
225   bool right_done = false;
226   bool top_done = false;
227   bool bottom_done = false;
228   do {
229     any_cut = false;
230     // Find the top/bottom edges.
231     int width = *x_end - *x_start;
232     int min_count = static_cast<int>(width * min_fraction);
233     int max_count = static_cast<int>(width * max_fraction);
234     int edge_width = static_cast<int>(width * max_skew_gradient);
235     if (!top_done && HScanForEdge(data, wpl, *x_start, *x_end,
236                                   min_count, edge_width, max_count,
237                                   *y_end, 1, y_start)) {
238       top_done = true;
239       any_cut = true;
240     }
241     --(*y_end);
242     if (!bottom_done && HScanForEdge(data, wpl, *x_start, *x_end,
243                                      min_count, edge_width, max_count,
244                                      *y_start, -1, y_end)) {
245       bottom_done = true;
246       any_cut = true;
247     }
248     ++(*y_end);
249 
250     // Find the left/right edges.
251     int height = *y_end - *y_start;
252     min_count = static_cast<int>(height * min_fraction);
253     max_count = static_cast<int>(height * max_fraction);
254     edge_width = static_cast<int>(height * max_skew_gradient);
255     if (!left_done && VScanForEdge(data, wpl, *y_start, *y_end,
256                                    min_count, edge_width, max_count,
257                                    *x_end, 1, x_start)) {
258       left_done = true;
259       any_cut = true;
260     }
261     --(*x_end);
262     if (!right_done && VScanForEdge(data, wpl, *y_start, *y_end,
263                                     min_count, edge_width, max_count,
264                                     *x_start, -1, x_end)) {
265       right_done = true;
266       any_cut = true;
267     }
268     ++(*x_end);
269   } while (any_cut);
270 
271   // All edges must satisfy the condition of sharp gradient in pixel density
272   // in order for the full rectangle to be present.
273   return left_done && right_done && top_done && bottom_done;
274 #else
275   return false;
276 #endif
277 }
278 
279 #ifdef HAVE_LIBLEPT
280 // Scanning rows horizontally on x=[x_start, x_end), returns the first y row
281 // starting at y_start, stepping by y_step to y_end in which there is
282 // any black pixel.
HScanForBlack(uinT32 * data,int wpl,int x_start,int x_end,int y_start,int y_end,int y_step)283 static int HScanForBlack(uinT32* data, int wpl, int x_start, int x_end,
284                          int y_start, int y_end, int y_step) {
285   for (int y = y_start; y != y_end; y += y_step) {
286     uinT32* line = data + wpl * y;
287     for (int x = x_start; x < x_end; ++x) {
288       if (GET_DATA_BIT(line, x))
289         return y;
290     }
291   }
292   return y_end;
293 }
294 
295 // Scanning columns vertically on y=[y_start, y_end), returns the first x
296 // colum starting at x_start, stepping by x_step to x_end in which there is
297 // any black pixel.
VScanForBlack(uinT32 * data,int wpl,int x_start,int x_end,int y_start,int y_end,int x_step)298 static int VScanForBlack(uinT32* data, int wpl, int x_start, int x_end,
299                          int y_start, int y_end, int x_step) {
300   for (int x = x_start; x != x_end; x += x_step) {
301     uinT32* line = data + y_start * wpl;
302     for (int y = y_start; y < y_end; ++y, line += wpl) {
303       if (GET_DATA_BIT(line, x))
304         return x;
305     }
306   }
307   return x_end;
308 }
309 #endif
310 
311 // Given an input pix, and a bounding rectangle, the sides of the rectangle
312 // are shrunk inwards until they bound any black pixels found within the
313 // original rectangle.
BoundsWithinRect(Pix * pix,int * x_start,int * y_start,int * x_end,int * y_end)314 void ImageFinder::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
315                                    int* x_end, int* y_end) {
316 #ifdef HAVE_LIBLEPT
317   // This can probably be done with a lot less code using pixClipRect and
318   // pixConnComp, but this code is probably a lot faster, given that most
319   // uses will be applied to a solid black region.
320   int width = pixGetWidth(pix);
321   int height = pixGetHeight(pix);
322   if (*x_start < 0) *x_start = 0;
323   if (*x_end > width) *x_end = width;
324   if (*y_start < 0) *y_start = 0;
325   if (*y_end > height) *y_end = height;
326 
327   uinT32* data = pixGetData(pix);
328   int wpl = pixGetWpl(pix);
329   *y_start = HScanForBlack(data, wpl, *x_start, *x_end, *y_start, *y_end, 1);
330   if (*y_end <= *y_start)
331     return;
332   *y_end = HScanForBlack(data, wpl, *x_start, *x_end,
333                          *y_end - 1, *y_start - 1, -1) + 1;
334   *x_start = VScanForBlack(data, wpl, *x_start, *x_end, *y_start, *y_end, 1);
335   *x_end = VScanForBlack(data, wpl, *x_end - 1, *x_start - 1,
336                          *y_start, *y_end, -1) + 1;
337 #endif
338 }
339 
340 }  // namespace tesseract.
341 
342