• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 /*
17  *   pageseg.c
18  *
19  *      Top level page segmentation
20  *          l_int32   pixGetRegionsBinary()
21  *
22  *      Halftone region extraction
23  *          PIX      *pixGenHalftoneMask()
24  *
25  *      Textline extraction
26  *          PIX      *pixGenTextlineMask()
27  *
28  *      Textblock extraction
29  *          PIX      *pixGenTextblockMask()
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "allheaders.h"
35 
36 
37 /*------------------------------------------------------------------*
38  *                     Top level page segmentation                  *
39  *------------------------------------------------------------------*/
40 /*!
41  *  pixGetRegionsBinary()
42  *
43  *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
44  *              &pixhm (<optional return> halftone mask)
45  *              &pixtm (<optional return> textline mask)
46  *              &pixtb (<optional return> textblock mask)
47  *              debug (flag: set to 1 for debug output)
48  *      Return: 0 if OK, 1 on error
49  *
50  *  Notes:
51  *      (1) It is best to deskew the image before segmenting.
52  *      (2) The debug flag enables a number of outputs.  These
53  *          are included to show how to generate and save/display
54  *          these results.
55  */
56 l_int32
pixGetRegionsBinary(PIX * pixs,PIX ** ppixhm,PIX ** ppixtm,PIX ** ppixtb,l_int32 debug)57 pixGetRegionsBinary(PIX     *pixs,
58                     PIX    **ppixhm,
59                     PIX    **ppixtm,
60                     PIX    **ppixtb,
61                     l_int32  debug)
62 {
63 l_int32  htfound, tlfound;
64 PIX     *pixr, *pixt1, *pixt2;
65 PIX     *pixtext;  /* text pixels only */
66 PIX     *pixhm2;   /* halftone mask; 2x reduction */
67 PIX     *pixhm;    /* halftone mask;  */
68 PIX     *pixtm2;   /* textline mask; 2x reduction */
69 PIX     *pixtm;    /* textline mask */
70 PIX     *pixvws;   /* vertical white space mask */
71 PIX     *pixtb2;   /* textblock mask; 2x reduction */
72 PIX     *pixtbf2;  /* textblock mask; 2x reduction; small comps filtered */
73 PIX     *pixtb;    /* textblock mask */
74 
75     PROCNAME("pixGetRegionsBinary");
76 
77     if (ppixhm) *ppixhm = NULL;
78     if (ppixtm) *ppixtm = NULL;
79     if (ppixtb) *ppixtb = NULL;
80     if (!pixs)
81         return ERROR_INT("pixs not defined", procName, 1);
82     if (pixGetDepth(pixs) != 1)
83         return ERROR_INT("pixs not 1 bpp", procName, 1);
84 
85         /* 2x reduce, to 150 -200 ppi */
86     pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
87     pixDisplayWrite(pixr, debug);
88 
89         /* Get the halftone mask */
90     pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);
91 
92         /* Get the textline mask from the text pixels */
93     pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);
94 
95         /* Get the textblock mask from the textline mask */
96     pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
97     pixDestroy(&pixr);
98     pixDestroy(&pixtext);
99     pixDestroy(&pixvws);
100 
101         /* Remove small components from the mask, where a small
102          * component is defined as one with both width and height < 60 */
103     pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
104                               L_SELECT_IF_GTE, NULL);
105     pixDestroy(&pixtb2);
106     pixDisplayWrite(pixtbf2, debug);
107 
108         /* Expand all masks to full resolution, and do filling or
109          * small dilations for better coverage. */
110     pixhm = pixExpandReplicate(pixhm2, 2);
111     pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
112     pixOr(pixhm, pixhm, pixt1);
113     pixDestroy(&pixt1);
114     pixDisplayWrite(pixhm, debug);
115 
116     pixt1 = pixExpandReplicate(pixtm2, 2);
117     pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
118     pixDestroy(&pixt1);
119     pixDisplayWrite(pixtm, debug);
120 
121     pixt1 = pixExpandReplicate(pixtbf2, 2);
122     pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
123     pixDestroy(&pixt1);
124     pixDisplayWrite(pixtb, debug);
125 
126     pixDestroy(&pixhm2);
127     pixDestroy(&pixtm2);
128     pixDestroy(&pixtbf2);
129 
130         /* Debug: identify objects that are neither text nor halftone image */
131     if (debug) {
132         pixt1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
133         pixt2 = pixSubtract(NULL, pixt1, pixhm);  /* remove halftone pixels */
134         pixDisplayWrite(pixt2, 1);
135         pixDestroy(&pixt1);
136         pixDestroy(&pixt2);
137     }
138 
139         /* Debug: display textline components with random colors */
140     if (debug) {
141         l_int32  w, h;
142         BOXA    *boxa;
143         PIXA    *pixa;
144         boxa = pixConnComp(pixtm, &pixa, 8);
145         pixGetDimensions(pixtm, &w, &h, NULL);
146         pixt1 = pixaDisplayRandomCmap(pixa, w, h);
147         pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
148         pixDisplay(pixt1, 100, 100);
149         pixDisplayWrite(pixt1, 1);
150         pixaDestroy(&pixa);
151         boxaDestroy(&boxa);
152         pixDestroy(&pixt1);
153     }
154 
155         /* Debug: identify the outlines of each textblock */
156     if (debug) {
157         PIXCMAP  *cmap;
158         PTAA     *ptaa;
159         ptaa = pixGetOuterBordersPtaa(pixtb);
160 	ptaaWrite("/tmp/junk_tb_outlines.ptaa", ptaa, 1);
161         pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
162         cmap = pixGetColormap(pixt1);
163         pixcmapResetColor(cmap, 0, 130, 130, 130);
164         pixDisplay(pixt1, 500, 100);
165         pixDisplayWrite(pixt1, 1);
166         pixDestroy(&pixt1);
167         ptaaDestroy(&ptaa);
168     }
169 
170         /* Debug: get b.b. for all mask components */
171     if (debug) {
172         BOXA  *bahm, *batm, *batb;
173         bahm = pixConnComp(pixhm, NULL, 4);
174         batm = pixConnComp(pixtm, NULL, 4);
175         batb = pixConnComp(pixtb, NULL, 4);
176         boxaWrite("junk_htmask.boxa", bahm);
177         boxaWrite("junk_textmask.boxa", batm);
178         boxaWrite("junk_textblock.boxa", batb);
179 	boxaDestroy(&bahm);
180 	boxaDestroy(&batm);
181 	boxaDestroy(&batb);
182     }
183 
184     if (ppixhm)
185         *ppixhm = pixhm;
186     else
187         pixDestroy(&pixhm);
188     if (ppixtm)
189         *ppixtm = pixtm;
190     else
191         pixDestroy(&pixtm);
192     if (ppixtb)
193         *ppixtb = pixtb;
194     else
195         pixDestroy(&pixtb);
196 
197     return 0;
198 }
199 
200 
201 /*------------------------------------------------------------------*
202  *                    Halftone region extraction                    *
203  *------------------------------------------------------------------*/
204 /*!
205  *  pixGenHalftoneMask()
206  *
207  *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
208  *              &pixtext (<optional return> text part of pixs)
209  *              &htfound (<optional return> 1 if the mask is not empty)
210  *              debug (flag: 1 for debug output)
211  *      Return: pixd (halftone mask), or null on error
212  */
213 PIX *
pixGenHalftoneMask(PIX * pixs,PIX ** ppixtext,l_int32 * phtfound,l_int32 debug)214 pixGenHalftoneMask(PIX      *pixs,
215                    PIX     **ppixtext,
216                    l_int32  *phtfound,
217                    l_int32   debug)
218 {
219 l_int32  empty;
220 PIX     *pixt1, *pixt2, *pixhs, *pixhm, *pixd;
221 
222     PROCNAME("pixGenHalftoneMask");
223 
224     if (ppixtext) *ppixtext = NULL;
225     if (!pixs)
226         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
227     if (pixGetDepth(pixs) != 1)
228         return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
229 
230         /* Compute seed for halftone parts at 8x reduction */
231     pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0);
232     pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
233     pixhs = pixExpandReplicate(pixt2, 8);  /* back to 2x reduction */
234     pixDestroy(&pixt1);
235     pixDestroy(&pixt2);
236     pixDisplayWrite(pixhs, debug);
237 
238         /* Compute mask for connected regions */
239     pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
240     pixDisplayWrite(pixhm, debug);
241 
242         /* Fill seed into mask to get halftone mask */
243     pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
244 
245 #if 0
246         /* Moderate opening to remove thin lines, etc. */
247     pixOpenBrick(pixd, pixd, 10, 10);
248     pixDisplayWrite(pixd, debug);
249 #endif
250 
251         /* Check if mask is empty */
252     pixZero(pixd, &empty);
253     if (phtfound) {
254         *phtfound = 0;
255         if (!empty)
256             *phtfound = 1;
257     }
258 
259         /* Optionally, get all pixels that are not under the halftone mask */
260     if (ppixtext) {
261         if (empty)
262             *ppixtext = pixCopy(NULL, pixs);
263         else
264             *ppixtext = pixSubtract(NULL, pixs, pixd);
265         pixDisplayWrite(*ppixtext, debug);
266     }
267 
268     pixDestroy(&pixhs);
269     pixDestroy(&pixhm);
270     return pixd;
271 }
272 
273 
274 /*------------------------------------------------------------------*
275  *                         Textline extraction                      *
276  *------------------------------------------------------------------*/
277 /*!
278  *  pixGenTextlineMask()
279  *
280  *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
281  *              &pixvws (<return> vertical whitespace mask)
282  *              &tlfound (<optional return> 1 if the mask is not empty)
283  *              debug (flag: 1 for debug output)
284  *      Return: pixd (textline mask), or null on error
285  *
286  *  Notes:
287  *      (1) The input pixs should be deskewed.
288  *      (2) pixs should have no halftone pixels.
289  *      (3) Both the input image and the returned textline mask
290  *          are at the same resolution.
291  */
292 PIX *
pixGenTextlineMask(PIX * pixs,PIX ** ppixvws,l_int32 * ptlfound,l_int32 debug)293 pixGenTextlineMask(PIX      *pixs,
294                    PIX     **ppixvws,
295                    l_int32  *ptlfound,
296                    l_int32   debug)
297 {
298 l_int32  empty;
299 PIX     *pixt1, *pixt2, *pixvws, *pixd;
300 
301     PROCNAME("pixGenTextlineMask");
302 
303     if (!pixs)
304         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
305     if (!ppixvws)
306         return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL);
307     if (pixGetDepth(pixs) != 1)
308         return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
309 
310         /* First we need a vertical whitespace mask.  Invert the image. */
311     pixt1 = pixInvert(NULL, pixs);
312 
313         /* The whitespace mask will break textlines where there
314          * is a large amount of white space below or above.
315          * This can be prevented by identifying regions of the
316          * inverted image that have large horizontal extent (bigger than
317 	 * the separation between columns) and significant
318          * vertical extent (bigger than the separation between
319 	 * textlines), and subtracting this from the bg. */
320     pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0);
321     pixSubtract(pixt1, pixt1, pixt2);
322     pixDisplayWrite(pixt1, debug);
323     pixDestroy(&pixt2);
324 
325         /* Identify vertical whitespace by opening the remaining bg.
326          * o5.1 removes thin vertical bg lines and o1.200 extracts
327          * long vertical bg lines. */
328     pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0);
329     *ppixvws = pixvws;
330     pixDisplayWrite(pixvws, debug);
331     pixDestroy(&pixt1);
332 
333         /* Three steps to getting text line mask:
334          *   (1) close the characters and words in the textlines
335          *   (2) open the vertical whitespace corridors back up
336          *   (3) small opening to remove noise    */
337     pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1);
338     pixDisplayWrite(pixt1, debug);
339     pixd = pixSubtract(NULL, pixt1, pixvws);
340     pixOpenBrick(pixd, pixd, 3, 3);
341     pixDisplayWrite(pixd, debug);
342     pixDestroy(&pixt1);
343 
344         /* Check if text line mask is empty */
345     if (ptlfound) {
346         *ptlfound = 0;
347         pixZero(pixd, &empty);
348         if (!empty)
349             *ptlfound = 1;
350     }
351 
352     return pixd;
353 }
354 
355 
356 /*------------------------------------------------------------------*
357  *                       Textblock extraction                       *
358  *------------------------------------------------------------------*/
359 /*!
360  *  pixGenTextblockMask()
361  *
362  *      Input:  pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
363  *              pixvws (vertical white space mask)
364  *              debug (flag: 1 for debug output)
365  *      Return: pixd (textblock mask), or null on error
366  *
367  *  Notes:
368  *      (1) Both the input masks (textline and vertical white space) and
369  *          the returned textblock mask are at the same resolution.
370  *      (2) The result is somewhat noisy, in that small "blocks" of
371  *          text may be included.  These can be removed by post-processing,
372  *          using, e.g.,
373  *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
374  *                             L_SELECT_IF_GTE, NULL);
375  */
376 PIX *
pixGenTextblockMask(PIX * pixs,PIX * pixvws,l_int32 debug)377 pixGenTextblockMask(PIX     *pixs,
378                     PIX     *pixvws,
379                     l_int32  debug)
380 {
381 PIX  *pixt1, *pixt2, *pixt3, *pixd;
382 
383     PROCNAME("pixGenTextblockMask");
384 
385     if (!pixs)
386         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
387     if (!pixvws)
388         return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL);
389     if (pixGetDepth(pixs) != 1)
390         return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
391 
392         /* Join pixels vertically to make a textblock mask */
393     pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
394     pixDisplayWrite(pixt1, debug);
395 
396         /* Solidify the textblock mask and remove noise:
397          *   (1) For each cc, close the blocks and dilate slightly
398 	 *       to form a solid mask.
399          *   (2) Small horizontal closing between components.
400          *   (3) Open the white space between columns, again.
401          *   (4) Remove small components. */
402     pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL);
403     pixCloseSafeBrick(pixt2, pixt2, 10, 1);
404     pixDisplayWrite(pixt2, debug);
405     pixt3 = pixSubtract(NULL, pixt2, pixvws);
406     pixDisplayWrite(pixt3, debug);
407     pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH,
408                             L_SELECT_IF_GTE, NULL);
409     pixDisplayWrite(pixd, debug);
410 
411     pixDestroy(&pixt1);
412     pixDestroy(&pixt2);
413     pixDestroy(&pixt3);
414     return pixd;
415 }
416 
417 
418