1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16 /*
17 * pageseg.c
18 *
19 * Top level page segmentation
20 * l_int32 pixGetRegionsBinary()
21 *
22 * Halftone region extraction
23 * PIX *pixGenHalftoneMask()
24 *
25 * Textline extraction
26 * PIX *pixGenTextlineMask()
27 *
28 * Textblock extraction
29 * PIX *pixGenTextblockMask()
30 */
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "allheaders.h"
35
36
37 /*------------------------------------------------------------------*
38 * Top level page segmentation *
39 *------------------------------------------------------------------*/
40 /*!
41 * pixGetRegionsBinary()
42 *
43 * Input: pixs (1 bpp, assumed to be 300 to 400 ppi)
44 * &pixhm (<optional return> halftone mask)
45 * &pixtm (<optional return> textline mask)
46 * &pixtb (<optional return> textblock mask)
47 * debug (flag: set to 1 for debug output)
48 * Return: 0 if OK, 1 on error
49 *
50 * Notes:
51 * (1) It is best to deskew the image before segmenting.
52 * (2) The debug flag enables a number of outputs. These
53 * are included to show how to generate and save/display
54 * these results.
55 */
56 l_int32
pixGetRegionsBinary(PIX * pixs,PIX ** ppixhm,PIX ** ppixtm,PIX ** ppixtb,l_int32 debug)57 pixGetRegionsBinary(PIX *pixs,
58 PIX **ppixhm,
59 PIX **ppixtm,
60 PIX **ppixtb,
61 l_int32 debug)
62 {
63 l_int32 htfound, tlfound;
64 PIX *pixr, *pixt1, *pixt2;
65 PIX *pixtext; /* text pixels only */
66 PIX *pixhm2; /* halftone mask; 2x reduction */
67 PIX *pixhm; /* halftone mask; */
68 PIX *pixtm2; /* textline mask; 2x reduction */
69 PIX *pixtm; /* textline mask */
70 PIX *pixvws; /* vertical white space mask */
71 PIX *pixtb2; /* textblock mask; 2x reduction */
72 PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */
73 PIX *pixtb; /* textblock mask */
74
75 PROCNAME("pixGetRegionsBinary");
76
77 if (ppixhm) *ppixhm = NULL;
78 if (ppixtm) *ppixtm = NULL;
79 if (ppixtb) *ppixtb = NULL;
80 if (!pixs)
81 return ERROR_INT("pixs not defined", procName, 1);
82 if (pixGetDepth(pixs) != 1)
83 return ERROR_INT("pixs not 1 bpp", procName, 1);
84
85 /* 2x reduce, to 150 -200 ppi */
86 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
87 pixDisplayWrite(pixr, debug);
88
89 /* Get the halftone mask */
90 pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);
91
92 /* Get the textline mask from the text pixels */
93 pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);
94
95 /* Get the textblock mask from the textline mask */
96 pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
97 pixDestroy(&pixr);
98 pixDestroy(&pixtext);
99 pixDestroy(&pixvws);
100
101 /* Remove small components from the mask, where a small
102 * component is defined as one with both width and height < 60 */
103 pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
104 L_SELECT_IF_GTE, NULL);
105 pixDestroy(&pixtb2);
106 pixDisplayWrite(pixtbf2, debug);
107
108 /* Expand all masks to full resolution, and do filling or
109 * small dilations for better coverage. */
110 pixhm = pixExpandReplicate(pixhm2, 2);
111 pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
112 pixOr(pixhm, pixhm, pixt1);
113 pixDestroy(&pixt1);
114 pixDisplayWrite(pixhm, debug);
115
116 pixt1 = pixExpandReplicate(pixtm2, 2);
117 pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
118 pixDestroy(&pixt1);
119 pixDisplayWrite(pixtm, debug);
120
121 pixt1 = pixExpandReplicate(pixtbf2, 2);
122 pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
123 pixDestroy(&pixt1);
124 pixDisplayWrite(pixtb, debug);
125
126 pixDestroy(&pixhm2);
127 pixDestroy(&pixtm2);
128 pixDestroy(&pixtbf2);
129
130 /* Debug: identify objects that are neither text nor halftone image */
131 if (debug) {
132 pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */
133 pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */
134 pixDisplayWrite(pixt2, 1);
135 pixDestroy(&pixt1);
136 pixDestroy(&pixt2);
137 }
138
139 /* Debug: display textline components with random colors */
140 if (debug) {
141 l_int32 w, h;
142 BOXA *boxa;
143 PIXA *pixa;
144 boxa = pixConnComp(pixtm, &pixa, 8);
145 pixGetDimensions(pixtm, &w, &h, NULL);
146 pixt1 = pixaDisplayRandomCmap(pixa, w, h);
147 pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
148 pixDisplay(pixt1, 100, 100);
149 pixDisplayWrite(pixt1, 1);
150 pixaDestroy(&pixa);
151 boxaDestroy(&boxa);
152 pixDestroy(&pixt1);
153 }
154
155 /* Debug: identify the outlines of each textblock */
156 if (debug) {
157 PIXCMAP *cmap;
158 PTAA *ptaa;
159 ptaa = pixGetOuterBordersPtaa(pixtb);
160 ptaaWrite("/tmp/junk_tb_outlines.ptaa", ptaa, 1);
161 pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
162 cmap = pixGetColormap(pixt1);
163 pixcmapResetColor(cmap, 0, 130, 130, 130);
164 pixDisplay(pixt1, 500, 100);
165 pixDisplayWrite(pixt1, 1);
166 pixDestroy(&pixt1);
167 ptaaDestroy(&ptaa);
168 }
169
170 /* Debug: get b.b. for all mask components */
171 if (debug) {
172 BOXA *bahm, *batm, *batb;
173 bahm = pixConnComp(pixhm, NULL, 4);
174 batm = pixConnComp(pixtm, NULL, 4);
175 batb = pixConnComp(pixtb, NULL, 4);
176 boxaWrite("junk_htmask.boxa", bahm);
177 boxaWrite("junk_textmask.boxa", batm);
178 boxaWrite("junk_textblock.boxa", batb);
179 boxaDestroy(&bahm);
180 boxaDestroy(&batm);
181 boxaDestroy(&batb);
182 }
183
184 if (ppixhm)
185 *ppixhm = pixhm;
186 else
187 pixDestroy(&pixhm);
188 if (ppixtm)
189 *ppixtm = pixtm;
190 else
191 pixDestroy(&pixtm);
192 if (ppixtb)
193 *ppixtb = pixtb;
194 else
195 pixDestroy(&pixtb);
196
197 return 0;
198 }
199
200
201 /*------------------------------------------------------------------*
202 * Halftone region extraction *
203 *------------------------------------------------------------------*/
204 /*!
205 * pixGenHalftoneMask()
206 *
207 * Input: pixs (1 bpp, assumed to be 150 to 200 ppi)
208 * &pixtext (<optional return> text part of pixs)
209 * &htfound (<optional return> 1 if the mask is not empty)
210 * debug (flag: 1 for debug output)
211 * Return: pixd (halftone mask), or null on error
212 */
213 PIX *
pixGenHalftoneMask(PIX * pixs,PIX ** ppixtext,l_int32 * phtfound,l_int32 debug)214 pixGenHalftoneMask(PIX *pixs,
215 PIX **ppixtext,
216 l_int32 *phtfound,
217 l_int32 debug)
218 {
219 l_int32 empty;
220 PIX *pixt1, *pixt2, *pixhs, *pixhm, *pixd;
221
222 PROCNAME("pixGenHalftoneMask");
223
224 if (ppixtext) *ppixtext = NULL;
225 if (!pixs)
226 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
227 if (pixGetDepth(pixs) != 1)
228 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
229
230 /* Compute seed for halftone parts at 8x reduction */
231 pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0);
232 pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
233 pixhs = pixExpandReplicate(pixt2, 8); /* back to 2x reduction */
234 pixDestroy(&pixt1);
235 pixDestroy(&pixt2);
236 pixDisplayWrite(pixhs, debug);
237
238 /* Compute mask for connected regions */
239 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
240 pixDisplayWrite(pixhm, debug);
241
242 /* Fill seed into mask to get halftone mask */
243 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
244
245 #if 0
246 /* Moderate opening to remove thin lines, etc. */
247 pixOpenBrick(pixd, pixd, 10, 10);
248 pixDisplayWrite(pixd, debug);
249 #endif
250
251 /* Check if mask is empty */
252 pixZero(pixd, &empty);
253 if (phtfound) {
254 *phtfound = 0;
255 if (!empty)
256 *phtfound = 1;
257 }
258
259 /* Optionally, get all pixels that are not under the halftone mask */
260 if (ppixtext) {
261 if (empty)
262 *ppixtext = pixCopy(NULL, pixs);
263 else
264 *ppixtext = pixSubtract(NULL, pixs, pixd);
265 pixDisplayWrite(*ppixtext, debug);
266 }
267
268 pixDestroy(&pixhs);
269 pixDestroy(&pixhm);
270 return pixd;
271 }
272
273
274 /*------------------------------------------------------------------*
275 * Textline extraction *
276 *------------------------------------------------------------------*/
277 /*!
278 * pixGenTextlineMask()
279 *
280 * Input: pixs (1 bpp, assumed to be 150 to 200 ppi)
281 * &pixvws (<return> vertical whitespace mask)
282 * &tlfound (<optional return> 1 if the mask is not empty)
283 * debug (flag: 1 for debug output)
284 * Return: pixd (textline mask), or null on error
285 *
286 * Notes:
287 * (1) The input pixs should be deskewed.
288 * (2) pixs should have no halftone pixels.
289 * (3) Both the input image and the returned textline mask
290 * are at the same resolution.
291 */
292 PIX *
pixGenTextlineMask(PIX * pixs,PIX ** ppixvws,l_int32 * ptlfound,l_int32 debug)293 pixGenTextlineMask(PIX *pixs,
294 PIX **ppixvws,
295 l_int32 *ptlfound,
296 l_int32 debug)
297 {
298 l_int32 empty;
299 PIX *pixt1, *pixt2, *pixvws, *pixd;
300
301 PROCNAME("pixGenTextlineMask");
302
303 if (!pixs)
304 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
305 if (!ppixvws)
306 return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL);
307 if (pixGetDepth(pixs) != 1)
308 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
309
310 /* First we need a vertical whitespace mask. Invert the image. */
311 pixt1 = pixInvert(NULL, pixs);
312
313 /* The whitespace mask will break textlines where there
314 * is a large amount of white space below or above.
315 * This can be prevented by identifying regions of the
316 * inverted image that have large horizontal extent (bigger than
317 * the separation between columns) and significant
318 * vertical extent (bigger than the separation between
319 * textlines), and subtracting this from the bg. */
320 pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0);
321 pixSubtract(pixt1, pixt1, pixt2);
322 pixDisplayWrite(pixt1, debug);
323 pixDestroy(&pixt2);
324
325 /* Identify vertical whitespace by opening the remaining bg.
326 * o5.1 removes thin vertical bg lines and o1.200 extracts
327 * long vertical bg lines. */
328 pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0);
329 *ppixvws = pixvws;
330 pixDisplayWrite(pixvws, debug);
331 pixDestroy(&pixt1);
332
333 /* Three steps to getting text line mask:
334 * (1) close the characters and words in the textlines
335 * (2) open the vertical whitespace corridors back up
336 * (3) small opening to remove noise */
337 pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1);
338 pixDisplayWrite(pixt1, debug);
339 pixd = pixSubtract(NULL, pixt1, pixvws);
340 pixOpenBrick(pixd, pixd, 3, 3);
341 pixDisplayWrite(pixd, debug);
342 pixDestroy(&pixt1);
343
344 /* Check if text line mask is empty */
345 if (ptlfound) {
346 *ptlfound = 0;
347 pixZero(pixd, &empty);
348 if (!empty)
349 *ptlfound = 1;
350 }
351
352 return pixd;
353 }
354
355
356 /*------------------------------------------------------------------*
357 * Textblock extraction *
358 *------------------------------------------------------------------*/
359 /*!
360 * pixGenTextblockMask()
361 *
362 * Input: pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
363 * pixvws (vertical white space mask)
364 * debug (flag: 1 for debug output)
365 * Return: pixd (textblock mask), or null on error
366 *
367 * Notes:
368 * (1) Both the input masks (textline and vertical white space) and
369 * the returned textblock mask are at the same resolution.
370 * (2) The result is somewhat noisy, in that small "blocks" of
371 * text may be included. These can be removed by post-processing,
372 * using, e.g.,
373 * pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
374 * L_SELECT_IF_GTE, NULL);
375 */
376 PIX *
pixGenTextblockMask(PIX * pixs,PIX * pixvws,l_int32 debug)377 pixGenTextblockMask(PIX *pixs,
378 PIX *pixvws,
379 l_int32 debug)
380 {
381 PIX *pixt1, *pixt2, *pixt3, *pixd;
382
383 PROCNAME("pixGenTextblockMask");
384
385 if (!pixs)
386 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
387 if (!pixvws)
388 return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL);
389 if (pixGetDepth(pixs) != 1)
390 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
391
392 /* Join pixels vertically to make a textblock mask */
393 pixt1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
394 pixDisplayWrite(pixt1, debug);
395
396 /* Solidify the textblock mask and remove noise:
397 * (1) For each cc, close the blocks and dilate slightly
398 * to form a solid mask.
399 * (2) Small horizontal closing between components.
400 * (3) Open the white space between columns, again.
401 * (4) Remove small components. */
402 pixt2 = pixMorphSequenceByComponent(pixt1, "c30.30 + d3.3", 8, 0, 0, NULL);
403 pixCloseSafeBrick(pixt2, pixt2, 10, 1);
404 pixDisplayWrite(pixt2, debug);
405 pixt3 = pixSubtract(NULL, pixt2, pixvws);
406 pixDisplayWrite(pixt3, debug);
407 pixd = pixSelectBySize(pixt3, 25, 5, 8, L_SELECT_IF_BOTH,
408 L_SELECT_IF_GTE, NULL);
409 pixDisplayWrite(pixd, debug);
410
411 pixDestroy(&pixt1);
412 pixDestroy(&pixt2);
413 pixDestroy(&pixt3);
414 return pixd;
415 }
416
417
418