1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16 /*
17 * binarize.c
18 *
19 * ===================================================================
20 * Image binarization algorithms are found in:
21 * grayquant.c: standard, simple, general grayscale quantization
22 * adaptmap.c: local adaptive; mostly gray-to-gray in preparation
23 * for binarization
24 * binarize.c: special binarization methods, locally adaptive.
25 * ===================================================================
26 *
27 * Adaptive Otsu-based thresholding
28 * l_int32 pixOtsuAdaptiveThreshold() 8 bpp
29 *
30 * Otsu thresholding on adaptive background normalization
31 * PIX *pixOtsuThreshOnBackgroundNorm() 8 bpp
32 *
33 * Masking and Otsu estimate on adaptive background normalization
34 * PIX *pixMaskedThreshOnBackgroundNorm() 8 bpp
35 *
36 * Sauvola local thresholding
37 * l_int32 pixSauvolaBinarizeTiled()
38 * l_int32 pixSauvolaBinarize()
39 * PIX *pixSauvolaGetThreshold()
40 * PIX *pixApplyLocalThreshold();
41 *
42 * Notes:
43 * (1) pixOtsuAdaptiveThreshold() computes a global threshold over each
44 * tile and performs the threshold operation, resulting in a
45 * binary image for each tile. These are stitched into the
46 * final result.
47 * (2) pixOtsuThreshOnBackgroundNorm() and
48 * pixMaskedThreshOnBackgroundNorm() are binarization functions
49 * that use background normalization with other techniques.
50 * (3) Sauvola binarization computes a local threshold based on
51 * the local average and square average. It takes two constants:
52 * the window size for the measurment at each pixel and a
53 * parameter that determines the amount of normalized local
54 * standard deviation to subtract from the local average value.
55 */
56
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <math.h>
60 #include "allheaders.h"
61
62
63 /*------------------------------------------------------------------*
64 * Adaptive Otsu-based thresholding *
65 *------------------------------------------------------------------*/
66 /*!
67 * pixOtsuAdaptiveThreshold()
68 *
69 * Input: pixs (8 bpp)
70 * sx, sy (desired tile dimensions; actual size may vary)
71 * smoothx, smoothy (half-width of convolution kernel applied to
72 * threshold array: use 0 for no smoothing)
73 * scorefract (fraction of the max Otsu score; typ. 0.1)
74 * &pixth (<optional return> array of threshold values
75 * found for each tile)
76 * &pixd (<optional return> thresholded input pixs, based on
77 * the threshold array)
78 * Return: 0 if OK, 1 on error
79 *
80 * Notes:
81 * (1) The Otsu method finds a single global threshold for an image.
82 * This function allows a locally adapted threshold to be
83 * found for each tile into which the image is broken up.
84 * (2) The array of threshold values, one for each tile, constitutes
85 * a highly downscaled image. This array is optionally
86 * smoothed using a convolution. The full width and height of the
87 * convolution kernel are (2 * @smoothx + 1) and (2 * @smoothy + 1).
88 * (3) To get a single global threshold for the entire image, use
89 * input values of @sx and @sy that are larger than the image.
90 * For this situation, the smoothing parameters are ignored.
91 * (4) The scorefract is the fraction of the maximum Otsu score, which
92 * is used to determine the range over which the histogram minimum
93 * is searched. See numaSplitDistribution() for details on the
94 * underlying method of choosing a threshold.
95 * (5) This uses a modified version of the Otsu criterion for
96 * splitting the distribution of pixels in each tile into a
97 * fg and bg part. The modification consists of searching for
98 * a minimum in the histogram over a range of pixel values where
99 * the Otsu score is within a defined fraction, @scorefract,
100 * of the max score.
101 */
102 l_int32
pixOtsuAdaptiveThreshold(PIX * pixs,l_int32 sx,l_int32 sy,l_int32 smoothx,l_int32 smoothy,l_float32 scorefract,PIX ** ppixth,PIX ** ppixd)103 pixOtsuAdaptiveThreshold(PIX *pixs,
104 l_int32 sx,
105 l_int32 sy,
106 l_int32 smoothx,
107 l_int32 smoothy,
108 l_float32 scorefract,
109 PIX **ppixth,
110 PIX **ppixd)
111 {
112 l_int32 w, h, d, nx, ny, i, j, thresh;
113 l_uint32 val;
114 PIX *pixt, *pixb, *pixthresh, *pixth, *pixd;
115 PIXTILING *pt;
116
117 PROCNAME("pixOtsuAdaptiveThreshold");
118
119 if (!pixs)
120 return ERROR_INT("pixs not defined", procName, 1);
121 pixGetDimensions(pixs, &w, &h, &d);
122 if (d != 8)
123 return ERROR_INT("pixs not 8 bpp", procName, 1);
124 if (sx < 16 || sy < 16)
125 return ERROR_INT("sx and sy must be >= 16", procName, 1);
126 if (!ppixth && !ppixd)
127 return ERROR_INT("neither &pixth nor &pixd defined", procName, 1);
128
129 /* Compute the threshold array for the tiles */
130 nx = L_MAX(1, w / sx);
131 ny = L_MAX(1, h / sy);
132 smoothx = L_MIN(smoothx, (nx - 1) / 2);
133 smoothy = L_MIN(smoothy, (ny - 1) / 2);
134 pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0);
135 pixthresh = pixCreate(nx, ny, 8);
136 for (i = 0; i < ny; i++) {
137 for (j = 0; j < nx; j++) {
138 pixt = pixTilingGetTile(pt, i, j);
139 pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh,
140 NULL, NULL, 0);
141 pixSetPixel(pixthresh, j, i, thresh);
142 pixDestroy(&pixt);
143 }
144 }
145
146 /* Optionally smooth the threshold array */
147 if (smoothx > 0 || smoothy > 0)
148 pixth = pixBlockconv(pixthresh, smoothx, smoothy);
149 else
150 pixth = pixClone(pixthresh);
151 pixDestroy(&pixthresh);
152
153 /* Optionally apply the threshold array to binarize pixs */
154 if (ppixd) {
155 pixd = pixCreate(w, h, 1);
156 for (i = 0; i < ny; i++) {
157 for (j = 0; j < nx; j++) {
158 pixt = pixTilingGetTile(pt, i, j);
159 pixGetPixel(pixth, j, i, &val);
160 pixb = pixThresholdToBinary(pixt, val);
161 pixTilingPaintTile(pixd, i, j, pixb, pt);
162 pixDestroy(&pixt);
163 pixDestroy(&pixb);
164 }
165 }
166 *ppixd = pixd;
167 }
168
169 if (ppixth)
170 *ppixth = pixth;
171 else
172 pixDestroy(&pixth);
173
174 pixTilingDestroy(&pt);
175 return 0;
176 }
177
178
179
180 /*------------------------------------------------------------------*
181 * Otsu thresholding on adaptive background normalization *
182 *------------------------------------------------------------------*/
183 /*!
184 * pixOtsuThreshOnBackgroundNorm()
185 *
186 * Input: pixs (8 bpp grayscale or 32 bpp rgb)
187 * pixim (<optional> 1 bpp 'image' mask; can be null)
188 * sx, sy (tile size in pixels)
189 * thresh (threshold for determining foreground)
190 * mincount (min threshold on counts in a tile)
191 * bgval (target bg val; typ. > 128)
192 * smoothx (half-width of block convolution kernel width)
193 * smoothy (half-width of block convolution kernel height)
194 * scorefract (fraction of the max Otsu score; typ. 0.1)
195 * &thresh (<optional return> threshold value that was
196 * used on the normalized image)
197 * Return: pixd (1 bpp thresholded image), or null on error
198 *
199 * Notes:
200 * (1) This does background normalization followed by Otsu
201 * thresholding. Otsu binarization attempts to split the
202 * image into two roughly equal sets of pixels, and it does
203 * a very poor job when there are large amounts of dark
204 * background. By doing a background normalization first,
205 * to get the background near 255, we remove this problem.
206 * Then we use a modified Otsu to estimate the best global
207 * threshold on the normalized image.
208 * (2) See pixBackgroundNorm() for meaning and typical values
209 * of input parameters. For a start, you can try:
210 * sx, sy = 10, 15
211 * thresh = 100
212 * mincount = 50
213 * bgval = 255
214 * smoothx, smoothy = 2
215 */
216 PIX *
pixOtsuThreshOnBackgroundNorm(PIX * pixs,PIX * pixim,l_int32 sx,l_int32 sy,l_int32 thresh,l_int32 mincount,l_int32 bgval,l_int32 smoothx,l_int32 smoothy,l_float32 scorefract,l_int32 * pthresh)217 pixOtsuThreshOnBackgroundNorm(PIX *pixs,
218 PIX *pixim,
219 l_int32 sx,
220 l_int32 sy,
221 l_int32 thresh,
222 l_int32 mincount,
223 l_int32 bgval,
224 l_int32 smoothx,
225 l_int32 smoothy,
226 l_float32 scorefract,
227 l_int32 *pthresh)
228 {
229 l_int32 w, h, d;
230 l_uint32 val;
231 PIX *pixn, *pixt, *pixd;
232
233 PROCNAME("pixOtsuThreshOnBackgroundNorm");
234
235 if (pthresh) *pthresh = 0;
236 if (!pixs)
237 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
238 pixGetDimensions(pixs, &w, &h, &d);
239 if (d != 8 && d != 32)
240 return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL);
241 if (sx < 4 || sy < 4)
242 return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
243 if (mincount > sx * sy) {
244 L_WARNING("mincount too large for tile size", procName);
245 mincount = (sx * sy) / 3;
246 }
247
248 pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
249 mincount, bgval, smoothx, smoothy);
250 if (!pixn)
251 return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
252
253 /* Just use 1 tile for a global threshold, which is stored
254 * as a single pixel in pixt. */
255 pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd);
256 pixDestroy(&pixn);
257
258 if (pixt && pthresh) {
259 pixGetPixel(pixt, 0, 0, &val);
260 *pthresh = val;
261 }
262 pixDestroy(&pixt);
263
264 if (!pixd)
265 return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
266 else
267 return pixd;
268 }
269
270
271
272 /*----------------------------------------------------------------------*
273 * Masking and Otsu estimate on adaptive background normalization *
274 *----------------------------------------------------------------------*/
275 /*!
276 * pixMaskedThreshOnBackgroundNorm()
277 *
278 * Input: pixs (8 bpp grayscale or 32 bpp rgb)
279 * pixim (<optional> 1 bpp 'image' mask; can be null)
280 * sx, sy (tile size in pixels)
281 * thresh (threshold for determining foreground)
282 * mincount (min threshold on counts in a tile)
283 * smoothx (half-width of block convolution kernel width)
284 * smoothy (half-width of block convolution kernel height)
285 * scorefract (fraction of the max Otsu score; typ. ~ 0.1)
286 * &thresh (<optional return> threshold value that was
287 * used on the normalized image)
288 * Return: pixd (1 bpp thresholded image), or null on error
289 *
290 * Notes:
291 * (1) This begins with a standard background normalization.
292 * Additionally, there is a flexible background norm, that
293 * will adapt to a rapidly varying background, and this
294 * puts white pixels in the background near regions with
295 * significant foreground. The white pixels are turned into
296 * a 1 bpp selection mask by binarization followed by dilation.
297 * Otsu thresholding is performed on the input image to get an
298 * estimate of the threshold in the non-mask regions.
299 * The background normalized image is thresholded with two
300 * different values, and the result is combined using
301 * the selection mask.
302 * (2) Note that the numbers 255 (for bgval target) and 190 (for
303 * thresholding on pixn) are tied together, and explicitly
304 * defined in this function.
305 * (3) See pixBackgroundNorm() for meaning and typical values
306 * of input parameters. For a start, you can try:
307 * sx, sy = 10, 15
308 * thresh = 100
309 * mincount = 50
310 * smoothx, smoothy = 2
311 */
312 PIX *
pixMaskedThreshOnBackgroundNorm(PIX * pixs,PIX * pixim,l_int32 sx,l_int32 sy,l_int32 thresh,l_int32 mincount,l_int32 smoothx,l_int32 smoothy,l_float32 scorefract,l_int32 * pthresh)313 pixMaskedThreshOnBackgroundNorm(PIX *pixs,
314 PIX *pixim,
315 l_int32 sx,
316 l_int32 sy,
317 l_int32 thresh,
318 l_int32 mincount,
319 l_int32 smoothx,
320 l_int32 smoothy,
321 l_float32 scorefract,
322 l_int32 *pthresh)
323 {
324 l_int32 w, h, d;
325 l_uint32 val;
326 PIX *pixn, *pixm, *pixd, *pixt1, *pixt2, *pixt3, *pixt4;
327
328 PROCNAME("pixMaskedThreshOnBackgroundNorm");
329
330 if (pthresh) *pthresh = 0;
331 if (!pixs)
332 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
333 pixGetDimensions(pixs, &w, &h, &d);
334 if (d != 8 && d != 32)
335 return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL);
336 if (sx < 4 || sy < 4)
337 return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
338 if (mincount > sx * sy) {
339 L_WARNING("mincount too large for tile size", procName);
340 mincount = (sx * sy) / 3;
341 }
342
343 /* Standard background normalization */
344 pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
345 mincount, 255, smoothx, smoothy);
346 if (!pixn)
347 return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
348
349 /* Special background normalization for adaptation to quickly
350 * varying background. Threshold on the very light parts,
351 * which tend to be near significant edges, and dilate to
352 * form a mask over regions that are typically text. The
353 * dilation size is chosen to cover the text completely,
354 * except for very thick fonts. */
355 pixt1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20);
356 pixt2 = pixThresholdToBinary(pixt1, 240);
357 pixInvert(pixt2, pixt2);
358 pixm = pixMorphSequence(pixt2, "d21.21", 0);
359 pixDestroy(&pixt1);
360 pixDestroy(&pixt2);
361
362 /* Use Otsu to get a global threshold estimate for the image,
363 * which is stored as a single pixel in pixt3. */
364 pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pixt3, NULL);
365 if (pixt3 && pthresh) {
366 pixGetPixel(pixt3, 0, 0, &val);
367 *pthresh = val;
368 }
369 pixDestroy(&pixt3);
370
371 /* Threshold the background normalized images differentially,
372 * using a high value correlated with the background normalization
373 * for the part of the image under the mask (i.e., near the
374 * darker, thicker foreground), and a value that depends on the Otsu
375 * threshold for the rest of the image. This gives a solid
376 * (high) thresholding for the foreground parts of the image,
377 * while allowing the background and light foreground to be
378 * reasonably well cleaned using a threshold adapted to the
379 * input image. */
380 pixd = pixThresholdToBinary(pixn, val + 30); /* for bg and light fg */
381 pixt4 = pixThresholdToBinary(pixn, 190); /* for heavier fg */
382 pixCombineMasked(pixd, pixt4, pixm);
383 pixDestroy(&pixt4);
384 pixDestroy(&pixm);
385 pixDestroy(&pixn);
386
387 if (!pixd)
388 return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
389 else
390 return pixd;
391 }
392
393
394 /*----------------------------------------------------------------------*
395 * Sauvola binarization *
396 *----------------------------------------------------------------------*/
397 /*!
398 * pixSauvolaBinarizeTiled()
399 *
400 * Input: pixs (8 bpp grayscale, not cmapped)
401 * whsize (window half-width for measuring local statistics)
402 * factor (factor for reducing threshold due to variance; >= 0)
403 * nx, ny (subdivision into tiles; >= 1)
404 * &pixth (<optional return> Sauvola threshold values)
405 * &pixd (<optional return> thresholded image)
406 * Return: 0 if OK, 1 on error
407 *
408 * Notes:
409 * (1) The window width and height are 2 * @whsize + 1. The minimum
410 * value for @whsize is 2; typically it is >= 7..
411 * (2) For nx == ny == 1, this defaults to pixSauvolaBinarize().
412 * (3) Why a tiled version?
413 * (a) Because the mean value accumulator is a uint32, overflow
414 * can occur for an image with more than 16M pixels.
415 * (b) The mean value accumulator array for 16M pixels is 64 MB.
416 * The mean square accumulator array for 16M pixels is 128 MB.
417 * Using tiles reduces the size of these arrays.
418 * (c) Each tile can be processed independently, in parallel,
419 * on a multicore processor.
420 * (4) The Sauvola threshold is determined from the formula:
421 * t = m * (1 - k * (1 - s / 128))
422 * See pixSauvolaBinarize() for details.
423 */
424 l_int32
pixSauvolaBinarizeTiled(PIX * pixs,l_int32 whsize,l_float32 factor,l_int32 nx,l_int32 ny,PIX ** ppixth,PIX ** ppixd)425 pixSauvolaBinarizeTiled(PIX *pixs,
426 l_int32 whsize,
427 l_float32 factor,
428 l_int32 nx,
429 l_int32 ny,
430 PIX **ppixth,
431 PIX **ppixd)
432 {
433 l_int32 i, j, w, h, xrat, yrat;
434 PIX *pixth, *pixd, *tileth, *tiled, *pixt;
435 PIX **ptileth, **ptiled;
436 PIXTILING *pt;
437
438 PROCNAME("pixSauvolaBinarizeTiled");
439
440 if (!ppixth && !ppixd)
441 return ERROR_INT("no outputs", procName, 1);
442 if (ppixth) *ppixth = NULL;
443 if (ppixd) *ppixd = NULL;
444 if (!pixs || pixGetDepth(pixs) != 8)
445 return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
446 if (pixGetColormap(pixs))
447 return ERROR_INT("pixs is cmapped", procName, 1);
448 pixGetDimensions(pixs, &w, &h, NULL);
449 if (whsize < 2)
450 return ERROR_INT("whsize must be >= 2", procName, 1);
451 if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
452 return ERROR_INT("whsize too large for image", procName, 1);
453 if (factor < 0.0)
454 return ERROR_INT("factor must be >= 0", procName, 1);
455
456 if (nx <= 1 && ny <= 1)
457 return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
458 ppixth, ppixd);
459
460 /* Test to see if the tiles are too small. The required
461 * condition is that the tile dimensions must be at least
462 * (whsize + 2) x (whsize + 2). */
463 xrat = w / nx;
464 yrat = h / ny;
465 if (xrat < whsize + 2) {
466 nx = w / (whsize + 2);
467 L_WARNING_INT("tile width too small; nx reduced to %d", procName, nx);
468 }
469 if (yrat < whsize + 2) {
470 ny = h / (whsize + 2);
471 L_WARNING_INT("tile height too small; ny reduced to %d", procName, ny);
472 }
473 if (nx <= 1 && ny <= 1)
474 return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
475 ppixth, ppixd);
476
477 /* We can use pixtiling for painting both outputs, if requested */
478 if (ppixth) {
479 pixth = pixCreateNoInit(w, h, 8);
480 *ppixth = pixth;
481 }
482 if (ppixd) {
483 pixd = pixCreateNoInit(w, h, 1);
484 *ppixd = pixd;
485 }
486 pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1);
487 pixTilingNoStripOnPaint(pt); /* pixSauvolaBinarize() does the stripping */
488
489 for (i = 0; i < ny; i++) {
490 for (j = 0; j < nx; j++) {
491 pixt = pixTilingGetTile(pt, i, j);
492 ptileth = (ppixth) ? &tileth : NULL;
493 ptiled = (ppixd) ? &tiled : NULL;
494 pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL,
495 ptileth, ptiled);
496 if (ppixth) { /* do not strip */
497 pixTilingPaintTile(pixth, i, j, tileth, pt);
498 pixDestroy(&tileth);
499 }
500 if (ppixd) {
501 pixTilingPaintTile(pixd, i, j, tiled, pt);
502 pixDestroy(&tiled);
503 }
504 pixDestroy(&pixt);
505 }
506 }
507
508 pixTilingDestroy(&pt);
509 return 0;
510 }
511
512
513 /*!
514 * pixSauvolaBinarize()
515 *
516 * Input: pixs (8 bpp grayscale)
517 * whsize (window half-width for measuring local statistics)
518 * factor (factor for reducing threshold due to variance; >= 0)
519 * addborder (1 to add border of width (@whsize + 1) on all sides)
520 * &pixm (<optional return> local mean values)
521 * &pixsd (<optional return> local standard deviation values)
522 * &pixth (<optional return> threshold values)
523 * &pixd (<optional return> thresholded image)
524 * Return: 0 if OK, 1 on error
525 *
526 * Notes:
527 * (1) The window width and height are 2 * @whsize + 1. The minimum
528 * value for @whsize is 2; typically it is >= 7..
529 * (2) The local statistics, measured over the window, are the
530 * average and standard deviation.
531 * (3) The measurements of the mean and standard deviation are
532 * performed inside a border of (@whsize + 1) pixels. If pixs does
533 * not have these added border pixels, use @addborder = 1 to add
534 * it here; otherwise use @addborder = 0.
535 * (4) The Sauvola threshold is determined from the formula:
536 * t = m * (1 - k * (1 - s / 128))
537 * where:
538 * t = local threshold
539 * m = local mean
540 * k = @factor (>= 0) [ typ. 0.35 ]
541 * s = local standard deviation, which is maximized at
542 * 127.5 when half the samples are 0 and half are 255.
543 * (5) The basic idea of Niblack and Sauvola binarization is that
544 * the local threshold should be less than the median value,
545 * and the larger the variance, the closer to the median
546 * it should be chosen. Typical values for k are between
547 * 0.2 and 0.5.
548 */
549 l_int32
pixSauvolaBinarize(PIX * pixs,l_int32 whsize,l_float32 factor,l_int32 addborder,PIX ** ppixm,PIX ** ppixsd,PIX ** ppixth,PIX ** ppixd)550 pixSauvolaBinarize(PIX *pixs,
551 l_int32 whsize,
552 l_float32 factor,
553 l_int32 addborder,
554 PIX **ppixm,
555 PIX **ppixsd,
556 PIX **ppixth,
557 PIX **ppixd)
558 {
559 l_int32 w, h;
560 PIX *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd;
561
562 PROCNAME("pixSauvolaBinarize");
563
564
565 if (!ppixm && !ppixsd && !ppixth && !ppixd)
566 return ERROR_INT("no outputs", procName, 1);
567 if (ppixm) *ppixm = NULL;
568 if (ppixsd) *ppixsd = NULL;
569 if (ppixth) *ppixth = NULL;
570 if (ppixd) *ppixd = NULL;
571 if (!pixs || pixGetDepth(pixs) != 8)
572 return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
573 if (pixGetColormap(pixs))
574 return ERROR_INT("pixs is cmapped", procName, 1);
575 pixGetDimensions(pixs, &w, &h, NULL);
576 if (whsize < 2)
577 return ERROR_INT("whsize must be >= 2", procName, 1);
578 if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
579 return ERROR_INT("whsize too large for image", procName, 1);
580 if (factor < 0.0)
581 return ERROR_INT("factor must be >= 0", procName, 1);
582
583 if (addborder) {
584 pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1,
585 whsize + 1, whsize + 1);
586 pixsc = pixClone(pixs);
587 }
588 else {
589 pixg = pixClone(pixs);
590 pixsc = pixRemoveBorder(pixs, whsize + 1);
591 }
592 if (!pixg || !pixsc)
593 return ERROR_INT("pixg and pixsc not made", procName, 1);
594
595 /* All these functions strip off the border pixels. */
596 if (ppixm || ppixth || ppixd)
597 pixm = pixWindowedMean(pixg, whsize, whsize, 1);
598 if (ppixsd || ppixth || ppixd)
599 pixms = pixWindowedMeanSquare(pixg, whsize);
600 if (ppixth || ppixd)
601 pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd);
602 if (ppixd)
603 pixd = pixApplyLocalThreshold(pixsc, pixth, 1);
604
605 if (ppixm)
606 *ppixm = pixm;
607 else
608 pixDestroy(&pixm);
609 pixDestroy(&pixms);
610 if (ppixth)
611 *ppixth = pixth;
612 else
613 pixDestroy(&pixth);
614 if (ppixd)
615 *ppixd = pixd;
616 else
617 pixDestroy(&pixd);
618 pixDestroy(&pixg);
619 pixDestroy(&pixsc);
620 return 0;
621 }
622
623
624 /*!
625 * pixSauvolaGetThreshold()
626 *
627 * Input: pixm (8 bpp grayscale)
628 * pixms (32 bpp)
629 * factor (factor for reducing threshold due to variance; >= 0)
630 * &pixsd (<optional return> local standard deviation)
631 * Return: pixd (8 bpp, sauvola threshold values), or null on error
632 *
633 * Notes:
634 * (1) The Sauvola threshold is determined from the formula:
635 * t = m * (1 - k * (1 - s / 128))
636 * where:
637 * t = local threshold
638 * m = local mean
639 * k = @factor (>= 0) [ typ. 0.35 ]
640 * s = local standard deviation, which is maximized at
641 * 127.5 when half the samples are 0 and half are 255.
642 * (2) See pixSauvolaBinarize() for other details.
643 * (3) Important definitions and relations for computing averages:
644 * v == pixel value
645 * E(p) == expected value of p == average of p over some pixel set
646 * S(v) == square of v == v * v
647 * mv == E(v) == expected pixel value == mean value
648 * ms == E(S(v)) == expected square of pixel values
649 * == mean square value
650 * var == variance == expected square of deviation from mean
651 * == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv))
652 * = E(S(v)) - S(mv)
653 * = ms - mv * mv
654 * s == standard deviation = sqrt(var)
655 * So for evaluating the standard deviation in the Sauvola
656 * threshold, we take
657 * s = sqrt(ms - mv * mv)
658 */
659 PIX *
pixSauvolaGetThreshold(PIX * pixm,PIX * pixms,l_float32 factor,PIX ** ppixsd)660 pixSauvolaGetThreshold(PIX *pixm,
661 PIX *pixms,
662 l_float32 factor,
663 PIX **ppixsd)
664 {
665 l_int32 i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab;
666 l_int32 mv, ms, var, thresh;
667 l_uint32 *datam, *datams, *datasd, *datad;
668 l_uint32 *linem, *linems, *linesd, *lined;
669 l_float32 sd;
670 l_float32 *tab; /* of 2^16 square roots */
671 PIX *pixsd, *pixd;
672
673 PROCNAME("pixSauvolaGetThreshold");
674
675 if (ppixsd) *ppixsd = NULL;
676 if (!pixm || pixGetDepth(pixm) != 8)
677 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
678 if (!pixms || pixGetDepth(pixms) != 32)
679 return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp",
680 procName, NULL);
681 if (factor < 0.0)
682 return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL);
683
684 /* Only make a table of 2^16 square roots if there
685 * are enough pixels to justify it. */
686 pixGetDimensions(pixm, &w, &h, NULL);
687 usetab = (w * h > 100000) ? 1 : 0;
688 if (usetab) {
689 tabsize = 1 << 16;
690 tab = (l_float32 *)CALLOC(tabsize, sizeof(l_float32));
691 for (i = 0; i < tabsize; i++)
692 tab[i] = (l_float32)sqrt(i);
693 }
694
695 pixd = pixCreate(w, h, 8);
696 if (ppixsd) {
697 pixsd = pixCreate(w, h, 8);
698 *ppixsd = pixsd;
699 }
700 datam = pixGetData(pixm);
701 datams = pixGetData(pixms);
702 if (ppixsd) datasd = pixGetData(pixsd);
703 datad = pixGetData(pixd);
704 wplm = pixGetWpl(pixm);
705 wplms = pixGetWpl(pixms);
706 if (ppixsd) wplsd = pixGetWpl(pixsd);
707 wpld = pixGetWpl(pixd);
708 for (i = 0; i < h; i++) {
709 linem = datam + i * wplm;
710 linems = datams + i * wplms;
711 if (ppixsd) linesd = datasd + i * wplsd;
712 lined = datad + i * wpld;
713 for (j = 0; j < w; j++) {
714 mv = GET_DATA_BYTE(linem, j);
715 ms = linems[j];
716 var = ms - mv * mv;
717 if (usetab)
718 sd = tab[var];
719 else
720 sd = (l_float32)sqrt(var);
721 if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd);
722 thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.)));
723 SET_DATA_BYTE(lined, j, thresh);
724 }
725 }
726
727 if (usetab) FREE(tab);
728 return pixd;
729 }
730
731
732 /*!
733 * pixApplyLocalThreshold()
734 *
735 * Input: pixs (8 bpp grayscale)
736 * pixth (8 bpp array of local thresholds)
737 * redfactor ( ... )
738 * Return: pixd (1 bpp, thresholded image), or null on error
739 */
740 PIX *
pixApplyLocalThreshold(PIX * pixs,PIX * pixth,l_int32 redfactor)741 pixApplyLocalThreshold(PIX *pixs,
742 PIX *pixth,
743 l_int32 redfactor)
744 {
745 l_int32 i, j, w, h, wpls, wplt, wpld, vals, valt;
746 l_uint32 *datas, *datat, *datad, *lines, *linet, *lined;
747 PIX *pixd;
748
749 PROCNAME("pixApplyLocalThreshold");
750
751 if (!pixs || pixGetDepth(pixs) != 8)
752 return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
753 if (!pixth || pixGetDepth(pixth) != 8)
754 return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL);
755
756 pixGetDimensions(pixs, &w, &h, NULL);
757 pixd = pixCreate(w, h, 1);
758 datas = pixGetData(pixs);
759 datat = pixGetData(pixth);
760 datad = pixGetData(pixd);
761 wpls = pixGetWpl(pixs);
762 wplt = pixGetWpl(pixth);
763 wpld = pixGetWpl(pixd);
764 for (i = 0; i < h; i++) {
765 lines = datas + i * wpls;
766 linet = datat + i * wplt;
767 lined = datad + i * wpld;
768 for (j = 0; j < w; j++) {
769 vals = GET_DATA_BYTE(lines, j);
770 valt = GET_DATA_BYTE(linet, j);
771 if (vals < valt)
772 SET_DATA_BIT(lined, j);
773 }
774 }
775
776 return pixd;
777 }
778
779
780