• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 /*
17  *  binarize.c
18  *
19  *  ===================================================================
20  *  Image binarization algorithms are found in:
21  *    grayquant.c:   standard, simple, general grayscale quantization
22  *    adaptmap.c:    local adaptive; mostly gray-to-gray in preparation
23  *                   for binarization
24  *    binarize.c:    special binarization methods, locally adaptive.
25  *  ===================================================================
26  *
27  *      Adaptive Otsu-based thresholding
28  *          l_int32    pixOtsuAdaptiveThreshold()       8 bpp
29  *
30  *      Otsu thresholding on adaptive background normalization
31  *          PIX       *pixOtsuThreshOnBackgroundNorm()  8 bpp
32  *
33  *      Masking and Otsu estimate on adaptive background normalization
34  *          PIX       *pixMaskedThreshOnBackgroundNorm()  8 bpp
35  *
36  *      Sauvola local thresholding
37  *          l_int32    pixSauvolaBinarizeTiled()
38  *          l_int32    pixSauvolaBinarize()
39  *          PIX       *pixSauvolaGetThreshold()
40  *          PIX       *pixApplyLocalThreshold();
41  *
42  *  Notes:
43  *      (1) pixOtsuAdaptiveThreshold() computes a global threshold over each
44  *          tile and performs the threshold operation, resulting in a
45  *          binary image for each tile.  These are stitched into the
46  *          final result.
47  *      (2) pixOtsuThreshOnBackgroundNorm() and
48  *          pixMaskedThreshOnBackgroundNorm() are binarization functions
49  *          that use background normalization with other techniques.
50  *      (3) Sauvola binarization computes a local threshold based on
51  *          the local average and square average.  It takes two constants:
52  *          the window size for the measurment at each pixel and a
53  *          parameter that determines the amount of normalized local
54  *          standard deviation to subtract from the local average value.
55  */
56 
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <math.h>
60 #include "allheaders.h"
61 
62 
63 /*------------------------------------------------------------------*
64  *                 Adaptive Otsu-based thresholding                 *
65  *------------------------------------------------------------------*/
66 /*!
67  *  pixOtsuAdaptiveThreshold()
68  *
69  *      Input:  pixs (8 bpp)
70  *              sx, sy (desired tile dimensions; actual size may vary)
71  *              smoothx, smoothy (half-width of convolution kernel applied to
72  *                                threshold array: use 0 for no smoothing)
73  *              scorefract (fraction of the max Otsu score; typ. 0.1)
74  *              &pixth (<optional return> array of threshold values
75  *                      found for each tile)
76  *              &pixd (<optional return> thresholded input pixs, based on
77  *                     the threshold array)
78  *      Return: 0 if OK, 1 on error
79  *
80  *  Notes:
81  *      (1) The Otsu method finds a single global threshold for an image.
82  *          This function allows a locally adapted threshold to be
83  *          found for each tile into which the image is broken up.
84  *      (2) The array of threshold values, one for each tile, constitutes
85  *          a highly downscaled image.  This array is optionally
86  *          smoothed using a convolution.  The full width and height of the
87  *          convolution kernel are (2 * @smoothx + 1) and (2 * @smoothy + 1).
88  *      (3) To get a single global threshold for the entire image, use
89  *          input values of @sx and @sy that are larger than the image.
90  *          For this situation, the smoothing parameters are ignored.
91  *      (4) The scorefract is the fraction of the maximum Otsu score, which
92  *          is used to determine the range over which the histogram minimum
93  *          is searched.  See numaSplitDistribution() for details on the
94  *          underlying method of choosing a threshold.
95  *      (5) This uses a modified version of the Otsu criterion for
96  *          splitting the distribution of pixels in each tile into a
97  *          fg and bg part.  The modification consists of searching for
98  *          a minimum in the histogram over a range of pixel values where
99  *          the Otsu score is within a defined fraction, @scorefract,
100  *          of the max score.
101  */
102 l_int32
pixOtsuAdaptiveThreshold(PIX * pixs,l_int32 sx,l_int32 sy,l_int32 smoothx,l_int32 smoothy,l_float32 scorefract,PIX ** ppixth,PIX ** ppixd)103 pixOtsuAdaptiveThreshold(PIX       *pixs,
104                          l_int32    sx,
105                          l_int32    sy,
106                          l_int32    smoothx,
107                          l_int32    smoothy,
108                          l_float32  scorefract,
109                          PIX      **ppixth,
110                          PIX      **ppixd)
111 {
112 l_int32     w, h, d, nx, ny, i, j, thresh;
113 l_uint32    val;
114 PIX        *pixt, *pixb, *pixthresh, *pixth, *pixd;
115 PIXTILING  *pt;
116 
117     PROCNAME("pixOtsuAdaptiveThreshold");
118 
119     if (!pixs)
120         return ERROR_INT("pixs not defined", procName, 1);
121     pixGetDimensions(pixs, &w, &h, &d);
122     if (d != 8)
123         return ERROR_INT("pixs not 8 bpp", procName, 1);
124     if (sx < 16 || sy < 16)
125         return ERROR_INT("sx and sy must be >= 16", procName, 1);
126     if (!ppixth && !ppixd)
127         return ERROR_INT("neither &pixth nor &pixd defined", procName, 1);
128 
129         /* Compute the threshold array for the tiles */
130     nx = L_MAX(1, w / sx);
131     ny = L_MAX(1, h / sy);
132     smoothx = L_MIN(smoothx, (nx - 1) / 2);
133     smoothy = L_MIN(smoothy, (ny - 1) / 2);
134     pt = pixTilingCreate(pixs, nx, ny, 0, 0, 0, 0);
135     pixthresh = pixCreate(nx, ny, 8);
136     for (i = 0; i < ny; i++) {
137         for (j = 0; j < nx; j++) {
138             pixt = pixTilingGetTile(pt, i, j);
139             pixSplitDistributionFgBg(pixt, scorefract, 1, &thresh,
140                                      NULL, NULL, 0);
141             pixSetPixel(pixthresh, j, i, thresh);
142             pixDestroy(&pixt);
143         }
144     }
145 
146         /* Optionally smooth the threshold array */
147     if (smoothx > 0 || smoothy > 0)
148         pixth = pixBlockconv(pixthresh, smoothx, smoothy);
149     else
150         pixth = pixClone(pixthresh);
151     pixDestroy(&pixthresh);
152 
153         /* Optionally apply the threshold array to binarize pixs */
154     if (ppixd) {
155         pixd = pixCreate(w, h, 1);
156         for (i = 0; i < ny; i++) {
157             for (j = 0; j < nx; j++) {
158                 pixt = pixTilingGetTile(pt, i, j);
159                 pixGetPixel(pixth, j, i, &val);
160                 pixb = pixThresholdToBinary(pixt, val);
161                 pixTilingPaintTile(pixd, i, j, pixb, pt);
162                 pixDestroy(&pixt);
163                 pixDestroy(&pixb);
164             }
165         }
166         *ppixd = pixd;
167     }
168 
169     if (ppixth)
170         *ppixth = pixth;
171     else
172         pixDestroy(&pixth);
173 
174     pixTilingDestroy(&pt);
175     return 0;
176 }
177 
178 
179 
180 /*------------------------------------------------------------------*
181  *      Otsu thresholding on adaptive background normalization      *
182  *------------------------------------------------------------------*/
183 /*!
184  *  pixOtsuThreshOnBackgroundNorm()
185  *
186  *      Input:  pixs (8 bpp grayscale or 32 bpp rgb)
187  *              pixim (<optional> 1 bpp 'image' mask; can be null)
188  *              sx, sy (tile size in pixels)
189  *              thresh (threshold for determining foreground)
190  *              mincount (min threshold on counts in a tile)
191  *              bgval (target bg val; typ. > 128)
192  *              smoothx (half-width of block convolution kernel width)
193  *              smoothy (half-width of block convolution kernel height)
194  *              scorefract (fraction of the max Otsu score; typ. 0.1)
195  *              &thresh (<optional return> threshold value that was
196  *                       used on the normalized image)
197  *      Return: pixd (1 bpp thresholded image), or null on error
198  *
199  *  Notes:
200  *      (1) This does background normalization followed by Otsu
201  *          thresholding.  Otsu binarization attempts to split the
202  *          image into two roughly equal sets of pixels, and it does
203  *          a very poor job when there are large amounts of dark
204  *          background.  By doing a background normalization first,
205  *          to get the background near 255, we remove this problem.
206  *          Then we use a modified Otsu to estimate the best global
207  *          threshold on the normalized image.
208  *      (2) See pixBackgroundNorm() for meaning and typical values
209  *          of input parameters.  For a start, you can try:
210  *            sx, sy = 10, 15
211  *            thresh = 100
212  *            mincount = 50
213  *            bgval = 255
214  *            smoothx, smoothy = 2
215  */
216 PIX *
pixOtsuThreshOnBackgroundNorm(PIX * pixs,PIX * pixim,l_int32 sx,l_int32 sy,l_int32 thresh,l_int32 mincount,l_int32 bgval,l_int32 smoothx,l_int32 smoothy,l_float32 scorefract,l_int32 * pthresh)217 pixOtsuThreshOnBackgroundNorm(PIX       *pixs,
218                               PIX       *pixim,
219                               l_int32    sx,
220                               l_int32    sy,
221                               l_int32    thresh,
222                               l_int32    mincount,
223                               l_int32    bgval,
224                               l_int32    smoothx,
225                               l_int32    smoothy,
226                               l_float32  scorefract,
227                               l_int32   *pthresh)
228 {
229 l_int32   w, h, d;
230 l_uint32  val;
231 PIX      *pixn, *pixt, *pixd;
232 
233     PROCNAME("pixOtsuThreshOnBackgroundNorm");
234 
235     if (pthresh) *pthresh = 0;
236     if (!pixs)
237         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
238     pixGetDimensions(pixs, &w, &h, &d);
239     if (d != 8 && d != 32)
240         return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL);
241     if (sx < 4 || sy < 4)
242         return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
243     if (mincount > sx * sy) {
244         L_WARNING("mincount too large for tile size", procName);
245         mincount = (sx * sy) / 3;
246     }
247 
248     pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
249                              mincount, bgval, smoothx, smoothy);
250     if (!pixn)
251         return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
252 
253         /* Just use 1 tile for a global threshold, which is stored
254          * as a single pixel in pixt. */
255     pixOtsuAdaptiveThreshold(pixn, w, h, 0, 0, scorefract, &pixt, &pixd);
256     pixDestroy(&pixn);
257 
258     if (pixt && pthresh) {
259         pixGetPixel(pixt, 0, 0, &val);
260         *pthresh = val;
261     }
262     pixDestroy(&pixt);
263 
264     if (!pixd)
265         return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
266     else
267         return pixd;
268 }
269 
270 
271 
272 /*----------------------------------------------------------------------*
273  *    Masking and Otsu estimate on adaptive background normalization    *
274  *----------------------------------------------------------------------*/
275 /*!
276  *  pixMaskedThreshOnBackgroundNorm()
277  *
278  *      Input:  pixs (8 bpp grayscale or 32 bpp rgb)
279  *              pixim (<optional> 1 bpp 'image' mask; can be null)
280  *              sx, sy (tile size in pixels)
281  *              thresh (threshold for determining foreground)
282  *              mincount (min threshold on counts in a tile)
283  *              smoothx (half-width of block convolution kernel width)
284  *              smoothy (half-width of block convolution kernel height)
285  *              scorefract (fraction of the max Otsu score; typ. ~ 0.1)
286  *              &thresh (<optional return> threshold value that was
287  *                       used on the normalized image)
288  *      Return: pixd (1 bpp thresholded image), or null on error
289  *
290  *  Notes:
291  *      (1) This begins with a standard background normalization.
292  *          Additionally, there is a flexible background norm, that
293  *          will adapt to a rapidly varying background, and this
294  *          puts white pixels in the background near regions with
295  *          significant foreground.  The white pixels are turned into
296  *          a 1 bpp selection mask by binarization followed by dilation.
297  *          Otsu thresholding is performed on the input image to get an
298  *          estimate of the threshold in the non-mask regions.
299  *          The background normalized image is thresholded with two
300  *          different values, and the result is combined using
301  *          the selection mask.
302  *      (2) Note that the numbers 255 (for bgval target) and 190 (for
303  *          thresholding on pixn) are tied together, and explicitly
304  *          defined in this function.
305  *      (3) See pixBackgroundNorm() for meaning and typical values
306  *          of input parameters.  For a start, you can try:
307  *            sx, sy = 10, 15
308  *            thresh = 100
309  *            mincount = 50
310  *            smoothx, smoothy = 2
311  */
312 PIX *
pixMaskedThreshOnBackgroundNorm(PIX * pixs,PIX * pixim,l_int32 sx,l_int32 sy,l_int32 thresh,l_int32 mincount,l_int32 smoothx,l_int32 smoothy,l_float32 scorefract,l_int32 * pthresh)313 pixMaskedThreshOnBackgroundNorm(PIX       *pixs,
314                                 PIX       *pixim,
315                                 l_int32    sx,
316                                 l_int32    sy,
317                                 l_int32    thresh,
318                                 l_int32    mincount,
319                                 l_int32    smoothx,
320                                 l_int32    smoothy,
321                                 l_float32  scorefract,
322                                 l_int32   *pthresh)
323 {
324 l_int32   w, h, d;
325 l_uint32  val;
326 PIX      *pixn, *pixm, *pixd, *pixt1, *pixt2, *pixt3, *pixt4;
327 
328     PROCNAME("pixMaskedThreshOnBackgroundNorm");
329 
330     if (pthresh) *pthresh = 0;
331     if (!pixs)
332         return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
333     pixGetDimensions(pixs, &w, &h, &d);
334     if (d != 8 && d != 32)
335         return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL);
336     if (sx < 4 || sy < 4)
337         return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL);
338     if (mincount > sx * sy) {
339         L_WARNING("mincount too large for tile size", procName);
340         mincount = (sx * sy) / 3;
341     }
342 
343         /* Standard background normalization */
344     pixn = pixBackgroundNorm(pixs, pixim, NULL, sx, sy, thresh,
345                              mincount, 255, smoothx, smoothy);
346     if (!pixn)
347         return (PIX *)ERROR_PTR("pixn not made", procName, NULL);
348 
349         /* Special background normalization for adaptation to quickly
350          * varying background.  Threshold on the very light parts,
351          * which tend to be near significant edges, and dilate to
352          * form a mask over regions that are typically text.  The
353          * dilation size is chosen to cover the text completely,
354          * except for very thick fonts. */
355     pixt1 = pixBackgroundNormFlex(pixs, 7, 7, 1, 1, 20);
356     pixt2 = pixThresholdToBinary(pixt1, 240);
357     pixInvert(pixt2, pixt2);
358     pixm = pixMorphSequence(pixt2, "d21.21", 0);
359     pixDestroy(&pixt1);
360     pixDestroy(&pixt2);
361 
362         /* Use Otsu to get a global threshold estimate for the image,
363          * which is stored as a single pixel in pixt3. */
364     pixOtsuAdaptiveThreshold(pixs, w, h, 0, 0, scorefract, &pixt3, NULL);
365     if (pixt3 && pthresh) {
366         pixGetPixel(pixt3, 0, 0, &val);
367         *pthresh = val;
368     }
369     pixDestroy(&pixt3);
370 
371         /* Threshold the background normalized images differentially,
372          * using a high value correlated with the background normalization
373          * for the part of the image under the mask (i.e., near the
374          * darker, thicker foreground), and a value that depends on the Otsu
375          * threshold for the rest of the image.  This gives a solid
376          * (high) thresholding for the foreground parts of the image,
377          * while allowing the background and light foreground to be
378          * reasonably well cleaned using a threshold adapted to the
379          * input image. */
380     pixd = pixThresholdToBinary(pixn, val + 30);  /* for bg and light fg */
381     pixt4 = pixThresholdToBinary(pixn, 190);  /* for heavier fg */
382     pixCombineMasked(pixd, pixt4, pixm);
383     pixDestroy(&pixt4);
384     pixDestroy(&pixm);
385     pixDestroy(&pixn);
386 
387     if (!pixd)
388         return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
389     else
390         return pixd;
391 }
392 
393 
394 /*----------------------------------------------------------------------*
395  *                           Sauvola binarization                       *
396  *----------------------------------------------------------------------*/
397 /*!
398  *  pixSauvolaBinarizeTiled()
399  *
400  *      Input:  pixs (8 bpp grayscale, not cmapped)
401  *              whsize (window half-width for measuring local statistics)
402  *              factor (factor for reducing threshold due to variance; >= 0)
403  *              nx, ny (subdivision into tiles; >= 1)
404  *              &pixth (<optional return> Sauvola threshold values)
405  *              &pixd (<optional return> thresholded image)
406  *      Return: 0 if OK, 1 on error
407  *
408  *  Notes:
409  *      (1) The window width and height are 2 * @whsize + 1.  The minimum
410  *          value for @whsize is 2; typically it is >= 7..
411  *      (2) For nx == ny == 1, this defaults to pixSauvolaBinarize().
412  *      (3) Why a tiled version?
413  *          (a) Because the mean value accumulator is a uint32, overflow
414  *              can occur for an image with more than 16M pixels.
415  *          (b) The mean value accumulator array for 16M pixels is 64 MB.
416  *              The mean square accumulator array for 16M pixels is 128 MB.
417  *              Using tiles reduces the size of these arrays.
418  *          (c) Each tile can be processed independently, in parallel,
419  *              on a multicore processor.
420  *      (4) The Sauvola threshold is determined from the formula:
421  *              t = m * (1 - k * (1 - s / 128))
422  *          See pixSauvolaBinarize() for details.
423  */
424 l_int32
pixSauvolaBinarizeTiled(PIX * pixs,l_int32 whsize,l_float32 factor,l_int32 nx,l_int32 ny,PIX ** ppixth,PIX ** ppixd)425 pixSauvolaBinarizeTiled(PIX       *pixs,
426                         l_int32    whsize,
427                         l_float32  factor,
428                         l_int32    nx,
429                         l_int32    ny,
430                         PIX      **ppixth,
431                         PIX      **ppixd)
432 {
433 l_int32     i, j, w, h, xrat, yrat;
434 PIX        *pixth, *pixd, *tileth, *tiled, *pixt;
435 PIX       **ptileth, **ptiled;
436 PIXTILING  *pt;
437 
438     PROCNAME("pixSauvolaBinarizeTiled");
439 
440     if (!ppixth && !ppixd)
441         return ERROR_INT("no outputs", procName, 1);
442     if (ppixth) *ppixth = NULL;
443     if (ppixd) *ppixd = NULL;
444     if (!pixs || pixGetDepth(pixs) != 8)
445         return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
446     if (pixGetColormap(pixs))
447         return ERROR_INT("pixs is cmapped", procName, 1);
448     pixGetDimensions(pixs, &w, &h, NULL);
449     if (whsize < 2)
450         return ERROR_INT("whsize must be >= 2", procName, 1);
451     if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
452         return ERROR_INT("whsize too large for image", procName, 1);
453     if (factor < 0.0)
454         return ERROR_INT("factor must be >= 0", procName, 1);
455 
456     if (nx <= 1 && ny <= 1)
457         return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
458                                   ppixth, ppixd);
459 
460         /* Test to see if the tiles are too small.  The required
461          * condition is that the tile dimensions must be at least
462          * (whsize + 2) x (whsize + 2).  */
463     xrat = w / nx;
464     yrat = h / ny;
465     if (xrat < whsize + 2) {
466         nx = w / (whsize + 2);
467         L_WARNING_INT("tile width too small; nx reduced to %d", procName, nx);
468     }
469     if (yrat < whsize + 2) {
470         ny = h / (whsize + 2);
471         L_WARNING_INT("tile height too small; ny reduced to %d", procName, ny);
472     }
473     if (nx <= 1 && ny <= 1)
474         return pixSauvolaBinarize(pixs, whsize, factor, 1, NULL, NULL,
475                                   ppixth, ppixd);
476 
477         /* We can use pixtiling for painting both outputs, if requested */
478     if (ppixth) {
479         pixth = pixCreateNoInit(w, h, 8);
480         *ppixth = pixth;
481     }
482     if (ppixd) {
483         pixd = pixCreateNoInit(w, h, 1);
484         *ppixd = pixd;
485     }
486     pt = pixTilingCreate(pixs, nx, ny, 0, 0, whsize + 1, whsize + 1);
487     pixTilingNoStripOnPaint(pt);  /* pixSauvolaBinarize() does the stripping */
488 
489     for (i = 0; i < ny; i++) {
490         for (j = 0; j < nx; j++) {
491             pixt = pixTilingGetTile(pt, i, j);
492             ptileth = (ppixth) ? &tileth : NULL;
493             ptiled = (ppixd) ? &tiled : NULL;
494             pixSauvolaBinarize(pixt, whsize, factor, 0, NULL, NULL,
495                                ptileth, ptiled);
496             if (ppixth) {  /* do not strip */
497                 pixTilingPaintTile(pixth, i, j, tileth, pt);
498                 pixDestroy(&tileth);
499             }
500             if (ppixd) {
501                 pixTilingPaintTile(pixd, i, j, tiled, pt);
502                 pixDestroy(&tiled);
503             }
504             pixDestroy(&pixt);
505         }
506     }
507 
508     pixTilingDestroy(&pt);
509     return 0;
510 }
511 
512 
513 /*!
514  *  pixSauvolaBinarize()
515  *
516  *      Input:  pixs (8 bpp grayscale)
517  *              whsize (window half-width for measuring local statistics)
518  *              factor (factor for reducing threshold due to variance; >= 0)
519  *              addborder (1 to add border of width (@whsize + 1) on all sides)
520  *              &pixm (<optional return> local mean values)
521  *              &pixsd (<optional return> local standard deviation values)
522  *              &pixth (<optional return> threshold values)
523  *              &pixd (<optional return> thresholded image)
524  *      Return: 0 if OK, 1 on error
525  *
526  *  Notes:
527  *      (1) The window width and height are 2 * @whsize + 1.  The minimum
528  *          value for @whsize is 2; typically it is >= 7..
529  *      (2) The local statistics, measured over the window, are the
530  *          average and standard deviation.
531  *      (3) The measurements of the mean and standard deviation are
532  *          performed inside a border of (@whsize + 1) pixels.  If pixs does
533  *          not have these added border pixels, use @addborder = 1 to add
534  *          it here; otherwise use @addborder = 0.
535  *      (4) The Sauvola threshold is determined from the formula:
536  *            t = m * (1 - k * (1 - s / 128))
537  *          where:
538  *            t = local threshold
539  *            m = local mean
540  *            k = @factor (>= 0)   [ typ. 0.35 ]
541  *            s = local standard deviation, which is maximized at
542  *                127.5 when half the samples are 0 and half are 255.
543  *      (5) The basic idea of Niblack and Sauvola binarization is that
544  *          the local threshold should be less than the median value,
545  *          and the larger the variance, the closer to the median
546  *          it should be chosen.  Typical values for k are between
547  *          0.2 and 0.5.
548  */
549 l_int32
pixSauvolaBinarize(PIX * pixs,l_int32 whsize,l_float32 factor,l_int32 addborder,PIX ** ppixm,PIX ** ppixsd,PIX ** ppixth,PIX ** ppixd)550 pixSauvolaBinarize(PIX       *pixs,
551                    l_int32    whsize,
552                    l_float32  factor,
553                    l_int32    addborder,
554                    PIX      **ppixm,
555                    PIX      **ppixsd,
556                    PIX      **ppixth,
557                    PIX      **ppixd)
558 {
559 l_int32  w, h;
560 PIX     *pixg, *pixsc, *pixm, *pixms, *pixth, *pixd;
561 
562     PROCNAME("pixSauvolaBinarize");
563 
564 
565     if (!ppixm && !ppixsd && !ppixth && !ppixd)
566         return ERROR_INT("no outputs", procName, 1);
567     if (ppixm) *ppixm = NULL;
568     if (ppixsd) *ppixsd = NULL;
569     if (ppixth) *ppixth = NULL;
570     if (ppixd) *ppixd = NULL;
571     if (!pixs || pixGetDepth(pixs) != 8)
572         return ERROR_INT("pixs undefined or not 8 bpp", procName, 1);
573     if (pixGetColormap(pixs))
574         return ERROR_INT("pixs is cmapped", procName, 1);
575     pixGetDimensions(pixs, &w, &h, NULL);
576     if (whsize < 2)
577         return ERROR_INT("whsize must be >= 2", procName, 1);
578     if (w < 2 * whsize + 3 || h < 2 * whsize + 3)
579         return ERROR_INT("whsize too large for image", procName, 1);
580     if (factor < 0.0)
581         return ERROR_INT("factor must be >= 0", procName, 1);
582 
583     if (addborder) {
584         pixg = pixAddMirroredBorder(pixs, whsize + 1, whsize + 1,
585                                     whsize + 1, whsize + 1);
586         pixsc = pixClone(pixs);
587     }
588     else {
589         pixg = pixClone(pixs);
590         pixsc = pixRemoveBorder(pixs, whsize + 1);
591     }
592     if (!pixg || !pixsc)
593         return ERROR_INT("pixg and pixsc not made", procName, 1);
594 
595         /* All these functions strip off the border pixels. */
596     if (ppixm || ppixth || ppixd)
597         pixm = pixWindowedMean(pixg, whsize, whsize, 1);
598     if (ppixsd || ppixth || ppixd)
599         pixms = pixWindowedMeanSquare(pixg, whsize);
600     if (ppixth || ppixd)
601         pixth = pixSauvolaGetThreshold(pixm, pixms, factor, ppixsd);
602     if (ppixd)
603         pixd = pixApplyLocalThreshold(pixsc, pixth, 1);
604 
605     if (ppixm)
606         *ppixm = pixm;
607     else
608         pixDestroy(&pixm);
609     pixDestroy(&pixms);
610     if (ppixth)
611         *ppixth = pixth;
612     else
613         pixDestroy(&pixth);
614     if (ppixd)
615         *ppixd = pixd;
616     else
617         pixDestroy(&pixd);
618     pixDestroy(&pixg);
619     pixDestroy(&pixsc);
620     return 0;
621 }
622 
623 
624 /*!
625  *  pixSauvolaGetThreshold()
626  *
627  *      Input:  pixm (8 bpp grayscale)
628  *              pixms (32 bpp)
629  *              factor (factor for reducing threshold due to variance; >= 0)
630  *              &pixsd (<optional return> local standard deviation)
631  *      Return: pixd (8 bpp, sauvola threshold values), or null on error
632  *
633  *  Notes:
634  *      (1) The Sauvola threshold is determined from the formula:
635  *            t = m * (1 - k * (1 - s / 128))
636  *          where:
637  *            t = local threshold
638  *            m = local mean
639  *            k = @factor (>= 0)   [ typ. 0.35 ]
640  *            s = local standard deviation, which is maximized at
641  *                127.5 when half the samples are 0 and half are 255.
642  *      (2) See pixSauvolaBinarize() for other details.
643  *      (3) Important definitions and relations for computing averages:
644  *            v == pixel value
645  *            E(p) == expected value of p == average of p over some pixel set
646  *            S(v) == square of v == v * v
647  *            mv == E(v) == expected pixel value == mean value
648  *            ms == E(S(v)) == expected square of pixel values
649  *               == mean square value
650  *            var == variance == expected square of deviation from mean
651  *                == E(S(v - mv)) = E(S(v) - 2 * S(v * mv) + S(mv))
652  *                                = E(S(v)) - S(mv)
653  *                                = ms - mv * mv
654  *            s == standard deviation = sqrt(var)
655  *          So for evaluating the standard deviation in the Sauvola
656  *          threshold, we take
657  *            s = sqrt(ms - mv * mv)
658  */
659 PIX *
pixSauvolaGetThreshold(PIX * pixm,PIX * pixms,l_float32 factor,PIX ** ppixsd)660 pixSauvolaGetThreshold(PIX       *pixm,
661                        PIX       *pixms,
662                        l_float32  factor,
663                        PIX      **ppixsd)
664 {
665 l_int32     i, j, w, h, tabsize, wplm, wplms, wplsd, wpld, usetab;
666 l_int32     mv, ms, var, thresh;
667 l_uint32   *datam, *datams, *datasd, *datad;
668 l_uint32   *linem, *linems, *linesd, *lined;
669 l_float32   sd;
670 l_float32  *tab;  /* of 2^16 square roots */
671 PIX        *pixsd, *pixd;
672 
673     PROCNAME("pixSauvolaGetThreshold");
674 
675     if (ppixsd) *ppixsd = NULL;
676     if (!pixm || pixGetDepth(pixm) != 8)
677         return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
678     if (!pixms || pixGetDepth(pixms) != 32)
679         return (PIX *)ERROR_PTR("pixms undefined or not 32 bpp",
680                                 procName, NULL);
681     if (factor < 0.0)
682         return (PIX *)ERROR_PTR("factor must be >= 0", procName, NULL);
683 
684         /* Only make a table of 2^16 square roots if there
685          * are enough pixels to justify it. */
686     pixGetDimensions(pixm, &w, &h, NULL);
687     usetab = (w * h > 100000) ? 1 : 0;
688     if (usetab) {
689         tabsize = 1 << 16;
690         tab = (l_float32 *)CALLOC(tabsize, sizeof(l_float32));
691         for (i = 0; i < tabsize; i++)
692             tab[i] = (l_float32)sqrt(i);
693     }
694 
695     pixd = pixCreate(w, h, 8);
696     if (ppixsd) {
697         pixsd = pixCreate(w, h, 8);
698         *ppixsd = pixsd;
699     }
700     datam = pixGetData(pixm);
701     datams = pixGetData(pixms);
702     if (ppixsd) datasd = pixGetData(pixsd);
703     datad = pixGetData(pixd);
704     wplm = pixGetWpl(pixm);
705     wplms = pixGetWpl(pixms);
706     if (ppixsd) wplsd = pixGetWpl(pixsd);
707     wpld = pixGetWpl(pixd);
708     for (i = 0; i < h; i++) {
709         linem = datam + i * wplm;
710         linems = datams + i * wplms;
711         if (ppixsd) linesd = datasd + i * wplsd;
712         lined = datad + i * wpld;
713         for (j = 0; j < w; j++) {
714             mv = GET_DATA_BYTE(linem, j);
715             ms = linems[j];
716             var = ms - mv * mv;
717             if (usetab)
718                 sd = tab[var];
719             else
720                 sd = (l_float32)sqrt(var);
721             if (ppixsd) SET_DATA_BYTE(linesd, j, (l_int32)sd);
722             thresh = (l_int32)(mv * (1.0 - factor * (1.0 - sd / 128.)));
723             SET_DATA_BYTE(lined, j, thresh);
724         }
725     }
726 
727     if (usetab) FREE(tab);
728     return pixd;
729 }
730 
731 
732 /*!
733  *  pixApplyLocalThreshold()
734  *
735  *      Input:  pixs (8 bpp grayscale)
736  *              pixth (8 bpp array of local thresholds)
737  *              redfactor ( ... )
738  *      Return: pixd (1 bpp, thresholded image), or null on error
739  */
740 PIX *
pixApplyLocalThreshold(PIX * pixs,PIX * pixth,l_int32 redfactor)741 pixApplyLocalThreshold(PIX     *pixs,
742                        PIX     *pixth,
743                        l_int32  redfactor)
744 {
745 l_int32    i, j, w, h, wpls, wplt, wpld, vals, valt;
746 l_uint32  *datas, *datat, *datad, *lines, *linet, *lined;
747 PIX       *pixd;
748 
749     PROCNAME("pixApplyLocalThreshold");
750 
751     if (!pixs || pixGetDepth(pixs) != 8)
752         return (PIX *)ERROR_PTR("pixs undefined or not 8 bpp", procName, NULL);
753     if (!pixth || pixGetDepth(pixth) != 8)
754         return (PIX *)ERROR_PTR("pixth undefined or not 8 bpp", procName, NULL);
755 
756     pixGetDimensions(pixs, &w, &h, NULL);
757     pixd = pixCreate(w, h, 1);
758     datas = pixGetData(pixs);
759     datat = pixGetData(pixth);
760     datad = pixGetData(pixd);
761     wpls = pixGetWpl(pixs);
762     wplt = pixGetWpl(pixth);
763     wpld = pixGetWpl(pixd);
764     for (i = 0; i < h; i++) {
765         lines = datas + i * wpls;
766         linet = datat + i * wplt;
767         lined = datad + i * wpld;
768         for (j = 0; j < w; j++) {
769             vals = GET_DATA_BYTE(lines, j);
770             valt = GET_DATA_BYTE(linet, j);
771             if (vals < valt)
772                 SET_DATA_BIT(lined, j);
773         }
774     }
775 
776     return pixd;
777 }
778 
779 
780