1 #/*====================================================================*
2 # - Copyright (C) 2001 Leptonica. All rights reserved.
3 # - This software is distributed in the hope that it will be
4 # - useful, but with NO WARRANTY OF ANY KIND.
5 # - No author or distributor accepts responsibility to anyone for the
6 # - consequences of using this software, or for whether it serves any
7 # - particular purpose or works at all, unless he or she says so in
8 # - writing. Everyone is granted permission to copy, modify and
9 # - redistribute this source code, for commercial or non-commercial
10 # - purposes, with the following restrictions: (1) the origin of this
11 # - source code must not be misrepresented; (2) modified versions must
12 # - be plainly marked as such; and (3) this notice may not be removed
13 # - or altered from any source or modified source distribution.
14 # *====================================================================*/
15
16 /*
17 * skew.c
18 *
19 * Simple top-level deskew interfaces
20 * PIX *pixDeskew()
21 * PIX *pixFindSkewAndDeskew()
22 *
23 * Simple top-level angle-finding interface
24 * l_int32 pixFindSkew()
25 *
26 * Basic angle-finding functions with all parameters
27 * l_int32 pixFindSkewSweep()
28 * l_int32 pixFindSkewSweepAndSearch()
29 * l_int32 pixFindSkewSweepAndSearchScore()
30 * l_int32 pixFindSkewSweepAndSearchScorePivot()
31 *
32 * Search over arbitrary range of angles in orthogonal directions
33 * l_int32 pixFindSkewOrthogonalRange()
34 *
35 * Differential square sum function for scoring
36 * l_int32 pixFindDifferentialSquareSum()
37 *
38 * Measures of variance of row sums
39 * l_int32 pixFindNormalizedSquareSum()
40 *
41 *
42 * ==============================================================
43 * Page skew detection
44 *
45 * Skew is determined by pixel profiles, which are computed
46 * as pixel sums along the raster line for each line in the
47 * image. By vertically shearing the image by a given angle,
48 * the sums can be computed quickly along the raster lines
49 * rather than along lines at that angle. The score is
50 * computed from these line sums by taking the square of
51 * the DIFFERENCE between adjacent line sums, summed over
52 * all lines. The skew angle is then found as the angle
53 * that maximizes the score. The actual computation for
54 * any sheared image is done in the function
55 * pixFindDifferentialSquareSum().
56 *
57 * The search for the angle that maximizes this score is
58 * most efficiently performed by first sweeping coarsely
59 * over angles, using a significantly reduced image (say, 4x
60 * reduction), to find the approximate maximum within a half
61 * degree or so, and then doing an interval-halving binary
62 * search at higher resolution to get the skew angle to
63 * within 1/20 degree or better.
64 *
65 * The differential signal is used (rather than just using
66 * that variance of line sums) because it rejects the
67 * background noise due to total number of black pixels,
68 * and has maximum contributions from the baselines and
69 * x-height lines of text when the textlines are aligned
70 * with the raster lines. It also works well in multicolumn
71 * pages where the textlines do not line up across columns.
72 *
73 * The method is fast, accurate to within an angle (in radians)
74 * of approximately the inverse width in pixels of the image,
75 * and will work on a surprisingly small amount of text data
76 * (just a couple of text lines). Consequently, it can
77 * also be used to find local skew if the skew were to vary
78 * significantly over the page. Local skew determination
79 * is not very important except for locating lines of
80 * handwritten text that may be mixed with printed text.
81 */
82
83 #include <stdio.h>
84 #include <stdlib.h>
85 #include <math.h>
86 #include "allheaders.h"
87
88 /* Default sweep angle parameters for pixFindSkew() */
89 static const l_float32 DEFAULT_SWEEP_RANGE = 5.; /* degrees */
90 static const l_float32 DEFAULT_SWEEP_DELTA = 1.; /* degrees */
91
92 /* Default final angle difference parameter for binary
93 * search in pixFindSkew(). The expected accuracy is
94 * not better than the inverse image width in pixels,
95 * say, 1/2000 radians, or about 0.03 degrees. */
96 static const l_float32 DEFAULT_MINBS_DELTA = 0.01; /* degrees */
97
98 /* Default scale factors for pixFindSkew() */
99 static const l_int32 DEFAULT_SWEEP_REDUCTION = 4; /* sweep part; 4 is good */
100 static const l_int32 DEFAULT_BS_REDUCTION = 2; /* binary search part */
101
102 /* Minimum angle for deskewing in pixDeskew() */
103 static const l_float32 MIN_DESKEW_ANGLE = 0.1; /* degree */
104
105 /* Minimum allowed confidence (ratio) for deskewing in pixDeskew() */
106 static const l_float32 MIN_ALLOWED_CONFIDENCE = 3.0;
107
108 /* Minimum allowed maxscore to give nonzero confidence */
109 static const l_int32 MIN_VALID_MAXSCORE = 10000;
110
111 /* Constant setting threshold for minimum allowed minscore
112 * to give nonzero confidence; multiply this constant by
113 * (height * width^2) */
114 static const l_float32 MINSCORE_THRESHOLD_CONSTANT = 0.000002;
115
116
117 #ifndef NO_CONSOLE_IO
118 #define DEBUG_PRINT_SCORES 0
119 #define DEBUG_PRINT_SWEEP 0
120 #define DEBUG_PRINT_BINARY 0
121 #define DEBUG_PRINT_ORTH 0
122 #define DEBUG_THRESHOLD 0
123 #define DEBUG_PLOT_SCORES 0
124 #endif /* ~NO_CONSOLE_IO */
125
126
127
128 /*----------------------------------------------------------------*
129 * Top-level interfaces *
130 *----------------------------------------------------------------*/
131 /*!
132 * pixDeskew()
133 *
134 * Input: pixs (1 bpp)
135 * redsearch (for binary search: reduction factor = 1, 2 or 4)
136 * Return: deskewed pix, or null on error
137 *
138 * Notes:
139 * (1) This is the most simple high level interface, for 1 bpp input.
140 * (2) It first finds the skew angle. If the angle is large enough,
141 * it returns a deskewed image; otherwise, it returns a clone.
142 */
143 PIX *
pixDeskew(PIX * pixs,l_int32 redsearch)144 pixDeskew(PIX *pixs,
145 l_int32 redsearch)
146 {
147 PROCNAME("pixDeskew");
148
149 if (!pixs)
150 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
151 if (pixGetDepth(pixs) != 1)
152 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
153 if (redsearch != 1 && redsearch != 2 && redsearch != 4)
154 return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL);
155
156 return pixFindSkewAndDeskew(pixs, redsearch, NULL, NULL);
157 }
158
159
160 /*!
161 * pixFindSkewAndDeskew()
162 *
163 * Input: pixs (1 bpp)
164 * redsearch (for binary search: reduction factor = 1, 2 or 4)
165 * &angle (<optional return> angle required to deskew,
166 * in degrees)
167 * &conf (<optional return> conf value is ratio max/min scores)
168 * Return: deskewed pix, or null on error
169 *
170 * Notes:
171 * (1) This first finds the skew angle. If the angle is large enough,
172 * it returns a deskewed image; otherwise, it returns a clone.
173 * (2) Use NULL for &angle and/or &conf if you don't want those values
174 * returned.
175 */
176 PIX *
pixFindSkewAndDeskew(PIX * pixs,l_int32 redsearch,l_float32 * pangle,l_float32 * pconf)177 pixFindSkewAndDeskew(PIX *pixs,
178 l_int32 redsearch,
179 l_float32 *pangle,
180 l_float32 *pconf)
181 {
182 l_int32 ret;
183 l_float32 angle, conf, deg2rad;
184 PIX *pixd;
185
186 PROCNAME("pixFindSkewAndDeskew");
187
188 if (!pixs)
189 return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
190 if (pixGetDepth(pixs) != 1)
191 return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
192 if (redsearch != 1 && redsearch != 2 && redsearch != 4)
193 return (PIX *)ERROR_PTR("redsearch not in {1,2,4}", procName, NULL);
194
195 deg2rad = 3.1415926535 / 180.;
196 ret = pixFindSkewSweepAndSearch(pixs, &angle, &conf,
197 DEFAULT_SWEEP_REDUCTION, redsearch,
198 DEFAULT_SWEEP_RANGE, DEFAULT_SWEEP_DELTA,
199 DEFAULT_MINBS_DELTA);
200 if (pangle)
201 *pangle = angle;
202 if (pconf)
203 *pconf = conf;
204 if (ret)
205 return pixClone(pixs);
206
207 if (L_ABS(angle) < MIN_DESKEW_ANGLE || conf < MIN_ALLOWED_CONFIDENCE)
208 return pixClone(pixs);
209
210 if ((pixd = pixRotateShear(pixs, 0, 0, deg2rad * angle, L_BRING_IN_WHITE))
211 == NULL)
212 return pixClone(pixs);
213 else
214 return pixd;
215 }
216
217
218 /*!
219 * pixFindSkew()
220 *
221 * Input: pixs (1 bpp)
222 * &angle (<return> angle required to deskew, in degrees)
223 * &conf (<return> confidence value is ratio max/min scores)
224 * Return: 0 if OK, 1 on error or if angle measurment not valid
225 *
226 * Notes:
227 * (1) This is a simple high-level interface, that uses default
228 * values of the parameters for reasonable speed and accuracy.
229 * (2) The angle returned is the negative of the skew angle of
230 * the image. It is the angle required for deskew.
231 * Clockwise rotations are positive angles.
232 */
233 l_int32
pixFindSkew(PIX * pixs,l_float32 * pangle,l_float32 * pconf)234 pixFindSkew(PIX *pixs,
235 l_float32 *pangle,
236 l_float32 *pconf)
237 {
238 PROCNAME("pixFindSkew");
239
240 if (!pixs)
241 return ERROR_INT("pixs not defined", procName, 1);
242 if (pixGetDepth(pixs) != 1)
243 return ERROR_INT("pixs not 1 bpp", procName, 1);
244 if (!pangle)
245 return ERROR_INT("&angle not defined", procName, 1);
246 if (!pconf)
247 return ERROR_INT("&conf not defined", procName, 1);
248
249 return pixFindSkewSweepAndSearch(pixs, pangle, pconf,
250 DEFAULT_SWEEP_REDUCTION,
251 DEFAULT_BS_REDUCTION,
252 DEFAULT_SWEEP_RANGE,
253 DEFAULT_SWEEP_DELTA,
254 DEFAULT_MINBS_DELTA);
255 }
256
257
258 /*----------------------------------------------------------------*
259 * Basic angle-finding functions with all parameters *
260 *----------------------------------------------------------------*/
261 /*!
262 * pixFindSkewSweep()
263 *
264 * Input: pixs (1 bpp)
265 * &angle (<return> angle required to deskew, in degrees)
266 * reduction (factor = 1, 2, 4 or 8)
267 * sweeprange (half the full range; assumed about 0; in degrees)
268 * sweepdelta (angle increment of sweep; in degrees)
269 * Return: 0 if OK, 1 on error or if angle measurment not valid
270 *
271 * Notes:
272 * (1) This examines the 'score' for skew angles with equal intervals.
273 * (2) Caller must check the return value for validity of the result.
274 */
275 l_int32
pixFindSkewSweep(PIX * pixs,l_float32 * pangle,l_int32 reduction,l_float32 sweeprange,l_float32 sweepdelta)276 pixFindSkewSweep(PIX *pixs,
277 l_float32 *pangle,
278 l_int32 reduction,
279 l_float32 sweeprange,
280 l_float32 sweepdelta)
281 {
282 l_int32 ret, bzero, i, nangles;
283 l_float32 deg2rad, theta;
284 l_float32 sum, maxscore, maxangle;
285 NUMA *natheta, *nascore;
286 PIX *pix, *pixt;
287
288 PROCNAME("pixFindSkewSweep");
289
290 if (!pixs)
291 return ERROR_INT("pixs not defined", procName, 1);
292 if (pixGetDepth(pixs) != 1)
293 return ERROR_INT("pixs not 1 bpp", procName, 1);
294 if (!pangle)
295 return ERROR_INT("&angle not defined", procName, 1);
296 if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8)
297 return ERROR_INT("reduction must be in {1,2,4,8}", procName, 1);
298
299 *pangle = 0.0; /* init */
300 deg2rad = 3.1415926535 / 180.;
301 ret = 0;
302
303 /* Generate reduced image, if requested */
304 if (reduction == 1)
305 pix = pixClone(pixs);
306 else if (reduction == 2)
307 pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
308 else if (reduction == 4)
309 pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
310 else /* reduction == 8 */
311 pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0);
312
313 pixZero(pix, &bzero);
314 if (bzero) {
315 pixDestroy(&pix);
316 return 1;
317 }
318
319 nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1);
320 natheta = numaCreate(nangles);
321 nascore = numaCreate(nangles);
322 pixt = pixCreateTemplate(pix);
323
324 if (!pix || !pixt) {
325 ret = ERROR_INT("pix and pixt not both made", procName, 1);
326 goto cleanup;
327 }
328 if (!natheta || !nascore) {
329 ret = ERROR_INT("natheta and nascore not both made", procName, 1);
330 goto cleanup;
331 }
332
333 for (i = 0; i < nangles; i++) {
334 theta = -sweeprange + i * sweepdelta; /* degrees */
335
336 /* Shear pix about the UL corner and put the result in pixt */
337 pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE);
338
339 /* Get score */
340 pixFindDifferentialSquareSum(pixt, &sum);
341
342 #if DEBUG_PRINT_SCORES
343 L_INFO_FLOAT2("sum(%7.2f) = %7.0f", procName, theta, sum);
344 #endif /* DEBUG_PRINT_SCORES */
345
346 /* Save the result in the output arrays */
347 numaAddNumber(nascore, sum);
348 numaAddNumber(natheta, theta);
349 }
350
351 /* Find the location of the maximum (i.e., the skew angle)
352 * by fitting the largest data point and its two neighbors
353 * to a quadratic, using lagrangian interpolation. */
354 numaFitMax(nascore, &maxscore, natheta, &maxangle);
355 *pangle = maxangle;
356
357 #if DEBUG_PRINT_SWEEP
358 L_INFO_FLOAT2(" From sweep: angle = %7.3f, score = %7.3f", procName,
359 maxangle, maxscore);
360 #endif /* DEBUG_PRINT_SWEEP */
361
362 #if DEBUG_PLOT_SCORES
363 /* Plot the result -- the scores versus rotation angle --
364 * using gnuplot with GPLOT_LINES (lines connecting data points).
365 * The GPLOT data structure is first created, with the
366 * appropriate data incorporated from the two input NUMAs,
367 * and then the function gplotMakeOutput() uses gnuplot to
368 * generate the output plot. This can be either a .png file
369 * or a .ps file, depending on whether you use GPLOT_PNG
370 * or GPLOT_PS. */
371 {GPLOT *gplot;
372 gplot = gplotCreate("sweep_output", GPLOT_PNG,
373 "Sweep. Variance of difference of ON pixels vs. angle",
374 "angle (deg)", "score");
375 gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1");
376 gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2");
377 gplotMakeOutput(gplot);
378 gplotDestroy(&gplot);
379 }
380 #endif /* DEBUG_PLOT_SCORES */
381
382 cleanup:
383 pixDestroy(&pix);
384 pixDestroy(&pixt);
385 numaDestroy(&nascore);
386 numaDestroy(&natheta);
387 return 0;
388 }
389
390
391 /*!
392 * pixFindSkewSweepAndSearch()
393 *
394 * Input: pixs (1 bpp)
395 * &angle (<return> angle required to deskew; in degrees)
396 * &conf (<return> confidence given by ratio of max/min score)
397 * redsweep (sweep reduction factor = 1, 2, 4 or 8)
398 * redsearch (binary search reduction factor = 1, 2, 4 or 8;
399 * and must not exceed redsweep)
400 * sweeprange (half the full range, assumed about 0; in degrees)
401 * sweepdelta (angle increment of sweep; in degrees)
402 * minbsdelta (min binary search increment angle; in degrees)
403 * Return: 0 if OK, 1 on error or if angle measurment not valid
404 *
405 * Notes:
406 * (1) This finds the skew angle, doing first a sweep through a set
407 * of equal angles, and then doing a binary search until
408 * convergence.
409 * (2) Caller must check the return value for validity of the result.
410 * (3) In computing the differential line sum variance score, we sum
411 * the result over scanlines, but we always skip:
412 * - at least one scanline
413 * - not more than 10% of the image height
414 * - not more than 5% of the image width
415 * (4) See also notes in pixFindSkewSweepAndSearchScore()
416 */
417 l_int32
pixFindSkewSweepAndSearch(PIX * pixs,l_float32 * pangle,l_float32 * pconf,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta)418 pixFindSkewSweepAndSearch(PIX *pixs,
419 l_float32 *pangle,
420 l_float32 *pconf,
421 l_int32 redsweep,
422 l_int32 redsearch,
423 l_float32 sweeprange,
424 l_float32 sweepdelta,
425 l_float32 minbsdelta)
426 {
427 return pixFindSkewSweepAndSearchScore(pixs, pangle, pconf, NULL,
428 redsweep, redsearch, 0.0, sweeprange,
429 sweepdelta, minbsdelta);
430 }
431
432
433 /*!
434 * pixFindSkewSweepAndSearchScore()
435 *
436 * Input: pixs (1 bpp)
437 * &angle (<return> angle required to deskew; in degrees)
438 * &conf (<return> confidence given by ratio of max/min score)
439 * &endscore (<optional return> max score; use NULL to ignore)
440 * redsweep (sweep reduction factor = 1, 2, 4 or 8)
441 * redsearch (binary search reduction factor = 1, 2, 4 or 8;
442 * and must not exceed redsweep)
443 * sweepcenter (angle about which sweep is performed; in degrees)
444 * sweeprange (half the full range, taken about sweepcenter;
445 * in degrees)
446 * sweepdelta (angle increment of sweep; in degrees)
447 * minbsdelta (min binary search increment angle; in degrees)
448 * Return: 0 if OK, 1 on error or if angle measurment not valid
449 *
450 * Notes:
451 * (1) This finds the skew angle, doing first a sweep through a set
452 * of equal angles, and then doing a binary search until convergence.
453 * (2) There are two built-in constants that determine if the
454 * returned confidence is nonzero:
455 * - MIN_VALID_MAXSCORE (minimum allowed maxscore)
456 * - MINSCORE_THRESHOLD_CONSTANT (determines minimum allowed
457 * minscore, by multiplying by (height * width^2)
458 * If either of these conditions is not satisfied, the returned
459 * confidence value will be zero. The maxscore is optionally
460 * returned in this function to allow evaluation of the
461 * resulting angle by a method that is independent of the
462 * returned confidence value.
463 * (3) The larger the confidence value, the greater the probability
464 * that the proper alignment is given by the angle that maximizes
465 * variance. It should be compared to a threshold, which depends
466 * on the application. Values between 3.0 and 6.0 are common.
467 * (4) By default, the shear is about the UL corner.
468 */
469 l_int32
pixFindSkewSweepAndSearchScore(PIX * pixs,l_float32 * pangle,l_float32 * pconf,l_float32 * pendscore,l_int32 redsweep,l_int32 redsearch,l_float32 sweepcenter,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta)470 pixFindSkewSweepAndSearchScore(PIX *pixs,
471 l_float32 *pangle,
472 l_float32 *pconf,
473 l_float32 *pendscore,
474 l_int32 redsweep,
475 l_int32 redsearch,
476 l_float32 sweepcenter,
477 l_float32 sweeprange,
478 l_float32 sweepdelta,
479 l_float32 minbsdelta)
480 {
481 return pixFindSkewSweepAndSearchScorePivot(pixs, pangle, pconf, pendscore,
482 redsweep, redsearch, 0.0,
483 sweeprange, sweepdelta,
484 minbsdelta,
485 L_SHEAR_ABOUT_CORNER);
486 }
487
488
489 /*!
490 * pixFindSkewSweepAndSearchScorePivot()
491 *
492 * Input: pixs (1 bpp)
493 * &angle (<return> angle required to deskew; in degrees)
494 * &conf (<return> confidence given by ratio of max/min score)
495 * &endscore (<optional return> max score; use NULL to ignore)
496 * redsweep (sweep reduction factor = 1, 2, 4 or 8)
497 * redsearch (binary search reduction factor = 1, 2, 4 or 8;
498 * and must not exceed redsweep)
499 * sweepcenter (angle about which sweep is performed; in degrees)
500 * sweeprange (half the full range, taken about sweepcenter;
501 * in degrees)
502 * sweepdelta (angle increment of sweep; in degrees)
503 * minbsdelta (min binary search increment angle; in degrees)
504 * pivot (L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER)
505 * Return: 0 if OK, 1 on error or if angle measurment not valid
506 *
507 * Notes:
508 * (1) See notes in pixFindSkewSweepAndSearchScore().
509 * (2) This allows choice of shear pivoting from either the UL corner
510 * or the center. For small angles, the ability to discriminate
511 * angles is better with shearing from the UL corner. However,
512 * for large angles (say, greater than 20 degrees), it is better
513 * to shear about the center because a shear from the UL corner
514 * loses too much of the image.
515 */
516 l_int32
pixFindSkewSweepAndSearchScorePivot(PIX * pixs,l_float32 * pangle,l_float32 * pconf,l_float32 * pendscore,l_int32 redsweep,l_int32 redsearch,l_float32 sweepcenter,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta,l_int32 pivot)517 pixFindSkewSweepAndSearchScorePivot(PIX *pixs,
518 l_float32 *pangle,
519 l_float32 *pconf,
520 l_float32 *pendscore,
521 l_int32 redsweep,
522 l_int32 redsearch,
523 l_float32 sweepcenter,
524 l_float32 sweeprange,
525 l_float32 sweepdelta,
526 l_float32 minbsdelta,
527 l_int32 pivot)
528 {
529 l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc;
530 l_int32 width, height;
531 l_float32 deg2rad, theta, delta;
532 l_float32 sum, maxscore, maxangle;
533 l_float32 centerangle, leftcenterangle, rightcenterangle;
534 l_float32 lefttemp, righttemp;
535 l_float32 bsearchscore[5];
536 l_float32 minscore, minthresh;
537 l_float32 rangeleft;
538 NUMA *natheta, *nascore;
539 PIX *pixsw, *pixsch, *pixt1, *pixt2;
540
541 PROCNAME("pixFindSkewSweepAndSearchScorePivot");
542
543 if (!pixs)
544 return ERROR_INT("pixs not defined", procName, 1);
545 if (pixGetDepth(pixs) != 1)
546 return ERROR_INT("pixs not 1 bpp", procName, 1);
547 if (!pangle)
548 return ERROR_INT("&angle not defined", procName, 1);
549 if (!pconf)
550 return ERROR_INT("&conf not defined", procName, 1);
551 if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8)
552 return ERROR_INT("redsweep must be in {1,2,4,8}", procName, 1);
553 if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8)
554 return ERROR_INT("redsearch must be in {1,2,4,8}", procName, 1);
555 if (redsearch > redsweep)
556 return ERROR_INT("redsearch must not exceed redsweep", procName, 1);
557 if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER)
558 return ERROR_INT("invalid pivot", procName, 1);
559
560 *pangle = 0.0;
561 *pconf = 0.0;
562 deg2rad = 3.1415926535 / 180.;
563 ret = 0;
564
565 /* Generate reduced image for binary search, if requested */
566 if (redsearch == 1)
567 pixsch = pixClone(pixs);
568 else if (redsearch == 2)
569 pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
570 else if (redsearch == 4)
571 pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0);
572 else /* redsearch == 8 */
573 pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0);
574
575 pixZero(pixsch, &bzero);
576 if (bzero) {
577 pixDestroy(&pixsch);
578 return 1;
579 }
580
581 /* Generate reduced image for sweep, if requested */
582 ratio = redsweep / redsearch;
583 if (ratio == 1)
584 pixsw = pixClone(pixsch);
585 else { /* ratio > 1 */
586 if (ratio == 2)
587 pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0);
588 else if (ratio == 4)
589 pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0);
590 else /* ratio == 8 */
591 pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0);
592 }
593
594 pixt1 = pixCreateTemplate(pixsw);
595 if (ratio == 1)
596 pixt2 = pixClone(pixt1);
597 else
598 pixt2 = pixCreateTemplate(pixsch);
599
600 nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1);
601 natheta = numaCreate(nangles);
602 nascore = numaCreate(nangles);
603
604 if (!pixsch || !pixsw) {
605 ret = ERROR_INT("pixsch and pixsw not both made", procName, 1);
606 goto cleanup;
607 }
608 if (!pixt1 || !pixt2) {
609 ret = ERROR_INT("pixt1 and pixt2 not both made", procName, 1);
610 goto cleanup;
611 }
612 if (!natheta || !nascore) {
613 ret = ERROR_INT("natheta and nascore not both made", procName, 1);
614 goto cleanup;
615 }
616
617 /* Do sweep */
618 rangeleft = sweepcenter - sweeprange;
619 for (i = 0; i < nangles; i++) {
620 theta = rangeleft + i * sweepdelta; /* degrees */
621
622 /* Shear pix and put the result in pixt1 */
623 if (pivot == L_SHEAR_ABOUT_CORNER)
624 pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE);
625 else
626 pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE);
627
628 /* Get score */
629 pixFindDifferentialSquareSum(pixt1, &sum);
630
631 #if DEBUG_PRINT_SCORES
632 L_INFO_FLOAT2("sum(%7.2f) = %7.0f", procName, theta, sum);
633 #endif /* DEBUG_PRINT_SCORES */
634
635 /* Save the result in the output arrays */
636 numaAddNumber(nascore, sum);
637 numaAddNumber(natheta, theta);
638 }
639
640 /* Find the largest of the set (maxscore at maxangle) */
641 numaGetMax(nascore, &maxscore, &maxindex);
642 numaGetFValue(natheta, maxindex, &maxangle);
643
644 #if DEBUG_PRINT_SWEEP
645 L_INFO_FLOAT2(" From sweep: angle = %7.3f, score = %7.3f", procName,
646 maxangle, maxscore);
647 #endif /* DEBUG_PRINT_SWEEP */
648
649 #if DEBUG_PLOT_SCORES
650 /* Plot the sweep result -- the scores versus rotation angle --
651 * using gnuplot with GPLOT_LINES (lines connecting data points). */
652 {GPLOT *gplot;
653 gplot = gplotCreate("sweep_output", GPLOT_PNG,
654 "Sweep. Variance of difference of ON pixels vs. angle",
655 "angle (deg)", "score");
656 gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1");
657 gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2");
658 gplotMakeOutput(gplot);
659 gplotDestroy(&gplot);
660 }
661 #endif /* DEBUG_PLOT_SCORES */
662
663 /* Check if the max is at the end of the sweep. */
664 n = numaGetCount(natheta);
665 if (maxindex == 0 || maxindex == n - 1) {
666 L_WARNING("max found at sweep edge", procName);
667 goto cleanup;
668 }
669
670 /* Empty the numas for re-use */
671 numaEmpty(nascore);
672 numaEmpty(natheta);
673
674 /* Do binary search to find skew angle.
675 * First, set up initial three points. */
676 centerangle = maxangle;
677 if (pivot == L_SHEAR_ABOUT_CORNER) {
678 pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE);
679 pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]);
680 pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta),
681 L_BRING_IN_WHITE);
682 pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]);
683 pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta),
684 L_BRING_IN_WHITE);
685 pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]);
686 }
687 else {
688 pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE);
689 pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]);
690 pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta),
691 L_BRING_IN_WHITE);
692 pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]);
693 pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta),
694 L_BRING_IN_WHITE);
695 pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]);
696 }
697
698 numaAddNumber(nascore, bsearchscore[2]);
699 numaAddNumber(natheta, centerangle);
700 numaAddNumber(nascore, bsearchscore[0]);
701 numaAddNumber(natheta, centerangle - sweepdelta);
702 numaAddNumber(nascore, bsearchscore[4]);
703 numaAddNumber(natheta, centerangle + sweepdelta);
704
705 /* Start the search */
706 delta = 0.5 * sweepdelta;
707 while (delta >= minbsdelta)
708 {
709 /* Get the left intermediate score */
710 leftcenterangle = centerangle - delta;
711 if (pivot == L_SHEAR_ABOUT_CORNER)
712 pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle,
713 L_BRING_IN_WHITE);
714 else
715 pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle,
716 L_BRING_IN_WHITE);
717 pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]);
718 numaAddNumber(nascore, bsearchscore[1]);
719 numaAddNumber(natheta, leftcenterangle);
720
721 /* Get the right intermediate score */
722 rightcenterangle = centerangle + delta;
723 if (pivot == L_SHEAR_ABOUT_CORNER)
724 pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle,
725 L_BRING_IN_WHITE);
726 else
727 pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle,
728 L_BRING_IN_WHITE);
729 pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]);
730 numaAddNumber(nascore, bsearchscore[3]);
731 numaAddNumber(natheta, rightcenterangle);
732
733 /* Find the maximum of the five scores and its location.
734 * Note that the maximum must be in the center
735 * three values, not in the end two. */
736 maxscore = bsearchscore[1];
737 maxindex = 1;
738 for (i = 2; i < 4; i++) {
739 if (bsearchscore[i] > maxscore) {
740 maxscore = bsearchscore[i];
741 maxindex = i;
742 }
743 }
744
745 /* Set up score array to interpolate for the next iteration */
746 lefttemp = bsearchscore[maxindex - 1];
747 righttemp = bsearchscore[maxindex + 1];
748 bsearchscore[2] = maxscore;
749 bsearchscore[0] = lefttemp;
750 bsearchscore[4] = righttemp;
751
752 /* Get new center angle and delta for next iteration */
753 centerangle = centerangle + delta * (maxindex - 2);
754 delta = 0.5 * delta;
755 }
756 *pangle = centerangle;
757
758 #if DEBUG_PRINT_SCORES
759 L_INFO_FLOAT(" Binary search score = %7.3f", procName, bsearchscore[2]);
760 #endif /* DEBUG_PRINT_SCORES */
761
762 if (pendscore) /* save if requested */
763 *pendscore = bsearchscore[2];
764
765 /* Return the ratio of Max score over Min score
766 * as a confidence value. Don't trust if the Min score
767 * is too small, which can happen if the image is all black
768 * with only a few white pixels interspersed. In that case,
769 * we get a contribution from the top and bottom edges when
770 * vertically sheared, but this contribution becomes zero when
771 * the shear angle is zero. For zero shear angle, the only
772 * contribution will be from the white pixels. We expect that
773 * the signal goes as the product of the (height * width^2),
774 * so we compute a (hopefully) normalized minimum threshold as
775 * a function of these dimensions. */
776 numaGetMin(nascore, &minscore, &minloc);
777 width = pixGetWidth(pixsch);
778 height = pixGetHeight(pixsch);
779 minthresh = MINSCORE_THRESHOLD_CONSTANT * width * width * height;
780
781 #if DEBUG_THRESHOLD
782 L_INFO_FLOAT2(" minthresh = %10.2f, minscore = %10.2f", procName,
783 minthresh, minscore);
784 L_INFO_FLOAT(" maxscore = %10.2f", procName, maxscore);
785 #endif /* DEBUG_THRESHOLD */
786
787 if (minscore > minthresh)
788 *pconf = maxscore / minscore;
789 else
790 *pconf = 0.0;
791
792 /* Don't trust it if too close to the edge of the sweep
793 * range or if maxscore is small */
794 if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) ||
795 (centerangle < rangeleft + sweepdelta) ||
796 (maxscore < MIN_VALID_MAXSCORE))
797 *pconf = 0.0;
798
799 #if DEBUG_PRINT_BINARY
800 fprintf(stderr, "Binary search: angle = %7.3f, score ratio = %6.2f\n",
801 *pangle, *pconf);
802 fprintf(stderr, " max score = %8.0f\n", maxscore);
803 #endif /* DEBUG_PRINT_BINARY */
804
805 #if DEBUG_PLOT_SCORES
806 /* Plot the result -- the scores versus rotation angle --
807 * using gnuplot with GPLOT_POINTS. Because the data
808 * points are not ordered by theta (increasing or decreasing),
809 * using GPLOT_LINES would be confusing! */
810 {GPLOT *gplot;
811 gplot = gplotCreate("search_output", GPLOT_PNG,
812 "Binary search. Variance of difference of ON pixels vs. angle",
813 "angle (deg)", "score");
814 gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1");
815 gplotMakeOutput(gplot);
816 gplotDestroy(&gplot);
817 }
818 #endif /* DEBUG_PLOT_SCORES */
819
820 cleanup:
821 pixDestroy(&pixsw);
822 pixDestroy(&pixsch);
823 pixDestroy(&pixt1);
824 pixDestroy(&pixt2);
825 numaDestroy(&nascore);
826 numaDestroy(&natheta);
827 return ret;
828 }
829
830
831 /*---------------------------------------------------------------------*
832 * Search over arbitrary range of angles in orthogonal directions *
833 *---------------------------------------------------------------------*/
834 /*
835 * pixFindSkewOrthogonalRange()
836 *
837 * Input: pixs (1 bpp)
838 * &angle (<return> angle required to deskew; in degrees cw)
839 * &conf (<return> confidence given by ratio of max/min score)
840 * redsweep (sweep reduction factor = 1, 2, 4 or 8)
841 * redsearch (binary search reduction factor = 1, 2, 4 or 8;
842 * and must not exceed redsweep)
843 * sweeprange (half the full range in each orthogonal
844 * direction, taken about 0, in degrees)
845 * sweepdelta (angle increment of sweep; in degrees)
846 * minbsdelta (min binary search increment angle; in degrees)
847 * confprior (amount by which confidence of 90 degree rotated
848 * result is reduced when comparing with unrotated
849 * confidence value)
850 * Return: 0 if OK, 1 on error or if angle measurment not valid
851 *
852 * Notes:
853 * (1) This searches for the skew angle, first in the range
854 * [-sweeprange, sweeprange], and then in
855 * [90 - sweeprange, 90 + sweeprange], with angles measured
856 * clockwise. For exploring the full range of possibilities,
857 * suggest using sweeprange = 47.0 degrees, giving some overlap
858 * at 45 and 135 degrees. From these results, and discounting
859 * the the second confidence by @confprior, it selects the
860 * angle for maximal differential variance. If the angle
861 * is larger than pi/4, the angle found after 90 degree rotation
862 * is selected.
863 * (2) The larger the confidence value, the greater the probability
864 * that the proper alignment is given by the angle that maximizes
865 * variance. It should be compared to a threshold, which depends
866 * on the application. Values between 3.0 and 6.0 are common.
867 * (3) Allowing for both portrait and landscape searches is more
868 * difficult, because if the signal from the text lines is weak,
869 * a signal from vertical rules can be larger!
870 * The most difficult documents to deskew have some or all of:
871 * (a) Multiple columns, not aligned
872 * (b) Black lines along the vertical edges
873 * (c) Text from two pages, and at different angles
874 * Rule of thumb for resolution:
875 * (a) If the margins are clean, you can work at 75 ppi,
876 * although 100 ppi is safer.
877 * (b) If there are vertical lines in the margins, do not
878 * work below 150 ppi. The signal from the text lines must
879 * exceed that from the margin lines.
880 * (4) Choosing the @confprior parameter depends on knowing something
881 * about the source of image. However, we're not using
882 * real probabilities here, so its use is qualitative.
883 * If landscape and portrait are equally likely, use
884 * @confprior = 0.0. If the likelihood of portrait (non-rotated)
885 * is 100 times higher than that of landscape, we want to reduce
886 * the chance that we rotate to landscape in a situation where
887 * the landscape signal is accidentally larger than the
888 * portrait signal. To do this use a positive value of
889 * @confprior; say 1.5.
890 */
891 l_int32
pixFindSkewOrthogonalRange(PIX * pixs,l_float32 * pangle,l_float32 * pconf,l_int32 redsweep,l_int32 redsearch,l_float32 sweeprange,l_float32 sweepdelta,l_float32 minbsdelta,l_float32 confprior)892 pixFindSkewOrthogonalRange(PIX *pixs,
893 l_float32 *pangle,
894 l_float32 *pconf,
895 l_int32 redsweep,
896 l_int32 redsearch,
897 l_float32 sweeprange,
898 l_float32 sweepdelta,
899 l_float32 minbsdelta,
900 l_float32 confprior)
901 {
902 l_float32 angle1, conf1, score1, angle2, conf2, score2;
903 PIX *pixr;
904
905 PROCNAME("pixFindSkewOrthogonalRange");
906
907 if (!pangle || !pconf)
908 return ERROR_INT("&angle and &conf not both defined", procName, 1);
909 *pangle = *pconf = 0.0;
910 if (!pixs || pixGetDepth(pixs) != 1)
911 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
912
913 pixFindSkewSweepAndSearchScorePivot(pixs, &angle1, &conf1, &score1,
914 redsweep, redsearch, 0.0,
915 sweeprange, sweepdelta, minbsdelta,
916 L_SHEAR_ABOUT_CORNER);
917 pixr = pixRotateOrth(pixs, 1);
918 pixFindSkewSweepAndSearchScorePivot(pixr, &angle2, &conf2, &score2,
919 redsweep, redsearch, 0.0,
920 sweeprange, sweepdelta, minbsdelta,
921 L_SHEAR_ABOUT_CORNER);
922 pixDestroy(&pixr);
923
924 if (conf1 > conf2 - confprior) {
925 *pangle = angle1;
926 *pconf = conf1;
927 } else {
928 *pangle = -90.0 + angle2;
929 *pconf = conf2;
930 }
931
932 #if DEBUG_PRINT_ORTH
933 fprintf(stderr, " About 0: angle1 = %7.3f, conf1 = %7.3f, score1 = %f\n",
934 angle1, conf1, score1);
935 fprintf(stderr, " About 90: angle2 = %7.3f, conf2 = %7.3f, score2 = %f\n",
936 angle2, conf2, score2);
937 fprintf(stderr, " Final: angle = %7.3f, conf = %7.3f\n", *pangle, *pconf);
938 #endif /* DEBUG_PRINT_ORTH */
939
940 return 0;
941 }
942
943
944
945 /*----------------------------------------------------------------*
946 * Differential square sum function *
947 *----------------------------------------------------------------*/
948 /*!
949 * pixFindDifferentialSquareSum()
950 *
951 * Input: pixs
952 * &sum (<return> result)
953 * Return: 0 if OK, 1 on error
954 *
955 * Notes:
956 * (1) At the top and bottom, we skip:
957 * - at least one scanline
958 * - not more than 10% of the image height
959 * - not more than 5% of the image width
960 */
961 l_int32
pixFindDifferentialSquareSum(PIX * pixs,l_float32 * psum)962 pixFindDifferentialSquareSum(PIX *pixs,
963 l_float32 *psum)
964 {
965 l_int32 i, n;
966 l_int32 w, h, skiph, skip, nskip;
967 l_float32 val1, val2, diff, sum;
968 NUMA *na;
969
970 PROCNAME("pixFindDifferentialSquareSum");
971
972 if (!pixs)
973 return ERROR_INT("pixs not defined", procName, 1);
974
975 /* Generate a number array consisting of the sum
976 * of pixels in each row of pixs */
977 if ((na = pixCountPixelsByRow(pixs, NULL)) == NULL)
978 return ERROR_INT("na not made", procName, 1);
979
980 /* Compute the number of rows at top and bottom to omit.
981 * We omit these to avoid getting a spurious signal from
982 * the top and bottom of a (nearly) all black image. */
983 w = pixGetWidth(pixs);
984 h = pixGetHeight(pixs);
985 skiph = (l_int32)(0.05 * w); /* skip for max shear of 0.025 radians */
986 skip = L_MIN(h / 10, skiph); /* don't remove more than 10% of image */
987 nskip = L_MAX(skip / 2, 1); /* at top & bot; skip at least one line */
988
989 /* Sum the squares of differential row sums, on the
990 * allowed rows. Note that nskip must be >= 1. */
991 n = numaGetCount(na);
992 sum = 0.0;
993 for (i = nskip; i < n - nskip; i++) {
994 numaGetFValue(na, i - 1, &val1);
995 numaGetFValue(na, i, &val2);
996 diff = val2 - val1;
997 sum += diff * diff;
998 }
999 numaDestroy(&na);
1000 *psum = sum;
1001 return 0;
1002 }
1003
1004
1005 /*----------------------------------------------------------------*
1006 * Normalized square sum *
1007 *----------------------------------------------------------------*/
1008 /*!
1009 * pixFindNormalizedSquareSum()
1010 *
1011 * Input: pixs
1012 * &hratio (<optional return> ratio of normalized horiz square sum
1013 * to result if the pixel distribution were uniform)
1014 * &vratio (<optional return> ratio of normalized vert square sum
1015 * to result if the pixel distribution were uniform)
1016 * &fract (<optional return> ratio of fg pixels to total pixels)
1017 * Return: 0 if OK, 1 on error or if there are no fg pixels
1018 *
1019 * Notes:
1020 * (1) Let the image have h scanlines and N fg pixels.
1021 * If the pixels were uniformly distributed on scanlines,
1022 * the sum of squares of fg pixels on each scanline would be
1023 * h * (N / h)^2. However, if the pixels are not uniformly
1024 * distributed (e.g., for text), the sum of squares of fg
1025 * pixels will be larger. We return in hratio and vratio the
1026 * ratio of these two values.
1027 * (2) If there are no fg pixels, hratio and vratio are returned as 0.0.
1028 */
1029 l_int32
pixFindNormalizedSquareSum(PIX * pixs,l_float32 * phratio,l_float32 * pvratio,l_float32 * pfract)1030 pixFindNormalizedSquareSum(PIX *pixs,
1031 l_float32 *phratio,
1032 l_float32 *pvratio,
1033 l_float32 *pfract)
1034 {
1035 l_int32 i, w, h, empty;
1036 l_float32 sum, sumsq, uniform, val;
1037 NUMA *na;
1038 PIX *pixt;
1039
1040 PROCNAME("pixFindNormalizedSquareSum");
1041
1042 if (!pixs || pixGetDepth(pixs) != 1)
1043 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
1044 pixGetDimensions(pixs, &w, &h, NULL);
1045
1046 if (!phratio && !pvratio)
1047 return ERROR_INT("nothing to do", procName, 1);
1048 if (phratio) *phratio = 0.0;
1049 if (pvratio) *pvratio = 0.0;
1050
1051 empty = 0;
1052 if (phratio) {
1053 na = pixCountPixelsByRow(pixs, NULL);
1054 numaGetSum(na, &sum); /* fg pixels */
1055 if (pfract) *pfract = sum / (l_float32)(w * h);
1056 if (sum != 0.0) {
1057 uniform = sum * sum / h; /* h*(sum / h)^2 */
1058 sumsq = 0.0;
1059 for (i = 0; i < h; i++) {
1060 numaGetFValue(na, i, &val);
1061 sumsq += val * val;
1062 }
1063 *phratio = sumsq / uniform;
1064 }
1065 else
1066 empty = 1;
1067 numaDestroy(&na);
1068 }
1069
1070 if (pvratio) {
1071 if (empty == 1) return 1;
1072 pixt = pixRotateOrth(pixs, 1);
1073 na = pixCountPixelsByRow(pixt, NULL);
1074 numaGetSum(na, &sum);
1075 if (pfract) *pfract = sum / (l_float32)(w * h);
1076 if (sum != 0.0) {
1077 uniform = sum * sum / w;
1078 sumsq = 0.0;
1079 for (i = 0; i < w; i++) {
1080 numaGetFValue(na, i, &val);
1081 sumsq += val * val;
1082 }
1083 *pvratio = sumsq / uniform;
1084 }
1085 else
1086 empty = 1;
1087 pixDestroy(&pixt);
1088 numaDestroy(&na);
1089 }
1090
1091 return empty;
1092 }
1093
1094
1095