• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 
17 /*
18  *  scalelow.c
19  *
20  *         Color (interpolated) scaling: general case
21  *                  void       scaleColorLILow()
22  *
23  *         Grayscale (interpolated) scaling: general case
24  *                  void       scaleGrayLILow()
25  *
26  *         Color (interpolated) scaling: 2x upscaling
27  *                  void       scaleColor2xLILow()
28  *                  void       scaleColor2xLILineLow()
29  *
30  *         Grayscale (interpolated) scaling: 2x upscaling
31  *                  void       scaleGray2xLILow()
32  *                  void       scaleGray2xLILineLow()
33  *
34  *         Grayscale (interpolated) scaling: 4x upscaling
35  *                  void       scaleGray4xLILow()
36  *                  void       scaleGray4xLILineLow()
37  *
38  *         Grayscale and color scaling by closest pixel sampling
39  *                  l_int32    scaleBySamplingLow()
40  *
41  *         Color and grayscale downsampling with (antialias) lowpass filter
42  *                  l_int32    scaleSmoothLow()
43  *                  void       scaleRGBToGray2Low()
44  *
45  *         Color and grayscale downsampling with (antialias) area mapping
46  *                  l_int32    scaleColorAreaMapLow()
47  *                  l_int32    scaleGrayAreaMapLow()
48  *                  l_int32    scaleAreaMapLow2()
49  *
50  *         Binary scaling by closest pixel sampling
51  *                  l_int32    scaleBinaryLow()
52  *
53  *         Scale-to-gray 2x
54  *                  void       scaleToGray2Low()
55  *                  l_uint32  *makeSumTabSG2()
56  *                  l_uint8   *makeValTabSG2()
57  *
58  *         Scale-to-gray 3x
59  *                  void       scaleToGray3Low()
60  *                  l_uint32  *makeSumTabSG3()
61  *                  l_uint8   *makeValTabSG3()
62  *
63  *         Scale-to-gray 4x
64  *                  void       scaleToGray4Low()
65  *                  l_uint32  *makeSumTabSG4()
66  *                  l_uint8   *makeValTabSG4()
67  *
68  *         Scale-to-gray 6x
69  *                  void       scaleToGray6Low()
70  *                  l_uint8   *makeValTabSG6()
71  *
72  *         Scale-to-gray 8x
73  *                  void       scaleToGray8Low()
74  *                  l_uint8   *makeValTabSG8()
75  *
76  *         Scale-to-gray 16x
77  *                  void       scaleToGray16Low()
78  *
79  *         Grayscale mipmap
80  *                  l_int32    scaleMipmapLow()
81  *
82  */
83 
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include "allheaders.h"
88 
89 #ifndef  NO_CONSOLE_IO
90 #define  DEBUG_OVERFLOW   0
91 #define  DEBUG_UNROLLING  0
92 #endif  /* ~NO_CONSOLE_IO */
93 
94 
95 /*------------------------------------------------------------------*
96  *            General linear interpolated color scaling             *
97  *------------------------------------------------------------------*/
98 /*!
99  *  scaleColorLILow()
100  *
101  *  We choose to divide each pixel into 16 x 16 sub-pixels.
102  *  Linear interpolation is equivalent to finding the
103  *  fractional area (i.e., number of sub-pixels divided
104  *  by 256) associated with each of the four nearest src pixels,
105  *  and weighting each pixel value by this fractional area.
106  *
107  *  P3 speed is about 7 x 10^6 dst pixels/sec/GHz
108  */
109 void
scaleColorLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)110 scaleColorLILow(l_uint32  *datad,
111                l_int32    wd,
112                l_int32    hd,
113                l_int32    wpld,
114                l_uint32  *datas,
115                l_int32    ws,
116                l_int32    hs,
117                l_int32    wpls)
118 {
119 l_int32    i, j, wm2, hm2;
120 l_int32    xpm, ypm;  /* location in src image, to 1/16 of a pixel */
121 l_int32    xp, yp, xf, yf;  /* src pixel and pixel fraction coordinates */
122 l_int32    v00r, v01r, v10r, v11r, v00g, v01g, v10g, v11g;
123 l_int32    v00b, v01b, v10b, v11b, area00, area01, area10, area11;
124 l_uint32   pixels1, pixels2, pixels3, pixels4, pixel;
125 l_uint32  *lines, *lined;
126 l_float32  scx, scy;
127 
128         /* (scx, scy) are scaling factors that are applied to the
129          * dest coords to get the corresponding src coords.
130          * We need them because we iterate over dest pixels
131          * and must find the corresponding set of src pixels. */
132     scx = 16. * (l_float32)ws / (l_float32)wd;
133     scy = 16. * (l_float32)hs / (l_float32)hd;
134 
135     wm2 = ws - 2;
136     hm2 = hs - 2;
137 
138         /* iterate over the destination pixels */
139     for (i = 0; i < hd; i++) {
140         ypm = (l_int32)(scy * (l_float32)i);
141         yp = ypm >> 4;
142         yf = ypm & 0x0f;
143         lined = datad + i * wpld;
144         lines = datas + yp * wpls;
145         for (j = 0; j < wd; j++) {
146             xpm = (l_int32)(scx * (l_float32)j);
147             xp = xpm >> 4;
148             xf = xpm & 0x0f;
149 
150                 /* if near the edge, just use the src pixel value */
151             if (xp > wm2 || yp > hm2) {
152                 *(lined + j) = *(lines + xp);
153                 continue;
154             }
155 
156                 /* do bilinear interpolation.  This is a simple
157                  * generalization of the calculation in scaleGrayLILow() */
158             pixels1 = *(lines + xp);
159             pixels2 = *(lines + xp + 1);
160             pixels3 = *(lines + wpls + xp);
161             pixels4 = *(lines + wpls + xp + 1);
162             area00 = (16 - xf) * (16 - yf);
163             area10 = xf * (16 - yf);
164             area01 = (16 - xf) * yf;
165             area11 = xf * yf;
166             v00r = area00 * ((pixels1 >> L_RED_SHIFT) & 0xff);
167             v00g = area00 * ((pixels1 >> L_GREEN_SHIFT) & 0xff);
168             v00b = area00 * ((pixels1 >> L_BLUE_SHIFT) & 0xff);
169             v10r = area10 * ((pixels2 >> L_RED_SHIFT) & 0xff);
170             v10g = area10 * ((pixels2 >> L_GREEN_SHIFT) & 0xff);
171             v10b = area10 * ((pixels2 >> L_BLUE_SHIFT) & 0xff);
172             v01r = area01 * ((pixels3 >> L_RED_SHIFT) & 0xff);
173             v01g = area01 * ((pixels3 >> L_GREEN_SHIFT) & 0xff);
174             v01b = area01 * ((pixels3 >> L_BLUE_SHIFT) & 0xff);
175             v11r = area11 * ((pixels4 >> L_RED_SHIFT) & 0xff);
176             v11g = area11 * ((pixels4 >> L_GREEN_SHIFT) & 0xff);
177             v11b = area11 * ((pixels4 >> L_BLUE_SHIFT) & 0xff);
178             pixel = (((v00r + v10r + v01r + v11r + 128) << 16) & 0xff000000) |
179                     (((v00g + v10g + v01g + v11g + 128) << 8) & 0x00ff0000) |
180                     ((v00b + v10b + v01b + v11b + 128) & 0x0000ff00);
181             *(lined + j) = pixel;
182         }
183     }
184 
185     return;
186 }
187 
188 
189 /*------------------------------------------------------------------*
190  *            General linear interpolated gray scaling              *
191  *------------------------------------------------------------------*/
192 /*!
193  *  scaleGrayLILow()
194  *
195  *  We choose to divide each pixel into 16 x 16 sub-pixels.
196  *  Linear interpolation is equivalent to finding the
197  *  fractional area (i.e., number of sub-pixels divided
198  *  by 256) associated with each of the four nearest src pixels,
199  *  and weighting each pixel value by this fractional area.
200  */
201 void
scaleGrayLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)202 scaleGrayLILow(l_uint32  *datad,
203                l_int32    wd,
204                l_int32    hd,
205                l_int32    wpld,
206                l_uint32  *datas,
207                l_int32    ws,
208                l_int32    hs,
209                l_int32    wpls)
210 {
211 l_int32    i, j, wm2, hm2;
212 l_int32    xpm, ypm;  /* location in src image, to 1/16 of a pixel */
213 l_int32    xp, yp, xf, yf;  /* src pixel and pixel fraction coordinates */
214 l_int32    v00, v01, v10, v11;
215 l_uint8    val;
216 l_uint32  *lines, *lined;
217 l_float32  scx, scy;
218 
219         /* (scx, scy) are scaling factors that are applied to the
220          * dest coords to get the corresponding src coords.
221          * We need them because we iterate over dest pixels
222          * and must find the corresponding set of src pixels. */
223     scx = 16. * (l_float32)ws / (l_float32)wd;
224     scy = 16. * (l_float32)hs / (l_float32)hd;
225 
226     wm2 = ws - 2;
227     hm2 = hs - 2;
228 
229         /* Iterate over the destination pixels */
230     for (i = 0; i < hd; i++) {
231         ypm = (l_int32)(scy * (l_float32)i);
232         yp = ypm >> 4;
233         yf = ypm & 0x0f;
234         lined = datad + i * wpld;
235         lines = datas + yp * wpls;
236         for (j = 0; j < wd; j++) {
237             xpm = (l_int32)(scx * (l_float32)j);
238             xp = xpm >> 4;
239             xf = xpm & 0x0f;
240 
241                 /* If near the dest boundary, just use the src pixel value */
242             if (xp > wm2 || yp > hm2) {
243                 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
244                 continue;
245             }
246 
247                 /* Do bilinear interpolation.  Without this, we could
248                  * simply subsample:
249                  *   SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
250                  * which is faster but gives lousy results!
251                  */
252             v00 = (16 - xf) * (16 - yf) * GET_DATA_BYTE(lines, xp);
253             v10 = xf * (16 - yf) * GET_DATA_BYTE(lines, xp + 1);
254             v01 = (16 - xf) * yf * GET_DATA_BYTE(lines + wpls, xp);
255             v11 = xf * yf * GET_DATA_BYTE(lines + wpls, xp + 1);
256             val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256);
257             SET_DATA_BYTE(lined, j, val);
258         }
259     }
260 
261     return;
262 }
263 
264 
265 /*------------------------------------------------------------------*
266  *                2x linear interpolated color scaling              *
267  *------------------------------------------------------------------*/
268 /*!
269  *  scaleColor2xLILow()
270  *
271  *  This is a special case of 2x expansion by linear
272  *  interpolation.  Each src pixel contains 4 dest pixels.
273  *  The 4 dest pixels in src pixel 1 are numbered at
274  *  their UL corners.  The 4 dest pixels in src pixel 1
275  *  are related to that src pixel and its 3 neighboring
276  *  src pixels as follows:
277  *
278  *             1-----2-----|-----|-----|
279  *             |     |     |     |     |
280  *             |     |     |     |     |
281  *  src 1 -->  3-----4-----|     |     |  <-- src 2
282  *             |     |     |     |     |
283  *             |     |     |     |     |
284  *             |-----|-----|-----|-----|
285  *             |     |     |     |     |
286  *             |     |     |     |     |
287  *  src 3 -->  |     |     |     |     |  <-- src 4
288  *             |     |     |     |     |
289  *             |     |     |     |     |
290  *             |-----|-----|-----|-----|
291  *
292  *           dest      src
293  *           ----      ---
294  *           dp1    =  sp1
295  *           dp2    =  (sp1 + sp2) / 2
296  *           dp3    =  (sp1 + sp3) / 2
297  *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
298  *
299  *  We iterate over the src pixels, and unroll the calculation
300  *  for each set of 4 dest pixels corresponding to that src
301  *  pixel, caching pixels for the next src pixel whenever possible.
302  *  The method is exactly analogous to the one we use for
303  *  scaleGray2xLILow() and its line version.
304  *
305  *  P3 speed is about 5 x 10^7 dst pixels/sec/GHz
306  */
307 void
scaleColor2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)308 scaleColor2xLILow(l_uint32  *datad,
309                   l_int32    wpld,
310                   l_uint32  *datas,
311                   l_int32    ws,
312                   l_int32    hs,
313                   l_int32    wpls)
314 {
315 l_int32    i, hsm;
316 l_uint32  *lines, *lined;
317 
318     hsm = hs - 1;
319 
320         /* We're taking 2 src and 2 dest lines at a time,
321          * and for each src line, we're computing 2 dest lines.
322          * Call these 2 dest lines:  destline1 and destline2.
323          * The first src line is used for destline 1.
324          * On all but the last src line, both src lines are
325          * used in the linear interpolation for destline2.
326          * On the last src line, both destline1 and destline2
327          * are computed using only that src line (because there
328          * isn't a lower src line). */
329 
330         /* iterate over all but the last src line */
331     for (i = 0; i < hsm; i++) {
332         lines = datas + i * wpls;
333         lined = datad + 2 * i * wpld;
334         scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 0);
335     }
336 
337         /* last src line */
338     lines = datas + hsm * wpls;
339     lined = datad + 2 * hsm * wpld;
340     scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 1);
341 
342     return;
343 }
344 
345 
346 /*!
347  *  scaleColor2xLILineLow()
348  *
349  *      Input:  lined   (ptr to top destline, to be made from current src line)
350  *              wpld
351  *              lines   (ptr to current src line)
352  *              ws
353  *              wpls
354  *              lastlineflag  (1 if last src line; 0 otherwise)
355  *      Return: void
356  *
357  *  *** Warning: implicit assumption about RGB component ordering ***
358  */
359 void
scaleColor2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)360 scaleColor2xLILineLow(l_uint32  *lined,
361                       l_int32    wpld,
362                       l_uint32  *lines,
363                       l_int32    ws,
364                       l_int32    wpls,
365                       l_int32    lastlineflag)
366 {
367 l_int32    j, jd, wsm;
368 l_int32    rval1, rval2, rval3, rval4, gval1, gval2, gval3, gval4;
369 l_int32    bval1, bval2, bval3, bval4;
370 l_uint32   pixels1, pixels2, pixels3, pixels4, pixel;
371 l_uint32  *linesp, *linedp;
372 
373     wsm = ws - 1;
374 
375     if (lastlineflag == 0) {
376         linesp = lines + wpls;
377         linedp = lined + wpld;
378         pixels1 = *lines;
379         pixels3 = *linesp;
380 
381             /* initialize with v(2) and v(4) */
382         rval2 = pixels1 >> 24;
383         gval2 = (pixels1 >> 16) & 0xff;
384         bval2 = (pixels1 >> 8) & 0xff;
385         rval4 = pixels3 >> 24;
386         gval4 = (pixels3 >> 16) & 0xff;
387         bval4 = (pixels3 >> 8) & 0xff;
388 
389         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
390                 /* shift in previous src values */
391             rval1 = rval2;
392             gval1 = gval2;
393             bval1 = bval2;
394             rval3 = rval4;
395             gval3 = gval4;
396             bval3 = bval4;
397                 /* get new src values */
398             pixels2 = *(lines + j + 1);
399             pixels4 = *(linesp + j + 1);
400             rval2 = pixels2 >> 24;
401             gval2 = (pixels2 >> 16) & 0xff;
402             bval2 = (pixels2 >> 8) & 0xff;
403             rval4 = pixels4 >> 24;
404             gval4 = (pixels4 >> 16) & 0xff;
405             bval4 = (pixels4 >> 8) & 0xff;
406                 /* save dest values */
407             pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
408             *(lined + jd) = pixel;                               /* pix 1 */
409             pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
410                      (((gval1 + gval2) << 15) & 0x00ff0000) |
411                      (((bval1 + bval2) << 7) & 0x0000ff00));
412             *(lined + jd + 1) = pixel;                           /* pix 2 */
413             pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
414                      (((gval1 + gval3) << 15) & 0x00ff0000) |
415                      (((bval1 + bval3) << 7) & 0x0000ff00));
416             *(linedp + jd) = pixel;                              /* pix 3 */
417             pixel = ((((rval1 + rval2 + rval3 + rval4) << 22) & 0xff000000) |
418                      (((gval1 + gval2 + gval3 + gval4) << 14) & 0x00ff0000) |
419                      (((bval1 + bval2 + bval3 + bval4) << 6) & 0x0000ff00));
420             *(linedp + jd + 1) = pixel;                          /* pix 4 */
421         }
422             /* last src pixel on line */
423         rval1 = rval2;
424         gval1 = gval2;
425         bval1 = bval2;
426         rval3 = rval4;
427         gval3 = gval4;
428         bval3 = bval4;
429         pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
430         *(lined + 2 * wsm) = pixel;                        /* pix 1 */
431         *(lined + 2 * wsm + 1) = pixel;                    /* pix 2 */
432         pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
433                  (((gval1 + gval3) << 15) & 0x00ff0000) |
434                  (((bval1 + bval3) << 7) & 0x0000ff00));
435         *(linedp + 2 * wsm) = pixel;                       /* pix 3 */
436         *(linedp + 2 * wsm + 1) = pixel;                   /* pix 4 */
437     }
438     else {   /* last row of src pixels: lastlineflag == 1 */
439         linedp = lined + wpld;
440         pixels2 = *lines;
441         rval2 = pixels2 >> 24;
442         gval2 = (pixels2 >> 16) & 0xff;
443         bval2 = (pixels2 >> 8) & 0xff;
444         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
445             rval1 = rval2;
446             gval1 = gval2;
447             bval1 = bval2;
448             pixels2 = *(lines + j + 1);
449             rval2 = pixels2 >> 24;
450             gval2 = (pixels2 >> 16) & 0xff;
451             bval2 = (pixels2 >> 8) & 0xff;
452             pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
453             *(lined + jd) = pixel;                            /* pix 1 */
454             *(linedp + jd) = pixel;                           /* pix 2 */
455             pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
456                      (((gval1 + gval2) << 15) & 0x00ff0000) |
457                      (((bval1 + bval2) << 7) & 0x0000ff00));
458             *(lined + jd + 1) = pixel;                        /* pix 3 */
459             *(linedp + jd + 1) = pixel;                       /* pix 4 */
460         }
461         rval1 = rval2;
462         gval1 = gval2;
463         bval1 = bval2;
464         pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
465         *(lined + 2 * wsm) = pixel;                           /* pix 1 */
466         *(lined + 2 * wsm + 1) = pixel;                       /* pix 2 */
467         *(linedp + 2 * wsm) = pixel;                          /* pix 3 */
468         *(linedp + 2 * wsm + 1) = pixel;                      /* pix 4 */
469     }
470 
471     return;
472 }
473 
474 
475 /*------------------------------------------------------------------*
476  *                2x linear interpolated gray scaling               *
477  *------------------------------------------------------------------*/
478 /*!
479  *  scaleGray2xLILow()
480  *
481  *  This is a special case of 2x expansion by linear
482  *  interpolation.  Each src pixel contains 4 dest pixels.
483  *  The 4 dest pixels in src pixel 1 are numbered at
484  *  their UL corners.  The 4 dest pixels in src pixel 1
485  *  are related to that src pixel and its 3 neighboring
486  *  src pixels as follows:
487  *
488  *             1-----2-----|-----|-----|
489  *             |     |     |     |     |
490  *             |     |     |     |     |
491  *  src 1 -->  3-----4-----|     |     |  <-- src 2
492  *             |     |     |     |     |
493  *             |     |     |     |     |
494  *             |-----|-----|-----|-----|
495  *             |     |     |     |     |
496  *             |     |     |     |     |
497  *  src 3 -->  |     |     |     |     |  <-- src 4
498  *             |     |     |     |     |
499  *             |     |     |     |     |
500  *             |-----|-----|-----|-----|
501  *
502  *           dest      src
503  *           ----      ---
504  *           dp1    =  sp1
505  *           dp2    =  (sp1 + sp2) / 2
506  *           dp3    =  (sp1 + sp3) / 2
507  *           dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
508  *
509  *  We iterate over the src pixels, and unroll the calculation
510  *  for each set of 4 dest pixels corresponding to that src
511  *  pixel, caching pixels for the next src pixel whenever possible.
512  */
513 void
scaleGray2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)514 scaleGray2xLILow(l_uint32  *datad,
515                  l_int32    wpld,
516                  l_uint32  *datas,
517                  l_int32    ws,
518                  l_int32    hs,
519                  l_int32    wpls)
520 {
521 l_int32    i, hsm;
522 l_uint32  *lines, *lined;
523 
524     hsm = hs - 1;
525 
526         /* We're taking 2 src and 2 dest lines at a time,
527          * and for each src line, we're computing 2 dest lines.
528          * Call these 2 dest lines:  destline1 and destline2.
529          * The first src line is used for destline 1.
530          * On all but the last src line, both src lines are
531          * used in the linear interpolation for destline2.
532          * On the last src line, both destline1 and destline2
533          * are computed using only that src line (because there
534          * isn't a lower src line). */
535 
536         /* iterate over all but the last src line */
537     for (i = 0; i < hsm; i++) {
538         lines = datas + i * wpls;
539         lined = datad + 2 * i * wpld;
540         scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 0);
541     }
542 
543         /* last src line */
544     lines = datas + hsm * wpls;
545     lined = datad + 2 * hsm * wpld;
546     scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 1);
547 
548     return;
549 }
550 
551 
552 /*!
553  *  scaleGray2xLILineLow()
554  *
555  *      Input:  lined   (ptr to top destline, to be made from current src line)
556  *              wpld
557  *              lines   (ptr to current src line)
558  *              ws
559  *              wpls
560  *              lastlineflag  (1 if last src line; 0 otherwise)
561  *      Return: void
562  */
563 void
scaleGray2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)564 scaleGray2xLILineLow(l_uint32  *lined,
565                      l_int32    wpld,
566                      l_uint32  *lines,
567                      l_int32    ws,
568                      l_int32    wpls,
569                      l_int32    lastlineflag)
570 {
571 l_int32    j, jd, wsm, w;
572 l_int32    sval1, sval2, sval3, sval4;
573 l_uint32  *linesp, *linedp;
574 l_uint32   words, wordsp, wordd, worddp;
575 
576     wsm = ws - 1;
577 
578     if (lastlineflag == 0) {
579         linesp = lines + wpls;
580         linedp = lined + wpld;
581 
582             /* Unroll the loop 4x and work on full words */
583         words = lines[0];
584         wordsp = linesp[0];
585         sval2 = (words >> 24) & 0xff;
586         sval4 = (wordsp >> 24) & 0xff;
587         for (j = 0, jd = 0, w = 0; j + 3 < wsm; j += 4, jd += 8, w++) {
588                 /* At the top of the loop,
589                  * words == lines[w], wordsp == linesp[w]
590                  * and the top bytes of those have been loaded into
591                  * sval2 and sval4. */
592             sval1 = sval2;
593             sval2 = (words >> 16) & 0xff;
594             sval3 = sval4;
595             sval4 = (wordsp >> 16) & 0xff;
596             wordd = (sval1 << 24) | (((sval1 + sval2) >> 1) << 16);
597             worddp = (((sval1 + sval3) >> 1) << 24) |
598                 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);
599 
600             sval1 = sval2;
601             sval2 = (words >> 8) & 0xff;
602             sval3 = sval4;
603             sval4 = (wordsp >> 8) & 0xff;
604             wordd |= (sval1 << 8) | ((sval1 + sval2) >> 1);
605             worddp |= (((sval1 + sval3) >> 1) << 8) |
606                 ((sval1 + sval2 + sval3 + sval4) >> 2);
607             lined[w * 2] = wordd;
608             linedp[w * 2] = worddp;
609 
610             sval1 = sval2;
611             sval2 = words & 0xff;
612             sval3 = sval4;
613             sval4 = wordsp & 0xff;
614             wordd = (sval1 << 24) |                              /* pix 1 */
615                 (((sval1 + sval2) >> 1) << 16);                  /* pix 2 */
616             worddp = (((sval1 + sval3) >> 1) << 24) |            /* pix 3 */
617                 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);  /* pix 4 */
618 
619                 /* Load the next word as we need its first byte */
620             words = lines[w + 1];
621             wordsp = linesp[w + 1];
622             sval1 = sval2;
623             sval2 = (words >> 24) & 0xff;
624             sval3 = sval4;
625             sval4 = (wordsp >> 24) & 0xff;
626             wordd |= (sval1 << 8) |                              /* pix 1 */
627                 ((sval1 + sval2) >> 1);                          /* pix 2 */
628             worddp |= (((sval1 + sval3) >> 1) << 8) |            /* pix 3 */
629                 ((sval1 + sval2 + sval3 + sval4) >> 2);          /* pix 4 */
630             lined[w * 2 + 1] = wordd;
631             linedp[w * 2 + 1] = worddp;
632         }
633 
634             /* Finish up the last word */
635         for (; j < wsm; j++, jd += 2) {
636             sval1 = sval2;
637             sval3 = sval4;
638             sval2 = GET_DATA_BYTE(lines, j + 1);
639             sval4 = GET_DATA_BYTE(linesp, j + 1);
640             SET_DATA_BYTE(lined, jd, sval1);                     /* pix 1 */
641             SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2);   /* pix 2 */
642             SET_DATA_BYTE(linedp, jd, (sval1 + sval3) / 2);      /* pix 3 */
643             SET_DATA_BYTE(linedp, jd + 1,
644                           (sval1 + sval2 + sval3 + sval4) / 4);  /* pix 4 */
645         }
646         sval1 = sval2;
647         sval3 = sval4;
648         SET_DATA_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
649         SET_DATA_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
650         SET_DATA_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2);      /* pix 3 */
651         SET_DATA_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2);  /* pix 4 */
652 
653 #if DEBUG_UNROLLING
654 #define CHECK_BYTE(a, b, c) if (GET_DATA_BYTE(a, b) != c) {\
655      fprintf(stderr, "Error: mismatch at %d, %d vs %d\n", \
656              j, GET_DATA_BYTE(a, b), c); }
657 
658         sval2 = GET_DATA_BYTE(lines, 0);
659         sval4 = GET_DATA_BYTE(linesp, 0);
660         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
661             sval1 = sval2;
662             sval3 = sval4;
663             sval2 = GET_DATA_BYTE(lines, j + 1);
664             sval4 = GET_DATA_BYTE(linesp, j + 1);
665             CHECK_BYTE(lined, jd, sval1);                     /* pix 1 */
666             CHECK_BYTE(lined, jd + 1, (sval1 + sval2) / 2);   /* pix 2 */
667             CHECK_BYTE(linedp, jd, (sval1 + sval3) / 2);      /* pix 3 */
668             CHECK_BYTE(linedp, jd + 1,
669                           (sval1 + sval2 + sval3 + sval4) / 4);  /* pix 4 */
670         }
671         sval1 = sval2;
672         sval3 = sval4;
673         CHECK_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
674         CHECK_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
675         CHECK_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2);      /* pix 3 */
676         CHECK_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2);  /* pix 4 */
677 #undef CHECK_BYTE
678 #endif
679     }
680     else {   /* last row of src pixels: lastlineflag == 1 */
681         linedp = lined + wpld;
682         sval2 = GET_DATA_BYTE(lines, 0);
683         for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
684             sval1 = sval2;
685             sval2 = GET_DATA_BYTE(lines, j + 1);
686             SET_DATA_BYTE(lined, jd, sval1);                       /* pix 1 */
687             SET_DATA_BYTE(linedp, jd, sval1);                      /* pix 3 */
688             SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2);     /* pix 2 */
689             SET_DATA_BYTE(linedp, jd + 1, (sval1 + sval2) / 2);    /* pix 4 */
690         }
691         sval1 = sval2;
692         SET_DATA_BYTE(lined, 2 * wsm, sval1);                     /* pix 1 */
693         SET_DATA_BYTE(lined, 2 * wsm + 1, sval1);                 /* pix 2 */
694         SET_DATA_BYTE(linedp, 2 * wsm, sval1);                    /* pix 3 */
695         SET_DATA_BYTE(linedp, 2 * wsm + 1, sval1);                /* pix 4 */
696     }
697 
698     return;
699 }
700 
701 
702 /*------------------------------------------------------------------*
703  *               4x linear interpolated gray scaling                *
704  *------------------------------------------------------------------*/
705 /*!
706  *  scaleGray4xLILow()
707  *
708  *  This is a special case of 4x expansion by linear
709  *  interpolation.  Each src pixel contains 16 dest pixels.
710  *  The 16 dest pixels in src pixel 1 are numbered at
711  *  their UL corners.  The 16 dest pixels in src pixel 1
712  *  are related to that src pixel and its 3 neighboring
713  *  src pixels as follows:
714  *
715  *             1---2---3---4---|---|---|---|---|
716  *             |   |   |   |   |   |   |   |   |
717  *             5---6---7---8---|---|---|---|---|
718  *             |   |   |   |   |   |   |   |   |
719  *  src 1 -->  9---a---b---c---|---|---|---|---|  <-- src 2
720  *             |   |   |   |   |   |   |   |   |
721  *             d---e---f---g---|---|---|---|---|
722  *             |   |   |   |   |   |   |   |   |
723  *             |===|===|===|===|===|===|===|===|
724  *             |   |   |   |   |   |   |   |   |
725  *             |---|---|---|---|---|---|---|---|
726  *             |   |   |   |   |   |   |   |   |
727  *  src 3 -->  |---|---|---|---|---|---|---|---|  <-- src 4
728  *             |   |   |   |   |   |   |   |   |
729  *             |---|---|---|---|---|---|---|---|
730  *             |   |   |   |   |   |   |   |   |
731  *             |---|---|---|---|---|---|---|---|
732  *
733  *           dest      src
734  *           ----      ---
735  *           dp1    =  sp1
736  *           dp2    =  (3 * sp1 + sp2) / 4
737  *           dp3    =  (sp1 + sp2) / 2
738  *           dp4    =  (sp1 + 3 * sp2) / 4
739  *           dp5    =  (3 * sp1 + sp3) / 4
740  *           dp6    =  (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
741  *           dp7    =  (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
742  *           dp8    =  (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
743  *           dp9    =  (sp1 + sp3) / 2
744  *           dp10   =  (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
745  *           dp11   =  (sp1 + sp2 + sp3 + sp4) / 4
746  *           dp12   =  (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
747  *           dp13   =  (sp1 + 3 * sp3) / 4
748  *           dp14   =  (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
749  *           dp15   =  (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
750  *           dp16   =  (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
751  *
752  *  We iterate over the src pixels, and unroll the calculation
753  *  for each set of 16 dest pixels corresponding to that src
754  *  pixel, caching pixels for the next src pixel whenever possible.
755  */
756 void
scaleGray4xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)757 scaleGray4xLILow(l_uint32  *datad,
758                  l_int32    wpld,
759                  l_uint32  *datas,
760                  l_int32    ws,
761                  l_int32    hs,
762                  l_int32    wpls)
763 {
764 l_int32    i, hsm;
765 l_uint32  *lines, *lined;
766 
767     hsm = hs - 1;
768 
769         /* We're taking 2 src and 4 dest lines at a time,
770          * and for each src line, we're computing 4 dest lines.
771          * Call these 4 dest lines:  destline1 - destline4.
772          * The first src line is used for destline 1.
773          * Two src lines are used for all other dest lines,
774          * except for the last 4 dest lines, which are computed
775          * using only the last src line. */
776 
777         /* iterate over all but the last src line */
778     for (i = 0; i < hsm; i++) {
779         lines = datas + i * wpls;
780         lined = datad + 4 * i * wpld;
781         scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 0);
782     }
783 
784         /* last src line */
785     lines = datas + hsm * wpls;
786     lined = datad + 4 * hsm * wpld;
787     scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 1);
788 
789     return;
790 }
791 
792 
793 /*!
794  *  scaleGray4xLILineLow()
795  *
796  *      Input:  lined   (ptr to top destline, to be made from current src line)
797  *              wpld
798  *              lines   (ptr to current src line)
799  *              ws
800  *              wpls
801  *              lastlineflag  (1 if last src line; 0 otherwise)
802  *      Return: void
803  */
804 void
scaleGray4xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)805 scaleGray4xLILineLow(l_uint32  *lined,
806                      l_int32    wpld,
807                      l_uint32  *lines,
808                      l_int32    ws,
809                      l_int32    wpls,
810                      l_int32    lastlineflag)
811 {
812 l_int32    j, jd, wsm, wsm4;
813 l_int32    s1, s2, s3, s4, s1t, s2t, s3t, s4t;
814 l_uint32  *linesp, *linedp1, *linedp2, *linedp3;
815 
816     wsm = ws - 1;
817     wsm4 = 4 * wsm;
818 
819     if (lastlineflag == 0) {
820         linesp = lines + wpls;
821         linedp1 = lined + wpld;
822         linedp2 = lined + 2 * wpld;
823         linedp3 = lined + 3 * wpld;
824         s2 = GET_DATA_BYTE(lines, 0);
825         s4 = GET_DATA_BYTE(linesp, 0);
826         for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
827             s1 = s2;
828             s3 = s4;
829             s2 = GET_DATA_BYTE(lines, j + 1);
830             s4 = GET_DATA_BYTE(linesp, j + 1);
831             s1t = 3 * s1;
832             s2t = 3 * s2;
833             s3t = 3 * s3;
834             s4t = 3 * s4;
835             SET_DATA_BYTE(lined, jd, s1);                             /* d1 */
836             SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4);             /* d2 */
837             SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2);              /* d3 */
838             SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4);             /* d4 */
839             SET_DATA_BYTE(linedp1, jd, (s1t + s3) / 4);                /* d5 */
840             SET_DATA_BYTE(linedp1, jd + 1, (9*s1 + s2t + s3t + s4) / 16); /*d6*/
841             SET_DATA_BYTE(linedp1, jd + 2, (s1t + s2t + s3 + s4) / 8); /* d7 */
842             SET_DATA_BYTE(linedp1, jd + 3, (s1t + 9*s2 + s3 + s4t) / 16);/*d8*/
843             SET_DATA_BYTE(linedp2, jd, (s1 + s3) / 2);                /* d9 */
844             SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2 + s3t + s4) / 8);/* d10 */
845             SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2 + s3 + s4) / 4);  /* d11 */
846             SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t + s3 + s4t) / 8);/* d12 */
847             SET_DATA_BYTE(linedp3, jd, (s1 + s3t) / 4);               /* d13 */
848             SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2 + 9*s3 + s4t) / 16);/*d14*/
849             SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2 + s3t + s4t) / 8); /* d15 */
850             SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t + s3t + 9*s4) / 16);/*d16*/
851         }
852         s1 = s2;
853         s3 = s4;
854         s1t = 3 * s1;
855         s3t = 3 * s3;
856         SET_DATA_BYTE(lined, wsm4, s1);                               /* d1 */
857         SET_DATA_BYTE(lined, wsm4 + 1, s1);                           /* d2 */
858         SET_DATA_BYTE(lined, wsm4 + 2, s1);                           /* d3 */
859         SET_DATA_BYTE(lined, wsm4 + 3, s1);                           /* d4 */
860         SET_DATA_BYTE(linedp1, wsm4, (s1t + s3) / 4);                 /* d5 */
861         SET_DATA_BYTE(linedp1, wsm4 + 1, (s1t + s3) / 4);             /* d6 */
862         SET_DATA_BYTE(linedp1, wsm4 + 2, (s1t + s3) / 4);             /* d7 */
863         SET_DATA_BYTE(linedp1, wsm4 + 3, (s1t + s3) / 4);             /* d8 */
864         SET_DATA_BYTE(linedp2, wsm4, (s1 + s3) / 2);                  /* d9 */
865         SET_DATA_BYTE(linedp2, wsm4 + 1, (s1 + s3) / 2);              /* d10 */
866         SET_DATA_BYTE(linedp2, wsm4 + 2, (s1 + s3) / 2);              /* d11 */
867         SET_DATA_BYTE(linedp2, wsm4 + 3, (s1 + s3) / 2);              /* d12 */
868         SET_DATA_BYTE(linedp3, wsm4, (s1 + s3t) / 4);                 /* d13 */
869         SET_DATA_BYTE(linedp3, wsm4 + 1, (s1 + s3t) / 4);             /* d14 */
870         SET_DATA_BYTE(linedp3, wsm4 + 2, (s1 + s3t) / 4);             /* d15 */
871         SET_DATA_BYTE(linedp3, wsm4 + 3, (s1 + s3t) / 4);             /* d16 */
872     }
873     else {   /* last row of src pixels: lastlineflag == 1 */
874         linedp1 = lined + wpld;
875         linedp2 = lined + 2 * wpld;
876         linedp3 = lined + 3 * wpld;
877         s2 = GET_DATA_BYTE(lines, 0);
878         for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
879             s1 = s2;
880             s2 = GET_DATA_BYTE(lines, j + 1);
881             s1t = 3 * s1;
882             s2t = 3 * s2;
883             SET_DATA_BYTE(lined, jd, s1);                            /* d1 */
884             SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4 );           /* d2 */
885             SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2 );            /* d3 */
886             SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4 );           /* d4 */
887             SET_DATA_BYTE(linedp1, jd, s1);                          /* d5 */
888             SET_DATA_BYTE(linedp1, jd + 1, (s1t + s2) / 4 );         /* d6 */
889             SET_DATA_BYTE(linedp1, jd + 2, (s1 + s2) / 2 );          /* d7 */
890             SET_DATA_BYTE(linedp1, jd + 3, (s1 + s2t) / 4 );         /* d8 */
891             SET_DATA_BYTE(linedp2, jd, s1);                          /* d9 */
892             SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2) / 4 );         /* d10 */
893             SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2) / 2 );          /* d11 */
894             SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t) / 4 );         /* d12 */
895             SET_DATA_BYTE(linedp3, jd, s1);                          /* d13 */
896             SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2) / 4 );         /* d14 */
897             SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2) / 2 );          /* d15 */
898             SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t) / 4 );         /* d16 */
899         }
900         s1 = s2;
901         SET_DATA_BYTE(lined, wsm4, s1);                              /* d1 */
902         SET_DATA_BYTE(lined, wsm4 + 1, s1);                          /* d2 */
903         SET_DATA_BYTE(lined, wsm4 + 2, s1);                          /* d3 */
904         SET_DATA_BYTE(lined, wsm4 + 3, s1);                          /* d4 */
905         SET_DATA_BYTE(linedp1, wsm4, s1);                            /* d5 */
906         SET_DATA_BYTE(linedp1, wsm4 + 1, s1);                        /* d6 */
907         SET_DATA_BYTE(linedp1, wsm4 + 2, s1);                        /* d7 */
908         SET_DATA_BYTE(linedp1, wsm4 + 3, s1);                        /* d8 */
909         SET_DATA_BYTE(linedp2, wsm4, s1);                            /* d9 */
910         SET_DATA_BYTE(linedp2, wsm4 + 1, s1);                        /* d10 */
911         SET_DATA_BYTE(linedp2, wsm4 + 2, s1);                        /* d11 */
912         SET_DATA_BYTE(linedp2, wsm4 + 3, s1);                        /* d12 */
913         SET_DATA_BYTE(linedp3, wsm4, s1);                            /* d13 */
914         SET_DATA_BYTE(linedp3, wsm4 + 1, s1);                        /* d14 */
915         SET_DATA_BYTE(linedp3, wsm4 + 2, s1);                        /* d15 */
916         SET_DATA_BYTE(linedp3, wsm4 + 3, s1);                        /* d16 */
917     }
918 
919     return;
920 }
921 
922 
923 /*------------------------------------------------------------------*
924  *       Grayscale and color scaling by closest pixel sampling      *
925  *------------------------------------------------------------------*/
926 /*!
927  *  scaleBySamplingLow()
928  *
929  *  Notes:
930  *      (1) The dest must be cleared prior to this operation,
931  *          and we clear it here in the low-level code.
932  *      (2) We reuse dest pixels and dest pixel rows whenever
933  *          possible.  This speeds the upscaling; downscaling
934  *          is done by strict subsampling and is unaffected.
935  *      (3) Because we are sampling and not interpolating, this
936  *          routine works directly, without conversion to full
937  *          RGB color, for 2, 4 or 8 bpp palette color images.
938  */
939 l_int32
scaleBySamplingLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls)940 scaleBySamplingLow(l_uint32  *datad,
941                    l_int32    wd,
942                    l_int32    hd,
943                    l_int32    wpld,
944                    l_uint32  *datas,
945                    l_int32    ws,
946                    l_int32    hs,
947                    l_int32    d,
948                    l_int32    wpls)
949 {
950 l_int32    i, j, bpld;
951 l_int32    xs, prevxs, sval;
952 l_int32   *srow, *scol;
953 l_uint32   csval;
954 l_uint32  *lines, *prevlines, *lined, *prevlined;
955 l_float32  wratio, hratio;
956 
957     PROCNAME("scaleBySamplingLow");
958 
959         /* clear dest */
960     bpld = 4 * wpld;
961     memset((char *)datad, 0, hd * bpld);
962 
963         /* the source row corresponding to dest row i ==> srow[i]
964          * the source col corresponding to dest col j ==> scol[j]  */
965     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
966         return ERROR_INT("srow not made", procName, 1);
967     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
968         return ERROR_INT("scol not made", procName, 1);
969 
970     wratio = (l_float32)ws / (l_float32)wd;
971     hratio = (l_float32)hs / (l_float32)hd;
972     for (i = 0; i < hd; i++)
973         srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
974     for (j = 0; j < wd; j++)
975         scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
976 
977     prevlines = NULL;
978     for (i = 0; i < hd; i++) {
979         lines = datas + srow[i] * wpls;
980         lined = datad + i * wpld;
981         if (lines != prevlines) {  /* make dest from new source row */
982             prevxs = -1;
983             sval = 0;
984             csval = 0;
985             switch (d)
986             {
987             case 2:
988                 for (j = 0; j < wd; j++) {
989                     xs = scol[j];
990                     if (xs != prevxs) {  /* get dest pix from source col */
991                         sval = GET_DATA_DIBIT(lines, xs);
992                         SET_DATA_DIBIT(lined, j, sval);
993                         prevxs = xs;
994                     }
995                     else   /* copy prev dest pix */
996                         SET_DATA_DIBIT(lined, j, sval);
997                 }
998                 break;
999             case 4:
1000                 for (j = 0; j < wd; j++) {
1001                     xs = scol[j];
1002                     if (xs != prevxs) {  /* get dest pix from source col */
1003                         sval = GET_DATA_QBIT(lines, xs);
1004                         SET_DATA_QBIT(lined, j, sval);
1005                         prevxs = xs;
1006                     }
1007                     else   /* copy prev dest pix */
1008                         SET_DATA_QBIT(lined, j, sval);
1009                 }
1010                 break;
1011             case 8:
1012                 for (j = 0; j < wd; j++) {
1013                     xs = scol[j];
1014                     if (xs != prevxs) {  /* get dest pix from source col */
1015                         sval = GET_DATA_BYTE(lines, xs);
1016                         SET_DATA_BYTE(lined, j, sval);
1017                         prevxs = xs;
1018                     }
1019                     else   /* copy prev dest pix */
1020                         SET_DATA_BYTE(lined, j, sval);
1021                 }
1022                 break;
1023             case 16:
1024                 for (j = 0; j < wd; j++) {
1025                     xs = scol[j];
1026                     if (xs != prevxs) {  /* get dest pix from source col */
1027                         sval = GET_DATA_TWO_BYTES(lines, xs);
1028                         SET_DATA_TWO_BYTES(lined, j, sval);
1029                         prevxs = xs;
1030                     }
1031                     else   /* copy prev dest pix */
1032                         SET_DATA_TWO_BYTES(lined, j, sval);
1033                 }
1034                 break;
1035             case 32:
1036                 for (j = 0; j < wd; j++) {
1037                     xs = scol[j];
1038                     if (xs != prevxs) {  /* get dest pix from source col */
1039                         csval = lines[xs];
1040                         lined[j] = csval;
1041                         prevxs = xs;
1042                     }
1043                     else   /* copy prev dest pix */
1044                         lined[j] = csval;
1045                 }
1046                 break;
1047             default:
1048                 return ERROR_INT("pixel depth not supported", procName, 1);
1049                 break;
1050             }
1051         }
1052         else {  /* lines == prevlines; copy prev dest row */
1053             prevlined = lined - wpld;
1054             memcpy((char *)lined, (char *)prevlined, bpld);
1055         }
1056         prevlines = lines;
1057     }
1058 
1059     FREE(srow);
1060     FREE(scol);
1061 
1062     return 0;
1063 }
1064 
1065 
1066 /*------------------------------------------------------------------*
1067  *    Color and grayscale downsampling with (antialias) smoothing   *
1068  *------------------------------------------------------------------*/
1069 /*!
1070  *  scaleSmoothLow()
1071  *
1072  *  Notes:
1073  *      (1) This function is called on 8 or 32 bpp src and dest images.
1074  *      (2) size is the full width of the lowpass smoothing filter.
1075  *          It is correlated with the reduction ratio, being the
1076  *          nearest integer such that size is approximately equal to hs / hd.
1077  */
1078 l_int32
scaleSmoothLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls,l_int32 size)1079 scaleSmoothLow(l_uint32  *datad,
1080                l_int32    wd,
1081                l_int32    hd,
1082                l_int32    wpld,
1083                l_uint32  *datas,
1084                l_int32    ws,
1085                l_int32    hs,
1086                l_int32    d,
1087                l_int32    wpls,
1088                l_int32    size)
1089 {
1090 l_int32    i, j, m, n, xstart;
1091 l_int32    val, rval, gval, bval;
1092 l_int32   *srow, *scol;
1093 l_uint32  *lines, *lined, *line, *ppixel;
1094 l_uint32   pixel;
1095 l_float32  wratio, hratio, norm;
1096 
1097     PROCNAME("scaleSmoothLow");
1098 
1099         /* Clear dest */
1100     memset((char *)datad, 0, 4 * wpld * hd);
1101 
1102         /* Each dest pixel at (j,i) is computed as the average
1103            of size^2 corresponding src pixels.
1104            We store the UL corner location of the square of
1105            src pixels that correspond to dest pixel (j,i).
1106            The are labelled by the arrays srow[i] and scol[j]. */
1107     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1108         return ERROR_INT("srow not made", procName, 1);
1109     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1110         return ERROR_INT("scol not made", procName, 1);
1111 
1112     norm = 1. / (l_float32)(size * size);
1113     wratio = (l_float32)ws / (l_float32)wd;
1114     hratio = (l_float32)hs / (l_float32)hd;
1115     for (i = 0; i < hd; i++)
1116         srow[i] = L_MIN((l_int32)(hratio * i), hs - size);
1117     for (j = 0; j < wd; j++)
1118         scol[j] = L_MIN((l_int32)(wratio * j), ws - size);
1119 
1120         /* For each dest pixel, compute average */
1121     if (d == 8) {
1122         for (i = 0; i < hd; i++) {
1123             lines = datas + srow[i] * wpls;
1124             lined = datad + i * wpld;
1125             for (j = 0; j < wd; j++) {
1126                 xstart = scol[j];
1127                 val = 0;
1128                 for (m = 0; m < size; m++) {
1129                     line = lines + m * wpls;
1130                     for (n = 0; n < size; n++) {
1131                         val += GET_DATA_BYTE(line, xstart + n);
1132                     }
1133                 }
1134                 val = (l_int32)((l_float32)val * norm);
1135                 SET_DATA_BYTE(lined, j, val);
1136             }
1137         }
1138     }
1139     else {  /* d == 32 */
1140         for (i = 0; i < hd; i++) {
1141             lines = datas + srow[i] * wpls;
1142             lined = datad + i * wpld;
1143             for (j = 0; j < wd; j++) {
1144                 xstart = scol[j];
1145                 rval = gval = bval = 0;
1146                 for (m = 0; m < size; m++) {
1147                     ppixel = lines + m * wpls + xstart;
1148                     for (n = 0; n < size; n++) {
1149                         pixel = *(ppixel + n);
1150                         rval += (pixel >> L_RED_SHIFT) & 0xff;
1151                         gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1152                         bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1153                     }
1154                 }
1155                 rval = (l_int32)((l_float32)rval * norm);
1156                 gval = (l_int32)((l_float32)gval * norm);
1157                 bval = (l_int32)((l_float32)bval * norm);
1158                 *(lined + j) = rval << L_RED_SHIFT |
1159                                gval << L_GREEN_SHIFT |
1160                                bval << L_BLUE_SHIFT;
1161             }
1162         }
1163     }
1164 
1165     FREE(srow);
1166     FREE(scol);
1167     return 0;
1168 }
1169 
1170 
1171 /*!
1172  *  scaleRGBToGray2Low()
1173  *
1174  *  Note: This function is called with 32 bpp RGB src and 8 bpp,
1175  *        half-resolution dest.  The weights should add to 1.0.
1176  */
1177 void
scaleRGBToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_float32 rwt,l_float32 gwt,l_float32 bwt)1178 scaleRGBToGray2Low(l_uint32  *datad,
1179                    l_int32    wd,
1180                    l_int32    hd,
1181                    l_int32    wpld,
1182                    l_uint32  *datas,
1183                    l_int32    wpls,
1184                    l_float32  rwt,
1185                    l_float32  gwt,
1186                    l_float32  bwt)
1187 {
1188 l_int32    i, j, val, rval, gval, bval;
1189 l_uint32  *lines, *lined;
1190 l_uint32   pixel;
1191 
1192     rwt *= 0.25;
1193     gwt *= 0.25;
1194     bwt *= 0.25;
1195     for (i = 0; i < hd; i++) {
1196         lines = datas + 2 * i * wpls;
1197         lined = datad + i * wpld;
1198         for (j = 0; j < wd; j++) {
1199                 /* Sum each of the color components from 4 src pixels */
1200             pixel = *(lines + 2 * j);
1201             rval = (pixel >> L_RED_SHIFT) & 0xff;
1202             gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1203             bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1204             pixel = *(lines + 2 * j + 1);
1205             rval += (pixel >> L_RED_SHIFT) & 0xff;
1206             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1207             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1208             pixel = *(lines + wpls + 2 * j);
1209             rval += (pixel >> L_RED_SHIFT) & 0xff;
1210             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1211             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1212             pixel = *(lines + wpls + 2 * j + 1);
1213             rval += (pixel >> L_RED_SHIFT) & 0xff;
1214             gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1215             bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1216                 /* Generate the dest byte as a weighted sum of the averages */
1217             val = (l_int32)(rwt * rval + gwt * gval + bwt * bval);
1218             SET_DATA_BYTE(lined, j, val);
1219         }
1220     }
1221 }
1222 
1223 
1224 /*------------------------------------------------------------------*
1225  *                  General area mapped gray scaling                *
1226  *------------------------------------------------------------------*/
1227 /*!
1228  *  scaleColorAreaMapLow()
1229  *
1230  *  This should only be used for downscaling.
1231  *  We choose to divide each pixel into 16 x 16 sub-pixels.
1232  *  This is much slower than scaleSmoothLow(), but it gives a
1233  *  better representation, esp. for downscaling factors between
1234  *  1.5 and 5.  All src pixels are subdivided into 256 sub-pixels,
1235  *  and are weighted by the number of sub-pixels covered by
1236  *  the dest pixel.  This is about 2x slower than scaleSmoothLow(),
1237  *  but the results are significantly better on small text.
1238  */
1239 void
scaleColorAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1240 scaleColorAreaMapLow(l_uint32  *datad,
1241                     l_int32    wd,
1242                     l_int32    hd,
1243                     l_int32    wpld,
1244                     l_uint32  *datas,
1245                     l_int32    ws,
1246                     l_int32    hs,
1247                     l_int32    wpls)
1248 {
1249 l_int32    i, j, k, m, wm2, hm2;
1250 l_int32    area00, area10, area01, area11, areal, arear, areat, areab;
1251 l_int32    xu, yu;  /* UL corner in src image, to 1/16 of a pixel */
1252 l_int32    xl, yl;  /* LR corner in src image, to 1/16 of a pixel */
1253 l_int32    xup, yup, xuf, yuf;  /* UL src pixel: integer and fraction */
1254 l_int32    xlp, ylp, xlf, ylf;  /* LR src pixel: integer and fraction */
1255 l_int32    delx, dely, area;
1256 l_int32    v00r, v00g, v00b;  /* contrib. from UL src pixel */
1257 l_int32    v01r, v01g, v01b;  /* contrib. from LL src pixel */
1258 l_int32    v10r, v10g, v10b;  /* contrib from UR src pixel */
1259 l_int32    v11r, v11g, v11b;  /* contrib from LR src pixel */
1260 l_int32    vinr, ving, vinb;  /* contrib from all full interior src pixels */
1261 l_int32    vmidr, vmidg, vmidb;  /* contrib from side parts */
1262 l_int32    rval, gval, bval;
1263 l_uint32   pixel00, pixel10, pixel01, pixel11, pixel;
1264 l_uint32  *lines, *lined;
1265 l_float32  scx, scy;
1266 
1267         /* (scx, scy) are scaling factors that are applied to the
1268          * dest coords to get the corresponding src coords.
1269          * We need them because we iterate over dest pixels
1270          * and must find the corresponding set of src pixels. */
1271     scx = 16. * (l_float32)ws / (l_float32)wd;
1272     scy = 16. * (l_float32)hs / (l_float32)hd;
1273 
1274     wm2 = ws - 2;
1275     hm2 = hs - 2;
1276 
1277         /* Iterate over the destination pixels */
1278     for (i = 0; i < hd; i++) {
1279         yu = (l_int32)(scy * i);
1280         yl = (l_int32)(scy * (i + 1.0));
1281         yup = yu >> 4;
1282         yuf = yu & 0x0f;
1283         ylp = yl >> 4;
1284         ylf = yl & 0x0f;
1285         dely = ylp - yup;
1286         lined = datad + i * wpld;
1287         lines = datas + yup * wpls;
1288         for (j = 0; j < wd; j++) {
1289             xu = (l_int32)(scx * j);
1290             xl = (l_int32)(scx * (j + 1.0));
1291             xup = xu >> 4;
1292             xuf = xu & 0x0f;
1293             xlp = xl >> 4;
1294             xlf = xl & 0x0f;
1295             delx = xlp - xup;
1296 
1297                 /* If near the edge, just use a src pixel value */
1298             if (xlp > wm2 || ylp > hm2) {
1299                 *(lined + j) = *(lines + xup);
1300                 continue;
1301             }
1302 
1303                 /* Area summed over, in subpixels.  This varies
1304                  * due to the quantization, so we can't simply take
1305                  * the area to be a constant: area = scx * scy. */
1306             area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1307                    ((16 - yuf) + 16 * (dely - 1) + ylf);
1308 
1309                 /* Do area map summation */
1310             pixel00 = *(lines + xup);
1311             pixel10 = *(lines + xlp);
1312             pixel01 = *(lines + dely * wpls +  xup);
1313             pixel11 = *(lines + dely * wpls +  xlp);
1314             area00 = (16 - xuf) * (16 - yuf);
1315             area10 = xlf * (16 - yuf);
1316             area01 = (16 - xuf) * ylf;
1317             area11 = xlf * ylf;
1318             v00r = area00 * ((pixel00 >> L_RED_SHIFT) & 0xff);
1319             v00g = area00 * ((pixel00 >> L_GREEN_SHIFT) & 0xff);
1320             v00b = area00 * ((pixel00 >> L_BLUE_SHIFT) & 0xff);
1321             v10r = area10 * ((pixel10 >> L_RED_SHIFT) & 0xff);
1322             v10g = area10 * ((pixel10 >> L_GREEN_SHIFT) & 0xff);
1323             v10b = area10 * ((pixel10 >> L_BLUE_SHIFT) & 0xff);
1324             v01r = area01 * ((pixel01 >> L_RED_SHIFT) & 0xff);
1325             v01g = area01 * ((pixel01 >> L_GREEN_SHIFT) & 0xff);
1326             v01b = area01 * ((pixel01 >> L_BLUE_SHIFT) & 0xff);
1327             v11r = area11 * ((pixel11 >> L_RED_SHIFT) & 0xff);
1328             v11g = area11 * ((pixel11 >> L_GREEN_SHIFT) & 0xff);
1329             v11b = area11 * ((pixel11 >> L_BLUE_SHIFT) & 0xff);
1330             vinr = ving = vinb = 0;
1331             for (k = 1; k < dely; k++) {  /* for full src pixels */
1332                 for (m = 1; m < delx; m++) {
1333                     pixel = *(lines + k * wpls + xup + m);
1334                     vinr += 256 * ((pixel >> L_RED_SHIFT) & 0xff);
1335                     ving += 256 * ((pixel >> L_GREEN_SHIFT) & 0xff);
1336                     vinb += 256 * ((pixel >> L_BLUE_SHIFT) & 0xff);
1337                 }
1338             }
1339             vmidr = vmidg = vmidb = 0;
1340             areal = (16 - xuf) * 16;
1341             arear = xlf * 16;
1342             areat = 16 * (16 - yuf);
1343             areab = 16 * ylf;
1344             for (k = 1; k < dely; k++) {  /* for left side */
1345                 pixel = *(lines + k * wpls + xup);
1346                 vmidr += areal * ((pixel >> L_RED_SHIFT) & 0xff);
1347                 vmidg += areal * ((pixel >> L_GREEN_SHIFT) & 0xff);
1348                 vmidb += areal * ((pixel >> L_BLUE_SHIFT) & 0xff);
1349             }
1350             for (k = 1; k < dely; k++) {  /* for right side */
1351                 pixel = *(lines + k * wpls + xlp);
1352                 vmidr += arear * ((pixel >> L_RED_SHIFT) & 0xff);
1353                 vmidg += arear * ((pixel >> L_GREEN_SHIFT) & 0xff);
1354                 vmidb += arear * ((pixel >> L_BLUE_SHIFT) & 0xff);
1355             }
1356             for (m = 1; m < delx; m++) {  /* for top side */
1357                 pixel = *(lines + xup + m);
1358                 vmidr += areat * ((pixel >> L_RED_SHIFT) & 0xff);
1359                 vmidg += areat * ((pixel >> L_GREEN_SHIFT) & 0xff);
1360                 vmidb += areat * ((pixel >> L_BLUE_SHIFT) & 0xff);
1361             }
1362             for (m = 1; m < delx; m++) {  /* for bottom side */
1363                 pixel = *(lines + dely * wpls + xup + m);
1364                 vmidr += areab * ((pixel >> L_RED_SHIFT) & 0xff);
1365                 vmidg += areab * ((pixel >> L_GREEN_SHIFT) & 0xff);
1366                 vmidb += areab * ((pixel >> L_BLUE_SHIFT) & 0xff);
1367             }
1368 
1369                 /* Sum all the contributions */
1370             rval = (v00r + v01r + v10r + v11r + vinr + vmidr + 128) / area;
1371             gval = (v00g + v01g + v10g + v11g + ving + vmidg + 128) / area;
1372             bval = (v00b + v01b + v10b + v11b + vinb + vmidb + 128) / area;
1373 #if  DEBUG_OVERFLOW
1374             if (rval > 255) fprintf(stderr, "rval ovfl: %d\n", rval);
1375             if (rval > 255) fprintf(stderr, "gval ovfl: %d\n", gval);
1376             if (rval > 255) fprintf(stderr, "bval ovfl: %d\n", bval);
1377 #endif  /* DEBUG_OVERFLOW */
1378             composeRGBPixel(rval, gval, bval, lined + j);
1379         }
1380     }
1381 
1382     return;
1383 }
1384 
1385 
1386 /*!
1387  *  scaleGrayAreaMapLow()
1388  *
1389  *  This should only be used for downscaling.
1390  *  We choose to divide each pixel into 16 x 16 sub-pixels.
1391  *  This is about 2x slower than scaleSmoothLow(), but the results
1392  *  are significantly better on small text, esp. for downscaling
1393  *  factors between 1.5 and 5.  All src pixels are subdivided
1394  *  into 256 sub-pixels, and are weighted by the number of
1395  *  sub-pixels covered by the dest pixel.
1396  */
1397 void
scaleGrayAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1398 scaleGrayAreaMapLow(l_uint32  *datad,
1399                     l_int32    wd,
1400                     l_int32    hd,
1401                     l_int32    wpld,
1402                     l_uint32  *datas,
1403                     l_int32    ws,
1404                     l_int32    hs,
1405                     l_int32    wpls)
1406 {
1407 l_int32    i, j, k, m, wm2, hm2;
1408 l_int32    xu, yu;  /* UL corner in src image, to 1/16 of a pixel */
1409 l_int32    xl, yl;  /* LR corner in src image, to 1/16 of a pixel */
1410 l_int32    xup, yup, xuf, yuf;  /* UL src pixel: integer and fraction */
1411 l_int32    xlp, ylp, xlf, ylf;  /* LR src pixel: integer and fraction */
1412 l_int32    delx, dely, area;
1413 l_int32    v00;  /* contrib. from UL src pixel */
1414 l_int32    v01;  /* contrib. from LL src pixel */
1415 l_int32    v10;  /* contrib from UR src pixel */
1416 l_int32    v11;  /* contrib from LR src pixel */
1417 l_int32    vin;  /* contrib from all full interior src pixels */
1418 l_int32    vmid;  /* contrib from side parts that are full in 1 direction */
1419 l_int32    val;
1420 l_uint32  *lines, *lined;
1421 l_float32  scx, scy;
1422 
1423         /* (scx, scy) are scaling factors that are applied to the
1424          * dest coords to get the corresponding src coords.
1425          * We need them because we iterate over dest pixels
1426          * and must find the corresponding set of src pixels. */
1427     scx = 16. * (l_float32)ws / (l_float32)wd;
1428     scy = 16. * (l_float32)hs / (l_float32)hd;
1429 
1430     wm2 = ws - 2;
1431     hm2 = hs - 2;
1432 
1433         /* Iterate over the destination pixels */
1434     for (i = 0; i < hd; i++) {
1435         yu = (l_int32)(scy * i);
1436         yl = (l_int32)(scy * (i + 1.0));
1437         yup = yu >> 4;
1438         yuf = yu & 0x0f;
1439         ylp = yl >> 4;
1440         ylf = yl & 0x0f;
1441         dely = ylp - yup;
1442         lined = datad + i * wpld;
1443         lines = datas + yup * wpls;
1444         for (j = 0; j < wd; j++) {
1445             xu = (l_int32)(scx * j);
1446             xl = (l_int32)(scx * (j + 1.0));
1447             xup = xu >> 4;
1448             xuf = xu & 0x0f;
1449             xlp = xl >> 4;
1450             xlf = xl & 0x0f;
1451             delx = xlp - xup;
1452 
1453                 /* If near the edge, just use a src pixel value */
1454             if (xlp > wm2 || ylp > hm2) {
1455                 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xup));
1456                 continue;
1457             }
1458 
1459                 /* Area summed over, in subpixels.  This varies
1460                  * due to the quantization, so we can't simply take
1461                  * the area to be a constant: area = scx * scy. */
1462             area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1463                    ((16 - yuf) + 16 * (dely - 1) + ylf);
1464 
1465                 /* Do area map summation */
1466             v00 = (16 - xuf) * (16 - yuf) * GET_DATA_BYTE(lines, xup);
1467             v10 = xlf * (16 - yuf) * GET_DATA_BYTE(lines, xlp);
1468             v01 = (16 - xuf) * ylf * GET_DATA_BYTE(lines + dely * wpls, xup);
1469             v11 = xlf * ylf * GET_DATA_BYTE(lines + dely * wpls, xlp);
1470             for (vin = 0, k = 1; k < dely; k++) {  /* for full src pixels */
1471                  for (m = 1; m < delx; m++) {
1472                      vin += 256 * GET_DATA_BYTE(lines + k * wpls, xup + m);
1473                  }
1474             }
1475             for (vmid = 0, k = 1; k < dely; k++)  /* for left side */
1476                 vmid += (16 - xuf) * 16 * GET_DATA_BYTE(lines + k * wpls, xup);
1477             for (k = 1; k < dely; k++)  /* for right side */
1478                 vmid += xlf * 16 * GET_DATA_BYTE(lines + k * wpls, xlp);
1479             for (m = 1; m < delx; m++)  /* for top side */
1480                 vmid += 16 * (16 - yuf) * GET_DATA_BYTE(lines, xup + m);
1481             for (m = 1; m < delx; m++)  /* for bottom side */
1482                 vmid += 16 * ylf * GET_DATA_BYTE(lines + dely * wpls, xup + m);
1483             val = (v00 + v01 + v10 + v11 + vin + vmid + 128) / area;
1484 #if  DEBUG_OVERFLOW
1485             if (val > 255) fprintf(stderr, "val overflow: %d\n", val);
1486 #endif  /* DEBUG_OVERFLOW */
1487             SET_DATA_BYTE(lined, j, val);
1488         }
1489     }
1490 
1491     return;
1492 }
1493 
1494 
1495 /*------------------------------------------------------------------*
1496  *                     2x area mapped downscaling                   *
1497  *------------------------------------------------------------------*/
1498 /*!
1499  *  scaleAreaMapLow2()
1500  *
1501  *  Note: This function is called with either 8 bpp gray or 32 bpp RGB.
1502  *        The result is a 2x reduced dest.
1503  */
1504 void
scaleAreaMapLow2(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 d,l_int32 wpls)1505 scaleAreaMapLow2(l_uint32  *datad,
1506                  l_int32    wd,
1507                  l_int32    hd,
1508                  l_int32    wpld,
1509                  l_uint32  *datas,
1510                  l_int32    d,
1511                  l_int32    wpls)
1512 {
1513 l_int32    i, j, val, rval, gval, bval;
1514 l_uint32  *lines, *lined;
1515 l_uint32   pixel;
1516 
1517     if (d == 8) {
1518         for (i = 0; i < hd; i++) {
1519             lines = datas + 2 * i * wpls;
1520             lined = datad + i * wpld;
1521             for (j = 0; j < wd; j++) {
1522                     /* Average each dest pixel using 4 src pixels */
1523                 val = GET_DATA_BYTE(lines, 2 * j);
1524                 val += GET_DATA_BYTE(lines, 2 * j + 1);
1525                 val += GET_DATA_BYTE(lines + wpls, 2 * j);
1526                 val += GET_DATA_BYTE(lines + wpls, 2 * j + 1);
1527                 val >>= 2;
1528                 SET_DATA_BYTE(lined, j, val);
1529             }
1530         }
1531     }
1532     else {  /* d == 32 */
1533         for (i = 0; i < hd; i++) {
1534             lines = datas + 2 * i * wpls;
1535             lined = datad + i * wpld;
1536             for (j = 0; j < wd; j++) {
1537                     /* Average each of the color components from 4 src pixels */
1538                 pixel = *(lines + 2 * j);
1539                 rval = (pixel >> L_RED_SHIFT) & 0xff;
1540                 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1541                 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1542                 pixel = *(lines + 2 * j + 1);
1543                 rval += (pixel >> L_RED_SHIFT) & 0xff;
1544                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1545                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1546                 pixel = *(lines + wpls + 2 * j);
1547                 rval += (pixel >> L_RED_SHIFT) & 0xff;
1548                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1549                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1550                 pixel = *(lines + wpls + 2 * j + 1);
1551                 rval += (pixel >> L_RED_SHIFT) & 0xff;
1552                 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1553                 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1554                 composeRGBPixel(rval >> 2, gval >> 2, bval >> 2, &pixel);
1555                 *(lined + j) = pixel;
1556             }
1557         }
1558     }
1559     return;
1560 }
1561 
1562 
1563 /*------------------------------------------------------------------*
1564  *              Binary scaling by closest pixel sampling            *
1565  *------------------------------------------------------------------*/
1566 /*
1567  *  scaleBinaryLow()
1568  *
1569  *  Notes:
1570  *      (1) The dest must be cleared prior to this operation,
1571  *          and we clear it here in the low-level code.
1572  *      (2) We reuse dest pixels and dest pixel rows whenever
1573  *          possible for upscaling; downscaling is done by
1574  *          strict subsampling.
1575  */
1576 l_int32
scaleBinaryLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1577 scaleBinaryLow(l_uint32  *datad,
1578                l_int32    wd,
1579                l_int32    hd,
1580                l_int32    wpld,
1581                l_uint32  *datas,
1582                l_int32    ws,
1583                l_int32    hs,
1584                l_int32    wpls)
1585 {
1586 l_int32    i, j, bpld;
1587 l_int32    xs, prevxs, sval;
1588 l_int32   *srow, *scol;
1589 l_uint32  *lines, *prevlines, *lined, *prevlined;
1590 l_float32  wratio, hratio;
1591 
1592     PROCNAME("scaleBinaryLow");
1593 
1594         /* clear dest */
1595     bpld = 4 * wpld;
1596     memset((char *)datad, 0, hd * bpld);
1597 
1598         /* The source row corresponding to dest row i ==> srow[i]
1599          * The source col corresponding to dest col j ==> scol[j]  */
1600     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1601         return ERROR_INT("srow not made", procName, 1);
1602     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1603         return ERROR_INT("scol not made", procName, 1);
1604 
1605     wratio = (l_float32)ws / (l_float32)wd;
1606     hratio = (l_float32)hs / (l_float32)hd;
1607     for (i = 0; i < hd; i++)
1608         srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
1609     for (j = 0; j < wd; j++)
1610         scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
1611 
1612     prevlines = NULL;
1613     prevxs = -1;
1614     sval = 0;
1615     for (i = 0; i < hd; i++) {
1616         lines = datas + srow[i] * wpls;
1617         lined = datad + i * wpld;
1618         if (lines != prevlines) {  /* make dest from new source row */
1619             for (j = 0; j < wd; j++) {
1620                 xs = scol[j];
1621                 if (xs != prevxs) {  /* get dest pix from source col */
1622                     if ((sval = GET_DATA_BIT(lines, xs)))
1623                         SET_DATA_BIT(lined, j);
1624                     prevxs = xs;
1625                 }
1626                 else {  /* copy prev dest pix, if set */
1627                     if (sval)
1628                         SET_DATA_BIT(lined, j);
1629                 }
1630             }
1631         }
1632         else {  /* lines == prevlines; copy prev dest row */
1633             prevlined = lined - wpld;
1634             memcpy((char *)lined, (char *)prevlined, bpld);
1635         }
1636         prevlines = lines;
1637     }
1638 
1639     FREE(srow);
1640     FREE(scol);
1641 
1642     return 0;
1643 }
1644 
1645 
1646 /*------------------------------------------------------------------*
1647  *                         Scale-to-gray 2x                         *
1648  *------------------------------------------------------------------*/
1649 /*!
1650  *  scaleToGray2Low()
1651  *
1652  *      Input:  usual image variables
1653  *              sumtab  (made from makeSumTabSG2())
1654  *              valtab  (made from makeValTabSG2())
1655  *      Return: 0 if OK; 1 on error.
1656  *
1657  *  The output is processed in sets of 4 output bytes on a row,
1658  *  corresponding to 4 2x2 bit-blocks in the input image.
1659  *  Two lookup tables are used.  The first, sumtab, gets the
1660  *  sum of ON pixels in 4 sets of two adjacent bits,
1661  *  storing the result in 4 adjacent bytes.  After sums from
1662  *  two rows have been added, the second table, valtab,
1663  *  converts from the sum of ON pixels in the 2x2 block to
1664  *  an 8 bpp grayscale value between 0 (for 4 bits ON)
1665  *  and 255 (for 0 bits ON).
1666  */
1667 void
scaleToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1668 scaleToGray2Low(l_uint32  *datad,
1669                 l_int32    wd,
1670                 l_int32    hd,
1671                 l_int32    wpld,
1672                 l_uint32  *datas,
1673                 l_int32    wpls,
1674                 l_uint32  *sumtab,
1675                 l_uint8   *valtab)
1676 {
1677 l_int32    i, j, l, k, m, wd4, extra;
1678 l_uint32   sbyte1, sbyte2, sum;
1679 l_uint32  *lines, *lined;
1680 
1681         /* i indexes the dest lines
1682          * l indexes the source lines
1683          * j indexes the dest bytes
1684          * k indexes the source bytes
1685          * We take two bytes from the source (in 2 lines of 8 pixels
1686          * each) and convert them into four 8 bpp bytes of the dest. */
1687     wd4 = wd & 0xfffffffc;
1688     extra = wd - wd4;
1689     for (i = 0, l = 0; i < hd; i++, l += 2) {
1690         lines = datas + l * wpls;
1691         lined = datad + i * wpld;
1692         for (j = 0, k = 0; j < wd4; j += 4, k++) {
1693             sbyte1 = GET_DATA_BYTE(lines, k);
1694             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1695             sum = sumtab[sbyte1] + sumtab[sbyte2];
1696             SET_DATA_BYTE(lined, j, valtab[sum >> 24]);
1697             SET_DATA_BYTE(lined, j + 1, valtab[(sum >> 16) & 0xff]);
1698             SET_DATA_BYTE(lined, j + 2, valtab[(sum >> 8) & 0xff]);
1699             SET_DATA_BYTE(lined, j + 3, valtab[sum & 0xff]);
1700         }
1701         if (extra > 0) {
1702             sbyte1 = GET_DATA_BYTE(lines, k);
1703             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1704             sum = sumtab[sbyte1] + sumtab[sbyte2];
1705             for (m = 0; m < extra; m++) {
1706                 SET_DATA_BYTE(lined, j + m,
1707                               valtab[((sum >> (24 - 8 * m)) & 0xff)]);
1708             }
1709         }
1710 
1711     }
1712 
1713     return;
1714 }
1715 
1716 
1717 /*!
1718  *  makeSumTabSG2()
1719  *
1720  *  Returns a table of 256 l_uint32s, giving the four output
1721  *  8-bit grayscale sums corresponding to 8 input bits of a binary
1722  *  image, for a 2x scale-to-gray op.  The sums from two
1723  *  adjacent scanlines are then added and transformed to
1724  *  output four 8 bpp pixel values, using makeValTabSG2().
1725  */
1726 l_uint32  *
makeSumTabSG2(void)1727 makeSumTabSG2(void)
1728 {
1729 l_int32    i;
1730 l_int32    sum[] = {0, 1, 1, 2};
1731 l_uint32  *tab;
1732 
1733     PROCNAME("makeSumTabSG2");
1734 
1735     if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
1736         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1737 
1738         /* Pack the four sums separately in four bytes */
1739     for (i = 0; i < 256; i++) {
1740         tab[i] = (sum[i & 0x3] | sum[(i >> 2) & 0x3] << 8 |
1741                   sum[(i >> 4) & 0x3] << 16 | sum[(i >> 6) & 0x3] << 24);
1742     }
1743 
1744     return tab;
1745 }
1746 
1747 
1748 /*!
1749  *  makeValTabSG2()
1750  *
1751  *  Returns an 8 bit value for the sum of ON pixels
1752  *  in a 2x2 square, according to
1753  *
1754  *         val = 255 - (255 * sum)/4
1755  *
1756  *  where sum is in set {0,1,2,3,4}
1757  */
1758 l_uint8 *
makeValTabSG2(void)1759 makeValTabSG2(void)
1760 {
1761 l_int32   i;
1762 l_uint8  *tab;
1763 
1764     PROCNAME("makeValTabSG2");
1765 
1766     if ((tab = (l_uint8 *)CALLOC(5, sizeof(l_uint8))) == NULL)
1767         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1768 
1769     for (i = 0; i < 5; i++)
1770         tab[i] = 255 - (i * 255) / 4;
1771 
1772     return tab;
1773 }
1774 
1775 
1776 /*------------------------------------------------------------------*
1777  *                         Scale-to-gray 3x                         *
1778  *------------------------------------------------------------------*/
1779 /*!
1780  *  scaleToGray3Low()
1781  *
1782  *      Input:  usual image variables
1783  *              sumtab  (made from makeSumTabSG3())
1784  *              valtab  (made from makeValTabSG3())
1785  *      Return: 0 if OK; 1 on error
1786  *
1787  *  Each set of 8 3x3 bit-blocks in the source image, which
1788  *  consist of 72 pixels arranged 24 pixels wide by 3 scanlines,
1789  *  is converted to a row of 8 8-bit pixels in the dest image.
1790  *  These 72 pixels of the input image are runs of 24 pixels
1791  *  in three adjacent scanlines.  Each run of 24 pixels is
1792  *  stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
1793  *  The first, sumtab, takes 6 of these bits and stores
1794  *  sum, taken 3 bits at a time, in two bytes.  (See
1795  *  makeSumTabSG3).  This is done for each of the 3 scanlines,
1796  *  and the results are added.  We now have the sum of ON pixels
1797  *  in the first two 3x3 blocks in two bytes.  The valtab LUT
1798  *  then converts these values (which go from 0 to 9) to
1799  *  grayscale values between between 255 and 0.  (See makeValTabSG3).
1800  *  This process is repeated for each of the other 3 sets of
1801  *  6x3 input pixels, giving 8 output pixels in total.
1802  *
1803  *  Note: because the input image is processed in groups of
1804  *        24 x 3 pixels, the process clips the input height to
1805  *        (h - h % 3) and the input width to (w - w % 24).
1806  */
1807 void
scaleToGray3Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1808 scaleToGray3Low(l_uint32  *datad,
1809                 l_int32    wd,
1810                 l_int32    hd,
1811                 l_int32    wpld,
1812                 l_uint32  *datas,
1813                 l_int32    wpls,
1814                 l_uint32  *sumtab,
1815                 l_uint8   *valtab)
1816 {
1817 l_int32    i, j, l, k;
1818 l_uint32   threebytes1, threebytes2, threebytes3, sum;
1819 l_uint32  *lines, *lined;
1820 
1821         /* i indexes the dest lines
1822          * l indexes the source lines
1823          * j indexes the dest bytes
1824          * k indexes the source bytes
1825          * We take 9 bytes from the source (72 binary pixels
1826          * in three lines of 24 pixels each) and convert it
1827          * into 8 bytes of the dest (8 8bpp pixels in one line)   */
1828     for (i = 0, l = 0; i < hd; i++, l += 3) {
1829         lines = datas + l * wpls;
1830         lined = datad + i * wpld;
1831         for (j = 0, k = 0; j < wd; j += 8, k += 3) {
1832             threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
1833                           (GET_DATA_BYTE(lines, k + 1) << 8) |
1834                           GET_DATA_BYTE(lines, k + 2);
1835             threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
1836                           (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
1837                           GET_DATA_BYTE(lines + wpls, k + 2);
1838             threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
1839                           (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
1840                           GET_DATA_BYTE(lines + 2 * wpls, k + 2);
1841 
1842             sum = sumtab[(threebytes1 >> 18)] +
1843                   sumtab[(threebytes2 >> 18)] +
1844                   sumtab[(threebytes3 >> 18)];
1845             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
1846             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
1847 
1848             sum = sumtab[((threebytes1 >> 12) & 0x3f)] +
1849                   sumtab[((threebytes2 >> 12) & 0x3f)] +
1850                   sumtab[((threebytes3 >> 12) & 0x3f)];
1851             SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 2)]);
1852             SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
1853 
1854             sum = sumtab[((threebytes1 >> 6) & 0x3f)] +
1855                   sumtab[((threebytes2 >> 6) & 0x3f)] +
1856                   sumtab[((threebytes3 >> 6) & 0x3f)];
1857             SET_DATA_BYTE(lined, j + 4, valtab[GET_DATA_BYTE(&sum, 2)]);
1858             SET_DATA_BYTE(lined, j + 5, valtab[GET_DATA_BYTE(&sum, 3)]);
1859 
1860             sum = sumtab[(threebytes1 & 0x3f)] +
1861                   sumtab[(threebytes2 & 0x3f)] +
1862                   sumtab[(threebytes3 & 0x3f)];
1863             SET_DATA_BYTE(lined, j + 6, valtab[GET_DATA_BYTE(&sum, 2)]);
1864             SET_DATA_BYTE(lined, j + 7, valtab[GET_DATA_BYTE(&sum, 3)]);
1865         }
1866     }
1867 
1868     return;
1869 }
1870 
1871 
1872 
1873 /*!
1874  *  makeSumTabSG3()
1875  *
1876  *  Returns a table of 64 l_uint32s, giving the two output
1877  *  8-bit grayscale sums corresponding to 6 input bits of a binary
1878  *  image, for a 3x scale-to-gray op.  In practice, this would
1879  *  be used three times (on adjacent scanlines), and the sums would
1880  *  be added and then transformed to output 8 bpp pixel values,
1881  *  using makeValTabSG3().
1882  */
1883 l_uint32  *
makeSumTabSG3(void)1884 makeSumTabSG3(void)
1885 {
1886 l_int32    i;
1887 l_int32    sum[] = {0, 1, 1, 2, 1, 2, 2, 3};
1888 l_uint32  *tab;
1889 
1890     PROCNAME("makeSumTabSG3");
1891 
1892     if ((tab = (l_uint32 *)CALLOC(64, sizeof(l_uint32))) == NULL)
1893         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1894 
1895         /* Pack the two sums separately in two bytes */
1896     for (i = 0; i < 64; i++) {
1897         tab[i] = (sum[i & 0x07]) | (sum[(i >> 3) & 0x07] << 8);
1898     }
1899 
1900     return tab;
1901 }
1902 
1903 
1904 /*!
1905  *  makeValTabSG3()
1906  *
1907  *  Returns an 8 bit value for the sum of ON pixels
1908  *  in a 3x3 square, according to
1909  *      val = 255 - (255 * sum)/9
1910  *  where sum is in set {0, ... ,9}
1911  */
1912 l_uint8 *
makeValTabSG3(void)1913 makeValTabSG3(void)
1914 {
1915 l_int32   i;
1916 l_uint8  *tab;
1917 
1918     PROCNAME("makeValTabSG3");
1919 
1920     if ((tab = (l_uint8 *)CALLOC(10, sizeof(l_uint8))) == NULL)
1921         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1922 
1923     for (i = 0; i < 10; i++)
1924         tab[i] = 0xff - (i * 255) / 9;
1925 
1926     return tab;
1927 }
1928 
1929 
1930 /*------------------------------------------------------------------*
1931  *                         Scale-to-gray 4x                         *
1932  *------------------------------------------------------------------*/
1933 /*!
1934  *  scaleToGray4Low()
1935  *
1936  *      Input:  usual image variables
1937  *              sumtab  (made from makeSumTabSG4())
1938  *              valtab  (made from makeValTabSG4())
1939  *      Return: 0 if OK; 1 on error.
1940  *
1941  *  The output is processed in sets of 2 output bytes on a row,
1942  *  corresponding to 2 4x4 bit-blocks in the input image.
1943  *  Two lookup tables are used.  The first, sumtab, gets the
1944  *  sum of ON pixels in two sets of four adjacent bits,
1945  *  storing the result in 2 adjacent bytes.  After sums from
1946  *  four rows have been added, the second table, valtab,
1947  *  converts from the sum of ON pixels in the 4x4 block to
1948  *  an 8 bpp grayscale value between 0 (for 16 bits ON)
1949  *  and 255 (for 0 bits ON).
1950  */
1951 void
scaleToGray4Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1952 scaleToGray4Low(l_uint32  *datad,
1953                 l_int32    wd,
1954                 l_int32    hd,
1955                 l_int32    wpld,
1956                 l_uint32  *datas,
1957                 l_int32    wpls,
1958                 l_uint32  *sumtab,
1959                 l_uint8   *valtab)
1960 {
1961 l_int32    i, j, l, k;
1962 l_uint32   sbyte1, sbyte2, sbyte3, sbyte4, sum;
1963 l_uint32  *lines, *lined;
1964 
1965         /* i indexes the dest lines
1966          * l indexes the source lines
1967          * j indexes the dest bytes
1968          * k indexes the source bytes
1969          * We take four bytes from the source (in 4 lines of 8 pixels
1970          * each) and convert it into two 8 bpp bytes of the dest. */
1971     for (i = 0, l = 0; i < hd; i++, l += 4) {
1972         lines = datas + l * wpls;
1973         lined = datad + i * wpld;
1974         for (j = 0, k = 0; j < wd; j += 2, k++) {
1975             sbyte1 = GET_DATA_BYTE(lines, k);
1976             sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1977             sbyte3 = GET_DATA_BYTE(lines + 2 * wpls, k);
1978             sbyte4 = GET_DATA_BYTE(lines + 3 * wpls, k);
1979             sum = sumtab[sbyte1] + sumtab[sbyte2] +
1980                   sumtab[sbyte3] + sumtab[sbyte4];
1981             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
1982             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
1983         }
1984     }
1985 
1986     return;
1987 }
1988 
1989 
1990 /*!
1991  *  makeSumTabSG4()
1992  *
1993  *  Returns a table of 256 l_uint32s, giving the two output
1994  *  8-bit grayscale sums corresponding to 8 input bits of a binary
1995  *  image, for a 4x scale-to-gray op.  The sums from four
1996  *  adjacent scanlines are then added and transformed to
1997  *  output 8 bpp pixel values, using makeValTabSG4().
1998  */
1999 l_uint32  *
makeSumTabSG4(void)2000 makeSumTabSG4(void)
2001 {
2002 l_int32    i;
2003 l_int32    sum[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
2004 l_uint32  *tab;
2005 
2006     PROCNAME("makeSumTabSG4");
2007 
2008     if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
2009         return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2010 
2011         /* Pack the two sums separately in two bytes */
2012     for (i = 0; i < 256; i++) {
2013         tab[i] = (sum[i & 0xf]) | (sum[(i >> 4) & 0xf] << 8);
2014     }
2015 
2016     return tab;
2017 }
2018 
2019 
2020 /*!
2021  *  makeValTabSG4()
2022  *
2023  *  Returns an 8 bit value for the sum of ON pixels
2024  *  in a 4x4 square, according to
2025  *
2026  *         val = 255 - (255 * sum)/16
2027  *
2028  *  where sum is in set {0, ... ,16}
2029  */
2030 l_uint8 *
makeValTabSG4(void)2031 makeValTabSG4(void)
2032 {
2033 l_int32   i;
2034 l_uint8  *tab;
2035 
2036     PROCNAME("makeValTabSG4");
2037 
2038     if ((tab = (l_uint8 *)CALLOC(17, sizeof(l_uint8))) == NULL)
2039         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2040 
2041     for (i = 0; i < 17; i++)
2042         tab[i] = 0xff - (i * 255) / 16;
2043 
2044     return tab;
2045 }
2046 
2047 
2048 /*------------------------------------------------------------------*
2049  *                         Scale-to-gray 6x                         *
2050  *------------------------------------------------------------------*/
2051 /*!
2052  *  scaleToGray6Low()
2053  *
2054  *      Input:  usual image variables
2055  *              tab8  (made from makePixelSumTab8())
2056  *              valtab  (made from makeValTabSG6())
2057  *      Return: 0 if OK; 1 on error
2058  *
2059  *  Each set of 4 6x6 bit-blocks in the source image, which
2060  *  consist of 144 pixels arranged 24 pixels wide by 6 scanlines,
2061  *  is converted to a row of 4 8-bit pixels in the dest image.
2062  *  These 144 pixels of the input image are runs of 24 pixels
2063  *  in six adjacent scanlines.  Each run of 24 pixels is
2064  *  stored in the 24 LSbits of a 32-bit word.  We use 2 LUTs.
2065  *  The first, tab8, takes 6 of these bits and stores
2066  *  sum in one byte.  This is done for each of the 6 scanlines,
2067  *  and the results are added.
2068  *  We now have the sum of ON pixels in the first 6x6 block.  The
2069  *  valtab LUT then converts these values (which go from 0 to 36) to
2070  *  grayscale values between between 255 and 0.  (See makeValTabSG6).
2071  *  This process is repeated for each of the other 3 sets of
2072  *  6x6 input pixels, giving 4 output pixels in total.
2073  *
2074  *  Note: because the input image is processed in groups of
2075  *        24 x 6 pixels, the process clips the input height to
2076  *        (h - h % 6) and the input width to (w - w % 24).
2077  *
2078  */
2079 void
scaleToGray6Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2080 scaleToGray6Low(l_uint32  *datad,
2081                 l_int32    wd,
2082                 l_int32    hd,
2083                 l_int32    wpld,
2084                 l_uint32  *datas,
2085                 l_int32    wpls,
2086                 l_int32   *tab8,
2087                 l_uint8   *valtab)
2088 {
2089 l_int32    i, j, l, k;
2090 l_uint32   threebytes1, threebytes2, threebytes3;
2091 l_uint32   threebytes4, threebytes5, threebytes6, sum;
2092 l_uint32  *lines, *lined;
2093 
2094         /* i indexes the dest lines
2095          * l indexes the source lines
2096          * j indexes the dest bytes
2097          * k indexes the source bytes
2098          * We take 18 bytes from the source (144 binary pixels
2099          * in six lines of 24 pixels each) and convert it
2100          * into 4 bytes of the dest (four 8 bpp pixels in one line)   */
2101     for (i = 0, l = 0; i < hd; i++, l += 6) {
2102         lines = datas + l * wpls;
2103         lined = datad + i * wpld;
2104         for (j = 0, k = 0; j < wd; j += 4, k += 3) {
2105                 /* First grab the 18 bytes, 3 at a time, and put each set
2106                  * of 3 bytes into the LS bytes of a 32-bit word. */
2107             threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
2108                           (GET_DATA_BYTE(lines, k + 1) << 8) |
2109                           GET_DATA_BYTE(lines, k + 2);
2110             threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
2111                           (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
2112                           GET_DATA_BYTE(lines + wpls, k + 2);
2113             threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
2114                           (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
2115                           GET_DATA_BYTE(lines + 2 * wpls, k + 2);
2116             threebytes4 = (GET_DATA_BYTE(lines + 3 * wpls, k) << 16) |
2117                           (GET_DATA_BYTE(lines + 3 * wpls, k + 1) << 8) |
2118                           GET_DATA_BYTE(lines + 3 * wpls, k + 2);
2119             threebytes5 = (GET_DATA_BYTE(lines + 4 * wpls, k) << 16) |
2120                           (GET_DATA_BYTE(lines + 4 * wpls, k + 1) << 8) |
2121                           GET_DATA_BYTE(lines + 4 * wpls, k + 2);
2122             threebytes6 = (GET_DATA_BYTE(lines + 5 * wpls, k) << 16) |
2123                           (GET_DATA_BYTE(lines + 5 * wpls, k + 1) << 8) |
2124                           GET_DATA_BYTE(lines + 5 * wpls, k + 2);
2125 
2126                 /* Sum first set of 36 bits and convert to 0-255 */
2127             sum = tab8[(threebytes1 >> 18)] +
2128                   tab8[(threebytes2 >> 18)] +
2129                   tab8[(threebytes3 >> 18)] +
2130                   tab8[(threebytes4 >> 18)] +
2131                   tab8[(threebytes5 >> 18)] +
2132                    tab8[(threebytes6 >> 18)];
2133             SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 3)]);
2134 
2135                 /* Ditto for second set */
2136             sum = tab8[((threebytes1 >> 12) & 0x3f)] +
2137                   tab8[((threebytes2 >> 12) & 0x3f)] +
2138                   tab8[((threebytes3 >> 12) & 0x3f)] +
2139                   tab8[((threebytes4 >> 12) & 0x3f)] +
2140                   tab8[((threebytes5 >> 12) & 0x3f)] +
2141                   tab8[((threebytes6 >> 12) & 0x3f)];
2142             SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
2143 
2144             sum = tab8[((threebytes1 >> 6) & 0x3f)] +
2145                   tab8[((threebytes2 >> 6) & 0x3f)] +
2146                   tab8[((threebytes3 >> 6) & 0x3f)] +
2147                   tab8[((threebytes4 >> 6) & 0x3f)] +
2148                   tab8[((threebytes5 >> 6) & 0x3f)] +
2149                   tab8[((threebytes6 >> 6) & 0x3f)];
2150             SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 3)]);
2151 
2152             sum = tab8[(threebytes1 & 0x3f)] +
2153                   tab8[(threebytes2 & 0x3f)] +
2154                   tab8[(threebytes3 & 0x3f)] +
2155                   tab8[(threebytes4 & 0x3f)] +
2156                   tab8[(threebytes5 & 0x3f)] +
2157                   tab8[(threebytes6 & 0x3f)];
2158             SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
2159         }
2160     }
2161 
2162     return;
2163 }
2164 
2165 
2166 /*!
2167  *  makeValTabSG6()
2168  *
2169  *  Returns an 8 bit value for the sum of ON pixels
2170  *  in a 6x6 square, according to
2171  *      val = 255 - (255 * sum)/36
2172  *  where sum is in set {0, ... ,36}
2173  */
2174 l_uint8 *
makeValTabSG6(void)2175 makeValTabSG6(void)
2176 {
2177 l_int32   i;
2178 l_uint8  *tab;
2179 
2180     PROCNAME("makeValTabSG6");
2181 
2182     if ((tab = (l_uint8 *)CALLOC(37, sizeof(l_uint8))) == NULL)
2183         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2184 
2185     for (i = 0; i < 37; i++)
2186         tab[i] = 0xff - (i * 255) / 36;
2187 
2188     return tab;
2189 }
2190 
2191 
2192 /*------------------------------------------------------------------*
2193  *                         Scale-to-gray 8x                         *
2194  *------------------------------------------------------------------*/
2195 /*!
2196  *  scaleToGray8Low()
2197  *
2198  *      Input:  usual image variables
2199  *              tab8  (made from makePixelSumTab8())
2200  *              valtab  (made from makeValTabSG8())
2201  *      Return: 0 if OK; 1 on error.
2202  *
2203  *  The output is processed one dest byte at a time,
2204  *  corresponding to 8 rows of src bytes in the input image.
2205  *  Two lookup tables are used.  The first, tab8, gets the
2206  *  sum of ON pixels in a byte.  After sums from 8 rows have
2207  *  been added, the second table, valtab, converts from this
2208  *  value (which is between 0 and 64) to an 8 bpp grayscale
2209  *  value between 0 (for all 64 bits ON) and 255 (for 0 bits ON).
2210  */
2211 void
scaleToGray8Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2212 scaleToGray8Low(l_uint32  *datad,
2213                 l_int32    wd,
2214                 l_int32    hd,
2215                 l_int32    wpld,
2216                 l_uint32  *datas,
2217                 l_int32    wpls,
2218                 l_int32   *tab8,
2219                 l_uint8   *valtab)
2220 {
2221 l_int32    i, j, k;
2222 l_int32    sbyte0, sbyte1, sbyte2, sbyte3, sbyte4, sbyte5, sbyte6, sbyte7, sum;
2223 l_uint32  *lines, *lined;
2224 
2225         /* i indexes the dest lines
2226          * k indexes the source lines
2227          * j indexes the src and dest bytes
2228          * We take 8 bytes from the source (in 8 lines of 8 pixels
2229          * each) and convert it into one 8 bpp byte of the dest. */
2230     for (i = 0, k = 0; i < hd; i++, k += 8) {
2231         lines = datas + k * wpls;
2232         lined = datad + i * wpld;
2233         for (j = 0; j < wd; j++) {
2234             sbyte0 = GET_DATA_BYTE(lines, j);
2235             sbyte1 = GET_DATA_BYTE(lines + wpls, j);
2236             sbyte2 = GET_DATA_BYTE(lines + 2 * wpls, j);
2237             sbyte3 = GET_DATA_BYTE(lines + 3 * wpls, j);
2238             sbyte4 = GET_DATA_BYTE(lines + 4 * wpls, j);
2239             sbyte5 = GET_DATA_BYTE(lines + 5 * wpls, j);
2240             sbyte6 = GET_DATA_BYTE(lines + 6 * wpls, j);
2241             sbyte7 = GET_DATA_BYTE(lines + 7 * wpls, j);
2242             sum = tab8[sbyte0] + tab8[sbyte1] +
2243                   tab8[sbyte2] + tab8[sbyte3] +
2244                   tab8[sbyte4] + tab8[sbyte5] +
2245                   tab8[sbyte6] + tab8[sbyte7];
2246             SET_DATA_BYTE(lined, j, valtab[sum]);
2247         }
2248     }
2249 
2250     return;
2251 }
2252 
2253 
2254 /*!
2255  *  makeValTabSG8()
2256  *
2257  *  Returns an 8 bit value for the sum of ON pixels
2258  *  in an 8x8 square, according to
2259  *      val = 255 - (255 * sum)/64
2260  *  where sum is in set {0, ... ,64}
2261  */
2262 l_uint8 *
makeValTabSG8(void)2263 makeValTabSG8(void)
2264 {
2265 l_int32   i;
2266 l_uint8  *tab;
2267 
2268     PROCNAME("makeValTabSG8");
2269 
2270     if ((tab = (l_uint8 *)CALLOC(65, sizeof(l_uint8))) == NULL)
2271         return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2272 
2273     for (i = 0; i < 65; i++)
2274         tab[i] = 0xff - (i * 255) / 64;
2275 
2276     return tab;
2277 }
2278 
2279 
2280 /*------------------------------------------------------------------*
2281  *                         Scale-to-gray 16x                        *
2282  *------------------------------------------------------------------*/
2283 /*!
2284  *  scaleToGray16Low()
2285  *
2286  *      Input:  usual image variables
2287  *              tab8  (made from makePixelSumTab8())
2288  *      Return: 0 if OK; 1 on error.
2289  *
2290  *  The output is processed one dest byte at a time, corresponding
2291  *  to 16 rows consisting each of 2 src bytes in the input image.
2292  *  This uses one lookup table, tab8, which gives the sum of
2293  *  ON pixels in a byte.  After summing for all ON pixels in the
2294  *  32 src bytes, which is between 0 and 256, this is converted
2295  *  to an 8 bpp grayscale value between 0 (for 255 or 256 bits ON)
2296  *  and 255 (for 0 bits ON).
2297  */
2298 void
scaleToGray16Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8)2299 scaleToGray16Low(l_uint32  *datad,
2300                   l_int32    wd,
2301                  l_int32    hd,
2302                  l_int32    wpld,
2303                  l_uint32  *datas,
2304                  l_int32    wpls,
2305                  l_int32   *tab8)
2306 {
2307 l_int32    i, j, k, m;
2308 l_int32    sum;
2309 l_uint32  *lines, *lined;
2310 
2311         /* i indexes the dest lines
2312          * k indexes the source lines
2313          * j indexes the dest bytes
2314          * m indexes the src bytes
2315          * We take 32 bytes from the source (in 16 lines of 16 pixels
2316          * each) and convert it into one 8 bpp byte of the dest. */
2317     for (i = 0, k = 0; i < hd; i++, k += 16) {
2318         lines = datas + k * wpls;
2319         lined = datad + i * wpld;
2320         for (j = 0; j < wd; j++) {
2321             m = 2 * j;
2322             sum = tab8[GET_DATA_BYTE(lines, m)];
2323             sum += tab8[GET_DATA_BYTE(lines, m + 1)];
2324             sum += tab8[GET_DATA_BYTE(lines + wpls, m)];
2325             sum += tab8[GET_DATA_BYTE(lines + wpls, m + 1)];
2326             sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m)];
2327             sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m + 1)];
2328             sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m)];
2329             sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m + 1)];
2330             sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m)];
2331             sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m + 1)];
2332             sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m)];
2333             sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m + 1)];
2334             sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m)];
2335             sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m + 1)];
2336             sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m)];
2337             sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m + 1)];
2338             sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m)];
2339             sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m + 1)];
2340             sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m)];
2341             sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m + 1)];
2342             sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m)];
2343             sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m + 1)];
2344             sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m)];
2345             sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m + 1)];
2346             sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m)];
2347             sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m + 1)];
2348             sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m)];
2349             sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m + 1)];
2350             sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m)];
2351             sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m + 1)];
2352             sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m)];
2353             sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m + 1)];
2354             sum = L_MIN(sum, 255);
2355             SET_DATA_BYTE(lined, j, 255 - sum);
2356         }
2357     }
2358 
2359     return;
2360 }
2361 
2362 
2363 
2364 /*------------------------------------------------------------------*
2365  *                         Grayscale mipmap                         *
2366  *------------------------------------------------------------------*/
2367 /*!
2368  *  scaleMipmapLow()
2369  *
2370  *  See notes in scale.c for pixScaleToGrayMipmap().  This function
2371  *  is here for pedagogical reasons.  It gives poor results on document
2372  *  images because of aliasing.
2373  */
2374 l_int32
scaleMipmapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas1,l_int32 wpls1,l_uint32 * datas2,l_int32 wpls2,l_float32 red)2375 scaleMipmapLow(l_uint32  *datad,
2376                l_int32    wd,
2377                l_int32    hd,
2378                l_int32    wpld,
2379                l_uint32  *datas1,
2380                l_int32    wpls1,
2381                l_uint32  *datas2,
2382                l_int32    wpls2,
2383                l_float32  red)
2384 {
2385 l_int32    i, j, val1, val2, val, row2, col2;
2386 l_int32   *srow, *scol;
2387 l_uint32  *lines1, *lines2, *lined;
2388 l_float32  ratio, w1, w2;
2389 
2390     PROCNAME("scaleMipmapLow");
2391 
2392         /* Clear dest */
2393     memset((char *)datad, 0, 4 * wpld * hd);
2394 
2395         /* Each dest pixel at (j,i) is computed by interpolating
2396            between the two src images at the corresponding location.
2397            We store the UL corner locations of the square of
2398            src pixels in thelower-resolution image that correspond
2399            to dest pixel (j,i).  The are labelled by the arrays
2400            srow[i], scol[j].  The UL corner locations of the higher
2401            resolution src pixels are obtained from these arrays
2402            by multiplying by 2. */
2403     if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
2404         return ERROR_INT("srow not made", procName, 1);
2405     if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
2406         return ERROR_INT("scol not made", procName, 1);
2407     ratio = 1. / (2. * red);  /* 0.5 for red = 1, 1 for red = 0.5 */
2408     for (i = 0; i < hd; i++)
2409         srow[i] = (l_int32)(ratio * i);
2410     for (j = 0; j < wd; j++)
2411         scol[j] = (l_int32)(ratio * j);
2412 
2413         /* Get weights for linear interpolation: these are the
2414          * 'distances' of the dest image plane from the two
2415          * src image planes. */
2416     w1 = 2. * red - 1.;   /* w1 --> 1 as red --> 1 */
2417     w2 = 1. - w1;
2418 
2419         /* For each dest pixel, compute linear interpolation */
2420     for (i = 0; i < hd; i++) {
2421         row2 = srow[i];
2422         lines1 = datas1 + 2 * row2 * wpls1;
2423         lines2 = datas2 + row2 * wpls2;
2424         lined = datad + i * wpld;
2425         for (j = 0; j < wd; j++) {
2426             col2 = scol[j];
2427             val1 = GET_DATA_BYTE(lines1, 2 * col2);
2428             val2 = GET_DATA_BYTE(lines2, col2);
2429             val = (l_int32)(w1 * val1 + w2 * val2);
2430             SET_DATA_BYTE(lined, j, val);
2431         }
2432     }
2433 
2434     FREE(srow);
2435     FREE(scol);
2436     return 0;
2437 }
2438