1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16
17 /*
18 * scalelow.c
19 *
20 * Color (interpolated) scaling: general case
21 * void scaleColorLILow()
22 *
23 * Grayscale (interpolated) scaling: general case
24 * void scaleGrayLILow()
25 *
26 * Color (interpolated) scaling: 2x upscaling
27 * void scaleColor2xLILow()
28 * void scaleColor2xLILineLow()
29 *
30 * Grayscale (interpolated) scaling: 2x upscaling
31 * void scaleGray2xLILow()
32 * void scaleGray2xLILineLow()
33 *
34 * Grayscale (interpolated) scaling: 4x upscaling
35 * void scaleGray4xLILow()
36 * void scaleGray4xLILineLow()
37 *
38 * Grayscale and color scaling by closest pixel sampling
39 * l_int32 scaleBySamplingLow()
40 *
41 * Color and grayscale downsampling with (antialias) lowpass filter
42 * l_int32 scaleSmoothLow()
43 * void scaleRGBToGray2Low()
44 *
45 * Color and grayscale downsampling with (antialias) area mapping
46 * l_int32 scaleColorAreaMapLow()
47 * l_int32 scaleGrayAreaMapLow()
48 * l_int32 scaleAreaMapLow2()
49 *
50 * Binary scaling by closest pixel sampling
51 * l_int32 scaleBinaryLow()
52 *
53 * Scale-to-gray 2x
54 * void scaleToGray2Low()
55 * l_uint32 *makeSumTabSG2()
56 * l_uint8 *makeValTabSG2()
57 *
58 * Scale-to-gray 3x
59 * void scaleToGray3Low()
60 * l_uint32 *makeSumTabSG3()
61 * l_uint8 *makeValTabSG3()
62 *
63 * Scale-to-gray 4x
64 * void scaleToGray4Low()
65 * l_uint32 *makeSumTabSG4()
66 * l_uint8 *makeValTabSG4()
67 *
68 * Scale-to-gray 6x
69 * void scaleToGray6Low()
70 * l_uint8 *makeValTabSG6()
71 *
72 * Scale-to-gray 8x
73 * void scaleToGray8Low()
74 * l_uint8 *makeValTabSG8()
75 *
76 * Scale-to-gray 16x
77 * void scaleToGray16Low()
78 *
79 * Grayscale mipmap
80 * l_int32 scaleMipmapLow()
81 *
82 */
83
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include "allheaders.h"
88
89 #ifndef NO_CONSOLE_IO
90 #define DEBUG_OVERFLOW 0
91 #define DEBUG_UNROLLING 0
92 #endif /* ~NO_CONSOLE_IO */
93
94
95 /*------------------------------------------------------------------*
96 * General linear interpolated color scaling *
97 *------------------------------------------------------------------*/
98 /*!
99 * scaleColorLILow()
100 *
101 * We choose to divide each pixel into 16 x 16 sub-pixels.
102 * Linear interpolation is equivalent to finding the
103 * fractional area (i.e., number of sub-pixels divided
104 * by 256) associated with each of the four nearest src pixels,
105 * and weighting each pixel value by this fractional area.
106 *
107 * P3 speed is about 7 x 10^6 dst pixels/sec/GHz
108 */
109 void
scaleColorLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)110 scaleColorLILow(l_uint32 *datad,
111 l_int32 wd,
112 l_int32 hd,
113 l_int32 wpld,
114 l_uint32 *datas,
115 l_int32 ws,
116 l_int32 hs,
117 l_int32 wpls)
118 {
119 l_int32 i, j, wm2, hm2;
120 l_int32 xpm, ypm; /* location in src image, to 1/16 of a pixel */
121 l_int32 xp, yp, xf, yf; /* src pixel and pixel fraction coordinates */
122 l_int32 v00r, v01r, v10r, v11r, v00g, v01g, v10g, v11g;
123 l_int32 v00b, v01b, v10b, v11b, area00, area01, area10, area11;
124 l_uint32 pixels1, pixels2, pixels3, pixels4, pixel;
125 l_uint32 *lines, *lined;
126 l_float32 scx, scy;
127
128 /* (scx, scy) are scaling factors that are applied to the
129 * dest coords to get the corresponding src coords.
130 * We need them because we iterate over dest pixels
131 * and must find the corresponding set of src pixels. */
132 scx = 16. * (l_float32)ws / (l_float32)wd;
133 scy = 16. * (l_float32)hs / (l_float32)hd;
134
135 wm2 = ws - 2;
136 hm2 = hs - 2;
137
138 /* iterate over the destination pixels */
139 for (i = 0; i < hd; i++) {
140 ypm = (l_int32)(scy * (l_float32)i);
141 yp = ypm >> 4;
142 yf = ypm & 0x0f;
143 lined = datad + i * wpld;
144 lines = datas + yp * wpls;
145 for (j = 0; j < wd; j++) {
146 xpm = (l_int32)(scx * (l_float32)j);
147 xp = xpm >> 4;
148 xf = xpm & 0x0f;
149
150 /* if near the edge, just use the src pixel value */
151 if (xp > wm2 || yp > hm2) {
152 *(lined + j) = *(lines + xp);
153 continue;
154 }
155
156 /* do bilinear interpolation. This is a simple
157 * generalization of the calculation in scaleGrayLILow() */
158 pixels1 = *(lines + xp);
159 pixels2 = *(lines + xp + 1);
160 pixels3 = *(lines + wpls + xp);
161 pixels4 = *(lines + wpls + xp + 1);
162 area00 = (16 - xf) * (16 - yf);
163 area10 = xf * (16 - yf);
164 area01 = (16 - xf) * yf;
165 area11 = xf * yf;
166 v00r = area00 * ((pixels1 >> L_RED_SHIFT) & 0xff);
167 v00g = area00 * ((pixels1 >> L_GREEN_SHIFT) & 0xff);
168 v00b = area00 * ((pixels1 >> L_BLUE_SHIFT) & 0xff);
169 v10r = area10 * ((pixels2 >> L_RED_SHIFT) & 0xff);
170 v10g = area10 * ((pixels2 >> L_GREEN_SHIFT) & 0xff);
171 v10b = area10 * ((pixels2 >> L_BLUE_SHIFT) & 0xff);
172 v01r = area01 * ((pixels3 >> L_RED_SHIFT) & 0xff);
173 v01g = area01 * ((pixels3 >> L_GREEN_SHIFT) & 0xff);
174 v01b = area01 * ((pixels3 >> L_BLUE_SHIFT) & 0xff);
175 v11r = area11 * ((pixels4 >> L_RED_SHIFT) & 0xff);
176 v11g = area11 * ((pixels4 >> L_GREEN_SHIFT) & 0xff);
177 v11b = area11 * ((pixels4 >> L_BLUE_SHIFT) & 0xff);
178 pixel = (((v00r + v10r + v01r + v11r + 128) << 16) & 0xff000000) |
179 (((v00g + v10g + v01g + v11g + 128) << 8) & 0x00ff0000) |
180 ((v00b + v10b + v01b + v11b + 128) & 0x0000ff00);
181 *(lined + j) = pixel;
182 }
183 }
184
185 return;
186 }
187
188
189 /*------------------------------------------------------------------*
190 * General linear interpolated gray scaling *
191 *------------------------------------------------------------------*/
192 /*!
193 * scaleGrayLILow()
194 *
195 * We choose to divide each pixel into 16 x 16 sub-pixels.
196 * Linear interpolation is equivalent to finding the
197 * fractional area (i.e., number of sub-pixels divided
198 * by 256) associated with each of the four nearest src pixels,
199 * and weighting each pixel value by this fractional area.
200 */
201 void
scaleGrayLILow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)202 scaleGrayLILow(l_uint32 *datad,
203 l_int32 wd,
204 l_int32 hd,
205 l_int32 wpld,
206 l_uint32 *datas,
207 l_int32 ws,
208 l_int32 hs,
209 l_int32 wpls)
210 {
211 l_int32 i, j, wm2, hm2;
212 l_int32 xpm, ypm; /* location in src image, to 1/16 of a pixel */
213 l_int32 xp, yp, xf, yf; /* src pixel and pixel fraction coordinates */
214 l_int32 v00, v01, v10, v11;
215 l_uint8 val;
216 l_uint32 *lines, *lined;
217 l_float32 scx, scy;
218
219 /* (scx, scy) are scaling factors that are applied to the
220 * dest coords to get the corresponding src coords.
221 * We need them because we iterate over dest pixels
222 * and must find the corresponding set of src pixels. */
223 scx = 16. * (l_float32)ws / (l_float32)wd;
224 scy = 16. * (l_float32)hs / (l_float32)hd;
225
226 wm2 = ws - 2;
227 hm2 = hs - 2;
228
229 /* Iterate over the destination pixels */
230 for (i = 0; i < hd; i++) {
231 ypm = (l_int32)(scy * (l_float32)i);
232 yp = ypm >> 4;
233 yf = ypm & 0x0f;
234 lined = datad + i * wpld;
235 lines = datas + yp * wpls;
236 for (j = 0; j < wd; j++) {
237 xpm = (l_int32)(scx * (l_float32)j);
238 xp = xpm >> 4;
239 xf = xpm & 0x0f;
240
241 /* If near the dest boundary, just use the src pixel value */
242 if (xp > wm2 || yp > hm2) {
243 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
244 continue;
245 }
246
247 /* Do bilinear interpolation. Without this, we could
248 * simply subsample:
249 * SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xp));
250 * which is faster but gives lousy results!
251 */
252 v00 = (16 - xf) * (16 - yf) * GET_DATA_BYTE(lines, xp);
253 v10 = xf * (16 - yf) * GET_DATA_BYTE(lines, xp + 1);
254 v01 = (16 - xf) * yf * GET_DATA_BYTE(lines + wpls, xp);
255 v11 = xf * yf * GET_DATA_BYTE(lines + wpls, xp + 1);
256 val = (l_uint8)((v00 + v01 + v10 + v11 + 128) / 256);
257 SET_DATA_BYTE(lined, j, val);
258 }
259 }
260
261 return;
262 }
263
264
265 /*------------------------------------------------------------------*
266 * 2x linear interpolated color scaling *
267 *------------------------------------------------------------------*/
268 /*!
269 * scaleColor2xLILow()
270 *
271 * This is a special case of 2x expansion by linear
272 * interpolation. Each src pixel contains 4 dest pixels.
273 * The 4 dest pixels in src pixel 1 are numbered at
274 * their UL corners. The 4 dest pixels in src pixel 1
275 * are related to that src pixel and its 3 neighboring
276 * src pixels as follows:
277 *
278 * 1-----2-----|-----|-----|
279 * | | | | |
280 * | | | | |
281 * src 1 --> 3-----4-----| | | <-- src 2
282 * | | | | |
283 * | | | | |
284 * |-----|-----|-----|-----|
285 * | | | | |
286 * | | | | |
287 * src 3 --> | | | | | <-- src 4
288 * | | | | |
289 * | | | | |
290 * |-----|-----|-----|-----|
291 *
292 * dest src
293 * ---- ---
294 * dp1 = sp1
295 * dp2 = (sp1 + sp2) / 2
296 * dp3 = (sp1 + sp3) / 2
297 * dp4 = (sp1 + sp2 + sp3 + sp4) / 4
298 *
299 * We iterate over the src pixels, and unroll the calculation
300 * for each set of 4 dest pixels corresponding to that src
301 * pixel, caching pixels for the next src pixel whenever possible.
302 * The method is exactly analogous to the one we use for
303 * scaleGray2xLILow() and its line version.
304 *
305 * P3 speed is about 5 x 10^7 dst pixels/sec/GHz
306 */
307 void
scaleColor2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)308 scaleColor2xLILow(l_uint32 *datad,
309 l_int32 wpld,
310 l_uint32 *datas,
311 l_int32 ws,
312 l_int32 hs,
313 l_int32 wpls)
314 {
315 l_int32 i, hsm;
316 l_uint32 *lines, *lined;
317
318 hsm = hs - 1;
319
320 /* We're taking 2 src and 2 dest lines at a time,
321 * and for each src line, we're computing 2 dest lines.
322 * Call these 2 dest lines: destline1 and destline2.
323 * The first src line is used for destline 1.
324 * On all but the last src line, both src lines are
325 * used in the linear interpolation for destline2.
326 * On the last src line, both destline1 and destline2
327 * are computed using only that src line (because there
328 * isn't a lower src line). */
329
330 /* iterate over all but the last src line */
331 for (i = 0; i < hsm; i++) {
332 lines = datas + i * wpls;
333 lined = datad + 2 * i * wpld;
334 scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 0);
335 }
336
337 /* last src line */
338 lines = datas + hsm * wpls;
339 lined = datad + 2 * hsm * wpld;
340 scaleColor2xLILineLow(lined, wpld, lines, ws, wpls, 1);
341
342 return;
343 }
344
345
346 /*!
347 * scaleColor2xLILineLow()
348 *
349 * Input: lined (ptr to top destline, to be made from current src line)
350 * wpld
351 * lines (ptr to current src line)
352 * ws
353 * wpls
354 * lastlineflag (1 if last src line; 0 otherwise)
355 * Return: void
356 *
357 * *** Warning: implicit assumption about RGB component ordering ***
358 */
359 void
scaleColor2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)360 scaleColor2xLILineLow(l_uint32 *lined,
361 l_int32 wpld,
362 l_uint32 *lines,
363 l_int32 ws,
364 l_int32 wpls,
365 l_int32 lastlineflag)
366 {
367 l_int32 j, jd, wsm;
368 l_int32 rval1, rval2, rval3, rval4, gval1, gval2, gval3, gval4;
369 l_int32 bval1, bval2, bval3, bval4;
370 l_uint32 pixels1, pixels2, pixels3, pixels4, pixel;
371 l_uint32 *linesp, *linedp;
372
373 wsm = ws - 1;
374
375 if (lastlineflag == 0) {
376 linesp = lines + wpls;
377 linedp = lined + wpld;
378 pixels1 = *lines;
379 pixels3 = *linesp;
380
381 /* initialize with v(2) and v(4) */
382 rval2 = pixels1 >> 24;
383 gval2 = (pixels1 >> 16) & 0xff;
384 bval2 = (pixels1 >> 8) & 0xff;
385 rval4 = pixels3 >> 24;
386 gval4 = (pixels3 >> 16) & 0xff;
387 bval4 = (pixels3 >> 8) & 0xff;
388
389 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
390 /* shift in previous src values */
391 rval1 = rval2;
392 gval1 = gval2;
393 bval1 = bval2;
394 rval3 = rval4;
395 gval3 = gval4;
396 bval3 = bval4;
397 /* get new src values */
398 pixels2 = *(lines + j + 1);
399 pixels4 = *(linesp + j + 1);
400 rval2 = pixels2 >> 24;
401 gval2 = (pixels2 >> 16) & 0xff;
402 bval2 = (pixels2 >> 8) & 0xff;
403 rval4 = pixels4 >> 24;
404 gval4 = (pixels4 >> 16) & 0xff;
405 bval4 = (pixels4 >> 8) & 0xff;
406 /* save dest values */
407 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
408 *(lined + jd) = pixel; /* pix 1 */
409 pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
410 (((gval1 + gval2) << 15) & 0x00ff0000) |
411 (((bval1 + bval2) << 7) & 0x0000ff00));
412 *(lined + jd + 1) = pixel; /* pix 2 */
413 pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
414 (((gval1 + gval3) << 15) & 0x00ff0000) |
415 (((bval1 + bval3) << 7) & 0x0000ff00));
416 *(linedp + jd) = pixel; /* pix 3 */
417 pixel = ((((rval1 + rval2 + rval3 + rval4) << 22) & 0xff000000) |
418 (((gval1 + gval2 + gval3 + gval4) << 14) & 0x00ff0000) |
419 (((bval1 + bval2 + bval3 + bval4) << 6) & 0x0000ff00));
420 *(linedp + jd + 1) = pixel; /* pix 4 */
421 }
422 /* last src pixel on line */
423 rval1 = rval2;
424 gval1 = gval2;
425 bval1 = bval2;
426 rval3 = rval4;
427 gval3 = gval4;
428 bval3 = bval4;
429 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
430 *(lined + 2 * wsm) = pixel; /* pix 1 */
431 *(lined + 2 * wsm + 1) = pixel; /* pix 2 */
432 pixel = ((((rval1 + rval3) << 23) & 0xff000000) |
433 (((gval1 + gval3) << 15) & 0x00ff0000) |
434 (((bval1 + bval3) << 7) & 0x0000ff00));
435 *(linedp + 2 * wsm) = pixel; /* pix 3 */
436 *(linedp + 2 * wsm + 1) = pixel; /* pix 4 */
437 }
438 else { /* last row of src pixels: lastlineflag == 1 */
439 linedp = lined + wpld;
440 pixels2 = *lines;
441 rval2 = pixels2 >> 24;
442 gval2 = (pixels2 >> 16) & 0xff;
443 bval2 = (pixels2 >> 8) & 0xff;
444 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
445 rval1 = rval2;
446 gval1 = gval2;
447 bval1 = bval2;
448 pixels2 = *(lines + j + 1);
449 rval2 = pixels2 >> 24;
450 gval2 = (pixels2 >> 16) & 0xff;
451 bval2 = (pixels2 >> 8) & 0xff;
452 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
453 *(lined + jd) = pixel; /* pix 1 */
454 *(linedp + jd) = pixel; /* pix 2 */
455 pixel = ((((rval1 + rval2) << 23) & 0xff000000) |
456 (((gval1 + gval2) << 15) & 0x00ff0000) |
457 (((bval1 + bval2) << 7) & 0x0000ff00));
458 *(lined + jd + 1) = pixel; /* pix 3 */
459 *(linedp + jd + 1) = pixel; /* pix 4 */
460 }
461 rval1 = rval2;
462 gval1 = gval2;
463 bval1 = bval2;
464 pixel = (rval1 << 24 | gval1 << 16 | bval1 << 8);
465 *(lined + 2 * wsm) = pixel; /* pix 1 */
466 *(lined + 2 * wsm + 1) = pixel; /* pix 2 */
467 *(linedp + 2 * wsm) = pixel; /* pix 3 */
468 *(linedp + 2 * wsm + 1) = pixel; /* pix 4 */
469 }
470
471 return;
472 }
473
474
475 /*------------------------------------------------------------------*
476 * 2x linear interpolated gray scaling *
477 *------------------------------------------------------------------*/
478 /*!
479 * scaleGray2xLILow()
480 *
481 * This is a special case of 2x expansion by linear
482 * interpolation. Each src pixel contains 4 dest pixels.
483 * The 4 dest pixels in src pixel 1 are numbered at
484 * their UL corners. The 4 dest pixels in src pixel 1
485 * are related to that src pixel and its 3 neighboring
486 * src pixels as follows:
487 *
488 * 1-----2-----|-----|-----|
489 * | | | | |
490 * | | | | |
491 * src 1 --> 3-----4-----| | | <-- src 2
492 * | | | | |
493 * | | | | |
494 * |-----|-----|-----|-----|
495 * | | | | |
496 * | | | | |
497 * src 3 --> | | | | | <-- src 4
498 * | | | | |
499 * | | | | |
500 * |-----|-----|-----|-----|
501 *
502 * dest src
503 * ---- ---
504 * dp1 = sp1
505 * dp2 = (sp1 + sp2) / 2
506 * dp3 = (sp1 + sp3) / 2
507 * dp4 = (sp1 + sp2 + sp3 + sp4) / 4
508 *
509 * We iterate over the src pixels, and unroll the calculation
510 * for each set of 4 dest pixels corresponding to that src
511 * pixel, caching pixels for the next src pixel whenever possible.
512 */
513 void
scaleGray2xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)514 scaleGray2xLILow(l_uint32 *datad,
515 l_int32 wpld,
516 l_uint32 *datas,
517 l_int32 ws,
518 l_int32 hs,
519 l_int32 wpls)
520 {
521 l_int32 i, hsm;
522 l_uint32 *lines, *lined;
523
524 hsm = hs - 1;
525
526 /* We're taking 2 src and 2 dest lines at a time,
527 * and for each src line, we're computing 2 dest lines.
528 * Call these 2 dest lines: destline1 and destline2.
529 * The first src line is used for destline 1.
530 * On all but the last src line, both src lines are
531 * used in the linear interpolation for destline2.
532 * On the last src line, both destline1 and destline2
533 * are computed using only that src line (because there
534 * isn't a lower src line). */
535
536 /* iterate over all but the last src line */
537 for (i = 0; i < hsm; i++) {
538 lines = datas + i * wpls;
539 lined = datad + 2 * i * wpld;
540 scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 0);
541 }
542
543 /* last src line */
544 lines = datas + hsm * wpls;
545 lined = datad + 2 * hsm * wpld;
546 scaleGray2xLILineLow(lined, wpld, lines, ws, wpls, 1);
547
548 return;
549 }
550
551
552 /*!
553 * scaleGray2xLILineLow()
554 *
555 * Input: lined (ptr to top destline, to be made from current src line)
556 * wpld
557 * lines (ptr to current src line)
558 * ws
559 * wpls
560 * lastlineflag (1 if last src line; 0 otherwise)
561 * Return: void
562 */
563 void
scaleGray2xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)564 scaleGray2xLILineLow(l_uint32 *lined,
565 l_int32 wpld,
566 l_uint32 *lines,
567 l_int32 ws,
568 l_int32 wpls,
569 l_int32 lastlineflag)
570 {
571 l_int32 j, jd, wsm, w;
572 l_int32 sval1, sval2, sval3, sval4;
573 l_uint32 *linesp, *linedp;
574 l_uint32 words, wordsp, wordd, worddp;
575
576 wsm = ws - 1;
577
578 if (lastlineflag == 0) {
579 linesp = lines + wpls;
580 linedp = lined + wpld;
581
582 /* Unroll the loop 4x and work on full words */
583 words = lines[0];
584 wordsp = linesp[0];
585 sval2 = (words >> 24) & 0xff;
586 sval4 = (wordsp >> 24) & 0xff;
587 for (j = 0, jd = 0, w = 0; j + 3 < wsm; j += 4, jd += 8, w++) {
588 /* At the top of the loop,
589 * words == lines[w], wordsp == linesp[w]
590 * and the top bytes of those have been loaded into
591 * sval2 and sval4. */
592 sval1 = sval2;
593 sval2 = (words >> 16) & 0xff;
594 sval3 = sval4;
595 sval4 = (wordsp >> 16) & 0xff;
596 wordd = (sval1 << 24) | (((sval1 + sval2) >> 1) << 16);
597 worddp = (((sval1 + sval3) >> 1) << 24) |
598 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16);
599
600 sval1 = sval2;
601 sval2 = (words >> 8) & 0xff;
602 sval3 = sval4;
603 sval4 = (wordsp >> 8) & 0xff;
604 wordd |= (sval1 << 8) | ((sval1 + sval2) >> 1);
605 worddp |= (((sval1 + sval3) >> 1) << 8) |
606 ((sval1 + sval2 + sval3 + sval4) >> 2);
607 lined[w * 2] = wordd;
608 linedp[w * 2] = worddp;
609
610 sval1 = sval2;
611 sval2 = words & 0xff;
612 sval3 = sval4;
613 sval4 = wordsp & 0xff;
614 wordd = (sval1 << 24) | /* pix 1 */
615 (((sval1 + sval2) >> 1) << 16); /* pix 2 */
616 worddp = (((sval1 + sval3) >> 1) << 24) | /* pix 3 */
617 (((sval1 + sval2 + sval3 + sval4) >> 2) << 16); /* pix 4 */
618
619 /* Load the next word as we need its first byte */
620 words = lines[w + 1];
621 wordsp = linesp[w + 1];
622 sval1 = sval2;
623 sval2 = (words >> 24) & 0xff;
624 sval3 = sval4;
625 sval4 = (wordsp >> 24) & 0xff;
626 wordd |= (sval1 << 8) | /* pix 1 */
627 ((sval1 + sval2) >> 1); /* pix 2 */
628 worddp |= (((sval1 + sval3) >> 1) << 8) | /* pix 3 */
629 ((sval1 + sval2 + sval3 + sval4) >> 2); /* pix 4 */
630 lined[w * 2 + 1] = wordd;
631 linedp[w * 2 + 1] = worddp;
632 }
633
634 /* Finish up the last word */
635 for (; j < wsm; j++, jd += 2) {
636 sval1 = sval2;
637 sval3 = sval4;
638 sval2 = GET_DATA_BYTE(lines, j + 1);
639 sval4 = GET_DATA_BYTE(linesp, j + 1);
640 SET_DATA_BYTE(lined, jd, sval1); /* pix 1 */
641 SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */
642 SET_DATA_BYTE(linedp, jd, (sval1 + sval3) / 2); /* pix 3 */
643 SET_DATA_BYTE(linedp, jd + 1,
644 (sval1 + sval2 + sval3 + sval4) / 4); /* pix 4 */
645 }
646 sval1 = sval2;
647 sval3 = sval4;
648 SET_DATA_BYTE(lined, 2 * wsm, sval1); /* pix 1 */
649 SET_DATA_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */
650 SET_DATA_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2); /* pix 3 */
651 SET_DATA_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2); /* pix 4 */
652
653 #if DEBUG_UNROLLING
654 #define CHECK_BYTE(a, b, c) if (GET_DATA_BYTE(a, b) != c) {\
655 fprintf(stderr, "Error: mismatch at %d, %d vs %d\n", \
656 j, GET_DATA_BYTE(a, b), c); }
657
658 sval2 = GET_DATA_BYTE(lines, 0);
659 sval4 = GET_DATA_BYTE(linesp, 0);
660 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
661 sval1 = sval2;
662 sval3 = sval4;
663 sval2 = GET_DATA_BYTE(lines, j + 1);
664 sval4 = GET_DATA_BYTE(linesp, j + 1);
665 CHECK_BYTE(lined, jd, sval1); /* pix 1 */
666 CHECK_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */
667 CHECK_BYTE(linedp, jd, (sval1 + sval3) / 2); /* pix 3 */
668 CHECK_BYTE(linedp, jd + 1,
669 (sval1 + sval2 + sval3 + sval4) / 4); /* pix 4 */
670 }
671 sval1 = sval2;
672 sval3 = sval4;
673 CHECK_BYTE(lined, 2 * wsm, sval1); /* pix 1 */
674 CHECK_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */
675 CHECK_BYTE(linedp, 2 * wsm, (sval1 + sval3) / 2); /* pix 3 */
676 CHECK_BYTE(linedp, 2 * wsm + 1, (sval1 + sval3) / 2); /* pix 4 */
677 #undef CHECK_BYTE
678 #endif
679 }
680 else { /* last row of src pixels: lastlineflag == 1 */
681 linedp = lined + wpld;
682 sval2 = GET_DATA_BYTE(lines, 0);
683 for (j = 0, jd = 0; j < wsm; j++, jd += 2) {
684 sval1 = sval2;
685 sval2 = GET_DATA_BYTE(lines, j + 1);
686 SET_DATA_BYTE(lined, jd, sval1); /* pix 1 */
687 SET_DATA_BYTE(linedp, jd, sval1); /* pix 3 */
688 SET_DATA_BYTE(lined, jd + 1, (sval1 + sval2) / 2); /* pix 2 */
689 SET_DATA_BYTE(linedp, jd + 1, (sval1 + sval2) / 2); /* pix 4 */
690 }
691 sval1 = sval2;
692 SET_DATA_BYTE(lined, 2 * wsm, sval1); /* pix 1 */
693 SET_DATA_BYTE(lined, 2 * wsm + 1, sval1); /* pix 2 */
694 SET_DATA_BYTE(linedp, 2 * wsm, sval1); /* pix 3 */
695 SET_DATA_BYTE(linedp, 2 * wsm + 1, sval1); /* pix 4 */
696 }
697
698 return;
699 }
700
701
702 /*------------------------------------------------------------------*
703 * 4x linear interpolated gray scaling *
704 *------------------------------------------------------------------*/
705 /*!
706 * scaleGray4xLILow()
707 *
708 * This is a special case of 4x expansion by linear
709 * interpolation. Each src pixel contains 16 dest pixels.
710 * The 16 dest pixels in src pixel 1 are numbered at
711 * their UL corners. The 16 dest pixels in src pixel 1
712 * are related to that src pixel and its 3 neighboring
713 * src pixels as follows:
714 *
715 * 1---2---3---4---|---|---|---|---|
716 * | | | | | | | | |
717 * 5---6---7---8---|---|---|---|---|
718 * | | | | | | | | |
719 * src 1 --> 9---a---b---c---|---|---|---|---| <-- src 2
720 * | | | | | | | | |
721 * d---e---f---g---|---|---|---|---|
722 * | | | | | | | | |
723 * |===|===|===|===|===|===|===|===|
724 * | | | | | | | | |
725 * |---|---|---|---|---|---|---|---|
726 * | | | | | | | | |
727 * src 3 --> |---|---|---|---|---|---|---|---| <-- src 4
728 * | | | | | | | | |
729 * |---|---|---|---|---|---|---|---|
730 * | | | | | | | | |
731 * |---|---|---|---|---|---|---|---|
732 *
733 * dest src
734 * ---- ---
735 * dp1 = sp1
736 * dp2 = (3 * sp1 + sp2) / 4
737 * dp3 = (sp1 + sp2) / 2
738 * dp4 = (sp1 + 3 * sp2) / 4
739 * dp5 = (3 * sp1 + sp3) / 4
740 * dp6 = (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
741 * dp7 = (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
742 * dp8 = (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
743 * dp9 = (sp1 + sp3) / 2
744 * dp10 = (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
745 * dp11 = (sp1 + sp2 + sp3 + sp4) / 4
746 * dp12 = (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
747 * dp13 = (sp1 + 3 * sp3) / 4
748 * dp14 = (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
749 * dp15 = (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
750 * dp16 = (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
751 *
752 * We iterate over the src pixels, and unroll the calculation
753 * for each set of 16 dest pixels corresponding to that src
754 * pixel, caching pixels for the next src pixel whenever possible.
755 */
756 void
scaleGray4xLILow(l_uint32 * datad,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)757 scaleGray4xLILow(l_uint32 *datad,
758 l_int32 wpld,
759 l_uint32 *datas,
760 l_int32 ws,
761 l_int32 hs,
762 l_int32 wpls)
763 {
764 l_int32 i, hsm;
765 l_uint32 *lines, *lined;
766
767 hsm = hs - 1;
768
769 /* We're taking 2 src and 4 dest lines at a time,
770 * and for each src line, we're computing 4 dest lines.
771 * Call these 4 dest lines: destline1 - destline4.
772 * The first src line is used for destline 1.
773 * Two src lines are used for all other dest lines,
774 * except for the last 4 dest lines, which are computed
775 * using only the last src line. */
776
777 /* iterate over all but the last src line */
778 for (i = 0; i < hsm; i++) {
779 lines = datas + i * wpls;
780 lined = datad + 4 * i * wpld;
781 scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 0);
782 }
783
784 /* last src line */
785 lines = datas + hsm * wpls;
786 lined = datad + 4 * hsm * wpld;
787 scaleGray4xLILineLow(lined, wpld, lines, ws, wpls, 1);
788
789 return;
790 }
791
792
793 /*!
794 * scaleGray4xLILineLow()
795 *
796 * Input: lined (ptr to top destline, to be made from current src line)
797 * wpld
798 * lines (ptr to current src line)
799 * ws
800 * wpls
801 * lastlineflag (1 if last src line; 0 otherwise)
802 * Return: void
803 */
804 void
scaleGray4xLILineLow(l_uint32 * lined,l_int32 wpld,l_uint32 * lines,l_int32 ws,l_int32 wpls,l_int32 lastlineflag)805 scaleGray4xLILineLow(l_uint32 *lined,
806 l_int32 wpld,
807 l_uint32 *lines,
808 l_int32 ws,
809 l_int32 wpls,
810 l_int32 lastlineflag)
811 {
812 l_int32 j, jd, wsm, wsm4;
813 l_int32 s1, s2, s3, s4, s1t, s2t, s3t, s4t;
814 l_uint32 *linesp, *linedp1, *linedp2, *linedp3;
815
816 wsm = ws - 1;
817 wsm4 = 4 * wsm;
818
819 if (lastlineflag == 0) {
820 linesp = lines + wpls;
821 linedp1 = lined + wpld;
822 linedp2 = lined + 2 * wpld;
823 linedp3 = lined + 3 * wpld;
824 s2 = GET_DATA_BYTE(lines, 0);
825 s4 = GET_DATA_BYTE(linesp, 0);
826 for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
827 s1 = s2;
828 s3 = s4;
829 s2 = GET_DATA_BYTE(lines, j + 1);
830 s4 = GET_DATA_BYTE(linesp, j + 1);
831 s1t = 3 * s1;
832 s2t = 3 * s2;
833 s3t = 3 * s3;
834 s4t = 3 * s4;
835 SET_DATA_BYTE(lined, jd, s1); /* d1 */
836 SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4); /* d2 */
837 SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2); /* d3 */
838 SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4); /* d4 */
839 SET_DATA_BYTE(linedp1, jd, (s1t + s3) / 4); /* d5 */
840 SET_DATA_BYTE(linedp1, jd + 1, (9*s1 + s2t + s3t + s4) / 16); /*d6*/
841 SET_DATA_BYTE(linedp1, jd + 2, (s1t + s2t + s3 + s4) / 8); /* d7 */
842 SET_DATA_BYTE(linedp1, jd + 3, (s1t + 9*s2 + s3 + s4t) / 16);/*d8*/
843 SET_DATA_BYTE(linedp2, jd, (s1 + s3) / 2); /* d9 */
844 SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2 + s3t + s4) / 8);/* d10 */
845 SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2 + s3 + s4) / 4); /* d11 */
846 SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t + s3 + s4t) / 8);/* d12 */
847 SET_DATA_BYTE(linedp3, jd, (s1 + s3t) / 4); /* d13 */
848 SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2 + 9*s3 + s4t) / 16);/*d14*/
849 SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2 + s3t + s4t) / 8); /* d15 */
850 SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t + s3t + 9*s4) / 16);/*d16*/
851 }
852 s1 = s2;
853 s3 = s4;
854 s1t = 3 * s1;
855 s3t = 3 * s3;
856 SET_DATA_BYTE(lined, wsm4, s1); /* d1 */
857 SET_DATA_BYTE(lined, wsm4 + 1, s1); /* d2 */
858 SET_DATA_BYTE(lined, wsm4 + 2, s1); /* d3 */
859 SET_DATA_BYTE(lined, wsm4 + 3, s1); /* d4 */
860 SET_DATA_BYTE(linedp1, wsm4, (s1t + s3) / 4); /* d5 */
861 SET_DATA_BYTE(linedp1, wsm4 + 1, (s1t + s3) / 4); /* d6 */
862 SET_DATA_BYTE(linedp1, wsm4 + 2, (s1t + s3) / 4); /* d7 */
863 SET_DATA_BYTE(linedp1, wsm4 + 3, (s1t + s3) / 4); /* d8 */
864 SET_DATA_BYTE(linedp2, wsm4, (s1 + s3) / 2); /* d9 */
865 SET_DATA_BYTE(linedp2, wsm4 + 1, (s1 + s3) / 2); /* d10 */
866 SET_DATA_BYTE(linedp2, wsm4 + 2, (s1 + s3) / 2); /* d11 */
867 SET_DATA_BYTE(linedp2, wsm4 + 3, (s1 + s3) / 2); /* d12 */
868 SET_DATA_BYTE(linedp3, wsm4, (s1 + s3t) / 4); /* d13 */
869 SET_DATA_BYTE(linedp3, wsm4 + 1, (s1 + s3t) / 4); /* d14 */
870 SET_DATA_BYTE(linedp3, wsm4 + 2, (s1 + s3t) / 4); /* d15 */
871 SET_DATA_BYTE(linedp3, wsm4 + 3, (s1 + s3t) / 4); /* d16 */
872 }
873 else { /* last row of src pixels: lastlineflag == 1 */
874 linedp1 = lined + wpld;
875 linedp2 = lined + 2 * wpld;
876 linedp3 = lined + 3 * wpld;
877 s2 = GET_DATA_BYTE(lines, 0);
878 for (j = 0, jd = 0; j < wsm; j++, jd += 4) {
879 s1 = s2;
880 s2 = GET_DATA_BYTE(lines, j + 1);
881 s1t = 3 * s1;
882 s2t = 3 * s2;
883 SET_DATA_BYTE(lined, jd, s1); /* d1 */
884 SET_DATA_BYTE(lined, jd + 1, (s1t + s2) / 4 ); /* d2 */
885 SET_DATA_BYTE(lined, jd + 2, (s1 + s2) / 2 ); /* d3 */
886 SET_DATA_BYTE(lined, jd + 3, (s1 + s2t) / 4 ); /* d4 */
887 SET_DATA_BYTE(linedp1, jd, s1); /* d5 */
888 SET_DATA_BYTE(linedp1, jd + 1, (s1t + s2) / 4 ); /* d6 */
889 SET_DATA_BYTE(linedp1, jd + 2, (s1 + s2) / 2 ); /* d7 */
890 SET_DATA_BYTE(linedp1, jd + 3, (s1 + s2t) / 4 ); /* d8 */
891 SET_DATA_BYTE(linedp2, jd, s1); /* d9 */
892 SET_DATA_BYTE(linedp2, jd + 1, (s1t + s2) / 4 ); /* d10 */
893 SET_DATA_BYTE(linedp2, jd + 2, (s1 + s2) / 2 ); /* d11 */
894 SET_DATA_BYTE(linedp2, jd + 3, (s1 + s2t) / 4 ); /* d12 */
895 SET_DATA_BYTE(linedp3, jd, s1); /* d13 */
896 SET_DATA_BYTE(linedp3, jd + 1, (s1t + s2) / 4 ); /* d14 */
897 SET_DATA_BYTE(linedp3, jd + 2, (s1 + s2) / 2 ); /* d15 */
898 SET_DATA_BYTE(linedp3, jd + 3, (s1 + s2t) / 4 ); /* d16 */
899 }
900 s1 = s2;
901 SET_DATA_BYTE(lined, wsm4, s1); /* d1 */
902 SET_DATA_BYTE(lined, wsm4 + 1, s1); /* d2 */
903 SET_DATA_BYTE(lined, wsm4 + 2, s1); /* d3 */
904 SET_DATA_BYTE(lined, wsm4 + 3, s1); /* d4 */
905 SET_DATA_BYTE(linedp1, wsm4, s1); /* d5 */
906 SET_DATA_BYTE(linedp1, wsm4 + 1, s1); /* d6 */
907 SET_DATA_BYTE(linedp1, wsm4 + 2, s1); /* d7 */
908 SET_DATA_BYTE(linedp1, wsm4 + 3, s1); /* d8 */
909 SET_DATA_BYTE(linedp2, wsm4, s1); /* d9 */
910 SET_DATA_BYTE(linedp2, wsm4 + 1, s1); /* d10 */
911 SET_DATA_BYTE(linedp2, wsm4 + 2, s1); /* d11 */
912 SET_DATA_BYTE(linedp2, wsm4 + 3, s1); /* d12 */
913 SET_DATA_BYTE(linedp3, wsm4, s1); /* d13 */
914 SET_DATA_BYTE(linedp3, wsm4 + 1, s1); /* d14 */
915 SET_DATA_BYTE(linedp3, wsm4 + 2, s1); /* d15 */
916 SET_DATA_BYTE(linedp3, wsm4 + 3, s1); /* d16 */
917 }
918
919 return;
920 }
921
922
923 /*------------------------------------------------------------------*
924 * Grayscale and color scaling by closest pixel sampling *
925 *------------------------------------------------------------------*/
926 /*!
927 * scaleBySamplingLow()
928 *
929 * Notes:
930 * (1) The dest must be cleared prior to this operation,
931 * and we clear it here in the low-level code.
932 * (2) We reuse dest pixels and dest pixel rows whenever
933 * possible. This speeds the upscaling; downscaling
934 * is done by strict subsampling and is unaffected.
935 * (3) Because we are sampling and not interpolating, this
936 * routine works directly, without conversion to full
937 * RGB color, for 2, 4 or 8 bpp palette color images.
938 */
939 l_int32
scaleBySamplingLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls)940 scaleBySamplingLow(l_uint32 *datad,
941 l_int32 wd,
942 l_int32 hd,
943 l_int32 wpld,
944 l_uint32 *datas,
945 l_int32 ws,
946 l_int32 hs,
947 l_int32 d,
948 l_int32 wpls)
949 {
950 l_int32 i, j, bpld;
951 l_int32 xs, prevxs, sval;
952 l_int32 *srow, *scol;
953 l_uint32 csval;
954 l_uint32 *lines, *prevlines, *lined, *prevlined;
955 l_float32 wratio, hratio;
956
957 PROCNAME("scaleBySamplingLow");
958
959 /* clear dest */
960 bpld = 4 * wpld;
961 memset((char *)datad, 0, hd * bpld);
962
963 /* the source row corresponding to dest row i ==> srow[i]
964 * the source col corresponding to dest col j ==> scol[j] */
965 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
966 return ERROR_INT("srow not made", procName, 1);
967 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
968 return ERROR_INT("scol not made", procName, 1);
969
970 wratio = (l_float32)ws / (l_float32)wd;
971 hratio = (l_float32)hs / (l_float32)hd;
972 for (i = 0; i < hd; i++)
973 srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
974 for (j = 0; j < wd; j++)
975 scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
976
977 prevlines = NULL;
978 for (i = 0; i < hd; i++) {
979 lines = datas + srow[i] * wpls;
980 lined = datad + i * wpld;
981 if (lines != prevlines) { /* make dest from new source row */
982 prevxs = -1;
983 sval = 0;
984 csval = 0;
985 switch (d)
986 {
987 case 2:
988 for (j = 0; j < wd; j++) {
989 xs = scol[j];
990 if (xs != prevxs) { /* get dest pix from source col */
991 sval = GET_DATA_DIBIT(lines, xs);
992 SET_DATA_DIBIT(lined, j, sval);
993 prevxs = xs;
994 }
995 else /* copy prev dest pix */
996 SET_DATA_DIBIT(lined, j, sval);
997 }
998 break;
999 case 4:
1000 for (j = 0; j < wd; j++) {
1001 xs = scol[j];
1002 if (xs != prevxs) { /* get dest pix from source col */
1003 sval = GET_DATA_QBIT(lines, xs);
1004 SET_DATA_QBIT(lined, j, sval);
1005 prevxs = xs;
1006 }
1007 else /* copy prev dest pix */
1008 SET_DATA_QBIT(lined, j, sval);
1009 }
1010 break;
1011 case 8:
1012 for (j = 0; j < wd; j++) {
1013 xs = scol[j];
1014 if (xs != prevxs) { /* get dest pix from source col */
1015 sval = GET_DATA_BYTE(lines, xs);
1016 SET_DATA_BYTE(lined, j, sval);
1017 prevxs = xs;
1018 }
1019 else /* copy prev dest pix */
1020 SET_DATA_BYTE(lined, j, sval);
1021 }
1022 break;
1023 case 16:
1024 for (j = 0; j < wd; j++) {
1025 xs = scol[j];
1026 if (xs != prevxs) { /* get dest pix from source col */
1027 sval = GET_DATA_TWO_BYTES(lines, xs);
1028 SET_DATA_TWO_BYTES(lined, j, sval);
1029 prevxs = xs;
1030 }
1031 else /* copy prev dest pix */
1032 SET_DATA_TWO_BYTES(lined, j, sval);
1033 }
1034 break;
1035 case 32:
1036 for (j = 0; j < wd; j++) {
1037 xs = scol[j];
1038 if (xs != prevxs) { /* get dest pix from source col */
1039 csval = lines[xs];
1040 lined[j] = csval;
1041 prevxs = xs;
1042 }
1043 else /* copy prev dest pix */
1044 lined[j] = csval;
1045 }
1046 break;
1047 default:
1048 return ERROR_INT("pixel depth not supported", procName, 1);
1049 break;
1050 }
1051 }
1052 else { /* lines == prevlines; copy prev dest row */
1053 prevlined = lined - wpld;
1054 memcpy((char *)lined, (char *)prevlined, bpld);
1055 }
1056 prevlines = lines;
1057 }
1058
1059 FREE(srow);
1060 FREE(scol);
1061
1062 return 0;
1063 }
1064
1065
1066 /*------------------------------------------------------------------*
1067 * Color and grayscale downsampling with (antialias) smoothing *
1068 *------------------------------------------------------------------*/
1069 /*!
1070 * scaleSmoothLow()
1071 *
1072 * Notes:
1073 * (1) This function is called on 8 or 32 bpp src and dest images.
1074 * (2) size is the full width of the lowpass smoothing filter.
1075 * It is correlated with the reduction ratio, being the
1076 * nearest integer such that size is approximately equal to hs / hd.
1077 */
1078 l_int32
scaleSmoothLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 d,l_int32 wpls,l_int32 size)1079 scaleSmoothLow(l_uint32 *datad,
1080 l_int32 wd,
1081 l_int32 hd,
1082 l_int32 wpld,
1083 l_uint32 *datas,
1084 l_int32 ws,
1085 l_int32 hs,
1086 l_int32 d,
1087 l_int32 wpls,
1088 l_int32 size)
1089 {
1090 l_int32 i, j, m, n, xstart;
1091 l_int32 val, rval, gval, bval;
1092 l_int32 *srow, *scol;
1093 l_uint32 *lines, *lined, *line, *ppixel;
1094 l_uint32 pixel;
1095 l_float32 wratio, hratio, norm;
1096
1097 PROCNAME("scaleSmoothLow");
1098
1099 /* Clear dest */
1100 memset((char *)datad, 0, 4 * wpld * hd);
1101
1102 /* Each dest pixel at (j,i) is computed as the average
1103 of size^2 corresponding src pixels.
1104 We store the UL corner location of the square of
1105 src pixels that correspond to dest pixel (j,i).
1106 The are labelled by the arrays srow[i] and scol[j]. */
1107 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1108 return ERROR_INT("srow not made", procName, 1);
1109 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1110 return ERROR_INT("scol not made", procName, 1);
1111
1112 norm = 1. / (l_float32)(size * size);
1113 wratio = (l_float32)ws / (l_float32)wd;
1114 hratio = (l_float32)hs / (l_float32)hd;
1115 for (i = 0; i < hd; i++)
1116 srow[i] = L_MIN((l_int32)(hratio * i), hs - size);
1117 for (j = 0; j < wd; j++)
1118 scol[j] = L_MIN((l_int32)(wratio * j), ws - size);
1119
1120 /* For each dest pixel, compute average */
1121 if (d == 8) {
1122 for (i = 0; i < hd; i++) {
1123 lines = datas + srow[i] * wpls;
1124 lined = datad + i * wpld;
1125 for (j = 0; j < wd; j++) {
1126 xstart = scol[j];
1127 val = 0;
1128 for (m = 0; m < size; m++) {
1129 line = lines + m * wpls;
1130 for (n = 0; n < size; n++) {
1131 val += GET_DATA_BYTE(line, xstart + n);
1132 }
1133 }
1134 val = (l_int32)((l_float32)val * norm);
1135 SET_DATA_BYTE(lined, j, val);
1136 }
1137 }
1138 }
1139 else { /* d == 32 */
1140 for (i = 0; i < hd; i++) {
1141 lines = datas + srow[i] * wpls;
1142 lined = datad + i * wpld;
1143 for (j = 0; j < wd; j++) {
1144 xstart = scol[j];
1145 rval = gval = bval = 0;
1146 for (m = 0; m < size; m++) {
1147 ppixel = lines + m * wpls + xstart;
1148 for (n = 0; n < size; n++) {
1149 pixel = *(ppixel + n);
1150 rval += (pixel >> L_RED_SHIFT) & 0xff;
1151 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1152 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1153 }
1154 }
1155 rval = (l_int32)((l_float32)rval * norm);
1156 gval = (l_int32)((l_float32)gval * norm);
1157 bval = (l_int32)((l_float32)bval * norm);
1158 *(lined + j) = rval << L_RED_SHIFT |
1159 gval << L_GREEN_SHIFT |
1160 bval << L_BLUE_SHIFT;
1161 }
1162 }
1163 }
1164
1165 FREE(srow);
1166 FREE(scol);
1167 return 0;
1168 }
1169
1170
1171 /*!
1172 * scaleRGBToGray2Low()
1173 *
1174 * Note: This function is called with 32 bpp RGB src and 8 bpp,
1175 * half-resolution dest. The weights should add to 1.0.
1176 */
1177 void
scaleRGBToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_float32 rwt,l_float32 gwt,l_float32 bwt)1178 scaleRGBToGray2Low(l_uint32 *datad,
1179 l_int32 wd,
1180 l_int32 hd,
1181 l_int32 wpld,
1182 l_uint32 *datas,
1183 l_int32 wpls,
1184 l_float32 rwt,
1185 l_float32 gwt,
1186 l_float32 bwt)
1187 {
1188 l_int32 i, j, val, rval, gval, bval;
1189 l_uint32 *lines, *lined;
1190 l_uint32 pixel;
1191
1192 rwt *= 0.25;
1193 gwt *= 0.25;
1194 bwt *= 0.25;
1195 for (i = 0; i < hd; i++) {
1196 lines = datas + 2 * i * wpls;
1197 lined = datad + i * wpld;
1198 for (j = 0; j < wd; j++) {
1199 /* Sum each of the color components from 4 src pixels */
1200 pixel = *(lines + 2 * j);
1201 rval = (pixel >> L_RED_SHIFT) & 0xff;
1202 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1203 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1204 pixel = *(lines + 2 * j + 1);
1205 rval += (pixel >> L_RED_SHIFT) & 0xff;
1206 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1207 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1208 pixel = *(lines + wpls + 2 * j);
1209 rval += (pixel >> L_RED_SHIFT) & 0xff;
1210 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1211 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1212 pixel = *(lines + wpls + 2 * j + 1);
1213 rval += (pixel >> L_RED_SHIFT) & 0xff;
1214 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1215 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1216 /* Generate the dest byte as a weighted sum of the averages */
1217 val = (l_int32)(rwt * rval + gwt * gval + bwt * bval);
1218 SET_DATA_BYTE(lined, j, val);
1219 }
1220 }
1221 }
1222
1223
1224 /*------------------------------------------------------------------*
1225 * General area mapped gray scaling *
1226 *------------------------------------------------------------------*/
1227 /*!
1228 * scaleColorAreaMapLow()
1229 *
1230 * This should only be used for downscaling.
1231 * We choose to divide each pixel into 16 x 16 sub-pixels.
1232 * This is much slower than scaleSmoothLow(), but it gives a
1233 * better representation, esp. for downscaling factors between
1234 * 1.5 and 5. All src pixels are subdivided into 256 sub-pixels,
1235 * and are weighted by the number of sub-pixels covered by
1236 * the dest pixel. This is about 2x slower than scaleSmoothLow(),
1237 * but the results are significantly better on small text.
1238 */
1239 void
scaleColorAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1240 scaleColorAreaMapLow(l_uint32 *datad,
1241 l_int32 wd,
1242 l_int32 hd,
1243 l_int32 wpld,
1244 l_uint32 *datas,
1245 l_int32 ws,
1246 l_int32 hs,
1247 l_int32 wpls)
1248 {
1249 l_int32 i, j, k, m, wm2, hm2;
1250 l_int32 area00, area10, area01, area11, areal, arear, areat, areab;
1251 l_int32 xu, yu; /* UL corner in src image, to 1/16 of a pixel */
1252 l_int32 xl, yl; /* LR corner in src image, to 1/16 of a pixel */
1253 l_int32 xup, yup, xuf, yuf; /* UL src pixel: integer and fraction */
1254 l_int32 xlp, ylp, xlf, ylf; /* LR src pixel: integer and fraction */
1255 l_int32 delx, dely, area;
1256 l_int32 v00r, v00g, v00b; /* contrib. from UL src pixel */
1257 l_int32 v01r, v01g, v01b; /* contrib. from LL src pixel */
1258 l_int32 v10r, v10g, v10b; /* contrib from UR src pixel */
1259 l_int32 v11r, v11g, v11b; /* contrib from LR src pixel */
1260 l_int32 vinr, ving, vinb; /* contrib from all full interior src pixels */
1261 l_int32 vmidr, vmidg, vmidb; /* contrib from side parts */
1262 l_int32 rval, gval, bval;
1263 l_uint32 pixel00, pixel10, pixel01, pixel11, pixel;
1264 l_uint32 *lines, *lined;
1265 l_float32 scx, scy;
1266
1267 /* (scx, scy) are scaling factors that are applied to the
1268 * dest coords to get the corresponding src coords.
1269 * We need them because we iterate over dest pixels
1270 * and must find the corresponding set of src pixels. */
1271 scx = 16. * (l_float32)ws / (l_float32)wd;
1272 scy = 16. * (l_float32)hs / (l_float32)hd;
1273
1274 wm2 = ws - 2;
1275 hm2 = hs - 2;
1276
1277 /* Iterate over the destination pixels */
1278 for (i = 0; i < hd; i++) {
1279 yu = (l_int32)(scy * i);
1280 yl = (l_int32)(scy * (i + 1.0));
1281 yup = yu >> 4;
1282 yuf = yu & 0x0f;
1283 ylp = yl >> 4;
1284 ylf = yl & 0x0f;
1285 dely = ylp - yup;
1286 lined = datad + i * wpld;
1287 lines = datas + yup * wpls;
1288 for (j = 0; j < wd; j++) {
1289 xu = (l_int32)(scx * j);
1290 xl = (l_int32)(scx * (j + 1.0));
1291 xup = xu >> 4;
1292 xuf = xu & 0x0f;
1293 xlp = xl >> 4;
1294 xlf = xl & 0x0f;
1295 delx = xlp - xup;
1296
1297 /* If near the edge, just use a src pixel value */
1298 if (xlp > wm2 || ylp > hm2) {
1299 *(lined + j) = *(lines + xup);
1300 continue;
1301 }
1302
1303 /* Area summed over, in subpixels. This varies
1304 * due to the quantization, so we can't simply take
1305 * the area to be a constant: area = scx * scy. */
1306 area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1307 ((16 - yuf) + 16 * (dely - 1) + ylf);
1308
1309 /* Do area map summation */
1310 pixel00 = *(lines + xup);
1311 pixel10 = *(lines + xlp);
1312 pixel01 = *(lines + dely * wpls + xup);
1313 pixel11 = *(lines + dely * wpls + xlp);
1314 area00 = (16 - xuf) * (16 - yuf);
1315 area10 = xlf * (16 - yuf);
1316 area01 = (16 - xuf) * ylf;
1317 area11 = xlf * ylf;
1318 v00r = area00 * ((pixel00 >> L_RED_SHIFT) & 0xff);
1319 v00g = area00 * ((pixel00 >> L_GREEN_SHIFT) & 0xff);
1320 v00b = area00 * ((pixel00 >> L_BLUE_SHIFT) & 0xff);
1321 v10r = area10 * ((pixel10 >> L_RED_SHIFT) & 0xff);
1322 v10g = area10 * ((pixel10 >> L_GREEN_SHIFT) & 0xff);
1323 v10b = area10 * ((pixel10 >> L_BLUE_SHIFT) & 0xff);
1324 v01r = area01 * ((pixel01 >> L_RED_SHIFT) & 0xff);
1325 v01g = area01 * ((pixel01 >> L_GREEN_SHIFT) & 0xff);
1326 v01b = area01 * ((pixel01 >> L_BLUE_SHIFT) & 0xff);
1327 v11r = area11 * ((pixel11 >> L_RED_SHIFT) & 0xff);
1328 v11g = area11 * ((pixel11 >> L_GREEN_SHIFT) & 0xff);
1329 v11b = area11 * ((pixel11 >> L_BLUE_SHIFT) & 0xff);
1330 vinr = ving = vinb = 0;
1331 for (k = 1; k < dely; k++) { /* for full src pixels */
1332 for (m = 1; m < delx; m++) {
1333 pixel = *(lines + k * wpls + xup + m);
1334 vinr += 256 * ((pixel >> L_RED_SHIFT) & 0xff);
1335 ving += 256 * ((pixel >> L_GREEN_SHIFT) & 0xff);
1336 vinb += 256 * ((pixel >> L_BLUE_SHIFT) & 0xff);
1337 }
1338 }
1339 vmidr = vmidg = vmidb = 0;
1340 areal = (16 - xuf) * 16;
1341 arear = xlf * 16;
1342 areat = 16 * (16 - yuf);
1343 areab = 16 * ylf;
1344 for (k = 1; k < dely; k++) { /* for left side */
1345 pixel = *(lines + k * wpls + xup);
1346 vmidr += areal * ((pixel >> L_RED_SHIFT) & 0xff);
1347 vmidg += areal * ((pixel >> L_GREEN_SHIFT) & 0xff);
1348 vmidb += areal * ((pixel >> L_BLUE_SHIFT) & 0xff);
1349 }
1350 for (k = 1; k < dely; k++) { /* for right side */
1351 pixel = *(lines + k * wpls + xlp);
1352 vmidr += arear * ((pixel >> L_RED_SHIFT) & 0xff);
1353 vmidg += arear * ((pixel >> L_GREEN_SHIFT) & 0xff);
1354 vmidb += arear * ((pixel >> L_BLUE_SHIFT) & 0xff);
1355 }
1356 for (m = 1; m < delx; m++) { /* for top side */
1357 pixel = *(lines + xup + m);
1358 vmidr += areat * ((pixel >> L_RED_SHIFT) & 0xff);
1359 vmidg += areat * ((pixel >> L_GREEN_SHIFT) & 0xff);
1360 vmidb += areat * ((pixel >> L_BLUE_SHIFT) & 0xff);
1361 }
1362 for (m = 1; m < delx; m++) { /* for bottom side */
1363 pixel = *(lines + dely * wpls + xup + m);
1364 vmidr += areab * ((pixel >> L_RED_SHIFT) & 0xff);
1365 vmidg += areab * ((pixel >> L_GREEN_SHIFT) & 0xff);
1366 vmidb += areab * ((pixel >> L_BLUE_SHIFT) & 0xff);
1367 }
1368
1369 /* Sum all the contributions */
1370 rval = (v00r + v01r + v10r + v11r + vinr + vmidr + 128) / area;
1371 gval = (v00g + v01g + v10g + v11g + ving + vmidg + 128) / area;
1372 bval = (v00b + v01b + v10b + v11b + vinb + vmidb + 128) / area;
1373 #if DEBUG_OVERFLOW
1374 if (rval > 255) fprintf(stderr, "rval ovfl: %d\n", rval);
1375 if (rval > 255) fprintf(stderr, "gval ovfl: %d\n", gval);
1376 if (rval > 255) fprintf(stderr, "bval ovfl: %d\n", bval);
1377 #endif /* DEBUG_OVERFLOW */
1378 composeRGBPixel(rval, gval, bval, lined + j);
1379 }
1380 }
1381
1382 return;
1383 }
1384
1385
1386 /*!
1387 * scaleGrayAreaMapLow()
1388 *
1389 * This should only be used for downscaling.
1390 * We choose to divide each pixel into 16 x 16 sub-pixels.
1391 * This is about 2x slower than scaleSmoothLow(), but the results
1392 * are significantly better on small text, esp. for downscaling
1393 * factors between 1.5 and 5. All src pixels are subdivided
1394 * into 256 sub-pixels, and are weighted by the number of
1395 * sub-pixels covered by the dest pixel.
1396 */
1397 void
scaleGrayAreaMapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1398 scaleGrayAreaMapLow(l_uint32 *datad,
1399 l_int32 wd,
1400 l_int32 hd,
1401 l_int32 wpld,
1402 l_uint32 *datas,
1403 l_int32 ws,
1404 l_int32 hs,
1405 l_int32 wpls)
1406 {
1407 l_int32 i, j, k, m, wm2, hm2;
1408 l_int32 xu, yu; /* UL corner in src image, to 1/16 of a pixel */
1409 l_int32 xl, yl; /* LR corner in src image, to 1/16 of a pixel */
1410 l_int32 xup, yup, xuf, yuf; /* UL src pixel: integer and fraction */
1411 l_int32 xlp, ylp, xlf, ylf; /* LR src pixel: integer and fraction */
1412 l_int32 delx, dely, area;
1413 l_int32 v00; /* contrib. from UL src pixel */
1414 l_int32 v01; /* contrib. from LL src pixel */
1415 l_int32 v10; /* contrib from UR src pixel */
1416 l_int32 v11; /* contrib from LR src pixel */
1417 l_int32 vin; /* contrib from all full interior src pixels */
1418 l_int32 vmid; /* contrib from side parts that are full in 1 direction */
1419 l_int32 val;
1420 l_uint32 *lines, *lined;
1421 l_float32 scx, scy;
1422
1423 /* (scx, scy) are scaling factors that are applied to the
1424 * dest coords to get the corresponding src coords.
1425 * We need them because we iterate over dest pixels
1426 * and must find the corresponding set of src pixels. */
1427 scx = 16. * (l_float32)ws / (l_float32)wd;
1428 scy = 16. * (l_float32)hs / (l_float32)hd;
1429
1430 wm2 = ws - 2;
1431 hm2 = hs - 2;
1432
1433 /* Iterate over the destination pixels */
1434 for (i = 0; i < hd; i++) {
1435 yu = (l_int32)(scy * i);
1436 yl = (l_int32)(scy * (i + 1.0));
1437 yup = yu >> 4;
1438 yuf = yu & 0x0f;
1439 ylp = yl >> 4;
1440 ylf = yl & 0x0f;
1441 dely = ylp - yup;
1442 lined = datad + i * wpld;
1443 lines = datas + yup * wpls;
1444 for (j = 0; j < wd; j++) {
1445 xu = (l_int32)(scx * j);
1446 xl = (l_int32)(scx * (j + 1.0));
1447 xup = xu >> 4;
1448 xuf = xu & 0x0f;
1449 xlp = xl >> 4;
1450 xlf = xl & 0x0f;
1451 delx = xlp - xup;
1452
1453 /* If near the edge, just use a src pixel value */
1454 if (xlp > wm2 || ylp > hm2) {
1455 SET_DATA_BYTE(lined, j, GET_DATA_BYTE(lines, xup));
1456 continue;
1457 }
1458
1459 /* Area summed over, in subpixels. This varies
1460 * due to the quantization, so we can't simply take
1461 * the area to be a constant: area = scx * scy. */
1462 area = ((16 - xuf) + 16 * (delx - 1) + xlf) *
1463 ((16 - yuf) + 16 * (dely - 1) + ylf);
1464
1465 /* Do area map summation */
1466 v00 = (16 - xuf) * (16 - yuf) * GET_DATA_BYTE(lines, xup);
1467 v10 = xlf * (16 - yuf) * GET_DATA_BYTE(lines, xlp);
1468 v01 = (16 - xuf) * ylf * GET_DATA_BYTE(lines + dely * wpls, xup);
1469 v11 = xlf * ylf * GET_DATA_BYTE(lines + dely * wpls, xlp);
1470 for (vin = 0, k = 1; k < dely; k++) { /* for full src pixels */
1471 for (m = 1; m < delx; m++) {
1472 vin += 256 * GET_DATA_BYTE(lines + k * wpls, xup + m);
1473 }
1474 }
1475 for (vmid = 0, k = 1; k < dely; k++) /* for left side */
1476 vmid += (16 - xuf) * 16 * GET_DATA_BYTE(lines + k * wpls, xup);
1477 for (k = 1; k < dely; k++) /* for right side */
1478 vmid += xlf * 16 * GET_DATA_BYTE(lines + k * wpls, xlp);
1479 for (m = 1; m < delx; m++) /* for top side */
1480 vmid += 16 * (16 - yuf) * GET_DATA_BYTE(lines, xup + m);
1481 for (m = 1; m < delx; m++) /* for bottom side */
1482 vmid += 16 * ylf * GET_DATA_BYTE(lines + dely * wpls, xup + m);
1483 val = (v00 + v01 + v10 + v11 + vin + vmid + 128) / area;
1484 #if DEBUG_OVERFLOW
1485 if (val > 255) fprintf(stderr, "val overflow: %d\n", val);
1486 #endif /* DEBUG_OVERFLOW */
1487 SET_DATA_BYTE(lined, j, val);
1488 }
1489 }
1490
1491 return;
1492 }
1493
1494
1495 /*------------------------------------------------------------------*
1496 * 2x area mapped downscaling *
1497 *------------------------------------------------------------------*/
1498 /*!
1499 * scaleAreaMapLow2()
1500 *
1501 * Note: This function is called with either 8 bpp gray or 32 bpp RGB.
1502 * The result is a 2x reduced dest.
1503 */
1504 void
scaleAreaMapLow2(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 d,l_int32 wpls)1505 scaleAreaMapLow2(l_uint32 *datad,
1506 l_int32 wd,
1507 l_int32 hd,
1508 l_int32 wpld,
1509 l_uint32 *datas,
1510 l_int32 d,
1511 l_int32 wpls)
1512 {
1513 l_int32 i, j, val, rval, gval, bval;
1514 l_uint32 *lines, *lined;
1515 l_uint32 pixel;
1516
1517 if (d == 8) {
1518 for (i = 0; i < hd; i++) {
1519 lines = datas + 2 * i * wpls;
1520 lined = datad + i * wpld;
1521 for (j = 0; j < wd; j++) {
1522 /* Average each dest pixel using 4 src pixels */
1523 val = GET_DATA_BYTE(lines, 2 * j);
1524 val += GET_DATA_BYTE(lines, 2 * j + 1);
1525 val += GET_DATA_BYTE(lines + wpls, 2 * j);
1526 val += GET_DATA_BYTE(lines + wpls, 2 * j + 1);
1527 val >>= 2;
1528 SET_DATA_BYTE(lined, j, val);
1529 }
1530 }
1531 }
1532 else { /* d == 32 */
1533 for (i = 0; i < hd; i++) {
1534 lines = datas + 2 * i * wpls;
1535 lined = datad + i * wpld;
1536 for (j = 0; j < wd; j++) {
1537 /* Average each of the color components from 4 src pixels */
1538 pixel = *(lines + 2 * j);
1539 rval = (pixel >> L_RED_SHIFT) & 0xff;
1540 gval = (pixel >> L_GREEN_SHIFT) & 0xff;
1541 bval = (pixel >> L_BLUE_SHIFT) & 0xff;
1542 pixel = *(lines + 2 * j + 1);
1543 rval += (pixel >> L_RED_SHIFT) & 0xff;
1544 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1545 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1546 pixel = *(lines + wpls + 2 * j);
1547 rval += (pixel >> L_RED_SHIFT) & 0xff;
1548 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1549 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1550 pixel = *(lines + wpls + 2 * j + 1);
1551 rval += (pixel >> L_RED_SHIFT) & 0xff;
1552 gval += (pixel >> L_GREEN_SHIFT) & 0xff;
1553 bval += (pixel >> L_BLUE_SHIFT) & 0xff;
1554 composeRGBPixel(rval >> 2, gval >> 2, bval >> 2, &pixel);
1555 *(lined + j) = pixel;
1556 }
1557 }
1558 }
1559 return;
1560 }
1561
1562
1563 /*------------------------------------------------------------------*
1564 * Binary scaling by closest pixel sampling *
1565 *------------------------------------------------------------------*/
1566 /*
1567 * scaleBinaryLow()
1568 *
1569 * Notes:
1570 * (1) The dest must be cleared prior to this operation,
1571 * and we clear it here in the low-level code.
1572 * (2) We reuse dest pixels and dest pixel rows whenever
1573 * possible for upscaling; downscaling is done by
1574 * strict subsampling.
1575 */
1576 l_int32
scaleBinaryLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 ws,l_int32 hs,l_int32 wpls)1577 scaleBinaryLow(l_uint32 *datad,
1578 l_int32 wd,
1579 l_int32 hd,
1580 l_int32 wpld,
1581 l_uint32 *datas,
1582 l_int32 ws,
1583 l_int32 hs,
1584 l_int32 wpls)
1585 {
1586 l_int32 i, j, bpld;
1587 l_int32 xs, prevxs, sval;
1588 l_int32 *srow, *scol;
1589 l_uint32 *lines, *prevlines, *lined, *prevlined;
1590 l_float32 wratio, hratio;
1591
1592 PROCNAME("scaleBinaryLow");
1593
1594 /* clear dest */
1595 bpld = 4 * wpld;
1596 memset((char *)datad, 0, hd * bpld);
1597
1598 /* The source row corresponding to dest row i ==> srow[i]
1599 * The source col corresponding to dest col j ==> scol[j] */
1600 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
1601 return ERROR_INT("srow not made", procName, 1);
1602 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
1603 return ERROR_INT("scol not made", procName, 1);
1604
1605 wratio = (l_float32)ws / (l_float32)wd;
1606 hratio = (l_float32)hs / (l_float32)hd;
1607 for (i = 0; i < hd; i++)
1608 srow[i] = L_MIN((l_int32)(hratio * i + 0.5), hs - 1);
1609 for (j = 0; j < wd; j++)
1610 scol[j] = L_MIN((l_int32)(wratio * j + 0.5), ws - 1);
1611
1612 prevlines = NULL;
1613 prevxs = -1;
1614 sval = 0;
1615 for (i = 0; i < hd; i++) {
1616 lines = datas + srow[i] * wpls;
1617 lined = datad + i * wpld;
1618 if (lines != prevlines) { /* make dest from new source row */
1619 for (j = 0; j < wd; j++) {
1620 xs = scol[j];
1621 if (xs != prevxs) { /* get dest pix from source col */
1622 if ((sval = GET_DATA_BIT(lines, xs)))
1623 SET_DATA_BIT(lined, j);
1624 prevxs = xs;
1625 }
1626 else { /* copy prev dest pix, if set */
1627 if (sval)
1628 SET_DATA_BIT(lined, j);
1629 }
1630 }
1631 }
1632 else { /* lines == prevlines; copy prev dest row */
1633 prevlined = lined - wpld;
1634 memcpy((char *)lined, (char *)prevlined, bpld);
1635 }
1636 prevlines = lines;
1637 }
1638
1639 FREE(srow);
1640 FREE(scol);
1641
1642 return 0;
1643 }
1644
1645
1646 /*------------------------------------------------------------------*
1647 * Scale-to-gray 2x *
1648 *------------------------------------------------------------------*/
1649 /*!
1650 * scaleToGray2Low()
1651 *
1652 * Input: usual image variables
1653 * sumtab (made from makeSumTabSG2())
1654 * valtab (made from makeValTabSG2())
1655 * Return: 0 if OK; 1 on error.
1656 *
1657 * The output is processed in sets of 4 output bytes on a row,
1658 * corresponding to 4 2x2 bit-blocks in the input image.
1659 * Two lookup tables are used. The first, sumtab, gets the
1660 * sum of ON pixels in 4 sets of two adjacent bits,
1661 * storing the result in 4 adjacent bytes. After sums from
1662 * two rows have been added, the second table, valtab,
1663 * converts from the sum of ON pixels in the 2x2 block to
1664 * an 8 bpp grayscale value between 0 (for 4 bits ON)
1665 * and 255 (for 0 bits ON).
1666 */
1667 void
scaleToGray2Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1668 scaleToGray2Low(l_uint32 *datad,
1669 l_int32 wd,
1670 l_int32 hd,
1671 l_int32 wpld,
1672 l_uint32 *datas,
1673 l_int32 wpls,
1674 l_uint32 *sumtab,
1675 l_uint8 *valtab)
1676 {
1677 l_int32 i, j, l, k, m, wd4, extra;
1678 l_uint32 sbyte1, sbyte2, sum;
1679 l_uint32 *lines, *lined;
1680
1681 /* i indexes the dest lines
1682 * l indexes the source lines
1683 * j indexes the dest bytes
1684 * k indexes the source bytes
1685 * We take two bytes from the source (in 2 lines of 8 pixels
1686 * each) and convert them into four 8 bpp bytes of the dest. */
1687 wd4 = wd & 0xfffffffc;
1688 extra = wd - wd4;
1689 for (i = 0, l = 0; i < hd; i++, l += 2) {
1690 lines = datas + l * wpls;
1691 lined = datad + i * wpld;
1692 for (j = 0, k = 0; j < wd4; j += 4, k++) {
1693 sbyte1 = GET_DATA_BYTE(lines, k);
1694 sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1695 sum = sumtab[sbyte1] + sumtab[sbyte2];
1696 SET_DATA_BYTE(lined, j, valtab[sum >> 24]);
1697 SET_DATA_BYTE(lined, j + 1, valtab[(sum >> 16) & 0xff]);
1698 SET_DATA_BYTE(lined, j + 2, valtab[(sum >> 8) & 0xff]);
1699 SET_DATA_BYTE(lined, j + 3, valtab[sum & 0xff]);
1700 }
1701 if (extra > 0) {
1702 sbyte1 = GET_DATA_BYTE(lines, k);
1703 sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1704 sum = sumtab[sbyte1] + sumtab[sbyte2];
1705 for (m = 0; m < extra; m++) {
1706 SET_DATA_BYTE(lined, j + m,
1707 valtab[((sum >> (24 - 8 * m)) & 0xff)]);
1708 }
1709 }
1710
1711 }
1712
1713 return;
1714 }
1715
1716
1717 /*!
1718 * makeSumTabSG2()
1719 *
1720 * Returns a table of 256 l_uint32s, giving the four output
1721 * 8-bit grayscale sums corresponding to 8 input bits of a binary
1722 * image, for a 2x scale-to-gray op. The sums from two
1723 * adjacent scanlines are then added and transformed to
1724 * output four 8 bpp pixel values, using makeValTabSG2().
1725 */
1726 l_uint32 *
makeSumTabSG2(void)1727 makeSumTabSG2(void)
1728 {
1729 l_int32 i;
1730 l_int32 sum[] = {0, 1, 1, 2};
1731 l_uint32 *tab;
1732
1733 PROCNAME("makeSumTabSG2");
1734
1735 if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
1736 return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1737
1738 /* Pack the four sums separately in four bytes */
1739 for (i = 0; i < 256; i++) {
1740 tab[i] = (sum[i & 0x3] | sum[(i >> 2) & 0x3] << 8 |
1741 sum[(i >> 4) & 0x3] << 16 | sum[(i >> 6) & 0x3] << 24);
1742 }
1743
1744 return tab;
1745 }
1746
1747
1748 /*!
1749 * makeValTabSG2()
1750 *
1751 * Returns an 8 bit value for the sum of ON pixels
1752 * in a 2x2 square, according to
1753 *
1754 * val = 255 - (255 * sum)/4
1755 *
1756 * where sum is in set {0,1,2,3,4}
1757 */
1758 l_uint8 *
makeValTabSG2(void)1759 makeValTabSG2(void)
1760 {
1761 l_int32 i;
1762 l_uint8 *tab;
1763
1764 PROCNAME("makeValTabSG2");
1765
1766 if ((tab = (l_uint8 *)CALLOC(5, sizeof(l_uint8))) == NULL)
1767 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1768
1769 for (i = 0; i < 5; i++)
1770 tab[i] = 255 - (i * 255) / 4;
1771
1772 return tab;
1773 }
1774
1775
1776 /*------------------------------------------------------------------*
1777 * Scale-to-gray 3x *
1778 *------------------------------------------------------------------*/
1779 /*!
1780 * scaleToGray3Low()
1781 *
1782 * Input: usual image variables
1783 * sumtab (made from makeSumTabSG3())
1784 * valtab (made from makeValTabSG3())
1785 * Return: 0 if OK; 1 on error
1786 *
1787 * Each set of 8 3x3 bit-blocks in the source image, which
1788 * consist of 72 pixels arranged 24 pixels wide by 3 scanlines,
1789 * is converted to a row of 8 8-bit pixels in the dest image.
1790 * These 72 pixels of the input image are runs of 24 pixels
1791 * in three adjacent scanlines. Each run of 24 pixels is
1792 * stored in the 24 LSbits of a 32-bit word. We use 2 LUTs.
1793 * The first, sumtab, takes 6 of these bits and stores
1794 * sum, taken 3 bits at a time, in two bytes. (See
1795 * makeSumTabSG3). This is done for each of the 3 scanlines,
1796 * and the results are added. We now have the sum of ON pixels
1797 * in the first two 3x3 blocks in two bytes. The valtab LUT
1798 * then converts these values (which go from 0 to 9) to
1799 * grayscale values between between 255 and 0. (See makeValTabSG3).
1800 * This process is repeated for each of the other 3 sets of
1801 * 6x3 input pixels, giving 8 output pixels in total.
1802 *
1803 * Note: because the input image is processed in groups of
1804 * 24 x 3 pixels, the process clips the input height to
1805 * (h - h % 3) and the input width to (w - w % 24).
1806 */
1807 void
scaleToGray3Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1808 scaleToGray3Low(l_uint32 *datad,
1809 l_int32 wd,
1810 l_int32 hd,
1811 l_int32 wpld,
1812 l_uint32 *datas,
1813 l_int32 wpls,
1814 l_uint32 *sumtab,
1815 l_uint8 *valtab)
1816 {
1817 l_int32 i, j, l, k;
1818 l_uint32 threebytes1, threebytes2, threebytes3, sum;
1819 l_uint32 *lines, *lined;
1820
1821 /* i indexes the dest lines
1822 * l indexes the source lines
1823 * j indexes the dest bytes
1824 * k indexes the source bytes
1825 * We take 9 bytes from the source (72 binary pixels
1826 * in three lines of 24 pixels each) and convert it
1827 * into 8 bytes of the dest (8 8bpp pixels in one line) */
1828 for (i = 0, l = 0; i < hd; i++, l += 3) {
1829 lines = datas + l * wpls;
1830 lined = datad + i * wpld;
1831 for (j = 0, k = 0; j < wd; j += 8, k += 3) {
1832 threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
1833 (GET_DATA_BYTE(lines, k + 1) << 8) |
1834 GET_DATA_BYTE(lines, k + 2);
1835 threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
1836 (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
1837 GET_DATA_BYTE(lines + wpls, k + 2);
1838 threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
1839 (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
1840 GET_DATA_BYTE(lines + 2 * wpls, k + 2);
1841
1842 sum = sumtab[(threebytes1 >> 18)] +
1843 sumtab[(threebytes2 >> 18)] +
1844 sumtab[(threebytes3 >> 18)];
1845 SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
1846 SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
1847
1848 sum = sumtab[((threebytes1 >> 12) & 0x3f)] +
1849 sumtab[((threebytes2 >> 12) & 0x3f)] +
1850 sumtab[((threebytes3 >> 12) & 0x3f)];
1851 SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 2)]);
1852 SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
1853
1854 sum = sumtab[((threebytes1 >> 6) & 0x3f)] +
1855 sumtab[((threebytes2 >> 6) & 0x3f)] +
1856 sumtab[((threebytes3 >> 6) & 0x3f)];
1857 SET_DATA_BYTE(lined, j + 4, valtab[GET_DATA_BYTE(&sum, 2)]);
1858 SET_DATA_BYTE(lined, j + 5, valtab[GET_DATA_BYTE(&sum, 3)]);
1859
1860 sum = sumtab[(threebytes1 & 0x3f)] +
1861 sumtab[(threebytes2 & 0x3f)] +
1862 sumtab[(threebytes3 & 0x3f)];
1863 SET_DATA_BYTE(lined, j + 6, valtab[GET_DATA_BYTE(&sum, 2)]);
1864 SET_DATA_BYTE(lined, j + 7, valtab[GET_DATA_BYTE(&sum, 3)]);
1865 }
1866 }
1867
1868 return;
1869 }
1870
1871
1872
1873 /*!
1874 * makeSumTabSG3()
1875 *
1876 * Returns a table of 64 l_uint32s, giving the two output
1877 * 8-bit grayscale sums corresponding to 6 input bits of a binary
1878 * image, for a 3x scale-to-gray op. In practice, this would
1879 * be used three times (on adjacent scanlines), and the sums would
1880 * be added and then transformed to output 8 bpp pixel values,
1881 * using makeValTabSG3().
1882 */
1883 l_uint32 *
makeSumTabSG3(void)1884 makeSumTabSG3(void)
1885 {
1886 l_int32 i;
1887 l_int32 sum[] = {0, 1, 1, 2, 1, 2, 2, 3};
1888 l_uint32 *tab;
1889
1890 PROCNAME("makeSumTabSG3");
1891
1892 if ((tab = (l_uint32 *)CALLOC(64, sizeof(l_uint32))) == NULL)
1893 return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1894
1895 /* Pack the two sums separately in two bytes */
1896 for (i = 0; i < 64; i++) {
1897 tab[i] = (sum[i & 0x07]) | (sum[(i >> 3) & 0x07] << 8);
1898 }
1899
1900 return tab;
1901 }
1902
1903
1904 /*!
1905 * makeValTabSG3()
1906 *
1907 * Returns an 8 bit value for the sum of ON pixels
1908 * in a 3x3 square, according to
1909 * val = 255 - (255 * sum)/9
1910 * where sum is in set {0, ... ,9}
1911 */
1912 l_uint8 *
makeValTabSG3(void)1913 makeValTabSG3(void)
1914 {
1915 l_int32 i;
1916 l_uint8 *tab;
1917
1918 PROCNAME("makeValTabSG3");
1919
1920 if ((tab = (l_uint8 *)CALLOC(10, sizeof(l_uint8))) == NULL)
1921 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
1922
1923 for (i = 0; i < 10; i++)
1924 tab[i] = 0xff - (i * 255) / 9;
1925
1926 return tab;
1927 }
1928
1929
1930 /*------------------------------------------------------------------*
1931 * Scale-to-gray 4x *
1932 *------------------------------------------------------------------*/
1933 /*!
1934 * scaleToGray4Low()
1935 *
1936 * Input: usual image variables
1937 * sumtab (made from makeSumTabSG4())
1938 * valtab (made from makeValTabSG4())
1939 * Return: 0 if OK; 1 on error.
1940 *
1941 * The output is processed in sets of 2 output bytes on a row,
1942 * corresponding to 2 4x4 bit-blocks in the input image.
1943 * Two lookup tables are used. The first, sumtab, gets the
1944 * sum of ON pixels in two sets of four adjacent bits,
1945 * storing the result in 2 adjacent bytes. After sums from
1946 * four rows have been added, the second table, valtab,
1947 * converts from the sum of ON pixels in the 4x4 block to
1948 * an 8 bpp grayscale value between 0 (for 16 bits ON)
1949 * and 255 (for 0 bits ON).
1950 */
1951 void
scaleToGray4Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_uint32 * sumtab,l_uint8 * valtab)1952 scaleToGray4Low(l_uint32 *datad,
1953 l_int32 wd,
1954 l_int32 hd,
1955 l_int32 wpld,
1956 l_uint32 *datas,
1957 l_int32 wpls,
1958 l_uint32 *sumtab,
1959 l_uint8 *valtab)
1960 {
1961 l_int32 i, j, l, k;
1962 l_uint32 sbyte1, sbyte2, sbyte3, sbyte4, sum;
1963 l_uint32 *lines, *lined;
1964
1965 /* i indexes the dest lines
1966 * l indexes the source lines
1967 * j indexes the dest bytes
1968 * k indexes the source bytes
1969 * We take four bytes from the source (in 4 lines of 8 pixels
1970 * each) and convert it into two 8 bpp bytes of the dest. */
1971 for (i = 0, l = 0; i < hd; i++, l += 4) {
1972 lines = datas + l * wpls;
1973 lined = datad + i * wpld;
1974 for (j = 0, k = 0; j < wd; j += 2, k++) {
1975 sbyte1 = GET_DATA_BYTE(lines, k);
1976 sbyte2 = GET_DATA_BYTE(lines + wpls, k);
1977 sbyte3 = GET_DATA_BYTE(lines + 2 * wpls, k);
1978 sbyte4 = GET_DATA_BYTE(lines + 3 * wpls, k);
1979 sum = sumtab[sbyte1] + sumtab[sbyte2] +
1980 sumtab[sbyte3] + sumtab[sbyte4];
1981 SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 2)]);
1982 SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
1983 }
1984 }
1985
1986 return;
1987 }
1988
1989
1990 /*!
1991 * makeSumTabSG4()
1992 *
1993 * Returns a table of 256 l_uint32s, giving the two output
1994 * 8-bit grayscale sums corresponding to 8 input bits of a binary
1995 * image, for a 4x scale-to-gray op. The sums from four
1996 * adjacent scanlines are then added and transformed to
1997 * output 8 bpp pixel values, using makeValTabSG4().
1998 */
1999 l_uint32 *
makeSumTabSG4(void)2000 makeSumTabSG4(void)
2001 {
2002 l_int32 i;
2003 l_int32 sum[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
2004 l_uint32 *tab;
2005
2006 PROCNAME("makeSumTabSG4");
2007
2008 if ((tab = (l_uint32 *)CALLOC(256, sizeof(l_uint32))) == NULL)
2009 return (l_uint32 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2010
2011 /* Pack the two sums separately in two bytes */
2012 for (i = 0; i < 256; i++) {
2013 tab[i] = (sum[i & 0xf]) | (sum[(i >> 4) & 0xf] << 8);
2014 }
2015
2016 return tab;
2017 }
2018
2019
2020 /*!
2021 * makeValTabSG4()
2022 *
2023 * Returns an 8 bit value for the sum of ON pixels
2024 * in a 4x4 square, according to
2025 *
2026 * val = 255 - (255 * sum)/16
2027 *
2028 * where sum is in set {0, ... ,16}
2029 */
2030 l_uint8 *
makeValTabSG4(void)2031 makeValTabSG4(void)
2032 {
2033 l_int32 i;
2034 l_uint8 *tab;
2035
2036 PROCNAME("makeValTabSG4");
2037
2038 if ((tab = (l_uint8 *)CALLOC(17, sizeof(l_uint8))) == NULL)
2039 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2040
2041 for (i = 0; i < 17; i++)
2042 tab[i] = 0xff - (i * 255) / 16;
2043
2044 return tab;
2045 }
2046
2047
2048 /*------------------------------------------------------------------*
2049 * Scale-to-gray 6x *
2050 *------------------------------------------------------------------*/
2051 /*!
2052 * scaleToGray6Low()
2053 *
2054 * Input: usual image variables
2055 * tab8 (made from makePixelSumTab8())
2056 * valtab (made from makeValTabSG6())
2057 * Return: 0 if OK; 1 on error
2058 *
2059 * Each set of 4 6x6 bit-blocks in the source image, which
2060 * consist of 144 pixels arranged 24 pixels wide by 6 scanlines,
2061 * is converted to a row of 4 8-bit pixels in the dest image.
2062 * These 144 pixels of the input image are runs of 24 pixels
2063 * in six adjacent scanlines. Each run of 24 pixels is
2064 * stored in the 24 LSbits of a 32-bit word. We use 2 LUTs.
2065 * The first, tab8, takes 6 of these bits and stores
2066 * sum in one byte. This is done for each of the 6 scanlines,
2067 * and the results are added.
2068 * We now have the sum of ON pixels in the first 6x6 block. The
2069 * valtab LUT then converts these values (which go from 0 to 36) to
2070 * grayscale values between between 255 and 0. (See makeValTabSG6).
2071 * This process is repeated for each of the other 3 sets of
2072 * 6x6 input pixels, giving 4 output pixels in total.
2073 *
2074 * Note: because the input image is processed in groups of
2075 * 24 x 6 pixels, the process clips the input height to
2076 * (h - h % 6) and the input width to (w - w % 24).
2077 *
2078 */
2079 void
scaleToGray6Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2080 scaleToGray6Low(l_uint32 *datad,
2081 l_int32 wd,
2082 l_int32 hd,
2083 l_int32 wpld,
2084 l_uint32 *datas,
2085 l_int32 wpls,
2086 l_int32 *tab8,
2087 l_uint8 *valtab)
2088 {
2089 l_int32 i, j, l, k;
2090 l_uint32 threebytes1, threebytes2, threebytes3;
2091 l_uint32 threebytes4, threebytes5, threebytes6, sum;
2092 l_uint32 *lines, *lined;
2093
2094 /* i indexes the dest lines
2095 * l indexes the source lines
2096 * j indexes the dest bytes
2097 * k indexes the source bytes
2098 * We take 18 bytes from the source (144 binary pixels
2099 * in six lines of 24 pixels each) and convert it
2100 * into 4 bytes of the dest (four 8 bpp pixels in one line) */
2101 for (i = 0, l = 0; i < hd; i++, l += 6) {
2102 lines = datas + l * wpls;
2103 lined = datad + i * wpld;
2104 for (j = 0, k = 0; j < wd; j += 4, k += 3) {
2105 /* First grab the 18 bytes, 3 at a time, and put each set
2106 * of 3 bytes into the LS bytes of a 32-bit word. */
2107 threebytes1 = (GET_DATA_BYTE(lines, k) << 16) |
2108 (GET_DATA_BYTE(lines, k + 1) << 8) |
2109 GET_DATA_BYTE(lines, k + 2);
2110 threebytes2 = (GET_DATA_BYTE(lines + wpls, k) << 16) |
2111 (GET_DATA_BYTE(lines + wpls, k + 1) << 8) |
2112 GET_DATA_BYTE(lines + wpls, k + 2);
2113 threebytes3 = (GET_DATA_BYTE(lines + 2 * wpls, k) << 16) |
2114 (GET_DATA_BYTE(lines + 2 * wpls, k + 1) << 8) |
2115 GET_DATA_BYTE(lines + 2 * wpls, k + 2);
2116 threebytes4 = (GET_DATA_BYTE(lines + 3 * wpls, k) << 16) |
2117 (GET_DATA_BYTE(lines + 3 * wpls, k + 1) << 8) |
2118 GET_DATA_BYTE(lines + 3 * wpls, k + 2);
2119 threebytes5 = (GET_DATA_BYTE(lines + 4 * wpls, k) << 16) |
2120 (GET_DATA_BYTE(lines + 4 * wpls, k + 1) << 8) |
2121 GET_DATA_BYTE(lines + 4 * wpls, k + 2);
2122 threebytes6 = (GET_DATA_BYTE(lines + 5 * wpls, k) << 16) |
2123 (GET_DATA_BYTE(lines + 5 * wpls, k + 1) << 8) |
2124 GET_DATA_BYTE(lines + 5 * wpls, k + 2);
2125
2126 /* Sum first set of 36 bits and convert to 0-255 */
2127 sum = tab8[(threebytes1 >> 18)] +
2128 tab8[(threebytes2 >> 18)] +
2129 tab8[(threebytes3 >> 18)] +
2130 tab8[(threebytes4 >> 18)] +
2131 tab8[(threebytes5 >> 18)] +
2132 tab8[(threebytes6 >> 18)];
2133 SET_DATA_BYTE(lined, j, valtab[GET_DATA_BYTE(&sum, 3)]);
2134
2135 /* Ditto for second set */
2136 sum = tab8[((threebytes1 >> 12) & 0x3f)] +
2137 tab8[((threebytes2 >> 12) & 0x3f)] +
2138 tab8[((threebytes3 >> 12) & 0x3f)] +
2139 tab8[((threebytes4 >> 12) & 0x3f)] +
2140 tab8[((threebytes5 >> 12) & 0x3f)] +
2141 tab8[((threebytes6 >> 12) & 0x3f)];
2142 SET_DATA_BYTE(lined, j + 1, valtab[GET_DATA_BYTE(&sum, 3)]);
2143
2144 sum = tab8[((threebytes1 >> 6) & 0x3f)] +
2145 tab8[((threebytes2 >> 6) & 0x3f)] +
2146 tab8[((threebytes3 >> 6) & 0x3f)] +
2147 tab8[((threebytes4 >> 6) & 0x3f)] +
2148 tab8[((threebytes5 >> 6) & 0x3f)] +
2149 tab8[((threebytes6 >> 6) & 0x3f)];
2150 SET_DATA_BYTE(lined, j + 2, valtab[GET_DATA_BYTE(&sum, 3)]);
2151
2152 sum = tab8[(threebytes1 & 0x3f)] +
2153 tab8[(threebytes2 & 0x3f)] +
2154 tab8[(threebytes3 & 0x3f)] +
2155 tab8[(threebytes4 & 0x3f)] +
2156 tab8[(threebytes5 & 0x3f)] +
2157 tab8[(threebytes6 & 0x3f)];
2158 SET_DATA_BYTE(lined, j + 3, valtab[GET_DATA_BYTE(&sum, 3)]);
2159 }
2160 }
2161
2162 return;
2163 }
2164
2165
2166 /*!
2167 * makeValTabSG6()
2168 *
2169 * Returns an 8 bit value for the sum of ON pixels
2170 * in a 6x6 square, according to
2171 * val = 255 - (255 * sum)/36
2172 * where sum is in set {0, ... ,36}
2173 */
2174 l_uint8 *
makeValTabSG6(void)2175 makeValTabSG6(void)
2176 {
2177 l_int32 i;
2178 l_uint8 *tab;
2179
2180 PROCNAME("makeValTabSG6");
2181
2182 if ((tab = (l_uint8 *)CALLOC(37, sizeof(l_uint8))) == NULL)
2183 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2184
2185 for (i = 0; i < 37; i++)
2186 tab[i] = 0xff - (i * 255) / 36;
2187
2188 return tab;
2189 }
2190
2191
2192 /*------------------------------------------------------------------*
2193 * Scale-to-gray 8x *
2194 *------------------------------------------------------------------*/
2195 /*!
2196 * scaleToGray8Low()
2197 *
2198 * Input: usual image variables
2199 * tab8 (made from makePixelSumTab8())
2200 * valtab (made from makeValTabSG8())
2201 * Return: 0 if OK; 1 on error.
2202 *
2203 * The output is processed one dest byte at a time,
2204 * corresponding to 8 rows of src bytes in the input image.
2205 * Two lookup tables are used. The first, tab8, gets the
2206 * sum of ON pixels in a byte. After sums from 8 rows have
2207 * been added, the second table, valtab, converts from this
2208 * value (which is between 0 and 64) to an 8 bpp grayscale
2209 * value between 0 (for all 64 bits ON) and 255 (for 0 bits ON).
2210 */
2211 void
scaleToGray8Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8,l_uint8 * valtab)2212 scaleToGray8Low(l_uint32 *datad,
2213 l_int32 wd,
2214 l_int32 hd,
2215 l_int32 wpld,
2216 l_uint32 *datas,
2217 l_int32 wpls,
2218 l_int32 *tab8,
2219 l_uint8 *valtab)
2220 {
2221 l_int32 i, j, k;
2222 l_int32 sbyte0, sbyte1, sbyte2, sbyte3, sbyte4, sbyte5, sbyte6, sbyte7, sum;
2223 l_uint32 *lines, *lined;
2224
2225 /* i indexes the dest lines
2226 * k indexes the source lines
2227 * j indexes the src and dest bytes
2228 * We take 8 bytes from the source (in 8 lines of 8 pixels
2229 * each) and convert it into one 8 bpp byte of the dest. */
2230 for (i = 0, k = 0; i < hd; i++, k += 8) {
2231 lines = datas + k * wpls;
2232 lined = datad + i * wpld;
2233 for (j = 0; j < wd; j++) {
2234 sbyte0 = GET_DATA_BYTE(lines, j);
2235 sbyte1 = GET_DATA_BYTE(lines + wpls, j);
2236 sbyte2 = GET_DATA_BYTE(lines + 2 * wpls, j);
2237 sbyte3 = GET_DATA_BYTE(lines + 3 * wpls, j);
2238 sbyte4 = GET_DATA_BYTE(lines + 4 * wpls, j);
2239 sbyte5 = GET_DATA_BYTE(lines + 5 * wpls, j);
2240 sbyte6 = GET_DATA_BYTE(lines + 6 * wpls, j);
2241 sbyte7 = GET_DATA_BYTE(lines + 7 * wpls, j);
2242 sum = tab8[sbyte0] + tab8[sbyte1] +
2243 tab8[sbyte2] + tab8[sbyte3] +
2244 tab8[sbyte4] + tab8[sbyte5] +
2245 tab8[sbyte6] + tab8[sbyte7];
2246 SET_DATA_BYTE(lined, j, valtab[sum]);
2247 }
2248 }
2249
2250 return;
2251 }
2252
2253
2254 /*!
2255 * makeValTabSG8()
2256 *
2257 * Returns an 8 bit value for the sum of ON pixels
2258 * in an 8x8 square, according to
2259 * val = 255 - (255 * sum)/64
2260 * where sum is in set {0, ... ,64}
2261 */
2262 l_uint8 *
makeValTabSG8(void)2263 makeValTabSG8(void)
2264 {
2265 l_int32 i;
2266 l_uint8 *tab;
2267
2268 PROCNAME("makeValTabSG8");
2269
2270 if ((tab = (l_uint8 *)CALLOC(65, sizeof(l_uint8))) == NULL)
2271 return (l_uint8 *)ERROR_PTR("calloc fail for tab", procName, NULL);
2272
2273 for (i = 0; i < 65; i++)
2274 tab[i] = 0xff - (i * 255) / 64;
2275
2276 return tab;
2277 }
2278
2279
2280 /*------------------------------------------------------------------*
2281 * Scale-to-gray 16x *
2282 *------------------------------------------------------------------*/
2283 /*!
2284 * scaleToGray16Low()
2285 *
2286 * Input: usual image variables
2287 * tab8 (made from makePixelSumTab8())
2288 * Return: 0 if OK; 1 on error.
2289 *
2290 * The output is processed one dest byte at a time, corresponding
2291 * to 16 rows consisting each of 2 src bytes in the input image.
2292 * This uses one lookup table, tab8, which gives the sum of
2293 * ON pixels in a byte. After summing for all ON pixels in the
2294 * 32 src bytes, which is between 0 and 256, this is converted
2295 * to an 8 bpp grayscale value between 0 (for 255 or 256 bits ON)
2296 * and 255 (for 0 bits ON).
2297 */
2298 void
scaleToGray16Low(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas,l_int32 wpls,l_int32 * tab8)2299 scaleToGray16Low(l_uint32 *datad,
2300 l_int32 wd,
2301 l_int32 hd,
2302 l_int32 wpld,
2303 l_uint32 *datas,
2304 l_int32 wpls,
2305 l_int32 *tab8)
2306 {
2307 l_int32 i, j, k, m;
2308 l_int32 sum;
2309 l_uint32 *lines, *lined;
2310
2311 /* i indexes the dest lines
2312 * k indexes the source lines
2313 * j indexes the dest bytes
2314 * m indexes the src bytes
2315 * We take 32 bytes from the source (in 16 lines of 16 pixels
2316 * each) and convert it into one 8 bpp byte of the dest. */
2317 for (i = 0, k = 0; i < hd; i++, k += 16) {
2318 lines = datas + k * wpls;
2319 lined = datad + i * wpld;
2320 for (j = 0; j < wd; j++) {
2321 m = 2 * j;
2322 sum = tab8[GET_DATA_BYTE(lines, m)];
2323 sum += tab8[GET_DATA_BYTE(lines, m + 1)];
2324 sum += tab8[GET_DATA_BYTE(lines + wpls, m)];
2325 sum += tab8[GET_DATA_BYTE(lines + wpls, m + 1)];
2326 sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m)];
2327 sum += tab8[GET_DATA_BYTE(lines + 2 * wpls, m + 1)];
2328 sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m)];
2329 sum += tab8[GET_DATA_BYTE(lines + 3 * wpls, m + 1)];
2330 sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m)];
2331 sum += tab8[GET_DATA_BYTE(lines + 4 * wpls, m + 1)];
2332 sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m)];
2333 sum += tab8[GET_DATA_BYTE(lines + 5 * wpls, m + 1)];
2334 sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m)];
2335 sum += tab8[GET_DATA_BYTE(lines + 6 * wpls, m + 1)];
2336 sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m)];
2337 sum += tab8[GET_DATA_BYTE(lines + 7 * wpls, m + 1)];
2338 sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m)];
2339 sum += tab8[GET_DATA_BYTE(lines + 8 * wpls, m + 1)];
2340 sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m)];
2341 sum += tab8[GET_DATA_BYTE(lines + 9 * wpls, m + 1)];
2342 sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m)];
2343 sum += tab8[GET_DATA_BYTE(lines + 10 * wpls, m + 1)];
2344 sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m)];
2345 sum += tab8[GET_DATA_BYTE(lines + 11 * wpls, m + 1)];
2346 sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m)];
2347 sum += tab8[GET_DATA_BYTE(lines + 12 * wpls, m + 1)];
2348 sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m)];
2349 sum += tab8[GET_DATA_BYTE(lines + 13 * wpls, m + 1)];
2350 sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m)];
2351 sum += tab8[GET_DATA_BYTE(lines + 14 * wpls, m + 1)];
2352 sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m)];
2353 sum += tab8[GET_DATA_BYTE(lines + 15 * wpls, m + 1)];
2354 sum = L_MIN(sum, 255);
2355 SET_DATA_BYTE(lined, j, 255 - sum);
2356 }
2357 }
2358
2359 return;
2360 }
2361
2362
2363
2364 /*------------------------------------------------------------------*
2365 * Grayscale mipmap *
2366 *------------------------------------------------------------------*/
2367 /*!
2368 * scaleMipmapLow()
2369 *
2370 * See notes in scale.c for pixScaleToGrayMipmap(). This function
2371 * is here for pedagogical reasons. It gives poor results on document
2372 * images because of aliasing.
2373 */
2374 l_int32
scaleMipmapLow(l_uint32 * datad,l_int32 wd,l_int32 hd,l_int32 wpld,l_uint32 * datas1,l_int32 wpls1,l_uint32 * datas2,l_int32 wpls2,l_float32 red)2375 scaleMipmapLow(l_uint32 *datad,
2376 l_int32 wd,
2377 l_int32 hd,
2378 l_int32 wpld,
2379 l_uint32 *datas1,
2380 l_int32 wpls1,
2381 l_uint32 *datas2,
2382 l_int32 wpls2,
2383 l_float32 red)
2384 {
2385 l_int32 i, j, val1, val2, val, row2, col2;
2386 l_int32 *srow, *scol;
2387 l_uint32 *lines1, *lines2, *lined;
2388 l_float32 ratio, w1, w2;
2389
2390 PROCNAME("scaleMipmapLow");
2391
2392 /* Clear dest */
2393 memset((char *)datad, 0, 4 * wpld * hd);
2394
2395 /* Each dest pixel at (j,i) is computed by interpolating
2396 between the two src images at the corresponding location.
2397 We store the UL corner locations of the square of
2398 src pixels in thelower-resolution image that correspond
2399 to dest pixel (j,i). The are labelled by the arrays
2400 srow[i], scol[j]. The UL corner locations of the higher
2401 resolution src pixels are obtained from these arrays
2402 by multiplying by 2. */
2403 if ((srow = (l_int32 *)CALLOC(hd, sizeof(l_int32))) == NULL)
2404 return ERROR_INT("srow not made", procName, 1);
2405 if ((scol = (l_int32 *)CALLOC(wd, sizeof(l_int32))) == NULL)
2406 return ERROR_INT("scol not made", procName, 1);
2407 ratio = 1. / (2. * red); /* 0.5 for red = 1, 1 for red = 0.5 */
2408 for (i = 0; i < hd; i++)
2409 srow[i] = (l_int32)(ratio * i);
2410 for (j = 0; j < wd; j++)
2411 scol[j] = (l_int32)(ratio * j);
2412
2413 /* Get weights for linear interpolation: these are the
2414 * 'distances' of the dest image plane from the two
2415 * src image planes. */
2416 w1 = 2. * red - 1.; /* w1 --> 1 as red --> 1 */
2417 w2 = 1. - w1;
2418
2419 /* For each dest pixel, compute linear interpolation */
2420 for (i = 0; i < hd; i++) {
2421 row2 = srow[i];
2422 lines1 = datas1 + 2 * row2 * wpls1;
2423 lines2 = datas2 + row2 * wpls2;
2424 lined = datad + i * wpld;
2425 for (j = 0; j < wd; j++) {
2426 col2 = scol[j];
2427 val1 = GET_DATA_BYTE(lines1, 2 * col2);
2428 val2 = GET_DATA_BYTE(lines2, col2);
2429 val = (l_int32)(w1 * val1 + w2 * val2);
2430 SET_DATA_BYTE(lined, j, val);
2431 }
2432 }
2433
2434 FREE(srow);
2435 FREE(scol);
2436 return 0;
2437 }
2438