1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 #include "oscl_base_macros.h" // for OSCL_UNUSED_ARG
19 #include "mp4def.h"
20 #include "mp4lib_int.h"
21 #include "mp4enc_lib.h"
22 #include "dct.h"
23 #include "m4venc_oscl.h"
24
25 /* ======================================================================== */
26 /* Function : CodeMB_H263( ) */
27 /* Date : 8/15/2001 */
28 /* Purpose : Perform residue calc (only zero MV), DCT, H263 Quant/Dequant,*/
29 /* IDCT and motion compensation.Modified from FastCodeMB() */
30 /* Input : */
31 /* video Video encoder data structure */
32 /* function Approximate DCT function, scaling and threshold */
33 /* ncoefblck Array for last nonzero coeff for speedup in VlcEncode */
34 /* QP Combined offset from the origin to the current */
35 /* macroblock and QP for current MB. */
36 /* Output : */
37 /* video->outputMB Quantized DCT coefficients. */
38 /* currVop->yChan,uChan,vChan Reconstructed pixels */
39 /* */
40 /* Return : PV_STATUS */
41 /* Modified : */
42 /* 2/26/01
43 -modified threshold based on correlation coeff 0.75 only for mode H.263
44 -ncoefblck[] as input, to keep position of last non-zero coeff*/
45 /* 8/10/01
46 -modified threshold based on correlation coeff 0.5
47 -used column threshold to speedup column DCT.
48 -used bitmap zigzag to speedup RunLevel(). */
49 /* ======================================================================== */
50
CodeMB_H263(VideoEncData * video,approxDCT * function,Int QP,Int ncoefblck[])51 PV_STATUS CodeMB_H263(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
52 {
53 Int sad, k, CBP, mbnum = video->mbnum;
54 Short *output, *dataBlock;
55 UChar Mode = video->headerInfo.Mode[mbnum];
56 UChar *bitmapcol, *bitmaprow = video->bitmaprow;
57 UInt *bitmapzz ;
58 UChar shortHeader = video->vol[video->currLayer]->shortVideoHeader;
59 Int dc_scaler = 8;
60 Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
61 struct QPstruct QuantParam;
62 Int dctMode, DctTh1;
63 Int ColTh;
64 Int(*BlockQuantDequantH263)(Short *, Short *, struct QPstruct *,
65 UChar[], UChar *, UInt *, Int, Int, Int, UChar);
66 Int(*BlockQuantDequantH263DC)(Short *, Short *, struct QPstruct *,
67 UChar *, UInt *, Int, UChar);
68 void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
69 void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
70 void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
71 void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);
72
73 /* motion comp. related var. */
74 Vop *currVop = video->currVop;
75 VideoEncFrameIO *inputFrame = video->input;
76 Int ind_x = video->outputMB->mb_x;
77 Int ind_y = video->outputMB->mb_y;
78 Int lx = currVop->pitch;
79 Int width = currVop->width;
80 UChar *rec, *input, *pred;
81 Int offset = QP >> 5; /* QP is combined offset and QP */
82 Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
83 /*****************************/
84
85 OSCL_UNUSED_ARG(function);
86
87 output = video->outputMB->block[0];
88 CBP = 0;
89 QP = QP & 0x1F;
90 // M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/
91
92 QuantParam.QPx2 = QP << 1;
93 QuantParam.QP = QP;
94 QuantParam.QPdiv2 = QP >> 1;
95 QuantParam.QPx2plus = QuantParam.QPx2 + QuantParam.QPdiv2;
96 QuantParam.Addition = QP - 1 + (QP & 0x1);
97
98 if (intra)
99 {
100 BlockDCT1x1 = &Block1x1DCTIntra;
101 BlockDCT2x2 = &Block2x2DCT_AANIntra;
102 BlockDCT4x4 = &Block4x4DCT_AANIntra;
103 BlockDCT8x8 = &BlockDCT_AANIntra;
104 BlockQuantDequantH263 = &BlockQuantDequantH263Intra;
105 BlockQuantDequantH263DC = &BlockQuantDequantH263DCIntra;
106 if (shortHeader)
107 {
108 dc_scaler = 8;
109 }
110 else
111 {
112 dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
113 }
114 DctTh1 = (Int)(dc_scaler * 3);//*1.829
115 ColTh = ColThIntra[QP];
116 }
117 else
118 {
119 BlockDCT1x1 = &Block1x1DCTwSub;
120 BlockDCT2x2 = &Block2x2DCT_AANwSub;
121 BlockDCT4x4 = &Block4x4DCT_AANwSub;
122 BlockDCT8x8 = &BlockDCT_AANwSub;
123
124 BlockQuantDequantH263 = &BlockQuantDequantH263Inter;
125 BlockQuantDequantH263DC = &BlockQuantDequantH263DCInter;
126 ColTh = ColThInter[QP];
127 DctTh1 = (Int)(16 * QP); //9*QP;
128 }
129
130 rec = currVop->yChan + offset;
131 input = inputFrame->yChan + offset;
132 if (lx != width) input -= (ind_y << 9); /* non-padded offset */
133
134 dataBlock = video->dataBlock;
135 pred = video->predictedMB;
136
137 for (k = 0; k < 6; k++)
138 {
139 CBP <<= 1;
140 bitmapcol = video->bitmapcol[k];
141 bitmapzz = video->bitmapzz[k]; /* 7/30/01 */
142 if (k < 4)
143 {
144 sad = video->mot[mbnum][k+1].sad;
145 if (k&1)
146 {
147 rec += 8;
148 input += 8;
149 }
150 else if (k == 2)
151 {
152 dctMode = ((width << 3) - 8);
153 input += dctMode;
154 dctMode = ((lx << 3) - 8);
155 rec += dctMode;
156 }
157 }
158 else
159 {
160 if (k == 4)
161 {
162 rec = currVop->uChan + offsetc;
163 input = inputFrame->uChan + offsetc;
164 if (lx != width) input -= (ind_y << 7);
165 lx >>= 1;
166 width >>= 1;
167 if (intra)
168 {
169 sad = getBlockSum(input, width);
170 if (shortHeader)
171 dc_scaler = 8;
172 else
173 {
174 dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
175 }
176 DctTh1 = (Int)(dc_scaler * 3);//*1.829
177 }
178 else
179 sad = Sad8x8(input, pred, width);
180 }
181 else
182 {
183 rec = currVop->vChan + offsetc;
184 input = inputFrame->vChan + offsetc;
185 if (lx != width) input -= (ind_y << 7);
186 if (intra)
187 {
188 sad = getBlockSum(input, width);
189 }
190 else
191 sad = Sad8x8(input, pred, width);
192 }
193 }
194
195 if (sad < DctTh1 && !(shortHeader && intra)) /* all-zero */
196 { /* For shortHeader intra block, DC value cannot be zero */
197 dctMode = 0;
198 CBP |= 0;
199 ncoefblck[k] = 0;
200 }
201 else if (sad < 18*QP/*(QP<<4)*/) /* DC-only */
202 {
203 dctMode = 1;
204 BlockDCT1x1(dataBlock, input, pred, width);
205
206 CBP |= (*BlockQuantDequantH263DC)(dataBlock, output, &QuantParam,
207 bitmaprow + k, bitmapzz, dc_scaler, shortHeader);
208 ncoefblck[k] = 1;
209 }
210 else
211 {
212
213 dataBlock[64] = ColTh;
214
215 if (sad < 22*QP/*(QP<<4)+(QP<<1)*/) /* 2x2 DCT */
216 {
217 dctMode = 2;
218 BlockDCT2x2(dataBlock, input, pred, width);
219 ncoefblck[k] = 6;
220 }
221 else if (sad < (QP << 5)) /* 4x4 DCT */
222 {
223 dctMode = 4;
224 BlockDCT4x4(dataBlock, input, pred, width);
225 ncoefblck[k] = 26;
226 }
227 else /* Full-DCT */
228 {
229 dctMode = 8;
230 BlockDCT8x8(dataBlock, input, pred, width);
231 ncoefblck[k] = 64;
232 }
233
234 CBP |= (*BlockQuantDequantH263)(dataBlock, output, &QuantParam,
235 bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler, shortHeader);
236 }
237 BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | intra);
238 output += 64;
239 if (!(k&1))
240 {
241 pred += 8;
242 }
243 else
244 {
245 pred += 120;
246 }
247 }
248
249 video->headerInfo.CBP[mbnum] = CBP; /* 5/18/2001 */
250 return PV_SUCCESS;
251 }
252
253 #ifndef NO_MPEG_QUANT
254 /* ======================================================================== */
255 /* Function : CodeMB_MPEG( ) */
256 /* Date : 8/15/2001 */
257 /* Purpose : Perform residue calc (only zero MV), DCT, MPEG Quant/Dequant,*/
258 /* IDCT and motion compensation.Modified from FastCodeMB() */
259 /* Input : */
260 /* video Video encoder data structure */
261 /* function Approximate DCT function, scaling and threshold */
262 /* ncoefblck Array for last nonzero coeff for speedup in VlcEncode */
263 /* QP Combined offset from the origin to the current */
264 /* macroblock and QP for current MB. */
265 /* Output : */
266 /* video->outputMB Quantized DCT coefficients. */
267 /* currVop->yChan,uChan,vChan Reconstructed pixels */
268 /* */
269 /* Return : PV_STATUS */
270 /* Modified : */
271 /* 2/26/01
272 -modified threshold based on correlation coeff 0.75 only for mode H.263
273 -ncoefblck[] as input, keep position of last non-zero coeff*/
274 /* 8/10/01
275 -modified threshold based on correlation coeff 0.5
276 -used column threshold to speedup column DCT.
277 -used bitmap zigzag to speedup RunLevel(). */
278 /* ======================================================================== */
279
CodeMB_MPEG(VideoEncData * video,approxDCT * function,Int QP,Int ncoefblck[])280 PV_STATUS CodeMB_MPEG(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
281 {
282 Int sad, k, CBP, mbnum = video->mbnum;
283 Short *output, *dataBlock;
284 UChar Mode = video->headerInfo.Mode[mbnum];
285 UChar *bitmapcol, *bitmaprow = video->bitmaprow;
286 UInt *bitmapzz ;
287 Int dc_scaler = 8;
288 Vol *currVol = video->vol[video->currLayer];
289 Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
290 Int *qmat;
291 Int dctMode, DctTh1, DctTh2, DctTh3, DctTh4;
292 Int ColTh;
293
294 Int(*BlockQuantDequantMPEG)(Short *, Short *, Int, Int *,
295 UChar [], UChar *, UInt *, Int, Int, Int);
296 Int(*BlockQuantDequantMPEGDC)(Short *, Short *, Int, Int *,
297 UChar [], UChar *, UInt *, Int);
298
299 void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
300 void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
301 void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
302 void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);
303
304 /* motion comp. related var. */
305 Vop *currVop = video->currVop;
306 VideoEncFrameIO *inputFrame = video->input;
307 Int ind_x = video->outputMB->mb_x;
308 Int ind_y = video->outputMB->mb_y;
309 Int lx = currVop->pitch;
310 Int width = currVop->width;
311 UChar *rec, *input, *pred;
312 Int offset = QP >> 5;
313 Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
314 /*****************************/
315
316 OSCL_UNUSED_ARG(function);
317
318 output = video->outputMB->block[0];
319 CBP = 0;
320 QP = QP & 0x1F;
321 // M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/
322
323 if (intra)
324 {
325 BlockDCT1x1 = &Block1x1DCTIntra;
326 BlockDCT2x2 = &Block2x2DCT_AANIntra;
327 BlockDCT4x4 = &Block4x4DCT_AANIntra;
328 BlockDCT8x8 = &BlockDCT_AANIntra;
329
330 BlockQuantDequantMPEG = &BlockQuantDequantMPEGIntra;
331 BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCIntra;
332 dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
333 qmat = currVol->iqmat;
334 DctTh1 = (Int)(3 * dc_scaler);//2*dc_scaler);
335 DctTh2 = (Int)((1.25 * QP - 1) * qmat[1] * 0.45);//0.567);//0.567);
336 DctTh3 = (Int)((1.25 * QP - 1) * qmat[2] * 0.55);//1.162); /* 8/2/2001 */
337 DctTh4 = (Int)((1.25 * QP - 1) * qmat[32] * 0.8);//1.7583);//0.7942);
338 ColTh = ColThIntra[QP];
339 }
340 else
341 {
342 BlockDCT1x1 = &Block1x1DCTwSub;
343 BlockDCT2x2 = &Block2x2DCT_AANwSub;
344 BlockDCT4x4 = &Block4x4DCT_AANwSub;
345 BlockDCT8x8 = &BlockDCT_AANwSub;
346
347 BlockQuantDequantMPEG = &BlockQuantDequantMPEGInter;
348 BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCInter;
349 qmat = currVol->niqmat;
350 DctTh1 = (Int)(((QP << 1) - 0.5) * qmat[0] * 0.4);//0.2286);//0.3062);
351 DctTh2 = (Int)(((QP << 1) - 0.5) * qmat[1] * 0.45);//0.567);//0.4);
352 DctTh3 = (Int)(((QP << 1) - 0.5) * qmat[2] * 0.55);//1.162); /* 8/2/2001 */
353 DctTh4 = (Int)(((QP << 1) - 0.5) * qmat[32] * 0.8);//1.7583);//0.7942);
354 ColTh = ColThInter[QP];
355 }// get qmat, DctTh1, DctTh2, DctTh3
356
357 rec = currVop->yChan + offset;
358 input = inputFrame->yChan + offset;
359 if (lx != width) input -= (ind_y << 9); /* non-padded offset */
360
361 dataBlock = video->dataBlock;
362 pred = video->predictedMB;
363
364 for (k = 0; k < 6; k++)
365 {
366 CBP <<= 1;
367 bitmapcol = video->bitmapcol[k];
368 bitmapzz = video->bitmapzz[k]; /* 8/2/01 */
369 if (k < 4)
370 {//Y block
371 sad = video->mot[mbnum][k+1].sad;
372 if (k&1)
373 {
374 rec += 8;
375 input += 8;
376 }
377 else if (k == 2)
378 {
379 dctMode = ((width << 3) - 8);
380 input += dctMode;
381 dctMode = ((lx << 3) - 8);
382 rec += dctMode;
383 }
384 }
385 else
386 {// U, V block
387 if (k == 4)
388 {
389 rec = currVop->uChan + offsetc;
390 input = inputFrame->uChan + offsetc;
391 if (lx != width) input -= (ind_y << 7);
392 lx >>= 1;
393 width >>= 1;
394 if (intra)
395 {
396 dc_scaler = cal_dc_scalerENC(QP, 2); /* luminance blocks */
397 DctTh1 = dc_scaler * 3;
398 sad = getBlockSum(input, width);
399 }
400 else
401 sad = Sad8x8(input, pred, width);
402 }
403 else
404 {
405 rec = currVop->vChan + offsetc;
406 input = inputFrame->vChan + offsetc;
407 if (lx != width) input -= (ind_y << 7);
408 if (intra)
409 sad = getBlockSum(input, width);
410 else
411 sad = Sad8x8(input, pred, width);
412 }
413 }
414
415 if (sad < DctTh1) /* all-zero */
416 {
417 dctMode = 0;
418 CBP |= 0;
419 ncoefblck[k] = 0;
420 }
421 else if (sad < DctTh2) /* DC-only */
422 {
423 dctMode = 1;
424 BlockDCT1x1(dataBlock, input, pred, width);
425
426 CBP |= (*BlockQuantDequantMPEGDC)(dataBlock, output, QP, qmat,
427 bitmapcol, bitmaprow + k, bitmapzz, dc_scaler);
428 ncoefblck[k] = 1;
429 }
430 else
431 {
432 dataBlock[64] = ColTh;
433
434 if (sad < DctTh3) /* 2x2-DCT */
435 {
436 dctMode = 2;
437 BlockDCT2x2(dataBlock, input, pred, width);
438 ncoefblck[k] = 6;
439 }
440 else if (sad < DctTh4) /* 4x4 DCT */
441 {
442 dctMode = 4;
443 BlockDCT4x4(dataBlock, input, pred, width);
444 ncoefblck[k] = 26;
445 }
446 else /* full-DCT */
447 {
448 dctMode = 8;
449 BlockDCT8x8(dataBlock, input, pred, width);
450 ncoefblck[k] = 64;
451 }
452
453 CBP |= (*BlockQuantDequantMPEG)(dataBlock, output, QP, qmat,
454 bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler); //
455 }
456 dctMode = 8; /* for mismatch handle */
457 BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | (intra));
458
459 output += 64;
460 if (!(k&1))
461 {
462 pred += 8;
463 }
464 else
465 {
466 pred += 120;
467 }
468 }
469
470 video->headerInfo.CBP[mbnum] = CBP; /* 5/18/2001 */
471 return PV_SUCCESS;
472 }
473
474 #endif
475
476 /* ======================================================================== */
477 /* Function : getBlockSAV( ) */
478 /* Date : 8/10/2000 */
479 /* Purpose : Get SAV for one block */
480 /* In/out : block[64] contain one block data */
481 /* Return : */
482 /* Modified : */
483 /* ======================================================================== */
484 /* can be written in MMX or SSE, 2/22/2001 */
getBlockSAV(Short block[])485 Int getBlockSAV(Short block[])
486 {
487 Int i, val, sav = 0;
488
489 i = 8;
490 while (i--)
491 {
492 val = *block++;
493 if (val > 0) sav += val;
494 else sav -= val;
495 val = *block++;
496 if (val > 0) sav += val;
497 else sav -= val;
498 val = *block++;
499 if (val > 0) sav += val;
500 else sav -= val;
501 val = *block++;
502 if (val > 0) sav += val;
503 else sav -= val;
504 val = *block++;
505 if (val > 0) sav += val;
506 else sav -= val;
507 val = *block++;
508 if (val > 0) sav += val;
509 else sav -= val;
510 val = *block++;
511 if (val > 0) sav += val;
512 else sav -= val;
513 val = *block++;
514 if (val > 0) sav += val;
515 else sav -= val;
516 }
517
518 return sav;
519
520 }
521
522 /* ======================================================================== */
523 /* Function : Sad8x8( ) */
524 /* Date : 8/10/2000 */
525 /* Purpose : Find SAD between prev block and current block */
526 /* In/out : Previous and current frame block pointers, and frame width */
527 /* Return : */
528 /* Modified : */
529 /* 8/15/01, - do 4 pixel at a time assuming 32 bit register */
530 /* ======================================================================== */
Sad8x8(UChar * cur,UChar * prev,Int width)531 Int Sad8x8(UChar *cur, UChar *prev, Int width)
532 {
533 UChar *end = cur + (width << 3);
534 Int sad = 0;
535 Int *curInt = (Int*) cur;
536 Int *prevInt = (Int*) prev;
537 Int cur1, cur2, prev1, prev2;
538 UInt mask, sgn_msk = 0x80808080;
539 Int sum2 = 0, sum4 = 0;
540 Int tmp;
541 do
542 {
543 mask = ~(0xFF00);
544 cur1 = curInt[1]; /* load cur[4..7] */
545 cur2 = curInt[0];
546 curInt += (width >> 2); /* load cur[0..3] and +=lx */
547 prev1 = prevInt[1];
548 prev2 = prevInt[0];
549 prevInt += 4;
550
551 tmp = prev2 ^ cur2;
552 cur2 = prev2 - cur2;
553 tmp = tmp ^ cur2; /* (^)^(-) last bit is one if carry */
554 tmp = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */
555 if (cur2 < 0) tmp = tmp | 0x80000000; /* corcurt sign of first byte */
556 tmp = (tmp << 8) - tmp; /* carry borrowed bytes are marked with 0x1FE */
557 cur2 = cur2 + (tmp >> 7); /* negative bytes is added with 0xFF, -1 */
558 cur2 = cur2 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */
559
560 tmp = prev1 ^ cur1;
561 cur1 = prev1 - cur1;
562 tmp = tmp ^ cur1; /* (^)^(-) last bit is one if carry */
563 tmp = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */
564 if (cur1 < 0) tmp = tmp | 0x80000000; /* corcurt sign of first byte */
565 tmp = (tmp << 8) - tmp; /* carry borrowed bytes are marked with 0x1FE */
566 cur1 = cur1 + (tmp >> 7); /* negative bytes is added with 0xFF, -1 */
567 cur1 = cur1 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */
568
569 sum4 = sum4 + cur1;
570 cur1 = cur1 & (mask << 8); /* mask first and third bytes */
571 sum2 = sum2 + ((UInt)cur1 >> 8);
572 sum4 = sum4 + cur2;
573 cur2 = cur2 & (mask << 8); /* mask first and third bytes */
574 sum2 = sum2 + ((UInt)cur2 >> 8);
575 }
576 while ((UInt)curInt < (UInt)end);
577
578 cur1 = sum4 - (sum2 << 8); /* get even-sum */
579 cur1 = cur1 + sum2; /* add 16 bit even-sum and odd-sum*/
580 cur1 = cur1 + (cur1 << 16); /* add upper and lower 16 bit sum */
581 sad = ((UInt)cur1 >> 16); /* take upper 16 bit */
582 return sad;
583 }
584
585 /* ======================================================================== */
586 /* Function : getBlockSum( ) */
587 /* Date : 8/10/2000 */
588 /* Purpose : Find summation of value within a block. */
589 /* In/out : Pointer to current block in a frame and frame width */
590 /* Return : */
591 /* Modified : */
592 /* 8/15/01, - SIMD 4 pixels at a time */
593 /* ======================================================================== */
594
getBlockSum(UChar * cur,Int width)595 Int getBlockSum(UChar *cur, Int width)
596 {
597 Int sad = 0, sum4 = 0, sum2 = 0;
598 UChar *end = cur + (width << 3);
599 Int *curInt = (Int*)cur;
600 UInt mask = ~(0xFF00);
601 Int load1, load2;
602
603 do
604 {
605 load1 = curInt[1];
606 load2 = curInt[0];
607 curInt += (width >> 2);
608 sum4 += load1;
609 load1 = load1 & (mask << 8); /* even bytes */
610 sum2 += ((UInt)load1 >> 8); /* sum even bytes, 16 bit */
611 sum4 += load2;
612 load2 = load2 & (mask << 8); /* even bytes */
613 sum2 += ((UInt)load2 >> 8); /* sum even bytes, 16 bit */
614 }
615 while ((UInt)curInt < (UInt)end);
616 load1 = sum4 - (sum2 << 8); /* get even-sum */
617 load1 = load1 + sum2; /* add 16 bit even-sum and odd-sum*/
618 load1 = load1 + (load1 << 16); /* add upper and lower 16 bit sum */
619 sad = ((UInt)load1 >> 16); /* take upper 16 bit */
620
621 return sad;
622 }
623
624