• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "oscl_base_macros.h" // for OSCL_UNUSED_ARG
19 #include "mp4def.h"
20 #include "mp4lib_int.h"
21 #include "mp4enc_lib.h"
22 #include "dct.h"
23 #include "m4venc_oscl.h"
24 
25 /* ======================================================================== */
26 /*  Function : CodeMB_H263( )                                               */
27 /*  Date     : 8/15/2001                                                    */
28 /*  Purpose  : Perform residue calc (only zero MV), DCT, H263 Quant/Dequant,*/
29 /*              IDCT and motion compensation.Modified from FastCodeMB()     */
30 /*  Input    :                                                              */
31 /*      video       Video encoder data structure                            */
32 /*      function    Approximate DCT function, scaling and threshold         */
33 /*      ncoefblck   Array for last nonzero coeff for speedup in VlcEncode   */
34 /*      QP      Combined offset from the origin to the current          */
35 /*                  macroblock  and QP  for current MB.                     */
36 /*    Output     :                                                          */
37 /*      video->outputMB     Quantized DCT coefficients.                     */
38 /*      currVop->yChan,uChan,vChan  Reconstructed pixels                    */
39 /*                                                                          */
40 /*  Return   :   PV_STATUS                                                  */
41 /*  Modified :                                                              */
42 /*           2/26/01
43             -modified threshold based on correlation coeff 0.75 only for mode H.263
44             -ncoefblck[] as input,  to keep position of last non-zero coeff*/
45 /*           8/10/01
46             -modified threshold based on correlation coeff 0.5
47             -used column threshold to speedup column DCT.
48             -used bitmap zigzag to speedup RunLevel().                      */
49 /* ======================================================================== */
50 
CodeMB_H263(VideoEncData * video,approxDCT * function,Int QP,Int ncoefblck[])51 PV_STATUS CodeMB_H263(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
52 {
53     Int sad, k, CBP, mbnum = video->mbnum;
54     Short *output, *dataBlock;
55     UChar Mode = video->headerInfo.Mode[mbnum];
56     UChar *bitmapcol, *bitmaprow = video->bitmaprow;
57     UInt  *bitmapzz ;
58     UChar shortHeader = video->vol[video->currLayer]->shortVideoHeader;
59     Int dc_scaler = 8;
60     Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
61     struct QPstruct QuantParam;
62     Int dctMode, DctTh1;
63     Int ColTh;
64     Int(*BlockQuantDequantH263)(Short *, Short *, struct QPstruct *,
65                                 UChar[], UChar *, UInt *, Int, Int, Int, UChar);
66     Int(*BlockQuantDequantH263DC)(Short *, Short *, struct QPstruct *,
67                                   UChar *, UInt *, Int, UChar);
68     void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
69     void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
70     void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
71     void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);
72 
73     /* motion comp. related var. */
74     Vop *currVop = video->currVop;
75     VideoEncFrameIO *inputFrame = video->input;
76     Int ind_x = video->outputMB->mb_x;
77     Int ind_y = video->outputMB->mb_y;
78     Int lx = currVop->pitch;
79     Int width = currVop->width;
80     UChar *rec, *input, *pred;
81     Int offset = QP >> 5;  /* QP is combined offset and QP */
82     Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
83     /*****************************/
84 
85     OSCL_UNUSED_ARG(function);
86 
87     output = video->outputMB->block[0];
88     CBP = 0;
89     QP = QP & 0x1F;
90 //  M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero , 7/24/01*/
91 
92     QuantParam.QPx2 = QP << 1;
93     QuantParam.QP = QP;
94     QuantParam.QPdiv2 = QP >> 1;
95     QuantParam.QPx2plus = QuantParam.QPx2 + QuantParam.QPdiv2;
96     QuantParam.Addition = QP - 1 + (QP & 0x1);
97 
98     if (intra)
99     {
100         BlockDCT1x1 = &Block1x1DCTIntra;
101         BlockDCT2x2 = &Block2x2DCT_AANIntra;
102         BlockDCT4x4 = &Block4x4DCT_AANIntra;
103         BlockDCT8x8 = &BlockDCT_AANIntra;
104         BlockQuantDequantH263 = &BlockQuantDequantH263Intra;
105         BlockQuantDequantH263DC = &BlockQuantDequantH263DCIntra;
106         if (shortHeader)
107         {
108             dc_scaler = 8;
109         }
110         else
111         {
112             dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
113         }
114         DctTh1 = (Int)(dc_scaler * 3);//*1.829
115         ColTh = ColThIntra[QP];
116     }
117     else
118     {
119         BlockDCT1x1 = &Block1x1DCTwSub;
120         BlockDCT2x2 = &Block2x2DCT_AANwSub;
121         BlockDCT4x4 = &Block4x4DCT_AANwSub;
122         BlockDCT8x8 = &BlockDCT_AANwSub;
123 
124         BlockQuantDequantH263 = &BlockQuantDequantH263Inter;
125         BlockQuantDequantH263DC = &BlockQuantDequantH263DCInter;
126         ColTh = ColThInter[QP];
127         DctTh1 = (Int)(16 * QP);  //9*QP;
128     }
129 
130     rec = currVop->yChan + offset;
131     input = inputFrame->yChan + offset;
132     if (lx != width) input -= (ind_y << 9);  /* non-padded offset */
133 
134     dataBlock = video->dataBlock;
135     pred = video->predictedMB;
136 
137     for (k = 0; k < 6; k++)
138     {
139         CBP <<= 1;
140         bitmapcol = video->bitmapcol[k];
141         bitmapzz = video->bitmapzz[k];  /*  7/30/01 */
142         if (k < 4)
143         {
144             sad = video->mot[mbnum][k+1].sad;
145             if (k&1)
146             {
147                 rec += 8;
148                 input += 8;
149             }
150             else if (k == 2)
151             {
152                 dctMode = ((width << 3) - 8);
153                 input += dctMode;
154                 dctMode = ((lx << 3) - 8);
155                 rec += dctMode;
156             }
157         }
158         else
159         {
160             if (k == 4)
161             {
162                 rec = currVop->uChan + offsetc;
163                 input = inputFrame->uChan + offsetc;
164                 if (lx != width) input -= (ind_y << 7);
165                 lx >>= 1;
166                 width >>= 1;
167                 if (intra)
168                 {
169                     sad = getBlockSum(input, width);
170                     if (shortHeader)
171                         dc_scaler = 8;
172                     else
173                     {
174                         dc_scaler = cal_dc_scalerENC(QP, 2); /* chrominance blocks */
175                     }
176                     DctTh1 = (Int)(dc_scaler * 3);//*1.829
177                 }
178                 else
179                     sad = Sad8x8(input, pred, width);
180             }
181             else
182             {
183                 rec = currVop->vChan + offsetc;
184                 input = inputFrame->vChan + offsetc;
185                 if (lx != width) input -= (ind_y << 7);
186                 if (intra)
187                 {
188                     sad = getBlockSum(input, width);
189                 }
190                 else
191                     sad = Sad8x8(input, pred, width);
192             }
193         }
194 
195         if (sad < DctTh1 && !(shortHeader && intra)) /* all-zero */
196         {                       /* For shortHeader intra block, DC value cannot be zero */
197             dctMode = 0;
198             CBP |= 0;
199             ncoefblck[k] = 0;
200         }
201         else if (sad < 18*QP/*(QP<<4)*/) /* DC-only */
202         {
203             dctMode = 1;
204             BlockDCT1x1(dataBlock, input, pred, width);
205 
206             CBP |= (*BlockQuantDequantH263DC)(dataBlock, output, &QuantParam,
207                                               bitmaprow + k, bitmapzz, dc_scaler, shortHeader);
208             ncoefblck[k] = 1;
209         }
210         else
211         {
212 
213             dataBlock[64] = ColTh;
214 
215             if (sad < 22*QP/*(QP<<4)+(QP<<1)*/)  /* 2x2 DCT */
216             {
217                 dctMode = 2;
218                 BlockDCT2x2(dataBlock, input, pred, width);
219                 ncoefblck[k] = 6;
220             }
221             else if (sad < (QP << 5)) /* 4x4 DCT */
222             {
223                 dctMode = 4;
224                 BlockDCT4x4(dataBlock, input, pred, width);
225                 ncoefblck[k] = 26;
226             }
227             else /* Full-DCT */
228             {
229                 dctMode = 8;
230                 BlockDCT8x8(dataBlock, input, pred, width);
231                 ncoefblck[k] = 64;
232             }
233 
234             CBP |= (*BlockQuantDequantH263)(dataBlock, output, &QuantParam,
235                                             bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler, shortHeader);
236         }
237         BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | intra);
238         output += 64;
239         if (!(k&1))
240         {
241             pred += 8;
242         }
243         else
244         {
245             pred += 120;
246         }
247     }
248 
249     video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
250     return PV_SUCCESS;
251 }
252 
253 #ifndef NO_MPEG_QUANT
254 /* ======================================================================== */
255 /*  Function : CodeMB_MPEG( )                                               */
256 /*  Date     : 8/15/2001                                                    */
257 /*  Purpose  : Perform residue calc (only zero MV), DCT, MPEG Quant/Dequant,*/
258 /*              IDCT and motion compensation.Modified from FastCodeMB()     */
259 /*  Input    :                                                              */
260 /*      video       Video encoder data structure                            */
261 /*      function    Approximate DCT function, scaling and threshold         */
262 /*      ncoefblck   Array for last nonzero coeff for speedup in VlcEncode   */
263 /*      QP      Combined offset from the origin to the current          */
264 /*                  macroblock  and QP  for current MB.                     */
265 /*    Output     :                                                          */
266 /*      video->outputMB     Quantized DCT coefficients.                     */
267 /*      currVop->yChan,uChan,vChan  Reconstructed pixels                    */
268 /*                                                                          */
269 /*  Return   :   PV_STATUS                                                  */
270 /*  Modified :                                                              */
271 /*           2/26/01
272             -modified threshold based on correlation coeff 0.75 only for mode H.263
273             -ncoefblck[] as input, keep position of last non-zero coeff*/
274 /*           8/10/01
275             -modified threshold based on correlation coeff 0.5
276             -used column threshold to speedup column DCT.
277             -used bitmap zigzag to speedup RunLevel().                      */
278 /* ======================================================================== */
279 
CodeMB_MPEG(VideoEncData * video,approxDCT * function,Int QP,Int ncoefblck[])280 PV_STATUS CodeMB_MPEG(VideoEncData *video, approxDCT *function, Int QP, Int ncoefblck[])
281 {
282     Int sad, k, CBP, mbnum = video->mbnum;
283     Short *output, *dataBlock;
284     UChar Mode = video->headerInfo.Mode[mbnum];
285     UChar *bitmapcol, *bitmaprow = video->bitmaprow;
286     UInt  *bitmapzz ;
287     Int dc_scaler = 8;
288     Vol *currVol = video->vol[video->currLayer];
289     Int intra = (Mode == MODE_INTRA || Mode == MODE_INTRA_Q);
290     Int *qmat;
291     Int dctMode, DctTh1, DctTh2, DctTh3, DctTh4;
292     Int ColTh;
293 
294     Int(*BlockQuantDequantMPEG)(Short *, Short *, Int, Int *,
295                                 UChar [], UChar *, UInt *, Int,  Int, Int);
296     Int(*BlockQuantDequantMPEGDC)(Short *, Short *, Int, Int *,
297                                   UChar [], UChar *, UInt *, Int);
298 
299     void (*BlockDCT1x1)(Short *, UChar *, UChar *, Int);
300     void (*BlockDCT2x2)(Short *, UChar *, UChar *, Int);
301     void (*BlockDCT4x4)(Short *, UChar *, UChar *, Int);
302     void (*BlockDCT8x8)(Short *, UChar *, UChar *, Int);
303 
304     /* motion comp. related var. */
305     Vop *currVop = video->currVop;
306     VideoEncFrameIO *inputFrame = video->input;
307     Int ind_x = video->outputMB->mb_x;
308     Int ind_y = video->outputMB->mb_y;
309     Int lx = currVop->pitch;
310     Int width = currVop->width;
311     UChar *rec, *input, *pred;
312     Int offset = QP >> 5;
313     Int offsetc = (offset >> 2) + (ind_x << 2); /* offset for chrom */
314     /*****************************/
315 
316     OSCL_UNUSED_ARG(function);
317 
318     output = video->outputMB->block[0];
319     CBP = 0;
320     QP = QP & 0x1F;
321 //  M4VENC_MEMSET(output,0,(sizeof(Short)<<6)*6); /* reset quantized coeff. to zero ,  7/24/01*/
322 
323     if (intra)
324     {
325         BlockDCT1x1 = &Block1x1DCTIntra;
326         BlockDCT2x2 = &Block2x2DCT_AANIntra;
327         BlockDCT4x4 = &Block4x4DCT_AANIntra;
328         BlockDCT8x8 = &BlockDCT_AANIntra;
329 
330         BlockQuantDequantMPEG = &BlockQuantDequantMPEGIntra;
331         BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCIntra;
332         dc_scaler = cal_dc_scalerENC(QP, 1); /* luminance blocks */
333         qmat = currVol->iqmat;
334         DctTh1 = (Int)(3 * dc_scaler);//2*dc_scaler);
335         DctTh2 = (Int)((1.25 * QP - 1) * qmat[1] * 0.45);//0.567);//0.567);
336         DctTh3 = (Int)((1.25 * QP - 1) * qmat[2] * 0.55);//1.162); /*  8/2/2001 */
337         DctTh4 = (Int)((1.25 * QP - 1) * qmat[32] * 0.8);//1.7583);//0.7942);
338         ColTh = ColThIntra[QP];
339     }
340     else
341     {
342         BlockDCT1x1 = &Block1x1DCTwSub;
343         BlockDCT2x2 = &Block2x2DCT_AANwSub;
344         BlockDCT4x4 = &Block4x4DCT_AANwSub;
345         BlockDCT8x8 = &BlockDCT_AANwSub;
346 
347         BlockQuantDequantMPEG = &BlockQuantDequantMPEGInter;
348         BlockQuantDequantMPEGDC = &BlockQuantDequantMPEGDCInter;
349         qmat = currVol->niqmat;
350         DctTh1 = (Int)(((QP << 1) - 0.5) * qmat[0] * 0.4);//0.2286);//0.3062);
351         DctTh2 = (Int)(((QP << 1) - 0.5) * qmat[1] * 0.45);//0.567);//0.4);
352         DctTh3 = (Int)(((QP << 1) - 0.5) * qmat[2] * 0.55);//1.162); /*  8/2/2001 */
353         DctTh4 = (Int)(((QP << 1) - 0.5) * qmat[32] * 0.8);//1.7583);//0.7942);
354         ColTh = ColThInter[QP];
355     }// get qmat, DctTh1, DctTh2, DctTh3
356 
357     rec = currVop->yChan + offset;
358     input = inputFrame->yChan + offset;
359     if (lx != width) input -= (ind_y << 9);  /* non-padded offset */
360 
361     dataBlock = video->dataBlock;
362     pred = video->predictedMB;
363 
364     for (k = 0; k < 6; k++)
365     {
366         CBP <<= 1;
367         bitmapcol = video->bitmapcol[k];
368         bitmapzz = video->bitmapzz[k];  /*  8/2/01 */
369         if (k < 4)
370         {//Y block
371             sad = video->mot[mbnum][k+1].sad;
372             if (k&1)
373             {
374                 rec += 8;
375                 input += 8;
376             }
377             else if (k == 2)
378             {
379                 dctMode = ((width << 3) - 8);
380                 input += dctMode;
381                 dctMode = ((lx << 3) - 8);
382                 rec += dctMode;
383             }
384         }
385         else
386         {// U, V block
387             if (k == 4)
388             {
389                 rec = currVop->uChan + offsetc;
390                 input = inputFrame->uChan + offsetc;
391                 if (lx != width) input -= (ind_y << 7);
392                 lx >>= 1;
393                 width >>= 1;
394                 if (intra)
395                 {
396                     dc_scaler = cal_dc_scalerENC(QP, 2); /* luminance blocks */
397                     DctTh1 = dc_scaler * 3;
398                     sad = getBlockSum(input, width);
399                 }
400                 else
401                     sad = Sad8x8(input, pred, width);
402             }
403             else
404             {
405                 rec = currVop->vChan + offsetc;
406                 input = inputFrame->vChan + offsetc;
407                 if (lx != width) input -= (ind_y << 7);
408                 if (intra)
409                     sad = getBlockSum(input, width);
410                 else
411                     sad = Sad8x8(input, pred, width);
412             }
413         }
414 
415         if (sad < DctTh1) /* all-zero */
416         {
417             dctMode = 0;
418             CBP |= 0;
419             ncoefblck[k] = 0;
420         }
421         else if (sad < DctTh2) /* DC-only */
422         {
423             dctMode = 1;
424             BlockDCT1x1(dataBlock, input, pred, width);
425 
426             CBP |= (*BlockQuantDequantMPEGDC)(dataBlock, output, QP, qmat,
427                                               bitmapcol, bitmaprow + k, bitmapzz, dc_scaler);
428             ncoefblck[k] = 1;
429         }
430         else
431         {
432             dataBlock[64] = ColTh;
433 
434             if (sad < DctTh3) /* 2x2-DCT */
435             {
436                 dctMode = 2;
437                 BlockDCT2x2(dataBlock, input, pred, width);
438                 ncoefblck[k] = 6;
439             }
440             else if (sad < DctTh4) /* 4x4 DCT */
441             {
442                 dctMode = 4;
443                 BlockDCT4x4(dataBlock, input, pred, width);
444                 ncoefblck[k] = 26;
445             }
446             else /* full-DCT */
447             {
448                 dctMode = 8;
449                 BlockDCT8x8(dataBlock, input, pred, width);
450                 ncoefblck[k] = 64;
451             }
452 
453             CBP |= (*BlockQuantDequantMPEG)(dataBlock, output, QP, qmat,
454                                             bitmapcol, bitmaprow + k, bitmapzz, dctMode, k, dc_scaler); //
455         }
456         dctMode = 8; /* for mismatch handle */
457         BlockIDCTMotionComp(dataBlock, bitmapcol, bitmaprow[k], dctMode, rec, pred, (lx << 1) | (intra));
458 
459         output += 64;
460         if (!(k&1))
461         {
462             pred += 8;
463         }
464         else
465         {
466             pred += 120;
467         }
468     }
469 
470     video->headerInfo.CBP[mbnum] = CBP; /*  5/18/2001 */
471     return PV_SUCCESS;
472 }
473 
474 #endif
475 
476 /* ======================================================================== */
477 /*  Function : getBlockSAV( )                                               */
478 /*  Date     : 8/10/2000                                                    */
479 /*  Purpose  : Get SAV for one block                                        */
480 /*  In/out   : block[64] contain one block data                             */
481 /*  Return   :                                                              */
482 /*  Modified :                                                              */
483 /* ======================================================================== */
484 /* can be written in MMX or SSE,  2/22/2001 */
getBlockSAV(Short block[])485 Int getBlockSAV(Short block[])
486 {
487     Int i, val, sav = 0;
488 
489     i = 8;
490     while (i--)
491     {
492         val = *block++;
493         if (val > 0)    sav += val;
494         else        sav -= val;
495         val = *block++;
496         if (val > 0)    sav += val;
497         else        sav -= val;
498         val = *block++;
499         if (val > 0)    sav += val;
500         else        sav -= val;
501         val = *block++;
502         if (val > 0)    sav += val;
503         else        sav -= val;
504         val = *block++;
505         if (val > 0)    sav += val;
506         else        sav -= val;
507         val = *block++;
508         if (val > 0)    sav += val;
509         else        sav -= val;
510         val = *block++;
511         if (val > 0)    sav += val;
512         else        sav -= val;
513         val = *block++;
514         if (val > 0)    sav += val;
515         else        sav -= val;
516     }
517 
518     return sav;
519 
520 }
521 
522 /* ======================================================================== */
523 /*  Function : Sad8x8( )                                                    */
524 /*  Date     : 8/10/2000                                                    */
525 /*  Purpose  : Find SAD between prev block and current block                */
526 /*  In/out   : Previous and current frame block pointers, and frame width   */
527 /*  Return   :                                                              */
528 /*  Modified :                                                              */
529 /*      8/15/01,  - do 4 pixel at a time    assuming 32 bit register        */
530 /* ======================================================================== */
Sad8x8(UChar * cur,UChar * prev,Int width)531 Int Sad8x8(UChar *cur, UChar *prev, Int width)
532 {
533     UChar *end = cur + (width << 3);
534     Int sad = 0;
535     Int *curInt = (Int*) cur;
536     Int *prevInt = (Int*) prev;
537     Int cur1, cur2, prev1, prev2;
538     UInt mask, sgn_msk = 0x80808080;
539     Int  sum2 = 0, sum4 = 0;
540     Int  tmp;
541     do
542     {
543         mask    = ~(0xFF00);
544         cur1    = curInt[1];        /* load cur[4..7] */
545         cur2    = curInt[0];
546         curInt += (width >> 2);     /* load cur[0..3] and +=lx */
547         prev1   = prevInt[1];
548         prev2   = prevInt[0];
549         prevInt += 4;
550 
551         tmp     = prev2 ^ cur2;
552         cur2    = prev2 - cur2;
553         tmp     = tmp ^ cur2;       /* (^)^(-) last bit is one if carry */
554         tmp     = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */
555         if (cur2 < 0)   tmp = tmp | 0x80000000; /* corcurt sign of first byte */
556         tmp     = (tmp << 8) - tmp;     /* carry borrowed bytes are marked with 0x1FE */
557         cur2    = cur2 + (tmp >> 7);     /* negative bytes is added with 0xFF, -1 */
558         cur2    = cur2 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */
559 
560         tmp     = prev1 ^ cur1;
561         cur1    = prev1 - cur1;
562         tmp     = tmp ^ cur1;       /* (^)^(-) last bit is one if carry */
563         tmp     = sgn_msk & ((UInt)tmp >> 1); /* check the sign of each byte */
564         if (cur1 < 0)   tmp = tmp | 0x80000000; /* corcurt sign of first byte */
565         tmp     = (tmp << 8) - tmp;     /* carry borrowed bytes are marked with 0x1FE */
566         cur1    = cur1 + (tmp >> 7);     /* negative bytes is added with 0xFF, -1 */
567         cur1    = cur1 ^(tmp >> 7); /* take absolute by inverting bits (EOR) */
568 
569         sum4    = sum4 + cur1;
570         cur1    = cur1 & (mask << 8);   /* mask first and third bytes */
571         sum2    = sum2 + ((UInt)cur1 >> 8);
572         sum4    = sum4 + cur2;
573         cur2    = cur2 & (mask << 8);   /* mask first and third bytes */
574         sum2    = sum2 + ((UInt)cur2 >> 8);
575     }
576     while ((UInt)curInt < (UInt)end);
577 
578     cur1 = sum4 - (sum2 << 8);  /* get even-sum */
579     cur1 = cur1 + sum2;         /* add 16 bit even-sum and odd-sum*/
580     cur1 = cur1 + (cur1 << 16); /* add upper and lower 16 bit sum */
581     sad  = ((UInt)cur1 >> 16);  /* take upper 16 bit */
582     return sad;
583 }
584 
585 /* ======================================================================== */
586 /*  Function : getBlockSum( )                                               */
587 /*  Date     : 8/10/2000                                                    */
588 /*  Purpose  : Find summation of value within a block.                      */
589 /*  In/out   : Pointer to current block in a frame and frame width          */
590 /*  Return   :                                                              */
591 /*  Modified :                                                              */
592 /*          8/15/01,  - SIMD 4 pixels at a time                         */
593 /* ======================================================================== */
594 
getBlockSum(UChar * cur,Int width)595 Int getBlockSum(UChar *cur, Int width)
596 {
597     Int sad = 0, sum4 = 0, sum2 = 0;
598     UChar *end = cur + (width << 3);
599     Int *curInt = (Int*)cur;
600     UInt mask   = ~(0xFF00);
601     Int load1, load2;
602 
603     do
604     {
605         load1 = curInt[1];
606         load2 = curInt[0];
607         curInt += (width >> 2);
608         sum4 += load1;
609         load1 = load1 & (mask << 8); /* even bytes */
610         sum2 += ((UInt)load1 >> 8); /* sum even bytes, 16 bit */
611         sum4 += load2;
612         load2 = load2 & (mask << 8); /* even bytes */
613         sum2 += ((UInt)load2 >> 8); /* sum even bytes, 16 bit */
614     }
615     while ((UInt)curInt < (UInt)end);
616     load1 = sum4 - (sum2 << 8);     /* get even-sum */
617     load1 = load1 + sum2;           /* add 16 bit even-sum and odd-sum*/
618     load1 = load1 + (load1 << 16);  /* add upper and lower 16 bit sum */
619     sad  = ((UInt)load1 >> 16); /* take upper 16 bit */
620 
621     return sad;
622 }
623 
624