• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "oscl_base_macros.h"  // for OSCL_UNUSED_ARG
19 #include "mp4lib_int.h"
20 #include "mp4enc_lib.h"
21 
22 //const static Int roundtab4[] = {0,1,1,1};
23 //const static Int roundtab8[] = {0,0,1,1,1,1,1,2};
24 //const static Int roundtab12[] = {0,0,0,1,1,1,1,1,1,1,2,2};
25 const static Int roundtab16[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2};
26 
27 #define FORWARD_MODE    1
28 #define BACKWARD_MODE   2
29 #define BIDIRECTION_MODE    3
30 #define DIRECT_MODE         4
31 
32 #ifdef __cplusplus
33 extern "C"
34 {
35 #endif
36     /*Function Prototype */
37     /* no-edge padding */
38     Int EncGetPredOutside(Int xpos, Int ypos, UChar *c_prev, UChar *rec,
39     Int width, Int height, Int rnd1);
40 
41     void Copy_MB_from_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int width);
42     void Copy_B_from_Vop(UChar *comp, Int cChan[], Int width);
43     void Copy_MB_into_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int width);
44     void Copy_B_into_Vop(UChar *comp, Int cChan[], Int width);
45     void get_MB(UChar *c_prev, UChar *c_prev_u  , UChar *c_prev_v,
46                 Short mb[6][64], Int lx, Int lx_uv);
47 
48     Int GetPredAdvBy0x0(
49         UChar *c_prev,      /* i */
50         UChar *pred_block,      /* i */
51         Int lx,     /* i */
52         Int rnd1 /* i */
53     );
54 
55     Int GetPredAdvBy0x1(
56         UChar *c_prev,      /* i */
57         UChar *pred_block,      /* i */
58         Int lx,     /* i */
59         Int rnd1 /* i */
60     );
61 
62     Int GetPredAdvBy1x0(
63         UChar *c_prev,      /* i */
64         UChar *pred_block,      /* i */
65         Int lx,     /* i */
66         Int rnd1 /* i */
67     );
68 
69     Int GetPredAdvBy1x1(
70         UChar *c_prev,      /* i */
71         UChar *pred_block,      /* i */
72         Int lx,     /* i */
73         Int rnd1 /* i */
74     );
75 
76     static Int(*const GetPredAdvBTable[2][2])(UChar*, UChar*, Int, Int) =
77     {
78         {&GetPredAdvBy0x0, &GetPredAdvBy0x1},
79         {&GetPredAdvBy1x0, &GetPredAdvBy1x1}
80     };
81 
82 
83 #ifdef __cplusplus
84 }
85 #endif
86 
87 
88 /* ======================================================================== */
89 /*  Function : getMotionCompensatedMB( )                                    */
90 /*  Date     : 4/17/2001                                                    */
91 /*  Purpose  : Get the motion-compensated block into video->predictedMB     */
92 /*              (only the prediction is written here; no error MB)          */
93 /*              modified from MBMotionComp() function in the decoder        */
94 /*  In/out   :                                                              */
95 /*  Return   :                                                              */
96 /*  Modified :                                                              */
97 /* ======================================================================== */
98 
/*
 * Build the motion-compensated prediction of the current macroblock.
 *
 * video  : encoder state; the macroblock index is video->mbnum, the
 *          reference frame is video->forwardRefVop and the result goes to
 *          video->predictedMB (luma at +0, first chroma block at +256,
 *          second chroma block at +264).
 * ind_x  : macroblock column, converted to a pixel position with << 4.
 * ind_y  : macroblock row, converted to a pixel position with << 4.
 * offset : unused.
 *
 * Only MODE_INTER, MODE_INTER_Q and (unless NO_INTER4V is defined)
 * MODE_INTER4V produce a prediction; any other mode returns without
 * touching the prediction buffer.
 */
void getMotionCompensatedMB(VideoEncData *video, Int ind_x, Int ind_y, Int offset)
{
    Vop *const refVop = video->forwardRefVop;
    Vop *const curVop = video->currVop;
    const Int mbIndex = video->mbnum;
    MOT *const mv = video->mot[mbIndex];
    const Int mbMode = video->headerInfo.Mode[mbIndex];
    const Int rounding = (Int)(1 - curVop->roundingType);

    const Int lumaPitch = curVop->pitch;
    const Int chromaPitch = lumaPitch >> 1;
    const Int chromaHeight = (curVop->height) >> 1;

    /* top-left pixel of the macroblock in the luma plane */
    const Int mbX = ind_x << 4;
    const Int mbY = ind_y << 4;

    UChar *const lumaPred = video->predictedMB;
    UChar *const cbPred = video->predictedMB + 256;
    UChar *const crPred = video->predictedMB + 264;

    Int chromaDx, chromaDy;

    OSCL_UNUSED_ARG(offset);

    if (mbMode == MODE_INTER || mbMode == MODE_INTER_Q)
    {
        /* one vector for the whole macroblock */
        const Int mvx = mv[0].x;
        const Int mvy = mv[0].y;

        /* luma: position is passed in half-pel units */
        EncPrediction_INTER((mbX << 1) + mvx, (mbY << 1) + mvy,
                            refVop->yChan, lumaPred, lumaPitch, rounding);

        /* chroma vector: halve the luma vector, forcing the half-pel bit
           whenever the luma vector is not a multiple of four */
        chromaDx = ((mvx & 3) == 0) ? (mvx >> 1) : ((mvx >> 1) | 1);
        chromaDy = ((mvy & 3) == 0) ? (mvy >> 1) : ((mvy >> 1) | 1);
    }
#ifndef NO_INTER4V
    else if (mbMode == MODE_INTER4V)
    {
        Int sumX, sumY;

        /* luma: one vector per 8x8 block, mv[1]..mv[4] */
        EncPrediction_INTER4V(mbX, mbY, mv, refVop->yChan, lumaPred,
                              lumaPitch, rounding);

        sumX = mv[1].x + mv[2].x + mv[3].x + mv[4].x;
        sumY = mv[1].y + mv[2].y + mv[3].y + mv[4].y;

        /* chroma vector from the summed luma vectors via roundtab16 */
        chromaDx = PV_SIGN(sumX) * (roundtab16[(PV_ABS(sumX)) & 0xF] +
                                    (((PV_ABS(sumX)) >> 4) << 1));
        chromaDy = PV_SIGN(sumY) * (roundtab16[(PV_ABS(sumY)) & 0xF] +
                                    (((PV_ABS(sumY)) >> 4) << 1));
    }
#endif
    else
    {
        /* no prediction is produced for any other mode */
        return;
    }

    /* both inter paths finish with the same chroma prediction call */
    EncPrediction_Chrom(mbX + chromaDx, mbY + chromaDy,
                        refVop->uChan, refVop->vChan, cbPred, crPred,
                        chromaPitch, (curVop->width) >> 1, chromaHeight,
                        rounding);
}
193 
194 /***************************************************************************
195     Function:   EncPrediction_INTER
196     Date:       04/17/2001
197     Purpose:    Get predicted area for luminance and compensate with the residue.
198                 Modified from luminance_pred_mode_inter() in decoder.
199 ***************************************************************************/
200 
/*
 * Luma prediction for a macroblock that uses a single motion vector.
 *
 * xpred, ypred : position of the macroblock's top-left sample in the
 *                reference plane, in half-pel units (bit 0 selects the
 *                half-pel predictor, the remaining bits the full-pel
 *                position).
 * c_prev       : reference luma plane, row stride lx.
 * c_rec        : destination; the four 8x8 blocks are written as a 16x16
 *                area with a row stride of 16 bytes.
 * round1       : rounding term handed to the block predictors.
 */
void EncPrediction_INTER(
    Int xpred,
    Int ypred,
    UChar *c_prev,
    UChar *c_rec,
    Int lx,
    Int round1
)
{
    /* the same predictor serves all four blocks */
    const Int halfY = ypred & 1;
    const Int halfX = xpred & 1;
    UChar *const src = c_prev + ((xpred >> 1) + ((ypred >> 1) * lx));
    Int blkRow;

    for (blkRow = 0; blkRow < 2; blkRow++)
    {
        /* eight rows down in the reference plane and in the 16-wide buffer */
        UChar *const srcRow = src + blkRow * (lx << 3);
        UChar *const dstRow = c_rec + blkRow * (16 << 3);

        /* left block, then right block */
        GetPredAdvBTable[halfY][halfX](srcRow, dstRow, lx, round1);
        GetPredAdvBTable[halfY][halfX](srcRow + B_SIZE, dstRow + B_SIZE,
                                       lx, round1);
    }
}
231 
232 #ifndef NO_INTER4V
233 /***************************************************************************
234     Function:   EncPrediction_INTER4V
235     Date:       04/17/2001
236     Purpose:    Get predicted area for luminance and compensate with the residue.
237                 Modified from luminance_pred_mode_inter4v() in decoder.
238 ***************************************************************************/
239 
/*
 * Luma prediction for a macroblock that carries one vector per 8x8 block.
 *
 * xpos, ypos : full-pel position of the macroblock's top-left sample.
 * mot        : vectors; mot[1]..mot[4] belong to the top-left, top-right,
 *              bottom-left and bottom-right block respectively.
 * c_prev     : reference luma plane, row stride lx.
 * c_rec      : destination, laid out as a 16x16 area with 16-byte rows.
 * round1     : rounding term handed to the block predictors.
 */
void EncPrediction_INTER4V(
    Int xpos,
    Int ypos,
    MOT *mot,
    UChar *c_prev,
    UChar *c_rec,
    Int lx,
    Int round1
)
{
    Int blk;

    for (blk = 0; blk < 4; blk++)
    {
        /* block origin inside the macroblock */
        const Int blkX = (blk & 1) ? B_SIZE : 0;
        const Int blkY = (blk & 2) ? B_SIZE : 0;

        /* half-pel position of this block in the reference plane */
        const Int hx = (Int)(((xpos + blkX) << 1) + mot[blk + 1].x);
        const Int hy = (Int)(((ypos + blkY) << 1) + mot[blk + 1].y);

        /* lower blocks start eight 16-byte rows further down */
        UChar *const dst = c_rec + ((blk & 2) ? (16 << 3) : 0) + blkX;

        GetPredAdvBTable[hy & 1][hx & 1](c_prev + (hx >> 1) + ((hy >> 1) * lx),
                                         dst, lx, round1);
    }
}
284 #endif /* NO_INTER4V */
285 
286 /***************************************************************************
287     Function:   EncPrediction_Chrom
288     Date:       04/17/2001
289     Purpose:    Get predicted area for chrominance and compensate with the residue.
290                 Modified from chrominance_pred() in decoder.
291 ***************************************************************************/
292 
/*
 * Chroma prediction for one macroblock (both chroma blocks).
 *
 * xpred, ypred      : block position in the chroma planes, half-pel units.
 * cu_prev, cv_prev  : reference chroma planes, row stride lx.
 * cu_rec, cv_rec    : destinations for the two predicted blocks.
 * width_uv,
 * height_uv         : chroma plane dimensions used for the bounds test and
 *                     passed on to EncGetPredOutside().
 * round1            : rounding term handed to the predictors.
 *
 * When the position lies within [0, 2*dim - 2*B_SIZE] in both directions
 * the table predictors are used directly; otherwise the work is delegated
 * to EncGetPredOutside().
 */
void EncPrediction_Chrom(
    Int xpred,
    Int ypred,
    UChar *cu_prev,
    UChar *cv_prev,
    UChar *cu_rec,
    UChar *cv_rec,
    Int lx,
    Int width_uv,
    Int height_uv,
    Int round1
)
{
    const Int lastX = (width_uv << 1) - (2 * B_SIZE);
    const Int lastY = (height_uv << 1) - (2 * B_SIZE);
    const Int outside = (xpred < 0) || (xpred > lastX) ||
                        (ypred < 0) || (ypred > lastY);

    if (outside)
    {
        /* first chroma block, then the second */
        EncGetPredOutside(xpred, ypred, cu_prev, cu_rec,
                          width_uv, height_uv, round1);
        EncGetPredOutside(xpred, ypred, cv_prev, cv_rec,
                          width_uv, height_uv, round1);
    }
    else
    {
        /* both planes share the full-pel offset and the half-pel bits */
        const Int srcOffset = (xpred >> 1) + ((ypred >> 1) * lx);
        const Int halfY = ypred & 1;
        const Int halfX = xpred & 1;

        GetPredAdvBTable[halfY][halfX](cu_prev + srcOffset, cu_rec, lx, round1);
        GetPredAdvBTable[halfY][halfX](cv_prev + srcOffset, cv_rec, lx, round1);
    }
}
342 /***************************************************************************
343     Function:   GetPredAdvancedB
344     Date:       04/17/2001
345     Purpose:    Get predicted area (block) and compensate with the residue.
346                 - modified from GetPredAdvancedBAdd in decoder.
347     Input/Output:
348     Modified:
349 ***************************************************************************/
350 
/*
 * Full-pel predictor: copy an 8x8 block from the reference plane.
 *
 * prev : top-left sample of the source block; rows are lx bytes apart.
 * rec  : destination; rows are 16 bytes apart, 8 bytes written per row.
 * lx   : row stride of the reference plane.
 * rnd  : unused (no interpolation takes place).
 *
 * Returns 1 in every case.
 *
 * The copy is done byte by byte on purpose.  The earlier word-based code
 * cast the pointer to ULong, read and wrote the UChar buffers through
 * ULong pointers, and rebuilt unaligned words with shifts.  That is only
 * correct when ULong is exactly 32 bits wide, the machine is little-endian
 * and rec is 4-byte aligned, and it also read up to three bytes on either
 * side of the block.  This form produces the same bytes without any of
 * those requirements.
 */
Int GetPredAdvBy0x0(
    UChar *prev,        /* i */
    UChar *rec,         /* o */
    Int lx,             /* i */
    Int rnd             /* i, unused */
)
{
    enum { blk_dim = 8, rec_pitch = 16 };
    Int row, col;

    OSCL_UNUSED_ARG(rnd);

    for (row = 0; row < blk_dim; row++)
    {
        const UChar *src = prev + row * lx;
        UChar *dst = rec + row * rec_pitch;

        for (col = 0; col < blk_dim; col++)
        {
            dst[col] = src[col];
        }
    }

    return 1;
}
449 /**************************************************************************/
/*
 * Horizontal half-pel predictor for one 8x8 block.
 *
 * Each output sample is the average of a reference sample and its right
 * neighbour:
 *     rec[x] = (prev[x] + prev[x + 1] + r) >> 1
 * with r = 1 when rnd1 == 1 and r = 0 for any other value of rnd1.
 *
 * prev : top-left sample of the source block; rows are lx bytes apart and
 *        nine samples (x = 0..8) are read per row.
 * rec  : destination; rows are 16 bytes apart, 8 bytes written per row.
 * lx   : row stride of the reference plane.
 * rnd1 : rounding selector, see above.
 *
 * Returns 1 in every case.
 *
 * The earlier version packed four samples per ULong and had one copy of
 * the loop per source alignment and rounding mode.  It depended on ULong
 * being exactly 32 bits, on little-endian byte order and on a 4-byte
 * aligned rec, accessed the UChar buffers through ULong pointers, and read
 * whole words beyond the nine samples actually needed.  The byte-wise loop
 * below yields identical samples without those assumptions.
 * NOTE(review): like the code it replaces, this assumes B_SIZE is 8.
 */
Int GetPredAdvBy0x1(
    UChar *prev,        /* i */
    UChar *rec,         /* o */
    Int lx,             /* i */
    Int rnd1            /* i */
)
{
    enum { rec_pitch = 16 };
    const Int bias = (rnd1 == 1) ? 1 : 0;
    Int row, col;

    for (row = 0; row < B_SIZE; row++)
    {
        const UChar *src = prev + row * lx;
        UChar *dst = rec + row * rec_pitch;

        for (col = 0; col < B_SIZE; col++)
        {
            /* the sum is at most 255 + 255 + 1, so the result fits a byte */
            dst[col] = (UChar)((src[col] + src[col + 1] + bias) >> 1);
        }
    }

    return 1;
}
773 
774 /**************************************************************************/
/*
 * GetPredAdvBy1x0
 *
 * Fills an 8-pixel-wide, B_SIZE-row prediction block in which every output
 * pixel is the average of a reference pixel and the pixel lx bytes after it
 * (the same column on the following reference line).
 *
 * Only word-aligned loads are issued on the reference: prev is backed up to
 * the enclosing word boundary and, when it was not aligned, each group of
 * four pixels is spliced together from two neighbouring words.  An unaligned
 * prev therefore reads 12 bytes per line, an aligned one reads 8.
 *
 * Four pixels are averaged at once.  Per byte,
 *     (a >> 1) + (b >> 1) + (a & b & 1)  ==  (a + b) >> 1
 *     (a >> 1) + (b >> 1) + ((a | b) & 1) ==  (a + b + 1) >> 1
 * and masking with 0xFE before the shift keeps a pixel's low bit from
 * leaking into its neighbour.
 *
 * The word-wise arithmetic assumes ULong holds exactly four pixels (32 bits)
 * with the lowest-addressed pixel in the least-significant byte, and that lx
 * keeps the lower line on the same word alignment as the upper one.
 *
 * Always returns 1.
 */
Int GetPredAdvBy1x0(
    UChar *prev,        /* i: first reference pixel, any byte alignment */
    UChar *rec,         /* o: predicted block, written as words, rows 16 bytes apart */
    Int lx,             /* i: byte distance between reference lines */
    Int rnd1            /* i: 1 selects (a+b+1)>>1, any other value (a+b)>>1 */
)
{
    Int row;                    /* loop variable */
    Int offset;                 /* moves prev from the end of one row to the next row */
    Int skew;                   /* byte position of prev inside its word: 0..3 */
    Int rsh, lsh;               /* shift pair that splices two aligned words */
    Int round_up;               /* non-zero when rnd1 == 1 */
    ULong mask;                 /* 0xFEFEFEFE: upper seven bits of every byte */
    ULong top, bot;             /* four pixels of the upper / lower line */
    ULong top_next, bot_next;   /* following aligned word of each line */
    ULong carry;                /* per-byte rounding bit */

    offset = lx - B_SIZE;
    round_up = (rnd1 == 1);
    mask = 0xFEFEFEFE;

    skew = (ULong)prev & 3;
    rsh = skew << 3;
    lsh = 32 - rsh;             /* never used as a shift count when skew == 0 */
    prev -= skew;               /* back up to the enclosing word boundary */

    top_next = 0;
    bot_next = 0;

    for (row = B_SIZE; row > 0; row--)
    {
        /* ---- pixels 0..3 of this row ---- */
        top = *((ULong*)prev);
        bot = *((ULong*)(prev + lx));
        prev += 4;
        if (skew != 0)
        {
            /* the wanted pixels straddle two words: drop the leading */
            /* bytes of the first and append the head of the second   */
            top_next = *((ULong*)prev);
            bot_next = *((ULong*)(prev + lx));
            top = (top >> rsh) | (top_next << lsh);
            bot = (bot >> rsh) | (bot_next << lsh);
        }
        carry = (round_up ? (top | bot) : (top & bot)) & ~mask;
        *((ULong*)rec) = ((top & mask) >> 1) + ((bot & mask) >> 1) + carry;

        /* ---- pixels 4..7 of this row ---- */
        if (skew != 0)
        {
            /* the word fetched above supplies the leading pixels */
            top = top_next;
            bot = bot_next;
            prev += 4;
            top_next = *((ULong*)prev);
            bot_next = *((ULong*)(prev + lx));
            top = (top >> rsh) | (top_next << lsh);
            bot = (bot >> rsh) | (bot_next << lsh);
        }
        else
        {
            top = *((ULong*)prev);
            bot = *((ULong*)(prev + lx));
            prev += 4;
        }
        carry = (round_up ? (top | bot) : (top & bot)) & ~mask;
        *((ULong*)(rec + 4)) = ((top & mask) >> 1) + ((bot & mask) >> 1) + carry;

        rec += 16;              /* next output row */
        prev += offset;         /* next reference row */
    }

    return 1;
}
1116 
1117 /**********************************************************************************/
GetPredAdvBy1x1(UChar * prev,UChar * rec,Int lx,Int rnd1)1118 Int GetPredAdvBy1x1(
1119     UChar *prev,        /* i */
1120     UChar *rec,     /* i */
1121     Int lx,     /* i */
1122     Int rnd1 /* i */
1123 )
1124 {
1125     Int i;      /* loop variable */
1126     Int offset;
1127     ULong  x1, x2, x1m, x2m, y1, y2, y1m, y2m; /* new way */
1128     Int tmp;
1129     Int rnd2;
1130     ULong mask;
1131 
1132     /* initialize offset to adjust pixel counter */
1133     /*    the next row; full-pel resolution      */
1134     offset = lx - B_SIZE; /* offset for prev */
1135 
1136     rnd2 = rnd1 + 1;
1137     rnd2 |= (rnd2 << 8);
1138     rnd2 |= (rnd2 << 16);
1139 
1140     mask = 0x3F;
1141     mask |= (mask << 8);
1142     mask |= (mask << 16); /* 0x3f3f3f3f */
1143 
1144     tmp = (ULong)prev & 3;
1145 
1146     rec -= 4; /* preset */
1147 
1148     if (tmp == 0) /* word-aligned */
1149     {
1150         for (i = B_SIZE; i > 0; i--)
1151         {
1152             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1153             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1154             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1155             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1156 
1157             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1158             x2m = (x2 >> 2) & mask;
1159             x1 = x1 ^(x1m << 2);
1160             x2 = x2 ^(x2m << 2);
1161             x1m += x2m;
1162             x1 += x2;
1163 
1164             /* x2m, x2 free */
1165             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1166             y2m = (y2 >> 2) & mask;
1167             y1 = y1 ^(y1m << 2);
1168             y2 = y2 ^(y2m << 2);
1169             y1m += y2m;
1170             y1 += y2;
1171 
1172             /* y2m, y2 free */
1173             /* x2m, x2 free */
1174             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1175             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1176             x2m = (x2 >> 2) & mask;
1177             y2m = (y2 >> 2) & mask;
1178             x2 = x2 ^(x2m << 2);
1179             y2 = y2 ^(y2m << 2);
1180             x2m += y2m;
1181             x2 += y2;
1182             /* y2m, y2 free */
1183 
1184             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1185             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1186             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1187             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1188             /* x1, y1, x2 */
1189 
1190             y2m = x1m >> 8;
1191             y2 = x1 >> 8;
1192             y2m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
1193             y2 |= (y1 << 24);
1194             x1m += y2m;  /* a3+b3+a4+b4, ....., a0+b0+a1+b1 */
1195             x1 += y2;
1196             x1 += rnd2;
1197             x1 &= (mask << 2);
1198             x1m += (x1 >> 2);
1199             *((ULong*)(rec += 4)) = x1m; /* save x1m */
1200 
1201             y2m = y1m >> 8;
1202             y2 = y1 >> 8;
1203             y2m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
1204             y2 |= (x2 << 24);
1205             y1m += y2m;  /* a7+b7+a8+b8, ....., a4+b4+a5+b5 */
1206             y1 += y2;
1207             y1 += rnd2;
1208             y1 &= (mask << 2);
1209             y1m += (y1 >> 2);
1210             *((ULong*)(rec += 4)) = y1m; /* save y1m */
1211 
1212             rec += 8;
1213             prev += offset;
1214         }
1215 
1216         return 1;
1217     }
1218     else if (tmp == 1)
1219     {
1220         prev--; /* to word-aligned */
1221         for (i = B_SIZE; i > 0; i--)
1222         {
1223             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1224             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1225             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1226             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1227 
1228             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1229             x2m = (x2 >> 2) & mask;
1230             x1 = x1 ^(x1m << 2);
1231             x2 = x2 ^(x2m << 2);
1232             x1m += x2m;
1233             x1 += x2;
1234 
1235             /* x2m, x2 free */
1236             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1237             y2m = (y2 >> 2) & mask;
1238             y1 = y1 ^(y1m << 2);
1239             y2 = y2 ^(y2m << 2);
1240             y1m += y2m;
1241             y1 += y2;
1242 
1243             /* y2m, y2 free */
1244             /* x2m, x2 free */
1245             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1246             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1247             x2m = (x2 >> 2) & mask;
1248             y2m = (y2 >> 2) & mask;
1249             x2 = x2 ^(x2m << 2);
1250             y2 = y2 ^(y2m << 2);
1251             x2m += y2m;
1252             x2 += y2;
1253             /* y2m, y2 free */
1254 
1255             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1256             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1257             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1258             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1259             /* x1, y1, x2 */
1260 
1261             x1m >>= 8 ;
1262             x1 >>= 8;
1263             x1m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
1264             x1 |= (y1 << 24);
1265             y2m = (y1m << 16);
1266             y2 = (y1 << 16);
1267             y2m |= (x1m >> 8); /* a5+b5, a4+b4, a3+b3, a2+b2 */
1268             y2 |= (x1 >> 8);
1269             x1 += rnd2;
1270             x1m += y2m;  /* a4+b4+a5+b5, ....., a1+b1+a2+b2 */
1271             x1 += y2;
1272             x1 &= (mask << 2);
1273             x1m += (x1 >> 2);
1274             *((ULong*)(rec += 4)) = x1m; /* save x1m */
1275 
1276             y1m >>= 8;
1277             y1 >>= 8;
1278             y1m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
1279             y1 |= (x2 << 24);
1280             y2m = (x2m << 16);
1281             y2 = (x2 << 16);
1282             y2m |= (y1m >> 8); /*  a9+b9, a8+b8, a7+b7, a6+b6,*/
1283             y2 |= (y1 >> 8);
1284             y1 += rnd2;
1285             y1m += y2m;  /* a8+b8+a9+b9, ....., a5+b5+a6+b6 */
1286             y1 += y2;
1287             y1 &= (mask << 2);
1288             y1m += (y1 >> 2);
1289             *((ULong*)(rec += 4)) = y1m; /* save y1m */
1290 
1291             rec += 8;
1292             prev += offset;
1293         }
1294         return 1;
1295     }
1296     else if (tmp == 2)
1297     {
1298         prev -= 2; /* to word-aligned */
1299         for (i = B_SIZE; i > 0; i--)
1300         {
1301             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1302             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1303             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1304             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1305 
1306             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1307             x2m = (x2 >> 2) & mask;
1308             x1 = x1 ^(x1m << 2);
1309             x2 = x2 ^(x2m << 2);
1310             x1m += x2m;
1311             x1 += x2;
1312 
1313             /* x2m, x2 free */
1314             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1315             y2m = (y2 >> 2) & mask;
1316             y1 = y1 ^(y1m << 2);
1317             y2 = y2 ^(y2m << 2);
1318             y1m += y2m;
1319             y1 += y2;
1320 
1321             /* y2m, y2 free */
1322             /* x2m, x2 free */
1323             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1324             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1325             x2m = (x2 >> 2) & mask;
1326             y2m = (y2 >> 2) & mask;
1327             x2 = x2 ^(x2m << 2);
1328             y2 = y2 ^(y2m << 2);
1329             x2m += y2m;
1330             x2 += y2;
1331             /* y2m, y2 free */
1332 
1333             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1334             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1335             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1336             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1337             /* x1, y1, x2 */
1338 
1339             x1m >>= 16 ;
1340             x1 >>= 16;
1341             x1m |= (y1m << 16);  /* a5+b5, a4+b4, a3+b3, a2+b2 */
1342             x1 |= (y1 << 16);
1343             y2m = (y1m << 8);
1344             y2 = (y1 << 8);
1345             y2m |= (x1m >> 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
1346             y2 |= (x1 >> 8);
1347             x1 += rnd2;
1348             x1m += y2m;  /* a5+b5+a6+b6, ....., a2+b2+a3+b3 */
1349             x1 += y2;
1350             x1 &= (mask << 2);
1351             x1m += (x1 >> 2);
1352             *((ULong*)(rec += 4)) = x1m; /* save x1m */
1353 
1354             y1m >>= 16;
1355             y1 >>= 16;
1356             y1m |= (x2m << 16); /* a9+b9, a8+b8, a7+b7, a6+b6 */
1357             y1 |= (x2 << 16);
1358             y2m = (x2m << 8);
1359             y2 = (x2 << 8);
1360             y2m |= (y1m >> 8); /*  a10+b10, a9+b9, a8+b8, a7+b7,*/
1361             y2 |= (y1 >> 8);
1362             y1 += rnd2;
1363             y1m += y2m;  /* a9+b9+a10+b10, ....., a6+b6+a7+b7 */
1364             y1 += y2;
1365             y1 &= (mask << 2);
1366             y1m += (y1 >> 2);
1367             *((ULong*)(rec += 4)) = y1m; /* save y1m */
1368 
1369             rec += 8;
1370             prev += offset;
1371         }
1372         return 1;
1373     }
1374     else /* tmp == 3 */
1375     {
1376         prev -= 3; /* to word-aligned */
1377         for (i = B_SIZE; i > 0; i--)
1378         {
1379             x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1380             x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1381             y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1382             y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1383 
1384             x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1385             x2m = (x2 >> 2) & mask;
1386             x1 = x1 ^(x1m << 2);
1387             x2 = x2 ^(x2m << 2);
1388             x1m += x2m;
1389             x1 += x2;
1390 
1391             /* x2m, x2 free */
1392             y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1393             y2m = (y2 >> 2) & mask;
1394             y1 = y1 ^(y1m << 2);
1395             y2 = y2 ^(y2m << 2);
1396             y1m += y2m;
1397             y1 += y2;
1398 
1399             /* y2m, y2 free */
1400             /* x2m, x2 free */
1401             x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1402             y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1403             x2m = (x2 >> 2) & mask;
1404             y2m = (y2 >> 2) & mask;
1405             x2 = x2 ^(x2m << 2);
1406             y2 = y2 ^(y2m << 2);
1407             x2m += y2m;
1408             x2 += y2;
1409             /* y2m, y2 free */
1410 
1411             /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1412             /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1413             /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1414             /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1415             /* x1, y1, x2 */
1416 
1417             x1m >>= 24 ;
1418             x1 >>= 24;
1419             x1m |= (y1m << 8);  /* a6+b6, a5+b5, a4+b4, a3+b3 */
1420             x1 |= (y1 << 8);
1421 
1422             x1m += y1m;  /* a6+b6+a7+b7, ....., a3+b3+a4+b4 */
1423             x1 += y1;
1424             x1 += rnd2;
1425             x1 &= (mask << 2);
1426             x1m += (x1 >> 2);
1427             *((ULong*)(rec += 4)) = x1m; /* save x1m */
1428 
1429             y1m >>= 24;
1430             y1 >>= 24;
1431             y1m |= (x2m << 8); /* a10+b10, a9+b9, a8+b8, a7+b7 */
1432             y1 |= (x2 << 8);
1433             y1m += x2m;  /* a10+b10+a11+b11, ....., a7+b7+a8+b8 */
1434             y1 += x2;
1435             y1 += rnd2;
1436             y1 &= (mask << 2);
1437             y1m += (y1 >> 2);
1438             *((ULong*)(rec += 4)) = y1m; /* save y1m */
1439 
1440             rec += 8;
1441             prev += offset;
1442         }
1443         return 1;
1444     }
1445 }
1446 
1447 
1448 /*=============================================================================
1449     Function:   EncGetPredOutside
1450     Date:       04/17/2001
1451     Purpose:    - modified from GetPredOutside in the decoder.
1452     Modified:    09/24/05
1453                 use the existing non-initialized padded region
1454 =============================================================================*/
1455 // not really needed since padding is included
/*
 * PAD_CORNER: copy the single pixel at *src into an area 8 bytes wide and
 * 8 rows high whose first byte is *dst; rows are lx bytes apart.
 * Works on the caller's src, dst, lx and temp variables and expands to a
 * complete brace-enclosed statement.
 * On exit temp holds the pixel repeated in all four bytes, dst points at the
 * eighth row written and src is unchanged.
 */
#define PAD_CORNER  { int pad_rows_left; \
                      temp = *src; \
                      temp |= (temp<<8); \
                      temp |= (temp<<16); \
                      *((ULong*)dst) = temp; \
                      *((ULong*)(dst+4)) = temp; \
                      for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                      { \
                          dst += lx; \
                          *((ULong*)dst) = temp; \
                          *((ULong*)(dst+4)) = temp; \
                      } }
1475 
/*
 * PAD_ROW: copy the 8 bytes at src into 8 successive rows starting at dst;
 * rows are lx bytes apart.
 * Works on the caller's src, dst, lx, temp and temp2 variables and expands
 * to a complete brace-enclosed statement.
 * On exit temp and temp2 hold the first and second source word, dst points
 * at the eighth row written and src is unchanged.
 */
#define PAD_ROW     { int pad_rows_left; \
                      temp = *((ULong*)src); \
                      temp2 = *((ULong*)(src+4)); \
                      *((ULong*)dst) = temp; \
                      *((ULong*)(dst+4)) = temp2; \
                      for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                      { \
                          dst += lx; \
                          *((ULong*)dst) = temp; \
                          *((ULong*)(dst+4)) = temp2; \
                      } }
1494 
/*
 * PAD_COL: for each of 8 rows, copy the pixel at src into the 8 bytes at
 * dst, stepping both src and dst by lx from one row to the next.
 * Works on the caller's src, dst, lx and temp variables and expands to a
 * complete brace-enclosed statement.
 * On exit src and dst both point at the eighth row processed and temp holds
 * that row's pixel repeated in all four bytes.
 */
#define PAD_COL     { int pad_rows_left; \
                      temp = *src; \
                      temp |= (temp<<8); \
                      temp |= (temp<<16); \
                      *((ULong*)dst) = temp; \
                      *((ULong*)(dst+4)) = temp; \
                      for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                      { \
                          src += lx; \
                          temp = *src; \
                          temp |= (temp<<8); \
                          temp |= (temp<<16); \
                          dst += lx; \
                          *((ULong*)dst) = temp; \
                          *((ULong*)(dst+4)) = temp; \
                      } }
1519 
1520 
EncGetPredOutside(Int xpos,Int ypos,UChar * c_prev,UChar * rec,Int width,Int height,Int rnd1)1521 Int EncGetPredOutside(Int xpos, Int ypos, UChar *c_prev, UChar *rec,
1522                       Int width, Int height, Int rnd1)
1523 {
1524     Int lx;
1525     UChar *src, *dst;
1526     ULong temp, temp2;
1527     Int xoffset;
1528 
1529     lx = width + 16; /* only works for chroma */
1530 
1531     if (xpos < 0)
1532     {
1533         if (ypos < 0) /* pad top-left */
1534         {
1535             /* pad corner */
1536             src = c_prev;
1537             dst = c_prev - (lx << 3) - 8;
1538             PAD_CORNER
1539 
1540             /* pad top */
1541             dst = c_prev - (lx << 3);
1542             PAD_ROW
1543 
1544             /* pad left */
1545             dst = c_prev - 8;
1546             PAD_COL
1547 
1548             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1549                                              rec, lx, rnd1);
1550 
1551             return 1;
1552         }
1553         else if ((ypos >> 1) < (height - 8)) /* pad left of frame */
1554         {
1555             /* pad left */
1556             src = c_prev + (ypos >> 1) * lx;
1557             dst = src - 8;
1558             PAD_COL
1559             /* pad extra row */
1560             temp = *(src += lx);
1561             temp |= (temp << 8);
1562             temp |= (temp << 16);
1563             *((ULong*)(dst += lx)) = temp;
1564             *((ULong*)(dst + 4)) = temp;
1565 
1566             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1567                                              rec, lx, rnd1);
1568 
1569             return 1;
1570         }
1571         else /* pad bottom-left */
1572         {
1573             /* pad corner */
1574             src = c_prev + (height - 1) * lx;
1575             dst = src + lx - 8;
1576             PAD_CORNER
1577 
1578             /* pad bottom */
1579             dst = src + lx;
1580             PAD_ROW
1581 
1582             /* pad left */
1583             src -= (lx << 3);
1584             src += lx;
1585             dst = src - 8;
1586             PAD_COL
1587 
1588             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1589                                              rec, lx, rnd1);
1590 
1591             return 1;
1592         }
1593     }
1594     else if ((xpos >> 1) < (width - 8))
1595     {
1596         if (ypos < 0) /* pad top of frame */
1597         {
1598             xoffset = (xpos >> 1) & 0x3;
1599             src = c_prev + (xpos >> 1) - xoffset;
1600             dst = src - (lx << 3);
1601             PAD_ROW
1602             if (xoffset || (xpos&1))
1603             {
1604                 temp = *((ULong*)(src + 8));
1605                 dst = src - (lx << 3) + 8;
1606                 *((ULong*)dst) = temp;
1607                 *((ULong*)(dst += lx)) = temp;
1608                 *((ULong*)(dst += lx)) = temp;
1609                 *((ULong*)(dst += lx)) = temp;
1610                 *((ULong*)(dst += lx)) = temp;
1611                 *((ULong*)(dst += lx)) = temp;
1612                 *((ULong*)(dst += lx)) = temp;
1613                 *((ULong*)(dst += lx)) = temp;
1614             }
1615 
1616             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1617                                              rec, lx, rnd1);
1618 
1619             return 1;
1620         }
1621         else /* pad bottom of frame */
1622         {
1623             xoffset = (xpos >> 1) & 0x3;
1624             src = c_prev + (xpos >> 1) - xoffset + (height - 1) * lx;
1625             dst = src + lx;
1626             PAD_ROW
1627             if (xoffset || (xpos&1))
1628             {
1629                 temp = *((ULong*)(src + 8));
1630                 dst = src + lx + 8;
1631                 *((ULong*)dst) = temp;
1632                 *((ULong*)(dst += lx)) = temp;
1633                 *((ULong*)(dst += lx)) = temp;
1634                 *((ULong*)(dst += lx)) = temp;
1635                 *((ULong*)(dst += lx)) = temp;
1636                 *((ULong*)(dst += lx)) = temp;
1637                 *((ULong*)(dst += lx)) = temp;
1638                 *((ULong*)(dst += lx)) = temp;
1639             }
1640 
1641             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1642                                              rec, lx, rnd1);
1643 
1644             return 1;
1645         }
1646     }
1647     else
1648     {
1649         if (ypos < 0) /* pad top-right */
1650         {
1651             /* pad corner */
1652             src = c_prev + width - 1;
1653             dst = src - (lx << 3) + 1;
1654             PAD_CORNER
1655 
1656             /* pad top */
1657             src -= 7;
1658             dst = src - (lx << 3);
1659             PAD_ROW
1660 
1661             /* pad left */
1662             src += 7;
1663             dst = src + 1;
1664             PAD_COL
1665 
1666             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1667                                              rec, lx, rnd1);
1668 
1669             return 1;
1670         }
1671         else if ((ypos >> 1) < (height - B_SIZE)) /* pad right of frame */
1672         {
1673             /* pad left */
1674             src = c_prev + (ypos >> 1) * lx + width - 1;
1675             dst = src + 1;
1676             PAD_COL
1677             /* pad extra row */
1678             temp = *(src += lx);
1679             temp |= (temp << 8);
1680             temp |= (temp << 16);
1681             *((ULong*)(dst += lx)) = temp;
1682             *((ULong*)(dst + 4)) = temp;
1683 
1684             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1685                                              rec, lx, rnd1);
1686 
1687             return 1;
1688         }
1689         else /* pad bottom-right */
1690         {
1691             /* pad left */
1692             src = c_prev + (height - 8) * lx + width - 1;
1693             dst = src + 1;
1694             PAD_COL
1695 
1696             /* pad corner */
1697             dst = src + lx + 1;
1698             PAD_CORNER
1699 
1700             /* pad bottom */
1701             src -= 7;
1702             dst = src + lx;
1703             PAD_ROW
1704 
1705             GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1706                                              rec, lx, rnd1);
1707 
1708             return 1;
1709         }
1710     }
1711 }
1712 
1713 /* ====================================================================== /
1714     Function : Copy_MB_from_Vop()
1715     Date     : 04/17/2001
1716  ====================================================================== */
1717 
/*
 * Copy a 2x2 group of blocks out of a frame buffer into yChan[0..3].
 *
 * comp  : top-left pixel of the area to read.
 * yChan : destination blocks.  For the first B_SIZE rows, the first B_SIZE
 *         pixels of each row go to yChan[0] and the following B_SIZE pixels
 *         go to yChan[1]; the next B_SIZE rows fill yChan[2] and yChan[3]
 *         the same way.  Each block is written in raster order.
 * pitch : after the 2*B_SIZE pixels of a row have been read, comp is
 *         advanced by (pitch - MB_SIZE) to reach the next row.
 *
 * Pixels are read one byte at a time.  The previous version fetched four
 * pixels through a ULong pointer, which required comp to be suitably
 * aligned, produced the pixels in reversed order on big-endian hosts and
 * read past the intended bytes when ULong is wider than 32 bits.
 */
void Copy_MB_from_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int pitch)
{
    Int row, col, i;
    Int *first, *second;
    Int offset = pitch - MB_SIZE;

    for (i = 0; i < 4; i += 2)
    {
        first = yChan[i];
        second = yChan[i+1];

        for (row = 0; row < B_SIZE; row++)
        {
            /* first B_SIZE pixels of the row */
            for (col = 0; col < B_SIZE; col++)
            {
                *first++ = (Int)(*comp++);
            }
            /* following B_SIZE pixels of the same row */
            for (col = 0; col < B_SIZE; col++)
            {
                *second++ = (Int)(*comp++);
            }
            comp += offset;
        }
    }
}
1760 
1761 /* ====================================================================== /
1762     Function : Copy_B_from_Vop()
1763     Date     : 04/17/2001
1764 / ====================================================================== */
1765 
/*
 * Copy one B_SIZE x B_SIZE block out of a frame buffer into cChan.
 *
 * comp  : top-left pixel of the block to read.
 * cChan : destination; receives B_SIZE*B_SIZE values in raster order.
 * pitch : after the B_SIZE pixels of a row have been read, comp is advanced
 *         by (pitch - B_SIZE) to reach the next row.
 *
 * Pixels are read one byte at a time.  The previous version fetched four
 * pixels through a ULong pointer, which required comp to be suitably
 * aligned, produced the pixels in reversed order on big-endian hosts and
 * read past the intended bytes when ULong is wider than 32 bits.
 */
void Copy_B_from_Vop(UChar *comp, Int cChan[], Int pitch)
{
    Int row, col;
    Int offset = pitch - B_SIZE;

    for (row = 0; row < B_SIZE; row++)
    {
        for (col = 0; col < B_SIZE; col++)
        {
            *cChan++ = (Int)(*comp++);
        }
        comp += offset;
    }
}
1789 
1790 /* ====================================================================== /
1791     Function : Copy_MB_into_Vop()
1792     Date     : 04/17/2001
1793     History  : From decoder
1794 / ====================================================================== */
1795 
/*
 * Store the four blocks yChan[0..3] into a frame buffer as a 2x2 group,
 * saturating every value to the range 0..255.
 *
 * comp  : top-left pixel of the area to write.
 * yChan : source blocks in raster order.  yChan[0] and yChan[1] supply the
 *         first and the following B_SIZE pixels of each of the first
 *         B_SIZE rows; yChan[2] and yChan[3] supply the next B_SIZE rows.
 * pitch : after the 2*B_SIZE pixels of a row have been written, comp is
 *         advanced by (pitch - MB_SIZE) to reach the next row.
 *
 * Pixels are written one byte at a time, in the same order in which
 * Copy_MB_from_Vop() reads them.  The previous version packed four values
 * into a ULong with the first value in the most significant byte, so on
 * little-endian hosts every group of four pixels was stored reversed; it
 * also left-shifted a signed Int by 24 (overflow for values >= 128) and
 * needed an aligned comp and a 32-bit ULong.
 */
void Copy_MB_into_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int pitch)
{
    Int row, col, i, half;
    Int *blk[2];
    Int pix;
    Int offset = pitch - MB_SIZE;

    for (i = 0; i < 4; i += 2)
    {
        blk[0] = yChan[i];
        blk[1] = yChan[i+1];

        for (row = 0; row < B_SIZE; row++)
        {
            /* blk[0] fills the first B_SIZE pixels of the row, blk[1] the rest */
            for (half = 0; half < 2; half++)
            {
                for (col = 0; col < B_SIZE; col++)
                {
                    pix = *blk[half]++;
                    if (pix < 0)
                    {
                        pix = 0;
                    }
                    else if (pix > 255)
                    {
                        pix = 255;
                    }
                    *comp++ = (UChar)pix;
                }
            }
            comp += offset;
        }
    }
}
1856 
1857 
1858 /* ====================================================================== /
1859     Function : Copy_B_into_Vop()
1860     Date     : 04/17/2001
1861     History  : From decoder
1862 / ====================================================================== */
1863 
/*
 * Store one B_SIZE x B_SIZE block into a frame buffer, saturating every
 * value to the range 0..255.
 *
 * comp  : top-left pixel of the block to write.
 * cChan : source; B_SIZE*B_SIZE values in raster order.
 * pitch : after the B_SIZE pixels of a row have been written, comp is
 *         advanced by (pitch - B_SIZE) to reach the next row.
 *
 * Pixels are written one byte at a time, in the same order in which
 * Copy_B_from_Vop() reads them.  The previous version packed four values
 * into a ULong with the first value in the most significant byte, so on
 * little-endian hosts every group of four pixels was stored reversed; it
 * also left-shifted a signed Int by 24 (overflow for values >= 128) and
 * needed an aligned comp and a 32-bit ULong.
 */
void Copy_B_into_Vop(UChar *comp, Int cChan[], Int pitch)
{
    Int row, col;
    Int pix;
    Int offset = pitch - B_SIZE;

    for (row = 0; row < B_SIZE; row++)
    {
        for (col = 0; col < B_SIZE; col++)
        {
            pix = *cChan++;
            if (pix < 0)
            {
                pix = 0;
            }
            else if (pix > 255)
            {
                pix = 255;
            }
            *comp++ = (UChar)pix;
        }
        comp += offset;
    }
}
1897 
1898 /* ======================================================================== */
1899 /*  Function : get_MB( )                                                    */
1900 /*  Date     : 10/03/2000                                                   */
1901 /*  Purpose  : Copy 4 Y to reference frame                                  */
1902 /*  In/out   :                                                              */
1903 /*  Return   :                                                              */
1904 /*  Modified :                                                              */
1905 /* ======================================================================== */
get_MB(UChar * c_prev,UChar * c_prev_u,UChar * c_prev_v,Short mb[6][64],Int lx,Int lx_uv)1906 void get_MB(UChar *c_prev, UChar *c_prev_u  , UChar *c_prev_v,
1907             Short mb[6][64], Int lx, Int lx_uv)
1908 
1909 {
1910     Int i, j, count = 0, count1 = 0;
1911     Int k1 = lx - MB_SIZE, k2 = lx_uv - B_SIZE;
1912 
1913     for (i = 0; i < B_SIZE; i++)
1914     {
1915         for (j = 0; j < B_SIZE; j++)
1916         {
1917             mb[0][count] = (Int)(*c_prev++);
1918             mb[4][count] = (Int)(*c_prev_u++);
1919             mb[5][count++] = (Int)(*c_prev_v++);
1920         }
1921 
1922         for (j = 0; j < B_SIZE; j++)
1923             mb[1][count1++] = (Int)(*c_prev++);
1924 
1925         c_prev += k1;
1926         c_prev_u += k2;
1927         c_prev_v += k2;
1928 
1929 
1930     }
1931 
1932     count = count1 = 0;
1933     for (i = 0; i < B_SIZE; i++)
1934     {
1935         for (j = 0; j < B_SIZE; j++)
1936             mb[2][count++] = (Int)(*c_prev++);
1937 
1938         for (j = 0; j < B_SIZE; j++)
1939             mb[3][count1++] = (Int)(*c_prev++);
1940 
1941         c_prev += k1;
1942     }
1943 }
1944 
/*
 * Copy an 8x8 pixel block from prev to rec.
 *
 * rec  : top-left pixel of the destination block.
 * prev : top-left pixel of the source block.
 * lx   : distance in bytes between consecutive rows; the same value is
 *        used for both buffers.
 *
 * The copy is done byte by byte over a fixed 8 rows.  The previous version
 * terminated its loop by comparing pointers cast to UInt, which truncates
 * addresses on 64-bit targets, and copied through Int pointers, which
 * required both buffers to be Int-aligned, Int to be 4 bytes wide and lx
 * to be a multiple of 4.
 */
void PutSkippedBlock(UChar *rec, UChar *prev, Int lx)
{
    Int row, col;
    UChar *dst_row, *src_row;

    for (row = 0; row < 8; row++)
    {
        dst_row = rec + row * lx;
        src_row = prev + row * lx;

        for (col = 0; col < 8; col++)
        {
            dst_row[col] = src_row[col];
        }
    }
}
1967