• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 #include "oscl_mem.h"
19 #include "avcenc_lib.h"
20 
21 #define MIN_GOP     1   /* minimum size of GOP, 1/23/01, need to be tested */
22 
23 #define DEFAULT_REF_IDX     0  /* always from the first frame in the reflist */
24 
25 #define ALL_CAND_EQUAL  10  /*  any number greater than 5 will work */
26 
27 
28 /* from TMN 3.2 */
29 #define PREF_NULL_VEC 129   /* zero vector bias */
30 #define PREF_16_VEC 129     /* 1MV bias versus 4MVs*/
31 #define PREF_INTRA  3024//512       /* bias for INTRA coding */
32 
33 const static int tab_exclude[9][9] =  // [last_loc][curr_loc]
34 {
35     {0, 0, 0, 0, 0, 0, 0, 0, 0},
36     {0, 0, 0, 0, 1, 1, 1, 0, 0},
37     {0, 0, 0, 0, 1, 1, 1, 1, 1},
38     {0, 0, 0, 0, 0, 0, 1, 1, 1},
39     {0, 1, 1, 0, 0, 0, 1, 1, 1},
40     {0, 1, 1, 0, 0, 0, 0, 0, 1},
41     {0, 1, 1, 1, 1, 0, 0, 0, 1},
42     {0, 0, 1, 1, 1, 0, 0, 0, 0},
43     {0, 0, 1, 1, 1, 1, 1, 0, 0}
44 }; //to decide whether to continue or compute
45 
46 const static int refine_next[8][2] =    /* [curr_k][increment] */
47 {
48     {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}
49 };
50 
51 #ifdef _SAD_STAT
52 uint32 num_MB = 0;
53 uint32 num_cand = 0;
54 #endif
55 
56 /************************************************************************/
57 #define TH_INTER_2  100  /* temporary for now */
58 
59 //#define FIXED_INTERPRED_MODE  AVC_P16
60 #define FIXED_REF_IDX   0
61 #define FIXED_MVX 0
62 #define FIXED_MVY 0
63 
64 // only use when AVC_P8 or AVC_P8ref0
65 #define FIXED_SUBMB_MODE    AVC_4x4
66 /*************************************************************************/
67 
68 /* Initialize arrays necessary for motion search */
InitMotionSearchModule(AVCHandle * avcHandle)69 AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle)
70 {
71     AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
72     AVCRateControl *rateCtrl = encvid->rateCtrl;
73     int search_range = rateCtrl->mvRange;
74     int number_of_subpel_positions = 4 * (2 * search_range + 3);
75     int max_mv_bits, max_mvd;
76     int temp_bits = 0;
77     uint8 *mvbits;
78     int bits, imax, imin, i;
79     uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions
80 
81 
82     while (number_of_subpel_positions > 0)
83     {
84         temp_bits++;
85         number_of_subpel_positions >>= 1;
86     }
87 
88     max_mv_bits = 3 + 2 * temp_bits;
89     max_mvd  = (1 << (max_mv_bits >> 1)) - 1;
90 
91     encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
92                            sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR);
93 
94     if (encvid->mvbits_array == NULL)
95     {
96         return AVCENC_MEMORY_FAIL;
97     }
98 
99     mvbits = encvid->mvbits  = encvid->mvbits_array + max_mvd;
100 
101     mvbits[0] = 1;
102     for (bits = 3; bits <= max_mv_bits; bits += 2)
103     {
104         imax = 1    << (bits >> 1);
105         imin = imax >> 1;
106 
107         for (i = imin; i < imax; i++)   mvbits[-i] = mvbits[i] = bits;
108     }
109 
110     /* initialize half-pel search */
111     encvid->hpel_cand[0] = subpel_pred + REF_CENTER;
112     encvid->hpel_cand[1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ;
113     encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
114     encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
115     encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
116     encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
117     encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
118     encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
119     encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
120 
121     /* For quarter-pel interpolation around best half-pel result */
122 
123     encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
124     encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
125     encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
126     encvid->bilin_base[0][3] = subpel_pred + REF_CENTER;
127 
128 
129     encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
130     encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24;
131     encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
132     encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
133 
134     encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24;
135     encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
136     encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
137     encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
138 
139     encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
140     encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
141     encvid->bilin_base[3][2] = subpel_pred + REF_CENTER;
142     encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
143 
144     encvid->bilin_base[4][0] = subpel_pred + REF_CENTER;
145     encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
146     encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
147     encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
148 
149     encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
150     encvid->bilin_base[5][1] = subpel_pred + REF_CENTER;
151     encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
152     encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
153 
154     encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1;
155     encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
156     encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24;
157     encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
158 
159     encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
160     encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
161     encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1;
162     encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
163 
164     encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25;
165     encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
166     encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
167     encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
168 
169 
170     return AVCENC_SUCCESS;
171 }
172 
173 /* Clean-up memory */
CleanMotionSearchModule(AVCHandle * avcHandle)174 void CleanMotionSearchModule(AVCHandle *avcHandle)
175 {
176     AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
177 
178     if (encvid->mvbits_array)
179     {
180         avcHandle->CBAVC_Free(avcHandle->userData, (int)(encvid->mvbits_array));
181         encvid->mvbits = NULL;
182     }
183 
184     return ;
185 }
186 
187 
IntraDecisionABE(int * min_cost,uint8 * cur,int pitch,bool ave)188 bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave)
189 {
190     int j;
191     uint8 *out;
192     int temp, SBE;
193     OsclFloat ABE;
194     bool intra = true;
195 
196     SBE = 0;
197     /* top neighbor */
198     out = cur - pitch;
199     for (j = 0; j < 16; j++)
200     {
201         temp = out[j] - cur[j];
202         SBE += ((temp >= 0) ? temp : -temp);
203     }
204 
205     /* left neighbor */
206     out = cur - 1;
207     out -= pitch;
208     cur -= pitch;
209     for (j = 0; j < 16; j++)
210     {
211         temp = *(out += pitch) - *(cur += pitch);
212         SBE += ((temp >= 0) ? temp : -temp);
213     }
214 
215     /* compare mincost/384 and SBE/64 */
216     ABE = SBE / 32.0; //ABE = SBE/64.0; //
217     if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) //
218     {
219         intra = false; // no possibility of intra, just use inter
220     }
221     else
222     {
223         if (ave == true)
224         {
225             *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost
226         }
227         else
228         {
229             *min_cost = (int)(SBE * 8);
230         }
231     }
232 
233     return intra;
234 }
235 
236 /******* main function for macroblock prediction for the entire frame ***/
237 /* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */
AVCMotionEstimation(AVCEncObject * encvid)238 void AVCMotionEstimation(AVCEncObject *encvid)
239 {
240     AVCCommonObj *video = encvid->common;
241     int slice_type = video->slice_type;
242     AVCFrameIO *currInput = encvid->currInput;
243     AVCPictureData *refPic = video->RefPicList0[0];
244     int i, j, k;
245     int mbwidth = video->PicWidthInMbs;
246     int mbheight = video->PicHeightInMbs;
247     int totalMB = video->PicSizeInMbs;
248     int pitch = currInput->pitch;
249     AVCMacroblock *currMB, *mblock = video->mblock;
250     AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16;
251     // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc;
252     AVCRateControl *rateCtrl = encvid->rateCtrl;
253     uint8 *intraSearch = encvid->intraSearch;
254     uint FS_en = encvid->fullsearch_enable;
255 
256     int NumIntraSearch, start_i, numLoop, incr_i;
257     int mbnum, offset;
258     uint8 *cur, *best_cand[5];
259     int totalSAD = 0;   /* average SAD for rate control */
260     int type_pred;
261     int abe_cost;
262 
263 #ifdef HTFM
264     /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
265     int collect = 0;
266     HTFM_Stat htfm_stat;
267     double newvar[16];
268     double exp_lamda[15];
269     /*********************************/
270 #endif
271     int hp_guess = 0;
272     uint32 mv_uint32;
273 
274     offset = 0;
275 
276     if (slice_type == AVC_I_SLICE)
277     {
278         /* cannot do I16 prediction here because it needs full decoding. */
279         for (i = 0; i < totalMB; i++)
280         {
281             encvid->min_cost[i] = 0x7FFFFFFF;  /* max value for int */
282         }
283 
284         oscl_memset(intraSearch, 1, sizeof(uint8)*totalMB);
285 
286         encvid->firstIntraRefreshMBIndx = 0; /* reset this */
287 
288         return ;
289     }
290     else   // P_SLICE
291     {
292         for (i = 0; i < totalMB; i++)
293         {
294             mblock[i].mb_intra = 0;
295         }
296         oscl_memset(intraSearch, 1, sizeof(uint8)*totalMB);
297     }
298 
299     if (refPic->padded == 0)
300     {
301         AVCPaddingEdge(refPic);
302         refPic->padded = 1;
303     }
304     /* Random INTRA update */
305     if (rateCtrl->intraMBRate)
306     {
307         AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate);
308     }
309 
310     encvid->sad_extra_info = NULL;
311 #ifdef HTFM
312     /***** HYPOTHESIS TESTING ********/
313     InitHTFM(video, &htfm_stat, newvar, &collect);
314     /*********************************/
315 #endif
316 
317     if ((rateCtrl->scdEnable == 1)
318             && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP)))
319         /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
320     {
321         incr_i = 2;
322         numLoop = 2;
323         start_i = 1;
324         type_pred = 0; /* for initial candidate selection */
325     }
326     else
327     {
328         incr_i = 1;
329         numLoop = 1;
330         start_i = 0;
331         type_pred = 2;
332     }
333 
334     /* First pass, loop thru half the macroblock */
335     /* determine scene change */
336     /* Second pass, for the rest of macroblocks */
337     NumIntraSearch = 0; // to be intra searched in the encoding loop.
338     while (numLoop--)
339     {
340         for (j = 0; j < mbheight; j++)
341         {
342             if (incr_i > 1)
343                 start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */
344 
345             offset = pitch * (j << 4) + (start_i << 4);
346 
347             mbnum = j * mbwidth + start_i;
348 
349             for (i = start_i; i < mbwidth; i += incr_i)
350             {
351                 video->mbNum = mbnum;
352                 video->currMB = currMB = mblock + mbnum;
353                 mot_mb_16x16 = mot16x16 + mbnum;
354 
355                 cur = currInput->YCbCr[0] + offset;
356 
357                 if (currMB->mb_intra == 0) /* for INTER mode */
358                 {
359 #if defined(HTFM)
360                     HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch);
361 #else
362                     AVCPrepareCurMB(encvid, cur, pitch);
363 #endif
364                     /************************************************************/
365                     /******** full-pel 1MV search **********************/
366 
367                     AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred,
368                                       FS_en, &hp_guess);
369 
370                     abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad;
371 
372                     /* set mbMode and MVs */
373                     currMB->mbMode = AVC_P16;
374                     currMB->MBPartPredMode[0][0] = AVC_Pred_L0;
375                     mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff);
376                     for (k = 0; k < 32; k += 2)
377                     {
378                         currMB->mvL0[k>>1] = mv_uint32;
379                     }
380 
381                     /* make a decision whether it should be tested for intra or not */
382                     if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
383                     {
384                         if (false == IntraDecisionABE(&abe_cost, cur, pitch, true))
385                         {
386                             intraSearch[mbnum] = 0;
387                         }
388                         else
389                         {
390                             NumIntraSearch++;
391                             rateCtrl->MADofMB[mbnum] = abe_cost;
392                         }
393                     }
394                     else // boundary MBs, always do intra search
395                     {
396                         NumIntraSearch++;
397                     }
398 
399                     totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad;
400                 }
401                 else    /* INTRA update, use for prediction */
402                 {
403                     mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0;
404 
405                     /* reset all other MVs to zero */
406                     /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */
407                     abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF;  /* max value for int */
408 
409                     if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
410                     {
411                         IntraDecisionABE(&abe_cost, cur, pitch, false);
412 
413                         rateCtrl->MADofMB[mbnum] = abe_cost;
414                         totalSAD += abe_cost;
415                     }
416 
417                     NumIntraSearch++ ;
418                     /* cannot do I16 prediction here because it needs full decoding. */
419                     // intraSearch[mbnum] = 1;
420 
421                 }
422 
423                 mbnum += incr_i;
424                 offset += (incr_i << 4);
425 
426             } /* for i */
427         } /* for j */
428 
429         /* since we cannot do intra/inter decision here, the SCD has to be
430         based on other criteria such as motion vectors coherency or the SAD */
431         if (incr_i > 1 && numLoop) /* scene change on and first loop */
432         {
433             //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
434             if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */
435                 /* need to do more investigation about this threshold since the NumIntraSearch
436                 only show potential intra MBs, not the actual one */
437             {
438                 /* we can choose to just encode I_SLICE without IDR */
439                 //video->nal_unit_type = AVC_NALTYPE_IDR;
440                 video->nal_unit_type = AVC_NALTYPE_SLICE;
441                 video->sliceHdr->slice_type = AVC_I_ALL_SLICE;
442                 video->slice_type = AVC_I_SLICE;
443                 oscl_memset(intraSearch, 1, sizeof(uint8)*totalMB);
444                 i = totalMB;
445                 while (i--)
446                 {
447                     mblock[i].mb_intra = 1;
448                     encvid->min_cost[i] = 0x7FFFFFFF;  /* max value for int */
449                 }
450 
451                 rateCtrl->totalSAD = totalSAD * 2;  /* SAD */
452 
453                 return ;
454             }
455         }
456         /******** no scene change, continue motion search **********************/
457         start_i = 0;
458         type_pred++; /* second pass */
459     }
460 
461     rateCtrl->totalSAD = totalSAD;  /* SAD */
462 
463 #ifdef HTFM
464     /***** HYPOTHESIS TESTING ********/
465     if (collect)
466     {
467         collect = 0;
468         UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat);
469     }
470     /*********************************/
471 #endif
472 
473     return ;
474 }
475 
476 /*=====================================================================
477     Function:   PaddingEdge
478     Date:       09/16/2000
479     Purpose:    Pad edge of a Vop
480 =====================================================================*/
481 
AVCPaddingEdge(AVCPictureData * refPic)482 void  AVCPaddingEdge(AVCPictureData *refPic)
483 {
484     uint8 *src, *dst;
485     int i;
486     int pitch, width, height;
487     uint32 temp1, temp2;
488 
489     width = refPic->width;
490     height = refPic->height;
491     pitch = refPic->pitch;
492 
493     /* pad top */
494     src = refPic->Sl;
495 
496     temp1 = *src; /* top-left corner */
497     temp2 = src[width-1]; /* top-right corner */
498     temp1 |= (temp1 << 8);
499     temp1 |= (temp1 << 16);
500     temp2 |= (temp2 << 8);
501     temp2 |= (temp2 << 16);
502 
503     dst = src - (pitch << 4);
504 
505     *((uint32*)(dst - 16)) = temp1;
506     *((uint32*)(dst - 12)) = temp1;
507     *((uint32*)(dst - 8)) = temp1;
508     *((uint32*)(dst - 4)) = temp1;
509 
510     oscl_memcpy(dst, src, width);
511 
512     *((uint32*)(dst += width)) = temp2;
513     *((uint32*)(dst + 4)) = temp2;
514     *((uint32*)(dst + 8)) = temp2;
515     *((uint32*)(dst + 12)) = temp2;
516 
517     dst = dst - width - 16;
518 
519     i = 15;
520     while (i--)
521     {
522         oscl_memcpy(dst + pitch, dst, pitch);
523         dst += pitch;
524     }
525 
526     /* pad sides */
527     dst += (pitch + 16);
528     src = dst;
529     i = height;
530     while (i--)
531     {
532         temp1 = *src;
533         temp2 = src[width-1];
534         temp1 |= (temp1 << 8);
535         temp1 |= (temp1 << 16);
536         temp2 |= (temp2 << 8);
537         temp2 |= (temp2 << 16);
538 
539         *((uint32*)(dst - 16)) = temp1;
540         *((uint32*)(dst - 12)) = temp1;
541         *((uint32*)(dst - 8)) = temp1;
542         *((uint32*)(dst - 4)) = temp1;
543 
544         *((uint32*)(dst += width)) = temp2;
545         *((uint32*)(dst + 4)) = temp2;
546         *((uint32*)(dst + 8)) = temp2;
547         *((uint32*)(dst + 12)) = temp2;
548 
549         src += pitch;
550         dst = src;
551     }
552 
553     /* pad bottom */
554     dst -= 16;
555     i = 16;
556     while (i--)
557     {
558         oscl_memcpy(dst, dst - pitch, pitch);
559         dst += pitch;
560     }
561 
562 
563     return ;
564 }
565 
566 /*===========================================================================
567     Function:   AVCRasterIntraUpdate
568     Date:       2/26/01
569     Purpose:    To raster-scan assign INTRA-update .
570                 N macroblocks are updated (also was programmable).
571 ===========================================================================*/
AVCRasterIntraUpdate(AVCEncObject * encvid,AVCMacroblock * mblock,int totalMB,int numRefresh)572 void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh)
573 {
574     int indx, i;
575 
576     indx = encvid->firstIntraRefreshMBIndx;
577     for (i = 0; i < numRefresh && indx < totalMB; i++)
578     {
579         (mblock + indx)->mb_intra = 1;
580         encvid->intraSearch[indx++] = 1;
581     }
582 
583     /* if read the end of frame, reset and loop around */
584     if (indx >= totalMB - 1)
585     {
586         indx = 0;
587         while (i < numRefresh && indx < totalMB)
588         {
589             (mblock + indx)->mb_intra = 1;
590             encvid->intraSearch[indx++] = 1;
591             i++;
592         }
593     }
594 
595     encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */
596 
597     return ;
598 }
599 
600 
601 #ifdef HTFM
InitHTFM(VideoEncData * encvid,HTFM_Stat * htfm_stat,double * newvar,int * collect)602 void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect)
603 {
604     AVCCommonObj *video = encvid->common;
605     int i;
606     int lx = video->currPic->width; // padding
607     int lx2 = lx << 1;
608     int lx3 = lx2 + lx;
609     int rx = video->currPic->pitch;
610     int rx2 = rx << 1;
611     int rx3 = rx2 + rx;
612 
613     int *offset, *offset2;
614 
615     /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
616     if (((int)video->sliceHdr->frame_num) % 30 == 1)
617     {
618 
619         *collect = 1;
620 
621         htfm_stat->countbreak = 0;
622         htfm_stat->abs_dif_mad_avg = 0;
623 
624         for (i = 0; i < 16; i++)
625         {
626             newvar[i] = 0.0;
627         }
628 //      encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect;
629         encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
630         encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
631         encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
632         encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
633         encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
634         encvid->sad_extra_info = (void*)(htfm_stat);
635         offset = htfm_stat->offsetArray;
636         offset2 = htfm_stat->offsetRef;
637     }
638     else
639     {
640 //      encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM;
641         encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
642         encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
643         encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
644         encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
645         encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
646         encvid->sad_extra_info = (void*)(encvid->nrmlz_th);
647         offset = encvid->nrmlz_th + 16;
648         offset2 = encvid->nrmlz_th + 32;
649     }
650 
651     offset[0] = 0;
652     offset[1] = lx2 + 2;
653     offset[2] = 2;
654     offset[3] = lx2;
655     offset[4] = lx + 1;
656     offset[5] = lx3 + 3;
657     offset[6] = lx + 3;
658     offset[7] = lx3 + 1;
659     offset[8] = lx;
660     offset[9] = lx3 + 2;
661     offset[10] = lx3 ;
662     offset[11] = lx + 2 ;
663     offset[12] = 1;
664     offset[13] = lx2 + 3;
665     offset[14] = lx2 + 1;
666     offset[15] = 3;
667 
668     offset2[0] = 0;
669     offset2[1] = rx2 + 2;
670     offset2[2] = 2;
671     offset2[3] = rx2;
672     offset2[4] = rx + 1;
673     offset2[5] = rx3 + 3;
674     offset2[6] = rx + 3;
675     offset2[7] = rx3 + 1;
676     offset2[8] = rx;
677     offset2[9] = rx3 + 2;
678     offset2[10] = rx3 ;
679     offset2[11] = rx + 2 ;
680     offset2[12] = 1;
681     offset2[13] = rx2 + 3;
682     offset2[14] = rx2 + 1;
683     offset2[15] = 3;
684 
685     return ;
686 }
687 
UpdateHTFM(AVCEncObject * encvid,double * newvar,double * exp_lamda,HTFM_Stat * htfm_stat)688 void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
689 {
690     if (htfm_stat->countbreak == 0)
691         htfm_stat->countbreak = 1;
692 
693     newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
694 
695     if (newvar[0] < 0.001)
696     {
697         newvar[0] = 0.001; /* to prevent floating overflow */
698     }
699     exp_lamda[0] =  1 / (newvar[0] * 1.4142136);
700     exp_lamda[1] = exp_lamda[0] * 1.5825;
701     exp_lamda[2] = exp_lamda[0] * 2.1750;
702     exp_lamda[3] = exp_lamda[0] * 3.5065;
703     exp_lamda[4] = exp_lamda[0] * 3.1436;
704     exp_lamda[5] = exp_lamda[0] * 3.5315;
705     exp_lamda[6] = exp_lamda[0] * 3.7449;
706     exp_lamda[7] = exp_lamda[0] * 4.5854;
707     exp_lamda[8] = exp_lamda[0] * 4.6191;
708     exp_lamda[9] = exp_lamda[0] * 5.4041;
709     exp_lamda[10] = exp_lamda[0] * 6.5974;
710     exp_lamda[11] = exp_lamda[0] * 10.5341;
711     exp_lamda[12] = exp_lamda[0] * 10.0719;
712     exp_lamda[13] = exp_lamda[0] * 12.0516;
713     exp_lamda[14] = exp_lamda[0] * 15.4552;
714 
715     CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th);
716     return ;
717 }
718 
719 
CalcThreshold(double pf,double exp_lamda[],int nrmlz_th[])720 void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[])
721 {
722     int i;
723     double temp[15];
724     //  printf("\nLamda: ");
725 
726     /* parametric PREMODELling */
727     for (i = 0; i < 15; i++)
728     {
729         //    printf("%g ",exp_lamda[i]);
730         if (pf < 0.5)
731             temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf);
732         else
733             temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf));
734     }
735 
736     nrmlz_th[15] = 0;
737     for (i = 0; i < 15; i++)        /* scale upto no.pixels */
738         nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5);
739 
740     return ;
741 }
742 
HTFMPrepareCurMB_AVC(AVCEncObject * encvid,HTFM_Stat * htfm_stat,uint8 * cur,int pitch)743 void    HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch)
744 {
745     AVCCommonObj *video = encvid->common;
746     uint32 *htfmMB = (uint32*)(encvid->currYMB);
747     uint8 *ptr, byte;
748     int *offset;
749     int i;
750     uint32 word;
751 
752     if (((int)video->sliceHdr->frame_num) % 30 == 1)
753     {
754         offset = htfm_stat->offsetArray;
755     }
756     else
757     {
758         offset = encvid->nrmlz_th + 16;
759     }
760 
761     for (i = 0; i < 16; i++)
762     {
763         ptr = cur + offset[i];
764         word = ptr[0];
765         byte = ptr[4];
766         word |= (byte << 8);
767         byte = ptr[8];
768         word |= (byte << 16);
769         byte = ptr[12];
770         word |= (byte << 24);
771         *htfmMB++ = word;
772 
773         word = *(ptr += (pitch << 2));
774         byte = ptr[4];
775         word |= (byte << 8);
776         byte = ptr[8];
777         word |= (byte << 16);
778         byte = ptr[12];
779         word |= (byte << 24);
780         *htfmMB++ = word;
781 
782         word = *(ptr += (pitch << 2));
783         byte = ptr[4];
784         word |= (byte << 8);
785         byte = ptr[8];
786         word |= (byte << 16);
787         byte = ptr[12];
788         word |= (byte << 24);
789         *htfmMB++ = word;
790 
791         word = *(ptr += (pitch << 2));
792         byte = ptr[4];
793         word |= (byte << 8);
794         byte = ptr[8];
795         word |= (byte << 16);
796         byte = ptr[12];
797         word |= (byte << 24);
798         *htfmMB++ = word;
799     }
800 
801     return ;
802 }
803 
804 
805 #endif // HTFM
806 
AVCPrepareCurMB(AVCEncObject * encvid,uint8 * cur,int pitch)807 void    AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch)
808 {
809     void* tmp = (void*)(encvid->currYMB);
810     uint32 *currYMB = (uint32*) tmp;
811     int i;
812 
813     cur -= pitch;
814 
815     for (i = 0; i < 16; i++)
816     {
817         *currYMB++ = *((uint32*)(cur += pitch));
818         *currYMB++ = *((uint32*)(cur + 4));
819         *currYMB++ = *((uint32*)(cur + 8));
820         *currYMB++ = *((uint32*)(cur + 12));
821     }
822 
823     return ;
824 }
825 
826 #ifdef FIXED_INTERPRED_MODE
827 
828 /* due to the complexity of the predicted motion vector, we may not decide to skip
829 a macroblock here just yet. */
830 /* We will find the best motion vector and the best intra prediction mode for each block. */
831 /* output are
832     currMB->NumMbPart,  currMB->MbPartWidth, currMB->MbPartHeight,
833     currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight,
834     currMB->MBPartPredMode[][] (L0 or L1 or BiPred)
835     currMB->RefIdx[], currMB->ref_idx_L0[],
836     currMB->mvL0[], currMB->mvL1[]
837     */
838 
AVCMBMotionSearch(AVCEncObject * encvid,AVCMacroblock * currMB,int mbNum,int num_pass)839 AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum,
840                                 int num_pass)
841 {
842     AVCCommonObj *video = encvid->common;
843     int mbPartIdx, subMbPartIdx;
844     int16 *mv;
845     int i;
846     int SubMbPartHeight, SubMbPartWidth, NumSubMbPart;
847 
848     /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */
849 
850     currMB->mbMode = FIXED_INTERPRED_MODE;
851     currMB->mb_intra = 0;
852 
853     if (currMB->mbMode == AVC_P16)
854     {
855         currMB->NumMbPart = 1;
856         currMB->MbPartWidth = 16;
857         currMB->MbPartHeight = 16;
858         currMB->SubMbPartHeight[0] = 16;
859         currMB->SubMbPartWidth[0] = 16;
860         currMB->NumSubMbPart[0] =  1;
861     }
862     else if (currMB->mbMode == AVC_P16x8)
863     {
864         currMB->NumMbPart = 2;
865         currMB->MbPartWidth = 16;
866         currMB->MbPartHeight = 8;
867         for (i = 0; i < 2; i++)
868         {
869             currMB->SubMbPartWidth[i] = 16;
870             currMB->SubMbPartHeight[i] = 8;
871             currMB->NumSubMbPart[i] = 1;
872         }
873     }
874     else if (currMB->mbMode == AVC_P8x16)
875     {
876         currMB->NumMbPart = 2;
877         currMB->MbPartWidth = 8;
878         currMB->MbPartHeight = 16;
879         for (i = 0; i < 2; i++)
880         {
881             currMB->SubMbPartWidth[i] = 8;
882             currMB->SubMbPartHeight[i] = 16;
883             currMB->NumSubMbPart[i] = 1;
884         }
885     }
886     else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0)
887     {
888         currMB->NumMbPart = 4;
889         currMB->MbPartWidth = 8;
890         currMB->MbPartHeight = 8;
891         if (FIXED_SUBMB_MODE == AVC_8x8)
892         {
893             SubMbPartHeight = 8;
894             SubMbPartWidth = 8;
895             NumSubMbPart = 1;
896         }
897         else if (FIXED_SUBMB_MODE == AVC_8x4)
898         {
899             SubMbPartHeight = 4;
900             SubMbPartWidth = 8;
901             NumSubMbPart = 2;
902         }
903         else if (FIXED_SUBMB_MODE == AVC_4x8)
904         {
905             SubMbPartHeight = 8;
906             SubMbPartWidth = 4;
907             NumSubMbPart = 2;
908         }
909         else if (FIXED_SUBMB_MODE == AVC_4x4)
910         {
911             SubMbPartHeight = 4;
912             SubMbPartWidth = 4;
913             NumSubMbPart = 4;
914         }
915 
916         for (i = 0; i < 4; i++)
917         {
918             currMB->subMbMode[i] = FIXED_SUBMB_MODE;
919             currMB->SubMbPartHeight[i] = SubMbPartHeight;
920             currMB->SubMbPartWidth[i] = SubMbPartWidth;
921             currMB->NumSubMbPart[i] = NumSubMbPart;
922         }
923     }
924     else /* it's probably intra mode */
925     {
926         return AVCENC_SUCCESS;
927     }
928 
929     for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
930     {
931         currMB->MBPartPredMode[mbPartIdx][0]  = AVC_Pred_L0;
932         currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX;
933         currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx;
934 
935         for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++)
936         {
937             mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx);
938 
939             *mv++ = FIXED_MVX;
940             *mv = FIXED_MVY;
941         }
942     }
943 
944     encvid->min_cost = 0;
945 
946     return AVCENC_SUCCESS;
947 }
948 
949 #else /* perform the search */
950 
951 /* This option #1 search is very similar to PV's MPEG4 motion search algorithm.
952   The search is done in hierarchical manner from 16x16 MB down to smaller and smaller
953   partition. At each level, a decision can be made to stop the search if the expected
954   prediction gain is not worth the computation. The decision can also be made at the finest
955   level for more fullsearch-like behavior with the price of heavier computation. */
AVCMBMotionSearch(AVCEncObject * encvid,uint8 * cur,uint8 * best_cand[],int i0,int j0,int type_pred,int FS_en,int * hp_guess)956 void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[],
957                        int i0, int j0, int type_pred, int FS_en, int *hp_guess)
958 {
959     AVCCommonObj *video = encvid->common;
960     AVCPictureData *currPic = video->currPic;
961     AVCSeqParamSet *currSPS = video->currSeqParams;
962     AVCRateControl *rateCtrl = encvid->rateCtrl;
963     AVCMacroblock *currMB = video->currMB;
964     uint8 *ref, *cand, *ncand;
965     void *extra_info = encvid->sad_extra_info;
966     int mbnum = video->mbNum;
967     int width = currPic->width; /* 6/12/01, must be multiple of 16 */
968     int height = currPic->height;
969     AVCMV *mot16x16 = encvid->mot16x16;
970     int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
971 
972     int range = rateCtrl->mvRange;
973 
974     int lx = currPic->pitch; /*  padding */
975     int i, j, imin, jmin, ilow, ihigh, jlow, jhigh;
976     int d, dmin, dn[9];
977     int k;
978     int mvx[5], mvy[5];
979     int num_can, center_again;
980     int last_loc, new_loc = 0;
981     int step, max_step = range >> 1;
982     int next;
983 
984     int cmvx, cmvy; /* estimated predicted MV */
985     int lev_idx;
986     int lambda_motion = encvid->lambda_motion;
987     uint8 *mvbits = encvid->mvbits;
988     int mvshift = 2;
989     int mvcost;
990 
991     int min_sad = 65535;
992 
993     ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */
994 
995     /* have to initialize these params, necessary for interprediction part */
996     currMB->NumMbPart = 1;
997     currMB->SubMbPartHeight[0] = 16;
998     currMB->SubMbPartWidth[0] = 16;
999     currMB->NumSubMbPart[0] = 1;
1000     currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
1001                                 currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX;
1002     currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] =
1003                                 currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX;
1004     currMB->RefIdx[0] = currMB->RefIdx[1] =
1005                             currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx;
1006 
1007     cur = encvid->currYMB; /* use smaller memory space for current MB */
1008 
1009     /*  find limit of the search (adjusting search range)*/
1010     lev_idx = mapLev2Idx[currSPS->level_idc];
1011 
1012     /* we can make this part dynamic based on previous statistics */
1013     ilow = i0 - range;
1014     if (i0 - ilow > 2047) /* clip to conform with the standard */
1015     {
1016         ilow = i0 - 2047;
1017     }
1018     if (ilow < -13)  // change it from -15 to -13 because of 6-tap filter needs extra 2 lines.
1019     {
1020         ilow = -13;
1021     }
1022 
1023     ihigh = i0 + range - 1;
1024     if (ihigh - i0 > 2047) /* clip to conform with the standard */
1025     {
1026         ihigh = i0 + 2047;
1027     }
1028     if (ihigh > width - 3)
1029     {
1030         ihigh = width - 3;  // change from width-1 to width-3 for the same reason as above
1031     }
1032 
1033     jlow = j0 - range;
1034     if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
1035     {
1036         jlow = j0 - MaxVmvR[lev_idx] + 1;
1037     }
1038     if (jlow < -13)     // same reason as above
1039     {
1040         jlow = -13;
1041     }
1042 
1043     jhigh = j0 + range - 1;
1044     if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
1045     {
1046         jhigh = j0 + MaxVmvR[lev_idx] - 1;
1047     }
1048     if (jhigh > height - 3) // same reason as above
1049     {
1050         jhigh = height - 3;
1051     }
1052 
1053     /* find initial motion vector & predicted MV*/
1054     AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy);
1055 
1056     imin = i0;
1057     jmin = j0; /* needed for fullsearch */
1058     ncand = ref + i0 + j0 * lx;
1059 
1060     /* for first row of MB, fullsearch can be used */
1061     if (FS_en)
1062     {
1063         *hp_guess = 0; /* no guess for fast half-pel */
1064 
1065         dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
1066 
1067         ncand = ref + imin + jmin * lx;
1068     }
1069     else
1070     {   /*       fullsearch the top row to only upto (0,3) MB */
1071         /*       upto 30% complexity saving with the same complexity */
1072         if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1)
1073         {
1074             *hp_guess = 0; /* no guess for fast half-pel */
1075             dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
1076             ncand = ref + imin + jmin * lx;
1077         }
1078         else
1079         {
1080             /************** initialize candidate **************************/
1081 
1082             dmin = 65535;
1083 
1084             /* check if all are equal */
1085             if (num_can == ALL_CAND_EQUAL)
1086             {
1087                 i = i0 + mvx[0];
1088                 j = j0 + mvy[0];
1089 
1090                 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1091                 {
1092                     cand = ref + i + j * lx;
1093 
1094                     d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1095                     mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1096                     d +=  mvcost;
1097 
1098                     if (d < dmin)
1099                     {
1100                         dmin = d;
1101                         imin = i;
1102                         jmin = j;
1103                         ncand = cand;
1104                         min_sad = d - mvcost; // for rate control
1105                     }
1106                 }
1107             }
1108             else
1109             {
1110                 /************** evaluate unique candidates **********************/
1111                 for (k = 0; k < num_can; k++)
1112                 {
1113                     i = i0 + mvx[k];
1114                     j = j0 + mvy[k];
1115 
1116                     if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1117                     {
1118                         cand = ref + i + j * lx;
1119                         d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1120                         mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1121                         d +=  mvcost;
1122 
1123                         if (d < dmin)
1124                         {
1125                             dmin = d;
1126                             imin = i;
1127                             jmin = j;
1128                             ncand = cand;
1129                             min_sad = d - mvcost; // for rate control
1130                         }
1131                     }
1132                 }
1133             }
1134 
1135             /******************* local refinement ***************************/
1136             center_again = 0;
1137             last_loc = new_loc = 0;
1138             //          ncand = ref + jmin*lx + imin;  /* center of the search */
1139             step = 0;
1140             dn[0] = dmin;
1141             while (!center_again && step <= max_step)
1142             {
1143 
1144                 AVCMoveNeighborSAD(dn, last_loc);
1145 
1146                 center_again = 1;
1147                 i = imin;
1148                 j = jmin - 1;
1149                 cand = ref + i + j * lx;
1150 
1151                 /*  starting from [0,-1] */
1152                 /* spiral check one step at a time*/
1153                 for (k = 2; k <= 8; k += 2)
1154                 {
1155                     if (!tab_exclude[last_loc][k]) /* exclude last step computation */
1156                     {       /* not already computed */
1157                         if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1158                         {
1159                             d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1160                             mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1161                             d += mvcost;
1162 
1163                             dn[k] = d; /* keep it for half pel use */
1164 
1165                             if (d < dmin)
1166                             {
1167                                 ncand = cand;
1168                                 dmin = d;
1169                                 imin = i;
1170                                 jmin = j;
1171                                 center_again = 0;
1172                                 new_loc = k;
1173                                 min_sad = d - mvcost; // for rate control
1174                             }
1175                         }
1176                     }
1177                     if (k == 8)  /* end side search*/
1178                     {
1179                         if (!center_again)
1180                         {
1181                             k = -1; /* start diagonal search */
1182                             cand -= lx;
1183                             j--;
1184                         }
1185                     }
1186                     else
1187                     {
1188                         next = refine_next[k][0];
1189                         i += next;
1190                         cand += next;
1191                         next = refine_next[k][1];
1192                         j += next;
1193                         cand += lx * next;
1194                     }
1195                 }
1196                 last_loc = new_loc;
1197                 step ++;
1198             }
1199             if (!center_again)
1200                 AVCMoveNeighborSAD(dn, last_loc);
1201 
1202             *hp_guess = AVCFindMin(dn);
1203 
1204             encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1205         }
1206     }
1207 
1208     mot16x16[mbnum].sad = dmin;
1209     mot16x16[mbnum].x = (imin - i0) << 2;
1210     mot16x16[mbnum].y = (jmin - j0) << 2;
1211     best_cand[0] = ncand;
1212 
1213     if (rateCtrl->subPelEnable) // always enable half-pel search
1214     {
1215         /* find half-pel resolution motion vector */
1216         min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy);
1217 
1218         encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1219 
1220 
1221         if (encvid->best_qpel_pos == -1)
1222         {
1223             ncand = encvid->hpel_cand[encvid->best_hpel_pos];
1224         }
1225         else
1226         {
1227             ncand = encvid->qpel_cand[encvid->best_qpel_pos];
1228         }
1229     }
1230     else
1231     {
1232         encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1233     }
1234 
1235     /** do motion comp here for now */
1236     ref = currPic->Sl + i0 + j0 * lx;
1237     /* copy from the best result to current Picture */
1238     for (j = 0; j < 16; j++)
1239     {
1240         for (i = 0; i < 16; i++)
1241         {
1242             *ref++ = *ncand++;
1243         }
1244         ref += (lx - 16);
1245         ncand += 8;
1246     }
1247 
1248     return ;
1249 }
1250 
1251 #endif
1252 
1253 /*===============================================================================
1254     Function:   AVCFullSearch
1255     Date:       09/16/2000
1256     Purpose:    Perform full-search motion estimation over the range of search
1257                 region in a spiral-outward manner.
1258     Input/Output:   VideoEncData, current Vol, previou Vop, pointer to the left corner of
1259                 current VOP, current coord (also output), boundaries.
1260 ===============================================================================*/
AVCFullSearch(AVCEncObject * encvid,uint8 * prev,uint8 * cur,int * imin,int * jmin,int ilow,int ihigh,int jlow,int jhigh,int cmvx,int cmvy)1261 int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur,
1262                   int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh,
1263                   int cmvx, int cmvy)
1264 {
1265     int range = encvid->rateCtrl->mvRange;
1266     AVCPictureData *currPic = encvid->common->currPic;
1267     uint8 *cand;
1268     int i, j, k, l;
1269     int d, dmin;
1270     int i0 = *imin; /* current position */
1271     int j0 = *jmin;
1272     int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
1273     void *extra_info = encvid->sad_extra_info;
1274     int lx = currPic->pitch; /* with padding */
1275 
1276     int offset = i0 + j0 * lx;
1277 
1278     int lambda_motion = encvid->lambda_motion;
1279     uint8 *mvbits = encvid->mvbits;
1280     int mvshift = 2;
1281     int mvcost;
1282     int min_sad;
1283 
1284     cand = prev + offset;
1285 
1286     dmin  = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info);
1287     mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy);
1288     min_sad = dmin;
1289     dmin += mvcost;
1290 
1291     /* perform spiral search */
1292     for (k = 1; k <= range; k++)
1293     {
1294 
1295         i = i0 - k;
1296         j = j0 - k;
1297 
1298         cand = prev + i + j * lx;
1299 
1300         for (l = 0; l < 8*k; l++)
1301         {
1302             /* no need for boundary checking again */
1303             if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1304             {
1305                 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
1306                 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1307                 d +=  mvcost;
1308 
1309                 if (d < dmin)
1310                 {
1311                     dmin = d;
1312                     *imin = i;
1313                     *jmin = j;
1314                     min_sad = d - mvcost;
1315                 }
1316             }
1317 
1318             if (l < (k << 1))
1319             {
1320                 i++;
1321                 cand++;
1322             }
1323             else if (l < (k << 2))
1324             {
1325                 j++;
1326                 cand += lx;
1327             }
1328             else if (l < ((k << 2) + (k << 1)))
1329             {
1330                 i--;
1331                 cand--;
1332             }
1333             else
1334             {
1335                 j--;
1336                 cand -= lx;
1337             }
1338         }
1339     }
1340 
1341     encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control
1342 
1343     return dmin;
1344 }
1345 
1346 /*===============================================================================
1347     Function:   AVCCandidateSelection
1348     Date:       09/16/2000
1349     Purpose:    Fill up the list of candidate using spatio-temporal correlation
1350                 among neighboring blocks.
1351     Input/Output:   type_pred = 0: first pass, 1: second pass, or no SCD
1352     Modified:   , 09/23/01, get rid of redundant candidates before passing back.
1353                 , 09/11/07, added return for modified predicted MV, this will be
1354                     needed for both fast search and fullsearch.
1355 ===============================================================================*/
1356 
AVCCandidateSelection(int * mvx,int * mvy,int * num_can,int imb,int jmb,AVCEncObject * encvid,int type_pred,int * cmvx,int * cmvy)1357 void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb,
1358                            AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy)
1359 {
1360     AVCCommonObj *video = encvid->common;
1361     AVCMV *mot16x16 = encvid->mot16x16;
1362     AVCMV *pmot;
1363     int mbnum = video->mbNum;
1364     int mbwidth = video->PicWidthInMbs;
1365     int mbheight = video->PicHeightInMbs;
1366     int i, j, same, num1;
1367 
1368     /* this part is for predicted MV */
1369     int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0;
1370     int availA = 0, availB = 0, availC = 0;
1371 
1372     *num_can = 0;
1373 
1374     if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame
1375     {
1376         /* Spatio-Temporal Candidate (five candidates) */
1377         if (type_pred == 0) /* first pass */
1378         {
1379             pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1380             mvx[(*num_can)] = (pmot->x) >> 2;
1381             mvy[(*num_can)++] = (pmot->y) >> 2;
1382             if (imb >= (mbwidth >> 1) && imb > 0)  /*left neighbor previous frame */
1383             {
1384                 pmot = &mot16x16[mbnum-1];
1385                 mvx[(*num_can)] = (pmot->x) >> 2;
1386                 mvy[(*num_can)++] = (pmot->y) >> 2;
1387             }
1388             else if (imb + 1 < mbwidth)   /*right neighbor previous frame */
1389             {
1390                 pmot = &mot16x16[mbnum+1];
1391                 mvx[(*num_can)] = (pmot->x) >> 2;
1392                 mvy[(*num_can)++] = (pmot->y) >> 2;
1393             }
1394 
1395             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1396             {
1397                 pmot = &mot16x16[mbnum+mbwidth];
1398                 mvx[(*num_can)] = (pmot->x) >> 2;
1399                 mvy[(*num_can)++] = (pmot->y) >> 2;
1400             }
1401             else if (jmb > 0)   /*upper neighbor previous frame */
1402             {
1403                 pmot = &mot16x16[mbnum-mbwidth];
1404                 mvx[(*num_can)] = (pmot->x) >> 2;
1405                 mvy[(*num_can)++] = (pmot->y) >> 2;
1406             }
1407 
1408             if (imb > 0 && jmb > 0)  /* upper-left neighbor current frame*/
1409             {
1410                 pmot = &mot16x16[mbnum-mbwidth-1];
1411                 mvx[(*num_can)] = (pmot->x) >> 2;
1412                 mvy[(*num_can)++] = (pmot->y) >> 2;
1413             }
1414             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor current frame*/
1415             {
1416                 pmot = &mot16x16[mbnum-mbwidth+1];
1417                 mvx[(*num_can)] = (pmot->x) >> 2;
1418                 mvy[(*num_can)++] = (pmot->y) >> 2;
1419             }
1420         }
1421         else    /* second pass */
1422             /* original ST1 algorithm */
1423         {
1424             pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1425             mvx[(*num_can)] = (pmot->x) >> 2;
1426             mvy[(*num_can)++] = (pmot->y) >> 2;
1427 
1428             if (imb > 0)  /*left neighbor current frame */
1429             {
1430                 pmot = &mot16x16[mbnum-1];
1431                 mvx[(*num_can)] = (pmot->x) >> 2;
1432                 mvy[(*num_can)++] = (pmot->y) >> 2;
1433             }
1434             if (jmb > 0)  /*upper neighbor current frame */
1435             {
1436                 pmot = &mot16x16[mbnum-mbwidth];
1437                 mvx[(*num_can)] = (pmot->x) >> 2;
1438                 mvy[(*num_can)++] = (pmot->y) >> 2;
1439             }
1440             if (imb < mbwidth - 1)  /*right neighbor previous frame */
1441             {
1442                 pmot = &mot16x16[mbnum+1];
1443                 mvx[(*num_can)] = (pmot->x) >> 2;
1444                 mvy[(*num_can)++] = (pmot->y) >> 2;
1445             }
1446             if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1447             {
1448                 pmot = &mot16x16[mbnum+mbwidth];
1449                 mvx[(*num_can)] = (pmot->x) >> 2;
1450                 mvy[(*num_can)++] = (pmot->y) >> 2;
1451             }
1452         }
1453 
1454         /* get predicted MV */
1455         if (imb > 0)    /* get MV from left (A) neighbor either on current or previous frame */
1456         {
1457             availA = 1;
1458             pmot = &mot16x16[mbnum-1];
1459             pmvA_x = pmot->x;
1460             pmvA_y = pmot->y;
1461         }
1462 
1463         if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1464         {
1465             availB = 1;
1466             pmot = &mot16x16[mbnum-mbwidth];
1467             pmvB_x = pmot->x;
1468             pmvB_y = pmot->y;
1469 
1470             availC = 1;
1471 
1472             if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1473             {
1474                 pmot = &mot16x16[mbnum-mbwidth+1];
1475             }
1476             else /* get MV from top-left (D) neighbor of current frame */
1477             {
1478                 pmot = &mot16x16[mbnum-mbwidth-1];
1479             }
1480             pmvC_x = pmot->x;
1481             pmvC_y = pmot->y;
1482         }
1483 
1484     }
1485     else  /* only Spatial Candidate (four candidates)*/
1486     {
1487         if (type_pred == 0) /*first pass*/
1488         {
1489             if (imb > 1)  /* neighbor two blocks away to the left */
1490             {
1491                 pmot = &mot16x16[mbnum-2];
1492                 mvx[(*num_can)] = (pmot->x) >> 2;
1493                 mvy[(*num_can)++] = (pmot->y) >> 2;
1494             }
1495             if (imb > 0 && jmb > 0)  /* upper-left neighbor */
1496             {
1497                 pmot = &mot16x16[mbnum-mbwidth-1];
1498                 mvx[(*num_can)] = (pmot->x) >> 2;
1499                 mvy[(*num_can)++] = (pmot->y) >> 2;
1500             }
1501             if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor */
1502             {
1503                 pmot = &mot16x16[mbnum-mbwidth+1];
1504                 mvx[(*num_can)] = (pmot->x) >> 2;
1505                 mvy[(*num_can)++] = (pmot->y) >> 2;
1506             }
1507 
1508             /* get predicted MV */
1509             if (imb > 1)    /* get MV from 2nd left (A) neighbor either of current frame */
1510             {
1511                 availA = 1;
1512                 pmot = &mot16x16[mbnum-2];
1513                 pmvA_x = pmot->x;
1514                 pmvA_y = pmot->y;
1515             }
1516 
1517             if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */
1518             {
1519                 availB = 1;
1520                 pmot = &mot16x16[mbnum-mbwidth-1];
1521                 pmvB_x = pmot->x;
1522                 pmvB_y = pmot->y;
1523             }
1524 
1525             if (jmb > 0 && imb < mbwidth - 1)
1526             {
1527                 availC = 1;
1528                 pmot = &mot16x16[mbnum-mbwidth+1];
1529                 pmvC_x = pmot->x;
1530                 pmvC_y = pmot->y;
1531             }
1532         }
1533 //#ifdef SCENE_CHANGE_DETECTION
1534         /* second pass (ST2 algorithm)*/
1535         else
1536         {
1537             if (type_pred == 1) /*  4/7/01 */
1538             {
1539                 if (imb > 0)  /*left neighbor current frame */
1540                 {
1541                     pmot = &mot16x16[mbnum-1];
1542                     mvx[(*num_can)] = (pmot->x) >> 2;
1543                     mvy[(*num_can)++] = (pmot->y) >> 2;
1544                 }
1545                 if (jmb > 0)  /*upper neighbor current frame */
1546                 {
1547                     pmot = &mot16x16[mbnum-mbwidth];
1548                     mvx[(*num_can)] = (pmot->x) >> 2;
1549                     mvy[(*num_can)++] = (pmot->y) >> 2;
1550                 }
1551                 if (imb < mbwidth - 1)  /*right neighbor current frame */
1552                 {
1553                     pmot = &mot16x16[mbnum+1];
1554                     mvx[(*num_can)] = (pmot->x) >> 2;
1555                     mvy[(*num_can)++] = (pmot->y) >> 2;
1556                 }
1557                 if (jmb < mbheight - 1)  /*bottom neighbor current frame */
1558                 {
1559                     pmot = &mot16x16[mbnum+mbwidth];
1560                     mvx[(*num_can)] = (pmot->x) >> 2;
1561                     mvy[(*num_can)++] = (pmot->y) >> 2;
1562                 }
1563             }
1564             //#else
1565             else /* original ST1 algorithm */
1566             {
1567                 if (imb > 0)  /*left neighbor current frame */
1568                 {
1569                     pmot = &mot16x16[mbnum-1];
1570                     mvx[(*num_can)] = (pmot->x) >> 2;
1571                     mvy[(*num_can)++] = (pmot->y) >> 2;
1572 
1573                     if (jmb > 0)  /*upper-left neighbor current frame */
1574                     {
1575                         pmot = &mot16x16[mbnum-mbwidth-1];
1576                         mvx[(*num_can)] = (pmot->x) >> 2;
1577                         mvy[(*num_can)++] = (pmot->y) >> 2;
1578                     }
1579 
1580                 }
1581                 if (jmb > 0)  /*upper neighbor current frame */
1582                 {
1583                     pmot = &mot16x16[mbnum-mbwidth];
1584                     mvx[(*num_can)] = (pmot->x) >> 2;
1585                     mvy[(*num_can)++] = (pmot->y) >> 2;
1586 
1587                     if (imb < mbheight - 1)  /*upper-right neighbor current frame */
1588                     {
1589                         pmot = &mot16x16[mbnum-mbwidth+1];
1590                         mvx[(*num_can)] = (pmot->x) >> 2;
1591                         mvy[(*num_can)++] = (pmot->y) >> 2;
1592                     }
1593                 }
1594             }
1595 
1596             /* get predicted MV */
1597             if (imb > 0)    /* get MV from left (A) neighbor either on current or previous frame */
1598             {
1599                 availA = 1;
1600                 pmot = &mot16x16[mbnum-1];
1601                 pmvA_x = pmot->x;
1602                 pmvA_y = pmot->y;
1603             }
1604 
1605             if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1606             {
1607                 availB = 1;
1608                 pmot = &mot16x16[mbnum-mbwidth];
1609                 pmvB_x = pmot->x;
1610                 pmvB_y = pmot->y;
1611 
1612                 availC = 1;
1613 
1614                 if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1615                 {
1616                     pmot = &mot16x16[mbnum-mbwidth+1];
1617                 }
1618                 else /* get MV from top-left (D) neighbor of current frame */
1619                 {
1620                     pmot = &mot16x16[mbnum-mbwidth-1];
1621                 }
1622                 pmvC_x = pmot->x;
1623                 pmvC_y = pmot->y;
1624             }
1625         }
1626 //#endif
1627     }
1628 
1629     /*  3/23/01, remove redundant candidate (possible k-mean) */
1630     num1 = *num_can;
1631     *num_can = 1;
1632     for (i = 1; i < num1; i++)
1633     {
1634         same = 0;
1635         j = 0;
1636         while (!same && j < *num_can)
1637         {
1638 #if (CANDIDATE_DISTANCE==0)
1639             if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
1640 #else
1641             // modified k-mean,  3/24/01, shouldn't be greater than 3
1642             if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
1643 #endif
1644                 same = 1;
1645             j++;
1646         }
1647         if (!same)
1648         {
1649             mvx[*num_can] = mvx[i];
1650             mvy[*num_can] = mvy[i];
1651             (*num_can)++;
1652         }
1653     }
1654 
1655     if (num1 == 5 && *num_can == 1)
1656         *num_can = ALL_CAND_EQUAL; /* all are equal */
1657 
1658     /* calculate predicted MV */
1659 
1660     if (availA && !(availB || availC))
1661     {
1662         *cmvx = pmvA_x;
1663         *cmvy = pmvA_y;
1664     }
1665     else
1666     {
1667         *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x);
1668         *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y);
1669     }
1670 
1671     return ;
1672 }
1673 
1674 
1675 /*************************************************************
1676     Function:   AVCMoveNeighborSAD
1677     Date:       3/27/01
1678     Purpose:    Move neighboring SAD around when center has shifted
1679 *************************************************************/
1680 
AVCMoveNeighborSAD(int dn[],int new_loc)1681 void AVCMoveNeighborSAD(int dn[], int new_loc)
1682 {
1683     int tmp[9];
1684     tmp[0] = dn[0];
1685     tmp[1] = dn[1];
1686     tmp[2] = dn[2];
1687     tmp[3] = dn[3];
1688     tmp[4] = dn[4];
1689     tmp[5] = dn[5];
1690     tmp[6] = dn[6];
1691     tmp[7] = dn[7];
1692     tmp[8] = dn[8];
1693     dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536;
1694 
1695     switch (new_loc)
1696     {
1697         case 0:
1698             break;
1699         case 1:
1700             dn[4] = tmp[2];
1701             dn[5] = tmp[0];
1702             dn[6] = tmp[8];
1703             break;
1704         case 2:
1705             dn[4] = tmp[3];
1706             dn[5] = tmp[4];
1707             dn[6] = tmp[0];
1708             dn[7] = tmp[8];
1709             dn[8] = tmp[1];
1710             break;
1711         case 3:
1712             dn[6] = tmp[4];
1713             dn[7] = tmp[0];
1714             dn[8] = tmp[2];
1715             break;
1716         case 4:
1717             dn[1] = tmp[2];
1718             dn[2] = tmp[3];
1719             dn[6] = tmp[5];
1720             dn[7] = tmp[6];
1721             dn[8] = tmp[0];
1722             break;
1723         case 5:
1724             dn[1] = tmp[0];
1725             dn[2] = tmp[4];
1726             dn[8] = tmp[6];
1727             break;
1728         case 6:
1729             dn[1] = tmp[8];
1730             dn[2] = tmp[0];
1731             dn[3] = tmp[4];
1732             dn[4] = tmp[5];
1733             dn[8] = tmp[7];
1734             break;
1735         case 7:
1736             dn[2] = tmp[8];
1737             dn[3] = tmp[0];
1738             dn[4] = tmp[6];
1739             break;
1740         case 8:
1741             dn[2] = tmp[1];
1742             dn[3] = tmp[2];
1743             dn[4] = tmp[0];
1744             dn[5] = tmp[6];
1745             dn[6] = tmp[7];
1746             break;
1747     }
1748     dn[0] = tmp[new_loc];
1749 
1750     return ;
1751 }
1752 
1753 /*  3/28/01, find minimal of dn[9] */
1754 
AVCFindMin(int dn[])1755 int AVCFindMin(int dn[])
1756 {
1757     int min, i;
1758     int dmin;
1759 
1760     dmin = dn[1];
1761     min = 1;
1762     for (i = 2; i < 9; i++)
1763     {
1764         if (dn[i] < dmin)
1765         {
1766             dmin = dn[i];
1767             min = i;
1768         }
1769     }
1770 
1771     return min;
1772 }
1773 
1774 
1775 
1776