1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 #include "oscl_mem.h"
19 #include "avcenc_lib.h"
20
21 #define MIN_GOP 1 /* minimum size of GOP, 1/23/01, need to be tested */
22
23 #define DEFAULT_REF_IDX 0 /* always from the first frame in the reflist */
24
25 #define ALL_CAND_EQUAL 10 /* any number greater than 5 will work */
26
27
28 /* from TMN 3.2 */
29 #define PREF_NULL_VEC 129 /* zero vector bias */
30 #define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/
31 #define PREF_INTRA 3024//512 /* bias for INTRA coding */
32
33 const static int tab_exclude[9][9] = // [last_loc][curr_loc]
34 {
35 {0, 0, 0, 0, 0, 0, 0, 0, 0},
36 {0, 0, 0, 0, 1, 1, 1, 0, 0},
37 {0, 0, 0, 0, 1, 1, 1, 1, 1},
38 {0, 0, 0, 0, 0, 0, 1, 1, 1},
39 {0, 1, 1, 0, 0, 0, 1, 1, 1},
40 {0, 1, 1, 0, 0, 0, 0, 0, 1},
41 {0, 1, 1, 1, 1, 0, 0, 0, 1},
42 {0, 0, 1, 1, 1, 0, 0, 0, 0},
43 {0, 0, 1, 1, 1, 1, 1, 0, 0}
44 }; //to decide whether to continue or compute
45
46 const static int refine_next[8][2] = /* [curr_k][increment] */
47 {
48 {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}
49 };
50
51 #ifdef _SAD_STAT
52 uint32 num_MB = 0;
53 uint32 num_cand = 0;
54 #endif
55
56 /************************************************************************/
57 #define TH_INTER_2 100 /* temporary for now */
58
59 //#define FIXED_INTERPRED_MODE AVC_P16
60 #define FIXED_REF_IDX 0
61 #define FIXED_MVX 0
62 #define FIXED_MVY 0
63
64 // only use when AVC_P8 or AVC_P8ref0
65 #define FIXED_SUBMB_MODE AVC_4x4
66 /*************************************************************************/
67
68 /* Initialize arrays necessary for motion search */
InitMotionSearchModule(AVCHandle * avcHandle)69 AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle)
70 {
71 AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
72 AVCRateControl *rateCtrl = encvid->rateCtrl;
73 int search_range = rateCtrl->mvRange;
74 int number_of_subpel_positions = 4 * (2 * search_range + 3);
75 int max_mv_bits, max_mvd;
76 int temp_bits = 0;
77 uint8 *mvbits;
78 int bits, imax, imin, i;
79 uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions
80
81
82 while (number_of_subpel_positions > 0)
83 {
84 temp_bits++;
85 number_of_subpel_positions >>= 1;
86 }
87
88 max_mv_bits = 3 + 2 * temp_bits;
89 max_mvd = (1 << (max_mv_bits >> 1)) - 1;
90
91 encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
92 sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR);
93
94 if (encvid->mvbits_array == NULL)
95 {
96 return AVCENC_MEMORY_FAIL;
97 }
98
99 mvbits = encvid->mvbits = encvid->mvbits_array + max_mvd;
100
101 mvbits[0] = 1;
102 for (bits = 3; bits <= max_mv_bits; bits += 2)
103 {
104 imax = 1 << (bits >> 1);
105 imin = imax >> 1;
106
107 for (i = imin; i < imax; i++) mvbits[-i] = mvbits[i] = bits;
108 }
109
110 /* initialize half-pel search */
111 encvid->hpel_cand[0] = subpel_pred + REF_CENTER;
112 encvid->hpel_cand[1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ;
113 encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
114 encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
115 encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
116 encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
117 encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
118 encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
119 encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
120
121 /* For quarter-pel interpolation around best half-pel result */
122
123 encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
124 encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
125 encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
126 encvid->bilin_base[0][3] = subpel_pred + REF_CENTER;
127
128
129 encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
130 encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24;
131 encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
132 encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
133
134 encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24;
135 encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
136 encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
137 encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
138
139 encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
140 encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
141 encvid->bilin_base[3][2] = subpel_pred + REF_CENTER;
142 encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
143
144 encvid->bilin_base[4][0] = subpel_pred + REF_CENTER;
145 encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
146 encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
147 encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
148
149 encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
150 encvid->bilin_base[5][1] = subpel_pred + REF_CENTER;
151 encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
152 encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
153
154 encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1;
155 encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
156 encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24;
157 encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
158
159 encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
160 encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
161 encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1;
162 encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
163
164 encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25;
165 encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
166 encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
167 encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
168
169
170 return AVCENC_SUCCESS;
171 }
172
173 /* Clean-up memory */
CleanMotionSearchModule(AVCHandle * avcHandle)174 void CleanMotionSearchModule(AVCHandle *avcHandle)
175 {
176 AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
177
178 if (encvid->mvbits_array)
179 {
180 avcHandle->CBAVC_Free(avcHandle->userData, (int)(encvid->mvbits_array));
181 encvid->mvbits = NULL;
182 }
183
184 return ;
185 }
186
187
IntraDecisionABE(int * min_cost,uint8 * cur,int pitch,bool ave)188 bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave)
189 {
190 int j;
191 uint8 *out;
192 int temp, SBE;
193 OsclFloat ABE;
194 bool intra = true;
195
196 SBE = 0;
197 /* top neighbor */
198 out = cur - pitch;
199 for (j = 0; j < 16; j++)
200 {
201 temp = out[j] - cur[j];
202 SBE += ((temp >= 0) ? temp : -temp);
203 }
204
205 /* left neighbor */
206 out = cur - 1;
207 out -= pitch;
208 cur -= pitch;
209 for (j = 0; j < 16; j++)
210 {
211 temp = *(out += pitch) - *(cur += pitch);
212 SBE += ((temp >= 0) ? temp : -temp);
213 }
214
215 /* compare mincost/384 and SBE/64 */
216 ABE = SBE / 32.0; //ABE = SBE/64.0; //
217 if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) //
218 {
219 intra = false; // no possibility of intra, just use inter
220 }
221 else
222 {
223 if (ave == true)
224 {
225 *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost
226 }
227 else
228 {
229 *min_cost = (int)(SBE * 8);
230 }
231 }
232
233 return intra;
234 }
235
236 /******* main function for macroblock prediction for the entire frame ***/
237 /* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */
AVCMotionEstimation(AVCEncObject * encvid)238 void AVCMotionEstimation(AVCEncObject *encvid)
239 {
240 AVCCommonObj *video = encvid->common;
241 int slice_type = video->slice_type;
242 AVCFrameIO *currInput = encvid->currInput;
243 AVCPictureData *refPic = video->RefPicList0[0];
244 int i, j, k;
245 int mbwidth = video->PicWidthInMbs;
246 int mbheight = video->PicHeightInMbs;
247 int totalMB = video->PicSizeInMbs;
248 int pitch = currInput->pitch;
249 AVCMacroblock *currMB, *mblock = video->mblock;
250 AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16;
251 // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc;
252 AVCRateControl *rateCtrl = encvid->rateCtrl;
253 uint8 *intraSearch = encvid->intraSearch;
254 uint FS_en = encvid->fullsearch_enable;
255
256 int NumIntraSearch, start_i, numLoop, incr_i;
257 int mbnum, offset;
258 uint8 *cur, *best_cand[5];
259 int totalSAD = 0; /* average SAD for rate control */
260 int type_pred;
261 int abe_cost;
262
263 #ifdef HTFM
264 /***** HYPOTHESIS TESTING ********/ /* 2/28/01 */
265 int collect = 0;
266 HTFM_Stat htfm_stat;
267 double newvar[16];
268 double exp_lamda[15];
269 /*********************************/
270 #endif
271 int hp_guess = 0;
272 uint32 mv_uint32;
273
274 offset = 0;
275
276 if (slice_type == AVC_I_SLICE)
277 {
278 /* cannot do I16 prediction here because it needs full decoding. */
279 for (i = 0; i < totalMB; i++)
280 {
281 encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */
282 }
283
284 oscl_memset(intraSearch, 1, sizeof(uint8)*totalMB);
285
286 encvid->firstIntraRefreshMBIndx = 0; /* reset this */
287
288 return ;
289 }
290 else // P_SLICE
291 {
292 for (i = 0; i < totalMB; i++)
293 {
294 mblock[i].mb_intra = 0;
295 }
296 oscl_memset(intraSearch, 1, sizeof(uint8)*totalMB);
297 }
298
299 if (refPic->padded == 0)
300 {
301 AVCPaddingEdge(refPic);
302 refPic->padded = 1;
303 }
304 /* Random INTRA update */
305 if (rateCtrl->intraMBRate)
306 {
307 AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate);
308 }
309
310 encvid->sad_extra_info = NULL;
311 #ifdef HTFM
312 /***** HYPOTHESIS TESTING ********/
313 InitHTFM(video, &htfm_stat, newvar, &collect);
314 /*********************************/
315 #endif
316
317 if ((rateCtrl->scdEnable == 1)
318 && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP)))
319 /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
320 {
321 incr_i = 2;
322 numLoop = 2;
323 start_i = 1;
324 type_pred = 0; /* for initial candidate selection */
325 }
326 else
327 {
328 incr_i = 1;
329 numLoop = 1;
330 start_i = 0;
331 type_pred = 2;
332 }
333
334 /* First pass, loop thru half the macroblock */
335 /* determine scene change */
336 /* Second pass, for the rest of macroblocks */
337 NumIntraSearch = 0; // to be intra searched in the encoding loop.
338 while (numLoop--)
339 {
340 for (j = 0; j < mbheight; j++)
341 {
342 if (incr_i > 1)
343 start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */
344
345 offset = pitch * (j << 4) + (start_i << 4);
346
347 mbnum = j * mbwidth + start_i;
348
349 for (i = start_i; i < mbwidth; i += incr_i)
350 {
351 video->mbNum = mbnum;
352 video->currMB = currMB = mblock + mbnum;
353 mot_mb_16x16 = mot16x16 + mbnum;
354
355 cur = currInput->YCbCr[0] + offset;
356
357 if (currMB->mb_intra == 0) /* for INTER mode */
358 {
359 #if defined(HTFM)
360 HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch);
361 #else
362 AVCPrepareCurMB(encvid, cur, pitch);
363 #endif
364 /************************************************************/
365 /******** full-pel 1MV search **********************/
366
367 AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred,
368 FS_en, &hp_guess);
369
370 abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad;
371
372 /* set mbMode and MVs */
373 currMB->mbMode = AVC_P16;
374 currMB->MBPartPredMode[0][0] = AVC_Pred_L0;
375 mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff);
376 for (k = 0; k < 32; k += 2)
377 {
378 currMB->mvL0[k>>1] = mv_uint32;
379 }
380
381 /* make a decision whether it should be tested for intra or not */
382 if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
383 {
384 if (false == IntraDecisionABE(&abe_cost, cur, pitch, true))
385 {
386 intraSearch[mbnum] = 0;
387 }
388 else
389 {
390 NumIntraSearch++;
391 rateCtrl->MADofMB[mbnum] = abe_cost;
392 }
393 }
394 else // boundary MBs, always do intra search
395 {
396 NumIntraSearch++;
397 }
398
399 totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad;
400 }
401 else /* INTRA update, use for prediction */
402 {
403 mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0;
404
405 /* reset all other MVs to zero */
406 /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */
407 abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF; /* max value for int */
408
409 if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
410 {
411 IntraDecisionABE(&abe_cost, cur, pitch, false);
412
413 rateCtrl->MADofMB[mbnum] = abe_cost;
414 totalSAD += abe_cost;
415 }
416
417 NumIntraSearch++ ;
418 /* cannot do I16 prediction here because it needs full decoding. */
419 // intraSearch[mbnum] = 1;
420
421 }
422
423 mbnum += incr_i;
424 offset += (incr_i << 4);
425
426 } /* for i */
427 } /* for j */
428
429 /* since we cannot do intra/inter decision here, the SCD has to be
430 based on other criteria such as motion vectors coherency or the SAD */
431 if (incr_i > 1 && numLoop) /* scene change on and first loop */
432 {
433 //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
434 if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */
435 /* need to do more investigation about this threshold since the NumIntraSearch
436 only show potential intra MBs, not the actual one */
437 {
438 /* we can choose to just encode I_SLICE without IDR */
439 //video->nal_unit_type = AVC_NALTYPE_IDR;
440 video->nal_unit_type = AVC_NALTYPE_SLICE;
441 video->sliceHdr->slice_type = AVC_I_ALL_SLICE;
442 video->slice_type = AVC_I_SLICE;
443 oscl_memset(intraSearch, 1, sizeof(uint8)*totalMB);
444 i = totalMB;
445 while (i--)
446 {
447 mblock[i].mb_intra = 1;
448 encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */
449 }
450
451 rateCtrl->totalSAD = totalSAD * 2; /* SAD */
452
453 return ;
454 }
455 }
456 /******** no scene change, continue motion search **********************/
457 start_i = 0;
458 type_pred++; /* second pass */
459 }
460
461 rateCtrl->totalSAD = totalSAD; /* SAD */
462
463 #ifdef HTFM
464 /***** HYPOTHESIS TESTING ********/
465 if (collect)
466 {
467 collect = 0;
468 UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat);
469 }
470 /*********************************/
471 #endif
472
473 return ;
474 }
475
476 /*=====================================================================
477 Function: PaddingEdge
478 Date: 09/16/2000
479 Purpose: Pad edge of a Vop
480 =====================================================================*/
481
AVCPaddingEdge(AVCPictureData * refPic)482 void AVCPaddingEdge(AVCPictureData *refPic)
483 {
484 uint8 *src, *dst;
485 int i;
486 int pitch, width, height;
487 uint32 temp1, temp2;
488
489 width = refPic->width;
490 height = refPic->height;
491 pitch = refPic->pitch;
492
493 /* pad top */
494 src = refPic->Sl;
495
496 temp1 = *src; /* top-left corner */
497 temp2 = src[width-1]; /* top-right corner */
498 temp1 |= (temp1 << 8);
499 temp1 |= (temp1 << 16);
500 temp2 |= (temp2 << 8);
501 temp2 |= (temp2 << 16);
502
503 dst = src - (pitch << 4);
504
505 *((uint32*)(dst - 16)) = temp1;
506 *((uint32*)(dst - 12)) = temp1;
507 *((uint32*)(dst - 8)) = temp1;
508 *((uint32*)(dst - 4)) = temp1;
509
510 oscl_memcpy(dst, src, width);
511
512 *((uint32*)(dst += width)) = temp2;
513 *((uint32*)(dst + 4)) = temp2;
514 *((uint32*)(dst + 8)) = temp2;
515 *((uint32*)(dst + 12)) = temp2;
516
517 dst = dst - width - 16;
518
519 i = 15;
520 while (i--)
521 {
522 oscl_memcpy(dst + pitch, dst, pitch);
523 dst += pitch;
524 }
525
526 /* pad sides */
527 dst += (pitch + 16);
528 src = dst;
529 i = height;
530 while (i--)
531 {
532 temp1 = *src;
533 temp2 = src[width-1];
534 temp1 |= (temp1 << 8);
535 temp1 |= (temp1 << 16);
536 temp2 |= (temp2 << 8);
537 temp2 |= (temp2 << 16);
538
539 *((uint32*)(dst - 16)) = temp1;
540 *((uint32*)(dst - 12)) = temp1;
541 *((uint32*)(dst - 8)) = temp1;
542 *((uint32*)(dst - 4)) = temp1;
543
544 *((uint32*)(dst += width)) = temp2;
545 *((uint32*)(dst + 4)) = temp2;
546 *((uint32*)(dst + 8)) = temp2;
547 *((uint32*)(dst + 12)) = temp2;
548
549 src += pitch;
550 dst = src;
551 }
552
553 /* pad bottom */
554 dst -= 16;
555 i = 16;
556 while (i--)
557 {
558 oscl_memcpy(dst, dst - pitch, pitch);
559 dst += pitch;
560 }
561
562
563 return ;
564 }
565
566 /*===========================================================================
567 Function: AVCRasterIntraUpdate
568 Date: 2/26/01
569 Purpose: To raster-scan assign INTRA-update .
570 N macroblocks are updated (also was programmable).
571 ===========================================================================*/
AVCRasterIntraUpdate(AVCEncObject * encvid,AVCMacroblock * mblock,int totalMB,int numRefresh)572 void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh)
573 {
574 int indx, i;
575
576 indx = encvid->firstIntraRefreshMBIndx;
577 for (i = 0; i < numRefresh && indx < totalMB; i++)
578 {
579 (mblock + indx)->mb_intra = 1;
580 encvid->intraSearch[indx++] = 1;
581 }
582
583 /* if read the end of frame, reset and loop around */
584 if (indx >= totalMB - 1)
585 {
586 indx = 0;
587 while (i < numRefresh && indx < totalMB)
588 {
589 (mblock + indx)->mb_intra = 1;
590 encvid->intraSearch[indx++] = 1;
591 i++;
592 }
593 }
594
595 encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */
596
597 return ;
598 }
599
600
601 #ifdef HTFM
InitHTFM(VideoEncData * encvid,HTFM_Stat * htfm_stat,double * newvar,int * collect)602 void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect)
603 {
604 AVCCommonObj *video = encvid->common;
605 int i;
606 int lx = video->currPic->width; // padding
607 int lx2 = lx << 1;
608 int lx3 = lx2 + lx;
609 int rx = video->currPic->pitch;
610 int rx2 = rx << 1;
611 int rx3 = rx2 + rx;
612
613 int *offset, *offset2;
614
615 /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
616 if (((int)video->sliceHdr->frame_num) % 30 == 1)
617 {
618
619 *collect = 1;
620
621 htfm_stat->countbreak = 0;
622 htfm_stat->abs_dif_mad_avg = 0;
623
624 for (i = 0; i < 16; i++)
625 {
626 newvar[i] = 0.0;
627 }
628 // encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect;
629 encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
630 encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
631 encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
632 encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
633 encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
634 encvid->sad_extra_info = (void*)(htfm_stat);
635 offset = htfm_stat->offsetArray;
636 offset2 = htfm_stat->offsetRef;
637 }
638 else
639 {
640 // encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM;
641 encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
642 encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
643 encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
644 encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
645 encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
646 encvid->sad_extra_info = (void*)(encvid->nrmlz_th);
647 offset = encvid->nrmlz_th + 16;
648 offset2 = encvid->nrmlz_th + 32;
649 }
650
651 offset[0] = 0;
652 offset[1] = lx2 + 2;
653 offset[2] = 2;
654 offset[3] = lx2;
655 offset[4] = lx + 1;
656 offset[5] = lx3 + 3;
657 offset[6] = lx + 3;
658 offset[7] = lx3 + 1;
659 offset[8] = lx;
660 offset[9] = lx3 + 2;
661 offset[10] = lx3 ;
662 offset[11] = lx + 2 ;
663 offset[12] = 1;
664 offset[13] = lx2 + 3;
665 offset[14] = lx2 + 1;
666 offset[15] = 3;
667
668 offset2[0] = 0;
669 offset2[1] = rx2 + 2;
670 offset2[2] = 2;
671 offset2[3] = rx2;
672 offset2[4] = rx + 1;
673 offset2[5] = rx3 + 3;
674 offset2[6] = rx + 3;
675 offset2[7] = rx3 + 1;
676 offset2[8] = rx;
677 offset2[9] = rx3 + 2;
678 offset2[10] = rx3 ;
679 offset2[11] = rx + 2 ;
680 offset2[12] = 1;
681 offset2[13] = rx2 + 3;
682 offset2[14] = rx2 + 1;
683 offset2[15] = 3;
684
685 return ;
686 }
687
UpdateHTFM(AVCEncObject * encvid,double * newvar,double * exp_lamda,HTFM_Stat * htfm_stat)688 void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
689 {
690 if (htfm_stat->countbreak == 0)
691 htfm_stat->countbreak = 1;
692
693 newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
694
695 if (newvar[0] < 0.001)
696 {
697 newvar[0] = 0.001; /* to prevent floating overflow */
698 }
699 exp_lamda[0] = 1 / (newvar[0] * 1.4142136);
700 exp_lamda[1] = exp_lamda[0] * 1.5825;
701 exp_lamda[2] = exp_lamda[0] * 2.1750;
702 exp_lamda[3] = exp_lamda[0] * 3.5065;
703 exp_lamda[4] = exp_lamda[0] * 3.1436;
704 exp_lamda[5] = exp_lamda[0] * 3.5315;
705 exp_lamda[6] = exp_lamda[0] * 3.7449;
706 exp_lamda[7] = exp_lamda[0] * 4.5854;
707 exp_lamda[8] = exp_lamda[0] * 4.6191;
708 exp_lamda[9] = exp_lamda[0] * 5.4041;
709 exp_lamda[10] = exp_lamda[0] * 6.5974;
710 exp_lamda[11] = exp_lamda[0] * 10.5341;
711 exp_lamda[12] = exp_lamda[0] * 10.0719;
712 exp_lamda[13] = exp_lamda[0] * 12.0516;
713 exp_lamda[14] = exp_lamda[0] * 15.4552;
714
715 CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th);
716 return ;
717 }
718
719
CalcThreshold(double pf,double exp_lamda[],int nrmlz_th[])720 void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[])
721 {
722 int i;
723 double temp[15];
724 // printf("\nLamda: ");
725
726 /* parametric PREMODELling */
727 for (i = 0; i < 15; i++)
728 {
729 // printf("%g ",exp_lamda[i]);
730 if (pf < 0.5)
731 temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf);
732 else
733 temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf));
734 }
735
736 nrmlz_th[15] = 0;
737 for (i = 0; i < 15; i++) /* scale upto no.pixels */
738 nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5);
739
740 return ;
741 }
742
HTFMPrepareCurMB_AVC(AVCEncObject * encvid,HTFM_Stat * htfm_stat,uint8 * cur,int pitch)743 void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch)
744 {
745 AVCCommonObj *video = encvid->common;
746 uint32 *htfmMB = (uint32*)(encvid->currYMB);
747 uint8 *ptr, byte;
748 int *offset;
749 int i;
750 uint32 word;
751
752 if (((int)video->sliceHdr->frame_num) % 30 == 1)
753 {
754 offset = htfm_stat->offsetArray;
755 }
756 else
757 {
758 offset = encvid->nrmlz_th + 16;
759 }
760
761 for (i = 0; i < 16; i++)
762 {
763 ptr = cur + offset[i];
764 word = ptr[0];
765 byte = ptr[4];
766 word |= (byte << 8);
767 byte = ptr[8];
768 word |= (byte << 16);
769 byte = ptr[12];
770 word |= (byte << 24);
771 *htfmMB++ = word;
772
773 word = *(ptr += (pitch << 2));
774 byte = ptr[4];
775 word |= (byte << 8);
776 byte = ptr[8];
777 word |= (byte << 16);
778 byte = ptr[12];
779 word |= (byte << 24);
780 *htfmMB++ = word;
781
782 word = *(ptr += (pitch << 2));
783 byte = ptr[4];
784 word |= (byte << 8);
785 byte = ptr[8];
786 word |= (byte << 16);
787 byte = ptr[12];
788 word |= (byte << 24);
789 *htfmMB++ = word;
790
791 word = *(ptr += (pitch << 2));
792 byte = ptr[4];
793 word |= (byte << 8);
794 byte = ptr[8];
795 word |= (byte << 16);
796 byte = ptr[12];
797 word |= (byte << 24);
798 *htfmMB++ = word;
799 }
800
801 return ;
802 }
803
804
805 #endif // HTFM
806
AVCPrepareCurMB(AVCEncObject * encvid,uint8 * cur,int pitch)807 void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch)
808 {
809 void* tmp = (void*)(encvid->currYMB);
810 uint32 *currYMB = (uint32*) tmp;
811 int i;
812
813 cur -= pitch;
814
815 for (i = 0; i < 16; i++)
816 {
817 *currYMB++ = *((uint32*)(cur += pitch));
818 *currYMB++ = *((uint32*)(cur + 4));
819 *currYMB++ = *((uint32*)(cur + 8));
820 *currYMB++ = *((uint32*)(cur + 12));
821 }
822
823 return ;
824 }
825
826 #ifdef FIXED_INTERPRED_MODE
827
828 /* due to the complexity of the predicted motion vector, we may not decide to skip
829 a macroblock here just yet. */
830 /* We will find the best motion vector and the best intra prediction mode for each block. */
831 /* output are
832 currMB->NumMbPart, currMB->MbPartWidth, currMB->MbPartHeight,
833 currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight,
834 currMB->MBPartPredMode[][] (L0 or L1 or BiPred)
835 currMB->RefIdx[], currMB->ref_idx_L0[],
836 currMB->mvL0[], currMB->mvL1[]
837 */
838
AVCMBMotionSearch(AVCEncObject * encvid,AVCMacroblock * currMB,int mbNum,int num_pass)839 AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum,
840 int num_pass)
841 {
842 AVCCommonObj *video = encvid->common;
843 int mbPartIdx, subMbPartIdx;
844 int16 *mv;
845 int i;
846 int SubMbPartHeight, SubMbPartWidth, NumSubMbPart;
847
848 /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */
849
850 currMB->mbMode = FIXED_INTERPRED_MODE;
851 currMB->mb_intra = 0;
852
853 if (currMB->mbMode == AVC_P16)
854 {
855 currMB->NumMbPart = 1;
856 currMB->MbPartWidth = 16;
857 currMB->MbPartHeight = 16;
858 currMB->SubMbPartHeight[0] = 16;
859 currMB->SubMbPartWidth[0] = 16;
860 currMB->NumSubMbPart[0] = 1;
861 }
862 else if (currMB->mbMode == AVC_P16x8)
863 {
864 currMB->NumMbPart = 2;
865 currMB->MbPartWidth = 16;
866 currMB->MbPartHeight = 8;
867 for (i = 0; i < 2; i++)
868 {
869 currMB->SubMbPartWidth[i] = 16;
870 currMB->SubMbPartHeight[i] = 8;
871 currMB->NumSubMbPart[i] = 1;
872 }
873 }
874 else if (currMB->mbMode == AVC_P8x16)
875 {
876 currMB->NumMbPart = 2;
877 currMB->MbPartWidth = 8;
878 currMB->MbPartHeight = 16;
879 for (i = 0; i < 2; i++)
880 {
881 currMB->SubMbPartWidth[i] = 8;
882 currMB->SubMbPartHeight[i] = 16;
883 currMB->NumSubMbPart[i] = 1;
884 }
885 }
886 else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0)
887 {
888 currMB->NumMbPart = 4;
889 currMB->MbPartWidth = 8;
890 currMB->MbPartHeight = 8;
891 if (FIXED_SUBMB_MODE == AVC_8x8)
892 {
893 SubMbPartHeight = 8;
894 SubMbPartWidth = 8;
895 NumSubMbPart = 1;
896 }
897 else if (FIXED_SUBMB_MODE == AVC_8x4)
898 {
899 SubMbPartHeight = 4;
900 SubMbPartWidth = 8;
901 NumSubMbPart = 2;
902 }
903 else if (FIXED_SUBMB_MODE == AVC_4x8)
904 {
905 SubMbPartHeight = 8;
906 SubMbPartWidth = 4;
907 NumSubMbPart = 2;
908 }
909 else if (FIXED_SUBMB_MODE == AVC_4x4)
910 {
911 SubMbPartHeight = 4;
912 SubMbPartWidth = 4;
913 NumSubMbPart = 4;
914 }
915
916 for (i = 0; i < 4; i++)
917 {
918 currMB->subMbMode[i] = FIXED_SUBMB_MODE;
919 currMB->SubMbPartHeight[i] = SubMbPartHeight;
920 currMB->SubMbPartWidth[i] = SubMbPartWidth;
921 currMB->NumSubMbPart[i] = NumSubMbPart;
922 }
923 }
924 else /* it's probably intra mode */
925 {
926 return AVCENC_SUCCESS;
927 }
928
929 for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
930 {
931 currMB->MBPartPredMode[mbPartIdx][0] = AVC_Pred_L0;
932 currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX;
933 currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx;
934
935 for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++)
936 {
937 mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx);
938
939 *mv++ = FIXED_MVX;
940 *mv = FIXED_MVY;
941 }
942 }
943
944 encvid->min_cost = 0;
945
946 return AVCENC_SUCCESS;
947 }
948
949 #else /* perform the search */
950
951 /* This option #1 search is very similar to PV's MPEG4 motion search algorithm.
952 The search is done in hierarchical manner from 16x16 MB down to smaller and smaller
953 partition. At each level, a decision can be made to stop the search if the expected
954 prediction gain is not worth the computation. The decision can also be made at the finest
955 level for more fullsearch-like behavior with the price of heavier computation. */
AVCMBMotionSearch(AVCEncObject * encvid,uint8 * cur,uint8 * best_cand[],int i0,int j0,int type_pred,int FS_en,int * hp_guess)956 void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[],
957 int i0, int j0, int type_pred, int FS_en, int *hp_guess)
958 {
959 AVCCommonObj *video = encvid->common;
960 AVCPictureData *currPic = video->currPic;
961 AVCSeqParamSet *currSPS = video->currSeqParams;
962 AVCRateControl *rateCtrl = encvid->rateCtrl;
963 AVCMacroblock *currMB = video->currMB;
964 uint8 *ref, *cand, *ncand;
965 void *extra_info = encvid->sad_extra_info;
966 int mbnum = video->mbNum;
967 int width = currPic->width; /* 6/12/01, must be multiple of 16 */
968 int height = currPic->height;
969 AVCMV *mot16x16 = encvid->mot16x16;
970 int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
971
972 int range = rateCtrl->mvRange;
973
974 int lx = currPic->pitch; /* padding */
975 int i, j, imin, jmin, ilow, ihigh, jlow, jhigh;
976 int d, dmin, dn[9];
977 int k;
978 int mvx[5], mvy[5];
979 int num_can, center_again;
980 int last_loc, new_loc = 0;
981 int step, max_step = range >> 1;
982 int next;
983
984 int cmvx, cmvy; /* estimated predicted MV */
985 int lev_idx;
986 int lambda_motion = encvid->lambda_motion;
987 uint8 *mvbits = encvid->mvbits;
988 int mvshift = 2;
989 int mvcost;
990
991 int min_sad = 65535;
992
993 ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */
994
995 /* have to initialize these params, necessary for interprediction part */
996 currMB->NumMbPart = 1;
997 currMB->SubMbPartHeight[0] = 16;
998 currMB->SubMbPartWidth[0] = 16;
999 currMB->NumSubMbPart[0] = 1;
1000 currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
1001 currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX;
1002 currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] =
1003 currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX;
1004 currMB->RefIdx[0] = currMB->RefIdx[1] =
1005 currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx;
1006
1007 cur = encvid->currYMB; /* use smaller memory space for current MB */
1008
1009 /* find limit of the search (adjusting search range)*/
1010 lev_idx = mapLev2Idx[currSPS->level_idc];
1011
1012 /* we can make this part dynamic based on previous statistics */
1013 ilow = i0 - range;
1014 if (i0 - ilow > 2047) /* clip to conform with the standard */
1015 {
1016 ilow = i0 - 2047;
1017 }
1018 if (ilow < -13) // change it from -15 to -13 because of 6-tap filter needs extra 2 lines.
1019 {
1020 ilow = -13;
1021 }
1022
1023 ihigh = i0 + range - 1;
1024 if (ihigh - i0 > 2047) /* clip to conform with the standard */
1025 {
1026 ihigh = i0 + 2047;
1027 }
1028 if (ihigh > width - 3)
1029 {
1030 ihigh = width - 3; // change from width-1 to width-3 for the same reason as above
1031 }
1032
1033 jlow = j0 - range;
1034 if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
1035 {
1036 jlow = j0 - MaxVmvR[lev_idx] + 1;
1037 }
1038 if (jlow < -13) // same reason as above
1039 {
1040 jlow = -13;
1041 }
1042
1043 jhigh = j0 + range - 1;
1044 if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
1045 {
1046 jhigh = j0 + MaxVmvR[lev_idx] - 1;
1047 }
1048 if (jhigh > height - 3) // same reason as above
1049 {
1050 jhigh = height - 3;
1051 }
1052
1053 /* find initial motion vector & predicted MV*/
1054 AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy);
1055
1056 imin = i0;
1057 jmin = j0; /* needed for fullsearch */
1058 ncand = ref + i0 + j0 * lx;
1059
1060 /* for first row of MB, fullsearch can be used */
1061 if (FS_en)
1062 {
1063 *hp_guess = 0; /* no guess for fast half-pel */
1064
1065 dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
1066
1067 ncand = ref + imin + jmin * lx;
1068 }
1069 else
1070 { /* fullsearch the top row to only upto (0,3) MB */
1071 /* upto 30% complexity saving with the same complexity */
1072 if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1)
1073 {
1074 *hp_guess = 0; /* no guess for fast half-pel */
1075 dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
1076 ncand = ref + imin + jmin * lx;
1077 }
1078 else
1079 {
1080 /************** initialize candidate **************************/
1081
1082 dmin = 65535;
1083
1084 /* check if all are equal */
1085 if (num_can == ALL_CAND_EQUAL)
1086 {
1087 i = i0 + mvx[0];
1088 j = j0 + mvy[0];
1089
1090 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1091 {
1092 cand = ref + i + j * lx;
1093
1094 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1095 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1096 d += mvcost;
1097
1098 if (d < dmin)
1099 {
1100 dmin = d;
1101 imin = i;
1102 jmin = j;
1103 ncand = cand;
1104 min_sad = d - mvcost; // for rate control
1105 }
1106 }
1107 }
1108 else
1109 {
1110 /************** evaluate unique candidates **********************/
1111 for (k = 0; k < num_can; k++)
1112 {
1113 i = i0 + mvx[k];
1114 j = j0 + mvy[k];
1115
1116 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1117 {
1118 cand = ref + i + j * lx;
1119 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1120 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1121 d += mvcost;
1122
1123 if (d < dmin)
1124 {
1125 dmin = d;
1126 imin = i;
1127 jmin = j;
1128 ncand = cand;
1129 min_sad = d - mvcost; // for rate control
1130 }
1131 }
1132 }
1133 }
1134
1135 /******************* local refinement ***************************/
1136 center_again = 0;
1137 last_loc = new_loc = 0;
1138 // ncand = ref + jmin*lx + imin; /* center of the search */
1139 step = 0;
1140 dn[0] = dmin;
1141 while (!center_again && step <= max_step)
1142 {
1143
1144 AVCMoveNeighborSAD(dn, last_loc);
1145
1146 center_again = 1;
1147 i = imin;
1148 j = jmin - 1;
1149 cand = ref + i + j * lx;
1150
1151 /* starting from [0,-1] */
1152 /* spiral check one step at a time*/
1153 for (k = 2; k <= 8; k += 2)
1154 {
1155 if (!tab_exclude[last_loc][k]) /* exclude last step computation */
1156 { /* not already computed */
1157 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1158 {
1159 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1160 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1161 d += mvcost;
1162
1163 dn[k] = d; /* keep it for half pel use */
1164
1165 if (d < dmin)
1166 {
1167 ncand = cand;
1168 dmin = d;
1169 imin = i;
1170 jmin = j;
1171 center_again = 0;
1172 new_loc = k;
1173 min_sad = d - mvcost; // for rate control
1174 }
1175 }
1176 }
1177 if (k == 8) /* end side search*/
1178 {
1179 if (!center_again)
1180 {
1181 k = -1; /* start diagonal search */
1182 cand -= lx;
1183 j--;
1184 }
1185 }
1186 else
1187 {
1188 next = refine_next[k][0];
1189 i += next;
1190 cand += next;
1191 next = refine_next[k][1];
1192 j += next;
1193 cand += lx * next;
1194 }
1195 }
1196 last_loc = new_loc;
1197 step ++;
1198 }
1199 if (!center_again)
1200 AVCMoveNeighborSAD(dn, last_loc);
1201
1202 *hp_guess = AVCFindMin(dn);
1203
1204 encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1205 }
1206 }
1207
1208 mot16x16[mbnum].sad = dmin;
1209 mot16x16[mbnum].x = (imin - i0) << 2;
1210 mot16x16[mbnum].y = (jmin - j0) << 2;
1211 best_cand[0] = ncand;
1212
1213 if (rateCtrl->subPelEnable) // always enable half-pel search
1214 {
1215 /* find half-pel resolution motion vector */
1216 min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy);
1217
1218 encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1219
1220
1221 if (encvid->best_qpel_pos == -1)
1222 {
1223 ncand = encvid->hpel_cand[encvid->best_hpel_pos];
1224 }
1225 else
1226 {
1227 ncand = encvid->qpel_cand[encvid->best_qpel_pos];
1228 }
1229 }
1230 else
1231 {
1232 encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1233 }
1234
1235 /** do motion comp here for now */
1236 ref = currPic->Sl + i0 + j0 * lx;
1237 /* copy from the best result to current Picture */
1238 for (j = 0; j < 16; j++)
1239 {
1240 for (i = 0; i < 16; i++)
1241 {
1242 *ref++ = *ncand++;
1243 }
1244 ref += (lx - 16);
1245 ncand += 8;
1246 }
1247
1248 return ;
1249 }
1250
1251 #endif
1252
1253 /*===============================================================================
1254 Function: AVCFullSearch
1255 Date: 09/16/2000
1256 Purpose: Perform full-search motion estimation over the range of search
1257 region in a spiral-outward manner.
1258 Input/Output: VideoEncData, current Vol, previou Vop, pointer to the left corner of
1259 current VOP, current coord (also output), boundaries.
1260 ===============================================================================*/
AVCFullSearch(AVCEncObject * encvid,uint8 * prev,uint8 * cur,int * imin,int * jmin,int ilow,int ihigh,int jlow,int jhigh,int cmvx,int cmvy)1261 int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur,
1262 int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh,
1263 int cmvx, int cmvy)
1264 {
1265 int range = encvid->rateCtrl->mvRange;
1266 AVCPictureData *currPic = encvid->common->currPic;
1267 uint8 *cand;
1268 int i, j, k, l;
1269 int d, dmin;
1270 int i0 = *imin; /* current position */
1271 int j0 = *jmin;
1272 int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
1273 void *extra_info = encvid->sad_extra_info;
1274 int lx = currPic->pitch; /* with padding */
1275
1276 int offset = i0 + j0 * lx;
1277
1278 int lambda_motion = encvid->lambda_motion;
1279 uint8 *mvbits = encvid->mvbits;
1280 int mvshift = 2;
1281 int mvcost;
1282 int min_sad;
1283
1284 cand = prev + offset;
1285
1286 dmin = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info);
1287 mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy);
1288 min_sad = dmin;
1289 dmin += mvcost;
1290
1291 /* perform spiral search */
1292 for (k = 1; k <= range; k++)
1293 {
1294
1295 i = i0 - k;
1296 j = j0 - k;
1297
1298 cand = prev + i + j * lx;
1299
1300 for (l = 0; l < 8*k; l++)
1301 {
1302 /* no need for boundary checking again */
1303 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1304 {
1305 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
1306 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1307 d += mvcost;
1308
1309 if (d < dmin)
1310 {
1311 dmin = d;
1312 *imin = i;
1313 *jmin = j;
1314 min_sad = d - mvcost;
1315 }
1316 }
1317
1318 if (l < (k << 1))
1319 {
1320 i++;
1321 cand++;
1322 }
1323 else if (l < (k << 2))
1324 {
1325 j++;
1326 cand += lx;
1327 }
1328 else if (l < ((k << 2) + (k << 1)))
1329 {
1330 i--;
1331 cand--;
1332 }
1333 else
1334 {
1335 j--;
1336 cand -= lx;
1337 }
1338 }
1339 }
1340
1341 encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control
1342
1343 return dmin;
1344 }
1345
1346 /*===============================================================================
1347 Function: AVCCandidateSelection
1348 Date: 09/16/2000
1349 Purpose: Fill up the list of candidate using spatio-temporal correlation
1350 among neighboring blocks.
1351 Input/Output: type_pred = 0: first pass, 1: second pass, or no SCD
1352 Modified: , 09/23/01, get rid of redundant candidates before passing back.
1353 , 09/11/07, added return for modified predicted MV, this will be
1354 needed for both fast search and fullsearch.
1355 ===============================================================================*/
1356
AVCCandidateSelection(int * mvx,int * mvy,int * num_can,int imb,int jmb,AVCEncObject * encvid,int type_pred,int * cmvx,int * cmvy)1357 void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb,
1358 AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy)
1359 {
1360 AVCCommonObj *video = encvid->common;
1361 AVCMV *mot16x16 = encvid->mot16x16;
1362 AVCMV *pmot;
1363 int mbnum = video->mbNum;
1364 int mbwidth = video->PicWidthInMbs;
1365 int mbheight = video->PicHeightInMbs;
1366 int i, j, same, num1;
1367
1368 /* this part is for predicted MV */
1369 int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0;
1370 int availA = 0, availB = 0, availC = 0;
1371
1372 *num_can = 0;
1373
1374 if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame
1375 {
1376 /* Spatio-Temporal Candidate (five candidates) */
1377 if (type_pred == 0) /* first pass */
1378 {
1379 pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1380 mvx[(*num_can)] = (pmot->x) >> 2;
1381 mvy[(*num_can)++] = (pmot->y) >> 2;
1382 if (imb >= (mbwidth >> 1) && imb > 0) /*left neighbor previous frame */
1383 {
1384 pmot = &mot16x16[mbnum-1];
1385 mvx[(*num_can)] = (pmot->x) >> 2;
1386 mvy[(*num_can)++] = (pmot->y) >> 2;
1387 }
1388 else if (imb + 1 < mbwidth) /*right neighbor previous frame */
1389 {
1390 pmot = &mot16x16[mbnum+1];
1391 mvx[(*num_can)] = (pmot->x) >> 2;
1392 mvy[(*num_can)++] = (pmot->y) >> 2;
1393 }
1394
1395 if (jmb < mbheight - 1) /*bottom neighbor previous frame */
1396 {
1397 pmot = &mot16x16[mbnum+mbwidth];
1398 mvx[(*num_can)] = (pmot->x) >> 2;
1399 mvy[(*num_can)++] = (pmot->y) >> 2;
1400 }
1401 else if (jmb > 0) /*upper neighbor previous frame */
1402 {
1403 pmot = &mot16x16[mbnum-mbwidth];
1404 mvx[(*num_can)] = (pmot->x) >> 2;
1405 mvy[(*num_can)++] = (pmot->y) >> 2;
1406 }
1407
1408 if (imb > 0 && jmb > 0) /* upper-left neighbor current frame*/
1409 {
1410 pmot = &mot16x16[mbnum-mbwidth-1];
1411 mvx[(*num_can)] = (pmot->x) >> 2;
1412 mvy[(*num_can)++] = (pmot->y) >> 2;
1413 }
1414 if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor current frame*/
1415 {
1416 pmot = &mot16x16[mbnum-mbwidth+1];
1417 mvx[(*num_can)] = (pmot->x) >> 2;
1418 mvy[(*num_can)++] = (pmot->y) >> 2;
1419 }
1420 }
1421 else /* second pass */
1422 /* original ST1 algorithm */
1423 {
1424 pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1425 mvx[(*num_can)] = (pmot->x) >> 2;
1426 mvy[(*num_can)++] = (pmot->y) >> 2;
1427
1428 if (imb > 0) /*left neighbor current frame */
1429 {
1430 pmot = &mot16x16[mbnum-1];
1431 mvx[(*num_can)] = (pmot->x) >> 2;
1432 mvy[(*num_can)++] = (pmot->y) >> 2;
1433 }
1434 if (jmb > 0) /*upper neighbor current frame */
1435 {
1436 pmot = &mot16x16[mbnum-mbwidth];
1437 mvx[(*num_can)] = (pmot->x) >> 2;
1438 mvy[(*num_can)++] = (pmot->y) >> 2;
1439 }
1440 if (imb < mbwidth - 1) /*right neighbor previous frame */
1441 {
1442 pmot = &mot16x16[mbnum+1];
1443 mvx[(*num_can)] = (pmot->x) >> 2;
1444 mvy[(*num_can)++] = (pmot->y) >> 2;
1445 }
1446 if (jmb < mbheight - 1) /*bottom neighbor previous frame */
1447 {
1448 pmot = &mot16x16[mbnum+mbwidth];
1449 mvx[(*num_can)] = (pmot->x) >> 2;
1450 mvy[(*num_can)++] = (pmot->y) >> 2;
1451 }
1452 }
1453
1454 /* get predicted MV */
1455 if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */
1456 {
1457 availA = 1;
1458 pmot = &mot16x16[mbnum-1];
1459 pmvA_x = pmot->x;
1460 pmvA_y = pmot->y;
1461 }
1462
1463 if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1464 {
1465 availB = 1;
1466 pmot = &mot16x16[mbnum-mbwidth];
1467 pmvB_x = pmot->x;
1468 pmvB_y = pmot->y;
1469
1470 availC = 1;
1471
1472 if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1473 {
1474 pmot = &mot16x16[mbnum-mbwidth+1];
1475 }
1476 else /* get MV from top-left (D) neighbor of current frame */
1477 {
1478 pmot = &mot16x16[mbnum-mbwidth-1];
1479 }
1480 pmvC_x = pmot->x;
1481 pmvC_y = pmot->y;
1482 }
1483
1484 }
1485 else /* only Spatial Candidate (four candidates)*/
1486 {
1487 if (type_pred == 0) /*first pass*/
1488 {
1489 if (imb > 1) /* neighbor two blocks away to the left */
1490 {
1491 pmot = &mot16x16[mbnum-2];
1492 mvx[(*num_can)] = (pmot->x) >> 2;
1493 mvy[(*num_can)++] = (pmot->y) >> 2;
1494 }
1495 if (imb > 0 && jmb > 0) /* upper-left neighbor */
1496 {
1497 pmot = &mot16x16[mbnum-mbwidth-1];
1498 mvx[(*num_can)] = (pmot->x) >> 2;
1499 mvy[(*num_can)++] = (pmot->y) >> 2;
1500 }
1501 if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor */
1502 {
1503 pmot = &mot16x16[mbnum-mbwidth+1];
1504 mvx[(*num_can)] = (pmot->x) >> 2;
1505 mvy[(*num_can)++] = (pmot->y) >> 2;
1506 }
1507
1508 /* get predicted MV */
1509 if (imb > 1) /* get MV from 2nd left (A) neighbor either of current frame */
1510 {
1511 availA = 1;
1512 pmot = &mot16x16[mbnum-2];
1513 pmvA_x = pmot->x;
1514 pmvA_y = pmot->y;
1515 }
1516
1517 if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */
1518 {
1519 availB = 1;
1520 pmot = &mot16x16[mbnum-mbwidth-1];
1521 pmvB_x = pmot->x;
1522 pmvB_y = pmot->y;
1523 }
1524
1525 if (jmb > 0 && imb < mbwidth - 1)
1526 {
1527 availC = 1;
1528 pmot = &mot16x16[mbnum-mbwidth+1];
1529 pmvC_x = pmot->x;
1530 pmvC_y = pmot->y;
1531 }
1532 }
1533 //#ifdef SCENE_CHANGE_DETECTION
1534 /* second pass (ST2 algorithm)*/
1535 else
1536 {
1537 if (type_pred == 1) /* 4/7/01 */
1538 {
1539 if (imb > 0) /*left neighbor current frame */
1540 {
1541 pmot = &mot16x16[mbnum-1];
1542 mvx[(*num_can)] = (pmot->x) >> 2;
1543 mvy[(*num_can)++] = (pmot->y) >> 2;
1544 }
1545 if (jmb > 0) /*upper neighbor current frame */
1546 {
1547 pmot = &mot16x16[mbnum-mbwidth];
1548 mvx[(*num_can)] = (pmot->x) >> 2;
1549 mvy[(*num_can)++] = (pmot->y) >> 2;
1550 }
1551 if (imb < mbwidth - 1) /*right neighbor current frame */
1552 {
1553 pmot = &mot16x16[mbnum+1];
1554 mvx[(*num_can)] = (pmot->x) >> 2;
1555 mvy[(*num_can)++] = (pmot->y) >> 2;
1556 }
1557 if (jmb < mbheight - 1) /*bottom neighbor current frame */
1558 {
1559 pmot = &mot16x16[mbnum+mbwidth];
1560 mvx[(*num_can)] = (pmot->x) >> 2;
1561 mvy[(*num_can)++] = (pmot->y) >> 2;
1562 }
1563 }
1564 //#else
1565 else /* original ST1 algorithm */
1566 {
1567 if (imb > 0) /*left neighbor current frame */
1568 {
1569 pmot = &mot16x16[mbnum-1];
1570 mvx[(*num_can)] = (pmot->x) >> 2;
1571 mvy[(*num_can)++] = (pmot->y) >> 2;
1572
1573 if (jmb > 0) /*upper-left neighbor current frame */
1574 {
1575 pmot = &mot16x16[mbnum-mbwidth-1];
1576 mvx[(*num_can)] = (pmot->x) >> 2;
1577 mvy[(*num_can)++] = (pmot->y) >> 2;
1578 }
1579
1580 }
1581 if (jmb > 0) /*upper neighbor current frame */
1582 {
1583 pmot = &mot16x16[mbnum-mbwidth];
1584 mvx[(*num_can)] = (pmot->x) >> 2;
1585 mvy[(*num_can)++] = (pmot->y) >> 2;
1586
1587 if (imb < mbheight - 1) /*upper-right neighbor current frame */
1588 {
1589 pmot = &mot16x16[mbnum-mbwidth+1];
1590 mvx[(*num_can)] = (pmot->x) >> 2;
1591 mvy[(*num_can)++] = (pmot->y) >> 2;
1592 }
1593 }
1594 }
1595
1596 /* get predicted MV */
1597 if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */
1598 {
1599 availA = 1;
1600 pmot = &mot16x16[mbnum-1];
1601 pmvA_x = pmot->x;
1602 pmvA_y = pmot->y;
1603 }
1604
1605 if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1606 {
1607 availB = 1;
1608 pmot = &mot16x16[mbnum-mbwidth];
1609 pmvB_x = pmot->x;
1610 pmvB_y = pmot->y;
1611
1612 availC = 1;
1613
1614 if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1615 {
1616 pmot = &mot16x16[mbnum-mbwidth+1];
1617 }
1618 else /* get MV from top-left (D) neighbor of current frame */
1619 {
1620 pmot = &mot16x16[mbnum-mbwidth-1];
1621 }
1622 pmvC_x = pmot->x;
1623 pmvC_y = pmot->y;
1624 }
1625 }
1626 //#endif
1627 }
1628
1629 /* 3/23/01, remove redundant candidate (possible k-mean) */
1630 num1 = *num_can;
1631 *num_can = 1;
1632 for (i = 1; i < num1; i++)
1633 {
1634 same = 0;
1635 j = 0;
1636 while (!same && j < *num_can)
1637 {
1638 #if (CANDIDATE_DISTANCE==0)
1639 if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
1640 #else
1641 // modified k-mean, 3/24/01, shouldn't be greater than 3
1642 if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
1643 #endif
1644 same = 1;
1645 j++;
1646 }
1647 if (!same)
1648 {
1649 mvx[*num_can] = mvx[i];
1650 mvy[*num_can] = mvy[i];
1651 (*num_can)++;
1652 }
1653 }
1654
1655 if (num1 == 5 && *num_can == 1)
1656 *num_can = ALL_CAND_EQUAL; /* all are equal */
1657
1658 /* calculate predicted MV */
1659
1660 if (availA && !(availB || availC))
1661 {
1662 *cmvx = pmvA_x;
1663 *cmvy = pmvA_y;
1664 }
1665 else
1666 {
1667 *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x);
1668 *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y);
1669 }
1670
1671 return ;
1672 }
1673
1674
1675 /*************************************************************
1676 Function: AVCMoveNeighborSAD
1677 Date: 3/27/01
1678 Purpose: Move neighboring SAD around when center has shifted
1679 *************************************************************/
1680
AVCMoveNeighborSAD(int dn[],int new_loc)1681 void AVCMoveNeighborSAD(int dn[], int new_loc)
1682 {
1683 int tmp[9];
1684 tmp[0] = dn[0];
1685 tmp[1] = dn[1];
1686 tmp[2] = dn[2];
1687 tmp[3] = dn[3];
1688 tmp[4] = dn[4];
1689 tmp[5] = dn[5];
1690 tmp[6] = dn[6];
1691 tmp[7] = dn[7];
1692 tmp[8] = dn[8];
1693 dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536;
1694
1695 switch (new_loc)
1696 {
1697 case 0:
1698 break;
1699 case 1:
1700 dn[4] = tmp[2];
1701 dn[5] = tmp[0];
1702 dn[6] = tmp[8];
1703 break;
1704 case 2:
1705 dn[4] = tmp[3];
1706 dn[5] = tmp[4];
1707 dn[6] = tmp[0];
1708 dn[7] = tmp[8];
1709 dn[8] = tmp[1];
1710 break;
1711 case 3:
1712 dn[6] = tmp[4];
1713 dn[7] = tmp[0];
1714 dn[8] = tmp[2];
1715 break;
1716 case 4:
1717 dn[1] = tmp[2];
1718 dn[2] = tmp[3];
1719 dn[6] = tmp[5];
1720 dn[7] = tmp[6];
1721 dn[8] = tmp[0];
1722 break;
1723 case 5:
1724 dn[1] = tmp[0];
1725 dn[2] = tmp[4];
1726 dn[8] = tmp[6];
1727 break;
1728 case 6:
1729 dn[1] = tmp[8];
1730 dn[2] = tmp[0];
1731 dn[3] = tmp[4];
1732 dn[4] = tmp[5];
1733 dn[8] = tmp[7];
1734 break;
1735 case 7:
1736 dn[2] = tmp[8];
1737 dn[3] = tmp[0];
1738 dn[4] = tmp[6];
1739 break;
1740 case 8:
1741 dn[2] = tmp[1];
1742 dn[3] = tmp[2];
1743 dn[4] = tmp[0];
1744 dn[5] = tmp[6];
1745 dn[6] = tmp[7];
1746 break;
1747 }
1748 dn[0] = tmp[new_loc];
1749
1750 return ;
1751 }
1752
1753 /* 3/28/01, find minimal of dn[9] */
1754
AVCFindMin(int dn[])1755 int AVCFindMin(int dn[])
1756 {
1757 int min, i;
1758 int dmin;
1759
1760 dmin = dn[1];
1761 min = 1;
1762 for (i = 2; i < 9; i++)
1763 {
1764 if (dn[i] < dmin)
1765 {
1766 dmin = dn[i];
1767 min = i;
1768 }
1769 }
1770
1771 return min;
1772 }
1773
1774
1775
1776