1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_intra_modes_eval.c
25 *
26 * @brief
27 * This file contains definitions of routines that perform rate distortion
28 * analysis on a macroblock if they are to be coded as intra.
29 *
30 * @author
31 * ittiam
32 *
33 * @par List of Functions:
34 * - ih264e_derive_nghbr_avbl_of_mbs()
35 * - ih264e_derive_ngbr_avbl_of_mb_partitions()
36 * - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
37 * - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
38 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
39 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton()
40 * - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
41 * - ih264e_evaluate_intra16x16_modes()
42 * - ih264e_evaluate_intra4x4_modes()
43 * - ih264e_evaluate_intra_chroma_modes()
44 *
45 * @remarks
46 * None
47 *
48 *******************************************************************************
49 */
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* System include files */
56 #include <stdio.h>
57 #include <string.h>
58 #include <limits.h>
59 #include <assert.h>
60
61 /* User include files */
62 #include "ih264e_config.h"
63 #include "ih264_typedefs.h"
64 #include "ih264e_defs.h"
65 #include "iv2.h"
66 #include "ive2.h"
67 #include "ih264_debug.h"
68 #include "ih264_defs.h"
69 #include "ih264_macros.h"
70 #include "ih264_intra_pred_filters.h"
71 #include "ih264_structs.h"
72 #include "ih264_common_tables.h"
73 #include "ih264_trans_quant_itrans_iquant.h"
74 #include "ih264_inter_pred_filters.h"
75 #include "ih264_mem_fns.h"
76 #include "ih264_padding.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ime_distortion_metrics.h"
80 #include "ih264e_error.h"
81 #include "ih264e_bitstream.h"
82 #include "ime_defs.h"
83 #include "ime_structs.h"
84 #include "irc_cntrl_param.h"
85 #include "irc_frame_info_collector.h"
86 #include "ih264e_rate_control.h"
87 #include "ih264e_cabac_structs.h"
88 #include "ih264e_structs.h"
89 #include "ih264e_intra_modes_eval.h"
90 #include "ih264e_globals.h"
91 #include "ime_platform_macros.h"
92
93
94 /*****************************************************************************/
95 /* Function Definitions */
96 /*****************************************************************************/
97
98 /**
99 ******************************************************************************
100 *
101 * @brief
102 * derivation process for macroblock availability
103 *
104 * @par Description
105 * Calculates the availability of the left, top, topright and topleft macroblocks.
106 *
107 * @param[in] ps_proc
108 * pointer to proc context (handle)
109 *
110 * @remarks Based on section 6.4.5 in H264 spec
111 *
112 * @return none
113 *
114 ******************************************************************************
115 */
void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
{
    /*
     * Fills ps_proc->ps_ngbr_avbl with the availability (1) or
     * non-availability (0) of the left (a), top (b), top right (c) and
     * top left (d) macroblocks of the current macroblock.
     *
     * A neighbor is reported as available only when it lies inside the
     * picture and its entry in the per-mb slice index map equals the entry
     * of the current macroblock.
     */

    /* position of the current mb, in mb units */
    const WORD32 i4_col = ps_proc->i4_mb_x;
    const WORD32 i4_row = ps_proc->i4_mb_y;

    /* number of mbs in one row of the slice index map */
    const WORD32 i4_row_len = ps_proc->i4_wd_mbs;

    /* slice index entry of the current mb */
    UWORD8 *pu1_cur = ps_proc->pu1_slice_idx + (i4_row * i4_row_len) + i4_col;

    /* destination of the derived flags */
    block_neighbors_t *ps_flags = ps_proc->ps_ngbr_avbl;

    /* which picture borders the current mb is NOT touching */
    const WORD32 i4_left_in_pic = (i4_col != 0);
    const WORD32 i4_top_in_pic = (i4_row != 0);
    const WORD32 i4_right_in_pic = (i4_col != i4_row_len - 1);

    /*
     * The neighbor entries are read relative to the current entry, and only
     * after the border test has passed (short circuit evaluation), so the
     * map is never accessed outside the picture.
     */

    /* left mb : previous entry in the same row */
    ps_flags->u1_mb_a = (i4_left_in_pic
                    && (pu1_cur[-1] == pu1_cur[0])) ? 1 : 0;

    /* top mb : same column, previous row */
    ps_flags->u1_mb_b = (i4_top_in_pic
                    && (pu1_cur[-i4_row_len] == pu1_cur[0])) ? 1 : 0;

    /* top right mb : next column, previous row */
    ps_flags->u1_mb_c = (i4_right_in_pic && i4_top_in_pic
                    && (pu1_cur[1 - i4_row_len] == pu1_cur[0])) ? 1 : 0;

    /* top left mb : previous column, previous row */
    ps_flags->u1_mb_d = (i4_left_in_pic && i4_top_in_pic
                    && (pu1_cur[-1 - i4_row_len] == pu1_cur[0])) ? 1 : 0;
}
201
202 /**
203 ******************************************************************************
204 *
205 * @brief
206 * derivation process for subblock/partition availability
207 *
208 * @par Description
209 * Calculates the availability of the left, top, topright and topleft subblock
210 * or partitions.
211 *
212 * @param[in] ps_ngbr_avbl
213 * pointer to the neighbor availability flags of the current macroblock
214 *
215 * @param[in] i1_pel_pos_x
216 * column position of the pel wrt the current block
217 *
218 * @param[in] i1_pel_pos_y
219 * row position of the pel wrt the current block
220 *
221 * @remarks Assumptions: before calling this function it is assumed that
222 * the neighbor availability of the current macroblock is already derived.
223 * Based on table 6-3 of H264 specification
224 *
225 * @return availability status (yes or no)
226 *
227 ******************************************************************************
228 */
ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t * ps_ngbr_avbl,WORD8 i1_pel_pos_x,WORD8 i1_pel_pos_y)229 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
230 WORD8 i1_pel_pos_x,
231 WORD8 i1_pel_pos_y)
232 {
233 UWORD8 u1_neighbor_avail=0;
234
235 /**********************************************************************/
236 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */
237 /* various columns of a macroblock */
238 /* */
239 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */
240 /* various rows of a macroblock */
241 /* */
242 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */
243 /* outside the bound of an mb ie., represents its neighbors. */
244 /**********************************************************************/
245 if (i1_pel_pos_x < 0)
246 { /* column(-1) */
247 if (i1_pel_pos_y < 0)
248 { /* row(-1) */
249 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
250 }
251 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
252 { /* all rows of a macroblock */
253 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
254 }
255 else /* if (i1_pel_pos_y >= 16) */
256 { /* rows(+16) */
257 u1_neighbor_avail = 0; /* current mb bottom left availability */
258 }
259 }
260 else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
261 { /* all columns of a macroblock */
262 if (i1_pel_pos_y < 0)
263 { /* row(-1) */
264 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
265 }
266 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
267 { /* all rows of a macroblock */
268 u1_neighbor_avail = 1; /* current mb availability */
269 /* availability of the partition is dependent on the position of the partition inside the mb */
270 /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */
271 }
272 else /* if (i1_pel_pos_y >= 16) */
273 { /* rows(+16) */
274 u1_neighbor_avail = 0; /* current mb bottom availability */
275 }
276 }
277 else if (i1_pel_pos_x >= 16)
278 { /* column(+16) */
279 if (i1_pel_pos_y < 0)
280 { /* row(-1) */
281 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
282 }
283 else /* if (i1_pel_pos_y >= 0) */
284 { /* all other rows */
285 u1_neighbor_avail = 0; /* current mb right & bottom right availability */
286 }
287 }
288
289 return u1_neighbor_avail;
290 }
291
292 /**
293 ******************************************************************************
294 *
295 * @brief
296 * evaluate best intra 16x16 mode (rate distortion opt off)
297 *
298 * @par Description
299 * This function evaluates all the possible intra 16x16 modes and finds the mode
300 * that best represents the macro-block (least distortion) and occupies fewer
301 * bits in the bit-stream.
302 *
303 * @param[in] ps_proc
304 * pointer to process context (handle)
305 *
306 * @remarks
307 * Ideally the cost of encoding a macroblock is calculated as
308 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
309 * input block and the reconstructed block and rate is the number of bits taken
310 * to place the macroblock in the bit-stream. In this routine the rate does not
311 * exactly point to the total number of bits it takes, rather it points to header
312 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
313 * and residual bits fall in to texture bits the number of bits taken to encoding
314 * mbtype is considered as rate, we compute cost. Further we will approximate
315 * the distortion as the deviation b/w input and the predicted block as opposed
316 * to input and reconstructed block.
317 *
318 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
319 * the SAD and cost are one and the same.
320 *
321 * @return none
322 *
323 ******************************************************************************
324 */
325
void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
{
    /*
     * Selects the intra 16x16 luma prediction mode with the least SAD for
     * the current macroblock, stores it in ps_proc->u1_l_i16_mode and, when
     * the resulting cost is lower than ps_proc->i4_mb_cost, marks the
     * macroblock as I16x16.
     */

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* candidate mode bitmaps, indexed by the availability mask built below */
    const UWORD8 au1_mode_map_lut[8] = {4, 6, 4, 6, 5, 7, 5, 15};

    /* source mb and reconstructed mb */
    UWORD8 *pu1_src = ps_proc->pu1_src_buf_luma;
    UWORD8 *pu1_rec = ps_proc->pu1_rec_buf_luma;

    /* prediction buffers : one shared by the modes evaluated together and */
    /* a separate one for the plane mode */
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_16x16;
    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;

    /* strides */
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* neighbor pel array : [0..15] left pels (bottom row first), */
    /* [16] top left pel, [17..32] top pels */
    UWORD8 *pu1_nbr = ps_proc->au1_ngbr_pels;

    /* reconstructed pels bordering the mb : left column, top row, corner */
    UWORD8 *pu1_left = pu1_rec - 1;
    UWORD8 *pu1_top = pu1_rec - i4_rec_strd;
    UWORD8 *pu1_top_left = pu1_top - 1;

    /* syntax elements of the mb above */
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;

    /* constrained intra prediction switch */
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;

    /* usability of the neighbors for prediction */
    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok;
    WORD32 i4_avail_mask;

    /* bitmap of modes to be evaluated */
    UWORD32 u4_mode_map;

    /* winning mode, reported by the mode evaluation routine */
    UWORD32 u4_mode;

    /* distortion (SAD) and cost of the candidate and of the winner */
    WORD32 i4_sad = INT_MAX, i4_best_sad = INT_MAX;
    WORD32 i4_cost = INT_MAX, i4_best_cost = INT_MAX;

    /* lambda */
    UWORD32 u4_lambda = ps_proc->u4_lambda;

    /* mb type code number offset and sad routine selector; both stay 0 */
    /* for I slices */
    UWORD32 u4_mb_type_offset = 0, u4_fast_sad = 0;

    UWORD32 u4_row;

    if (ps_proc->i4_slice_type != ISLICE)
    {
        u4_mb_type_offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
        u4_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
    }

    /* a neighbor is usable when it is available and, with constrained */
    /* intra prediction enabled, is itself intra coded */
    u1_left_ok = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
                    && (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra));
    u1_top_ok = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
                    && (!u4_cip || ps_top_mb->u2_is_intra));
    u1_top_left_ok = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
                    && (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra));

    /* gather the neighbor pels; sets that are not usable are zeroed */

    /* left pels */
    if (!u1_left_ok)
    {
        ps_codec->pf_mem_set_mul8(pu1_nbr, 0, MB_SIZE);
    }
    else
    {
        for (u4_row = 0; u4_row < 16; u4_row++)
        {
            pu1_nbr[16 - 1 - u4_row] = pu1_left[u4_row * i4_rec_strd];
        }
    }

    /* top pels */
    if (!u1_top_ok)
    {
        ps_codec->pf_mem_set_mul8(pu1_nbr + 16 + 1, 0, MB_SIZE);
    }
    else
    {
        ps_codec->pf_mem_cpy_mul8(pu1_nbr + 16 + 1, pu1_top, 16);
    }

    /* top left pel */
    pu1_nbr[16] = u1_top_left_ok ? *pu1_top_left : 0;

    /* availability mask : bit 0 left, bit 1 top left, bit 2 top */
    i4_avail_mask = (u1_left_ok) + (u1_top_left_ok << 1) + (u1_top_ok << 2);
    ps_proc->i4_ngbr_avbl_16x16_mb = i4_avail_mask;

    /* modes open for evaluation; the fast preset drops the plane mode */
    u4_mode_map = au1_mode_map_lut[i4_avail_mask];
    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
    {
        u4_mode_map &= ~(1 << PLANE_I16x16);
    }

    /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
    ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_src, pu1_nbr, pu1_pred,
                                                  i4_src_strd, i4_pred_strd,
                                                  i4_avail_mask, &u4_mode, &i4_best_sad,
                                                  u4_mode_map);

    /* no rate term is added while the modes are compared with each other */
    i4_best_cost = i4_best_sad;

    /* the plane mode is evaluated separately : only when it is still in */
    /* the bitmap, and not for the fastest preset unless this is an I slice */
    if ((((u4_mode_map >> 3) & 1) != 0)
                    && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST
                                    || ps_proc->i4_slice_type == ISLICE))
    {
        /* build the plane prediction */
        (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_nbr, pu1_pred_plane, 0, i4_pred_strd, i4_avail_mask);

        /* distortion of the plane prediction; the best cost so far is */
        /* handed to the sad routine */
        ps_codec->apf_compute_sad_16x16[u4_fast_sad](pu1_src, pu1_pred_plane, i4_src_strd, i4_pred_strd, i4_best_cost, &i4_sad);

        i4_cost = i4_sad;

        /* plane wins only when strictly better */
        if (i4_cost < i4_best_sad)
        {
            u4_mode = PLANE_I16x16;

            i4_best_cost = i4_cost;
            i4_best_sad = i4_sad;
        }
    }

    DEBUG("%d partition cost, %d intra mode\n", i4_best_cost * 32, u4_mode);

    ps_proc->u1_l_i16_mode = u4_mode;

    /* cost = distortion + lambda*rate, the rate being the code length of */
    /* the mb type */
    i4_best_cost = i4_best_sad + u4_lambda * u1_uev_codelength[u4_mb_type_offset + u4_mode];

    /* switch the mb to I16x16 when that is cheaper than the current choice */
    if (i4_best_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_best_cost;
        ps_proc->i4_mb_distortion = i4_best_sad;
        ps_proc->u4_mb_type = I16x16;
    }

    return ;
}
485
486
487 /**
488 ******************************************************************************
489 *
490 * @brief
491 * evaluate best intra 8x8 mode (rate distortion opt off)
492 *
493 * @par Description
494 * This function evaluates all the possible intra 8x8 modes and finds the mode
495 * that best represents the macro-block (least distortion) and occupies fewer
496 * bits in the bit-stream.
497 *
498 * @param[in] ps_proc
499 * pointer to proc ctxt
500 *
501 * @remarks Ideally the cost of encoding a macroblock is calculated as
502 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
503 * input block and the reconstructed block and rate is the number of bits taken
504 * to place the macroblock in the bit-stream. In this routine the rate does not
505 * exactly point to the total number of bits it takes, rather it points to header
506 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
507 * and residual bits fall in to texture bits the number of bits taken to encoding
508 * mbtype is considered as rate, we compute cost. Further we will approximate
509 * the distortion as the deviation b/w input and the predicted block as opposed
510 * to input and reconstructed block.
511 *
512 * NOTE: TODO: This function needs to be tested
513 *
514 * @return none
515 *
516 ******************************************************************************
517 */
void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
{
    /*
     * Selects, for each of the four 8x8 luma partitions of the current
     * macroblock, the intra 8x8 prediction mode of least cost, stores the
     * modes in ps_proc->au1_intra_luma_mb_8x8_modes and, when the summed
     * cost is lower than ps_proc->i4_mb_cost, marks the macroblock as I8x8.
     * The prediction is built from the source pels (open loop).
     */

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* lambda */
    UWORD32 u4_lambda = ps_proc->u4_lambda;

    /* rate term of a partition : one bit when the mode equals the */
    /* estimated mode, four bits otherwise */
    UWORD32 u4_cost_mode_match = u4_lambda;
    UWORD32 u4_cost_mode_other = 4 * u4_lambda;

    /* running totals over the four partitions; the cost starts at lambda */
    WORD32 i4_mb_cost_sum = u4_lambda;
    WORD32 i4_mb_sad_sum = 0;

    /* distortion and mode of the best candidate; these two are carried */
    /* over from one partition to the next and are not reset in between */
    WORD32 i4_best_sad = INT_MAX;
    UWORD32 u4_best_mode = DC_I8x8;

    /* neighbor pels for intra prediction */
    UWORD8 *pu1_nbr = ps_proc->au1_ngbr_pels;

    /* prediction buffer */
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb;

    /* strides */
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;

    /* constrained intra prediction switch */
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;

    /* intra modes and syntax elements of the mbs in the row above */
    UWORD8 *pu1_top_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
    mb_info_t *ps_top_right_mb = ps_top_mb;

    /* neighbor usability at mb level and at partition level */
    block_neighbors_t s_mb_nbrs;
    block_neighbors_t s_blk_nbrs;

    UWORD32 u4_blk;

    /* the entry of the top right mb is consulted only when that mb is */
    /* available; otherwise the pointer stays on the top mb */
    if (ps_proc->ps_ngbr_avbl->u1_mb_c)
    {
        ps_top_right_mb = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1);
    }

    /* a neighbor mb is usable when it is available and, with constrained */
    /* intra prediction enabled, is itself intra coded */
    s_mb_nbrs.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
                    && (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra));
    s_mb_nbrs.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
                    && (!u4_cip || ps_top_mb->u2_is_intra));
    s_mb_nbrs.u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
                    && (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra));
    s_mb_nbrs.u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
                    && (!u4_cip || ps_top_right_mb->u2_is_intra));

    for (u4_blk = 0; u4_blk < 4; u4_blk++)
    {
        /* top left pel of the partition, relative to the mb */
        UWORD32 u4_pix_x = (u4_blk & 0x01) << 3;
        UWORD32 u4_pix_y = (u4_blk >> 1) << 3;

        /* current partition and the source pels bordering it */
        UWORD8 *pu1_cur = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
        UWORD8 *pu1_left = pu1_cur - 1;
        UWORD8 *pu1_top = pu1_cur - i4_src_strd;
        UWORD8 *pu1_top_left = pu1_top - 1;

        WORD32 i4_avail_mask;
        UWORD32 u4_mode_map;
        UWORD32 u4_est_mode;
        UWORD32 u4_mode;
        WORD32 i4_best_cost;
        WORD32 i4_sad;
        WORD32 i4_cost;

        /* partition level availability of the left, top, top right and */
        /* top left neighbors */
        s_blk_nbrs.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_pix_x - 1, u4_pix_y);
        s_blk_nbrs.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_pix_x, u4_pix_y - 1);
        s_blk_nbrs.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_pix_x + 8, u4_pix_y - 1);
        s_blk_nbrs.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_mb_nbrs, u4_pix_x - 1, u4_pix_y - 1);

        /* bit 0 left, bit 1 top left, bit 2 top, bit 3 top right; the */
        /* left flag is repeated in bit 4 */
        i4_avail_mask = (s_blk_nbrs.u1_mb_a) + (s_blk_nbrs.u1_mb_d << 1)
                        + (s_blk_nbrs.u1_mb_b << 2) + (s_blk_nbrs.u1_mb_c << 3)
                        + (s_blk_nbrs.u1_mb_a << 4);
        ps_proc->ai4_neighbor_avail_8x8_subblks[u4_blk] = i4_avail_mask;

        /* prepare the neighbor pels of the partition */
        ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_left, pu1_top, pu1_top_left, pu1_nbr,
                                                     i4_src_strd, i4_avail_mask);

        i4_best_cost = INT_MAX;

        /* start with all nine modes and strike the ones whose reference */
        /* neighbors are missing */
        u4_mode_map = 0x1ff;
        if (!s_blk_nbrs.u1_mb_b)
        {
            u4_mode_map &= ~((1 << VERT_I4x4) | (1 << DIAG_DL_I4x4) | (1 << VERT_L_I4x4));
        }
        if (!s_blk_nbrs.u1_mb_a)
        {
            u4_mode_map &= ~((1 << HORZ_I4x4) | (1 << HORZ_U_I4x4));
        }
        if (!(s_blk_nbrs.u1_mb_a && s_blk_nbrs.u1_mb_b && s_blk_nbrs.u1_mb_d))
        {
            u4_mode_map &= ~((1 << DIAG_DR_I4x4) | (1 << VERT_R_I4x4) | (1 << HORZ_D_I4x4));
        }

        /* estimated mode of the partition (used for the rate term only) : */
        /* DC unless both the left and the top neighbors are usable */
        u4_est_mode = DC_I8x8;
        if (s_blk_nbrs.u1_mb_a && s_blk_nbrs.u1_mb_b)
        {
            UWORD32 u4_left_mode = DC_I8x8;
            UWORD32 u4_top_mode = DC_I8x8;

            /* mode of the left neighbor */
            if (u4_pix_x != 0)
            {
                /* neighbor is the previous partition of this mb */
                u4_left_mode = ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk - 1];
            }
            else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
            {
                u4_left_mode = ps_proc->au1_left_mb_intra_modes[u4_blk + 1];
            }
            else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
            {
                /* NOTE(review): this lookup uses sub-index 2, the same as */
                /* the top lookup below - confirm against the storage */
                /* layout of au1_left_mb_intra_modes and the Intra8x8PredMode */
                /* derivation of the H264 spec */
                u4_left_mode = ps_proc->au1_left_mb_intra_modes[(u4_blk + 1) * 4 + 2];
            }

            /* mode of the top neighbor */
            if (u4_pix_y != 0)
            {
                /* neighbor is the partition above, inside this mb */
                u4_top_mode = ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk - 2];
            }
            else if (ps_top_mb->u2_mb_type == I8x8)
            {
                u4_top_mode = pu1_top_modes[u4_blk + 2];
            }
            else if (ps_top_mb->u2_mb_type == I4x4)
            {
                u4_top_mode = pu1_top_modes[(u4_blk + 2) * 4 + 2];
            }

            u4_est_mode = MIN(u4_left_mode, u4_top_mode);
        }

        /* walk the bitmap from VERT_I8x8 upwards, one bit per mode */
        u4_mode = VERT_I8x8;
        while (u4_mode_map != 0)
        {
            if ((u4_mode_map & 1) != 0)
            {
                /* build the prediction for this mode */
                (ps_codec->apf_intra_pred_8_l)[u4_mode](pu1_nbr, pu1_pred, 0, i4_pred_strd, i4_avail_mask);

                /* distortion between the source and the prediction; the */
                /* best cost so far is handed to the sad routine */
                ime_compute_sad_8x8(pu1_cur, pu1_pred, i4_src_strd, i4_pred_strd, i4_best_cost, &i4_sad);

                i4_cost = i4_sad + ((u4_est_mode == u4_mode) ? u4_cost_mode_match : u4_cost_mode_other);

                /* keep the candidate only when strictly better */
                if (i4_cost < i4_best_cost)
                {
                    i4_best_cost = i4_cost;
                    i4_best_sad = i4_sad;
                    u4_best_mode = u4_mode;
                }
            }

            u4_mode++;
            u4_mode_map >>= 1;
        }

        /* add the winner of this partition to the mb totals */
        i4_mb_cost_sum += i4_best_cost;
        i4_mb_sad_sum += i4_best_sad;

        /* record the mode of the partition */
        ps_proc->au1_intra_luma_mb_8x8_modes[u4_blk] = u4_best_mode;
    }

    /* switch the mb to I8x8 when that is cheaper than the current choice */
    if (i4_mb_cost_sum < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_mb_cost_sum;
        ps_proc->i4_mb_distortion = i4_mb_sad_sum;
        ps_proc->u4_mb_type = I8x8;
    }

    return ;
}
733
734
735 /**
736 ******************************************************************************
737 *
738 * @brief
739 * evaluate best intra 4x4 mode (rate distortion opt off)
740 *
741 * @par Description
742 * This function evaluates all the possible intra 4x4 modes and finds the mode
743 * that best represents the macro-block (least distortion) and occupies fewer
744 * bits in the bit-stream.
745 *
746 * @param[in] ps_proc_ctxt
747 * pointer to proc ctxt
748 *
749 * @remarks
750 * Ideally the cost of encoding a macroblock is calculated as
751 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
752 * input block and the reconstructed block and rate is the number of bits taken
753 * to place the macroblock in the bit-stream. In this routine the rate does not
754 * exactly point to the total number of bits it takes, rather it points to header
755 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
756 * and residual bits fall in to texture bits the number of bits taken to encoding
757 * mbtype is considered as rate, we compute cost. Further we will approximate
758 * the distortion as the deviation b/w input and the predicted block as opposed
759 * to input and reconstructed block.
760 *
761 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
762 * 24*lambda is added to the SAD before comparison with the best SAD for
763 * inter prediction. This is an empirical value to prevent using too many intra
764 * blocks.
765 *
766 * @return none
767 *
768 ******************************************************************************
769 */
ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)770 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
771 {
772 /* Codec Context */
773 codec_t *ps_codec = ps_proc->ps_codec;
774
775 /* SAD(distortion metric) of an 4x4 block */
776 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
777
778 /* lambda */
779 UWORD32 u4_lambda = ps_proc->u4_lambda;
780
781 /* cost = distortion + lambda*rate */
782 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
783
784 /* cost due to mbtype */
785 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
786
787 /* intra mode */
788 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
789
790 /* neighbor pels for intra prediction */
791 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
792
793 /* pointer to curr partition */
794 UWORD8 *pu1_mb_curr;
795
796 /* pointer to prediction macro block */
797 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
798
799 /* strides */
800 WORD32 i4_src_strd = ps_proc->i4_src_strd;
801 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
802
803 /* neighbors left, top, top right, top left */
804 UWORD8 *pu1_mb_a;
805 UWORD8 *pu1_mb_b;
806 UWORD8 *pu1_mb_c;
807 UWORD8 *pu1_mb_d;
808
809 /* neighbor availability */
810 WORD32 i4_ngbr_avbl;
811 block_neighbors_t s_ngbr_avbl;
812
813 /* temp vars */
814 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
815
816 /* scan order inside 4x4 block */
817 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
818
819 /* ngbr sub mb modes */
820 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
821 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
822 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
823
824 /* valid intra modes map */
825 UWORD32 u4_valid_intra_modes;
826 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
827
828 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
829 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
830 if (ps_proc->ps_ngbr_avbl->u1_mb_c)
831 {
832 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1;
833 }
834 /* left pels */
835 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
836 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
837
838 /* top pels */
839 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
840 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
841
842 /* topleft pels */
843 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
844 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
845
846 /* top right */
847 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
848 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
849
850 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
851 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
852
853 for (b8 = 0; b8 < 4; b8++)
854 {
855 u4_blk_x = (b8 & 0x01) << 3;
856 u4_blk_y = (b8 >> 1) << 3;
857 for (b4 = 0; b4 < 4; b4++)
858 {
859 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
860 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
861
862 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
863 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
864 /* as opposed to using the recon pels. (open loop intra prediction) */
865 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
866 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
867 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
868 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
869
870 /* locating neighbors that are available for prediction */
871 /* TODO : update the neighbor availability information basing on constrained intra pred information */
872 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
873 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
874
875 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
876 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
877 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
878 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
879 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
880 /* set valid intra modes for evaluation */
881 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
882
883 /* if top partition is available and top right is not available for intra prediction, then */
884 /* padd top right samples using top sample and make top right also available */
885 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
886
887 /* gather prediction pels from the neighbors */
888 if (s_ngbr_avbl.u1_mb_a)
889 {
890 for(i = 0; i < 4; i++)
891 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
892 }
893 else
894 {
895 memset(pu1_ngbr_pels_i4, 0, 4);
896 }
897
898 if (s_ngbr_avbl.u1_mb_b)
899 {
900 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
901 }
902 else
903 {
904 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
905 }
906
907 if (s_ngbr_avbl.u1_mb_d)
908 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
909 else
910 pu1_ngbr_pels_i4[4] = 0;
911
912 if (s_ngbr_avbl.u1_mb_c)
913 {
914 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
915 }
916 else if (s_ngbr_avbl.u1_mb_b)
917 {
918 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
919 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
920 }
921
922 i4_partition_cost_least = INT_MAX;
923
924 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
925 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
926 {
927 u4_estimated_intra_4x4_mode = DC_I4x4;
928 }
929 else
930 {
931 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
932 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
933
934 if (u4_pix_x == 0)
935 {
936 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
937 {
938 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
939 }
940 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
941 {
942 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
943 }
944 }
945 else
946 {
947 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
948 }
949
950 if (u4_pix_y == 0)
951 {
952 if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
953 {
954 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
955 }
956 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
957 {
958 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
959 }
960 }
961 else
962 {
963 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
964 }
965
966 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
967 }
968
969 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
970
971 /* mode evaluation and prediction */
972 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
973 pu1_ngbr_pels_i4,
974 pu1_pred_mb, i4_src_strd,
975 i4_pred_strd, i4_ngbr_avbl,
976 &u4_best_intra_4x4_mode,
977 &i4_partition_cost_least,
978 u4_valid_intra_modes,
979 u4_lambda,
980 u4_estimated_intra_4x4_mode);
981
982
983 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
984
985 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
986 /* macroblock distortion */
987 i4_total_distortion += i4_partition_distortion_least;
988 i4_total_cost += i4_partition_cost_least;
989 /* mb partition mode */
990 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
991 }
992 }
993
994 /* update the type of the mb if necessary */
995 if (i4_total_cost < ps_proc->i4_mb_cost)
996 {
997 ps_proc->i4_mb_cost = i4_total_cost;
998 ps_proc->i4_mb_distortion = i4_total_distortion;
999 ps_proc->u4_mb_type = I4x4;
1000 }
1001
1002 return ;
1003 }
1004
1005 /**
1006 ******************************************************************************
1007 *
1008 * @brief evaluate best intra 4x4 mode (rate distortion opt on)
1009 *
1010 * @par Description
1011 * This function evaluates all the possible intra 4x4 modes and finds the mode
1012 * that best represents the macro-block (least distortion) and occupies fewer
1013 * bits in the bit-stream.
1014 *
1015 * @param[in] ps_proc_ctxt
1016 * pointer to proc ctxt
1017 *
1018 * @remarks
1019 * Ideally the cost of encoding a macroblock is calculated as
1020 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
1021 * input block and the reconstructed block and rate is the number of bits taken
1022 * to place the macroblock in the bit-stream. In this routine the rate does not
1023 * exactly point to the total number of bits it takes, rather it points to header
1024 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
1025 * and residual bits fall in to texture bits the number of bits taken to encoding
1026 * mbtype is considered as rate, we compute cost. Further we will approximate
1027 * the distortion as the deviation b/w input and the predicted block as opposed
1028 * to input and reconstructed block.
1029 *
1030 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
1031 * 24*lambda is added to the SAD before comparison with the best SAD for
1032 * inter prediction. This is an empirical value to prevent using too many intra
1033 * blocks.
1034 *
1035 * @return none
1036 *
1037 ******************************************************************************
1038 */
ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t * ps_proc)1039 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
1040 {
1041 /* Codec Context */
1042 codec_t *ps_codec = ps_proc->ps_codec;
1043
1044 /* SAD(distortion metric) of an 4x4 block */
1045 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
1046
1047 /* lambda */
1048 UWORD32 u4_lambda = ps_proc->u4_lambda;
1049
1050 /* cost = distortion + lambda*rate */
1051 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
1052
1053 /* cost due to mbtype */
1054 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
1055
1056 /* intra mode */
1057 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
1058
1059 /* neighbor pels for intra prediction */
1060 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
1061
1062 /* pointer to curr partition */
1063 UWORD8 *pu1_mb_curr;
1064 UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top;
1065 UWORD8 *pu1_ref_mb_intra_4x4;
1066
1067 /* pointer to residual macro block */
1068 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4;
1069
1070 /* pointer to prediction macro block */
1071 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
1072
1073 /* strides */
1074 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1075 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1076 WORD32 i4_ref_strd_left, i4_ref_strd_top;
1077
1078 /* neighbors left, top, top right, top left */
1079 UWORD8 *pu1_mb_a;
1080 UWORD8 *pu1_mb_b;
1081 UWORD8 *pu1_mb_c;
1082 UWORD8 *pu1_mb_d;
1083
1084 /* number of non zero coeffs*/
1085 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
1086
1087 /* quantization parameters */
1088 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1089
1090 /* neighbor availability */
1091 WORD32 i4_ngbr_avbl;
1092 block_neighbors_t s_ngbr_avbl;
1093
1094 /* temp vars */
1095 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
1096
1097 /* scan order inside 4x4 block */
1098 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
1099
1100 /* ngbr sub mb modes */
1101 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
1102 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1103 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1104
1105 /* valid intra modes map */
1106 UWORD32 u4_valid_intra_modes;
1107 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
1108
1109 /* Dummy variable for 4x4 trans function */
1110 WORD16 i2_dc_dummy;
1111 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
1112 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
1113
1114 /* compute ngbr availability for sub blks */
1115 if (ps_proc->ps_ngbr_avbl->u1_mb_c)
1116 {
1117 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1);
1118 }
1119
1120 /* left pels */
1121 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
1122 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
1123
1124 /* top pels */
1125 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
1126 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
1127
1128 /* topleft pels */
1129 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
1130 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
1131
1132 /* top right pels */
1133 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
1134 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
1135
1136 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
1137 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
1138
1139 for(b8 = 0; b8 < 4; b8++)
1140 {
1141 u4_blk_x = (b8 & 0x01) << 3;
1142 u4_blk_y = (b8 >> 1) << 3;
1143 for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE)
1144 {
1145 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
1146 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
1147
1148 pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd);
1149 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
1150 if (u4_pix_x == 0)
1151 {
1152 i4_ref_strd_left = ps_proc->i4_rec_strd;
1153 pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left);
1154 }
1155 else
1156 {
1157 i4_ref_strd_left = i4_pred_strd;
1158 pu1_mb_ref_left = pu1_ref_mb_intra_4x4;
1159 }
1160 if (u4_pix_y == 0)
1161 {
1162 i4_ref_strd_top = ps_proc->i4_rec_strd;
1163 pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top);
1164 }
1165 else
1166 {
1167 i4_ref_strd_top = i4_pred_strd;
1168 pu1_mb_ref_top = pu1_ref_mb_intra_4x4;
1169 }
1170
1171 pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */
1172 pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */
1173 pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */
1174 if (u4_pix_y == 0)
1175 pu1_mb_d = pu1_mb_b - 1;
1176 else
1177 pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */
1178
1179 /* locating neighbors that are available for prediction */
1180 /* TODO : update the neighbor availability information basing on constrained intra pred information */
1181 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
1182 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
1183
1184 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
1185 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
1186 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
1187 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
1188 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
1189 /* set valid intra modes for evaluation */
1190 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
1191
1192 /* if top partition is available and top right is not available for intra prediction, then */
1193 /* padd top right samples using top sample and make top right also available */
1194 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
1195
1196 /* gather prediction pels from the neighbors */
1197 if (s_ngbr_avbl.u1_mb_a)
1198 {
1199 for(i = 0; i < 4; i++)
1200 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left];
1201 }
1202 else
1203 {
1204 memset(pu1_ngbr_pels_i4,0,4);
1205 }
1206 if(s_ngbr_avbl.u1_mb_b)
1207 {
1208 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
1209 }
1210 else
1211 {
1212 memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4);
1213 }
1214 if (s_ngbr_avbl.u1_mb_d)
1215 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
1216 else
1217 pu1_ngbr_pels_i4[4] = 0;
1218 if (s_ngbr_avbl.u1_mb_c)
1219 {
1220 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
1221 }
1222 else if (s_ngbr_avbl.u1_mb_b)
1223 {
1224 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
1225 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
1226 }
1227
1228 i4_partition_cost_least = INT_MAX;
1229
1230 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
1231 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
1232 {
1233 u4_estimated_intra_4x4_mode = DC_I4x4;
1234 }
1235 else
1236 {
1237 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
1238 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
1239
1240 if (u4_pix_x == 0)
1241 {
1242 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
1243 {
1244 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
1245 }
1246 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
1247 {
1248 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
1249 }
1250 }
1251 else
1252 {
1253 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
1254 }
1255
1256 if (u4_pix_y == 0)
1257 {
1258 if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
1259 {
1260 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
1261 }
1262 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
1263 {
1264 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
1265 }
1266 }
1267 else
1268 {
1269 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
1270 }
1271
1272 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
1273 }
1274
1275 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
1276
1277 /*mode evaluation and prediction*/
1278 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
1279 pu1_ngbr_pels_i4,
1280 pu1_pred_mb, i4_src_strd,
1281 i4_pred_strd, i4_ngbr_avbl,
1282 &u4_best_intra_4x4_mode,
1283 &i4_partition_cost_least,
1284 u4_valid_intra_modes,
1285 u4_lambda,
1286 u4_estimated_intra_4x4_mode);
1287
1288
1289 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits);
1290
1291 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
1292
1293 /* macroblock distortion */
1294 i4_total_distortion += i4_partition_distortion_least;
1295 i4_total_cost += i4_partition_cost_least;
1296
1297 /* mb partition mode */
1298 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
1299
1300
1301 /********************************************************/
1302 /* error estimation, */
1303 /* transform */
1304 /* quantization */
1305 /********************************************************/
1306 ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb,
1307 pi2_res_mb, i4_src_strd,
1308 i4_pred_strd,
1309 /* No op stride, this implies a buff of lenght 1x16 */
1310 ps_qp_params->pu2_scale_mat,
1311 ps_qp_params->pu2_thres_mat,
1312 ps_qp_params->u1_qbits,
1313 ps_qp_params->u4_dead_zone,
1314 pu1_nnz, &i2_dc_dummy);
1315
1316 /********************************************************/
1317 /* ierror estimation, */
1318 /* itransform */
1319 /* iquantization */
1320 /********************************************************/
1321 ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb,
1322 pu1_ref_mb_intra_4x4,
1323 i4_pred_strd, i4_pred_strd,
1324 ps_qp_params->pu2_iscale_mat,
1325 ps_qp_params->pu2_weigh_mat,
1326 ps_qp_params->u1_qp_div,
1327 ps_proc->pv_scratch_buff, 0,
1328 NULL);
1329 }
1330 }
1331
1332 /* update the type of the mb if necessary */
1333 if (i4_total_cost < ps_proc->i4_mb_cost)
1334 {
1335 ps_proc->i4_mb_cost = i4_total_cost;
1336 ps_proc->i4_mb_distortion = i4_total_distortion;
1337 ps_proc->u4_mb_type = I4x4;
1338 }
1339
1340 return ;
1341 }
1342
1343 /**
1344 ******************************************************************************
1345 *
1346 * @brief
1347 * evaluate best chroma intra 8x8 mode (rate distortion opt off)
1348 *
1349 * @par Description
1350 * This function evaluates all the possible chroma intra 8x8 modes and finds
1351 * the mode that best represents the macroblock (least distortion) and occupies
1352 * fewer bits in the bitstream.
1353 *
1354 * @param[in] ps_proc_ctxt
1355 * pointer to macroblock context (handle)
1356 *
1357 * @remarks
1358 * For chroma best intra pred mode is calculated based only on SAD
1359 *
1360 * @returns none
1361 *
1362 ******************************************************************************
1363 */
1364
ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)1365 void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
1366 {
1367 /* Codec Context */
1368 codec_t *ps_codec = ps_proc->ps_codec;
1369
1370 /* SAD(distortion metric) of an 8x8 block */
1371 WORD32 i4_mb_distortion, i4_chroma_mb_distortion;
1372
1373 /* intra mode */
1374 UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8;
1375
1376 /* neighbor pels for intra prediction */
1377 UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels;
1378
1379 /* pointer to curr macro block */
1380 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma;
1381 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma;
1382
1383 /* pointer to prediction macro block */
1384 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma;
1385 UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;
1386
1387 /* strides */
1388 WORD32 i4_src_strd_c = ps_proc->i4_src_chroma_strd;
1389 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
1390 WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd;
1391
1392 /* neighbors left, top, top left */
1393 UWORD8 *pu1_mb_a = pu1_ref_mb - 2;
1394 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c;
1395 UWORD8 *pu1_mb_d = pu1_mb_b - 2;
1396
1397 /* neighbor availability */
1398 const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15};
1399 WORD32 i4_ngbr_avbl;
1400
1401 /* valid intra modes map */
1402 UWORD32 u4_valid_intra_modes;
1403 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1404
1405 /* temp var */
1406 UWORD8 i;
1407 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
1408 UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
1409 /* locating neighbors that are available for prediction */
1410
1411 /* gather prediction pels from the neighbors */
1412 /* left pels */
1413 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
1414 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
1415 if (u1_mb_a)
1416 {
1417 for (i = 0; i < 16; i += 2)
1418 {
1419 pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c];
1420 pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1];
1421 }
1422 }
1423 else
1424 {
1425 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE);
1426 }
1427
1428 /* top pels */
1429 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
1430 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
1431 if (u1_mb_b)
1432 {
1433 ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16);
1434 }
1435 else
1436 {
1437 ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE);
1438 }
1439
1440 /* top left pels */
1441 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
1442 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
1443 if (u1_mb_d)
1444 {
1445 pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d;
1446 pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1);
1447 }
1448 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
1449 ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl;
1450
1451 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
1452
1453 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST)
1454 u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8);
1455
1456 i4_chroma_mb_distortion = INT_MAX;
1457
1458 /* perform intra mode chroma 8x8 evaluation */
1459 /* intra prediction */
1460 ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb,
1461 pu1_ngbr_pels_c_i8x8,
1462 pu1_pred_mb,
1463 i4_src_strd_c,
1464 i4_pred_strd,
1465 i4_ngbr_avbl,
1466 &u4_best_chroma_intra_8x8_mode,
1467 &i4_chroma_mb_distortion,
1468 u4_valid_intra_modes);
1469
1470 if (u4_valid_intra_modes & 8)/* if Chroma PLANE is valid*/
1471 {
1472 (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl);
1473
1474 /* evaluate distortion(sad) */
1475 ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion);
1476
1477 /* update the least distortion information if necessary */
1478 if(i4_mb_distortion < i4_chroma_mb_distortion)
1479 {
1480 i4_chroma_mb_distortion = i4_mb_distortion;
1481 u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8;
1482 }
1483 }
1484
1485 DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode);
1486
1487 ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode;
1488
1489 return ;
1490 }
1491
1492
1493 /**
1494 ******************************************************************************
1495 *
1496 * @brief
1497 * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
1498 * prediction.
1499 *
1500 * @par Description
1501 * This function evaluates first three 16x16 modes and compute corresponding sad
1502 * and return the buffer predicted with best mode.
1503 *
1504 * @param[in] pu1_src
1505 * UWORD8 pointer to the source
1506 *
1507 * @param[in] pu1_ngbr_pels_i16
1508 * UWORD8 pointer to neighbouring pels
1509 *
1510 * @param[out] pu1_dst
1511 * UWORD8 pointer to the destination
1512 *
1513 * @param[in] src_strd
1514 * integer source stride
1515 *
1516 * @param[in] dst_strd
1517 * integer destination stride
1518 *
1519 * @param[in] u4_n_avblty
1520 * availability of neighbouring pixels
1521 *
1522 * @param[in] u4_intra_mode
1523 * Pointer to the variable in which best mode is returned
1524 *
1525 * @param[in] pu4_sadmin
1526 * Pointer to the variable in which minimum sad is returned
1527 *
1528 * @param[in] u4_valid_intra_modes
1529 * Says what all modes are valid
1530 *
1531 * @returns none
1532 *
1533 ******************************************************************************
1534 */
ih264e_evaluate_intra16x16_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels_i16,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)1535 void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
1536 UWORD8 *pu1_ngbr_pels_i16,
1537 UWORD8 *pu1_dst,
1538 UWORD32 src_strd,
1539 UWORD32 dst_strd,
1540 WORD32 u4_n_avblty,
1541 UWORD32 *u4_intra_mode,
1542 WORD32 *pu4_sadmin,
1543 UWORD32 u4_valid_intra_modes)
1544 {
1545 UWORD8 *pu1_neighbour;
1546 UWORD8 *pu1_src_temp = pu1_src;
1547 UWORD8 left = 0, top = 0;
1548 WORD32 u4_dcval = 0;
1549 WORD32 i, j;
1550 WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX,
1551 i4_min_sad = INT_MAX;
1552 UWORD8 val;
1553
1554 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1555 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1556
1557 /* left available */
1558 if (left)
1559 {
1560 i4_sad_horz = 0;
1561
1562 for (i = 0; i < 16; i++)
1563 {
1564 val = pu1_ngbr_pels_i16[15 - i];
1565
1566 u4_dcval += val;
1567
1568 for (j = 0; j < 16; j++)
1569 {
1570 i4_sad_horz += ABS(val - pu1_src_temp[j]);
1571 }
1572
1573 pu1_src_temp += src_strd;
1574 }
1575 u4_dcval += 8;
1576 }
1577
1578 pu1_src_temp = pu1_src;
1579 /* top available */
1580 if (top)
1581 {
1582 i4_sad_vert = 0;
1583
1584 for (i = 0; i < 16; i++)
1585 {
1586 u4_dcval += pu1_ngbr_pels_i16[17 + i];
1587
1588 for (j = 0; j < 16; j++)
1589 {
1590 i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]);
1591 }
1592 pu1_src_temp += src_strd;
1593
1594 }
1595 u4_dcval += 8;
1596 }
1597
1598 u4_dcval = (u4_dcval) >> (3 + left + top);
1599
1600 pu1_src_temp = pu1_src;
1601
1602 /* none available */
1603 u4_dcval += (left == 0) * (top == 0) * 128;
1604
1605 i4_sad_dc = 0;
1606
1607 for (i = 0; i < 16; i++)
1608 {
1609 for (j = 0; j < 16; j++)
1610 {
1611 i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]);
1612 }
1613 pu1_src_temp += src_strd;
1614 }
1615
1616 if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */
1617 i4_sad_dc = INT_MAX;
1618
1619 if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */
1620 i4_sad_vert = INT_MAX;
1621
1622 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */
1623 i4_sad_horz = INT_MAX;
1624
1625 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
1626
1627 /* Finding Minimum sad and doing corresponding prediction */
1628 if (i4_min_sad < *pu4_sadmin)
1629 {
1630 *pu4_sadmin = i4_min_sad;
1631 if (i4_min_sad == i4_sad_vert)
1632 {
1633 *u4_intra_mode = VERT_I16x16;
1634 pu1_neighbour = pu1_ngbr_pels_i16 + 17;
1635 for (j = 0; j < 16; j++)
1636 {
1637 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
1638 pu1_dst += dst_strd;
1639 }
1640 }
1641 else if (i4_min_sad == i4_sad_horz)
1642 {
1643 *u4_intra_mode = HORZ_I16x16;
1644 for (j = 0; j < 16; j++)
1645 {
1646 val = pu1_ngbr_pels_i16[15 - j];
1647 memset(pu1_dst, val, MB_SIZE);
1648 pu1_dst += dst_strd;
1649 }
1650 }
1651 else
1652 {
1653 *u4_intra_mode = DC_I16x16;
1654 for (j = 0; j < 16; j++)
1655 {
1656 memset(pu1_dst, u4_dcval, MB_SIZE);
1657 pu1_dst += dst_strd;
1658 }
1659 }
1660 }
1661 return;
1662 }
1663
1664 /**
1665 ******************************************************************************
1666 *
1667 * @brief
1668 * Evaluate best intra 4x4 mode and perform prediction.
1669 *
1670 * @par Description
1671 * This function evaluates 4x4 modes and compute corresponding sad
1672 * and return the buffer predicted with best mode.
1673 *
1674 * @param[in] pu1_src
1675 * UWORD8 pointer to the source
1676 *
1677 * @param[in] pu1_ngbr_pels
1678 * UWORD8 pointer to neighbouring pels
1679 *
1680 * @param[out] pu1_dst
1681 * UWORD8 pointer to the destination
1682 *
1683 * @param[in] src_strd
1684 * integer source stride
1685 *
1686 * @param[in] dst_strd
1687 * integer destination stride
1688 *
1689 * @param[in] u4_n_avblty
1690 * availability of neighbouring pixels
1691 *
1692 * @param[in] u4_intra_mode
1693 * Pointer to the variable in which best mode is returned
1694 *
1695 * @param[in] pu4_sadmin
1696 * Pointer to the variable in which minimum cost is returned
1697 *
1698 * @param[in] u4_valid_intra_modes
1699 * Says what all modes are valid
1700 *
1701 * @param[in] u4_lambda
1702 * Lambda value for computing cost from SAD
1703 *
1704 * @param[in] u4_predictd_mode
1705 * Predicted mode for cost computation
1706 *
1707 * @returns none
1708 *
1709 ******************************************************************************
1710 */
ih264e_evaluate_intra_4x4_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes,UWORD32 u4_lambda,UWORD32 u4_predictd_mode)1711 void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
1712 UWORD8 *pu1_ngbr_pels,
1713 UWORD8 *pu1_dst,
1714 UWORD32 src_strd,
1715 UWORD32 dst_strd,
1716 WORD32 u4_n_avblty,
1717 UWORD32 *u4_intra_mode,
1718 WORD32 *pu4_sadmin,
1719 UWORD32 u4_valid_intra_modes,
1720 UWORD32 u4_lambda,
1721 UWORD32 u4_predictd_mode)
1722 {
1723 UWORD8 *pu1_src_temp = pu1_src;
1724 UWORD8 *pu1_pred = pu1_ngbr_pels;
1725 UWORD8 left = 0, top = 0;
1726 UWORD8 u1_pred_val = 0;
1727 UWORD8 u1_pred_vals[4] = {0};
1728 UWORD8 *pu1_pred_val = NULL;
1729 /* To store FILT121 operated values*/
1730 UWORD8 u1_pred_vals_diag_121[15] = {0};
1731 /* To store FILT11 operated values*/
1732 UWORD8 u1_pred_vals_diag_11[15] = {0};
1733 UWORD8 u1_pred_vals_vert_r[8] = {0};
1734 UWORD8 u1_pred_vals_horz_d[10] = {0};
1735 UWORD8 u1_pred_vals_horz_u[10] = {0};
1736 WORD32 u4_dcval = 0;
1737 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1738 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1739
1740 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1741 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1742 WORD32 i, i4_min_cost = INT_MAX;
1743
1744 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1745 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1746
1747 /* Computing SAD */
1748
1749 /* VERT mode valid */
1750 if (u4_valid_intra_modes & 1)
1751 {
1752 pu1_pred = pu1_ngbr_pels + 5;
1753 i4_sad[VERT_I4x4] = 0;
1754 i4_cost[VERT_I4x4] = 0;
1755
1756 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1757 pu1_src_temp += src_strd;
1758 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1759 pu1_src_temp += src_strd;
1760 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1761 pu1_src_temp += src_strd;
1762 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1763
1764 i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
1765 u4_lambda : 4 * u4_lambda);
1766 }
1767
1768 /* HORZ mode valid */
1769 if (u4_valid_intra_modes & 2)
1770 {
1771 i4_sad[HORZ_I4x4] = 0;
1772 i4_cost[HORZ_I4x4] =0;
1773 pu1_src_temp = pu1_src;
1774
1775 u1_pred_val = pu1_ngbr_pels[3];
1776
1777 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1778 + ABS(pu1_src_temp[1] - u1_pred_val)
1779 + ABS(pu1_src_temp[2] - u1_pred_val)
1780 + ABS(pu1_src_temp[3] - u1_pred_val);
1781 pu1_src_temp += src_strd;
1782
1783 u1_pred_val = pu1_ngbr_pels[2];
1784
1785 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1786 + ABS(pu1_src_temp[1] - u1_pred_val)
1787 + ABS(pu1_src_temp[2] - u1_pred_val)
1788 + ABS(pu1_src_temp[3] - u1_pred_val);
1789 pu1_src_temp += src_strd;
1790
1791 u1_pred_val = pu1_ngbr_pels[1];
1792
1793 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1794 + ABS(pu1_src_temp[1] - u1_pred_val)
1795 + ABS(pu1_src_temp[2] - u1_pred_val)
1796 + ABS(pu1_src_temp[3] - u1_pred_val);
1797 pu1_src_temp += src_strd;
1798
1799 u1_pred_val = pu1_ngbr_pels[0];
1800
1801 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1802 + ABS(pu1_src_temp[1] - u1_pred_val)
1803 + ABS(pu1_src_temp[2] - u1_pred_val)
1804 + ABS(pu1_src_temp[3] - u1_pred_val);
1805
1806 i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
1807 u4_lambda : 4 * u4_lambda);
1808 }
1809
1810 /* DC mode valid */
1811 if (u4_valid_intra_modes & 4)
1812 {
1813 i4_sad[DC_I4x4] = 0;
1814 i4_cost[DC_I4x4] = 0;
1815 pu1_src_temp = pu1_src;
1816
1817 if (left)
1818 u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
1819 + pu1_ngbr_pels[3] + 2;
1820 if (top)
1821 u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
1822 + pu1_ngbr_pels[8] + 2;
1823
1824 u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1825
1826 /* none available */
1827 memset(u1_pred_vals, u4_dcval, 4);
1828 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1829 pu1_src_temp += src_strd;
1830 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1831 pu1_src_temp += src_strd;
1832 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1833 pu1_src_temp += src_strd;
1834 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1835 pu1_src_temp += src_strd;
1836
1837 i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
1838 u4_lambda : 4 * u4_lambda);
1839 }
1840
1841 /* if modes other than VERT, HORZ and DC are valid */
1842 if (u4_valid_intra_modes > 7)
1843 {
1844 pu1_pred = pu1_ngbr_pels;
1845 pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1846
1847 /* Performing FILT121 and FILT11 operation for all neighbour values*/
1848 for (i = 0; i < 13; i++)
1849 {
1850 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1851 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1852
1853 pu1_pred++;
1854 }
1855
1856 if (u4_valid_intra_modes & 8)/* DIAG_DL */
1857 {
1858 i4_sad[DIAG_DL_I4x4] = 0;
1859 i4_cost[DIAG_DL_I4x4] = 0;
1860 pu1_src_temp = pu1_src;
1861 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1862
1863 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1864 pu1_src_temp += src_strd;
1865 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1866 pu1_src_temp += src_strd;
1867 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1868 pu1_src_temp += src_strd;
1869 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1870 pu1_src_temp += src_strd;
1871 i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
1872 u4_lambda : 4 * u4_lambda);
1873 }
1874
1875 if (u4_valid_intra_modes & 16)/* DIAG_DR */
1876 {
1877 i4_sad[DIAG_DR_I4x4] = 0;
1878 i4_cost[DIAG_DR_I4x4] = 0;
1879 pu1_src_temp = pu1_src;
1880 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1881
1882 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1883 pu1_src_temp += src_strd;
1884 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1885 pu1_src_temp += src_strd;
1886 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1887 pu1_src_temp += src_strd;
1888 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1889 pu1_src_temp += src_strd;
1890 i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
1891 u4_lambda : 4 * u4_lambda);
1892
1893 }
1894
1895 if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
1896 {
1897 i4_sad[VERT_R_I4x4] = 0;
1898
1899 pu1_src_temp = pu1_src;
1900 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1901 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1902 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1903 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1904
1905 pu1_pred_val = u1_pred_vals_diag_11 + 4;
1906 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1907 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1908 pu1_src_temp += src_strd;
1909 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1910 pu1_src_temp += src_strd;
1911 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1912 pu1_src_temp += src_strd;
1913 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
1914 i4_sad[VERT_R_I4x4]);
1915
1916 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
1917 u4_lambda : 4 * u4_lambda);
1918 }
1919
1920 if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
1921 {
1922 i4_sad[HORZ_D_I4x4] = 0;
1923
1924 pu1_src_temp = pu1_src;
1925 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1926 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1927 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1928 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1929 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1930 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1931 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1932 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1933
1934 pu1_pred_val = u1_pred_vals_horz_d;
1935 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1936 pu1_src_temp += src_strd;
1937 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1938 pu1_src_temp += src_strd;
1939 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1940 pu1_src_temp += src_strd;
1941 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1942
1943 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
1944 u4_lambda : 4 * u4_lambda);
1945 }
1946
1947 if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
1948 {
1949 i4_sad[VERT_L_I4x4] = 0;
1950 pu1_src_temp = pu1_src;
1951 pu1_pred_val = u1_pred_vals_diag_11 + 5;
1952 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1953 pu1_src_temp += src_strd;
1954 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1955 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1956 pu1_src_temp += src_strd;
1957 pu1_pred_val = u1_pred_vals_diag_11 + 6;
1958 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1959 pu1_src_temp += src_strd;
1960 pu1_pred_val = u1_pred_vals_diag_121 + 6;
1961 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1962
1963 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
1964 u4_lambda : 4 * u4_lambda);
1965 }
1966
1967 if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
1968 {
1969 i4_sad[HORZ_U_I4x4] = 0;
1970 pu1_src_temp = pu1_src;
1971 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1972 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1973 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1974 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1975 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1976 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1977
1978 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
1979
1980 pu1_pred_val = u1_pred_vals_horz_u;
1981 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
1982 pu1_src_temp += src_strd;
1983 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
1984 pu1_src_temp += src_strd;
1985 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
1986 pu1_src_temp += src_strd;
1987 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
1988
1989 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
1990 u4_lambda : 4 * u4_lambda);
1991 }
1992
1993 i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
1994 MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
1995 MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
1996
1997 }
1998 else
1999 {
2000 /* Only first three modes valid */
2001 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
2002 }
2003
2004 *pu4_sadmin = i4_min_cost;
2005
2006 if (i4_min_cost == i4_cost[0])
2007 {
2008 *u4_intra_mode = VERT_I4x4;
2009 pu1_pred_val = pu1_ngbr_pels + 5;
2010 memcpy(pu1_dst, (pu1_pred_val), 4);
2011 pu1_dst += dst_strd;
2012 memcpy(pu1_dst, (pu1_pred_val), 4);
2013 pu1_dst += dst_strd;
2014 memcpy(pu1_dst, (pu1_pred_val), 4);
2015 pu1_dst += dst_strd;
2016 memcpy(pu1_dst, (pu1_pred_val), 4);
2017 }
2018 else if (i4_min_cost == i4_cost[1])
2019 {
2020 *u4_intra_mode = HORZ_I4x4;
2021 memset(pu1_dst, pu1_ngbr_pels[3], 4);
2022 pu1_dst += dst_strd;
2023 memset(pu1_dst, pu1_ngbr_pels[2], 4);
2024 pu1_dst += dst_strd;
2025 memset(pu1_dst, pu1_ngbr_pels[1], 4);
2026 pu1_dst += dst_strd;
2027 memset(pu1_dst, pu1_ngbr_pels[0], 4);
2028 }
2029 else if (i4_min_cost == i4_cost[2])
2030 {
2031 *u4_intra_mode = DC_I4x4;
2032 memset(pu1_dst, u4_dcval, 4);
2033 pu1_dst += dst_strd;
2034 memset(pu1_dst, u4_dcval, 4);
2035 pu1_dst += dst_strd;
2036 memset(pu1_dst, u4_dcval, 4);
2037 pu1_dst += dst_strd;
2038 memset(pu1_dst, u4_dcval, 4);
2039 }
2040
2041 else if (i4_min_cost == i4_cost[3])
2042 {
2043 *u4_intra_mode = DIAG_DL_I4x4;
2044 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2045 memcpy(pu1_dst, (pu1_pred_val), 4);
2046 pu1_dst += dst_strd;
2047 memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2048 pu1_dst += dst_strd;
2049 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2050 pu1_dst += dst_strd;
2051 memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2052 }
2053 else if (i4_min_cost == i4_cost[4])
2054 {
2055 *u4_intra_mode = DIAG_DR_I4x4;
2056 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2057
2058 memcpy(pu1_dst, (pu1_pred_val), 4);
2059 pu1_dst += dst_strd;
2060 memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2061 pu1_dst += dst_strd;
2062 memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2063 pu1_dst += dst_strd;
2064 memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2065 }
2066
2067 else if (i4_min_cost == i4_cost[5])
2068 {
2069 *u4_intra_mode = VERT_R_I4x4;
2070 pu1_pred_val = u1_pred_vals_diag_11 + 4;
2071 memcpy(pu1_dst, (pu1_pred_val), 4);
2072 pu1_dst += dst_strd;
2073 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2074 memcpy(pu1_dst, (pu1_pred_val), 4);
2075 pu1_dst += dst_strd;
2076 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2077 pu1_dst += dst_strd;
2078 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2079 }
2080 else if (i4_min_cost == i4_cost[6])
2081 {
2082 *u4_intra_mode = HORZ_D_I4x4;
2083 pu1_pred_val = u1_pred_vals_horz_d;
2084 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2085 pu1_dst += dst_strd;
2086 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2087 pu1_dst += dst_strd;
2088 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2089 pu1_dst += dst_strd;
2090 memcpy(pu1_dst, (pu1_pred_val), 4);
2091 pu1_dst += dst_strd;
2092 }
2093 else if (i4_min_cost == i4_cost[7])
2094 {
2095 *u4_intra_mode = VERT_L_I4x4;
2096 pu1_pred_val = u1_pred_vals_diag_11 + 5;
2097 memcpy(pu1_dst, (pu1_pred_val), 4);
2098 pu1_dst += dst_strd;
2099 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2100 memcpy(pu1_dst, (pu1_pred_val), 4);
2101 pu1_dst += dst_strd;
2102 pu1_pred_val = u1_pred_vals_diag_11 + 6;
2103 memcpy(pu1_dst, (pu1_pred_val), 4);
2104 pu1_dst += dst_strd;
2105 pu1_pred_val = u1_pred_vals_diag_121 + 6;
2106 memcpy(pu1_dst, (pu1_pred_val), 4);
2107 }
2108 else if (i4_min_cost == i4_cost[8])
2109 {
2110 *u4_intra_mode = HORZ_U_I4x4;
2111 pu1_pred_val = u1_pred_vals_horz_u;
2112 memcpy(pu1_dst, (pu1_pred_val), 4);
2113 pu1_dst += dst_strd;
2114 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2115 pu1_dst += dst_strd;
2116 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2117 pu1_dst += dst_strd;
2118 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2119 pu1_dst += dst_strd;
2120 }
2121
2122 return;
2123 }
2124
2125 /**
2126 ******************************************************************************
2127 *
2128 * @brief:
2129 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the prediction.
2130 *
2131 * @par Description
2132 * This function evaluates first three intra chroma modes and compute corresponding sad
2133 * and return the buffer predicted with best mode.
2134 *
2135 * @param[in] pu1_src
2136 * UWORD8 pointer to the source
2137 *
2138 * @param[in] pu1_ngbr_pels
2139 * UWORD8 pointer to neighbouring pels
2140 *
2141 * @param[out] pu1_dst
2142 * UWORD8 pointer to the destination
2143 *
2144 * @param[in] src_strd
2145 * integer source stride
2146 *
2147 * @param[in] dst_strd
2148 * integer destination stride
2149 *
2150 * @param[in] u4_n_avblty
2151 * availability of neighbouring pixels
2152 *
2153 * @param[in] u4_intra_mode
2154 * Pointer to the variable in which best mode is returned
2155 *
2156 * @param[in] pu4_sadmin
2157 * Pointer to the variable in which minimum sad is returned
2158 *
2159 * @param[in] u4_valid_intra_modes
2160 * Says what all modes are valid
2161 *
2162 * @return none
2163 *
2164 ******************************************************************************
2165 */
ih264e_evaluate_intra_chroma_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)2166 void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
2167 UWORD8 *pu1_ngbr_pels,
2168 UWORD8 *pu1_dst,
2169 UWORD32 src_strd,
2170 UWORD32 dst_strd,
2171 WORD32 u4_n_avblty,
2172 UWORD32 *u4_intra_mode,
2173 WORD32 *pu4_sadmin,
2174 UWORD32 u4_valid_intra_modes)
2175 {
2176 UWORD8 *pu1_neighbour;
2177 UWORD8 *pu1_src_temp = pu1_src;
2178 UWORD8 left = 0, top = 0;
2179 WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */
2180 u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/
2181
2182 WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/
2183 u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/
2184
2185 WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
2186 i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
2187 UWORD8 val_u, val_v;
2188
2189 WORD32 u4_dc_val[2][2][2];/* -----------
2190 | | | Chroma can have four
2191 | 00 | 01 | separate dc value...
2192 ----------- u4_dc_val corresponds to this dc values
2193 | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V]
2194 | 10 | 11 |
2195 ----------- */
2196 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2197 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2198
2199 /*Evaluating HORZ*/
2200 if (left)/* Ifleft available*/
2201 {
2202 i4_sad_horz = 0;
2203
2204 for (i = 0; i < 8; i++)
2205 {
2206 val_v = pu1_ngbr_pels[15 - 2 * i];
2207 val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2208 row = i / 4;
2209 u4_dcval_u_l[row] += val_u;
2210 u4_dcval_v_l[row] += val_v;
2211 for (j = 0; j < 8; j++)
2212 {
2213 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/
2214 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2215 }
2216
2217 pu1_src_temp += src_strd;
2218 }
2219 u4_dcval_u_l[0] += 2;
2220 u4_dcval_u_l[1] += 2;
2221 u4_dcval_v_l[0] += 2;
2222 u4_dcval_v_l[1] += 2;
2223 }
2224
2225 /*Evaluating VERT**/
2226 pu1_src_temp = pu1_src;
2227 if (top) /* top available*/
2228 {
2229 i4_sad_vert = 0;
2230
2231 for (i = 0; i < 8; i++)
2232 {
2233 col = i / 4;
2234
2235 val_u = pu1_ngbr_pels[18 + i * 2];
2236 val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2237 u4_dcval_u_t[col] += val_u;
2238 u4_dcval_v_t[col] += val_v;
2239
2240 for (j = 0; j < 16; j++)
2241 {
2242 i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/
2243 }
2244 pu1_src_temp += src_strd;
2245
2246 }
2247 u4_dcval_u_t[0] += 2;
2248 u4_dcval_u_t[1] += 2;
2249 u4_dcval_v_t[0] += 2;
2250 u4_dcval_v_t[1] += 2;
2251 }
2252
2253 /* computing DC value*/
2254 /* Equation 8-128 in spec*/
2255 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2256 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2257 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2258 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2259
2260 if (top)
2261 {
2262 /* Equation 8-132 in spec*/
2263 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2264 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2265 }
2266 else
2267 {
2268 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2269 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2270 }
2271
2272 if (left)
2273 {
2274 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2275 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2276 }
2277 else
2278 {
2279 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2280 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2281 }
2282
2283 if (!(left || top))
2284 {
2285 /*none available*/
2286 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
2287 u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2288 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
2289 u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2290 }
2291
2292 /* Evaluating DC */
2293 pu1_src_temp = pu1_src;
2294 i4_sad_dc = 0;
2295 for (i = 0; i < 8; i++)
2296 {
2297 for (j = 0; j < 8; j++)
2298 {
2299 col = j / 4;
2300 row = i / 4;
2301 val_u = u4_dc_val[row][col][0];
2302 val_v = u4_dc_val[row][col][1];
2303
2304 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/
2305 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2306 }
2307 pu1_src_temp += src_strd;
2308 }
2309
2310 if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/
2311 i4_sad_dc = INT_MAX;
2312 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/
2313 i4_sad_horz = INT_MAX;
2314 if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/
2315 i4_sad_vert = INT_MAX;
2316
2317 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2318
2319 /* Finding Minimum sad and doing corresponding prediction*/
2320 if (i4_min_sad < *pu4_sadmin)
2321 {
2322 *pu4_sadmin = i4_min_sad;
2323
2324 if (i4_min_sad == i4_sad_dc)
2325 {
2326 *u4_intra_mode = DC_CH_I8x8;
2327 for (i = 0; i < 8; i++)
2328 {
2329 for (j = 0; j < 8; j++)
2330 {
2331 col = j / 4;
2332 row = i / 4;
2333
2334 pu1_dst[2 * j] = u4_dc_val[row][col][0];
2335 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2336 }
2337 pu1_dst += dst_strd;
2338 }
2339 }
2340 else if (i4_min_sad == i4_sad_horz)
2341 {
2342 *u4_intra_mode = HORZ_CH_I8x8;
2343 for (j = 0; j < 8; j++)
2344 {
2345 val_v = pu1_ngbr_pels[15 - 2 * j];
2346 val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2347
2348 for (i = 0; i < 8; i++)
2349 {
2350 pu1_dst[2 * i] = val_u;
2351 pu1_dst[2 * i + 1] = val_v;
2352
2353 }
2354 pu1_dst += dst_strd;
2355 }
2356 }
2357 else
2358 {
2359 *u4_intra_mode = VERT_CH_I8x8;
2360 pu1_neighbour = pu1_ngbr_pels + 18;
2361 for (j = 0; j < 8; j++)
2362 {
2363 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2364 pu1_dst += dst_strd;
2365 }
2366 }
2367 }
2368
2369 return;
2370 }
2371