1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
/**
*******************************************************************************
* @file
*  ih264e_intra_modes_eval.c
*
* @brief
*  This file contains definitions of routines that perform rate distortion
*  analysis on a macroblock if it is to be coded as intra.
*
* @author
*  ittiam
*
* @par List of Functions:
*  - ih264e_derive_nghbr_avbl_of_mbs()
*  - ih264e_derive_ngbr_avbl_of_mb_partitions()
*  - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff()
*  - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff()
*  - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff()
*  - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton()
*  - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff()
*  - ih264e_evaluate_intra16x16_modes()
*  - ih264e_evaluate_intra4x4_modes()
*  - ih264e_evaluate_intra_chroma_modes()
*
* @remarks
*  None
*
*******************************************************************************
*/
50
51 /*****************************************************************************/
52 /* File Includes */
53 /*****************************************************************************/
54
55 /* System include files */
56 #include <stdio.h>
57 #include <string.h>
58 #include <limits.h>
59 #include <assert.h>
60
61 /* User include files */
62 #include "ih264e_config.h"
63 #include "ih264_typedefs.h"
64 #include "ih264e_defs.h"
65 #include "iv2.h"
66 #include "ive2.h"
67 #include "ih264_debug.h"
68 #include "ih264_defs.h"
69 #include "ih264_macros.h"
70 #include "ih264_intra_pred_filters.h"
71 #include "ih264_structs.h"
72 #include "ih264_common_tables.h"
73 #include "ih264_trans_quant_itrans_iquant.h"
74 #include "ih264_inter_pred_filters.h"
75 #include "ih264_mem_fns.h"
76 #include "ih264_padding.h"
77 #include "ih264_deblk_edge_filters.h"
78 #include "ih264_cabac_tables.h"
79 #include "ime_distortion_metrics.h"
80 #include "ih264e_error.h"
81 #include "ih264e_bitstream.h"
82 #include "ime_defs.h"
83 #include "ime_structs.h"
84 #include "irc_cntrl_param.h"
85 #include "irc_frame_info_collector.h"
86 #include "ih264e_rate_control.h"
87 #include "ih264e_cabac_structs.h"
88 #include "ih264e_structs.h"
89 #include "ih264e_intra_modes_eval.h"
90 #include "ih264e_globals.h"
91 #include "ime_platform_macros.h"
92
93
94 /*****************************************************************************/
95 /* Function Definitions */
96 /*****************************************************************************/
97
98 /**
99 ******************************************************************************
100 *
101 * @brief
102 * derivation process for macroblock availability
103 *
104 * @par Description
105 * Calculates the availability of the left, top, topright and topleft macroblocks.
106 *
107 * @param[in] ps_proc_ctxt
108 * pointer to proc context (handle)
109 *
110 * @remarks Based on section 6.4.5 in H264 spec
111 *
112 * @return none
113 *
114 ******************************************************************************
115 */
ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t * ps_proc)116 void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc)
117 {
118 UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx;
119 UWORD8 *pu1_slice_idx_b;
120 UWORD8 *pu1_slice_idx_a;
121 UWORD8 *pu1_slice_idx_c;
122 UWORD8 *pu1_slice_idx_d;
123 block_neighbors_t *ps_ngbr_avbl;
124 WORD32 i4_mb_x, i4_mb_y;
125 WORD32 i4_wd_mbs;
126
127 i4_mb_x = ps_proc->i4_mb_x;
128 i4_mb_y = ps_proc->i4_mb_y;
129
130 i4_wd_mbs = ps_proc->i4_wd_mbs;
131
132 pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x;
133 pu1_slice_idx_a = pu1_slice_idx_curr - 1;
134 pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs;
135 pu1_slice_idx_c = pu1_slice_idx_b + 1;
136 pu1_slice_idx_d = pu1_slice_idx_b - 1;
137 ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
138
139 /**********************************************************************/
140 /* The macroblock is marked as available, unless one of the following */
141 /* conditions is true in which case the macroblock shall be marked as */
142 /* not available. */
143 /* 1. mbAddr < 0 */
144 /* 2 mbAddr > CurrMbAddr */
145 /* 3. the macroblock with address mbAddr belongs to a different slice */
146 /* than the macroblock with address CurrMbAddr */
147 /**********************************************************************/
148
149 /* left macroblock availability */
150 if (i4_mb_x == 0)
151 { /* macroblocks along first column */
152 ps_ngbr_avbl->u1_mb_a = 0;
153 }
154 else
155 { /* macroblocks belong to same slice? */
156 if (*pu1_slice_idx_a != *pu1_slice_idx_curr)
157 ps_ngbr_avbl->u1_mb_a = 0;
158 else
159 ps_ngbr_avbl->u1_mb_a = 1;
160 }
161
162 /* top macroblock availability */
163 if (i4_mb_y == 0)
164 { /* macroblocks along first row */
165 ps_ngbr_avbl->u1_mb_b = 0;
166 }
167 else
168 { /* macroblocks belong to same slice? */
169 if (*pu1_slice_idx_b != *pu1_slice_idx_curr)
170 ps_ngbr_avbl->u1_mb_b = 0;
171 else
172 ps_ngbr_avbl->u1_mb_b = 1;
173 }
174
175 /* top right macroblock availability */
176 if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0)
177 { /* macroblocks along last column */
178 ps_ngbr_avbl->u1_mb_c = 0;
179 }
180 else
181 { /* macroblocks belong to same slice? */
182 if (*pu1_slice_idx_c != *pu1_slice_idx_curr)
183 ps_ngbr_avbl->u1_mb_c = 0;
184 else
185 ps_ngbr_avbl->u1_mb_c = 1;
186 }
187
188 /* top left macroblock availability */
189 if (i4_mb_x == 0 || i4_mb_y == 0)
190 { /* macroblocks along first column */
191 ps_ngbr_avbl->u1_mb_d = 0;
192 }
193 else
194 { /* macroblocks belong to same slice? */
195 if (*pu1_slice_idx_d != *pu1_slice_idx_curr)
196 ps_ngbr_avbl->u1_mb_d = 0;
197 else
198 ps_ngbr_avbl->u1_mb_d = 1;
199 }
200 }
201
202 /**
203 ******************************************************************************
204 *
205 * @brief
206 * derivation process for subblock/partition availability
207 *
208 * @par Description
209 * Calculates the availability of the left, top, topright and topleft subblock
210 * or partitions.
211 *
212 * @param[in] ps_proc_ctxt
213 * pointer to macroblock context (handle)
214 *
215 * @param[in] i1_pel_pos_x
216 * column position of the pel wrt the current block
217 *
218 * @param[in] i1_pel_pos_y
219 * row position of the pel in wrt current block
220 *
221 * @remarks Assumptions: before calling this function it is assumed that
222 * the neighbor availability of the current macroblock is already derived.
223 * Based on table 6-3 of H264 specification
224 *
225 * @return availability status (yes or no)
226 *
227 ******************************************************************************
228 */
ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t * ps_ngbr_avbl,WORD8 i1_pel_pos_x,WORD8 i1_pel_pos_y)229 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl,
230 WORD8 i1_pel_pos_x,
231 WORD8 i1_pel_pos_y)
232 {
233 UWORD8 u1_neighbor_avail=0;
234
235 /**********************************************************************/
236 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */
237 /* various columns of a macroblock */
238 /* */
239 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */
240 /* various rows of a macroblock */
241 /* */
242 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */
243 /* outside the bound of an mb ie., represents its neighbors. */
244 /**********************************************************************/
245 if (i1_pel_pos_x < 0)
246 { /* column(-1) */
247 if (i1_pel_pos_y < 0)
248 { /* row(-1) */
249 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */
250 }
251 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
252 { /* all rows of a macroblock */
253 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */
254 }
255 else /* if (i1_pel_pos_y >= 16) */
256 { /* rows(+16) */
257 u1_neighbor_avail = 0; /* current mb bottom left availability */
258 }
259 }
260 else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16)
261 { /* all columns of a macroblock */
262 if (i1_pel_pos_y < 0)
263 { /* row(-1) */
264 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */
265 }
266 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16)
267 { /* all rows of a macroblock */
268 u1_neighbor_avail = 1; /* current mb availability */
269 /* availability of the partition is dependent on the position of the partition inside the mb */
270 /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */
271 }
272 else /* if (i1_pel_pos_y >= 16) */
273 { /* rows(+16) */
274 u1_neighbor_avail = 0; /* current mb bottom availability */
275 }
276 }
277 else if (i1_pel_pos_x >= 16)
278 { /* column(+16) */
279 if (i1_pel_pos_y < 0)
280 { /* row(-1) */
281 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */
282 }
283 else /* if (i1_pel_pos_y >= 0) */
284 { /* all other rows */
285 u1_neighbor_avail = 0; /* current mb right & bottom right availability */
286 }
287 }
288
289 return u1_neighbor_avail;
290 }
291
292 /**
293 ******************************************************************************
294 *
295 * @brief
296 * evaluate best intra 16x16 mode (rate distortion opt off)
297 *
298 * @par Description
299 * This function evaluates all the possible intra 16x16 modes and finds the mode
300 * that best represents the macro-block (least distortion) and occupies fewer
301 * bits in the bit-stream.
302 *
303 * @param[in] ps_proc_ctxt
304 * pointer to process context (handle)
305 *
306 * @remarks
307 * Ideally the cost of encoding a macroblock is calculated as
308 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
309 * input block and the reconstructed block and rate is the number of bits taken
310 * to place the macroblock in the bit-stream. In this routine the rate does not
311 * exactly point to the total number of bits it takes, rather it points to header
312 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
313 * and residual bits fall in to texture bits the number of bits taken to encoding
314 * mbtype is considered as rate, we compute cost. Further we will approximate
315 * the distortion as the deviation b/w input and the predicted block as opposed
316 * to input and reconstructed block.
317 *
318 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock,
319 * the SAD and cost are one and the same.
320 *
321 * @return none
322 *
323 ******************************************************************************
324 */
325
ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)326 void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
327 {
328 /* Codec Context */
329 codec_t *ps_codec = ps_proc->ps_codec;
330
331 /* SAD(distortion metric) of an 8x8 block */
332 WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX;
333
334 /* lambda */
335 UWORD32 u4_lambda = ps_proc->u4_lambda;
336
337 /* cost = distortion + lambda*rate */
338 WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX;
339
340 /* intra mode */
341 UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16;
342
343 /* neighbor pels for intra prediction */
344 UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels;
345
346 /* neighbor availability */
347 WORD32 i4_ngbr_avbl;
348
349 /* pointer to src macro block */
350 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma;
351 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma;
352
353 /* pointer to prediction macro block */
354 UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16;
355 UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane;
356
357 /* strides */
358 WORD32 i4_src_strd = ps_proc->i4_src_strd;
359 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
360 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
361
362 /* pointer to neighbors left, top, topleft */
363 UWORD8 *pu1_mb_a = pu1_ref_mb - 1;
364 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd;
365 UWORD8 *pu1_mb_d = pu1_mb_b - 1;
366 UWORD8 u1_mb_a, u1_mb_b, u1_mb_d;
367 /* valid intra modes map */
368 UWORD32 u4_valid_intra_modes;
369
370 /* lut for valid intra modes */
371 const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15};
372
373 /* temp var */
374 UWORD32 i, u4_enable_fast_sad = 0, offset = 0;
375 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
376 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
377
378 /* init temp var */
379 if (ps_proc->i4_slice_type != ISLICE)
380 {
381 /* Offset for MBtype */
382 offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23;
383 u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad;
384 }
385
386 /* locating neighbors that are available for prediction */
387
388 /* gather prediction pels from the neighbors, if particular set is not available
389 * it is set to zero*/
390 /* left pels */
391 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
392 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
393 if (u1_mb_a)
394 {
395 for(i = 0; i < 16; i++)
396 pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd];
397 }
398 else
399 {
400 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE);
401 }
402 /* top pels */
403 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
404 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
405 if (u1_mb_b)
406 {
407 ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16);
408 }
409 else
410 {
411 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE);
412 }
413 /* topleft pels */
414 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
415 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
416 if (u1_mb_d)
417 {
418 pu1_ngbr_pels_i16[16] = *pu1_mb_d;
419 }
420 else
421 {
422 pu1_ngbr_pels_i16[16] = 0;
423 }
424
425 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1);
426 ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl;
427
428 /* set valid intra modes for evaluation */
429 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl];
430
431 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
432 ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
433 u4_valid_intra_modes &= ~(1 << PLANE_I16x16);
434
435 /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */
436 ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16,
437 i4_src_strd, i4_pred_strd,
438 i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least,
439 u4_valid_intra_modes);
440
441 /* cost = distortion + lambda*rate */
442 i4_mb_cost_least = i4_mb_distortion_least;
443
444 if (((u4_valid_intra_modes >> 3) & 1) != 0)
445 {
446 /* intra prediction for PLANE mode*/
447 (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl);
448
449 /* evaluate distortion between the actual blk and the estimated blk for the given mode */
450 ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion);
451
452 /* cost = distortion + lambda*rate */
453 i4_mb_cost = i4_mb_distortion;
454
455 /* update the least cost information if necessary */
456 if(i4_mb_cost < i4_mb_distortion_least)
457 {
458 u4_intra_mode = PLANE_I16x16;
459
460 i4_mb_cost_least = i4_mb_cost;
461 i4_mb_distortion_least = i4_mb_distortion;
462 }
463 }
464
465 u4_best_intra_16x16_mode = u4_intra_mode;
466
467 DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode);
468
469 ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode;
470
471 /* cost = distortion + lambda*rate */
472 i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode];
473
474
475 /* update the type of the mb if necessary */
476 if (i4_mb_cost_least < ps_proc->i4_mb_cost)
477 {
478 ps_proc->i4_mb_cost = i4_mb_cost_least;
479 ps_proc->i4_mb_distortion = i4_mb_distortion_least;
480 ps_proc->u4_mb_type = I16x16;
481 }
482
483 return ;
484 }
485
486
487 /**
488 ******************************************************************************
489 *
490 * @brief
491 * evaluate best intra 8x8 mode (rate distortion opt on)
492 *
493 * @par Description
494 * This function evaluates all the possible intra 8x8 modes and finds the mode
495 * that best represents the macro-block (least distortion) and occupies fewer
496 * bits in the bit-stream.
497 *
498 * @param[in] ps_proc_ctxt
499 * pointer to proc ctxt
500 *
501 * @remarks Ideally the cost of encoding a macroblock is calculated as
502 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the
503 * input block and the reconstructed block and rate is the number of bits taken
504 * to place the macroblock in the bit-stream. In this routine the rate does not
505 * exactly point to the total number of bits it takes, rather it points to header
506 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits
507 * and residual bits fall in to texture bits the number of bits taken to encoding
508 * mbtype is considered as rate, we compute cost. Further we will approximate
509 * the distortion as the deviation b/w input and the predicted block as opposed
510 * to input and reconstructed block.
511 *
512 * NOTE: TODO: This function needs to be tested
513 *
514 * @return none
515 *
516 ******************************************************************************
517 */
ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)518 void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
519 {
520 /* Codec Context */
521 codec_t *ps_codec = ps_proc->ps_codec;
522
523 /* SAD(distortion metric) of an 4x4 block */
524 WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
525
526 /* lambda */
527 UWORD32 u4_lambda = ps_proc->u4_lambda;
528
529 /* cost = distortion + lambda*rate */
530 WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda;
531
532 /* cost due to mbtype */
533 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
534
535 /* intra mode */
536 UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode;
537
538 /* neighbor pels for intra prediction */
539 UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels;
540
541 /* pointer to curr partition */
542 UWORD8 *pu1_mb_curr;
543
544 /* pointer to prediction macro block */
545 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
546
547 /* strides */
548 WORD32 i4_src_strd = ps_proc->i4_src_strd;
549 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
550
551 /* neighbors left, top, top right, top left */
552 UWORD8 *pu1_mb_a;
553 UWORD8 *pu1_mb_b;
554 UWORD8 *pu1_mb_d;
555
556 /* neighbor availability */
557 WORD32 i4_ngbr_avbl;
558 block_neighbors_t s_ngbr_avbl;
559
560 /* temp vars */
561 UWORD32 b8, u4_pix_x, u4_pix_y;
562 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
563 block_neighbors_t s_ngbr_avbl_MB;
564
565 /* ngbr mb syntax information */
566 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
567 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
568 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
569 /* valid intra modes map */
570 UWORD32 u4_valid_intra_modes;
571
572 if (ps_proc->ps_ngbr_avbl->u1_mb_c)
573 {
574 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1);
575 }
576 /* left pels */
577 s_ngbr_avbl_MB.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
578 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
579
580 /* top pels */
581 s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
582 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
583
584 /* topleft pels */
585 s_ngbr_avbl_MB.u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
586 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
587
588 /* top right */
589 s_ngbr_avbl_MB.u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
590 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
591
592
593 for(b8 = 0; b8 < 4; b8++)
594 {
595 u4_pix_x = (b8 & 0x01) << 3;
596 u4_pix_y = (b8 >> 1) << 3;
597
598 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
599 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
600 /* as opposed to using the recon pels. (open loop intra prediction) */
601 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
602 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
603 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
604
605 /* locating neighbors that are available for prediction */
606 /* TODO : update the neighbor availability information basing on constrained intra pred information */
607 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
608 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
609 s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */
610 s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */
611 s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */
612 s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */
613
614 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */
615 i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) +
616 (s_ngbr_avbl.u1_mb_a << 4);
617 /* if top partition is available and top right is not available for intra prediction, then */
618 /* padd top right samples using top sample and make top right also available */
619 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
620 ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl;
621
622
623 ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8,
624 i4_src_strd, i4_ngbr_avbl);
625
626 i4_partition_cost_least = INT_MAX;
627 /* set valid intra modes for evaluation */
628 u4_valid_intra_modes = 0x1ff;
629
630 if (!s_ngbr_avbl.u1_mb_b)
631 {
632 u4_valid_intra_modes &= ~(1 << VERT_I4x4);
633 u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4);
634 u4_valid_intra_modes &= ~(1 << VERT_L_I4x4);
635 }
636 if (!s_ngbr_avbl.u1_mb_a)
637 {
638 u4_valid_intra_modes &= ~(1 << HORZ_I4x4);
639 u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4);
640 }
641 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d)
642 {
643 u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4);
644 u4_valid_intra_modes &= ~(1 << VERT_R_I4x4);
645 u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4);
646 }
647
648 /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */
649 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
650 {
651 u4_estimated_intra_8x8_mode = DC_I8x8;
652 }
653 else
654 {
655 UWORD32 u4_left_intra_8x8_mode = DC_I8x8;
656 UWORD32 u4_top_intra_8x8_mode = DC_I8x8;
657
658 if (u4_pix_x == 0)
659 {
660 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
661 {
662 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1];
663 }
664 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
665 {
666 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2];
667 }
668 }
669 else
670 {
671 u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1];
672 }
673
674 if (u4_pix_y == 0)
675 {
676 if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
677 {
678 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2];
679 }
680 else if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
681 {
682 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2];
683 }
684 }
685 else
686 {
687 u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2];
688 }
689
690 u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode);
691 }
692
693 /* perform intra mode 8x8 evaluation */
694 for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1)
695 {
696 if ( (u4_valid_intra_modes & 1) == 0)
697 continue;
698
699 /* intra prediction */
700 (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl);
701
702 /* evaluate distortion between the actual blk and the estimated blk for the given mode */
703 ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion);
704
705 i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits);
706
707 /* update the least cost information if necessary */
708 if (i4_partition_cost < i4_partition_cost_least)
709 {
710 i4_partition_cost_least = i4_partition_cost;
711 i4_partition_distortion_least = i4_partition_distortion;
712 u4_best_intra_8x8_mode = u4_intra_mode;
713 }
714 }
715 /* macroblock distortion */
716 i4_total_cost += i4_partition_cost_least;
717 i4_total_distortion += i4_partition_distortion_least;
718 /* mb partition mode */
719 ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode;
720
721 }
722
723 /* update the type of the mb if necessary */
724 if (i4_total_cost < ps_proc->i4_mb_cost)
725 {
726 ps_proc->i4_mb_cost = i4_total_cost;
727 ps_proc->i4_mb_distortion = i4_total_distortion;
728 ps_proc->u4_mb_type = I8x8;
729 }
730
731 return ;
732 }
733
734
/**
******************************************************************************
*
* @brief
*  evaluate best intra 4x4 mode (rate distortion opt off)
*
* @par Description
*  This function evaluates all the possible intra 4x4 modes and finds the mode
*  that best represents the macro-block (least distortion) and occupies fewer
*  bits in the bit-stream.
*
* @param[in] ps_proc
*  pointer to process context (handle)
*
* @remarks
*  Ideally the cost of encoding a macroblock is calculated as
*  (distortion + lambda*rate), where distortion is SAD/SATD,... between the
*  input block and the reconstructed block and rate is the number of bits taken
*  to place the macroblock in the bit-stream. In this routine the rate does not
*  exactly point to the total number of bits it takes, rather it points to the
*  header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
*  bits and residual bits fall into texture bits, the number of bits taken to
*  encode the mbtype is considered as rate, and from it we compute the cost.
*  Further, we approximate the distortion as the deviation b/w the input and
*  the predicted block as opposed to the input and the reconstructed block.
*
*  NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
*  24*lambda is added to the SAD before comparison with the best SAD for
*  inter prediction. This is an empirical value to prevent using too many intra
*  blocks.
*
* @return none
*
******************************************************************************
*/
ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t * ps_proc)770 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
771 {
772 /* Codec Context */
773 codec_t *ps_codec = ps_proc->ps_codec;
774
775 /* SAD(distortion metric) of an 4x4 block */
776 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0;
777
778 /* lambda */
779 UWORD32 u4_lambda = ps_proc->u4_lambda;
780
781 /* cost = distortion + lambda*rate */
782 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda;
783
784 /* cost due to mbtype */
785 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda;
786
787 /* intra mode */
788 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode;
789
790 /* neighbor pels for intra prediction */
791 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels;
792
793 /* pointer to curr partition */
794 UWORD8 *pu1_mb_curr;
795
796 /* pointer to prediction macro block */
797 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb;
798
799 /* strides */
800 WORD32 i4_src_strd = ps_proc->i4_src_strd;
801 WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
802
803 /* neighbors left, top, top right, top left */
804 UWORD8 *pu1_mb_a;
805 UWORD8 *pu1_mb_b;
806 UWORD8 *pu1_mb_c;
807 UWORD8 *pu1_mb_d;
808
809 /* neighbor availability */
810 WORD32 i4_ngbr_avbl;
811 block_neighbors_t s_ngbr_avbl;
812
813 /* temp vars */
814 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y;
815
816 /* scan order inside 4x4 block */
817 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
818
819 /* ngbr sub mb modes */
820 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
821 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
822 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
823
824 /* valid intra modes map */
825 UWORD32 u4_valid_intra_modes;
826 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511};
827
828 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
829 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d;
830 if (ps_proc->ps_ngbr_avbl->u1_mb_c)
831 {
832 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1;
833 }
834 /* left pels */
835 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a)
836 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1));
837
838 /* top pels */
839 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b)
840 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1));
841
842 /* topleft pels */
843 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d)
844 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1));
845
846 /* top right */
847 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c)
848 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1));
849
850 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3);
851 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16);
852
853 for (b8 = 0; b8 < 4; b8++)
854 {
855 u4_blk_x = (b8 & 0x01) << 3;
856 u4_blk_y = (b8 >> 1) << 3;
857 for (b4 = 0; b4 < 4; b4++)
858 {
859 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2);
860 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2);
861
862 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd);
863 /* when rdopt is off, we use the input as reference for constructing prediction buffer */
864 /* as opposed to using the recon pels. (open loop intra prediction) */
865 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */
866 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */
867 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */
868 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */
869
870 /* locating neighbors that are available for prediction */
871 /* TODO : update the neighbor availability information basing on constrained intra pred information */
872 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */
873 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */
874
875 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4];
876 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1);
877 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1;
878 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2;
879 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3;
880 /* set valid intra modes for evaluation */
881 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7];
882
883 /* if top partition is available and top right is not available for intra prediction, then */
884 /* padd top right samples using top sample and make top right also available */
885 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */
886
887 /* gather prediction pels from the neighbors */
888 if (s_ngbr_avbl.u1_mb_a)
889 {
890 for(i = 0; i < 4; i++)
891 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd];
892 }
893 else
894 {
895 memset(pu1_ngbr_pels_i4, 0, 4);
896 }
897
898 if (s_ngbr_avbl.u1_mb_b)
899 {
900 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4);
901 }
902 else
903 {
904 memset(pu1_ngbr_pels_i4 + 5, 0, 4);
905 }
906
907 if (s_ngbr_avbl.u1_mb_d)
908 pu1_ngbr_pels_i4[4] = *pu1_mb_d;
909 else
910 pu1_ngbr_pels_i4[4] = 0;
911
912 if (s_ngbr_avbl.u1_mb_c)
913 {
914 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4);
915 }
916 else if (s_ngbr_avbl.u1_mb_b)
917 {
918 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4);
919 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b;
920 }
921
922 i4_partition_cost_least = INT_MAX;
923
924 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */
925 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b)
926 {
927 u4_estimated_intra_4x4_mode = DC_I4x4;
928 }
929 else
930 {
931 UWORD32 u4_left_intra_4x4_mode = DC_I4x4;
932 UWORD32 u4_top_intra_4x4_mode = DC_I4x4;
933
934 if (u4_pix_x == 0)
935 {
936 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
937 {
938 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]];
939 }
940 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
941 {
942 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1];
943 }
944 }
945 else
946 {
947 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]];
948 }
949
950 if (u4_pix_y == 0)
951 {
952 if (ps_top_mb_syn_ele->u2_mb_type == I4x4)
953 {
954 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]];
955 }
956 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8)
957 {
958 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2];
959 }
960 }
961 else
962 {
963 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]];
964 }
965
966 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode);
967 }
968
969 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode;
970
971 /* mode evaluation and prediction */
972 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr,
973 pu1_ngbr_pels_i4,
974 pu1_pred_mb, i4_src_strd,
975 i4_pred_strd, i4_ngbr_avbl,
976 &u4_best_intra_4x4_mode,
977 &i4_partition_cost_least,
978 u4_valid_intra_modes,
979 u4_lambda,
980 u4_estimated_intra_4x4_mode);
981
982
983 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits);
984
985 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode);
986 /* macroblock distortion */
987 i4_total_distortion += i4_partition_distortion_least;
988 i4_total_cost += i4_partition_cost_least;
989 /* mb partition mode */
990 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode;
991 }
992 }
993
994 /* update the type of the mb if necessary */
995 if (i4_total_cost < ps_proc->i4_mb_cost)
996 {
997 ps_proc->i4_mb_cost = i4_total_cost;
998 ps_proc->i4_mb_distortion = i4_total_distortion;
999 ps_proc->u4_mb_type = I4x4;
1000 }
1001
1002 return ;
1003 }
1004
/**
******************************************************************************
*
* @brief evaluate best intra 4x4 mode (rate distortion opt on)
*
* @par Description
* This function evaluates all the possible intra 4x4 modes and finds the mode
* that best represents the macro-block (least distortion) and occupies fewer
* bits in the bit-stream.
*
* @param[in] ps_proc
* pointer to proc ctxt
*
* @remarks
* Ideally the cost of encoding a macroblock is calculated as
* (distortion + lambda*rate), where distortion is SAD/SATD,... between the
* input block and the reconstructed block and rate is the number of bits taken
* to place the macroblock in the bit-stream. In this routine the rate does not
* exactly point to the total number of bits it takes, rather it points to the
* header bits necessary for encoding the macroblock. Assuming the deltaQP, cbp
* bits and residual bits fall into texture bits, the number of bits taken to
* encode mbtype is considered as rate, and the cost is computed from it.
* Further, the distortion is approximated as the deviation between the input
* and the predicted block as opposed to the input and the reconstructed block.
*
* NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock,
* 24*lambda is added to the SAD before comparison with the best SAD for
* inter prediction. This is an empirical value to prevent using too many intra
* blocks.
*
* @return none
*
******************************************************************************
*/
void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc)
{
    /* raster 4x4 block index (row * 4 + col) -> (b8 << 2) + b4 storage index */
    const UWORD8 au1_raster_to_blk[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};

    /* modes to evaluate, indexed by availability bits 0..2 (left, top left, top) */
    UWORD16 au2_modes_for_avail[8] = {4, 262, 4, 262, 141, 399, 141, 511};

    /* codec context, quantization parameters, constrained intra pred switch */
    codec_t *ps_codec = ps_proc->ps_codec;
    quant_params_t *ps_quant = ps_proc->ps_qp_params[0];
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;

    /* lambda, and the two mode signalling costs derived from it */
    UWORD32 u4_lambda = ps_proc->u4_lambda;
    UWORD32 u4_cost_1bit = u4_lambda;
    UWORD32 u4_cost_4bits = 4 * u4_lambda;

    /* macroblock totals; the cost total is seeded with (24 + 1) * lambda */
    WORD32 i4_mb_cost = (24 + 1) * u4_lambda;
    WORD32 i4_mb_dist = 0;

    /* results for the partition being processed */
    WORD32 i4_blk_cost, i4_blk_dist;
    UWORD32 u4_best_mode = DC_I4x4, u4_pred_mode;
    UWORD32 u4_modes_to_try;

    /* working buffers */
    UWORD8 *pu1_ngbr = ps_proc->au1_ngbr_pels;
    UWORD8 *pu1_pred_blk = ps_proc->pu1_pred_mb;
    UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4;
    WORD16 *pi2_res = ps_proc->pi2_res_buf_intra_4x4;
    WORD16 i2_dc_unused;

    /* strides */
    WORD32 i4_src_strd = ps_proc->i4_src_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
    WORD32 i4_left_strd, i4_top_strd;

    /* mode / syntax info of the neighbouring macroblocks */
    UWORD8 *pu1_top_mb_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4);
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
    mb_info_t *ps_top_right_mb = ps_top_mb;

    /* macroblock level availability */
    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok, u1_top_right_ok;
    WORD32 i4_avail;

    UWORD32 u4_blk, u4_row;

    /* the top right entry is only looked at when that macroblock exists */
    if (ps_proc->ps_ngbr_avbl->u1_mb_c)
    {
        ps_top_right_mb = ps_top_mb + 1;
    }

    /* a neighbour counts as usable when it is present and, with constrained */
    /* intra prediction enabled, is itself intra coded                       */
    u1_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_a)
                    && (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra);

    u1_top_ok = (ps_proc->ps_ngbr_avbl->u1_mb_b)
                    && (!u4_cip || ps_top_mb->u2_is_intra);

    u1_top_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_d)
                    && (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra);

    u1_top_right_ok = (ps_proc->ps_ngbr_avbl->u1_mb_c)
                    && (!u4_cip || ps_top_right_mb->u2_is_intra);

    /* expand the macroblock level flags to one availability value per 4x4 block */
    i4_avail = u1_left_ok | (u1_top_left_ok << 1) | (u1_top_ok << 2) | (u1_top_right_ok << 3);
    memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_avail], 16);

    /* u4_blk equals (b8 << 2) + b4, i.e. the sixteen blocks are visited */
    /* 8x8 quadrant by 8x8 quadrant                                      */
    for (u4_blk = 0; u4_blk < 16; u4_blk++, pu1_nnz++, pi2_res += MB_SIZE)
    {
        UWORD32 u4_b8 = u4_blk >> 2;
        UWORD32 u4_b4 = u4_blk & 0x03;

        /* pel offsets of this block inside the macroblock */
        UWORD32 u4_x = ((u4_b8 & 0x01) << 3) + ((u4_b4 & 0x01) << 2);
        UWORD32 u4_y = ((u4_b8 >> 1) << 3) + ((u4_b4 >> 1) << 2);

        UWORD8 *pu1_src_blk = ps_proc->pu1_src_buf_luma + u4_x + (u4_y * i4_src_strd);
        UWORD8 *pu1_recon_blk = ps_proc->pu1_ref_mb_intra_4x4 + u4_x + (u4_y * i4_pred_strd);

        UWORD8 *pu1_left_ref, *pu1_top_ref;
        UWORD8 *pu1_left, *pu1_top, *pu1_top_right, *pu1_top_left;
        WORD32 i4_left_avail, i4_top_left_avail, i4_top_avail, i4_top_right_avail;

        /* blocks on the left / top edge of the macroblock take their neighbours */
        /* from the recon buffer; interior blocks take them from the 4x4 blocks  */
        /* reconstructed earlier in this loop                                    */
        if (u4_x == 0)
        {
            i4_left_strd = ps_proc->i4_rec_strd;
            pu1_left_ref = ps_proc->pu1_rec_buf_luma + (u4_y * i4_left_strd);
        }
        else
        {
            i4_left_strd = i4_pred_strd;
            pu1_left_ref = pu1_recon_blk;
        }

        if (u4_y == 0)
        {
            i4_top_strd = ps_proc->i4_rec_strd;
            pu1_top_ref = ps_proc->pu1_rec_buf_luma + u4_x;
        }
        else
        {
            i4_top_strd = i4_pred_strd;
            pu1_top_ref = pu1_recon_blk;
        }

        pu1_left = pu1_left_ref - 1;
        pu1_top = pu1_top_ref - i4_top_strd;
        pu1_top_right = pu1_top + 4;
        pu1_top_left = (u4_y == 0) ? (pu1_top - 1) : (pu1_left - i4_left_strd);

        /* availability of this block's neighbours */
        i4_avail = ps_proc->au1_ngbr_avbl_4x4_subblks[u4_blk];
        i4_left_avail = i4_avail & 0x1;
        i4_top_left_avail = (i4_avail >> 1) & 0x1;
        i4_top_avail = (i4_avail >> 2) & 0x1;
        i4_top_right_avail = (i4_avail >> 3) & 0x1;

        /* modes worth evaluating with these neighbours */
        u4_modes_to_try = au2_modes_for_avail[i4_avail & 0x7];

        /* neighbour buffer layout: [0..3] left column (bottom-most pel first), */
        /* [4] top left, [5..8] top, [9..12] top right                          */
        if (i4_left_avail)
        {
            for (u4_row = 0; u4_row < 4; u4_row++)
            {
                pu1_ngbr[3 - u4_row] = pu1_left[u4_row * i4_left_strd];
            }
        }
        else
        {
            memset(pu1_ngbr, 0, 4);
        }

        if (i4_top_avail)
        {
            memcpy(pu1_ngbr + 5, pu1_top, 4);
        }
        else
        {
            memset(pu1_ngbr + 5, 0, 4);
        }

        pu1_ngbr[4] = i4_top_left_avail ? *pu1_top_left : 0;

        if (i4_top_right_avail)
        {
            memcpy(pu1_ngbr + 9, pu1_top_right, 4);
        }
        else if (i4_top_avail)
        {
            /* top right missing but top present: replicate the last top pel */
            memset(pu1_ngbr + 9, pu1_ngbr[8], 4);
        }

        /* predicted mode: DC unless both left and top are available, in which */
        /* case it is the smaller of the left and top block modes              */
        u4_pred_mode = DC_I4x4;
        if (i4_left_avail && i4_top_avail)
        {
            UWORD32 u4_left_mode = DC_I4x4;
            UWORD32 u4_top_mode = DC_I4x4;
            UWORD32 u4_raster = (u4_x >> 2) + u4_y;

            if (u4_x != 0)
            {
                u4_left_mode = ps_proc->au1_intra_luma_mb_4x4_modes[au1_raster_to_blk[u4_raster - 1]];
            }
            else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4)
            {
                u4_left_mode = ps_proc->au1_left_mb_intra_modes[au1_raster_to_blk[3 + u4_y]];
            }
            else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8)
            {
                u4_left_mode = ps_proc->au1_left_mb_intra_modes[u4_b8 + 1];
            }

            if (u4_y != 0)
            {
                u4_top_mode = ps_proc->au1_intra_luma_mb_4x4_modes[au1_raster_to_blk[u4_raster - 4]];
            }
            else if (ps_top_mb->u2_mb_type == I4x4)
            {
                u4_top_mode = pu1_top_mb_modes[au1_raster_to_blk[12 + (u4_x >> 2)]];
            }
            else if (ps_top_mb->u2_mb_type == I8x8)
            {
                u4_top_mode = pu1_top_mb_modes[u4_b8 + 2];
            }

            u4_pred_mode = MIN(u4_left_mode, u4_top_mode);
        }

        ps_proc->au1_predicted_intra_luma_mb_4x4_modes[u4_blk] = u4_pred_mode;

        /* evaluate the candidate modes and build the prediction of the best one */
        i4_blk_cost = INT_MAX;
        ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_src_blk,
                                                     pu1_ngbr,
                                                     pu1_pred_blk, i4_src_strd,
                                                     i4_pred_strd, i4_avail,
                                                     &u4_best_mode,
                                                     &i4_blk_cost,
                                                     u4_modes_to_try,
                                                     u4_lambda,
                                                     u4_pred_mode);

        /* strip the mode signalling cost to recover the distortion */
        i4_blk_dist = i4_blk_cost - ((u4_pred_mode == u4_best_mode) ? u4_cost_1bit : u4_cost_4bits);

        DEBUG("%d partition cost, %d intra mode\n", i4_blk_cost, u4_best_mode);

        /* accumulate macroblock totals and remember the chosen mode */
        i4_mb_dist += i4_blk_dist;
        i4_mb_cost += i4_blk_cost;
        ps_proc->au1_intra_luma_mb_4x4_modes[u4_blk] = u4_best_mode;

        /* residue computation, forward transform and quantization */
        ps_codec->pf_resi_trans_quant_4x4(pu1_src_blk, pu1_pred_blk,
                                          pi2_res, i4_src_strd,
                                          i4_pred_strd,
                                          ps_quant->pu2_scale_mat,
                                          ps_quant->pu2_thres_mat,
                                          ps_quant->u1_qbits,
                                          ps_quant->u4_dead_zone,
                                          pu1_nnz, &i2_dc_unused);

        /* inverse quantization, inverse transform and reconstruction; the */
        /* result is the reference for the blocks that follow              */
        ps_codec->pf_iquant_itrans_recon_4x4(pi2_res, pu1_pred_blk,
                                             pu1_recon_blk,
                                             i4_pred_strd, i4_pred_strd,
                                             ps_quant->pu2_iscale_mat,
                                             ps_quant->pu2_weigh_mat,
                                             ps_quant->u1_qp_div,
                                             ps_proc->pv_scratch_buff, 0,
                                             NULL);
    }

    /* switch the macroblock to I4x4 when that beats the best cost so far */
    if (i4_mb_cost < ps_proc->i4_mb_cost)
    {
        ps_proc->i4_mb_cost = i4_mb_cost;
        ps_proc->i4_mb_distortion = i4_mb_dist;
        ps_proc->u4_mb_type = I4x4;
    }

    return;
}
1342
/**
******************************************************************************
*
* @brief
* evaluate best chroma intra 8x8 mode (rate distortion opt off)
*
* @par Description
* This function evaluates all the possible chroma intra 8x8 modes and finds
* the mode that best represents the macroblock (least distortion) and occupies
* fewer bits in the bitstream.
*
* @param[in] ps_proc
* pointer to macroblock context (handle)
*
* @remarks
* For chroma, the best intra pred mode is calculated based only on SAD
*
* @returns none
*
******************************************************************************
*/
1364
void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc)
{
    /* chroma modes that may be tried, indexed by the availability value */
    /* built below (bit0 left, bit1 top left, bit2 top)                  */
    const UWORD8 au1_modes_for_avail[8] = {1, 3, 1, 3, 5, 7, 5, 15};

    codec_t *ps_codec = ps_proc->ps_codec;
    UWORD32 u4_cip = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred;
    mb_info_t *ps_top_mb = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;

    /* source, recon and prediction buffers */
    UWORD8 *pu1_src_c = ps_proc->pu1_src_buf_chroma;
    UWORD8 *pu1_rec_c = ps_proc->pu1_rec_buf_chroma;
    UWORD8 *pu1_pred = ps_proc->pu1_pred_mb_intra_chroma;
    UWORD8 *pu1_pred_plane = ps_proc->pu1_pred_mb_intra_chroma_plane;

    /* neighbour pel buffer: [0..15] left, [16..17] top left, [18..33] top */
    UWORD8 *pu1_ngbr = ps_proc->au1_ngbr_pels;

    /* strides */
    WORD32 i4_src_strd = ps_proc->i4_src_chroma_strd;
    WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* neighbours in the recon buffer; left and top left sit two bytes back */
    UWORD8 *pu1_left = pu1_rec_c - 2;
    UWORD8 *pu1_top = pu1_rec_c - i4_rec_strd;
    UWORD8 *pu1_top_left = pu1_top - 2;

    UWORD8 u1_left_ok, u1_top_ok, u1_top_left_ok;
    WORD32 i4_avail;
    UWORD32 u4_modes_to_try;

    /* best mode so far and the distortions being compared */
    UWORD32 u4_best_mode = DC_CH_I8x8;
    WORD32 i4_best_sad, i4_plane_sad;

    WORD32 row;

    /* left pels: eight rows of two bytes, stored bottom-most row first */
    u1_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_a)
                    && (!u4_cip || ps_proc->s_left_mb_syntax_ele.u2_is_intra);
    if (!u1_left_ok)
    {
        ps_codec->pf_mem_set_mul8(pu1_ngbr, 0, MB_SIZE);
    }
    else
    {
        for (row = 0; row < 8; row++)
        {
            pu1_ngbr[14 - 2 * row] = pu1_left[row * i4_rec_strd];
            pu1_ngbr[15 - 2 * row] = pu1_left[row * i4_rec_strd + 1];
        }
    }

    /* top pels */
    u1_top_ok = (ps_proc->ps_ngbr_avbl->u1_mb_b)
                    && (!u4_cip || ps_top_mb->u2_is_intra);
    if (!u1_top_ok)
    {
        ps_codec->pf_mem_set_mul8((pu1_ngbr + 18), 0, MB_SIZE);
    }
    else
    {
        ps_codec->pf_mem_cpy_mul8(&pu1_ngbr[18], pu1_top, 16);
    }

    /* top left pels; left untouched when unavailable */
    u1_top_left_ok = (ps_proc->ps_ngbr_avbl->u1_mb_d)
                    && (!u4_cip || ps_proc->s_top_left_mb_syntax_ele.u2_is_intra);
    if (u1_top_left_ok)
    {
        pu1_ngbr[16] = pu1_top_left[0];
        pu1_ngbr[17] = pu1_top_left[1];
    }

    /* publish the availability and derive the modes to evaluate */
    i4_avail = u1_left_ok | (u1_top_left_ok << 1) | (u1_top_ok << 2);
    ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_avail;

    u4_modes_to_try = au1_modes_for_avail[i4_avail];

    /* the fast presets never evaluate the plane mode */
    if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST ||
        ps_codec->s_cfg.u4_enc_speed_preset == IVE_FASTEST)
    {
        u4_modes_to_try &= ~(1 << PLANE_CH_I8x8);
    }

    /* evaluate the candidate modes and build the prediction of the best one */
    i4_best_sad = INT_MAX;
    ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_src_c,
                                                    pu1_ngbr,
                                                    pu1_pred,
                                                    i4_src_strd,
                                                    i4_pred_strd,
                                                    i4_avail,
                                                    &u4_best_mode,
                                                    &i4_best_sad,
                                                    u4_modes_to_try);

    /* the plane mode, when still valid, is predicted and scored separately */
    if (u4_modes_to_try & 8)
    {
        (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr, pu1_pred_plane, 0, i4_pred_strd, i4_avail);

        ps_codec->pf_compute_sad_16x8(pu1_src_c, pu1_pred_plane, i4_src_strd, i4_pred_strd, i4_best_sad, &i4_plane_sad);

        if (i4_plane_sad < i4_best_sad)
        {
            i4_best_sad = i4_plane_sad;
            u4_best_mode = PLANE_CH_I8x8;
        }
    }

    DEBUG("%d partition cost, %d intra mode\n", i4_best_sad, u4_best_mode);

    ps_proc->u1_c_i8_mode = u4_best_mode;

    return;
}
1492
1493
/**
******************************************************************************
*
* @brief
* Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the
* prediction.
*
* @par Description
* This function evaluates the first three 16x16 modes, computes the
* corresponding sad and returns the buffer predicted with the best mode.
*
* @param[in] pu1_src
* UWORD8 pointer to the source
*
* @param[in] pu1_ngbr_pels_i16
* UWORD8 pointer to neighbouring pels
*
* @param[out] pu1_dst
* UWORD8 pointer to the destination
*
* @param[in] src_strd
* integer source stride
*
* @param[in] dst_strd
* integer destination stride
*
* @param[in] u4_n_avblty
* availability of neighbouring pixels
*
* @param[out] u4_intra_mode
* Pointer to the variable in which best mode is returned
*
* @param[in,out] pu4_sadmin
* Pointer to the least sad found so far; it is updated, together with the
* mode and the destination buffer, only when a smaller sad is found
*
* @param[in] u4_valid_intra_modes
* Says what all modes are valid
*
* @returns none
*
******************************************************************************
*/
void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
                                      UWORD8 *pu1_ngbr_pels_i16,
                                      UWORD8 *pu1_dst,
                                      UWORD32 src_strd,
                                      UWORD32 dst_strd,
                                      WORD32 u4_n_avblty,
                                      UWORD32 *u4_intra_mode,
                                      WORD32 *pu4_sadmin,
                                      UWORD32 u4_valid_intra_modes)
{
    /* neighbour buffer: entries 0..15 hold the left column with row 0 at */
    /* index 15, entries 17..32 hold the top row                          */
    UWORD8 *pu1_top_pels = pu1_ngbr_pels_i16 + 17;
    UWORD8 *pu1_row;
    UWORD8 u1_left, u1_top;
    UWORD8 u1_pel;
    WORD32 i4_dc = 0;
    WORD32 i4_sad_v = INT_MAX, i4_sad_h = INT_MAX, i4_sad_dc = 0;
    WORD32 i4_sad_best;
    WORD32 row, col;

    u1_left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
    u1_top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;

    /* HORZ sad; the left column also feeds the DC sum */
    if (u1_left)
    {
        i4_sad_h = 0;
        pu1_row = pu1_src;

        for (row = 0; row < 16; row++, pu1_row += src_strd)
        {
            u1_pel = pu1_ngbr_pels_i16[15 - row];
            i4_dc += u1_pel;

            for (col = 0; col < 16; col++)
            {
                i4_sad_h += ABS(u1_pel - pu1_row[col]);
            }
        }
        /* rounding term for this half of the DC sum */
        i4_dc += 8;
    }

    /* VERT sad; the top row also feeds the DC sum */
    if (u1_top)
    {
        i4_sad_v = 0;
        pu1_row = pu1_src;

        for (row = 0; row < 16; row++, pu1_row += src_strd)
        {
            i4_dc += pu1_top_pels[row];

            for (col = 0; col < 16; col++)
            {
                i4_sad_v += ABS(pu1_top_pels[col] - pu1_row[col]);
            }
        }
        /* rounding term for this half of the DC sum */
        i4_dc += 8;
    }

    /* average over the pels that were summed; 128 when nothing is available */
    i4_dc >>= (3 + u1_left + u1_top);
    if (!u1_left && !u1_top)
    {
        i4_dc += 128;
    }

    /* DC sad */
    pu1_row = pu1_src;
    for (row = 0; row < 16; row++, pu1_row += src_strd)
    {
        for (col = 0; col < 16; col++)
        {
            i4_sad_dc += ABS(i4_dc - pu1_row[col]);
        }
    }

    /* modes not enabled in the mask (bit0 VERT, bit1 HORZ, bit2 DC) cannot win */
    if (!(u4_valid_intra_modes & 0x1))
    {
        i4_sad_v = INT_MAX;
    }
    if (!(u4_valid_intra_modes & 0x2))
    {
        i4_sad_h = INT_MAX;
    }
    if (!(u4_valid_intra_modes & 0x4))
    {
        i4_sad_dc = INT_MAX;
    }

    i4_sad_best = MIN3(i4_sad_h, i4_sad_dc, i4_sad_v);

    /* nothing to report unless the caller's current least sad is improved */
    if (!(i4_sad_best < *pu4_sadmin))
    {
        return;
    }

    *pu4_sadmin = i4_sad_best;

    /* on a tie VERT is preferred over HORZ, and HORZ over DC */
    if (i4_sad_best == i4_sad_v)
    {
        *u4_intra_mode = VERT_I16x16;
        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
        {
            memcpy(pu1_dst, pu1_top_pels, MB_SIZE);
        }
    }
    else if (i4_sad_best == i4_sad_h)
    {
        *u4_intra_mode = HORZ_I16x16;
        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
        {
            u1_pel = pu1_ngbr_pels_i16[15 - row];
            memset(pu1_dst, u1_pel, MB_SIZE);
        }
    }
    else
    {
        *u4_intra_mode = DC_I16x16;
        for (row = 0; row < 16; row++, pu1_dst += dst_strd)
        {
            memset(pu1_dst, i4_dc, MB_SIZE);
        }
    }

    return;
}
1664
/**
******************************************************************************
*
* @brief
* Evaluate best intra 4x4 mode and perform prediction.
*
* @par Description
* This function evaluates the 4x4 modes, computes the corresponding sad
* and returns the buffer predicted with the best mode.
*
* @param[in] pu1_src
* UWORD8 pointer to the source
*
* @param[in] pu1_ngbr_pels
* UWORD8 pointer to neighbouring pels
*
* @param[out] pu1_dst
* UWORD8 pointer to the destination
*
* @param[in] src_strd
* integer source stride
*
* @param[in] dst_strd
* integer destination stride
*
* @param[in] u4_n_avblty
* availability of neighbouring pixels
*
* @param[out] u4_intra_mode
* Pointer to the variable in which best mode is returned
*
* @param[out] pu4_sadmin
* Pointer to the variable in which minimum cost is returned
*
* @param[in] u4_valid_intra_modes
* Says what all modes are valid
*
* @param[in] u4_lambda
* Lambda value for computing cost from SAD
*
* @param[in] u4_predictd_mode
* Predicted mode for cost computation
*
* @returns none
*
******************************************************************************
*/
ih264e_evaluate_intra_4x4_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes,UWORD32 u4_lambda,UWORD32 u4_predictd_mode)1712 void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src,
1713 UWORD8 *pu1_ngbr_pels,
1714 UWORD8 *pu1_dst,
1715 UWORD32 src_strd,
1716 UWORD32 dst_strd,
1717 WORD32 u4_n_avblty,
1718 UWORD32 *u4_intra_mode,
1719 WORD32 *pu4_sadmin,
1720 UWORD32 u4_valid_intra_modes,
1721 UWORD32 u4_lambda,
1722 UWORD32 u4_predictd_mode)
1723 {
1724 UWORD8 *pu1_src_temp = pu1_src;
1725 UWORD8 *pu1_pred = pu1_ngbr_pels;
1726 UWORD8 left = 0, top = 0;
1727 UWORD8 u1_pred_val = 0;
1728 UWORD8 u1_pred_vals[4] = {0};
1729 UWORD8 *pu1_pred_val = NULL;
1730 /* To store FILT121 operated values*/
1731 UWORD8 u1_pred_vals_diag_121[15] = {0};
1732 /* To store FILT11 operated values*/
1733 UWORD8 u1_pred_vals_diag_11[15] = {0};
1734 UWORD8 u1_pred_vals_vert_r[8] = {0};
1735 UWORD8 u1_pred_vals_horz_d[10] = {0};
1736 UWORD8 u1_pred_vals_horz_u[10] = {0};
1737 WORD32 u4_dcval = 0;
1738 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1739 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1740
1741 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX,
1742 INT_MAX, INT_MAX, INT_MAX, INT_MAX};
1743 WORD32 i, i4_min_cost = INT_MAX;
1744
1745 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
1746 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
1747
1748 /* Computing SAD */
1749
1750 /* VERT mode valid */
1751 if (u4_valid_intra_modes & 1)
1752 {
1753 pu1_pred = pu1_ngbr_pels + 5;
1754 i4_sad[VERT_I4x4] = 0;
1755 i4_cost[VERT_I4x4] = 0;
1756
1757 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1758 pu1_src_temp += src_strd;
1759 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1760 pu1_src_temp += src_strd;
1761 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1762 pu1_src_temp += src_strd;
1763 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]);
1764
1765 i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ?
1766 u4_lambda : 4 * u4_lambda);
1767 }
1768
1769 /* HORZ mode valid */
1770 if (u4_valid_intra_modes & 2)
1771 {
1772 i4_sad[HORZ_I4x4] = 0;
1773 i4_cost[HORZ_I4x4] =0;
1774 pu1_src_temp = pu1_src;
1775
1776 u1_pred_val = pu1_ngbr_pels[3];
1777
1778 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1779 + ABS(pu1_src_temp[1] - u1_pred_val)
1780 + ABS(pu1_src_temp[2] - u1_pred_val)
1781 + ABS(pu1_src_temp[3] - u1_pred_val);
1782 pu1_src_temp += src_strd;
1783
1784 u1_pred_val = pu1_ngbr_pels[2];
1785
1786 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1787 + ABS(pu1_src_temp[1] - u1_pred_val)
1788 + ABS(pu1_src_temp[2] - u1_pred_val)
1789 + ABS(pu1_src_temp[3] - u1_pred_val);
1790 pu1_src_temp += src_strd;
1791
1792 u1_pred_val = pu1_ngbr_pels[1];
1793
1794 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1795 + ABS(pu1_src_temp[1] - u1_pred_val)
1796 + ABS(pu1_src_temp[2] - u1_pred_val)
1797 + ABS(pu1_src_temp[3] - u1_pred_val);
1798 pu1_src_temp += src_strd;
1799
1800 u1_pred_val = pu1_ngbr_pels[0];
1801
1802 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val)
1803 + ABS(pu1_src_temp[1] - u1_pred_val)
1804 + ABS(pu1_src_temp[2] - u1_pred_val)
1805 + ABS(pu1_src_temp[3] - u1_pred_val);
1806
1807 i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ?
1808 u4_lambda : 4 * u4_lambda);
1809 }
1810
1811 /* DC mode valid */
1812 if (u4_valid_intra_modes & 4)
1813 {
1814 i4_sad[DC_I4x4] = 0;
1815 i4_cost[DC_I4x4] = 0;
1816 pu1_src_temp = pu1_src;
1817
1818 if (left)
1819 u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2]
1820 + pu1_ngbr_pels[3] + 2;
1821 if (top)
1822 u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7]
1823 + pu1_ngbr_pels[8] + 2;
1824
1825 u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128;
1826
1827 /* none available */
1828 memset(u1_pred_vals, u4_dcval, 4);
1829 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1830 pu1_src_temp += src_strd;
1831 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1832 pu1_src_temp += src_strd;
1833 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1834 pu1_src_temp += src_strd;
1835 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]);
1836 pu1_src_temp += src_strd;
1837
1838 i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ?
1839 u4_lambda : 4 * u4_lambda);
1840 }
1841
1842 /* if modes other than VERT, HORZ and DC are valid */
1843 if (u4_valid_intra_modes > 7)
1844 {
1845 pu1_pred = pu1_ngbr_pels;
1846 pu1_pred[13] = pu1_pred[14] = pu1_pred[12];
1847
1848 /* Performing FILT121 and FILT11 operation for all neighbour values*/
1849 for (i = 0; i < 13; i++)
1850 {
1851 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]);
1852 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]);
1853
1854 pu1_pred++;
1855 }
1856
1857 if (u4_valid_intra_modes & 8)/* DIAG_DL */
1858 {
1859 i4_sad[DIAG_DL_I4x4] = 0;
1860 i4_cost[DIAG_DL_I4x4] = 0;
1861 pu1_src_temp = pu1_src;
1862 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1863
1864 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]);
1865 pu1_src_temp += src_strd;
1866 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]);
1867 pu1_src_temp += src_strd;
1868 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]);
1869 pu1_src_temp += src_strd;
1870 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]);
1871 pu1_src_temp += src_strd;
1872 i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ?
1873 u4_lambda : 4 * u4_lambda);
1874 }
1875
1876 if (u4_valid_intra_modes & 16)/* DIAG_DR */
1877 {
1878 i4_sad[DIAG_DR_I4x4] = 0;
1879 i4_cost[DIAG_DR_I4x4] = 0;
1880 pu1_src_temp = pu1_src;
1881 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1882
1883 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]);
1884 pu1_src_temp += src_strd;
1885 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]);
1886 pu1_src_temp += src_strd;
1887 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]);
1888 pu1_src_temp += src_strd;
1889 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]);
1890 pu1_src_temp += src_strd;
1891 i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ?
1892 u4_lambda : 4 * u4_lambda);
1893
1894 }
1895
1896 if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/
1897 {
1898 i4_sad[VERT_R_I4x4] = 0;
1899
1900 pu1_src_temp = pu1_src;
1901 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2];
1902 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3);
1903 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1];
1904 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3);
1905
1906 pu1_pred_val = u1_pred_vals_diag_11 + 4;
1907 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1908 pu1_pred_val = u1_pred_vals_diag_121 + 3;
1909 pu1_src_temp += src_strd;
1910 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]);
1911 pu1_src_temp += src_strd;
1912 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]);
1913 pu1_src_temp += src_strd;
1914 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4),
1915 i4_sad[VERT_R_I4x4]);
1916
1917 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ?
1918 u4_lambda : 4 * u4_lambda);
1919 }
1920
1921 if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/
1922 {
1923 i4_sad[HORZ_D_I4x4] = 0;
1924
1925 pu1_src_temp = pu1_src;
1926 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3];
1927 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3);
1928 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0];
1929 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0];
1930 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1];
1931 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1];
1932 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2];
1933 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2];
1934
1935 pu1_pred_val = u1_pred_vals_horz_d;
1936 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]);
1937 pu1_src_temp += src_strd;
1938 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]);
1939 pu1_src_temp += src_strd;
1940 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]);
1941 pu1_src_temp += src_strd;
1942 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]);
1943
1944 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ?
1945 u4_lambda : 4 * u4_lambda);
1946 }
1947
1948 if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/
1949 {
1950 i4_sad[VERT_L_I4x4] = 0;
1951 pu1_src_temp = pu1_src;
1952 pu1_pred_val = u1_pred_vals_diag_11 + 5;
1953 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1954 pu1_src_temp += src_strd;
1955 pu1_pred_val = u1_pred_vals_diag_121 + 5;
1956 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1957 pu1_src_temp += src_strd;
1958 pu1_pred_val = u1_pred_vals_diag_11 + 6;
1959 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1960 pu1_src_temp += src_strd;
1961 pu1_pred_val = u1_pred_vals_diag_121 + 6;
1962 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]);
1963
1964 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ?
1965 u4_lambda : 4 * u4_lambda);
1966 }
1967
1968 if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/
1969 {
1970 i4_sad[HORZ_U_I4x4] = 0;
1971 pu1_src_temp = pu1_src;
1972 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2];
1973 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1];
1974 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1];
1975 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0];
1976 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0];
1977 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]);
1978
1979 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4);
1980
1981 pu1_pred_val = u1_pred_vals_horz_u;
1982 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]);
1983 pu1_src_temp += src_strd;
1984 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]);
1985 pu1_src_temp += src_strd;
1986 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]);
1987 pu1_src_temp += src_strd;
1988 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]);
1989
1990 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ?
1991 u4_lambda : 4 * u4_lambda);
1992 }
1993
1994 i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]),
1995 MIN3(i4_cost[3], i4_cost[4], i4_cost[5]),
1996 MIN3(i4_cost[6], i4_cost[7], i4_cost[8]));
1997
1998 }
1999 else
2000 {
2001 /* Only first three modes valid */
2002 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]);
2003 }
2004
2005 *pu4_sadmin = i4_min_cost;
2006
2007 if (i4_min_cost == i4_cost[0])
2008 {
2009 *u4_intra_mode = VERT_I4x4;
2010 pu1_pred_val = pu1_ngbr_pels + 5;
2011 memcpy(pu1_dst, (pu1_pred_val), 4);
2012 pu1_dst += dst_strd;
2013 memcpy(pu1_dst, (pu1_pred_val), 4);
2014 pu1_dst += dst_strd;
2015 memcpy(pu1_dst, (pu1_pred_val), 4);
2016 pu1_dst += dst_strd;
2017 memcpy(pu1_dst, (pu1_pred_val), 4);
2018 }
2019 else if (i4_min_cost == i4_cost[1])
2020 {
2021 *u4_intra_mode = HORZ_I4x4;
2022 memset(pu1_dst, pu1_ngbr_pels[3], 4);
2023 pu1_dst += dst_strd;
2024 memset(pu1_dst, pu1_ngbr_pels[2], 4);
2025 pu1_dst += dst_strd;
2026 memset(pu1_dst, pu1_ngbr_pels[1], 4);
2027 pu1_dst += dst_strd;
2028 memset(pu1_dst, pu1_ngbr_pels[0], 4);
2029 }
2030 else if (i4_min_cost == i4_cost[2])
2031 {
2032 *u4_intra_mode = DC_I4x4;
2033 memset(pu1_dst, u4_dcval, 4);
2034 pu1_dst += dst_strd;
2035 memset(pu1_dst, u4_dcval, 4);
2036 pu1_dst += dst_strd;
2037 memset(pu1_dst, u4_dcval, 4);
2038 pu1_dst += dst_strd;
2039 memset(pu1_dst, u4_dcval, 4);
2040 }
2041
2042 else if (i4_min_cost == i4_cost[3])
2043 {
2044 *u4_intra_mode = DIAG_DL_I4x4;
2045 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2046 memcpy(pu1_dst, (pu1_pred_val), 4);
2047 pu1_dst += dst_strd;
2048 memcpy(pu1_dst, (pu1_pred_val + 1), 4);
2049 pu1_dst += dst_strd;
2050 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2051 pu1_dst += dst_strd;
2052 memcpy(pu1_dst, (pu1_pred_val + 3), 4);
2053 }
2054 else if (i4_min_cost == i4_cost[4])
2055 {
2056 *u4_intra_mode = DIAG_DR_I4x4;
2057 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2058
2059 memcpy(pu1_dst, (pu1_pred_val), 4);
2060 pu1_dst += dst_strd;
2061 memcpy(pu1_dst, (pu1_pred_val - 1), 4);
2062 pu1_dst += dst_strd;
2063 memcpy(pu1_dst, (pu1_pred_val - 2), 4);
2064 pu1_dst += dst_strd;
2065 memcpy(pu1_dst, (pu1_pred_val - 3), 4);
2066 }
2067
2068 else if (i4_min_cost == i4_cost[5])
2069 {
2070 *u4_intra_mode = VERT_R_I4x4;
2071 pu1_pred_val = u1_pred_vals_diag_11 + 4;
2072 memcpy(pu1_dst, (pu1_pred_val), 4);
2073 pu1_dst += dst_strd;
2074 pu1_pred_val = u1_pred_vals_diag_121 + 3;
2075 memcpy(pu1_dst, (pu1_pred_val), 4);
2076 pu1_dst += dst_strd;
2077 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4);
2078 pu1_dst += dst_strd;
2079 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4);
2080 }
2081 else if (i4_min_cost == i4_cost[6])
2082 {
2083 *u4_intra_mode = HORZ_D_I4x4;
2084 pu1_pred_val = u1_pred_vals_horz_d;
2085 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2086 pu1_dst += dst_strd;
2087 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2088 pu1_dst += dst_strd;
2089 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2090 pu1_dst += dst_strd;
2091 memcpy(pu1_dst, (pu1_pred_val), 4);
2092 pu1_dst += dst_strd;
2093 }
2094 else if (i4_min_cost == i4_cost[7])
2095 {
2096 *u4_intra_mode = VERT_L_I4x4;
2097 pu1_pred_val = u1_pred_vals_diag_11 + 5;
2098 memcpy(pu1_dst, (pu1_pred_val), 4);
2099 pu1_dst += dst_strd;
2100 pu1_pred_val = u1_pred_vals_diag_121 + 5;
2101 memcpy(pu1_dst, (pu1_pred_val), 4);
2102 pu1_dst += dst_strd;
2103 pu1_pred_val = u1_pred_vals_diag_11 + 6;
2104 memcpy(pu1_dst, (pu1_pred_val), 4);
2105 pu1_dst += dst_strd;
2106 pu1_pred_val = u1_pred_vals_diag_121 + 6;
2107 memcpy(pu1_dst, (pu1_pred_val), 4);
2108 }
2109 else if (i4_min_cost == i4_cost[8])
2110 {
2111 *u4_intra_mode = HORZ_U_I4x4;
2112 pu1_pred_val = u1_pred_vals_horz_u;
2113 memcpy(pu1_dst, (pu1_pred_val), 4);
2114 pu1_dst += dst_strd;
2115 memcpy(pu1_dst, (pu1_pred_val + 2), 4);
2116 pu1_dst += dst_strd;
2117 memcpy(pu1_dst, (pu1_pred_val + 4), 4);
2118 pu1_dst += dst_strd;
2119 memcpy(pu1_dst, (pu1_pred_val + 6), 4);
2120 pu1_dst += dst_strd;
2121 }
2122
2123 return;
2124 }
2125
2126 /**
2127 ******************************************************************************
2128 *
2129 * @brief:
2130 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the prediction.
2131 *
2132 * @par Description
2133 * This function evaluates first three intra chroma modes and compute corresponding sad
2134 * and return the buffer predicted with best mode.
2135 *
2136 * @param[in] pu1_src
2137 * UWORD8 pointer to the source
2138 *
2139 * @param[in] pu1_ngbr_pels
2140 * UWORD8 pointer to neighbouring pels
2141 *
2142 * @param[out] pu1_dst
2143 * UWORD8 pointer to the destination
2144 *
2145 * @param[in] src_strd
2146 * integer source stride
2147 *
2148 * @param[in] dst_strd
2149 * integer destination stride
2150 *
2151 * @param[in] u4_n_avblty
2152 * availability of neighbouring pixels
2153 *
2154 * @param[in] u4_intra_mode
2155 * Pointer to the variable in which best mode is returned
2156 *
2157 * @param[in] pu4_sadmin
2158 * Pointer to the variable in which minimum sad is returned
2159 *
2160 * @param[in] u4_valid_intra_modes
2161 * Says what all modes are valid
2162 *
2163 * @return none
2164 *
2165 ******************************************************************************
2166 */
ih264e_evaluate_intra_chroma_modes(UWORD8 * pu1_src,UWORD8 * pu1_ngbr_pels,UWORD8 * pu1_dst,UWORD32 src_strd,UWORD32 dst_strd,WORD32 u4_n_avblty,UWORD32 * u4_intra_mode,WORD32 * pu4_sadmin,UWORD32 u4_valid_intra_modes)2167 void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src,
2168 UWORD8 *pu1_ngbr_pels,
2169 UWORD8 *pu1_dst,
2170 UWORD32 src_strd,
2171 UWORD32 dst_strd,
2172 WORD32 u4_n_avblty,
2173 UWORD32 *u4_intra_mode,
2174 WORD32 *pu4_sadmin,
2175 UWORD32 u4_valid_intra_modes)
2176 {
2177 UWORD8 *pu1_neighbour;
2178 UWORD8 *pu1_src_temp = pu1_src;
2179 UWORD8 left = 0, top = 0;
2180 WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */
2181 u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/
2182
2183 WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/
2184 u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/
2185
2186 WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX,
2187 i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX;
2188 UWORD8 val_u, val_v;
2189
2190 WORD32 u4_dc_val[2][2][2];/* -----------
2191 | | | Chroma can have four
2192 | 00 | 01 | separate dc value...
2193 ----------- u4_dc_val corresponds to this dc values
2194 | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V]
2195 | 10 | 11 |
2196 ----------- */
2197 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK);
2198 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2;
2199
2200 /*Evaluating HORZ*/
2201 if (left)/* Ifleft available*/
2202 {
2203 i4_sad_horz = 0;
2204
2205 for (i = 0; i < 8; i++)
2206 {
2207 val_v = pu1_ngbr_pels[15 - 2 * i];
2208 val_u = pu1_ngbr_pels[15 - 2 * i - 1];
2209 row = i / 4;
2210 u4_dcval_u_l[row] += val_u;
2211 u4_dcval_v_l[row] += val_v;
2212 for (j = 0; j < 8; j++)
2213 {
2214 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/
2215 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]);
2216 }
2217
2218 pu1_src_temp += src_strd;
2219 }
2220 u4_dcval_u_l[0] += 2;
2221 u4_dcval_u_l[1] += 2;
2222 u4_dcval_v_l[0] += 2;
2223 u4_dcval_v_l[1] += 2;
2224 }
2225
2226 /*Evaluating VERT**/
2227 pu1_src_temp = pu1_src;
2228 if (top) /* top available*/
2229 {
2230 i4_sad_vert = 0;
2231
2232 for (i = 0; i < 8; i++)
2233 {
2234 col = i / 4;
2235
2236 val_u = pu1_ngbr_pels[18 + i * 2];
2237 val_v = pu1_ngbr_pels[18 + i * 2 + 1];
2238 u4_dcval_u_t[col] += val_u;
2239 u4_dcval_v_t[col] += val_v;
2240
2241 for (j = 0; j < 16; j++)
2242 {
2243 i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/
2244 }
2245 pu1_src_temp += src_strd;
2246
2247 }
2248 u4_dcval_u_t[0] += 2;
2249 u4_dcval_u_t[1] += 2;
2250 u4_dcval_v_t[0] += 2;
2251 u4_dcval_v_t[1] += 2;
2252 }
2253
2254 /* computing DC value*/
2255 /* Equation 8-128 in spec*/
2256 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top);
2257 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top);
2258 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top);
2259 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top);
2260
2261 if (top)
2262 {
2263 /* Equation 8-132 in spec*/
2264 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top);
2265 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top);
2266 }
2267 else
2268 {
2269 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left);
2270 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left);
2271 }
2272
2273 if (left)
2274 {
2275 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left);
2276 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left);
2277 }
2278 else
2279 {
2280 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top);
2281 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top);
2282 }
2283
2284 if (!(left || top))
2285 {
2286 /*none available*/
2287 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] =
2288 u4_dc_val[0][1][0] = u4_dc_val[0][1][1] =
2289 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] =
2290 u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128;
2291 }
2292
2293 /* Evaluating DC */
2294 pu1_src_temp = pu1_src;
2295 i4_sad_dc = 0;
2296 for (i = 0; i < 8; i++)
2297 {
2298 for (j = 0; j < 8; j++)
2299 {
2300 col = j / 4;
2301 row = i / 4;
2302 val_u = u4_dc_val[row][col][0];
2303 val_v = u4_dc_val[row][col][1];
2304
2305 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/
2306 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]);
2307 }
2308 pu1_src_temp += src_strd;
2309 }
2310
2311 if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/
2312 i4_sad_dc = INT_MAX;
2313 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/
2314 i4_sad_horz = INT_MAX;
2315 if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/
2316 i4_sad_vert = INT_MAX;
2317
2318 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert);
2319
2320 /* Finding Minimum sad and doing corresponding prediction*/
2321 if (i4_min_sad < *pu4_sadmin)
2322 {
2323 *pu4_sadmin = i4_min_sad;
2324
2325 if (i4_min_sad == i4_sad_dc)
2326 {
2327 *u4_intra_mode = DC_CH_I8x8;
2328 for (i = 0; i < 8; i++)
2329 {
2330 for (j = 0; j < 8; j++)
2331 {
2332 col = j / 4;
2333 row = i / 4;
2334
2335 pu1_dst[2 * j] = u4_dc_val[row][col][0];
2336 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1];
2337 }
2338 pu1_dst += dst_strd;
2339 }
2340 }
2341 else if (i4_min_sad == i4_sad_horz)
2342 {
2343 *u4_intra_mode = HORZ_CH_I8x8;
2344 for (j = 0; j < 8; j++)
2345 {
2346 val_v = pu1_ngbr_pels[15 - 2 * j];
2347 val_u = pu1_ngbr_pels[15 - 2 * j - 1];
2348
2349 for (i = 0; i < 8; i++)
2350 {
2351 pu1_dst[2 * i] = val_u;
2352 pu1_dst[2 * i + 1] = val_v;
2353
2354 }
2355 pu1_dst += dst_strd;
2356 }
2357 }
2358 else
2359 {
2360 *u4_intra_mode = VERT_CH_I8x8;
2361 pu1_neighbour = pu1_ngbr_pels + 18;
2362 for (j = 0; j < 8; j++)
2363 {
2364 memcpy(pu1_dst, pu1_neighbour, MB_SIZE);
2365 pu1_dst += dst_strd;
2366 }
2367 }
2368 }
2369
2370 return;
2371 }
2372