1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 * ihevcd_iquant_itrans_recon_ctb.c
22 *
23 * @brief
24 * Contains functions for inverse quantization, inverse transform and recon
25 *
26 * @author
27 * Ittiam
28 *
29 * @par List of Functions:
30 * - ihevcd_iquant_itrans_recon_ctb()
31 *
32 * @remarks
33 * None
34 *
35 *******************************************************************************
36 */
37 /*****************************************************************************/
38 /* File Includes */
39 /*****************************************************************************/
40 #include <stdio.h>
41 #include <stddef.h>
42 #include <stdlib.h>
43 #include <string.h>
44
45 #include "ihevc_typedefs.h"
46 #include "iv.h"
47 #include "ivd.h"
48 #include "ihevcd_cxa.h"
49
50 #include "ihevc_defs.h"
51 #include "ihevc_debug.h"
52 #include "ihevc_structs.h"
53 #include "ihevc_cabac_tables.h"
54 #include "ihevc_macros.h"
55 #include "ihevc_platform_macros.h"
56
57 #include "ihevcd_defs.h"
58 #include "ihevcd_function_selector.h"
59 #include "ihevcd_structs.h"
60 #include "ihevcd_error.h"
61 #include "ihevcd_bitstream.h"
62 #include "ihevc_common_tables.h"
63
64 /* Intra pred includes */
65 #include "ihevc_intra_pred.h"
66
67 /* Inverse transform common module includes */
68 #include "ihevc_trans_tables.h"
69 #include "ihevc_trans_macros.h"
70 #include "ihevc_itrans_recon.h"
71 #include "ihevc_recon.h"
72 #include "ihevc_chroma_itrans_recon.h"
73 #include "ihevc_chroma_recon.h"
74
75 /* Decoder includes */
76 #include "ihevcd_common_tables.h"
77 #include "ihevcd_iquant_itrans_recon_ctb.h"
78 #include "ihevcd_debug.h"
79 #include "ihevcd_profile.h"
80 #include "ihevcd_statistics.h"
81 #include "ihevcd_itrans_recon_dc.h"
82
83 static const UWORD32 gau4_ihevcd_4_bit_reverse[] = { 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
84
85
86 /* Globals */
87 static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
88 { IP_FUNC_MODE_0, /* Mode 0 */
89 IP_FUNC_MODE_1, /* Mode 1 */
90 IP_FUNC_MODE_2, /* Mode 2 */
91 IP_FUNC_MODE_3TO9, /* Mode 3 */
92 IP_FUNC_MODE_3TO9, /* Mode 4 */
93 IP_FUNC_MODE_3TO9, /* Mode 5 */
94 IP_FUNC_MODE_3TO9, /* Mode 6 */
95 IP_FUNC_MODE_3TO9, /* Mode 7 */
96 IP_FUNC_MODE_3TO9, /* Mode 8 */
97 IP_FUNC_MODE_3TO9, /* Mode 9 */
98 IP_FUNC_MODE_10, /* Mode 10 */
99 IP_FUNC_MODE_11TO17, /* Mode 11 */
100 IP_FUNC_MODE_11TO17, /* Mode 12 */
101 IP_FUNC_MODE_11TO17, /* Mode 13 */
102 IP_FUNC_MODE_11TO17, /* Mode 14 */
103 IP_FUNC_MODE_11TO17, /* Mode 15 */
104 IP_FUNC_MODE_11TO17, /* Mode 16 */
105 IP_FUNC_MODE_11TO17, /* Mode 17 */
106 IP_FUNC_MODE_18_34, /* Mode 18 */
107 IP_FUNC_MODE_19TO25, /* Mode 19 */
108 IP_FUNC_MODE_19TO25, /* Mode 20 */
109 IP_FUNC_MODE_19TO25, /* Mode 21 */
110 IP_FUNC_MODE_19TO25, /* Mode 22 */
111 IP_FUNC_MODE_19TO25, /* Mode 23 */
112 IP_FUNC_MODE_19TO25, /* Mode 24 */
113 IP_FUNC_MODE_19TO25, /* Mode 25 */
114 IP_FUNC_MODE_26, /* Mode 26 */
115 IP_FUNC_MODE_27TO33, /* Mode 27 */
116 IP_FUNC_MODE_27TO33, /* Mode 26 */
117 IP_FUNC_MODE_27TO33, /* Mode 29 */
118 IP_FUNC_MODE_27TO33, /* Mode 30 */
119 IP_FUNC_MODE_27TO33, /* Mode 31 */
120 IP_FUNC_MODE_27TO33, /* Mode 32 */
121 IP_FUNC_MODE_27TO33, /* Mode 33 */
122 IP_FUNC_MODE_18_34, /* Mode 34 */
123 };
124
125
126 const WORD16 *g_ai2_ihevc_trans_tables[] =
127 { &g_ai2_ihevc_trans_dst_4[0][0],
128 &g_ai2_ihevc_trans_4[0][0],
129 &g_ai2_ihevc_trans_8[0][0],
130 &g_ai2_ihevc_trans_16[0][0],
131 &g_ai2_ihevc_trans_32[0][0]
132 };
133
134
135 /*****************************************************************************/
136 /* Function Prototypes */
137 /*****************************************************************************/
138 /* Returns number of ai2_level read from ps_sblk_coeff */
ihevcd_unpack_coeffs(WORD16 * pi2_tu_coeff,WORD32 log2_trans_size,UWORD8 * pu1_tu_coeff_data,WORD16 * pi2_dequant_matrix,WORD32 qp_rem,WORD32 qp_div,TRANSFORM_TYPE e_trans_type,WORD32 trans_quant_bypass,UWORD32 * pu4_zero_cols,UWORD32 * pu4_zero_rows,UWORD32 * pu4_coeff_type,WORD16 * pi2_coeff_value)139 UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
140 WORD32 log2_trans_size,
141 UWORD8 *pu1_tu_coeff_data,
142 WORD16 *pi2_dequant_matrix,
143 WORD32 qp_rem,
144 WORD32 qp_div,
145 TRANSFORM_TYPE e_trans_type,
146 WORD32 trans_quant_bypass,
147 UWORD32 *pu4_zero_cols,
148 UWORD32 *pu4_zero_rows,
149 UWORD32 *pu4_coeff_type,
150 WORD16 *pi2_coeff_value)
151 {
152 /* Generating coeffs from coeff-map */
153 WORD32 i;
154 WORD16 *pi2_sblk_ptr;
155 WORD32 subblk_pos_x, subblk_pos_y;
156 WORD32 sblk_scan_idx, coeff_raster_idx;
157 WORD32 sblk_non_zero_coeff_idx;
158 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
159 UWORD8 u1_num_coded_sblks, u1_scan_type;
160 UWORD8 *pu1_new_tu_coeff_data;
161 WORD32 trans_size;
162 WORD32 xs, ys;
163 WORD32 trans_skip;
164 WORD16 iquant_out;
165 WORD32 shift_iq;
166 {
167 WORD32 bit_depth;
168
169 bit_depth = 8 + 0;
170 shift_iq = bit_depth + log2_trans_size - 5;
171 }
172 trans_size = (1 << log2_trans_size);
173
174 /* First byte points to number of coded blocks */
175 u1_num_coded_sblks = *pu1_tu_coeff_data++;
176
177 /* Next byte points to scan type */
178 u1_scan_type = *pu1_tu_coeff_data++;
179 /* 0th bit has trans_skip */
180 trans_skip = u1_scan_type & 1;
181 u1_scan_type >>= 1;
182
183 pi2_sblk_ptr = pi2_tu_coeff;
184
185 /* Initially all columns are assumed to be zero */
186 *pu4_zero_cols = 0xFFFFFFFF;
187 /* Initially all rows are assumed to be zero */
188 *pu4_zero_rows = 0xFFFFFFFF;
189
190 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);
191
192 if(trans_skip)
193 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
194
195 STATS_INIT_SBLK_AND_COEFF_POS();
196
197 /* DC only case */
198 if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
199 && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
200 && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
201 {
202 *pu4_coeff_type = 1;
203
204 if(!trans_quant_bypass)
205 {
206 if(4 == trans_size)
207 {
208 IQUANT_4x4(iquant_out,
209 ps_tu_sblk_coeff_data->ai2_level[0],
210 pi2_dequant_matrix[0]
211 * g_ihevc_iquant_scales[qp_rem],
212 shift_iq, qp_div);
213 }
214 else
215 {
216 IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
217 pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
218 shift_iq, qp_div);
219 }
220 if(trans_skip)
221 iquant_out = (iquant_out + 16) >> 5;
222 }
223 else
224 {
225 /* setting the column to zero */
226 for(i = 0; i < trans_size; i++)
227 *(pi2_tu_coeff + i * trans_size) = 0;
228
229 iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
230 }
231 *pi2_coeff_value = iquant_out;
232 *pi2_tu_coeff = iquant_out;
233 *pu4_zero_cols &= ~0x1;
234 *pu4_zero_rows &= ~0x1;
235 ps_tu_sblk_coeff_data =
236 (void *)&ps_tu_sblk_coeff_data->ai2_level[1];
237
238 STATS_UPDATE_COEFF_COUNT();
239 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0);
240 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
241 return ((UWORD8 *)ps_tu_sblk_coeff_data);
242 }
243 else
244 {
245 *pu4_coeff_type = 0;
246 /* In case of trans skip, memset has already happened */
247 if(!trans_skip)
248 memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
249 }
250
251 for(i = 0; i < u1_num_coded_sblks; i++)
252 {
253 UWORD32 u4_sig_coeff_map;
254 subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
255 subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;
256
257 STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);
258
259 subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
260 subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;
261
262 pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
263 + subblk_pos_x;
264
265 //*pu4_zero_cols &= ~(0xF << subblk_pos_x);
266
267 sblk_non_zero_coeff_idx = 0;
268 u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;
269 //for(sblk_scan_idx = (31 - CLZ(u4_sig_coeff_map)); sblk_scan_idx >= 0; sblk_scan_idx--)
270 sblk_scan_idx = 31;
271 do
272 {
273 WORD32 clz = CLZ(u4_sig_coeff_map);
274
275 sblk_scan_idx -= clz;
276 /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */
277 /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */
278 u4_sig_coeff_map = u4_sig_coeff_map << clz;
279 /* Copying coeffs and storing in reverse order */
280 {
281 STATS_UPDATE_COEFF_COUNT();
282 coeff_raster_idx =
283 gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];
284
285 xs = coeff_raster_idx & 0x3;
286 ys = coeff_raster_idx >> 2;
287
288 if(!trans_quant_bypass)
289 {
290 if(4 == trans_size)
291 {
292 IQUANT_4x4(iquant_out,
293 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
294 pi2_dequant_matrix[(subblk_pos_x + xs)
295 + (subblk_pos_y + ys)
296 * trans_size]
297 * g_ihevc_iquant_scales[qp_rem],
298 shift_iq, qp_div);
299 sblk_non_zero_coeff_idx++;
300 }
301 else
302 {
303 IQUANT(iquant_out,
304 ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
305 pi2_dequant_matrix[(subblk_pos_x + xs)
306 + (subblk_pos_y + ys)
307 * trans_size]
308 * g_ihevc_iquant_scales[qp_rem],
309 shift_iq, qp_div);
310 sblk_non_zero_coeff_idx++;
311 }
312
313 if(trans_skip)
314 iquant_out = (iquant_out + 16) >> 5;
315 }
316 else
317 {
318 iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
319 }
320 *pu4_zero_cols &= ~(0x1 << (subblk_pos_x + xs));
321 *pu4_zero_rows &= ~(0x1 << (subblk_pos_y + ys));
322 *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
323 }
324 sblk_scan_idx--;
325 u4_sig_coeff_map <<= 1;
326
327 }while(u4_sig_coeff_map);
328 /* Updating the sblk pointer */
329 ps_tu_sblk_coeff_data =
330 (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
331 }
332
333 STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
334
335 pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;
336
337 return pu1_new_tu_coeff_data;
338 }
339
/**
*******************************************************************************
*
* @brief
*  Builds the packed neighbour availability flags used for intra
*  prediction of one TU
*
* @par Description:
*  Availability of the bottom-left, left, top, top-right and top-left
*  neighbours is first read from the CTB level availability map. Without
*  constrained intra prediction an available neighbour gets all of its flag
*  bits set. With constrained intra prediction the flag bits are instead
*  taken from the picture level intra flag map (one bit per 8x8 block), so
*  that only intra coded neighbours are reported. Top-right and bottom-left
*  flags are finally trimmed to the part that lies inside the picture.
*
* @param[in] ps_proc
*  Process context; supplies the SPS, the CTB position and the picture
*  level intra flag map
*
* @param[in] ps_tu
*  TU whose neighbours are examined (position in MIN_TU_SIZE units inside
*  the CTB, and size)
*
* @param[in] pu4_intra_nbr_avail
*  CTB availability map. Entry 0 describes the row above the CTB and entry
*  1 + y the TU row y; within an entry the MSB describes the column left of
*  the CTB and the following bits the TU columns
*
* @param[in] i2_pic_width_in_luma_samples
*  Picture width, used to derive the stride of the intra flag map
*
* @param[in] i1_constrained_intra_pred_flag
*  Non-zero when constrained intra prediction is enabled
*
* @param[in] trans_size
*  Size of the TU in luma samples
*
* @param[in] ctb_size
*  Size of the CTB in luma samples
*
* @returns
*  Flags packed as: bit 16 top-left, bits 15:12 top-right, bits 11:8 top,
*  bits 7:4 left (bit reversed), bits 3:0 bottom-left (bit reversed)
*
*******************************************************************************
*/
WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
                                 tu_t *ps_tu,
                                 UWORD32 *pu4_intra_nbr_avail,
                                 WORD16 i2_pic_width_in_luma_samples,
                                 UWORD8 i1_constrained_intra_pred_flag,
                                 WORD32 trans_size,
                                 WORD32 ctb_size)
{
    sps_t *ps_sps;
    UWORD8 u1_bot_lt_avail, u1_left_avail, u1_top_avail, u1_top_rt_avail,
                    u1_top_lt_avail;
    WORD32 x_cur, y_cur, x_nbr, y_nbr;
    UWORD8 *pu1_nbr_intra_flag;
    UWORD8 *pu1_pic_intra_flag;
    UWORD8 top_right, top, top_left, left, bot_left;
    WORD32 intra_pos;
    WORD32 num_8_blks, num_8_blks_in_bits;
    /* One bit per 8x8 block, hence one byte per 64 luma samples */
    WORD32 numbytes_row = (i2_pic_width_in_luma_samples + 63) / 64;
    WORD32 cur_x, cur_y;
    WORD32 i;
    WORD32 nbr_flags;

    ps_sps = ps_proc->ps_sps;
    cur_x = ps_tu->b4_pos_x;
    cur_y = ps_tu->b4_pos_y;

    /* Row (1 + cur_y) and bit (31 - (1 + cur_x)) of the map correspond to */
    /* the top-left min TU of the current TU; neighbours are offsets from  */
    /* that position                                                       */
    u1_bot_lt_avail = (pu4_intra_nbr_avail[1 + cur_y + trans_size / MIN_TU_SIZE]
                    >> (31 - (1 + cur_x - 1))) & 1;
    u1_left_avail = (pu4_intra_nbr_avail[1 + cur_y] >> (31 - (1 + cur_x - 1)))
                    & 1;
    u1_top_avail = (pu4_intra_nbr_avail[1 + cur_y - 1] >> (31 - (1 + cur_x)))
                    & 1;
    u1_top_rt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1]
                    >> (31 - (1 + cur_x + trans_size / MIN_TU_SIZE))) & 1;
    u1_top_lt_avail = (pu4_intra_nbr_avail[1 + cur_y - 1]
                    >> (31 - (1 + cur_x - 1))) & 1;

    /* Position of the TU in the picture, in luma samples */
    x_cur = ps_proc->i4_ctb_x * ctb_size + cur_x * MIN_TU_SIZE;
    y_cur = ps_proc->i4_ctb_y * ctb_size + cur_y * MIN_TU_SIZE;

    pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;

    /* WORD32 nbr_flags as below  MSB --> LSB */
    /*    Top-Left | Top-Right | Top   | Left    | Bottom-Left
     *       1         4         4       4         4
     */
    bot_left = 0;
    left = 0;
    top_right = 0;
    top = 0;
    top_left = 0;

    /* Number of 8x8 blocks along one edge of the TU (at least one) and */
    /* the matching mask of low bits                                    */
    num_8_blks = trans_size > 4 ? trans_size / 8 : 1;
    num_8_blks_in_bits = ((1 << num_8_blks) - 1);

    if(i1_constrained_intra_pred_flag)
    {
        /* TODO: constrained intra pred not tested */
        if(u1_bot_lt_avail)
        {
            x_nbr = x_cur - 1;
            y_nbr = y_cur + trans_size;

            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
                            + x_nbr / 64;
            intra_pos = ((x_nbr / 8) % 8);
            /* Vertical neighbours: one flag from each successive 8-row */
            for(i = 0; i < num_8_blks; i++)
            {
                bot_left |= ((*(pu1_nbr_intra_flag + i * numbytes_row)
                                >> intra_pos) & 1) << i;
            }
            bot_left &= num_8_blks_in_bits;
        }
        if(u1_left_avail)
        {
            x_nbr = x_cur - 1;
            y_nbr = y_cur;

            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
                            + x_nbr / 64;
            intra_pos = ((x_nbr / 8) % 8);

            for(i = 0; i < num_8_blks; i++)
            {
                left |= ((*(pu1_nbr_intra_flag + i * numbytes_row) >> intra_pos)
                                & 1) << i;
            }
            left &= num_8_blks_in_bits;
        }
        if(u1_top_avail)
        {
            x_nbr = x_cur;
            y_nbr = y_cur - 1;

            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
                            + x_nbr / 64;
            intra_pos = ((x_nbr / 8) % 8);

            /* Horizontal neighbours are adjacent bits of one byte */
            top = (*pu1_nbr_intra_flag >> intra_pos);
            top &= num_8_blks_in_bits;
        }
        if(u1_top_rt_avail)
        {
            x_nbr = x_cur + trans_size;
            y_nbr = y_cur - 1;

            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
                            + x_nbr / 64;
            intra_pos = ((x_nbr / 8) % 8);

            top_right = (*pu1_nbr_intra_flag >> intra_pos);
            top_right &= num_8_blks_in_bits;
        }
        if(u1_top_lt_avail)
        {
            x_nbr = x_cur - 1;
            y_nbr = y_cur - 1;

            pu1_nbr_intra_flag = pu1_pic_intra_flag + y_nbr / 8 * numbytes_row
                            + x_nbr / 64;
            intra_pos = ((x_nbr / 8) % 8);

            top_left = (*pu1_nbr_intra_flag >> intra_pos) & 1;
        }
    }
    else
    {
        if(u1_top_avail)
            top = 0xF;
        if(u1_top_rt_avail)
            top_right = 0xF;
        if(u1_bot_lt_avail)
            bot_left = 0xF;
        if(u1_left_avail)
            left = 0xF;
        if(u1_top_lt_avail)
            top_left = 0x1;
    }

    /* Handling incomplete CTBs */
    {
        WORD32 pu_size_limit = MIN(trans_size, 8);
        WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples
                        - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)
                        - (ps_tu->b4_pos_x * MIN_TU_SIZE)
                        - (1 << (ps_tu->b3_size + 2));
        /* ctb_size_top gives number of valid pixels remaining in the current row */
        WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
        WORD32 ctb_size_top_bits = (1 << (ctb_size_top / pu_size_limit)) - 1;

        WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)
                        - (ps_tu->b4_pos_y * MIN_TU_SIZE)
                        - (1 << (ps_tu->b3_size + 2));
        /* ctb_size_bot gives number of valid pixels remaining in the current column */
        WORD32 ctb_size_bot = MIN(ctb_size, rows_remaining);
        WORD32 ctb_size_bot_bits = (1 << (ctb_size_bot / pu_size_limit)) - 1;

        top_right &= ctb_size_top_bits;
        bot_left &= ctb_size_bot_bits;
    }

    /*    Top-Left | Top-Right | Top   | Left    | Bottom-Left
     *       1         4         4       4         4
     */
    /* Every field is limited to its 4 bits: this keeps the index into the */
    /* 16 entry reverse table in range and stops one field spilling into   */
    /* the next should trans_size ever exceed 32. It is a no-op otherwise. */
    nbr_flags = (top_left << 16)
                    | ((top_right & 0xF) << 12)
                    | ((top & 0xF) << 8)
                    | (gau4_ihevcd_4_bit_reverse[left & 0xF] << 4)
                    | gau4_ihevcd_4_bit_reverse[bot_left & 0xF];

    return nbr_flags;
}
529
ihevcd_iquant_itrans_recon_ctb(process_ctxt_t * ps_proc)530 WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
531 {
532 WORD16 *pi2_scaling_mat;
533 UWORD8 *pu1_y_dst_ctb;
534 UWORD8 *pu1_uv_dst_ctb;
535 WORD32 ctb_size;
536 codec_t *ps_codec;
537 slice_header_t *ps_slice_hdr;
538 tu_t *ps_tu;
539 WORD16 *pi2_ctb_coeff;
540 WORD32 tu_cnt;
541 WORD16 *pi2_tu_coeff;
542 WORD16 *pi2_tmp;
543 WORD32 pic_strd;
544 WORD32 luma_nbr_flags;
545 WORD32 chroma_nbr_flags = 0;
546 UWORD8 u1_luma_pred_mode_first_tu = 0;
547 /* Pointers for generating 2d coeffs from coeff-map */
548 UWORD8 *pu1_tu_coeff_data;
549 /* nbr avail map for CTB */
550 /* 1st bit points to neighbor (left/top_left/bot_left) */
551 /* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
552 UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE
553 + 2 /* Top nbr + bot nbr */]; UWORD32
554 top_avail_bits;
555 sps_t *ps_sps;
556 pps_t *ps_pps;
557 WORD32 intra_flag;
558 UWORD8 *pu1_pic_intra_flag;
559 /*************************************************************************/
560 /* Contanis scaling matrix offset in the following order in a 1D buffer */
561 /* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */
562 /* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */
563 /* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */
564 /* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */
565 /* Intra 16x16 Y, 16x16 U, 16x16 V */
566 /* Inter 16x16 Y, 16x16 U, 16x16 V */
567 /* Intra 32x32 Y */
568 /* Inter 32x32 Y */
569 /*************************************************************************/
570 /* Only first 20 entries are used. Array is extended to avoid out of bound
571 reads. Skip CUs (64x64) read this table, but don't really use the value */
572 static const WORD32 scaling_mat_offset[] =
573 { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
574 1248, 1504, 1760, 2016, 3040, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
575
576 PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
577
578 ps_sps = ps_proc->ps_sps;
579 ps_pps = ps_proc->ps_pps;
580 ps_slice_hdr = ps_proc->ps_slice_hdr;
581 ps_codec = ps_proc->ps_codec;
582
583 pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
584 pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
585
586 pi2_ctb_coeff = ps_proc->pi2_invscan_out;
587
588 ctb_size = (1 << ps_sps->i1_log2_ctb_size);
589 pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
590
591 pic_strd = ps_codec->i4_strd;
592
593 pi2_tmp = ps_proc->pi2_itrans_intrmd_buf;
594
595 pi2_tu_coeff = pi2_ctb_coeff;
596
597 ps_tu = ps_proc->ps_tu;
598
599 if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
600 {
601 pi2_scaling_mat = ps_pps->pi2_scaling_mat;
602 }
603 else
604 {
605 pi2_scaling_mat = ps_sps->pi2_scaling_mat;
606 }
607
608 {
609 /* Updating the initial availability map */
610 WORD32 i;
611 UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
612 u1_top_ctb_avail;
613
614 u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
615 u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
616 u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
617 u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
618
619 /* Initializing the availability array */
620 memset(au4_intra_nbr_avail, 0,
621 (MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
622 /* Initializing the availability array with CTB level availability flags */
623 {
624 WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
625 WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
626 for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
627 {
628 au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
629 }
630 }
631 au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
632 >> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
633
634 au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
635
636 {
637 WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
638 WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
639 WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
640
641 /* ctb_size_top gives number of valid pixels remaining in the current row */
642 /* Since we need pattern of 1's starting from the MSB, an additional shift */
643 /* is needed */
644 shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
645
646 top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
647 << shift;
648 }
649 au4_intra_nbr_avail[0] |= (
650 (u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
651 /* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
652
653 }
654
655 /* Applying Inverse transform on all the TU's in CTB */
656 for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
657 {
658 WORD32 transform_skip_flag = 0;
659 WORD32 transform_skip_flag_v = 0;
660 WORD32 num_comp, c_idx, func_idx;
661 WORD32 src_strd, pred_strd, dst_strd;
662 WORD32 qp_div = 0, qp_rem = 0;
663 WORD32 qp_div_v = 0, qp_rem_v = 0;
664 UWORD32 zero_cols = 0, zero_cols_v = 0;
665 UWORD32 zero_rows = 0, zero_rows_v = 0;
666 UWORD32 coeff_type = 0, coeff_type_v = 0;
667 WORD16 i2_coeff_value, i2_coeff_value_v;
668 WORD32 trans_size = 0;
669 TRANSFORM_TYPE e_trans_type;
670 WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
671 WORD32 log2_trans_size;
672 WORD32 chroma_qp_idx;
673 WORD16 *pi2_src = NULL, *pi2_src_v = NULL;
674 UWORD8 *pu1_pred = NULL, *pu1_pred_v = NULL;
675 UWORD8 *pu1_dst = NULL, *pu1_dst_v = NULL;
676 WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
677 WORD32 tu_x, tu_y;
678 WORD32 tu_y_offset, tu_uv_offset;
679 WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
680 UWORD8 u1_cbf = 0, u1_cbf_v = 0, u1_luma_pred_mode, u1_chroma_pred_mode;
681 WORD32 luma_nbr_flags_4x4[4];
682 WORD32 offset;
683 WORD32 pcm_flag;
684 WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
685 /* If 420SP_VU is chroma format, pred and dst pointer */
686 /* will be added +1 to point to U */
687 WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
688 /* If 420SP_VU is chroma format, pred and dst pointer */
689 /* will be added U offset of +1 and subtracted 2 */
690 /* to point to V */
691 WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
692
693 tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
694 tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
695 {
696 WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
697 WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
698
699 WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
700
701 pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
702 pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
703 pu1_pic_intra_flag += (tu_abs_x >> 6);
704
705 intra_flag = *pu1_pic_intra_flag;
706 intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
707 }
708
709 u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
710 u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
711
712 if(u1_chroma_pred_mode != 7)
713 num_comp = 2; /* Y and UV */
714 else
715 num_comp = 1; /* Y */
716
717
718 pcm_flag = 0;
719
720 if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
721 {
722 UWORD8 *pu1_buf;
723 UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
724 UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
725 WORD32 i, j;
726 tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
727 WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
728
729 /* trans_size is used to update availability after reconstruction */
730 trans_size = cb_size;
731
732 pcm_flag = 1;
733
734 tu_y_offset = tu_x + tu_y * pic_strd;
735 pu1_y_dst += tu_x + tu_y * pic_strd;
736 pu1_uv_dst += tu_x + (tu_y >> 1) * pic_strd;
737
738 /* First byte points to number of coded blocks */
739 pu1_tu_coeff_data++;
740
741 /* Next byte points to scan type */
742 pu1_tu_coeff_data++;
743
744 ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
745
746 pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
747 {
748
749 for(i = 0; i < cb_size; i++)
750 {
751 //pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
752 memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
753 pu1_buf += cb_size;
754 }
755
756 pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
757
758 /* U */
759 for(i = 0; i < cb_size / 2; i++)
760 {
761 for(j = 0; j < cb_size / 2; j++)
762 {
763 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
764 }
765 }
766
767 pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
768
769 /* V */
770 for(i = 0; i < cb_size / 2; i++)
771 {
772 for(j = 0; j < cb_size / 2; j++)
773 {
774 pu1_uv_dst[i * pic_strd + 2 * j] = *pu1_buf++;
775 }
776 }
777 }
778
779 pu1_tu_coeff_data = pu1_buf;
780
781 }
782
783
784
785
786
787 for(c_idx = 0; c_idx < num_comp; c_idx++)
788 {
789 if(0 == pcm_flag)
790 {
791 /* Initializing variables */
792 pred_strd = pic_strd;
793 dst_strd = pic_strd;
794
795 if(c_idx == 0) /* Y */
796 {
797 log2_y_trans_size_minus_2 = ps_tu->b3_size;
798 trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
799 log2_trans_size = log2_y_trans_size_minus_2 + 2;
800
801 tu_y_offset = tu_x + tu_y * pic_strd;
802
803 pi2_src = pi2_tu_coeff;
804 pu1_pred = pu1_y_dst_ctb + tu_y_offset;
805 pu1_dst = pu1_y_dst_ctb + tu_y_offset;
806
807 /* Calculating scaling matrix offset */
808 offset = log2_y_trans_size_minus_2 * 6
809 + (!intra_flag)
810 * ((log2_y_trans_size_minus_2
811 == 3) ? 1 : 3)
812 + c_idx;
813 pi2_dequant_matrix = pi2_scaling_mat
814 + scaling_mat_offset[offset];
815
816 src_strd = trans_size;
817
818 /* 4x4 transform Luma in INTRA mode is DST */
819 if(log2_y_trans_size_minus_2 == 0 && intra_flag)
820 {
821 func_idx = log2_y_trans_size_minus_2;
822 e_trans_type = DST_4x4;
823 }
824 else
825 {
826 func_idx = log2_y_trans_size_minus_2 + 1;
827 e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
828 }
829
830 qp_div = ps_tu->b7_qp / 6;
831 qp_rem = ps_tu->b7_qp % 6;
832
833 u1_cbf = ps_tu->b1_y_cbf;
834
835 transform_skip_flag = pu1_tu_coeff_data[1] & 1;
836 /* Unpacking coeffs */
837 if(1 == u1_cbf)
838 {
839 pu1_tu_coeff_data = ihevcd_unpack_coeffs(
840 pi2_src, log2_y_trans_size_minus_2 + 2,
841 pu1_tu_coeff_data, pi2_dequant_matrix,
842 qp_rem, qp_div, e_trans_type,
843 ps_tu->b1_transquant_bypass, &zero_cols,
844 &zero_rows, &coeff_type,
845 &i2_coeff_value);
846 }
847 }
848 else /* UV interleaved */
849 {
850 /* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
851 if(ps_tu->b3_size == 0)
852 {
853 /* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma postion has to be (luma pos x- 4,luma pos y- 4) */
854 log2_uv_trans_size_minus_2 = ps_tu->b3_size;
855 tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / 2) * pic_strd;
856 }
857 else
858 {
859 log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
860 tu_uv_offset = tu_x + (tu_y >> 1) * pic_strd;
861 }
862 trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
863 log2_trans_size = log2_uv_trans_size_minus_2 + 2;
864
865 pi2_src = pi2_tu_coeff;
866 pi2_src_v = pi2_tu_coeff + trans_size * trans_size;
867 pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
868 pu1_pred_v = pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
869 pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
870 pu1_dst_v = pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
871
872 /*TODO: Add support for choosing different tables for U and V,
873 * change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
874 */
875 /* Calculating scaling matrix offset */
876 /* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
877 * max uv trans size is 16x16
878 */
879 offset = log2_uv_trans_size_minus_2 * 6
880 + (!intra_flag) * 3 + c_idx;
881 pi2_dequant_matrix = pi2_scaling_mat
882 + scaling_mat_offset[offset];
883 pi2_dequant_matrix_v = pi2_scaling_mat
884 + scaling_mat_offset[offset + 1];
885
886 src_strd = trans_size;
887
888 func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
889
890 /* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma.
891 * By limiting func_idx to 7, max of 16x16 chroma is called */
892 func_idx = MIN(func_idx, 7);
893
894 e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
895 /* QP for U */
896 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
897 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
898 u1_cbf = ps_tu->b1_cb_cbf;
899
900 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
901 + i1_chroma_slice_qp_offset;
902 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
903 qp_div = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
904 qp_rem = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
905
906 /* QP for V */
907 i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
908 i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
909 u1_cbf_v = ps_tu->b1_cr_cbf;
910
911 chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset
912 + i1_chroma_slice_qp_offset;
913 chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
914 qp_div_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
915 qp_rem_v = gai2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
916
917 /* Unpacking coeffs */
918 transform_skip_flag = pu1_tu_coeff_data[1] & 1;
919 if(1 == u1_cbf)
920 {
921 pu1_tu_coeff_data = ihevcd_unpack_coeffs(
922 pi2_src, log2_uv_trans_size_minus_2 + 2,
923 pu1_tu_coeff_data, pi2_dequant_matrix,
924 qp_rem, qp_div, e_trans_type,
925 ps_tu->b1_transquant_bypass, &zero_cols,
926 &zero_rows, &coeff_type,
927 &i2_coeff_value);
928 }
929
930 transform_skip_flag_v = pu1_tu_coeff_data[1] & 1;
931 if(1 == u1_cbf_v)
932 {
933 pu1_tu_coeff_data = ihevcd_unpack_coeffs(
934 pi2_src_v, log2_uv_trans_size_minus_2 + 2,
935 pu1_tu_coeff_data, pi2_dequant_matrix_v,
936 qp_rem_v, qp_div_v, e_trans_type,
937 ps_tu->b1_transquant_bypass, &zero_cols_v,
938 &zero_rows_v, &coeff_type_v, &i2_coeff_value_v);
939 }
940 }
941 /***************************************************************/
942 /****************** Intra Prediction **************************/
943 /***************************************************************/
944 if(intra_flag) /* Intra */
945 {
946 /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actaul size needed,
947 au1_ref_sub_out size is kept as multiple of 8,
948 so that SIMD functions can load 64 bits */
949 UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 8];
950 UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
951 WORD32 luma_pred_func_idx, chroma_pred_func_idx;
952
953 /* Get the neighbour availability flags */
954 /* Done for only Y */
955 if(c_idx == 0)
956 {
957 /* Get neighbor availability for Y only */
958 luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
959 ps_tu,
960 au4_intra_nbr_avail,
961 ps_sps->i2_pic_width_in_luma_samples,
962 ps_pps->i1_constrained_intra_pred_flag,
963 trans_size,
964 ctb_size);
965
966 if(trans_size == 4)
967 luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
968
969 if((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0))
970 {
971 chroma_nbr_flags = luma_nbr_flags;
972 }
973
974 /* Initializing nbr pointers */
975 pu1_top = pu1_pred - pic_strd;
976 pu1_left = pu1_pred - 1;
977 pu1_top_left = pu1_pred - pic_strd - 1;
978
979 /* call reference array substitution */
980 if(luma_nbr_flags == 0x1ffff)
981 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
982 pu1_top_left,
983 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
984 else
985 ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
986 pu1_top_left,
987 pu1_top, pu1_left, pred_strd, trans_size, luma_nbr_flags, au1_ref_sub_out, 1);
988
989 /* call reference filtering */
990 ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
991 au1_ref_sub_out, trans_size,
992 au1_ref_sub_out,
993 u1_luma_pred_mode, ps_sps->i1_strong_intra_smoothing_enable_flag);
994
995 /* use the look up to get the function idx */
996 luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
997
998 /* call the intra prediction function */
999 ps_codec->apf_intra_pred_luma[luma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred, pred_strd, trans_size, u1_luma_pred_mode);
1000 }
1001 else
1002 {
1003 /* In case of yuv420sp_vu, prediction happens as usual. */
1004 /* So point the pu1_pred pointer to original prediction pointer */
1005 UWORD8 *pu1_pred_orig = pu1_pred - chroma_yuv420sp_vu_u_offset;
1006
1007 /* Top-Left | Top-Right | Top | Left | Bottom-Left
1008 * 1 4 4 4 4
1009 *
1010 * Generating chroma_nbr_flags depending upon the transform size */
1011 if(ps_tu->b3_size == 0)
1012 {
1013 /* Take TL,T,L flags of First luma 4x4 block */
1014 chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
1015 /* Take TR flags of Second luma 4x4 block */
1016 chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
1017 /* Take BL flags of Third luma 4x4 block */
1018 chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
1019 }
1020
1021 /* Initializing nbr pointers */
1022 pu1_top = pu1_pred_orig - pic_strd;
1023 pu1_left = pu1_pred_orig - 2;
1024 pu1_top_left = pu1_pred_orig - pic_strd - 2;
1025
1026 /* Chroma pred mode derivation from luma pred mode */
1027 {
1028 tu_t *ps_tu_tmp = ps_tu;
1029 while(!ps_tu_tmp->b1_first_tu_in_cu)
1030 {
1031 ps_tu_tmp--;
1032 }
1033 u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
1034 }
1035 if(4 == u1_chroma_pred_mode)
1036 u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
1037 else
1038 {
1039 u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
1040
1041 if(u1_chroma_pred_mode ==
1042 u1_luma_pred_mode_first_tu)
1043 {
1044 u1_chroma_pred_mode = INTRA_ANGULAR(34);
1045 }
1046 }
1047
1048 /* call the chroma reference array substitution */
1049 ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
1050 pu1_top_left,
1051 pu1_top, pu1_left, pic_strd, trans_size, chroma_nbr_flags, au1_ref_sub_out, 1);
1052
1053 /* use the look up to get the function idx */
1054 chroma_pred_func_idx =
1055 g_i4_ip_funcs[u1_chroma_pred_mode];
1056
1057 /* call the intra prediction function */
1058 ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](au1_ref_sub_out, 1, pu1_pred_orig, pred_strd, trans_size, u1_chroma_pred_mode);
1059 }
1060 }
1061
1062 /* Updating number of transform types */
1063 STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
1064
1065 /* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
1066 if(1 == u1_cbf)
1067 {
1068 if(ps_tu->b1_transquant_bypass || transform_skip_flag)
1069 {
1070 /* Recon */
1071 ps_codec->apf_recon[func_idx](pi2_src, pu1_pred, pu1_dst,
1072 src_strd, pred_strd, dst_strd,
1073 zero_cols);
1074 }
1075 else
1076 {
1077
1078 /* Updating coded number of transform types(excluding trans skip and trans quant skip) */
1079 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1080
1081 /* iQuant , iTrans and Recon */
1082 if((0 == coeff_type))
1083 {
1084 ps_codec->apf_itrans_recon[func_idx](pi2_src, pi2_tmp,
1085 pu1_pred, pu1_dst,
1086 src_strd, pred_strd,
1087 dst_strd, zero_cols,
1088 zero_rows);
1089 }
1090 else /* DC only */
1091 {
1092 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1093 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred, pu1_dst,
1094 pred_strd, dst_strd,
1095 log2_trans_size,
1096 i2_coeff_value);
1097 }
1098 }
1099 }
1100 /* IQ, IT and Recon for V */
1101 if(c_idx != 0)
1102 {
1103 if(1 == u1_cbf_v)
1104 {
1105 if(ps_tu->b1_transquant_bypass || transform_skip_flag_v)
1106 {
1107 /* Recon */
1108 ps_codec->apf_recon[func_idx](pi2_src_v, pu1_pred_v,
1109 pu1_dst_v, src_strd,
1110 pred_strd, dst_strd,
1111 zero_cols_v);
1112 }
1113 else
1114 {
1115 /* Updating number of transform types */
1116 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 0);
1117
1118 /* iQuant , iTrans and Recon */
1119 if((0 == coeff_type_v))
1120 {
1121 ps_codec->apf_itrans_recon[func_idx](pi2_src_v,
1122 pi2_tmp,
1123 pu1_pred_v,
1124 pu1_dst_v,
1125 src_strd,
1126 pred_strd,
1127 dst_strd,
1128 zero_cols_v,
1129 zero_rows_v);
1130 }
1131 else /* DC only */
1132 {
1133 STATS_UPDATE_CODED_TRANS(e_trans_type, c_idx, 1);
1134 ps_codec->apf_itrans_recon_dc[c_idx](pu1_pred_v, pu1_dst_v,
1135 pred_strd, dst_strd,
1136 log2_trans_size,
1137 i2_coeff_value_v);
1138 }
1139 }
1140 }
1141 }
1142 }
1143
1144 /* Neighbor availability inside CTB */
1145 /* 1bit per 4x4. Indicates whether that 4x4 block has been reconstructed(available) */
1146 /* Used for neighbor availability in intra pred */
1147 if(c_idx == 0)
1148 {
1149 WORD32 i;
1150 WORD32 trans_in_min_tu;
1151 UWORD32 cur_tu_in_bits;
1152 UWORD32 cur_tu_avail_flag;
1153
1154 trans_in_min_tu = trans_size / MIN_TU_SIZE;
1155 cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
1156 cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
1157
1158 cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
1159
1160 for(i = 0; i < trans_in_min_tu; i++)
1161 au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
1162 cur_tu_avail_flag;
1163 }
1164 }
1165 }
1166 ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
1167
1168 return ps_proc->i4_ctb_tu_cnt;
1169 }
1170
1171