1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ih264_iquant_itrans_recon.c
24 *
25 * @brief
26 * Contains definition of functions for h264 inverse quantization inverse transformation and recon
27 *
28 * @author
29 * Ittiam
30 *
31 * @par List of Functions:
32 * - ih264_iquant_itrans_recon_4x4()
33 * - ih264_iquant_itrans_recon_8x8()
34 * - ih264_iquant_itrans_recon_4x4_dc()
35 * - ih264_iquant_itrans_recon_8x8_dc()
36 * - ih264_iquant_itrans_recon_chroma_4x4()
37 * - ih264_iquant_itrans_recon_chroma_4x4_dc()
38 *
39 * @remarks
40 *
41 *******************************************************************************
42 */
43
44 /*****************************************************************************/
45 /* File Includes */
46 /*****************************************************************************/
47
48 /* User include files */
49 #include "ih264_typedefs.h"
50 #include "ih264_defs.h"
51 #include "ih264_trans_macros.h"
52 #include "ih264_macros.h"
53 #include "ih264_platform_macros.h"
54 #include "ih264_trans_data.h"
55 #include "ih264_size_defs.h"
56 #include "ih264_structs.h"
57 #include "ih264_trans_quant_itrans_iquant.h"
58
59 /*
60 ********************************************************************************
61 *
62 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
63 * prediction buffer
64 *
65 * @par Description:
66 * The quantized residue is first inverse quantized, then inverse transformed.
67 * This inverse transformed content is added to the prediction buffer to recon-
68 * struct the end output
69 *
70 * @param[in] pi2_src
71 * quantized 4x4 block
72 *
73 * @param[in] pu1_pred
74 * prediction 4x4 block
75 *
76 * @param[out] pu1_out
77 * reconstructed 4x4 block
78 *
79 * @param[in] src_strd
80 * quantization buffer stride
81 *
82 * @param[in] pred_strd,
83 * Prediction buffer stride
84 *
85 * @param[in] out_strd
86 * recon buffer Stride
87 *
88 * @param[in] pu2_scaling_list
89 * pointer to scaling list
90 *
91 * @param[in] pu2_norm_adjust
92 * pointer to inverse scale matrix
93 *
94 * @param[in] u4_qp_div_6
95 * Floor (qp/6)
96 *
97 * @param[in] pi4_tmp
98 * temporary buffer of size 1*16
99 *
100 * @returns none
101 *
102 * @remarks none
103 *
104 *******************************************************************************
105 */
ih264_iquant_itrans_recon_4x4(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)106 void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
107 UWORD8 *pu1_pred,
108 UWORD8 *pu1_out,
109 WORD32 pred_strd,
110 WORD32 out_strd,
111 const UWORD16 *pu2_iscal_mat,
112 const UWORD16 *pu2_weigh_mat,
113 UWORD32 u4_qp_div_6,
114 WORD16 *pi2_tmp,
115 WORD32 iq_start_idx,
116 WORD16 *pi2_dc_ld_addr
117 )
118 {
119 WORD16 *pi2_src_ptr = pi2_src;
120 WORD16 *pi2_tmp_ptr = pi2_tmp;
121 UWORD8 *pu1_pred_ptr = pu1_pred;
122 UWORD8 *pu1_out_ptr = pu1_out;
123 WORD16 x0, x1, x2, x3, i;
124 WORD32 q0, q1, q2, q3;
125 WORD16 i_macro;
126 WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
127
128 /* inverse quant */
129 /*horizontal inverse transform */
130 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
131 {
132 q0 = pi2_src_ptr[0];
133 INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact,
134 4);
135 if (i==0 && iq_start_idx == 1)
136 q0 = pi2_dc_ld_addr[0]; // Restoring dc value for intra case
137
138 q2 = pi2_src_ptr[2];
139 INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
140 4);
141
142 x0 = q0 + q2;
143 x1 = q0 - q2;
144
145 q1 = pi2_src_ptr[1];
146 INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
147 4);
148
149 q3 = pi2_src_ptr[3];
150 INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
151 4);
152
153 x2 = (q1 >> 1) - q3;
154 x3 = q1 + (q3 >> 1);
155
156 pi2_tmp_ptr[0] = x0 + x3;
157 pi2_tmp_ptr[1] = x1 + x2;
158 pi2_tmp_ptr[2] = x1 - x2;
159 pi2_tmp_ptr[3] = x0 - x3;
160
161 pi2_src_ptr += SUB_BLK_WIDTH_4x4;
162 pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
163 pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
164 pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
165 }
166
167 /* vertical inverse transform */
168 pi2_tmp_ptr = pi2_tmp;
169 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
170 {
171 pu1_pred_ptr = pu1_pred;
172 pu1_out = pu1_out_ptr;
173
174 x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
175 x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
176 x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
177 x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
178
179 /* inverse prediction */
180 i_macro = x0 + x3;
181 i_macro = ((i_macro + 32) >> 6);
182 i_macro += *pu1_pred_ptr;
183 *pu1_out = CLIP_U8(i_macro);
184 pu1_pred_ptr += pred_strd;
185 pu1_out += out_strd;
186
187 i_macro = x1 + x2;
188 i_macro = ((i_macro + 32) >> 6);
189 i_macro += *pu1_pred_ptr;
190 *pu1_out = CLIP_U8(i_macro);
191 pu1_pred_ptr += pred_strd;
192 pu1_out += out_strd;
193
194 i_macro = x1 - x2;
195 i_macro = ((i_macro + 32) >> 6);
196 i_macro += *pu1_pred_ptr;
197 *pu1_out = CLIP_U8(i_macro);
198 pu1_pred_ptr += pred_strd;
199 pu1_out += out_strd;
200
201 i_macro = x0 - x3;
202 i_macro = ((i_macro + 32) >> 6);
203 i_macro += *pu1_pred_ptr;
204 *pu1_out = CLIP_U8(i_macro);
205
206 pi2_tmp_ptr++;
207 pu1_out_ptr++;
208 pu1_pred++;
209 }
210
211 }
212
/**
 *******************************************************************************
 *
 * @brief
 *  Reconstructs a 4x4 block when only the DC coefficient contributes to the
 *  residual
 *
 * @par Description:
 *  The DC value is either pi2_src[0] passed through INV_QUANT (when
 *  iq_start_idx is 0) or the value read from pi2_dc_ld_addr (otherwise).
 *  It is rounded with (dc + 32) >> 6, added to each of the 16 prediction
 *  samples, clipped with CLIP_U8 and stored to the output.
 *
 * @param[in] pi2_src
 *  Quantized coefficients; only element 0 is read, and only when
 *  iq_start_idx is 0
 *
 * @param[in] pu1_pred
 *  Prediction block; rows are pred_strd bytes apart
 *
 * @param[out] pu1_out
 *  Reconstructed block; rows are out_strd bytes apart
 *
 * @param[in] pred_strd
 *  Prediction buffer stride
 *
 * @param[in] out_strd
 *  Output (recon) buffer stride
 *
 * @param[in] pu2_iscal_mat
 *  Table handed to INV_QUANT; only element 0 is used
 *
 * @param[in] pu2_weigh_mat
 *  Table handed to INV_QUANT; only element 0 is used
 *
 * @param[in] u4_qp_div_6
 *  Forwarded to INV_QUANT and used to derive the rounding term, which is
 *  1 << (3 - u4_qp_div_6) below 4 and 0 otherwise
 *
 * @param[in] pi2_tmp
 *  Unused
 *
 * @param[in] iq_start_idx
 *  0 selects the dequantized pi2_src[0]; any other value selects
 *  pi2_dc_ld_addr[0]
 *
 * @param[in] pi2_dc_ld_addr
 *  Location of the replacement DC value; read only when iq_start_idx is
 *  non-zero
 *
 * @returns none
 *
 *******************************************************************************
 */
void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
                                      UWORD8 *pu1_pred,
                                      UWORD8 *pu1_out,
                                      WORD32 pred_strd,
                                      WORD32 out_strd,
                                      const UWORD16 *pu2_iscal_mat,
                                      const UWORD16 *pu2_weigh_mat,
                                      UWORD32 u4_qp_div_6,
                                      WORD16 *pi2_tmp,
                                      WORD32 iq_start_idx,
                                      WORD16 *pi2_dc_ld_addr)
{
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    WORD32 q0;
    WORD16 i_dc_res;
    WORD32 row, col;
    UNUSED(pi2_tmp);

    if(0 != iq_start_idx)
    {
        /* DC value supplied by the caller */
        q0 = pi2_dc_ld_addr[0];
    }
    else
    {
        q0 = pi2_src[0];
        INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
    }

    /* The same rounded residual is applied to every sample */
    i_dc_res = ((q0 + 32) >> 6);

    /* Column by column, top to bottom within each column */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        for(row = 0; row < 4; row++)
        {
            WORD16 x = i_dc_res + pu1_pred[row * pred_strd + col];

            pu1_out[row * out_strd + col] = CLIP_U8(x);
        }
    }
}
271
272 /**
273 *******************************************************************************
274 *
275 * @brief
276 * This function performs inverse quant and Inverse transform type Ci4 for 8x8 block
277 *
278 * @par Description:
279 * Performs inverse transform Ci8 and adds the residue to get the
280 * reconstructed block
281 *
282 * @param[in] pi2_src
283 * Input 8x8coefficients
284 *
285 * @param[in] pu1_pred
286 * Prediction 8x8 block
287 *
288 * @param[out] pu1_recon
289 * Output 8x8 block
290 *
291 * @param[in] q_div
292 * QP/6
293 *
294 * @param[in] q_rem
295 * QP%6
296 *
297 * @param[in] q_lev
298 * Quantizer level
299 *
300 * @param[in] src_strd
301 * Input stride
302 *
303 * @param[in] pred_strd,
304 * Prediction stride
305 *
306 * @param[in] out_strd
307 * Output Stride
308 *
309 * @param[in] pi4_tmp
310 * temporary buffer of size 1*16 we dont need a bigger blcok since we reuse
311 * the tmp for each block
312 *
313 * @param[in] pu4_iquant_mat
314 * Pointer to the inverse quantization matrix
315 *
316 * @returns Void
317 *
318 * @remarks
319 * None
320 *
321 *******************************************************************************
322 */
ih264_iquant_itrans_recon_8x8(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscale_mat,const UWORD16 * pu2_weigh_mat,UWORD32 qp_div,WORD16 * pi2_tmp,WORD32 iq_start_idx,WORD16 * pi2_dc_ld_addr)323 void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
324 UWORD8 *pu1_pred,
325 UWORD8 *pu1_out,
326 WORD32 pred_strd,
327 WORD32 out_strd,
328 const UWORD16 *pu2_iscale_mat,
329 const UWORD16 *pu2_weigh_mat,
330 UWORD32 qp_div,
331 WORD16 *pi2_tmp,
332 WORD32 iq_start_idx,
333 WORD16 *pi2_dc_ld_addr
334 )
335 {
336 WORD32 i;
337 WORD16 *pi2_tmp_ptr = pi2_tmp;
338 UWORD8 *pu1_pred_ptr = pu1_pred;
339 UWORD8 *pu1_out_ptr = pu1_out;
340 WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
341 WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
342 WORD16 i_macro;
343 WORD32 q;
344 WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
345 UNUSED(iq_start_idx);
346 UNUSED(pi2_dc_ld_addr);
347 /*************************************************************/
348 /* De quantization of coefficients. Will be replaced by SIMD */
349 /* operations on platform. Note : DC coeff is not scaled */
350 /*************************************************************/
351 for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
352 {
353 q = pi2_src[i];
354 INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
355 pi2_tmp_ptr[i] = q;
356 }
357 /* Perform Inverse transform */
358 /*--------------------------------------------------------------------*/
359 /* IDCT [ Horizontal transformation ] */
360 /*--------------------------------------------------------------------*/
361 for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
362 {
363 /*------------------------------------------------------------------*/
364 /* y0 = w0 + w4 */
365 /* y1 = -w3 + w5 - w7 - (w7 >> 1) */
366 /* y2 = w0 - w4 */
367 /* y3 = w1 + w7 - w3 - (w3 >> 1) */
368 /* y4 = (w2 >> 1) - w6 */
369 /* y5 = -w1 + w7 + w5 + (w5 >> 1) */
370 /* y6 = w2 + (w6 >> 1) */
371 /* y7 = w3 + w5 + w1 + (w1 >> 1) */
372 /*------------------------------------------------------------------*/
373 i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
374
375 i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
376 - (pi2_tmp_ptr[7] >> 1));
377
378 i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
379
380 i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
381 - (pi2_tmp_ptr[3] >> 1));
382
383 i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
384
385 i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
386 + (pi2_tmp_ptr[5] >> 1));
387
388 i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
389
390 i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
391 + (pi2_tmp_ptr[1] >> 1));
392
393 /*------------------------------------------------------------------*/
394 /* z0 = y0 + y6 */
395 /* z1 = y1 + (y7 >> 2) */
396 /* z2 = y2 + y4 */
397 /* z3 = y3 + (y5 >> 2) */
398 /* z4 = y2 - y4 */
399 /* z5 = (y3 >> 2) - y5 */
400 /* z6 = y0 - y6 */
401 /* z7 = y7 - (y1 >> 2) */
402 /*------------------------------------------------------------------*/
403 i_z0 = i_y0 + i_y6;
404 i_z1 = i_y1 + (i_y7 >> 2);
405 i_z2 = i_y2 + i_y4;
406 i_z3 = i_y3 + (i_y5 >> 2);
407 i_z4 = i_y2 - i_y4;
408 i_z5 = (i_y3 >> 2) - i_y5;
409 i_z6 = i_y0 - i_y6;
410 i_z7 = i_y7 - (i_y1 >> 2);
411
412 /*------------------------------------------------------------------*/
413 /* x0 = z0 + z7 */
414 /* x1 = z2 + z5 */
415 /* x2 = z4 + z3 */
416 /* x3 = z6 + z1 */
417 /* x4 = z6 - z1 */
418 /* x5 = z4 - z3 */
419 /* x6 = z2 - z5 */
420 /* x7 = z0 - z7 */
421 /*------------------------------------------------------------------*/
422 pi2_tmp_ptr[0] = i_z0 + i_z7;
423 pi2_tmp_ptr[1] = i_z2 + i_z5;
424 pi2_tmp_ptr[2] = i_z4 + i_z3;
425 pi2_tmp_ptr[3] = i_z6 + i_z1;
426 pi2_tmp_ptr[4] = i_z6 - i_z1;
427 pi2_tmp_ptr[5] = i_z4 - i_z3;
428 pi2_tmp_ptr[6] = i_z2 - i_z5;
429 pi2_tmp_ptr[7] = i_z0 - i_z7;
430
431 /* move to the next row */
432 //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
433 pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
434 }
435 /*--------------------------------------------------------------------*/
436 /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6 */
437 /* */
438 /* Add the prediction and store it back to reconstructed frame buffer */
439 /* [Prediction buffer itself in this case] */
440 /*--------------------------------------------------------------------*/
441
442 pi2_tmp_ptr = pi2_tmp;
443 for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
444 {
445 pu1_pred_ptr = pu1_pred;
446 pu1_out = pu1_out_ptr;
447 /*------------------------------------------------------------------*/
448 /* y0j = w0j + w4j */
449 /* y1j = -w3j + w5j -w7j -(w7j >> 1) */
450 /* y2j = w0j -w4j */
451 /* y3j = w1j + w7j -w3j -(w3j >> 1) */
452 /* y4j = ( w2j >> 1 ) -w6j */
453 /* y5j = -w1j + w7j + w5j + (w5j >> 1) */
454 /* y6j = w2j + ( w6j >> 1 ) */
455 /* y7j = w3j + w5j + w1j + (w1j >> 1) */
456 /*------------------------------------------------------------------*/
457 i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
458
459 i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
460 - (pi2_tmp_ptr[56] >> 1);
461
462 i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
463
464 i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
465 - (pi2_tmp_ptr[24] >> 1);
466
467 i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
468
469 i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
470 + (pi2_tmp_ptr[40] >> 1);
471
472 i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
473
474 i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
475 + (pi2_tmp_ptr[8] >> 1);
476
477 /*------------------------------------------------------------------*/
478 /* z0j = y0j + y6j */
479 /* z1j = y1j + (y7j >> 2) */
480 /* z2j = y2j + y4j */
481 /* z3j = y3j + (y5j >> 2) */
482 /* z4j = y2j -y4j */
483 /* z5j = (y3j >> 2) -y5j */
484 /* z6j = y0j -y6j */
485 /* z7j = y7j -(y1j >> 2) */
486 /*------------------------------------------------------------------*/
487 i_z0 = i_y0 + i_y6;
488 i_z1 = i_y1 + (i_y7 >> 2);
489 i_z2 = i_y2 + i_y4;
490 i_z3 = i_y3 + (i_y5 >> 2);
491 i_z4 = i_y2 - i_y4;
492 i_z5 = (i_y3 >> 2) - i_y5;
493 i_z6 = i_y0 - i_y6;
494 i_z7 = i_y7 - (i_y1 >> 2);
495
496 /*------------------------------------------------------------------*/
497 /* x0j = z0j + z7j */
498 /* x1j = z2j + z5j */
499 /* x2j = z4j + z3j */
500 /* x3j = z6j + z1j */
501 /* x4j = z6j -z1j */
502 /* x5j = z4j -z3j */
503 /* x6j = z2j -z5j */
504 /* x7j = z0j -z7j */
505 /*------------------------------------------------------------------*/
506 i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
507 *pu1_out = CLIP_U8(i_macro);
508 /* Change uc_recBuffer to Point to next element in the same column*/
509 pu1_pred_ptr += pred_strd;
510 pu1_out += out_strd;
511
512 i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
513 *pu1_out = CLIP_U8(i_macro);
514 pu1_pred_ptr += pred_strd;
515 pu1_out += out_strd;
516
517 i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
518 *pu1_out = CLIP_U8(i_macro);
519 pu1_pred_ptr += pred_strd;
520 pu1_out += out_strd;
521
522 i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
523 *pu1_out = CLIP_U8(i_macro);
524 pu1_pred_ptr += pred_strd;
525 pu1_out += out_strd;
526
527 i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
528 *pu1_out = CLIP_U8(i_macro);
529 pu1_pred_ptr += pred_strd;
530 pu1_out += out_strd;
531
532 i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
533 *pu1_out = CLIP_U8(i_macro);
534 pu1_pred_ptr += pred_strd;
535 pu1_out += out_strd;
536
537 i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
538 *pu1_out = CLIP_U8(i_macro);
539 pu1_pred_ptr += pred_strd;
540 pu1_out += out_strd;
541
542 i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
543 *pu1_out = CLIP_U8(i_macro);
544
545 pi2_tmp_ptr++;
546 pu1_out_ptr++;
547 pu1_pred++;
548 }
549 }
550
/**
 *******************************************************************************
 *
 * @brief
 *  Reconstructs an 8x8 block when only the DC coefficient contributes to the
 *  residual
 *
 * @par Description:
 *  pi2_src[0] is passed through INV_QUANT, rounded with (dc + 32) >> 6,
 *  added to each of the 64 prediction samples, clipped with CLIP_U8 and
 *  stored to the output.
 *
 * @param[in] pi2_src
 *  Quantized coefficients; only element 0 is read
 *
 * @param[in] pu1_pred
 *  Prediction block; rows are pred_strd bytes apart
 *
 * @param[out] pu1_out
 *  Reconstructed block; rows are out_strd bytes apart
 *
 * @param[in] pred_strd
 *  Prediction buffer stride
 *
 * @param[in] out_strd
 *  Output (recon) buffer stride
 *
 * @param[in] pu2_iscale_mat
 *  Table handed to INV_QUANT; only element 0 is used
 *
 * @param[in] pu2_weigh_mat
 *  Table handed to INV_QUANT; only element 0 is used
 *
 * @param[in] qp_div
 *  Forwarded to INV_QUANT and used to derive the rounding term, which is
 *  1 << (5 - qp_div) below 6 and 0 otherwise
 *
 * @param[in] pi2_tmp
 *  Unused
 *
 * @param[in] iq_start_idx
 *  Unused
 *
 * @param[in] pi2_dc_ld_addr
 *  Unused
 *
 * @returns none
 *
 *******************************************************************************
 */
void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
                                      UWORD8 *pu1_pred,
                                      UWORD8 *pu1_out,
                                      WORD32 pred_strd,
                                      WORD32 out_strd,
                                      const UWORD16 *pu2_iscale_mat,
                                      const UWORD16 *pu2_weigh_mat,
                                      UWORD32 qp_div,
                                      WORD16 *pi2_tmp,
                                      WORD32 iq_start_idx,
                                      WORD16 *pi2_dc_ld_addr)
{
    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
    WORD32 q;
    WORD16 i_dc_res;
    WORD32 row, col;
    UNUSED(pi2_tmp);
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    /* Inverse quantization of the single coefficient that is used */
    q = pi2_src[0];
    INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);

    /* The same rounded residual is applied to every sample */
    i_dc_res = (q + 32) >> 6;

    /* Column by column, top to bottom within each column */
    for(col = 0; col < SUB_BLK_WIDTH_8x8; col++)
    {
        for(row = 0; row < 8; row++)
        {
            WORD16 x = i_dc_res + pu1_pred[row * pred_strd + col];

            pu1_out[row * out_strd + col] = CLIP_U8(x);
        }
    }
}
636
637 /*
638 ********************************************************************************
639 *
640 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
641 * prediction buffer
642 *
643 * @par Description:
644 * The quantized residue is first inverse quantized, then inverse transformed.
645 * This inverse transformed content is added to the prediction buffer to recon-
646 * struct the end output
647 *
648 * @param[in] pi2_src
649 * quantized 4x4 block
650 *
651 * @param[in] pu1_pred
652 * prediction 4x4 block
653 *
654 * @param[out] pu1_out
655 * reconstructed 4x4 block
656 *
657 * @param[in] src_strd
658 * quantization buffer stride
659 *
660 * @param[in] pred_strd,
661 * Prediction buffer stride
662 *
663 * @param[in] out_strd
664 * recon buffer Stride
665 *
666 * @param[in] pu2_scaling_list
667 * pointer to scaling list
668 *
669 * @param[in] pu2_norm_adjust
670 * pointer to inverse scale matrix
671 *
672 * @param[in] u4_qp_div_6
673 * Floor (qp/6)
674 *
675 * @param[in] pi4_tmp
676 * temporary buffer of size 1*16
677 *
678 * @returns none
679 *
680 * @remarks none
681 *
682 *******************************************************************************
683 */
ih264_iquant_itrans_recon_chroma_4x4(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)684 void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
685 UWORD8 *pu1_pred,
686 UWORD8 *pu1_out,
687 WORD32 pred_strd,
688 WORD32 out_strd,
689 const UWORD16 *pu2_iscal_mat,
690 const UWORD16 *pu2_weigh_mat,
691 UWORD32 u4_qp_div_6,
692 WORD16 *pi2_tmp,
693 WORD16 *pi2_dc_src)
694 {
695 WORD16 *pi2_src_ptr = pi2_src;
696 WORD16 *pi2_tmp_ptr = pi2_tmp;
697 UWORD8 *pu1_pred_ptr = pu1_pred;
698 UWORD8 *pu1_out_ptr = pu1_out;
699 WORD16 x0, x1, x2, x3, i;
700 WORD32 q0, q1, q2, q3;
701 WORD16 i_macro;
702 WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
703
704 /* inverse quant */
705 /*horizontal inverse transform */
706 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
707 {
708 if(i==0)
709 {
710 q0 = pi2_dc_src[0];
711 }
712 else
713 {
714 q0 = pi2_src_ptr[0];
715 INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
716 }
717
718 q2 = pi2_src_ptr[2];
719 INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
720 4);
721
722 x0 = q0 + q2;
723 x1 = q0 - q2;
724
725 q1 = pi2_src_ptr[1];
726 INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
727 4);
728
729 q3 = pi2_src_ptr[3];
730 INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
731 4);
732
733 x2 = (q1 >> 1) - q3;
734 x3 = q1 + (q3 >> 1);
735
736 pi2_tmp_ptr[0] = x0 + x3;
737 pi2_tmp_ptr[1] = x1 + x2;
738 pi2_tmp_ptr[2] = x1 - x2;
739 pi2_tmp_ptr[3] = x0 - x3;
740
741 pi2_src_ptr += SUB_BLK_WIDTH_4x4;
742 pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
743 pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
744 pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
745 }
746
747 /* vertical inverse transform */
748 pi2_tmp_ptr = pi2_tmp;
749 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
750 {
751 pu1_pred_ptr = pu1_pred;
752 pu1_out = pu1_out_ptr;
753
754 x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
755 x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
756 x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
757 x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
758
759 /* inverse prediction */
760 i_macro = x0 + x3;
761 i_macro = ((i_macro + 32) >> 6);
762 i_macro += *pu1_pred_ptr;
763 *pu1_out = CLIP_U8(i_macro);
764 pu1_pred_ptr += pred_strd;
765 pu1_out += out_strd;
766
767 i_macro = x1 + x2;
768 i_macro = ((i_macro + 32) >> 6);
769 i_macro += *pu1_pred_ptr;
770 *pu1_out = CLIP_U8(i_macro);
771 pu1_pred_ptr += pred_strd;
772 pu1_out += out_strd;
773
774 i_macro = x1 - x2;
775 i_macro = ((i_macro + 32) >> 6);
776 i_macro += *pu1_pred_ptr;
777 *pu1_out = CLIP_U8(i_macro);
778 pu1_pred_ptr += pred_strd;
779 pu1_out += out_strd;
780
781 i_macro = x0 - x3;
782 i_macro = ((i_macro + 32) >> 6);
783 i_macro += *pu1_pred_ptr;
784 *pu1_out = CLIP_U8(i_macro);
785
786 pi2_tmp_ptr++;
787 pu1_out_ptr+= 2; //Interleaved store for output
788 pu1_pred+= 2; //Interleaved load for pred buffer
789 }
790 }
791
792 /*
793 ********************************************************************************
794 *
795 * @brief This function reconstructs a 4x4 sub block from quantized resiude and
796 * prediction buffer if only dc value is present for residue
797 *
798 * @par Description:
799 * The quantized residue is first inverse quantized,
800 * This inverse quantized content is added to the prediction buffer to recon-
801 * struct the end output
802 *
803 * @param[in] pi2_src
804 * quantized dc coefficient
805 *
806 * @param[in] pu1_pred
807 * prediction 4x4 block in interleaved format
808 *
809 * @param[in] pred_strd,
810 * Prediction buffer stride in interleaved format
811 *
812 * @param[in] out_strd
813 * recon buffer Stride
814 *
815 * @returns none
816 *
817 * @remarks none
818 *
819 *******************************************************************************
820 */
821
ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 * pi2_src,UWORD8 * pu1_pred,UWORD8 * pu1_out,WORD32 pred_strd,WORD32 out_strd,const UWORD16 * pu2_iscal_mat,const UWORD16 * pu2_weigh_mat,UWORD32 u4_qp_div_6,WORD16 * pi2_tmp,WORD16 * pi2_dc_src)822 void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
823 UWORD8 *pu1_pred,
824 UWORD8 *pu1_out,
825 WORD32 pred_strd,
826 WORD32 out_strd,
827 const UWORD16 *pu2_iscal_mat,
828 const UWORD16 *pu2_weigh_mat,
829 UWORD32 u4_qp_div_6,
830 WORD16 *pi2_tmp,
831 WORD16 *pi2_dc_src)
832 {
833 UWORD8 *pu1_pred_ptr = pu1_pred;
834 UWORD8 *pu1_out_ptr = pu1_out;
835 WORD32 q0;
836 WORD16 x, i_macro, i;
837 UNUSED(pi2_src);
838 UNUSED(pu2_iscal_mat);
839 UNUSED(pu2_weigh_mat);
840 UNUSED(u4_qp_div_6);
841 UNUSED(pi2_tmp);
842
843 q0 = pi2_dc_src[0]; // Restoring dc value for intra case3
844 i_macro = ((q0 + 32) >> 6);
845
846 for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
847 {
848 pu1_pred_ptr = pu1_pred;
849 pu1_out = pu1_out_ptr;
850
851 /* inverse prediction */
852 x = i_macro + *pu1_pred_ptr;
853 *pu1_out = CLIP_U8(x);
854 pu1_pred_ptr += pred_strd;
855 pu1_out += out_strd;
856
857 x = i_macro + *pu1_pred_ptr;
858 *pu1_out = CLIP_U8(x);
859 pu1_pred_ptr += pred_strd;
860 pu1_out += out_strd;
861
862 x = i_macro + *pu1_pred_ptr;
863 *pu1_out = CLIP_U8(x);
864 pu1_pred_ptr += pred_strd;
865 pu1_out += out_strd;
866
867 x = i_macro + *pu1_pred_ptr;
868 *pu1_out = CLIP_U8(x);
869
870 pu1_out_ptr+=2;
871 pu1_pred+=2;
872 }
873 }
874