1 /******************************************************************************
2 *
3 * Copyright (C) 2022 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ih264_resi_trans_quant.c
24 *
25 * @brief
26 * Contains function definitions single stage forward transform for H.264
27 * It will calculate the residue, do the cf and then do quantization
28 *
29 * @author
30 * Ittiam
31 *
32 * @par List of Functions:
33 * - ih264_resi_trans_quant_4x4()
34 * - ih264_resi_trans_quant_chroma_4x4
35 * - ih264_hadamard_quant_4x4
36 * - ih264_hadamard_quant_2x2_uv
37 * - ih264_resi_trans_quant_8x8
38 *
39 * @remarks
40 *******************************************************************************
41 */
42 /* System include files */
43 #include <stdbool.h>
44 #include <stddef.h>
45
46 /* User include files */
47 #include "ih264_typedefs.h"
48 #include "ih264_defs.h"
49 #include "ih264_size_defs.h"
50 #include "ih264_macros.h"
51 #include "ih264_trans_macros.h"
52 #include "ih264_trans_data.h"
53 #include "ih264_structs.h"
54 #include "isvc_trans_quant_itrans_iquant.h"
55
isvc_subtract_upsampled_res(WORD16 i2_residue,WORD16 i2_upsampled_res)56 static FORCEINLINE WORD16 isvc_subtract_upsampled_res(WORD16 i2_residue, WORD16 i2_upsampled_res)
57 {
58 return (CLIP3(-((WORD16) UINT8_MAX), ((WORD16) UINT8_MAX), i2_residue - i2_upsampled_res));
59 }
60
61 /**
62 *******************************************************************************
63 *
64 * @brief
65 * This function performs forward transform and quantization on a 4*4 block
66 *
67 * @par Description:
68 * The function accepts source buffer and estimation buffer. From these, it
 * computes the residue. This residue is then transformed and quantized.
70 * The transform and quantization are in placed computed. They use the residue
71 * buffer for this.
72 *
73 * @param[in] pu1_src
74 * Pointer to source sub-block
75 *
76 * @param[in] pu1_pred
77 * Pointer to prediction sub-block
78 *
79 * @param[in] pi2_out
80 * Pointer to residual sub-block
81 *
82 * @param[in] i4_src_stride
83 * Source stride
84 *
85 * @param[in] i4_pred_stride
86 * Prediction stride
87 *
88 * @param[in] dst_strd
89 * Destination stride
90 *
91 * @param[in] u4_qbits
92 * QP_BITS_h264_4x4 + floor(QP/6)
93 *
94 * @param[in] pu2_threshold_matrix
95 * Pointer to Forward Quant Threshold Matrix
96 *
97 * @param[in] pu2_scale_matrix
98 * Pointer to Forward Quant Scale Matrix
99 *
100 * @param[in] u4_round_factor
101 * Quantization Round factor
102 *
103 * @param[out] pu1_nnz
104 * Total non-zero coefficients in the current sub-block
105 *
106 * @returns
107 *
108 * @remarks
109 * None
110 *
111 *******************************************************************************
112 */
void isvc_resi_trans_quant_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
{
    UWORD32 i;
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
    WORD32 i4_value;

    UWORD8 *pu1_src = ps_src->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    WORD16 *pi2_out = ps_out->pv_data;
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    /* Pass 1: residue computation + horizontal transform, row by row */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* computing prediction error (residue) */
        x4 = pu1_src[0] - pu1_pred[0];
        x5 = pu1_src[1] - pu1_pred[1];
        x6 = pu1_src[2] - pu1_pred[2];
        x7 = pu1_src[3] - pu1_pred[3];

        /* SVC inter-layer residual prediction: refine residue against the
         * upsampled reference-layer residue */
        if(u1_use_upsampled_res)
        {
            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
        }

        /* Horizontal transform (4-point forward core transform butterfly) */
        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* 2 * x is used instead of x << 1: left-shifting a negative signed
         * value is undefined behavior in C; the multiply is equivalent */
        pi2_out_tmp[0] = x0 + x1;
        pi2_out_tmp[1] = (2 * x3) + x2;
        pi2_out_tmp[2] = x0 - x1;
        pi2_out_tmp[3] = x3 - (2 * x2);

        /* pointing to next row; */
        pu1_src += i4_src_stride;
        pu1_pred += i4_pred_stride;
        pi2_out_tmp += 4;

        /* advance only when valid: pointer arithmetic on NULL is UB even
         * with a zero stride */
        if(pi2_upsampled_res)
        {
            pi2_upsampled_res += i4_upsampled_res_stride;
        }
    }

    pi2_out_tmp = pi2_out;

    /* Pass 2: vertical transform + quantization, column by column */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x4 = pi2_out_tmp[0];
        x5 = pi2_out_tmp[4];
        x6 = pi2_out_tmp[8];
        x7 = pi2_out_tmp[12];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* quantization is done in place */

        i4_value = x0 + x1;

        /* unquantized DC coefficient is exported for the separate
         * hadamard/DC path */
        if(i == 0)
        {
            *pi2_dc_out = i4_value;
        }

        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = i4_value;

        i4_value = (2 * x3) + x2;
        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[4] = i4_value;

        i4_value = x0 - x1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = i4_value;

        i4_value = x3 - (2 * x2);
        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
                  u4_qbits, u4_nonzero_coeff);
        pi2_out_tmp[12] = i4_value;

        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }

    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}
221
222 /**
223 *******************************************************************************
224 *
225 * @brief
226 * This function performs forward transform and quantization on a 4*4 chroma
227 *block with interleaved values
228 *
229 * @par Description:
230 * The function accepts source buffer and estimation buffer. From these, it
 * computes the residue. This residue is then transformed and quantized.
232 * The transform and quantization are in placed computed. They use the residue
233 * buffer for this.
234 *
235 * @param[in] pu1_src
236 * Pointer to source sub-block
237 *
238 * @param[in] pu1_pred
239 * Pointer to prediction sub-block
240 *
241 * @param[in] pi2_out
242 * Pointer to residual sub-block
243 *
244 * @param[in] i4_src_stride
245 * Source stride
246 *
247 * @param[in] i4_pred_stride
248 * Prediction stride
249 *
250 * @param[in] dst_strd
251 * Destination stride
252 *
253 * @param[in] u4_qbits
254 * QP_BITS_h264_4x4 + floor(QP/6)
255 *
256 * @param[in] pu2_threshold_matrix
257 * Pointer to Forward Quant Threshold Matrix
258 *
259 * @param[in] pu2_scale_matrix
260 * Pointer to Forward Quant Scale Matrix
261 *
262 * @param[in] u4_round_factor
263 * Quantization Round factor
264 *
265 * @param[out] pu1_nnz
266 * Total non-zero coefficients in the current sub-block
267 *
268 * @returns
269 *
270 * @remarks
271 * None
272 *
273 *******************************************************************************
274 */
void isvc_resi_trans_quant_chroma_4x4(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                                      buffer_container_t *ps_out,
                                      buffer_container_t *ps_upsampled_res,
                                      resi_trans_quant_constants_t *ps_quant_constants,
                                      UWORD8 *pu1_nnz, WORD16 *pi2_dc_out,
                                      UWORD8 u1_use_upsampled_res)
{
    UWORD32 i;
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7;
    WORD32 i4_value;

    UWORD8 *pu1_src = ps_src->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    WORD16 *pi2_out = ps_out->pv_data;
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    /* Pass 1: residue + horizontal transform. Source and prediction are
     * UV-interleaved, hence the stride-2 indices 0/2/4/6 */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        /* computing prediction error (residue) */
        x4 = pu1_src[0] - pu1_pred[0];
        x5 = pu1_src[2] - pu1_pred[2];
        x6 = pu1_src[4] - pu1_pred[4];
        x7 = pu1_src[6] - pu1_pred[6];

        /* NOTE(review): upsampled residue is read non-interleaved (indices
         * 0..3) while src/pred are interleaved — assumes a de-interleaved
         * per-plane residue buffer; confirm against caller */
        if(u1_use_upsampled_res)
        {
            x4 = isvc_subtract_upsampled_res(x4, pi2_upsampled_res[0]);
            x5 = isvc_subtract_upsampled_res(x5, pi2_upsampled_res[1]);
            x6 = isvc_subtract_upsampled_res(x6, pi2_upsampled_res[2]);
            x7 = isvc_subtract_upsampled_res(x7, pi2_upsampled_res[3]);
        }

        /* Horizontal transform (4-point forward core transform butterfly) */
        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* 2 * x is used instead of x << 1: left-shifting a negative signed
         * value is undefined behavior in C; the multiply is equivalent */
        pi2_out_tmp[0] = x0 + x1;
        pi2_out_tmp[1] = (2 * x3) + x2;
        pi2_out_tmp[2] = x0 - x1;
        pi2_out_tmp[3] = x3 - (2 * x2);

        /* pointing to next row; */
        pu1_src += i4_src_stride;
        pu1_pred += i4_pred_stride;
        pi2_out_tmp += 4;

        /* advance only when valid: pointer arithmetic on NULL is UB even
         * with a zero stride */
        if(pi2_upsampled_res)
        {
            pi2_upsampled_res += i4_upsampled_res_stride;
        }
    }
    pi2_out_tmp = pi2_out;

    /* Pass 2: vertical transform + quantization, column by column */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x4 = pi2_out_tmp[0];
        x5 = pi2_out_tmp[4];
        x6 = pi2_out_tmp[8];
        x7 = pi2_out_tmp[12];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* quantization is done in place */

        i4_value = x0 + x1;

        /* unquantized DC is exported for the chroma 2x2 hadamard/DC path */
        if(i == 0)
        {
            *pi2_dc_out = i4_value;
        }

        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = i4_value;

        i4_value = (2 * x3) + x2;
        FWD_QUANT(i4_value, pu2_threshold_matrix[4], pu2_scale_matrix[4], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[4] = i4_value;

        i4_value = x0 - x1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = i4_value;

        i4_value = x3 - (2 * x2);
        FWD_QUANT(i4_value, pu2_threshold_matrix[12], pu2_scale_matrix[12], u4_round_factor,
                  u4_qbits, u4_nonzero_coeff);
        pi2_out_tmp[12] = i4_value;

        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }

    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}
383
384 /**
385 *******************************************************************************
386 *
387 * @brief
388 * This function performs forward hadamard transform and quantization on a 4*4
389 *block
390 *
391 * @par Description:
392 * The function accepts source buffer and estimation buffer. From these, it
 * computes the residue. This residue is then transformed and quantized.
394 * The transform and quantization are in placed computed. They use the residue
395 * buffer for this.
396 *
397 * @param[in] pu1_src
398 * Pointer to source sub-block
399 *
400 * @param[in] pu1_pred
401 * Pointer to prediction sub-block
402 *
403 * @param[in] pi2_out
404 * Pointer to residual sub-block
405 *
406 * @param[in] i4_src_stride
407 * Source stride
408 *
409 * @param[in] i4_pred_stride
410 * Prediction stride
411 *
412 * @param[in] dst_strd
413 * Destination stride
414 *
415 * @param[in] u4_qbits
416 * QP_BITS_h264_4x4 + floor(QP/6)
417 *
418 * @param[in] pu2_threshold_matrix
419 * Pointer to Forward Quant Threshold Matrix
420 *
421 * @param[in] pu2_scale_matrix
422 * Pointer to Forward Quant Scale Matrix
423 *
424 * @param[in] u4_round_factor
425 * Quantization Round factor
426 *
427 * @param[out] pu1_nnz
428 * Total non-zero coefficients in the current sub-block
429 *
430 * @returns
431 *
432 * @remarks
433 * None
434 *
435 */
436
/* Forward 4x4 Hadamard transform + quantization, computed in place in
 * pi2_dst. pi2_src holds the 16 input coefficients row-major; pu1_nnz[0]
 * accumulates the nonzero count over all 16 outputs. */
void isvc_hadamard_quant_4x4(WORD16 *pi2_src, WORD16 *pi2_dst,
                             resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
{
    WORD32 i;
    WORD32 x0, x1, x2, x3, x4, x5, x6, x7, i4_value;

    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    /* nnz accumulates across the whole block; reset before counting */
    *pu1_nnz = 0;

    /* Horizontal Hadamard butterfly: transform each row of pi2_src into
     * the corresponding row of pi2_dst */
    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x4 = pi2_src[0];
        x5 = pi2_src[1];
        x6 = pi2_src[2];
        x7 = pi2_src[3];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        pi2_dst[0] = x0 + x1;
        pi2_dst[1] = x3 + x2;
        pi2_dst[2] = x0 - x1;
        pi2_dst[3] = x3 - x2;

        pi2_src += 4;
        pi2_dst += 4;
    }

    /* Vertical transform and quantization */
    /* rewind pi2_dst to the top of the block (16 entries) */
    pi2_dst -= SUB_BLK_WIDTH_4x4 << 2;

    for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    {
        x4 = pi2_dst[0];
        x5 = pi2_dst[4];
        x6 = pi2_dst[8];
        x7 = pi2_dst[12];

        x0 = x4 + x7;
        x1 = x5 + x6;
        x2 = x5 - x6;
        x3 = x4 - x7;

        /* >> 1 on possibly-negative sums is intentional floor division;
         * relies on arithmetic right shift (implementation-defined in C,
         * universal on two's-complement targets). All coefficients quantize
         * with matrix index 0 — DC scale/threshold apply to the full block */
        i4_value = (x0 + x1) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_dst[0] = i4_value;

        i4_value = (x3 + x2) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_dst[4] = i4_value;

        i4_value = (x0 - x1) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_dst[8] = i4_value;

        i4_value = (x3 - x2) >> 1;
        FWD_QUANT(i4_value, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  pu1_nnz[0]);
        pi2_dst[12] = i4_value;

        pi2_dst++;
    }
}
509
510 /**
511 *******************************************************************************
512 *
513 * @brief
514 * This function performs forward hadamard transform and quantization on a 2*2
515 *block for both U and V planes
516 *
517 * @par Description:
518 * The function accepts source buffer and estimation buffer. From these, it
 * computes the residue. This residue is then transformed and quantized.
520 * The transform and quantization are in placed computed. They use the residue
521 * buffer for this.
522 *
523 * @param[in] pu1_src
524 * Pointer to source sub-block
525 *
526 * @param[in] pu1_pred
527 * Pointer to prediction sub-block
528 *
529 * @param[in] pi2_out
530 * Pointer to residual sub-block
531 *
532 * @param[in] i4_src_stride
533 * Source stride
534 *
535 * @param[in] i4_pred_stride
536 * Prediction stride
537 *
538 * @param[in] dst_strd
539 * Destination stride
540 *
541 * @param[in] u4_qbits
542 * QP_BITS_h264_4x4 + floor(QP/6)
543 *
544 * @param[in] pu2_threshold_matrix
545 * Pointer to Forward Quant Threshold Matrix
546 *
547 * @param[in] pu2_scale_matrix
548 * Pointer to Forward Quant Scale Matrix
549 *
550 * @param[in] u4_round_factor
551 * Quantization Round factor
552 *
553 * @param[out] pu1_nnz
554 * Total non-zero coefficients in the current sub-block
555 *
556 * @returns
557 *
558 * @remarks
559 * NNZ for dc is populated at 0 and 5th position of pu1_nnz
560 *
561 */
562
/* Forward 2x2 Hadamard transform + quantization of the chroma DC
 * coefficients, run once for U and once for V. pi2_src/pi2_dst hold the two
 * planes back to back (4 values each); pu1_nnz[0] and pu1_nnz[1] receive the
 * per-plane nonzero counts. */
void isvc_hadamard_quant_2x2_uv(WORD16 *pi2_src, WORD16 *pi2_dst,
                                resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz)
{
    WORD32 i4_plane;
    WORD32 i4_d00, i4_d01, i4_d10, i4_d11;
    WORD32 i4_row0_sum, i4_row0_diff, i4_row1_sum, i4_row1_diff;
    WORD32 i4_coeff;

    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    for(i4_plane = 0; i4_plane < 2; i4_plane++)
    {
        pu1_nnz[i4_plane] = 0;

        /* Load the 2x2 DC block for this plane */
        i4_d00 = pi2_src[0];
        i4_d01 = pi2_src[1];
        i4_d10 = pi2_src[2];
        i4_d11 = pi2_src[3];

        /* Horizontal butterflies */
        i4_row0_sum = i4_d00 + i4_d01;
        i4_row0_diff = i4_d00 - i4_d01;
        i4_row1_sum = i4_d10 + i4_d11;
        i4_row1_diff = i4_d10 - i4_d11;

        /* Vertical butterflies, fused with in-place quantization; all four
         * coefficients use the DC (index 0) scale and threshold */
        i4_coeff = i4_row0_sum + i4_row1_sum;
        FWD_QUANT(i4_coeff, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor,
                  u4_qbits, pu1_nnz[i4_plane]);
        pi2_dst[0] = i4_coeff;

        i4_coeff = i4_row0_sum - i4_row1_sum;
        FWD_QUANT(i4_coeff, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor,
                  u4_qbits, pu1_nnz[i4_plane]);
        pi2_dst[2] = i4_coeff;

        i4_coeff = i4_row0_diff - i4_row1_diff;
        FWD_QUANT(i4_coeff, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor,
                  u4_qbits, pu1_nnz[i4_plane]);
        pi2_dst[3] = i4_coeff;

        i4_coeff = i4_row0_diff + i4_row1_diff;
        FWD_QUANT(i4_coeff, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor,
                  u4_qbits, pu1_nnz[i4_plane]);
        pi2_dst[1] = i4_coeff;

        pi2_dst += 4;
        pi2_src += 4;
    }
}
614
615 /*
616 *******************************************************************************
617 *
618 * @brief
619 * This function performs Single stage forward transform CF8 and quantization
620 *on 8*8 blocks for h.264
621 *
622 * @par Description:
 * Performs single stage 8x8 forward transform CF8 after calculating the
 * residue. The result is then quantized
625 *
626 * @param[in] pu1_src
627 * Input 8x8 pixels
628 *
629 * @param[in] pu1_pred
630 * Input 8x8 pixels
631 *
632 * @param[in] pi1_out
633 * Output 8x8 pixels
634 *
635 * @param[in] u4_thresh
636 * Threshold under which the coeffs are not quantized
637 *
638 * @param[in] u4_qp_div
639 * QP/6
640 *
641 * @param[in] u4_qp_rem
642 * QP%6
643 *
644 * @param[in] u2_src_stride
645 * Source stride
646 *
647 * @param[in] i4_pred_stride
648 * stride for prediciton buffer
649 *
650 * @param[in] dst_strd
651 * stride for destination buffer
652 *
653 * @param[in] pu4_quant_mat
654 * Pointer to the 4x4 quantization matrix
655 *
656 * @returns Void
657 *
658 *
659 *******************************************************************************
660 */
/* Forward 8x8 transform (CF8) + quantization. Residue is computed from
 * src/pred (optionally refined by the upsampled inter-layer residue),
 * transformed horizontally into pi2_out, then transformed vertically and
 * quantized in place. *pu1_nnz returns the nonzero coefficient count;
 * pi2_dc_out is unused here — the 8x8 path keeps its DC in the block. */
void isvc_resi_trans_quant_8x8(buffer_container_t *ps_src, buffer_container_t *ps_pred,
                               buffer_container_t *ps_out, buffer_container_t *ps_upsampled_res,
                               resi_trans_quant_constants_t *ps_quant_constants, UWORD8 *pu1_nnz,
                               WORD16 *pi2_dc_out, UWORD8 u1_use_upsampled_res)
{
    UWORD32 i;
    WORD32 a0, a1, a2, a3, a4, a5, a6, a7;
    WORD32 r0, r1, r2, r3, r4, r5, r6, r7;

    UWORD8 *pu1_src = ps_src->pv_data;
    UWORD8 *pu1_pred = ps_pred->pv_data;
    WORD16 *pi2_out = ps_out->pv_data;
    WORD16 *pi2_upsampled_res = ps_upsampled_res ? ps_upsampled_res->pv_data : NULL;
    WORD32 i4_src_stride = ps_src->i4_data_stride;
    WORD32 i4_pred_stride = ps_pred->i4_data_stride;
    WORD32 i4_upsampled_res_stride = ps_upsampled_res ? ps_upsampled_res->i4_data_stride : 0;
    WORD16 *pi2_out_tmp = pi2_out;
    UWORD32 u4_nonzero_coeff = 0;
    const UWORD16 *pu2_scale_matrix = ps_quant_constants->pu2_scale_matrix;
    const UWORD16 *pu2_threshold_matrix = ps_quant_constants->pu2_threshold_matrix;
    UWORD32 u4_qbits = ps_quant_constants->u4_qbits;
    UWORD32 u4_round_factor = ps_quant_constants->u4_round_factor;

    UNUSED(pi2_dc_out);

    /* NOTE(review): throughout this function, >> on possibly-negative values
     * is intentional floor division via arithmetic right shift
     * (implementation-defined in C, universal on two's-complement targets) */

    /*Horizontal transform */
    /* we are going to use the a's and r's in a twisted way since */
    /*i dont want to declare more variables */
    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
    {
        /* residue for one row of 8 pixels */
        r0 = pu1_src[0];
        r0 -= pu1_pred[0];
        r1 = pu1_src[1];
        r1 -= pu1_pred[1];
        r2 = pu1_src[2];
        r2 -= pu1_pred[2];
        r3 = pu1_src[3];
        r3 -= pu1_pred[3];
        r4 = pu1_src[4];
        r4 -= pu1_pred[4];
        r5 = pu1_src[5];
        r5 -= pu1_pred[5];
        r6 = pu1_src[6];
        r6 -= pu1_pred[6];
        r7 = pu1_src[7];
        r7 -= pu1_pred[7];

        /* SVC inter-layer residual prediction refinement */
        if(u1_use_upsampled_res)
        {
            r0 = isvc_subtract_upsampled_res(r0, pi2_upsampled_res[0]);
            r1 = isvc_subtract_upsampled_res(r1, pi2_upsampled_res[1]);
            r2 = isvc_subtract_upsampled_res(r2, pi2_upsampled_res[2]);
            r3 = isvc_subtract_upsampled_res(r3, pi2_upsampled_res[3]);
            r4 = isvc_subtract_upsampled_res(r4, pi2_upsampled_res[4]);
            r5 = isvc_subtract_upsampled_res(r5, pi2_upsampled_res[5]);
            r6 = isvc_subtract_upsampled_res(r6, pi2_upsampled_res[6]);
            r7 = isvc_subtract_upsampled_res(r7, pi2_upsampled_res[7]);
        }

        /* even part of the 8-point butterfly */
        a0 = r0 + r7;
        a1 = r1 + r6;
        a2 = r2 + r5;
        a3 = r3 + r4;

        a4 = a0 + a3;
        a5 = a1 + a2;
        a6 = a0 - a3;
        a7 = a1 - a2;

        /* even output coefficients (positions 0, 2, 4, 6) */
        pi2_out_tmp[0] = a4 + a5;

        pi2_out_tmp[2] = a6 + (a7 >> 1);
        pi2_out_tmp[4] = a4 - a5;
        pi2_out_tmp[6] = (a6 >> 1) - a7;

        /* odd part */
        a0 = r0 - r7;
        a1 = r1 - r6;
        a2 = r2 - r5;
        a3 = r3 - r4;

        a4 = a1 + a2 + ((a0 >> 1) + a0);
        a5 = a0 - a3 - ((a2 >> 1) + a2);
        a6 = a0 + a3 - ((a1 >> 1) + a1);
        a7 = a1 - a2 + ((a3 >> 1) + a3);

        /* odd output coefficients (positions 1, 3, 5, 7) */
        pi2_out_tmp[1] = a4 + (a7 >> 2);
        pi2_out_tmp[3] = a5 + (a6 >> 2);
        pi2_out_tmp[5] = a6 - (a5 >> 2);
        pi2_out_tmp[7] = (a4 >> 2) - a7;

        pu1_src += i4_src_stride;
        pu1_pred += i4_pred_stride;
        pi2_out_tmp += 8;
        /* NOTE(review): advanced even when NULL (stride is 0 then) — pointer
         * arithmetic on NULL is technically UB; confirm/clean up */
        pi2_upsampled_res += i4_upsampled_res_stride;
    }

    /*vertical transform and quant */

    pi2_out_tmp = pi2_out;

    for(i = 0; i < SUB_BLK_WIDTH_8x8; ++i)
    {
        /* load one column (row stride is 8) */
        r0 = pi2_out_tmp[0];
        r1 = pi2_out_tmp[8];
        r2 = pi2_out_tmp[16];
        r3 = pi2_out_tmp[24];
        r4 = pi2_out_tmp[32];
        r5 = pi2_out_tmp[40];
        r6 = pi2_out_tmp[48];
        r7 = pi2_out_tmp[56];

        /* even part */
        a0 = r0 + r7;
        a1 = r1 + r6;
        a2 = r2 + r5;
        a3 = r3 + r4;

        a4 = a0 + a3;
        a5 = a1 + a2;
        a6 = a0 - a3;
        a7 = a1 - a2;

        /* odd part (a0..a3 reused; must be computed before r0..r7 are
         * overwritten below) */
        a0 = r0 - r7;
        a1 = r1 - r6;
        a2 = r2 - r5;
        a3 = r3 - r4;

        /* even coefficients */
        r0 = a4 + a5;
        r2 = a6 + (a7 >> 1);
        r4 = a4 - a5;
        r6 = (a6 >> 1) - a7;

        a4 = a1 + a2 + ((a0 >> 1) + a0);
        a5 = a0 - a3 - ((a2 >> 1) + a2);
        a6 = a0 + a3 - ((a1 >> 1) + a1);
        a7 = a1 - a2 + ((a3 >> 1) + a3);

        /* odd coefficients */
        r1 = a4 + (a7 >> 2);
        r3 = a5 + (a6 >> 2);
        r5 = a6 - (a5 >> 2);
        r7 = (a4 >> 2) - a7;

        /* quantize the column in place; matrix index = row * 8 */
        FWD_QUANT(r0, pu2_threshold_matrix[0], pu2_scale_matrix[0], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[0] = r0;

        FWD_QUANT(r1, pu2_threshold_matrix[8], pu2_scale_matrix[8], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[8] = r1;

        FWD_QUANT(r2, pu2_threshold_matrix[16], pu2_scale_matrix[16], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[16] = r2;

        FWD_QUANT(r3, pu2_threshold_matrix[24], pu2_scale_matrix[24], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[24] = r3;

        FWD_QUANT(r4, pu2_threshold_matrix[32], pu2_scale_matrix[32], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[32] = r4;

        FWD_QUANT(r5, pu2_threshold_matrix[40], pu2_scale_matrix[40], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[40] = r5;

        FWD_QUANT(r6, pu2_threshold_matrix[48], pu2_scale_matrix[48], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[48] = r6;

        FWD_QUANT(r7, pu2_threshold_matrix[56], pu2_scale_matrix[56], u4_round_factor, u4_qbits,
                  u4_nonzero_coeff);
        pi2_out_tmp[56] = r7;

        pi2_out_tmp++;
        pu2_scale_matrix++;
        pu2_threshold_matrix++;
    }
    /* Return total nonzero coefficients in the current sub block */
    *pu1_nnz = u4_nonzero_coeff;
}
841