1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 *******************************************************************************
22 * @file
23 * ihevc_quant_iquant_ssd.c
24 *
25 * @brief
26 * Contains function definitions for quantization, followed by Inverse
27 * quantization to find transform domain SSD
28 *
29 * @author
30 * 100453, 100578
31 *
32 * @par List of Functions:
33 * - ihevc_quant_iquant_ssd()
34 * - ihevc_quant_iquant_ssd_flat_scale_mat()
35 *
36 * @remarks
37 * None
38 *
39 *******************************************************************************
40 */
41
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include "ihevc_typedefs.h"
46 #include "ihevc_macros.h"
47 #include "ihevc_platform_macros.h"
48 #include "ihevc_defs.h"
49 #include "ihevc_debug.h"
50 #include "ihevc_trans_tables.h"
51 #include "ihevc_quant_iquant_ssd.h"
52 #include "ihevc_func_selector.h"
53 #include "ihevc_trans_macros.h"
54 #include <assert.h>
55
56 /*****************************************************************************/
57 /* Globals */
58 /*****************************************************************************/
59
60
61 /**
62 *******************************************************************************
63 *
64 * @brief
65 * This function performs quantization, followed by Inverse
66 * quantization to find transform domain SSD
67 *
68 * @par Description:
69 * Performs quantization on coeffs
70 *
71 * @param[in] pi2_coeffs
72 * 4x4 Coeffs
73 *
74 * @param[in] pi2_quant_coeff
75 * Scaling Matrix
76 *
77 * @param[out] pi2_dst
78 * Output 4x4 coefficients
79 *
80 * @param[in] qp_div
81 * Quantization parameter / 6
82 *
83 * @param[in] qp_rem
84 * Quantization parameter % 6
85 *
86 * @param[in] src_strd
87 * Input stride
88 *
89 * @param[in] dst_strd
90 * Output Stride
91 *
92 * @param[out] csbf
93 * coded sub block flag
94 *
95 * @param[in] csbf_strd
96 * coded sub block flag
97 *
98 * @param[out] zero_col
99 * zero column flag
100 *
101 * @param[out] zero_row
* zero row flag
103 *
104 * @returns cbf
105 * coded block flag
106 *
107 * @remarks
108 * None
109 *
110 *******************************************************************************
111 */
112
ihevc_quant_iquant_ssd(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)113 WORD32 ihevc_quant_iquant_ssd
114 (
115 WORD16 *pi2_coeffs,
116 WORD16 *pi2_quant_coeff,
117 WORD16 *pi2_q_dst,
118 WORD16 *pi2_iq_dst,
119 WORD32 trans_size,
120 WORD32 qp_div,/* qpscaled / 6 */
121 WORD32 qp_rem,/* qpscaled % 6 */
122 WORD32 q_add,
123 WORD32 *pi4_quant_round_factor_0_1,
124 WORD32 *pi4_quant_round_factor_1_2,
125 WORD32 src_strd,
126 WORD32 dst_q_strd,
127 WORD32 dst_iq_strd,
128 UWORD8 *csbf,
129 WORD32 csbf_strd,
130 WORD32 *zero_col,
131 WORD32 *zero_row,
132 WORD16 *pi2_dequant_coeff,
133 LWORD64 *pi8_cost
134 )
135 {
136 WORD32 i, j;
137 WORD32 log2_size;
138 WORD16 *pi2_q_dst_orig;
139 WORD32 cbf = 0;
140 WORD32 bit_depth,shift_iq;
141 WORD32 val;
142 WORD16 i2_temp;
143 WORD32 ssd_cost = 0;
144
145 (void)pi4_quant_round_factor_0_1;
146 (void)pi4_quant_round_factor_1_2;
147 pi2_q_dst_orig = pi2_q_dst;
148
149 /* Quant initialization */
150 GETRANGE(log2_size, trans_size);
151 log2_size -= 1;
152
153 bit_depth = 8 + 0;
154 shift_iq = bit_depth + log2_size - 5;
155
156 for(i = 0; i < trans_size; i++)
157 {
158 for(j = 0; j < trans_size; j++)
159 {
160 /* Back up the coefficients before Quantization */
161 i2_temp = pi2_coeffs[j];
162
163 /* Quantization */
164 QUANT(pi2_q_dst[j], pi2_coeffs[j],
165 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
166 log2_size, q_add);
167
168 /* Inverse Quantization */
169 IQUANT(pi2_iq_dst[j],
170 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
171 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
172 /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
173 shift_iq,
174 qp_div);
175
176 /* SSD Computation & Accumulation */
177 val = i2_temp - pi2_iq_dst[j];
178 ssd_cost += val*val;
179
180 }
181
182 pi2_q_dst += dst_q_strd;
183 pi2_iq_dst += dst_iq_strd;
184 pi2_quant_coeff += trans_size;
185 pi2_coeffs += src_strd;
186 pi2_dequant_coeff += trans_size;
187 }
188
189 /* Store the cost */
190 *pi8_cost = ssd_cost;
191
192 /* CSBF update */
193 {
194 WORD32 block_row, block_col;
195 WORD32 row, col;
196 WORD16 *pi2_block;
197 UWORD32 temp_zero_col = 0;
198 UWORD32 temp_zero_row = 0;
199
200 pi2_q_dst = pi2_q_dst_orig;
201
202 for(block_row = 0; block_row < trans_size; block_row += 4)
203 {
204 //block_col is incrementing by 1 for easy update of csbf pointer
205 for(block_col = 0; block_col < trans_size / 4; block_col++)
206 {
207 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
208 *(csbf + block_col) = 0;
209
210 for(row = 0; row < 4; row++)
211 {
212 for(col = 0; col < 4; col++)
213 {
214 if(pi2_block[row * dst_q_strd + col] != 0)
215 {
216 *(csbf + block_col) = 1;
217 break;
218 }
219 }
220 if(*(csbf + block_col) == 1)
221 {
222 /* zero_col update *//* temp_zero_col = ~zero_col */
223 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
224 // zero col can be optimized further. Now clearing the
225 // entire 4 bits corresponding to 4 colums of 4x4 block
226 // even if any 4x4 csbf is set
227
228 /* zero row update */ /* temp_zero_row = ~zero_row */
229 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
230 // zero row can be optimized further. Now clearing the
231 // entire 4 bits corresponding to 4 rows of 4x4 block
232 // even if any 4x4 csbf is set
233
234 break;
235 }
236 }
237
238 cbf = cbf || (*(csbf + block_col)); // cbf update
239 }
240 csbf += csbf_strd;
241 }
242
243 *zero_col = ~temp_zero_col; //final zero_col storing
244 *zero_row = ~temp_zero_row; //final zero_row storing
245 }
246
247 return cbf;
248 }
249
250 /**
251 *******************************************************************************
252 *
253 * @brief
254 * This function performs quantization, followed by Inverse
255 * quantization
256 *
257 * @par Description:
258 * Performs quantization on coeffs
259 *
260 * @param[in] pi2_coeffs
261 * 4x4 Coeffs
262 *
263 * @param[in] pi2_quant_coeff
264 * Scaling Matrix
265 *
266 * @param[out] pi2_dst
267 * Output 4x4 coefficients
268 *
269 * @param[in] qp_div
270 * Quantization parameter / 6
271 *
272 * @param[in] qp_rem
273 * Quantization parameter % 6
274 *
275 * @param[in] src_strd
276 * Input stride
277 *
278 * @param[in] dst_strd
279 * Output Stride
280 *
281 * @param[out] csbf
282 * coded sub block flag
283 *
284 * @param[in] csbf_strd
285 * coded sub block flag
286 *
287 * @param[out] zero_col
288 * zero column flag
289 *
290 * @param[out] zero_row
* zero row flag
292 *
293 * @returns cbf
294 * coded block flag
295 *
296 * @remarks
297 * None
298 *
299 *******************************************************************************
300 */
301
ihevc_quant_iquant(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)302 WORD32 ihevc_quant_iquant
303 (
304 WORD16 *pi2_coeffs,
305 WORD16 *pi2_quant_coeff,
306 WORD16 *pi2_q_dst,
307 WORD16 *pi2_iq_dst,
308 WORD32 trans_size,
309 WORD32 qp_div,/* qpscaled / 6 */
310 WORD32 qp_rem,/* qpscaled % 6 */
311 WORD32 q_add,
312 WORD32 *pi4_quant_round_factor_0_1,
313 WORD32 *pi4_quant_round_factor_1_2,
314 WORD32 src_strd,
315 WORD32 dst_q_strd,
316 WORD32 dst_iq_strd,
317 UWORD8 *csbf,
318 WORD32 csbf_strd,
319 WORD32 *zero_col,
320 WORD32 *zero_row,
321 WORD16 *pi2_dequant_coeff,
322 LWORD64 *pi8_cost
323 )
324 {
325 WORD32 i, j;
326 WORD32 log2_size;
327 WORD16 *pi2_q_dst_orig;
328 WORD32 cbf = 0;
329 WORD32 bit_depth,shift_iq;
330 WORD16 i2_temp;
331
332 (void)pi8_cost;
333 (void)pi4_quant_round_factor_0_1;
334 (void)pi4_quant_round_factor_1_2;
335 pi2_q_dst_orig = pi2_q_dst;
336
337 /* Quant initialization */
338 GETRANGE(log2_size, trans_size);
339 log2_size -= 1;
340
341 bit_depth = 8;
342 shift_iq = bit_depth + log2_size - 5;
343
344 for(i = 0; i < trans_size; i++)
345 {
346 for(j = 0; j < trans_size; j++)
347 {
348 /* Back up the coefficients before Quantization */
349 i2_temp = pi2_coeffs[j];
350
351 /* Quantization */
352 QUANT(pi2_q_dst[j], pi2_coeffs[j],
353 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
354 log2_size, q_add);
355
356 /* Inverse Quantization */
357 IQUANT(pi2_iq_dst[j],
358 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
359 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
360 shift_iq,
361 qp_div);
362 }
363
364 pi2_q_dst += dst_q_strd;
365 pi2_iq_dst += dst_iq_strd;
366 pi2_quant_coeff += trans_size;
367 pi2_coeffs += src_strd;
368 pi2_dequant_coeff += trans_size;
369 }
370
371 /* CSBF update */
372 {
373 WORD32 block_row, block_col;
374 WORD32 row, col;
375 WORD16 *pi2_block;
376 UWORD32 temp_zero_col = 0;
377 UWORD32 temp_zero_row = 0;
378
379 pi2_q_dst = pi2_q_dst_orig;
380
381 for(block_row = 0; block_row < trans_size; block_row += 4)
382 {
383 //block_col is incrementing by 1 for easy update of csbf pointer
384 for(block_col = 0; block_col < trans_size / 4; block_col++)
385 {
386 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
387 *(csbf + block_col) = 0;
388
389 for(row = 0; row < 4; row++)
390 {
391 for(col = 0; col < 4; col++)
392 {
393 if(pi2_block[row * dst_q_strd + col] != 0)
394 {
395 *(csbf + block_col) = 1;
396 break;
397 }
398 }
399 if(*(csbf + block_col) == 1)
400 {
401 /* zero_col update *//* temp_zero_col = ~zero_col */
402 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
403 // zero col can be optimized further. Now clearing the
404 // entire 4 bits corresponding to 4 colums of 4x4 block
405 // even if any 4x4 csbf is set
406
407 /* zero row update */ /* temp_zero_row = ~zero_row */
408 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
409 // zero row can be optimized further. Now clearing the
410 // entire 4 bits corresponding to 4 rows of 4x4 block
411 // even if any 4x4 csbf is set
412
413 break;
414 }
415 }
416
417 cbf = cbf || (*(csbf + block_col)); // cbf update
418 }
419
420 csbf += csbf_strd;
421 }
422
423 *zero_col = ~temp_zero_col; //final zero_col storing
424 *zero_row = ~temp_zero_row; //final zero_row storing
425 }
426
427 return cbf;
428 }
429
430 /**
431 *******************************************************************************
432 *
433 * @brief
434 * This function performs quantization, followed by Inverse
435 * quantization to find transform domain SSD
436 *
437 * @par Description:
438 * Performs quantization on coeffs
439 *
440 * @param[in] pi2_coeffs
441 * 4x4 Coeffs
442 *
443 * @param[in] pi2_quant_coeff
444 * Scaling Matrix
445 *
446 * @param[out] pi2_dst
447 * Output 4x4 coefficients
448 *
449 * @param[in] qp_div
450 * Quantization parameter / 6
451 *
452 * @param[in] qp_rem
453 * Quantization parameter % 6
454 *
455 * @param[in] src_strd
456 * Input stride
457 *
458 * @param[in] dst_strd
459 * Output Stride
460 *
461 * @param[out] csbf
462 * coded sub block flag
463 *
464 * @param[in] csbf_strd
465 * coded sub block flag
466 *
467 * @param[out] zero_col
468 * zero column flag
469 *
470 * @param[out] zero_row
* zero row flag
472 *
473 * @returns cbf
474 * coded block flag
475 *
476 * @remarks
477 * None
478 *
479 *******************************************************************************
480 */
481
ihevc_quant_iquant_ssd_rdoq(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)482 WORD32 ihevc_quant_iquant_ssd_rdoq
483 (
484 WORD16 *pi2_coeffs,
485 WORD16 *pi2_quant_coeff,
486 WORD16 *pi2_q_dst,
487 WORD16 *pi2_iq_dst,
488 WORD32 trans_size,
489 WORD32 qp_div,/* qpscaled / 6 */
490 WORD32 qp_rem,/* qpscaled % 6 */
491 WORD32 q_add,
492 WORD32 *pi4_quant_round_factor_0_1,
493 WORD32 *pi4_quant_round_factor_1_2,
494 WORD32 src_strd,
495 WORD32 dst_q_strd,
496 WORD32 dst_iq_strd,
497 UWORD8 *csbf,
498 WORD32 csbf_strd,
499 WORD32 *zero_col,
500 WORD32 *zero_row,
501 WORD16 *pi2_dequant_coeff,
502 LWORD64 *pi8_cost
503 )
504 {
505 WORD32 i, j;
506 WORD32 log2_size;
507 WORD16 *pi2_q_dst_orig;
508 WORD32 cbf = 0;
509 WORD32 bit_depth,shift_iq;
510 WORD32 val;
511 WORD16 i2_temp;
512 WORD32 ssd_cost = 0;
513
514 (void)pi4_quant_round_factor_0_1;
515 (void)pi4_quant_round_factor_1_2;
516 pi2_q_dst_orig = pi2_q_dst;
517
518 GETRANGE(log2_size, trans_size);
519 log2_size -= 1;
520
521 bit_depth = 8 + 0;
522 shift_iq = bit_depth + log2_size - 5;
523
524 for(i = 0; i < trans_size; i++)
525 {
526 for(j = 0; j < trans_size; j++)
527 {
528 /* Back up the coefficients before Quantization */
529 i2_temp = pi2_coeffs[j];
530
531 /* Quantization */
532 QUANT(pi2_q_dst[j], pi2_coeffs[j],
533 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
534 log2_size, q_add);
535
536
537 if (abs(pi2_q_dst[j]) > 1)
538 {
539 QUANT(pi2_q_dst[j],i2_temp,
540 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
541 log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
542
543 }
544
545
546 /* Inverse Quantization */
547 IQUANT(pi2_iq_dst[j],
548 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
549 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
550 /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
551 shift_iq,
552 qp_div);
553
554 /* SSD Computation & Accumulation */
555 val = i2_temp - pi2_iq_dst[j];
556 ssd_cost += val*val;
557
558 }
559
560 pi2_q_dst += dst_q_strd;
561 pi2_iq_dst += dst_iq_strd;
562 pi2_quant_coeff += trans_size;
563 pi2_coeffs += src_strd;
564 pi2_dequant_coeff += trans_size;
565 }
566 /* Store the cost */
567 *pi8_cost = ssd_cost;
568
569 /* CSBF update */
570 {
571 WORD32 block_row, block_col;
572 WORD32 row, col;
573 WORD16 *pi2_block;
574 UWORD32 temp_zero_col = 0;
575 UWORD32 temp_zero_row = 0;
576
577 pi2_q_dst = pi2_q_dst_orig;
578
579 for(block_row = 0; block_row < trans_size; block_row += 4)
580 {
581 //block_col is incrementing by 1 for easy update of csbf pointer
582 for(block_col = 0; block_col < trans_size / 4; block_col++)
583 {
584 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
585 *(csbf + block_col) = 0;
586
587 for(row = 0; row < 4; row++)
588 {
589 for(col = 0; col < 4; col++)
590 {
591 if(pi2_block[row * dst_q_strd + col] != 0)
592 {
593 *(csbf + block_col) = 1;
594 break;
595 }
596 }
597 if(*(csbf + block_col) == 1)
598 {
599 /* zero_col update *//* temp_zero_col = ~zero_col */
600 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
601 // zero col can be optimized further. Now clearing the
602 // entire 4 bits corresponding to 4 colums of 4x4 block
603 // even if any 4x4 csbf is set
604
605 /* zero row update */ /* temp_zero_row = ~zero_row */
606 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
607 // zero row can be optimized further. Now clearing the
608 // entire 4 bits corresponding to 4 rows of 4x4 block
609 // even if any 4x4 csbf is set
610
611 break;
612 }
613 }
614
615 cbf = cbf || (*(csbf + block_col)); // cbf update
616 }
617 csbf += csbf_strd;
618 }
619
620 *zero_col = ~temp_zero_col; //final zero_col storing
621 *zero_row = ~temp_zero_row; //final zero_row storing
622 }
623
624 return cbf;
625 }
626
ihevc_quant_iquant_rdoq(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)627 WORD32 ihevc_quant_iquant_rdoq
628 (
629 WORD16 *pi2_coeffs,
630 WORD16 *pi2_quant_coeff,
631 WORD16 *pi2_q_dst,
632 WORD16 *pi2_iq_dst,
633 WORD32 trans_size,
634 WORD32 qp_div,/* qpscaled / 6 */
635 WORD32 qp_rem,/* qpscaled % 6 */
636 WORD32 q_add,
637 WORD32 *pi4_quant_round_factor_0_1,
638 WORD32 *pi4_quant_round_factor_1_2,
639 WORD32 src_strd,
640 WORD32 dst_q_strd,
641 WORD32 dst_iq_strd,
642 UWORD8 *csbf,
643 WORD32 csbf_strd,
644 WORD32 *zero_col,
645 WORD32 *zero_row,
646 WORD16 *pi2_dequant_coeff,
647 LWORD64 *pi8_cost
648 )
649 {
650 WORD32 i, j;
651 WORD32 log2_size;
652 WORD16 *pi2_q_dst_orig;
653 WORD32 cbf = 0;
654 WORD32 bit_depth,shift_iq;
655 WORD16 i2_temp;
656
657 (void)pi8_cost;
658 (void)pi4_quant_round_factor_0_1;
659 (void)pi4_quant_round_factor_1_2;
660 pi2_q_dst_orig = pi2_q_dst;
661
662 GETRANGE(log2_size, trans_size);
663 log2_size -= 1;
664
665 bit_depth = 8 + 0;
666 shift_iq = bit_depth + log2_size - 5;
667
668 for(i = 0; i < trans_size; i++)
669 {
670 for(j = 0; j < trans_size; j++)
671 {
672 /* Back up the coefficients before Quantization */
673 i2_temp = pi2_coeffs[j];
674
675 /* Quantization */
676 QUANT(pi2_q_dst[j], pi2_coeffs[j],
677 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
678 log2_size, q_add);
679
680 if (abs(pi2_q_dst[j]) > 1)
681 {
682 QUANT(pi2_q_dst[j],i2_temp,
683 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
684 log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
685 }
686
687 /* Inverse Quantization */
688 IQUANT(pi2_iq_dst[j],
689 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
690 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
691 shift_iq,
692 qp_div);
693 }
694
695 pi2_q_dst += dst_q_strd;
696 pi2_iq_dst += dst_iq_strd;
697 pi2_quant_coeff += trans_size;
698 pi2_coeffs += src_strd;
699 pi2_dequant_coeff += trans_size;
700 }
701
702 /* CSBF update */
703 {
704 WORD32 block_row, block_col;
705 WORD32 row, col;
706 WORD16 *pi2_block;
707 UWORD32 temp_zero_col = 0;
708 UWORD32 temp_zero_row = 0;
709
710 pi2_q_dst = pi2_q_dst_orig;
711
712 for(block_row = 0; block_row < trans_size; block_row += 4)
713 {
714 //block_col is incrementing by 1 for easy update of csbf pointer
715 for(block_col = 0; block_col < trans_size / 4; block_col++)
716 {
717 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
718 *(csbf + block_col) = 0;
719
720 for(row = 0; row < 4; row++)
721 {
722 for(col = 0; col < 4; col++)
723 {
724 if(pi2_block[row * dst_q_strd + col] != 0)
725 {
726 *(csbf + block_col) = 1;
727 break;
728 }
729 }
730 if(*(csbf + block_col) == 1)
731 {
732 /* zero_col update *//* temp_zero_col = ~zero_col */
733 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
734 // zero col can be optimized further. Now clearing the
735 // entire 4 bits corresponding to 4 colums of 4x4 block
736 // even if any 4x4 csbf is set
737
738 /* zero row update */ /* temp_zero_row = ~zero_row */
739 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
740 // zero row can be optimized further. Now clearing the
741 // entire 4 bits corresponding to 4 rows of 4x4 block
742 // even if any 4x4 csbf is set
743
744 break;
745 }
746 }
747
748 cbf = cbf || (*(csbf + block_col)); // cbf update
749 }
750 csbf += csbf_strd;
751 }
752
753 *zero_col = ~temp_zero_col; //final zero_col storing
754 *zero_row = ~temp_zero_row; //final zero_row storing
755 }
756
757 return cbf;
758 }
759
760 /**
761 *******************************************************************************
762 *
763 * @brief
764 * This function performs quantization(using flat scale matrix), followed by
765 * inverse quantization to find transform domain SSD
766 *
767 * @par Description:
768 * Performs quantization on coeffs
769 *
770 * @param[in] pi2_coeffs
771 * 4x4 Coeffs
772 *
773 * @param[in] pi2_quant_coeff
774 * Scaling Matrix
775 *
776 * @param[out] pi2_dst
777 * Output 4x4 coefficients
778 *
779 * @param[in] qp_div
780 * Quantization parameter / 6
781 *
782 * @param[in] qp_rem
783 * Quantization parameter % 6
784 *
785 * @param[in] src_strd
786 * Input stride
787 *
788 * @param[in] dst_strd
789 * Output Stride
790 *
791 * @param[out] csbf
792 * coded sub block flag
793 *
794 * @param[in] csbf_strd
795 * coded sub block flag
796 *
797 * @param[out] zero_col
798 * zero column flag
799 *
800 * @param[out] zero_row
* zero row flag
802 *
803 * @returns cbf
804 * coded block flag
805 *
806 * @remarks
807 * None
808 *
809 *******************************************************************************
810 */
811
ihevc_quant_iquant_ssd_flat_scale_mat(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)812 WORD32 ihevc_quant_iquant_ssd_flat_scale_mat
813 (
814 WORD16 *pi2_coeffs,
815 WORD16 *pi2_quant_coeff,
816 WORD16 *pi2_q_dst,
817 WORD16 *pi2_iq_dst,
818 WORD32 trans_size,
819 WORD32 qp_div,/* qpscaled / 6 */
820 WORD32 qp_rem,/* qpscaled % 6 */
821 WORD32 q_add,
822 WORD32 *pi4_quant_round_factor_0_1,
823 WORD32 *pi4_quant_round_factor_1_2,
824 WORD32 src_strd,
825 WORD32 dst_q_strd,
826 WORD32 dst_iq_strd,
827 UWORD8 *csbf,
828 WORD32 csbf_strd,
829 WORD32 *zero_col,
830 WORD32 *zero_row,
831 WORD16 *pi2_dequant_coeff,
832 LWORD64 *pi8_cost
833 )
834 {
835 WORD32 i, j;
836 WORD32 log2_size;
837 WORD16 *pi2_q_dst_orig;
838 WORD32 cbf = 0;
839 WORD32 bit_depth,shift_iq;
840 WORD32 val;
841 WORD16 i2_temp;
842 /* Initialize cost to zero */
843 WORD32 ssd_cost = 0;
844
845 (void)pi4_quant_round_factor_0_1;
846 (void)pi4_quant_round_factor_1_2;
847 pi2_q_dst_orig = pi2_q_dst;
848
849 /* Quant initialization */
850 GETRANGE(log2_size, trans_size);
851 log2_size -= 1;
852
853 bit_depth = 8 + 0;
854 shift_iq = bit_depth + log2_size - 5;
855
856 for(i = 0; i < trans_size; i++)
857 {
858 for(j = 0; j < trans_size; j++)
859 {
860 /* Back up the coefficients before Quantization */
861 i2_temp = pi2_coeffs[j];
862
863 /*QUANT(pi2_dst[j], pi2_coeffs[j],
864 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
865 log2_size, q_add);*/
866
867 /* modified by 1028 */
868 /* Quantization */
869 QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
870 g_ihevc_quant_scales[qp_rem], qp_div,
871 log2_size, q_add);
872
873 if(pi2_q_dst[j] == 0)
874 {
875 pi2_iq_dst[j] = 0;
876 }
877 else
878 {
879 /* Inverse Quantization */
880 IQUANT(pi2_iq_dst[j],
881 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
882 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
883 shift_iq,
884 qp_div);
885 }
886
887 /* SSD Computation & Accumulation */
888 val = i2_temp - pi2_iq_dst[j];
889 ssd_cost += val*val;
890
891 }
892
893 pi2_q_dst += dst_q_strd;
894 pi2_iq_dst += dst_iq_strd;
895 pi2_quant_coeff += trans_size;
896 pi2_coeffs += src_strd;
897 pi2_dequant_coeff += trans_size;
898 }
899 /* Store the cost */
900 *pi8_cost = ssd_cost;
901
902 /* CSBF update */
903 {
904 WORD32 block_row, block_col;
905 WORD32 row, col;
906 WORD16 *pi2_block;
907 UWORD32 temp_zero_col = 0;
908 UWORD32 temp_zero_row = 0;
909
910 pi2_q_dst = pi2_q_dst_orig;
911
912 for(block_row = 0; block_row < trans_size; block_row += 4)
913 {
914 //block_col is incrementing by 1 for easy update of csbf pointer
915 for(block_col = 0; block_col < trans_size / 4; block_col++)
916 {
917 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
918 *(csbf + block_col) = 0;
919
920 for(row = 0; row < 4; row++)
921 {
922 for(col = 0; col < 4; col++)
923 {
924 if(pi2_block[row * dst_q_strd + col] != 0)
925 {
926 *(csbf + block_col) = 1;
927 break;
928 }
929 }
930 if(*(csbf + block_col) == 1)
931 {
932 /* zero_col update *//* temp_zero_col = ~zero_col */
933 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
934 // zero col can be optimized further. Now clearing the
935 // entire 4 bits corresponding to 4 colums of 4x4 block
936 // even if any 4x4 csbf is set
937
938 /* zero row update */ /* temp_zero_row = ~zero_row */
939 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
940 // zero row can be optimized further. Now clearing the
941 // entire 4 bits corresponding to 4 rows of 4x4 block
942 // even if any 4x4 csbf is set
943
944 break;
945 }
946 }
947
948 cbf = cbf || (*(csbf + block_col)); // cbf update
949 }
950 csbf += csbf_strd;
951 }
952
953 *zero_col = ~temp_zero_col; //final zero_col storing
954 *zero_row = ~temp_zero_row; //final zero_row storing
955 }
956
957 return cbf;
958 }
959
ihevc_quant_iquant_flat_scale_mat(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)960 WORD32 ihevc_quant_iquant_flat_scale_mat
961 (
962 WORD16 *pi2_coeffs,
963 WORD16 *pi2_quant_coeff,
964 WORD16 *pi2_q_dst,
965 WORD16 *pi2_iq_dst,
966 WORD32 trans_size,
967 WORD32 qp_div,/* qpscaled / 6 */
968 WORD32 qp_rem,/* qpscaled % 6 */
969 WORD32 q_add,
970 WORD32 *pi4_quant_round_factor_0_1,
971 WORD32 *pi4_quant_round_factor_1_2,
972 WORD32 src_strd,
973 WORD32 dst_q_strd,
974 WORD32 dst_iq_strd,
975 UWORD8 *csbf,
976 WORD32 csbf_strd,
977 WORD32 *zero_col,
978 WORD32 *zero_row,
979 WORD16 *pi2_dequant_coeff,
980 LWORD64 *pi8_cost
981 )
982 {
983 WORD32 i, j;
984 WORD32 log2_size;
985 WORD16 *pi2_q_dst_orig;
986 WORD32 cbf = 0;
987 WORD32 bit_depth,shift_iq;
988 WORD16 i2_temp;
989
990 (void)pi8_cost;
991 (void)pi4_quant_round_factor_0_1;
992 (void)pi4_quant_round_factor_1_2;
993 pi2_q_dst_orig = pi2_q_dst;
994
995 /* Quant initialization */
996 GETRANGE(log2_size, trans_size);
997 log2_size -= 1;
998
999 bit_depth = 8 + 0;
1000 shift_iq = bit_depth + log2_size - 5;
1001
1002 for(i = 0; i < trans_size; i++)
1003 {
1004 for(j = 0; j < trans_size; j++)
1005 {
1006 /* Back up the coefficients before Quantization */
1007 i2_temp = pi2_coeffs[j];
1008
1009 /* Quantization */
1010 QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
1011 g_ihevc_quant_scales[qp_rem], qp_div,
1012 log2_size, q_add);
1013
1014 if(pi2_q_dst[j] == 0)
1015 {
1016 pi2_iq_dst[j] = 0;
1017 }
1018 else
1019 {
1020 /* Inverse Quantization */
1021 IQUANT(pi2_iq_dst[j],
1022 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
1023 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
1024 shift_iq,
1025 qp_div);
1026 }
1027 }
1028
1029 pi2_q_dst += dst_q_strd;
1030 pi2_iq_dst += dst_iq_strd;
1031 pi2_quant_coeff += trans_size;
1032 pi2_coeffs += src_strd;
1033 pi2_dequant_coeff += trans_size;
1034 }
1035
1036 /* CSBF update */
1037 {
1038 WORD32 block_row, block_col;
1039 WORD32 row, col;
1040 WORD16 *pi2_block;
1041 UWORD32 temp_zero_col = 0;
1042 UWORD32 temp_zero_row = 0;
1043
1044 pi2_q_dst = pi2_q_dst_orig;
1045
1046 for(block_row = 0; block_row < trans_size; block_row += 4)
1047 {
1048 //block_col is incrementing by 1 for easy update of csbf pointer
1049 for(block_col = 0; block_col < trans_size / 4; block_col++)
1050 {
1051 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
1052 *(csbf + block_col) = 0;
1053
1054 for(row = 0; row < 4; row++)
1055 {
1056 for(col = 0; col < 4; col++)
1057 {
1058 if(pi2_block[row * dst_q_strd + col] != 0)
1059 {
1060 *(csbf + block_col) = 1;
1061 break;
1062 }
1063 }
1064 if(*(csbf + block_col) == 1)
1065 {
1066 /* zero_col update *//* temp_zero_col = ~zero_col */
1067 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
1068 // zero col can be optimized further. Now clearing the
1069 // entire 4 bits corresponding to 4 colums of 4x4 block
1070 // even if any 4x4 csbf is set
1071
1072 /* zero row update */ /* temp_zero_row = ~zero_row */
1073 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
1074 // zero row can be optimized further. Now clearing the
1075 // entire 4 bits corresponding to 4 rows of 4x4 block
1076 // even if any 4x4 csbf is set
1077
1078 break;
1079 }
1080 }
1081
1082 cbf = cbf || (*(csbf + block_col)); // cbf update
1083 }
1084 csbf += csbf_strd;
1085 }
1086
1087 *zero_col = ~temp_zero_col; //final zero_col storing
1088 *zero_row = ~temp_zero_row; //final zero_row storing
1089 }
1090
1091 return cbf;
1092 }
1093
1094 /**
1095 *******************************************************************************
1096 *
1097 * @brief
1098 * This function performs quantization(using flat scale matrix), followed by
1099 * inverse quantization to find transform domain SSD; when we perform RDOQ.
* In case the quantized value turns out to be greater than 1, we then
* requantize using half rounding.
1102 *
1103 * @par Description:
1104 * Performs quantization on coeffs
1105 *
1106 * @param[in] pi2_coeffs
1107 * 4x4 Coeffs
1108 *
1109 * @param[in] pi2_quant_coeff
1110 * Scaling Matrix
1111 *
1112 * @param[out] pi2_dst
1113 * Output 4x4 coefficients
1114 *
1115 * @param[in] qp_div
1116 * Quantization parameter / 6
1117 *
1118 * @param[in] qp_rem
1119 * Quantization parameter % 6
1120 *
1121 * @param[in] src_strd
1122 * Input stride
1123 *
1124 * @param[in] dst_strd
1125 * Output Stride
1126 *
1127 * @param[out] csbf
1128 * coded sub block flag
1129 *
1130 * @param[in] csbf_strd
1131 * coded sub block flag
1132 *
1133 * @param[out] zero_col
1134 * zero column flag
1135 *
1136 * @param[out] zero_row
* zero row flag
1138 *
1139 * @returns cbf
1140 * coded block flag
1141 *
1142 * @remarks
1143 * None
1144 *
1145 *******************************************************************************
1146 */
1147
ihevc_quant_iquant_ssd_flat_scale_mat_rdoq(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)1148 WORD32 ihevc_quant_iquant_ssd_flat_scale_mat_rdoq
1149 (
1150 WORD16 *pi2_coeffs,
1151 WORD16 *pi2_quant_coeff,
1152 WORD16 *pi2_q_dst,
1153 WORD16 *pi2_iq_dst,
1154 WORD32 trans_size,
1155 WORD32 qp_div,/* qpscaled / 6 */
1156 WORD32 qp_rem,/* qpscaled % 6 */
1157 WORD32 q_add,
1158 WORD32 *pi4_quant_round_factor_0_1,
1159 WORD32 *pi4_quant_round_factor_1_2,
1160 WORD32 src_strd,
1161 WORD32 dst_q_strd,
1162 WORD32 dst_iq_strd,
1163 UWORD8 *csbf,
1164 WORD32 csbf_strd,
1165 WORD32 *zero_col,
1166 WORD32 *zero_row,
1167 WORD16 *pi2_dequant_coeff,
1168 LWORD64 *pi8_cost
1169 )
1170 {
1171 WORD32 i, j;
1172 WORD32 log2_size;
1173 WORD16 *pi2_q_dst_orig;
1174 WORD32 cbf = 0;
1175 WORD32 bit_depth,shift_iq;
1176 WORD32 val;
1177 WORD16 i2_temp;
1178 /* Initialize cost to zero */
1179 WORD32 ssd_cost = 0;
1180
1181 (void)pi4_quant_round_factor_0_1;
1182 (void)pi4_quant_round_factor_1_2;
1183 pi2_q_dst_orig = pi2_q_dst;
1184
1185 /* Quant initialization */
1186 GETRANGE(log2_size, trans_size);
1187 log2_size -= 1;
1188
1189 bit_depth = 8 + 0;
1190 shift_iq = bit_depth + log2_size - 5;
1191
1192 for(i = 0; i < trans_size; i++)
1193 {
1194 for(j = 0; j < trans_size; j++)
1195 {
1196 WORD16 i2_temp1;
1197 /* Back up the coefficients before Quantization */
1198 i2_temp = pi2_coeffs[j];
1199
1200 /*QUANT(pi2_dst[j], pi2_coeffs[j],
1201 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
1202 log2_size, q_add);*/
1203
1204 /* modified by 1028 */
1205 /* Quantization */
1206
1207 if (1)
1208 {
1209 QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
1210 g_ihevc_quant_scales[qp_rem], qp_div,
1211 log2_size, q_add);
1212 }
1213 else
1214 { \
1215 WORD16 inp = pi2_coeffs[j],out = pi2_q_dst[j];
1216 WORD32 quant_coeff = g_ihevc_quant_scales[qp_rem];
1217 WORD32 log2_trans_size = log2_size;
1218 WORD32 tmp; \
1219 WORD32 sign; \
1220 WORD32 bit_depth,transform_shift; \
1221 WORD32 q_bits, quant_multiplier; \
1222 \
1223 /* q_bits and q_add calculation*/ \
1224 /* To be moved outside in neon. To be computer once per transform call */ \
1225 bit_depth = 8; \
1226 transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
1227 quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */ \
1228 q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
1229 \
1230 sign = (inp)<0 ? -1:1; \
1231 \
1232 tmp = (WORD32)(abs(inp)); \
1233 tmp = tmp * (quant_coeff); \
1234 tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
1235 tmp = tmp >> q_bits; \
1236 \
1237 tmp = tmp * sign; \
1238 out = (WORD16) CLIP_S16(tmp); \
1239 }
1240 i2_temp1 = pi2_q_dst[j];
1241 if (abs(pi2_q_dst[j]) > 1)
1242 {
1243 QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
1244 g_ihevc_quant_scales[qp_rem], qp_div,
1245 log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
1246 }
1247
1248
1249 ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
1250 ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j]));
1251
1252
1253 /* Inverse Quantization */
1254 IQUANT(pi2_iq_dst[j],
1255 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
1256 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
1257 shift_iq,
1258 qp_div);
1259
1260 /* SSD Computation & Accumulation */
1261 val = i2_temp - pi2_iq_dst[j];
1262 ssd_cost += val*val;
1263
1264 }
1265
1266 pi2_q_dst += dst_q_strd;
1267 pi2_iq_dst += dst_iq_strd;
1268 pi2_quant_coeff += trans_size;
1269 pi2_coeffs += src_strd;
1270 pi2_dequant_coeff += trans_size;
1271
1272 }
1273 /* Store the cost */
1274 *pi8_cost = ssd_cost;
1275
1276 /* CSBF update */
1277 {
1278 WORD32 block_row, block_col;
1279 WORD32 row, col;
1280 WORD16 *pi2_block;
1281 UWORD32 temp_zero_col = 0;
1282 UWORD32 temp_zero_row = 0;
1283
1284 pi2_q_dst = pi2_q_dst_orig;
1285
1286 for(block_row = 0; block_row < trans_size; block_row += 4)
1287 {
1288 //block_col is incrementing by 1 for easy update of csbf pointer
1289 for(block_col = 0; block_col < trans_size / 4; block_col++)
1290 {
1291 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
1292 *(csbf + block_col) = 0;
1293
1294 for(row = 0; row < 4; row++)
1295 {
1296 for(col = 0; col < 4; col++)
1297 {
1298 if(pi2_block[row * dst_q_strd + col] != 0)
1299 {
1300 *(csbf + block_col) = 1;
1301 break;
1302 }
1303 }
1304 if(*(csbf + block_col) == 1)
1305 {
1306 /* zero_col update *//* temp_zero_col = ~zero_col */
1307 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
1308 // zero col can be optimized further. Now clearing the
1309 // entire 4 bits corresponding to 4 colums of 4x4 block
1310 // even if any 4x4 csbf is set
1311
1312 /* zero row update */ /* temp_zero_row = ~zero_row */
1313 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
1314 // zero row can be optimized further. Now clearing the
1315 // entire 4 bits corresponding to 4 rows of 4x4 block
1316 // even if any 4x4 csbf is set
1317
1318 break;
1319 }
1320 }
1321
1322 cbf = cbf || (*(csbf + block_col)); // cbf update
1323 }
1324 csbf += csbf_strd;
1325 }
1326
1327 *zero_col = ~temp_zero_col; //final zero_col storing
1328 *zero_row = ~temp_zero_row; //final zero_row storing
1329 }
1330 return cbf;
1331 }
1332
/**
 *******************************************************************************
 *
 * @brief
 *  RDOQ quantization (flat scaling matrix) followed by inverse quantization,
 *  without transform-domain SSD accumulation (pi8_cost is accepted for
 *  interface compatibility but unused). Levels whose magnitude exceeds 1
 *  after the first quantization pass are re-quantized with half rounding.
 *
 * @returns cbf - coded block flag (non-zero if any quantized level is set)
 *
 *******************************************************************************
 */
WORD32 ihevc_quant_iquant_flat_scale_mat_rdoq
(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth,shift_iq;
    WORD16 i2_temp;

    /* Unused in this variant (no SSD, no variable rounding factors) */
    (void)pi8_cost;
    (void)pi4_quant_round_factor_0_1;
    (void)pi4_quant_round_factor_1_2;
    pi2_q_dst_orig = pi2_q_dst;

    /* Quant initialization: log2 of the transform size */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    /* Inverse-quant shift for 8-bit content */
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            WORD16 i2_temp1;
            /* Back up the coefficients before Quantization */
            i2_temp = pi2_coeffs[j];

            /* First pass: quantize with the caller-supplied rounding offset */
            QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                               g_ihevc_quant_scales[qp_rem], qp_div,
                               log2_size, q_add);

            i2_temp1 = pi2_q_dst[j];

            /* Levels with magnitude above 1 are re-quantized with half
             * rounding (RDOQ refinement) */
            if (abs(pi2_q_dst[j]) > 1)
            {
                QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
            }

            /* Re-quantization may change the level by at most 1 and never
             * decreases its magnitude */
            ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
            ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j]));

            /* Inverse Quantization */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
                   shift_iq,
                   qp_div);
        }

        /* Advance all pointers to the next row */
        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;
    }

    /* CSBF update: derive coded-sub-block flags and zero row/col masks
     * from the quantized output, per 4x4 sub-block */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update *//* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 columns of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }

    return cbf;
}
1470
1471
1472 /**
1473 *******************************************************************************
1474 *
1475 * @brief
1476 * This function performs quantization, followed by Inverse
1477 * quantization to find transform domain SSD
1478 *
1479 * @par Description:
1480 * Performs quantization on coeffs
1481 *
1482 * @param[in] pi2_coeffs
1483 * 4x4 Coeffs
1484 *
1485 * @param[in] pi2_quant_coeff
1486 * Scaling Matrix
1487 *
1488 * @param[out] pi2_dst
1489 * Output 4x4 coefficients
1490 *
1491 * @param[in] qp_div
1492 * Quantization parameter / 6
1493 *
1494 * @param[in] qp_rem
1495 * Quantization parameter % 6
1496 *
1497 * @param[in] src_strd
1498 * Input stride
1499 *
1500 * @param[in] dst_strd
1501 * Output Stride
1502 *
1503 * @param[out] csbf
1504 * coded sub block flag
1505 *
1506 * @param[in] csbf_strd
1507 * coded sub block flag
1508 *
1509 * @param[out] zero_col
1510 * zero column flag
1511 *
 * @param[out] zero_row
 *  zero row flag
1514 *
1515 * @returns cbf
1516 * coded block flag
1517 *
1518 * @remarks
1519 * None
1520 *
1521 *******************************************************************************
1522 */
1523
/**
 *******************************************************************************
 *
 * @brief
 *  Quantization with per-coefficient variable rounding factors (weighted
 *  scaling matrix), followed by inverse quantization and transform-domain
 *  SSD accumulation into pi8_cost. A first quantization pass with zero
 *  rounding offset classifies the level; levels >= 2 are re-quantized with
 *  half rounding, level 1 with *pi4_quant_round_factor_1_2, level 0 with
 *  *pi4_quant_round_factor_0_1.
 *
 * @returns cbf - coded block flag (non-zero if any quantized level is set)
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_ssd_var_rnd_fact
(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth,shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    //WORD16 i2_temp_1;
    /* Initialize cost to zero */
    WORD32 ssd_cost = 0;

    /* Fixed rounding offset unused: offsets come from the factor arrays */
    (void)q_add;
    pi2_q_dst_orig = pi2_q_dst;


    /* Quant initialization: log2 of the transform size */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    /* Inverse-quant shift for 8-bit content */
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            /* Back up the coefficients before Quantization */
            i2_temp = pi2_coeffs[j];


            {
                /* Classification pass: quantize with zero rounding offset */
                QUANT(pi2_q_dst[j],i2_temp,
                      pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                      log2_size, 0);
                /* Levels >= 2: re-quantize with half rounding */
                if (abs(pi2_q_dst[j]) >= 2)
                {
                    QUANT(pi2_q_dst[j],i2_temp,
                          pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                          log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));

                }
                /* Level 1: use the 1-2 rounding factor for this position */
                else if (abs(pi2_q_dst[j]) >= 1)
                {
                    QUANT(pi2_q_dst[j],i2_temp,
                          pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                          log2_size, *pi4_quant_round_factor_1_2);
                }

                /* Level 0: use the 0-1 rounding factor for this position */
                else
                {
                    QUANT(pi2_q_dst[j],i2_temp,
                          pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
                          log2_size, *pi4_quant_round_factor_0_1);
                }

            }



            /* Inverse Quantization */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
                   /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation */
            val = i2_temp - pi2_iq_dst[j];
            ssd_cost += val*val;

            /* Rounding factors are per coefficient position: advance */
            pi4_quant_round_factor_0_1++;
            pi4_quant_round_factor_1_2++;
        }

        /* Advance all pointers to the next row */
        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;
    }
    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update: derive coded-sub-block flags and zero row/col masks
     * from the quantized output, per 4x4 sub-block */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update *//* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 columns of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }

    return cbf;
}
1690
ihevc_q_iq_var_rnd_fact(WORD16 * pi2_coeffs,WORD16 * pi2_quant_coeff,WORD16 * pi2_q_dst,WORD16 * pi2_iq_dst,WORD32 trans_size,WORD32 qp_div,WORD32 qp_rem,WORD32 q_add,WORD32 * pi4_quant_round_factor_0_1,WORD32 * pi4_quant_round_factor_1_2,WORD32 src_strd,WORD32 dst_q_strd,WORD32 dst_iq_strd,UWORD8 * csbf,WORD32 csbf_strd,WORD32 * zero_col,WORD32 * zero_row,WORD16 * pi2_dequant_coeff,LWORD64 * pi8_cost)1691 WORD32 ihevc_q_iq_var_rnd_fact
1692 (
1693 WORD16 *pi2_coeffs,
1694 WORD16 *pi2_quant_coeff,
1695 WORD16 *pi2_q_dst,
1696 WORD16 *pi2_iq_dst,
1697 WORD32 trans_size,
1698 WORD32 qp_div,/* qpscaled / 6 */
1699 WORD32 qp_rem,/* qpscaled % 6 */
1700 WORD32 q_add,
1701 WORD32 *pi4_quant_round_factor_0_1,
1702 WORD32 *pi4_quant_round_factor_1_2,
1703 WORD32 src_strd,
1704 WORD32 dst_q_strd,
1705 WORD32 dst_iq_strd,
1706 UWORD8 *csbf,
1707 WORD32 csbf_strd,
1708 WORD32 *zero_col,
1709 WORD32 *zero_row,
1710 WORD16 *pi2_dequant_coeff,
1711 LWORD64 *pi8_cost
1712 )
1713 {
1714 WORD32 i, j;
1715 WORD32 log2_size;
1716 WORD16 *pi2_q_dst_orig;
1717 WORD32 cbf = 0;
1718 WORD32 bit_depth,shift_iq;
1719 WORD16 i2_temp;
1720
1721 (void)q_add;
1722 (void)pi8_cost;
1723 pi2_q_dst_orig = pi2_q_dst;
1724
1725 GETRANGE(log2_size, trans_size);
1726 log2_size -= 1;
1727
1728 bit_depth = 8 + 0;
1729 shift_iq = bit_depth + log2_size - 5;
1730
1731 for(i = 0; i < trans_size; i++)
1732 {
1733 for(j = 0; j < trans_size; j++)
1734 {
1735 i2_temp = pi2_coeffs[j];
1736
1737 {
1738 QUANT(pi2_q_dst[j],i2_temp,
1739 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
1740 log2_size, 0);
1741
1742 if (abs(pi2_q_dst[j]) >= 2)
1743 {
1744 QUANT(pi2_q_dst[j],i2_temp,
1745 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
1746 log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
1747 }
1748 else if (abs(pi2_q_dst[j]) >= 1)
1749 {
1750 QUANT(pi2_q_dst[j],i2_temp,
1751 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
1752 log2_size, *pi4_quant_round_factor_1_2);
1753 }
1754 else
1755 {
1756 QUANT(pi2_q_dst[j],i2_temp,
1757 pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
1758 log2_size, *pi4_quant_round_factor_0_1);
1759 }
1760 }
1761
1762 IQUANT(pi2_iq_dst[j],
1763 pi2_q_dst[j], /*pi2_src[index*src_strd]*/
1764 pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
1765 shift_iq,
1766 qp_div);
1767
1768 pi4_quant_round_factor_0_1++;
1769 pi4_quant_round_factor_1_2++;
1770 }
1771
1772 pi2_q_dst += dst_q_strd;
1773 pi2_iq_dst += dst_iq_strd;
1774 pi2_quant_coeff += trans_size;
1775 pi2_coeffs += src_strd;
1776 pi2_dequant_coeff += trans_size;
1777 }
1778
1779 /* CSBF update */
1780 {
1781 WORD32 block_row, block_col;
1782 WORD32 row, col;
1783 WORD16 *pi2_block;
1784 UWORD32 temp_zero_col = 0;
1785 UWORD32 temp_zero_row = 0;
1786
1787 pi2_q_dst = pi2_q_dst_orig;
1788
1789 for(block_row = 0; block_row < trans_size; block_row += 4)
1790 {
1791 //block_col is incrementing by 1 for easy update of csbf pointer
1792 for(block_col = 0; block_col < trans_size / 4; block_col++)
1793 {
1794 pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
1795 *(csbf + block_col) = 0;
1796
1797 for(row = 0; row < 4; row++)
1798 {
1799 for(col = 0; col < 4; col++)
1800 {
1801 if(pi2_block[row * dst_q_strd + col] != 0)
1802 {
1803 *(csbf + block_col) = 1;
1804 break;
1805 }
1806 }
1807 if(*(csbf + block_col) == 1)
1808 {
1809 /* zero_col update *//* temp_zero_col = ~zero_col */
1810 temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
1811 // zero col can be optimized further. Now clearing the
1812 // entire 4 bits corresponding to 4 colums of 4x4 block
1813 // even if any 4x4 csbf is set
1814
1815 /* zero row update */ /* temp_zero_row = ~zero_row */
1816 temp_zero_row = (temp_zero_row) | (0xFU << block_row);
1817 // zero row can be optimized further. Now clearing the
1818 // entire 4 bits corresponding to 4 rows of 4x4 block
1819 // even if any 4x4 csbf is set
1820
1821 break;
1822 }
1823 }
1824
1825 cbf = cbf || (*(csbf + block_col)); // cbf update
1826 }
1827 csbf += csbf_strd;
1828 }
1829
1830 *zero_col = ~temp_zero_col; //final zero_col storing
1831 *zero_row = ~temp_zero_row; //final zero_row storing
1832 }
1833
1834 return cbf;
1835 }
1836
1837 /**
1838 *******************************************************************************
1839 *
1840 * @brief
1841 * This function performs quantization(using flat scale matrix), followed by
1842 * inverse quantization to find transform domain SSD; when we perform RDOQ.
 * In case the quantized value turns out to be greater than 1, we then
 * requantize using half rounding.
1845 *
1846 * @par Description:
1847 * Performs quantization on coeffs
1848 *
1849 * @param[in] pi2_coeffs
1850 * 4x4 Coeffs
1851 *
1852 * @param[in] pi2_quant_coeff
1853 * Scaling Matrix
1854 *
1855 * @param[out] pi2_dst
1856 * Output 4x4 coefficients
1857 *
1858 * @param[in] qp_div
1859 * Quantization parameter / 6
1860 *
1861 * @param[in] qp_rem
1862 * Quantization parameter % 6
1863 *
1864 * @param[in] src_strd
1865 * Input stride
1866 *
1867 * @param[in] dst_strd
1868 * Output Stride
1869 *
1870 * @param[out] csbf
1871 * coded sub block flag
1872 *
1873 * @param[in] csbf_strd
1874 * coded sub block flag
1875 *
1876 * @param[out] zero_col
1877 * zero column flag
1878 *
 * @param[out] zero_row
 *  zero row flag
1881 *
1882 * @returns cbf
1883 * coded block flag
1884 *
1885 * @remarks
1886 * None
1887 *
1888 *******************************************************************************
1889 */
1890
WORD32 ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact
(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth,shift_iq;
    WORD32 val;
    WORD16 i2_temp;
    /* Initialize cost to zero */
    WORD32 ssd_cost = 0;

    /* Fixed rounding offset unused: offsets come from the factor arrays */
    (void)q_add;
    pi2_q_dst_orig = pi2_q_dst;

    /* Quant initialization: log2 of the transform size */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    /* Inverse-quant shift for 8-bit content */
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            WORD16 i2_temp1;
            /* Back up the coefficients before Quantization */
            i2_temp = pi2_coeffs[j];

            /* Quantization (flat scaling matrix, so only the per-qp_rem
             * scale is applied) */

            {
                /* Classification pass: quantize with zero rounding offset */
                QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, 0);

                i2_temp1 = pi2_q_dst[j];

                /* Levels >= 2: re-quantize with half rounding */
                if (abs(pi2_q_dst[j]) >= 2)
                {
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
                }
                /* Level 1: use the 1-2 rounding factor for this position */
                else if (abs(pi2_q_dst[j]) >= 1)
                {
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, *pi4_quant_round_factor_1_2);
                }

                /* Level 0: use the 0-1 rounding factor for this position */
                else
                {
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, *pi4_quant_round_factor_0_1);
                }

            }


            /* Re-quantization may change the level by at most 1 relative to
             * the classification pass */
            ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);


            /* Inverse Quantization */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */
                   shift_iq,
                   qp_div);

            /* SSD Computation & Accumulation */
            val = i2_temp - pi2_iq_dst[j];
            ssd_cost += val*val;

            /* Rounding factors are per coefficient position: advance */
            pi4_quant_round_factor_0_1++;
            pi4_quant_round_factor_1_2++;
        }

        /* Advance all pointers to the next row */
        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;

    }
    /* Store the cost */
    *pi8_cost = ssd_cost;

    /* CSBF update: derive coded-sub-block flags and zero row/col masks
     * from the quantized output, per 4x4 sub-block */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update *//* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 columns of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }
    return cbf;
}
2066
/**
 *******************************************************************************
 *
 * @brief
 *  Quantization with per-coefficient variable rounding factors (flat scaling
 *  matrix), followed by inverse quantization. No SSD is computed (pi8_cost
 *  is accepted for interface compatibility but unused). A first quantization
 *  pass with zero rounding offset classifies the level; levels >= 2 are
 *  re-quantized with half rounding, level 1 with *pi4_quant_round_factor_1_2,
 *  level 0 with *pi4_quant_round_factor_0_1.
 *
 * @returns cbf - coded block flag (non-zero if any quantized level is set)
 *
 *******************************************************************************
 */
WORD32 ihevc_q_iq_flat_scale_mat_var_rnd_fact
(
    WORD16 *pi2_coeffs,
    WORD16 *pi2_quant_coeff,
    WORD16 *pi2_q_dst,
    WORD16 *pi2_iq_dst,
    WORD32 trans_size,
    WORD32 qp_div,/* qpscaled / 6 */
    WORD32 qp_rem,/* qpscaled % 6 */
    WORD32 q_add,
    WORD32 *pi4_quant_round_factor_0_1,
    WORD32 *pi4_quant_round_factor_1_2,
    WORD32 src_strd,
    WORD32 dst_q_strd,
    WORD32 dst_iq_strd,
    UWORD8 *csbf,
    WORD32 csbf_strd,
    WORD32 *zero_col,
    WORD32 *zero_row,
    WORD16 *pi2_dequant_coeff,
    LWORD64 *pi8_cost
)
{
    WORD32 i, j;
    WORD32 log2_size;
    WORD16 *pi2_q_dst_orig;
    WORD32 cbf = 0;
    WORD32 bit_depth,shift_iq;
    WORD16 i2_temp;

    /* Fixed rounding offset and cost output are unused in this variant */
    (void)q_add;
    (void)pi8_cost;
    pi2_q_dst_orig = pi2_q_dst;

    /* log2 of the transform size */
    GETRANGE(log2_size, trans_size);
    log2_size -= 1;

    /* Inverse-quant shift for 8-bit content */
    bit_depth = 8 + 0;
    shift_iq = bit_depth + log2_size - 5;

    for(i = 0; i < trans_size; i++)
    {
        for(j = 0; j < trans_size; j++)
        {
            WORD16 i2_temp1;

            /* Back up the coefficient before quantization */
            i2_temp = pi2_coeffs[j];

            {
                /* Classification pass: quantize with zero rounding offset */
                QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                   g_ihevc_quant_scales[qp_rem], qp_div,
                                   log2_size, 0);

                i2_temp1 = pi2_q_dst[j];

                /* Levels >= 2: re-quantize with half rounding */
                if (abs(pi2_q_dst[j]) >= 2)
                {
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
                }
                /* Level 1: use the 1-2 rounding factor for this position */
                else if (abs(pi2_q_dst[j]) >= 1)
                {
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, *pi4_quant_round_factor_1_2);
                }
                /* Level 0: use the 0-1 rounding factor for this position */
                else
                {
                    QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
                                       g_ihevc_quant_scales[qp_rem], qp_div,
                                       log2_size, *pi4_quant_round_factor_0_1);
                }
            }

            /* Re-quantization may change the level by at most 1 relative to
             * the classification pass */
            ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);

            /* Inverse Quantization */
            IQUANT(pi2_iq_dst[j],
                   pi2_q_dst[j], /*pi2_src[index*src_strd]*/
                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
                   shift_iq,
                   qp_div);

            /* Rounding factors are per coefficient position: advance */
            pi4_quant_round_factor_0_1++;
            pi4_quant_round_factor_1_2++;
        }

        /* Advance all pointers to the next row */
        pi2_q_dst += dst_q_strd;
        pi2_iq_dst += dst_iq_strd;
        pi2_quant_coeff += trans_size;
        pi2_coeffs += src_strd;
        pi2_dequant_coeff += trans_size;

    }

    /* CSBF update: derive coded-sub-block flags and zero row/col masks
     * from the quantized output, per 4x4 sub-block */
    {
        WORD32 block_row, block_col;
        WORD32 row, col;
        WORD16 *pi2_block;
        UWORD32 temp_zero_col = 0;
        UWORD32 temp_zero_row = 0;

        pi2_q_dst = pi2_q_dst_orig;

        for(block_row = 0; block_row < trans_size; block_row += 4)
        {
            //block_col is incrementing by 1 for easy update of csbf pointer
            for(block_col = 0; block_col < trans_size / 4; block_col++)
            {
                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
                *(csbf + block_col) = 0;

                for(row = 0; row < 4; row++)
                {
                    for(col = 0; col < 4; col++)
                    {
                        if(pi2_block[row * dst_q_strd + col] != 0)
                        {
                            *(csbf + block_col) = 1;
                            break;
                        }
                    }
                    if(*(csbf + block_col) == 1)
                    {
                        /* zero_col update *//* temp_zero_col = ~zero_col */
                        temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
                        // zero col can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 columns of 4x4 block
                        // even if any 4x4 csbf is set

                        /* zero row update */ /* temp_zero_row = ~zero_row */
                        temp_zero_row = (temp_zero_row) | (0xFU << block_row);
                        // zero row can be optimized further. Now clearing the
                        // entire 4 bits corresponding to 4 rows of 4x4 block
                        // even if any 4x4 csbf is set

                        break;
                    }
                }

                cbf = cbf || (*(csbf + block_col)); // cbf update
            }
            csbf += csbf_strd;
        }

        *zero_col = ~temp_zero_col; //final zero_col storing
        *zero_row = ~temp_zero_row; //final zero_row storing
    }
    return cbf;
}
2218