/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevc_trans_macros.h
*
* @brief
*  Macros used in the forward transform and inverse transform functions
*
* @author
*  Ittiam
*
* @remarks
*  This header defines nothing but the macros below. They reference names
*  that are not defined here and must be visible at the point of use:
*  the types LWORD64, WORD32 and WORD16; the constants MAX_TR_DYNAMIC_RANGE,
*  QUANT_SHIFT, SCALING_Q_SHIFT, QUANT_ROUND_FACTOR_Q and
*  FLAT_RESCALE_MAT_Q_SHIFT; the helpers CLIP3, CLIP_S16, SHL_NEG and SHR_NEG;
*  and abs().
*
*  Every macro expands to a brace-enclosed compound statement. All macro
*  arguments are parenthesized inside the expansion, and macro-local
*  temporaries carry a prefix and trailing underscore so that they do not
*  capture identifiers used in the arguments. Some arguments are evaluated
*  more than once (noted per macro), so arguments must be free of side effects.
*
*******************************************************************************
*/
#ifndef IHEVC_TRANS_MACROS_H_
#define IHEVC_TRANS_MACROS_H_

/**
*******************************************************************************
* @brief
*  Forward quantization of one coefficient, bit depth fixed at 8, with a
*  64-bit (LWORD64) accumulator.
*
* @par Description:
*  q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4,
*  where transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size.
*  out = CLIP_S16(sign(inp) *
*        ((abs(inp) * quant_coeff + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q)))
*         >> q_bits))
*
* @param[out] out             lvalue receiving the result, cast to WORD16
* @param[in]  inp             input coefficient (evaluated twice)
* @param[in]  quant_coeff     multiplier applied to abs(inp)
* @param[in]  qp_div          added to the right-shift amount
* @param[in]  log2_trans_size subtracted when forming transform_shift
* @param[in]  q_add           rounding term, scaled by
*                             (q_bits - QUANT_ROUND_FACTOR_Q) bits
*
* @remarks
*  The shift count (q_bits - QUANT_ROUND_FACTOR_Q) must not be negative; a
*  negative shift count is undefined behaviour in C.
*******************************************************************************
*/
#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    LWORD64 quant_acc_; \
    WORD32 quant_sign_; \
    WORD32 quant_bit_depth_, quant_tr_shift_; \
    WORD32 quant_q_bits_, quant_mult_shift_; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    quant_bit_depth_ = 8; \
    quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - quant_bit_depth_ - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_mult_shift_ = 4; \
    quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT - quant_mult_shift_; \
    \
    /* Quantize the magnitude; the sign is restored afterwards */ \
    quant_sign_ = (inp) < 0 ? -1 : 1; \
    \
    quant_acc_ = (LWORD64)(abs(inp)); \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((LWORD64)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
    \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}

/**
*******************************************************************************
* @brief
*  Forward quantization of one coefficient with caller-supplied bit depth and
*  a 64-bit (LWORD64) accumulator.
*
* @par Description:
*  Same computation as QUANT, except that transform_shift is
*  MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size.
*
* @param[out] out             lvalue receiving the result, cast to WORD16
* @param[in]  inp             input coefficient (evaluated twice)
* @param[in]  quant_coeff     multiplier applied to abs(inp)
* @param[in]  qp_div          added to the right-shift amount
* @param[in]  log2_trans_size subtracted when forming transform_shift
* @param[in]  q_add           rounding term, scaled by
*                             (q_bits - QUANT_ROUND_FACTOR_Q) bits
* @param[in]  bit_depth       subtracted when forming transform_shift
*
* @remarks
*  The shift count (q_bits - QUANT_ROUND_FACTOR_Q) must not be negative; a
*  negative shift count is undefined behaviour in C.
*******************************************************************************
*/
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    LWORD64 quant_acc_; \
    WORD32 quant_sign_; \
    WORD32 quant_tr_shift_; \
    WORD32 quant_q_bits_, quant_mult_shift_; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_mult_shift_ = 4; \
    quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT - quant_mult_shift_; \
    \
    /* Quantize the magnitude; the sign is restored afterwards */ \
    quant_sign_ = (inp) < 0 ? -1 : 1; \
    \
    quant_acc_ = (LWORD64)(abs(inp)); \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((LWORD64)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
    \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}

/* added by 100028 */
/**
*******************************************************************************
* @brief
*  Forward quantization of one coefficient, bit depth fixed at 8, with a
*  WORD32 accumulator and q_bits additionally reduced by
*  FLAT_RESCALE_MAT_Q_SHIFT.
*
* @par Description:
*  q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - 4
*           - FLAT_RESCALE_MAT_Q_SHIFT,
*  where transform_shift = MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size.
*  The remaining steps match QUANT but are carried out in WORD32.
*
* @param[out] out             lvalue receiving the result, cast to WORD16
* @param[in]  inp             input coefficient (evaluated twice)
* @param[in]  quant_coeff     multiplier applied to abs(inp)
* @param[in]  qp_div          added to the right-shift amount
* @param[in]  log2_trans_size subtracted when forming transform_shift
* @param[in]  q_add           rounding term, scaled by
*                             (q_bits - QUANT_ROUND_FACTOR_Q) bits
*
* @remarks
*  abs(inp) * quant_coeff plus the rounding term is accumulated in WORD32, so
*  it must fit in that type. The shift count (q_bits - QUANT_ROUND_FACTOR_Q)
*  must not be negative; a negative shift count is undefined behaviour in C.
*  NOTE(review): whether every (qp_div, log2_trans_size) combination used by
*  callers keeps that count non-negative depends on constants defined outside
*  this file - confirm.
*******************************************************************************
*/
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
{ \
    WORD32 quant_acc_; \
    WORD32 quant_sign_; \
    WORD32 quant_bit_depth_, quant_tr_shift_; \
    WORD32 quant_q_bits_, quant_mult_shift_; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    quant_bit_depth_ = 8; \
    quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - quant_bit_depth_ - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_mult_shift_ = 4; \
    quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT - quant_mult_shift_ - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    \
    /* Quantize the magnitude; the sign is restored afterwards */ \
    quant_sign_ = (inp) < 0 ? -1 : 1; \
    \
    quant_acc_ = (WORD32)(abs(inp)); \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((WORD32)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
    \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}

/**
*******************************************************************************
* @brief
*  Forward quantization of one coefficient with caller-supplied bit depth, a
*  WORD32 accumulator and q_bits additionally reduced by
*  FLAT_RESCALE_MAT_Q_SHIFT.
*
* @par Description:
*  Same computation as QUANT_NO_WEIGHTMAT, except that transform_shift is
*  MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size.
*
* @param[out] out             lvalue receiving the result, cast to WORD16
* @param[in]  inp             input coefficient (evaluated twice)
* @param[in]  quant_coeff     multiplier applied to abs(inp)
* @param[in]  qp_div          added to the right-shift amount
* @param[in]  log2_trans_size subtracted when forming transform_shift
* @param[in]  q_add           rounding term, scaled by
*                             (q_bits - QUANT_ROUND_FACTOR_Q) bits
* @param[in]  bit_depth       subtracted when forming transform_shift
*
* @remarks
*  abs(inp) * quant_coeff plus the rounding term is accumulated in WORD32, so
*  it must fit in that type. The shift count (q_bits - QUANT_ROUND_FACTOR_Q)
*  must not be negative; a negative shift count is undefined behaviour in C.
*******************************************************************************
*/
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
{ \
    WORD32 quant_acc_; \
    WORD32 quant_sign_; \
    WORD32 quant_tr_shift_; \
    WORD32 quant_q_bits_, quant_mult_shift_; \
    \
    /* q_bits and q_add calculation */ \
    /* To be moved outside in neon. To be computed once per transform call */ \
    quant_tr_shift_ = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size); \
    /* quant_coeff are multiplied by 16. Instead of multiplying, */ \
    /* the division factor q_bits is reduced by 4 */ \
    quant_mult_shift_ = 4; \
    quant_q_bits_ = QUANT_SHIFT + (qp_div) + quant_tr_shift_ + SCALING_Q_SHIFT - quant_mult_shift_ - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
    \
    /* Quantize the magnitude; the sign is restored afterwards */ \
    quant_sign_ = (inp) < 0 ? -1 : 1; \
    \
    quant_acc_ = (WORD32)(abs(inp)); \
    quant_acc_ = quant_acc_ * (quant_coeff); \
    quant_acc_ = quant_acc_ + (((WORD32)(q_add)) << (quant_q_bits_ - QUANT_ROUND_FACTOR_Q)); \
    quant_acc_ = quant_acc_ >> quant_q_bits_; \
    \
    quant_acc_ = quant_acc_ * quant_sign_; \
    (out) = (WORD16) CLIP_S16(quant_acc_); \
}

/* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned */

/* Inverse quantization other than 4x4 */
/* No clipping is needed for "pi2_src"(coefficients) */
/**
*******************************************************************************
* @brief
*  Inverse quantization of one coefficient without clipping the input.
*
* @par Description:
*  res = CLIP_S16(SHR_NEG(coeff * dequant_coeff + add_iq, shift_iq - qp_div)),
*  where add_iq = SHL_NEG(1, shift_iq - qp_div - 1). The product and the sum
*  are held in WORD32.
*
* @param[out] res           lvalue receiving the clipped result
* @param[in]  coeff         coefficient, e.g. pi2_src[index*src_strd]
* @param[in]  dequant_coeff scale factor, e.g.
*                           pi2_dequant_coeff[index*trans_size] *
*                           g_ihevc_iquant_scales[qp_rem]
* @param[in]  shift_iq      base shift (evaluated twice)
* @param[in]  qp_div        subtracted from shift_iq (evaluated twice)
*
* @remarks
*  SHL_NEG and SHR_NEG are defined outside this file; presumably they shift in
*  the opposite direction when the shift count is negative - confirm against
*  their definitions.
*******************************************************************************
*/
#define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    WORD32 iq_acc_, iq_round_; \
    \
    /* To be moved outside in neon. To be computed once per transform call */ \
    iq_round_ = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
    \
    iq_acc_ = (coeff) * (dequant_coeff); \
    iq_acc_ = iq_acc_ + iq_round_; \
    iq_acc_ = SHR_NEG(iq_acc_, ((shift_iq) - (qp_div))); \
    \
    (res) = CLIP_S16(iq_acc_); \
}

/* 4x4 inverse quantization */
/* Options : */
/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div >= shift_iq)" or 16 bits if "(qp_div < shift_iq)"*/
/* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
/**
*******************************************************************************
* @brief
*  Inverse quantization of one coefficient, clipping the input coefficient
*  before scaling.
*
* @par Description:
*  The coefficient is first clipped with CLIP3 to a signed range of
*  coeff_bit_range bits: 10 bits when qp_div > shift_iq, 16 bits otherwise.
*  The clipped value then goes through the same multiply, round, shift and
*  CLIP_S16 steps as IQUANT.
*
* @param[out] res           lvalue receiving the clipped result
* @param[in]  coeff         coefficient, e.g. pi2_src[index*src_strd]
*                           (passed to CLIP3, which may evaluate it more
*                           than once)
* @param[in]  dequant_coeff scale factor, e.g.
*                           pi2_dequant_coeff[index*trans_size] *
*                           g_ihevc_iquant_scales[qp_rem]
* @param[in]  shift_iq      base shift (evaluated three times)
* @param[in]  qp_div        subtracted from shift_iq (evaluated three times)
*
* @remarks
*  NOTE(review): the 10-bit range is selected only for qp_div strictly greater
*  than shift_iq, whereas option 1 in the comment above this macro mentions
*  "qp_div >= shift_iq". The code is left as it was - confirm which is
*  intended.
*******************************************************************************
*/
#define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div) \
{ \
    WORD32 iq_clipped_, iq_acc_; \
    WORD32 iq_coeff_min_, iq_coeff_max_; \
    WORD32 iq_coeff_bits_; \
    WORD32 iq_round_; \
    \
    /* To be moved outside in neon. To be computed once per transform call */ \
    iq_round_ = SHL_NEG(1, ((shift_iq) - (qp_div) - 1)); \
    \
    /* Narrow the allowed coefficient range when qp_div exceeds shift_iq */ \
    iq_coeff_bits_ = 16; \
    if((qp_div) > (shift_iq)) \
    { \
        iq_coeff_bits_ = 10; \
    } \
    \
    iq_coeff_min_ = -(1 << (iq_coeff_bits_ - 1)); \
    iq_coeff_max_ = (1 << (iq_coeff_bits_ - 1)) - 1; \
    \
    iq_clipped_ = CLIP3((coeff), iq_coeff_min_, iq_coeff_max_); \
    \
    iq_acc_ = iq_clipped_ * (dequant_coeff); \
    iq_acc_ = iq_acc_ + iq_round_; \
    iq_acc_ = SHR_NEG(iq_acc_, ((shift_iq) - (qp_div))); \
    \
    (res) = CLIP_S16(iq_acc_); \
}

#endif /* IHEVC_TRANS_MACROS_H_ */